oksh-noxz

[fork] Portable OpenBSD ksh, based on the Public Domain Korn Shell (pdksh).
git clone https://noxz.tech/git/oksh-noxz.git
Log | Files | Tags

lex.c
1/*	$OpenBSD: lex.c,v 1.79 2023/02/08 17:22:10 kn Exp $	*/
2
3/*
4 * lexical analysis and source input
5 */
6
7#include <ctype.h>
8#include <errno.h>
9#include <libgen.h>
10#include <stdio.h>
11#include <string.h>
12#include <unistd.h>
13
14#include "sh.h"
15
/*
 * states while lexing word
 *
 * yylex() keeps the current state in a local variable and mirrors it in
 * the ls_state member of the top Lex_state entry.  Nested constructs are
 * entered with PUSH_STATE() and left with POP_STATE().  SINVALID is the
 * value stored in the sentinel entry (states[0]) at the bottom of the stack.
 */
#define	SINVALID	-1	/* invalid state */
#define	SBASE	0		/* outside any lexical constructs */
#define	SWORD	1		/* implicit quoting for substitute() */
#define	SLETPAREN 2		/* inside (( )), implicit quoting */
#define	SSQUOTE	3		/* inside '' */
#define	SDQUOTE	4		/* inside "" */
#define	SBRACE	5		/* inside ${} */
#define	SCSPAREN 6		/* inside $() */
#define	SBQUOTE	7		/* inside `` */
#define	SASPAREN 8		/* inside $(( )) */
#define SHEREDELIM 9		/* parsing <<,<<- delimiter */
#define SHEREDQUOTE 10		/* parsing " in <<,<<- delimiter */
#define SPATTERN 11		/* parsing *(...|...) pattern (*+?@!) */
#define STBRACE 12		/* parsing ${..[#%]..} */
#define	SBRACEQ	13		/* inside "${}" */
34
/* Structure to keep track of the lexing state and the various pieces of info
 * needed for each particular state.
 *
 * ls_state holds one of the S* state constants.  ls_info is a union, so only
 * the member that belongs to the current ls_state is meaningful; the
 * ls_scsparen / ls_sasparen / ls_sletparen / ls_sbquote macros are shorthand
 * for reaching the matching union member through a Lex_state.
 */
typedef struct lex_state Lex_state;
struct lex_state {
	int ls_state;		/* one of the S* constants */
	union {
		/* $(...) */
		struct scsparen_info {
			int nparen;	/* count open parenthesis */
			/* Quoting sub-state used by yylex(): 0 normal,
			 * 1 backslash in normal mode, 2 double quotes,
			 * 3 backslash in double quotes, 4 single quotes.
			 */
			int csstate;	/* XXX remove */
#define ls_scsparen ls_info.u_scsparen
		} u_scsparen;

		/* $((...)) */
		struct sasparen_info {
			int nparen;	/* count open parenthesis */
			int start;	/* marks start of $(( in output str */
#define ls_sasparen ls_info.u_sasparen
		} u_sasparen;

		/* ((...)) */
		struct sletparen_info {
			int nparen;	/* count open parenthesis */
#define ls_sletparen ls_info.u_sletparen
		} u_sletparen;

		/* `...` */
		struct sbquote_info {
			int indquotes;	/* true if in double quotes: "`...`" */
#define ls_sbquote ls_info.u_sbquote
		} u_sbquote;

		/* Link between state blocks; yylex() sets it to NULL in the
		 * sentinel entry of its on-stack block.
		 */
		Lex_state *base;	/* used to point to next state block */
	} ls_info;
};
71
/*
 * Bounds of the state block currently in use.  PUSH_STATE() compares the
 * advanced state pointer against end, POP_STATE() compares the retreated
 * pointer against base; on a match they call push_state_() / pop_state_().
 */
typedef struct State_info State_info;
struct State_info {
	Lex_state	*base;	/* first entry of the current block */
	Lex_state	*end;	/* one past the last entry of the current block */
};
77
78
/* helpers private to the lexer */
static void	readhere(struct ioword *);
static int	getsc__(void);
static void	getsc_line(Source *);
static int	getsc_bn(void);
static char	*get_brace_var(XString *, char *);
static int	arraysub(char **);
static const char *ungetsc(int);
static void	gethere(void);
static Lex_state *push_state_(State_info *, Lex_state *);
static Lex_state *pop_state_(State_info *, Lex_state *);
static char	*special_prompt_expand(char *);
static int	dopprompt(const char *, int, const char **, int);
int		promptlen(const char *cp, const char **spp);

/* Tested by the getsc() fast path; reset to 0 at the start of yylex(). */
static int backslash_skip;
/* Nesting counter, raised while lexing comments, single quotes and
 * non-IOEVAL here documents; reset to 0 at the start of yylex().
 * NOTE(review): presumably consulted by getsc_bn() - confirm there.
 */
static int ignore_backslash_newline;

Source *source;		/* yyparse/yylex source */
YYSTYPE	yylval;		/* result from yylex */
/* Queue of here documents; gethere() reads entries heres..herep and then
 * resets herep to heres.
 */
struct ioword *heres[HERES], **herep;
/* Unquoted copy of the last word lexed by yylex(), '\0' padded; set to the
 * empty string when the word contained anything but plain characters.
 */
char	ident[IDENT+1];

char   **history;	/* saved commands */
char   **histptr;	/* last history item */
uint32_t histsize;	/* history size */
104
/* optimized getsc_bn() */
/* Fast path: take the next character straight from source->str when it is
 * neither NUL nor a backslash and backslash_skip is clear; otherwise defer
 * to getsc_bn().
 */
#define getsc()		(*source->str != '\0' && *source->str != '\\' \
			 && !backslash_skip ? *source->str++ : getsc_bn())
/* optimized getsc__() */
/* Fast path: take the next character from source->str unless it is NUL,
 * in which case getsc__() is called.
 */
#define	getsc_()	((*source->str != '\0') ? *source->str++ : getsc__())

/* number of Lex_state entries in one state block */
#define STATE_BSIZE	32

/* Enter lexer state (s).  Uses the caller's locals statep, state_info and
 * state; calls push_state_() when the current block is exhausted.
 */
#define PUSH_STATE(s)	do { \
			    if (++statep == state_info.end) \
				statep = push_state_(&state_info, statep); \
			    state = statep->ls_state = (s); \
			} while (0)

/* Return to the enclosing lexer state.  Uses the caller's locals statep,
 * state_info and state; calls pop_state_() when the bottom of the current
 * block is reached.
 */
#define POP_STATE()	do { \
			    if (--statep == state_info.base) \
				statep = pop_state_(&state_info, statep); \
			    state = statep->ls_state; \
			} while (0)
124
125
126
127/*
128 * Lexical analyzer
129 *
130 * tokens are not regular expressions, they are LL(1).
131 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
132 * hence the state stack.
133 */
134
135int
136yylex(int cf)
137{
138	Lex_state states[STATE_BSIZE], *statep;
139	State_info state_info;
140	int c, state;
141	XString ws;		/* expandable output word */
142	char *wp;		/* output word pointer */
143	char *sp, *dp;
144	int c2;
145
146
147  Again:
148	states[0].ls_state = SINVALID;
149	states[0].ls_info.base = NULL;
150	statep = &states[1];
151	state_info.base = states;
152	state_info.end = &states[STATE_BSIZE];
153
154	Xinit(ws, wp, 64, ATEMP);
155
156	backslash_skip = 0;
157	ignore_backslash_newline = 0;
158
159	if (cf&ONEWORD)
160		state = SWORD;
161	else if (cf&LETEXPR) {
162		*wp++ = OQUOTE;	 /* enclose arguments in (double) quotes */
163		state = SLETPAREN;
164		statep->ls_sletparen.nparen = 0;
165	} else {		/* normal lexing */
166		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
167		while ((c = getsc()) == ' ' || c == '\t')
168			;
169		if (c == '#') {
170			ignore_backslash_newline++;
171			while ((c = getsc()) != '\0' && c != '\n')
172				;
173			ignore_backslash_newline--;
174		}
175		ungetsc(c);
176	}
177	if (source->flags & SF_ALIAS) {	/* trailing ' ' in alias definition */
178		source->flags &= ~SF_ALIAS;
179		/* In POSIX mode, a trailing space only counts if we are
180		 * parsing a simple command
181		 */
182		if (!Flag(FPOSIX) || (cf & CMDWORD))
183			cf |= ALIAS;
184	}
185
186	/* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
187	statep->ls_state = state;
188
189	/* collect non-special or quoted characters to form word */
190	while (!((c = getsc()) == 0 ||
191	    ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
192		Xcheck(ws, wp);
193		switch (state) {
194		case SBASE:
195			if (Flag(FCSHHISTORY) && (source->flags & SF_TTY) &&
196			    c == '!') {
197				char **replace = NULL;
198				int get, i;
199				char match[200] = { 0 }, *str = match;
200				size_t mlen;
201
202				c2 = getsc();
203				if (c2 == '\0' || c2 == ' ' || c2 == '\t')
204					;
205				else if (c2 == '!')
206					replace = hist_get_newest(0);
207				else if (isdigit(c2) || c2 == '-' ||
208				    isalpha(c2)) {
209					get = !isalpha(c2);
210
211					*str++ = c2;
212					do {
213						if ((c2 = getsc()) == '\0')
214							break;
215						if (c2 == '\t' || c2 == ' ' ||
216						    c2 == '\n') {
217							ungetsc(c2);
218							break;
219						}
220						*str++ = c2;
221					} while (str < &match[sizeof(match)-1]);
222					*str = '\0';
223
224					if (get) {
225						int h = findhistrel(match);
226						if (h >= 0)
227							replace = &history[h];
228					} else {
229						int h = findhist(-1, 0, match, true);
230						if (h >= 0)
231							replace = &history[h];
232					}
233				}
234
235				/*
236				 * XXX ksh history buffer saves un-expanded
237				 * commands. Until the history buffer code is
238				 * changed to contain expanded commands, we
239				 * ignore the bad commands (spinning sucks)
240				 */
241				if (replace && **replace == '!')
242					ungetsc(c2);
243				else if (replace) {
244					Source *s;
245
246					/* do not strdup replacement via alloc */
247					s = pushs(SREREAD, source->areap);
248					s->start = s->str = *replace;
249					s->next = source;
250					s->u.freeme = NULL;
251					source = s;
252					continue;
253				} else if (*match != '\0') {
254					/* restore what followed the '!' */
255					mlen = strlen(match);
256					for (i = mlen-1; i >= 0; i--)
257						ungetsc(match[i]);
258				} else
259					ungetsc(c2);
260			}
261			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
262				*wp = EOS; /* temporary */
263				if (is_wdvarname(Xstring(ws, wp), false)) {
264					char *p, *tmp;
265
266					if (arraysub(&tmp)) {
267						*wp++ = CHAR;
268						*wp++ = c;
269						for (p = tmp; *p; ) {
270							Xcheck(ws, wp);
271							*wp++ = CHAR;
272							*wp++ = *p++;
273						}
274						afree(tmp, ATEMP);
275						break;
276					} else {
277						Source *s;
278
279						s = pushs(SREREAD,
280							  source->areap);
281						s->start = s->str
282							= s->u.freeme = tmp;
283						s->next = source;
284						source = s;
285					}
286				}
287				*wp++ = CHAR;
288				*wp++ = c;
289				break;
290			}
291			/* FALLTHROUGH */
292		  Sbase1:	/* includes *(...|...) pattern (*+?@!) */
293			if (c == '*' || c == '@' || c == '+' || c == '?' ||
294			    c == '!') {
295				c2 = getsc();
296				if (c2 == '(' /*)*/ ) {
297					*wp++ = OPAT;
298					*wp++ = c;
299					PUSH_STATE(SPATTERN);
300					break;
301				}
302				ungetsc(c2);
303			}
304			/* FALLTHROUGH */
305		  Sbase2:	/* doesn't include *(...|...) pattern (*+?@!) */
306			switch (c) {
307			case '\\':
308				c = getsc();
309				if (c) /* trailing \ is lost */
310					*wp++ = QCHAR, *wp++ = c;
311				break;
312			case '\'':
313				if ((cf & HEREDOC) || state == SBRACEQ) {
314					*wp++ = CHAR, *wp++ = c;
315					break;
316				}
317				*wp++ = OQUOTE;
318				ignore_backslash_newline++;
319				PUSH_STATE(SSQUOTE);
320				break;
321			case '"':
322				*wp++ = OQUOTE;
323				PUSH_STATE(SDQUOTE);
324				break;
325			default:
326				goto Subst;
327			}
328			break;
329
330		  Subst:
331			switch (c) {
332			case '\\':
333				c = getsc();
334				switch (c) {
335				case '\\':
336				case '$': case '`':
337					*wp++ = QCHAR, *wp++ = c;
338					break;
339				case '"':
340					if ((cf & HEREDOC) == 0) {
341						*wp++ = QCHAR, *wp++ = c;
342						break;
343					}
344					/* FALLTHROUGH */
345				default:
346					if (cf & UNESCAPE) {
347						*wp++ = QCHAR, *wp++ = c;
348						break;
349					}
350					Xcheck(ws, wp);
351					if (c) { /* trailing \ is lost */
352						*wp++ = CHAR, *wp++ = '\\';
353						*wp++ = CHAR, *wp++ = c;
354					}
355					break;
356				}
357				break;
358			case '$':
359				c = getsc();
360				if (c == '(') /*)*/ {
361					c = getsc();
362					if (c == '(') /*)*/ {
363						PUSH_STATE(SASPAREN);
364						statep->ls_sasparen.nparen = 2;
365						statep->ls_sasparen.start =
366						    Xsavepos(ws, wp);
367						*wp++ = EXPRSUB;
368					} else {
369						ungetsc(c);
370						PUSH_STATE(SCSPAREN);
371						statep->ls_scsparen.nparen = 1;
372						statep->ls_scsparen.csstate = 0;
373						*wp++ = COMSUB;
374					}
375				} else if (c == '{') /*}*/ {
376					*wp++ = OSUBST;
377					*wp++ = '{'; /*}*/
378					wp = get_brace_var(&ws, wp);
379					c = getsc();
380					/* allow :# and :% (ksh88 compat) */
381					if (c == ':') {
382						*wp++ = CHAR, *wp++ = c;
383						c = getsc();
384					}
385					/* If this is a trim operation,
386					 * treat (,|,) specially in STBRACE.
387					 */
388					if (c == '#' || c == '%') {
389						ungetsc(c);
390						PUSH_STATE(STBRACE);
391					} else {
392						ungetsc(c);
393						if (state == SDQUOTE ||
394						    state == SBRACEQ)
395							PUSH_STATE(SBRACEQ);
396						else
397							PUSH_STATE(SBRACE);
398					}
399				} else if (ctype(c, C_ALPHA)) {
400					*wp++ = OSUBST;
401					*wp++ = 'X';
402					do {
403						Xcheck(ws, wp);
404						*wp++ = c;
405						c = getsc();
406					} while (ctype(c, C_ALPHA) || digit(c));
407					*wp++ = '\0';
408					*wp++ = CSUBST;
409					*wp++ = 'X';
410					ungetsc(c);
411				} else if (ctype(c, C_VAR1) || digit(c)) {
412					Xcheck(ws, wp);
413					*wp++ = OSUBST;
414					*wp++ = 'X';
415					*wp++ = c;
416					*wp++ = '\0';
417					*wp++ = CSUBST;
418					*wp++ = 'X';
419				} else {
420					*wp++ = CHAR, *wp++ = '$';
421					ungetsc(c);
422				}
423				break;
424			case '`':
425				PUSH_STATE(SBQUOTE);
426				*wp++ = COMSUB;
427				/* Need to know if we are inside double quotes
428				 * since sh/at&t-ksh translate the \" to " in
429				 * "`..\"..`".
430				 */
431				statep->ls_sbquote.indquotes = 0;
432				Lex_state *s = statep;
433				Lex_state *base = state_info.base;
434				while (1) {
435					for (; s != base; s--) {
436						if (s->ls_state == SDQUOTE) {
437							statep->ls_sbquote.indquotes = 1;
438							break;
439						}
440					}
441					if (s != base)
442						break;
443					if (!(s = s->ls_info.base))
444						break;
445					base = s-- - STATE_BSIZE;
446				}
447				break;
448			default:
449				*wp++ = CHAR, *wp++ = c;
450			}
451			break;
452
453		case SSQUOTE:
454			if (c == '\'') {
455				POP_STATE();
456				if (state == SBRACEQ) {
457					*wp++ = CHAR, *wp++ = c;
458					break;
459				}
460				*wp++ = CQUOTE;
461				ignore_backslash_newline--;
462			} else
463				*wp++ = QCHAR, *wp++ = c;
464			break;
465
466		case SDQUOTE:
467			if (c == '"') {
468				POP_STATE();
469				*wp++ = CQUOTE;
470			} else
471				goto Subst;
472			break;
473
474		case SCSPAREN: /* $( .. ) */
475			/* todo: deal with $(...) quoting properly
476			 * kludge to partly fake quoting inside $(..): doesn't
477			 * really work because nested $(..) or ${..} inside
478			 * double quotes aren't dealt with.
479			 */
480			switch (statep->ls_scsparen.csstate) {
481			case 0: /* normal */
482				switch (c) {
483				case '(':
484					statep->ls_scsparen.nparen++;
485					break;
486				case ')':
487					statep->ls_scsparen.nparen--;
488					break;
489				case '\\':
490					statep->ls_scsparen.csstate = 1;
491					break;
492				case '"':
493					statep->ls_scsparen.csstate = 2;
494					break;
495				case '\'':
496					statep->ls_scsparen.csstate = 4;
497					ignore_backslash_newline++;
498					break;
499				}
500				break;
501
502			case 1: /* backslash in normal mode */
503			case 3: /* backslash in double quotes */
504				--statep->ls_scsparen.csstate;
505				break;
506
507			case 2: /* double quotes */
508				if (c == '"')
509					statep->ls_scsparen.csstate = 0;
510				else if (c == '\\')
511					statep->ls_scsparen.csstate = 3;
512				break;
513
514			case 4: /* single quotes */
515				if (c == '\'') {
516					statep->ls_scsparen.csstate = 0;
517					ignore_backslash_newline--;
518				}
519				break;
520			}
521			if (statep->ls_scsparen.nparen == 0) {
522				POP_STATE();
523				*wp++ = 0; /* end of COMSUB */
524			} else
525				*wp++ = c;
526			break;
527
528		case SASPAREN: /* $(( .. )) */
529			/* todo: deal with $((...); (...)) properly */
530			/* XXX should nest using existing state machine
531			 * (embed "..", $(...), etc.) */
532			if (c == '(')
533				statep->ls_sasparen.nparen++;
534			else if (c == ')') {
535				statep->ls_sasparen.nparen--;
536				if (statep->ls_sasparen.nparen == 1) {
537					/*(*/
538					if ((c2 = getsc()) == ')') {
539						POP_STATE();
540						*wp++ = 0; /* end of EXPRSUB */
541						break;
542					} else {
543						char *s;
544
545						ungetsc(c2);
546						/* mismatched parenthesis -
547						 * assume we were really
548						 * parsing a $(..) expression
549						 */
550						s = Xrestpos(ws, wp,
551						    statep->ls_sasparen.start);
552						memmove(s + 1, s, wp - s);
553						*s++ = COMSUB;
554						*s = '('; /*)*/
555						wp++;
556						statep->ls_scsparen.nparen = 1;
557						statep->ls_scsparen.csstate = 0;
558						state = statep->ls_state =
559						    SCSPAREN;
560					}
561				}
562			}
563			*wp++ = c;
564			break;
565
566		case SBRACEQ:
567			/*{*/
568			if (c == '}') {
569				POP_STATE();
570				*wp++ = CSUBST;
571				*wp++ = /*{*/ '}';
572			} else
573				goto Sbase2;
574			break;
575
576		case SBRACE:
577			/*{*/
578			if (c == '}') {
579				POP_STATE();
580				*wp++ = CSUBST;
581				*wp++ = /*{*/ '}';
582			} else
583				goto Sbase1;
584			break;
585
586		case STBRACE:
587			/* Same as SBRACE, except (,|,) treated specially */
588			/*{*/
589			if (c == '}') {
590				POP_STATE();
591				*wp++ = CSUBST;
592				*wp++ = /*{*/ '}';
593			} else if (c == '|') {
594				*wp++ = SPAT;
595			} else if (c == '(') {
596				*wp++ = OPAT;
597				*wp++ = ' ';	/* simile for @ */
598				PUSH_STATE(SPATTERN);
599			} else
600				goto Sbase1;
601			break;
602
603		case SBQUOTE:
604			if (c == '`') {
605				*wp++ = 0;
606				POP_STATE();
607			} else if (c == '\\') {
608				switch (c = getsc()) {
609				case '\\':
610				case '$': case '`':
611					*wp++ = c;
612					break;
613				case '"':
614					if (statep->ls_sbquote.indquotes) {
615						*wp++ = c;
616						break;
617					}
618					/* FALLTHROUGH */
619				default:
620					if (c) { /* trailing \ is lost */
621						*wp++ = '\\';
622						*wp++ = c;
623					}
624					break;
625				}
626			} else
627				*wp++ = c;
628			break;
629
630		case SWORD:	/* ONEWORD */
631			goto Subst;
632
633		case SLETPAREN:	/* LETEXPR: (( ... )) */
634			/*(*/
635			if (c == ')') {
636				if (statep->ls_sletparen.nparen > 0)
637				    --statep->ls_sletparen.nparen;
638				/*(*/
639				else if ((c2 = getsc()) == ')') {
640					c = 0;
641					*wp++ = CQUOTE;
642					goto Done;
643				} else
644					ungetsc(c2);
645			} else if (c == '(')
646				/* parenthesis inside quotes and backslashes
647				 * are lost, but at&t ksh doesn't count them
648				 * either
649				 */
650				++statep->ls_sletparen.nparen;
651			goto Sbase2;
652
653		case SHEREDELIM:	/* <<,<<- delimiter */
654			/* XXX chuck this state (and the next) - use
655			 * the existing states ($ and \`..` should be
656			 * stripped of their specialness after the
657			 * fact).
658			 */
659			/* here delimiters need a special case since
660			 * $ and `..` are not to be treated specially
661			 */
662			if (c == '\\') {
663				c = getsc();
664				if (c) { /* trailing \ is lost */
665					*wp++ = QCHAR;
666					*wp++ = c;
667				}
668			} else if (c == '\'') {
669				PUSH_STATE(SSQUOTE);
670				*wp++ = OQUOTE;
671				ignore_backslash_newline++;
672			} else if (c == '"') {
673				state = statep->ls_state = SHEREDQUOTE;
674				*wp++ = OQUOTE;
675			} else {
676				*wp++ = CHAR;
677				*wp++ = c;
678			}
679			break;
680
681		case SHEREDQUOTE:	/* " in <<,<<- delimiter */
682			if (c == '"') {
683				*wp++ = CQUOTE;
684				state = statep->ls_state = SHEREDELIM;
685			} else {
686				if (c == '\\') {
687					switch (c = getsc()) {
688					case '\\': case '"':
689					case '$': case '`':
690						break;
691					default:
692						if (c) { /* trailing \ lost */
693							*wp++ = CHAR;
694							*wp++ = '\\';
695						}
696						break;
697					}
698				}
699				*wp++ = CHAR;
700				*wp++ = c;
701			}
702			break;
703
704		case SPATTERN:	/* in *(...|...) pattern (*+?@!) */
705			if ( /*(*/ c == ')') {
706				*wp++ = CPAT;
707				POP_STATE();
708			} else if (c == '|') {
709				*wp++ = SPAT;
710			} else if (c == '(') {
711				*wp++ = OPAT;
712				*wp++ = ' ';	/* simile for @ */
713				PUSH_STATE(SPATTERN);
714			} else
715				goto Sbase1;
716			break;
717		}
718	}
719Done:
720	Xcheck(ws, wp);
721	if (statep != &states[1])
722		/* XXX figure out what is missing */
723		yyerror("no closing quote\n");
724
725	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
726	if (state == SHEREDELIM)
727		state = SBASE;
728
729	dp = Xstring(ws, wp);
730	if ((c == '<' || c == '>') && state == SBASE &&
731	    ((c2 = Xlength(ws, wp)) == 0 ||
732	    (c2 == 2 && dp[0] == CHAR && digit(dp[1])))) {
733		struct ioword *iop = alloc(sizeof(*iop), ATEMP);
734
735		if (c2 == 2)
736			iop->unit = dp[1] - '0';
737		else
738			iop->unit = c == '>'; /* 0 for <, 1 for > */
739
740		c2 = getsc();
741		/* <<, >>, <> are ok, >< is not */
742		if (c == c2 || (c == '<' && c2 == '>')) {
743			iop->flag = c == c2 ?
744			    (c == '>' ? IOCAT : IOHERE) : IORDWR;
745			if (iop->flag == IOHERE) {
746				if ((c2 = getsc()) == '-')
747					iop->flag |= IOSKIP;
748				else
749					ungetsc(c2);
750			}
751		} else if (c2 == '&')
752			iop->flag = IODUP | (c == '<' ? IORDUP : 0);
753		else {
754			iop->flag = c == '>' ? IOWRITE : IOREAD;
755			if (c == '>' && c2 == '|')
756				iop->flag |= IOCLOB;
757			else
758				ungetsc(c2);
759		}
760
761		iop->name = NULL;
762		iop->delim = NULL;
763		iop->heredoc = NULL;
764		Xfree(ws, wp);	/* free word */
765		yylval.iop = iop;
766		return REDIR;
767	}
768
769	if (wp == dp && state == SBASE) {
770		Xfree(ws, wp);	/* free word */
771		/* no word, process LEX1 character */
772		switch (c) {
773		default:
774			return c;
775
776		case '|':
777		case '&':
778		case ';':
779			if ((c2 = getsc()) == c)
780				c = (c == ';') ? BREAK :
781				    (c == '|') ? LOGOR :
782				    (c == '&') ? LOGAND :
783				    YYERRCODE;
784			else if (c == '|' && c2 == '&')
785				c = COPROC;
786			else
787				ungetsc(c2);
788			return c;
789
790		case '\n':
791			gethere();
792			if (cf & CONTIN)
793				goto Again;
794			return c;
795
796		case '(':  /*)*/
797			if (!Flag(FSH)) {
798				if ((c2 = getsc()) == '(') /*)*/
799					/* XXX need to handle ((...); (...)) */
800					c = MDPAREN;
801				else
802					ungetsc(c2);
803			}
804			return c;
805		  /*(*/
806		case ')':
807			return c;
808		}
809	}
810
811	*wp++ = EOS;		/* terminate word */
812	yylval.cp = Xclose(ws, wp);
813	if (state == SWORD || state == SLETPAREN)	/* ONEWORD? */
814		return LWORD;
815	ungetsc(c);		/* unget terminator */
816
817	/* copy word to unprefixed string ident */
818	for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
819		*dp++ = *sp++;
820	/* Make sure the ident array stays '\0' padded */
821	memset(dp, 0, (ident+IDENT) - dp + 1);
822	if (c != EOS)
823		*ident = '\0';	/* word is not unquoted */
824
825	if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
826		struct tbl *p;
827		int h = hash(ident);
828
829		/* { */
830		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
831		    (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}')) {
832			afree(yylval.cp, ATEMP);
833			return p->val.i;
834		}
835		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
836		    (p->flag & ISSET)) {
837			Source *s;
838
839			for (s = source; s->type == SALIAS; s = s->next)
840				if (s->u.tblp == p)
841					return LWORD;
842			/* push alias expansion */
843			s = pushs(SALIAS, source->areap);
844			s->start = s->str = p->val.s;
845			s->u.tblp = p;
846			s->next = source;
847			source = s;
848			afree(yylval.cp, ATEMP);
849			goto Again;
850		}
851	}
852
853	return LWORD;
854}
855
856static void
857gethere(void)
858{
859	struct ioword **p;
860
861	for (p = heres; p < herep; p++)
862		readhere(*p);
863	herep = heres;
864}
865
866/*
867 * read "<<word" text into temp file
868 */
869
870static void
871readhere(struct ioword *iop)
872{
873	int c;
874	char *volatile eof;
875	char *eofp;
876	int skiptabs;
877	XString xs;
878	char *xp;
879	int xpos;
880
881	eof = evalstr(iop->delim, 0);
882
883	if (!(iop->flag & IOEVAL))
884		ignore_backslash_newline++;
885
886	Xinit(xs, xp, 256, ATEMP);
887
888	for (;;) {
889		eofp = eof;
890		skiptabs = iop->flag & IOSKIP;
891		xpos = Xsavepos(xs, xp);
892		while ((c = getsc()) != 0) {
893			if (skiptabs) {
894				if (c == '\t')
895					continue;
896				skiptabs = 0;
897			}
898			if (c != *eofp)
899				break;
900			Xcheck(xs, xp);
901			Xput(xs, xp, c);
902			eofp++;
903		}
904		/* Allow EOF here so commands with out trailing newlines
905		 * will work (eg, ksh -c '...', $(...), etc).
906		 */
907		if (*eofp == '\0' && (c == 0 || c == '\n')) {
908			xp = Xrestpos(xs, xp, xpos);
909			break;
910		}
911		ungetsc(c);
912		while ((c = getsc()) != '\n') {
913			if (c == 0)
914				yyerror("here document `%s' unclosed\n", eof);
915			Xcheck(xs, xp);
916			Xput(xs, xp, c);
917		}
918		Xcheck(xs, xp);
919		Xput(xs, xp, c);
920	}
921	Xput(xs, xp, '\0');
922	iop->heredoc = Xclose(xs, xp);
923
924	if (!(iop->flag & IOEVAL))
925		ignore_backslash_newline--;
926}
927
928void
929yyerror(const char *fmt, ...)
930{
931	va_list va;
932
933	/* pop aliases and re-reads */
934	while (source->type == SALIAS || source->type == SREREAD)
935		source = source->next;
936	source->str = null;	/* zap pending input */
937
938	error_prefix(true);
939	va_start(va, fmt);
940	shf_vfprintf(shl_out, fmt, va);
941	va_end(va);
942	errorf(NULL);
943}
944
945/*
946 * input for yylex with alias expansion
947 */
948
949Source *
950pushs(int type, Area *areap)
951{
952	Source *s;
953
954	s = alloc(sizeof(Source), areap);
955	s->type = type;
956	s->str = null;
957	s->start = NULL;
958	s->line = 0;
959	s->cmd_offset = 0;
960	s->errline = 0;
961	s->file = NULL;
962	s->flags = 0;
963	s->next = NULL;
964	s->areap = areap;
965	if (type == SFILE || type == SSTDIN) {
966		char *dummy;
967		Xinit(s->xs, dummy, 256, s->areap);
968	} else
969		memset(&s->xs, 0, sizeof(s->xs));
970	return s;
971}
972
973static int
974getsc__(void)
975{
976	Source *s = source;
977	int c;
978
979	while ((c = *s->str++) == 0) {
980		s->str = NULL;		/* return 0 for EOF by default */
981		switch (s->type) {
982		case SEOF:
983			s->str = null;
984			return 0;
985
986		case SSTDIN:
987		case SFILE:
988			getsc_line(s);
989			break;
990
991		case SWSTR:
992			break;
993
994		case SSTRING:
995			break;
996
997		case SWORDS:
998			s->start = s->str = *s->u.strv++;
999			s->type = SWORDSEP;
1000			break;
1001
1002		case SWORDSEP:
1003			if (*s->u.strv == NULL) {
1004				s->start = s->str = "\n";
1005				s->type = SEOF;
1006			} else {
1007				s->start = s->str = " ";
1008				s->type = SWORDS;
1009			}
1010			break;
1011
1012		case SALIAS:
1013			if (s->flags & SF_ALIASEND) {
1014				/* pass on an unused SF_ALIAS flag */
1015				source = s->next;
1016				source->flags |= s->flags & SF_ALIAS;
1017				s = source;
1018			} else if (*s->u.tblp->val.s &&
1019			    isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1])) {
1020				source = s = s->next;	/* pop source stack */
1021				/* Note that this alias ended with a space,
1022				 * enabling alias expansion on the following
1023				 * word.
1024				 */
1025				s->flags |= SF_ALIAS;
1026			} else {
1027				/* At this point, we need to keep the current
1028				 * alias in the source list so recursive
1029				 * aliases can be detected and we also need
1030				 * to return the next character.  Do this
1031				 * by temporarily popping the alias to get
1032				 * the next character and then put it back
1033				 * in the source list with the SF_ALIASEND
1034				 * flag set.
1035				 */
1036				source = s->next;	/* pop source stack */
1037				source->flags |= s->flags & SF_ALIAS;
1038				c = getsc__();
1039				if (c) {
1040					s->flags |= SF_ALIASEND;
1041					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1042					s->start = s->str = s->ugbuf;
1043					s->next = source;
1044					source = s;
1045				} else {
1046					s = source;
1047					/* avoid reading eof twice */
1048					s->str = NULL;
1049					break;
1050				}
1051			}
1052			continue;
1053
1054		case SREREAD:
1055			if (s->start != s->ugbuf) /* yuck */
1056				afree(s->u.freeme, ATEMP);
1057			source = s = s->next;
1058			continue;
1059		}
1060		if (s->str == NULL) {
1061			s->type = SEOF;
1062			s->start = s->str = null;
1063			return '\0';
1064		}
1065		if (s->flags & SF_ECHO) {
1066			shf_puts(s->str, shl_out);
1067			shf_flush(shl_out);
1068		}
1069	}
1070	return c;
1071}
1072
1073static void
1074getsc_line(Source *s)
1075{
1076	char *xp = Xstring(s->xs, xp);
1077	int interactive = Flag(FTALKING) && s->type == SSTDIN;
1078	int have_tty = interactive && (s->flags & SF_TTY);
1079
1080	/* Done here to ensure nothing odd happens when a timeout occurs */
1081	XcheckN(s->xs, xp, LINE);
1082	*xp = '\0';
1083	s->start = s->str = xp;
1084
1085	if (have_tty && ksh_tmout) {
1086		ksh_tmout_state = TMOUT_READING;
1087		alarm(ksh_tmout);
1088	}
1089	if (have_tty && (0
1090#ifdef VI
1091	    || Flag(FVI)
1092#endif /* VI */
1093#ifdef EMACS
1094	    || Flag(FEMACS) || Flag(FGMACS)
1095#endif /* EMACS */
1096	    )) {
1097		int nread;
1098
1099		nread = x_read(xp, LINE);
1100		if (nread < 0)	/* read error */
1101			nread = 0;
1102		xp[nread] = '\0';
1103		xp += nread;
1104	} else {
1105		if (interactive) {
1106			pprompt(prompt, 0);
1107		} else
1108			s->line++;
1109
1110		while (1) {
1111			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1112
1113			if (!p && shf_error(s->u.shf) &&
1114			    s->u.shf->errno_ == EINTR) {
1115				shf_clearerr(s->u.shf);
1116				if (trap)
1117					runtraps(0);
1118				continue;
1119			}
1120			if (!p || (xp = p, xp[-1] == '\n'))
1121				break;
1122			/* double buffer size */
1123			xp++; /* move past null so doubling works... */
1124			XcheckN(s->xs, xp, Xlength(s->xs, xp));
1125			xp--; /* ...and move back again */
1126		}
1127		/* flush any unwanted input so other programs/builtins
1128		 * can read it.  Not very optimal, but less error prone
1129		 * than flushing else where, dealing with redirections,
1130		 * etc..
1131		 * todo: reduce size of shf buffer (~128?) if SSTDIN
1132		 */
1133		if (s->type == SSTDIN)
1134			shf_flush(s->u.shf);
1135	}
1136	/* XXX: temporary kludge to restore source after a
1137	 * trap may have been executed.
1138	 */
1139	source = s;
1140	if (have_tty && ksh_tmout) {
1141		ksh_tmout_state = TMOUT_EXECUTING;
1142		alarm(0);
1143	}
1144	s->start = s->str = Xstring(s->xs, xp);
1145	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1146	/* Note: if input is all nulls, this is not eof */
1147	if (Xlength(s->xs, xp) == 0) { /* EOF */
1148		if (s->type == SFILE)
1149			shf_fdclose(s->u.shf);
1150		s->str = NULL;
1151	} else if (interactive) {
1152		char *p = Xstring(s->xs, xp);
1153		if (cur_prompt == PS1)
1154			while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
1155				p++;
1156		if (*p) {
1157			s->line++;
1158			histsave(s->line, s->str, 1);
1159		}
1160		/* Set term title */
1161		char *d = str_val(global("DISPLAY"));
1162		char str[41];
1163		memset(str, '\0', 41);
1164		for (int i = 0; i < 40; i++) {
1165			str[i] = p[i];
1166			if ((str[i] > 0x7e) || (str[i] < 0x20)) {
1167				str[i] = '\0';
1168				break;
1169			}
1170		}
1171		if (d[0])
1172			shellf("%c]0;%s%c", '\033', str, '\007');
1173	}
1174	if (interactive)
1175		set_prompt(PS2);
1176}
1177
/*
 * Rewrite every "\$" sequence in str to "\p", in place.
 * Returns str.
 */
static char *
special_prompt_expand(char *str)
{
	char *hit;

	for (hit = strstr(str, "\\$"); hit != NULL; hit = strstr(hit, "\\$"))
		hit[1] = 'p';
	return str;
}
1188
1189void
1190set_prompt(int to)
1191{
1192	char *ps1;
1193	Area *saved_atemp;
1194
1195	cur_prompt = to;
1196
1197	switch (to) {
1198	case PS1: /* command */
1199		ps1 = str_save(str_val(global("PS1")), ATEMP);
1200		saved_atemp = ATEMP;	/* ps1 is freed by substitute() */
1201		newenv(E_ERRH);
1202		if (sigsetjmp(genv->jbuf, 0)) {
1203			prompt = safe_prompt;
1204			/* Don't print an error - assume it has already
1205			 * been printed.  Reason is we may have forked
1206			 * to run a command and the child may be
1207			 * unwinding its stack through this code as it
1208			 * exits.
1209			 */
1210		} else {
1211			/* expand \$ before other substitutions are done */
1212			char *tmp = special_prompt_expand(ps1);
1213			prompt = str_save(substitute(tmp, 0), saved_atemp);
1214		}
1215		quitenv(NULL);
1216		break;
1217	case PS2: /* command continuation */
1218		prompt = str_val(global("PS2"));
1219		break;
1220	}
1221}
1222
1223static int
1224dopprompt(const char *sp, int ntruncate, const char **spp, int doprint)
1225{
1226	char strbuf[1024], tmpbuf[1024], *p, *str, nbuf[32], delimiter = '\0';
1227	int len, c, n, totlen = 0, indelimit = 0, counting = 1, delimitthis;
1228	const char *cp = sp;
1229	struct tm *tm;
1230	time_t t;
1231
1232	if (*cp && cp[1] == '\r') {
1233		delimiter = *cp;
1234		cp += 2;
1235	}
1236
1237	while (*cp != 0) {
1238		delimitthis = 0;
1239		if (indelimit && *cp != delimiter)
1240			;
1241		else if (*cp == '\n' || *cp == '\r') {
1242			totlen = 0;
1243			sp = cp + 1;
1244		} else if (*cp == '\t') {
1245			if (counting)
1246				totlen = (totlen | 7) + 1;
1247		} else if (*cp == delimiter) {
1248			indelimit = !indelimit;
1249			delimitthis = 1;
1250		}
1251
1252		if (*cp == '\\') {
1253			cp++;
1254			if (!*cp)
1255				break;
1256			/* Expand \h and \$ for both, sh(1) and ksh(1) */
1257			if (Flag(FSH) && !(*cp == 'h' || *cp == 'p'))
1258				snprintf(strbuf, sizeof strbuf, "\\%c", *cp);
1259			else switch (*cp) {
1260			case 'a':	/* '\' 'a' bell */
1261				strbuf[0] = '\007';
1262				strbuf[1] = '\0';
1263				break;
1264			case 'd':	/* '\' 'd' Dow Mon DD */
1265				time(&t);
1266				tm = localtime(&t);
1267				strftime(strbuf, sizeof strbuf, "%a %b %d", tm);
1268				break;
1269			case 'D': /* '\' 'D' '{' strftime format '}' */
1270				p = strchr(cp + 2, '}');
1271				if (cp[1] != '{' || p == NULL) {
1272					snprintf(strbuf, sizeof strbuf,
1273					    "\\%c", *cp);
1274					break;
1275				}
1276				strlcpy(tmpbuf, cp + 2, sizeof tmpbuf);
1277				p = strchr(tmpbuf, '}');
1278				if (p)
1279					*p = '\0';
1280				time(&t);
1281				tm = localtime(&t);
1282				strftime(strbuf, sizeof strbuf, tmpbuf, tm);
1283				cp = strchr(cp + 2, '}');
1284				break;
1285			case 'e':	/* '\' 'e' escape */
1286				strbuf[0] = '\033';
1287				strbuf[1] = '\0';
1288				break;
1289			case 'h':	/* '\' 'h' shortened hostname */
1290				gethostname(strbuf, sizeof strbuf);
1291				p = strchr(strbuf, '.');
1292				if (p)
1293					*p = '\0';
1294				break;
1295			case 'H':	/* '\' 'H' full hostname */
1296				gethostname(strbuf, sizeof strbuf);
1297				break;
1298			case 'j':	/* '\' 'j' number of jobs */
1299				snprintf(strbuf, sizeof strbuf, "%d",
1300				    j_njobs());
1301				break;
1302			case 'l':	/* '\' 'l' basename of tty */
1303				p = ttyname(0);
1304				if (p)
1305					p = basename(p);
1306				if (p)
1307					strlcpy(strbuf, p, sizeof strbuf);
1308				break;
1309			case 'n':	/* '\' 'n' newline */
1310				strbuf[0] = '\n';
1311				strbuf[1] = '\0';
1312				totlen = 0;	/* reset for prompt re-print */
1313				sp = cp + 1;
1314				break;
1315			case 'p':	/* '\' '$' $ or # */
1316				strbuf[0] = ksheuid ? '$' : '#';
1317				strbuf[1] = '\0';
1318				break;
1319			case 'r':	/* '\' 'r' return */
1320				strbuf[0] = '\r';
1321				strbuf[1] = '\0';
1322				totlen = 0;	/* reset for prompt re-print */
1323				sp = cp + 1;
1324				break;
1325			case 's':	/* '\' 's' basename $0 */
1326				strlcpy(strbuf, kshname, sizeof strbuf);
1327				break;
1328			case 't':	/* '\' 't' 24 hour HH:MM:SS */
1329				time(&t);
1330				tm = localtime(&t);
1331				strftime(strbuf, sizeof strbuf, "%T", tm);
1332				break;
1333			case 'T':	/* '\' 'T' 12 hour HH:MM:SS */
1334				time(&t);
1335				tm = localtime(&t);
1336				strftime(strbuf, sizeof strbuf, "%l:%M:%S", tm);
1337				break;
1338			case '@':	/* '\' '@' 12 hour am/pm format */
1339				time(&t);
1340				tm = localtime(&t);
1341				strftime(strbuf, sizeof strbuf, "%r", tm);
1342				break;
1343			case 'A':	/* '\' 'A' 24 hour HH:MM */
1344				time(&t);
1345				tm = localtime(&t);
1346				strftime(strbuf, sizeof strbuf, "%R", tm);
1347				break;
1348			case 'u':	/* '\' 'u' username */
1349				strlcpy(strbuf, username, sizeof strbuf);
1350				break;
1351#ifndef SMALL
1352			case 'v':	/* '\' 'v' version (short) */
1353				p = strchr(ksh_version, ' ');
1354				if (p)
1355					p = strchr(p + 1, ' ');
1356				if (p) {
1357					p++;
1358					strlcpy(strbuf, p, sizeof strbuf);
1359					p = strchr(strbuf, ' ');
1360					if (p)
1361						*p = '\0';
1362				}
1363				break;
1364			case 'V':	/* '\' 'V' version (long) */
1365				strlcpy(strbuf, ksh_version, sizeof strbuf);
1366				break;
1367#endif /* SMALL */
1368			case 'w':	/* '\' 'w' cwd */
1369				p = str_val(global("PWD"));
1370				n = strlen(str_val(global("HOME")));
1371				if (strcmp(p, "/") == 0) {
1372					strlcpy(strbuf, p, sizeof strbuf);
1373				} else if (strcmp(p, str_val(global("HOME"))) == 0) {
1374					strbuf[0] = '~';
1375					strbuf[1] = '\0';
1376				} else if (strncmp(p, str_val(global("HOME")), n)
1377				    == 0 && p[n] == '/') {
1378					snprintf(strbuf, sizeof strbuf, "~/%s",
1379					    str_val(global("PWD")) + n + 1);
1380				} else
1381					strlcpy(strbuf, p, sizeof strbuf);
1382				break;
1383			case 'W':	/* '\' 'W' basename(cwd) */
1384				p = str_val(global("PWD"));
1385				if (strcmp(p, str_val(global("HOME"))) == 0) {
1386					strbuf[0] = '~';
1387					strbuf[1] = '\0';
1388				} else
1389					strlcpy(strbuf, basename(p), sizeof strbuf);
1390				break;
1391			case '!':	/* '\' '!' history line number */
1392				snprintf(strbuf, sizeof strbuf, "%d",
1393				    source->line + 1);
1394				break;
1395			case '#':	/* '\' '#' command line number */
1396				snprintf(strbuf, sizeof strbuf, "%d",
1397				    source->line - source->cmd_offset + 1);
1398				break;
1399			case '0':	/* '\' '#' '#' ' #' octal numeric handling */
1400			case '1':
1401			case '2':
1402			case '3':
1403			case '4':
1404			case '5':
1405			case '6':
1406			case '7':
1407				if ((cp[1] > '7' || cp[1] < '0') ||
1408				    (cp[2] > '7' || cp[2] < '0')) {
1409					snprintf(strbuf, sizeof strbuf,
1410					    "\\%c", *cp);
1411					break;
1412				}
1413				n = (cp[0] - '0') * 8 * 8 + (cp[1] - '0') * 8 +
1414				    (cp[2] - '0');
1415				snprintf(strbuf, sizeof strbuf, "%c", n);
1416				cp += 2;
1417				break;
1418			case '\\':	/* '\' '\' */
1419				strbuf[0] = '\\';
1420				strbuf[1] = '\0';
1421				break;
1422			case '[': /* '\' '[' .... stop counting */
1423				strbuf[0] = '\0';
1424				counting = 0;
1425				break;
1426			case ']': /* '\' ']' restart counting */
1427				strbuf[0] = '\0';
1428				counting = 1;
1429				break;
1430
1431			default:
1432				snprintf(strbuf, sizeof strbuf, "\\%c", *cp);
1433				break;
1434			}
1435			cp++;
1436
1437			str = strbuf;
1438			len = strlen(str);
1439			if (ntruncate) {
1440				if (ntruncate >= len) {
1441					ntruncate -= len;
1442					continue;
1443				}
1444				str += ntruncate;
1445				len -= ntruncate;
1446				ntruncate = 0;
1447			}
1448			if (doprint)
1449				shf_write(str, len, shl_out);
1450			if (counting && !indelimit && !delimitthis)
1451				totlen += len;
1452			continue;
1453		} else if (*cp != '!')
1454			c = *cp++;
1455		else if (*++cp == '!')
1456			c = *cp++;
1457		else {
1458			shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
1459			    source->line + 1);
1460			len = strlen(nbuf);
1461			if (ntruncate) {
1462				if (ntruncate >= len) {
1463					ntruncate -= len;
1464					continue;
1465				}
1466				p += ntruncate;
1467				len -= ntruncate;
1468				ntruncate = 0;
1469			}
1470			if (doprint)
1471				shf_write(p, len, shl_out);
1472			if (counting && !indelimit && !delimitthis)
1473				totlen += len;
1474			continue;
1475		}
1476		if (counting && ntruncate)
1477			--ntruncate;
1478		else if (doprint) {
1479			shf_putc(c, shl_out);
1480		}
1481		if (counting && !indelimit && !delimitthis)
1482			totlen++;
1483	}
1484	if (doprint)
1485		shf_flush(shl_out);
1486	if (spp)
1487		*spp = sp;
1488	return (totlen);
1489}
1490
/*
 * Print the expanded prompt cp, skipping its first ntruncate characters.
 * The computed width is not needed here and is discarded.
 */
void
pprompt(const char *cp, int ntruncate)
{
	(void)dopprompt(cp, ntruncate, NULL, 1);
}
1496
/*
 * Measure the prompt cp without printing it.  The width reported by
 * dopprompt() is returned; spp is handed through to dopprompt() unchanged.
 */
int
promptlen(const char *cp, const char **spp)
{
	int width;

	width = dopprompt(cp, 0, spp, 0);
	return width;
}
1502
1503/* Read the variable part of a ${...} expression (ie, up to but not including
1504 * the :[-+?=#%] or close-brace.
1505 */
1506static char *
1507get_brace_var(XString *wsp, char *wp)
1508{
1509	enum parse_state {
1510			   PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1511			   PS_NUMBER, PS_VAR1, PS_END
1512			 }
1513		state;
1514	char c;
1515
1516	state = PS_INITIAL;
1517	while (1) {
1518		c = getsc();
1519		/* State machine to figure out where the variable part ends. */
1520		switch (state) {
1521		case PS_INITIAL:
1522			if (c == '#') {
1523				state = PS_SAW_HASH;
1524				break;
1525			}
1526			/* FALLTHROUGH */
1527		case PS_SAW_HASH:
1528			if (letter(c))
1529				state = PS_IDENT;
1530			else if (digit(c))
1531				state = PS_NUMBER;
1532			else if (ctype(c, C_VAR1))
1533				state = PS_VAR1;
1534			else
1535				state = PS_END;
1536			break;
1537		case PS_IDENT:
1538			if (!letnum(c)) {
1539				state = PS_END;
1540				if (c == '[') {
1541					char *tmp, *p;
1542
1543					if (!arraysub(&tmp))
1544						yyerror("missing ]\n");
1545					*wp++ = c;
1546					for (p = tmp; *p; ) {
1547						Xcheck(*wsp, wp);
1548						*wp++ = *p++;
1549					}
1550					afree(tmp, ATEMP);
1551					c = getsc(); /* the ] */
1552				}
1553			}
1554			break;
1555		case PS_NUMBER:
1556			if (!digit(c))
1557				state = PS_END;
1558			break;
1559		case PS_VAR1:
1560			state = PS_END;
1561			break;
1562		case PS_END: /* keep gcc happy */
1563			break;
1564		}
1565		if (state == PS_END) {
1566			*wp++ = '\0';	/* end of variable part */
1567			ungetsc(c);
1568			break;
1569		}
1570		Xcheck(*wsp, wp);
1571		*wp++ = c;
1572	}
1573	return wp;
1574}
1575
1576/*
1577 * Save an array subscript - returns true if matching bracket found, false
1578 * if eof or newline was found.
1579 * (Returned string double null terminated)
1580 */
1581static int
1582arraysub(char **strp)
1583{
1584	XString ws;
1585	char	*wp;
1586	char	c;
1587	int	depth = 1;	/* we are just past the initial [ */
1588
1589	Xinit(ws, wp, 32, ATEMP);
1590
1591	do {
1592		c = getsc();
1593		Xcheck(ws, wp);
1594		*wp++ = c;
1595		if (c == '[')
1596			depth++;
1597		else if (c == ']')
1598			depth--;
1599	} while (depth > 0 && c && c != '\n');
1600
1601	*wp++ = '\0';
1602	*strp = Xclose(ws, wp);
1603
1604	return depth == 0 ? 1 : 0;
1605}
1606
1607/* Unget a char: handles case when we are already at the start of the buffer */
1608static const char *
1609ungetsc(int c)
1610{
1611	if (backslash_skip)
1612		backslash_skip--;
1613	/* Don't unget eof... */
1614	if (source->str == null && c == '\0')
1615		return source->str;
1616	if (source->str > source->start)
1617		source->str--;
1618	else {
1619		Source *s;
1620
1621		s = pushs(SREREAD, source->areap);
1622		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1623		s->start = s->str = s->ugbuf;
1624		s->next = source;
1625		source = s;
1626	}
1627	return source->str;
1628}
1629
1630
1631/* Called to get a char that isn't a \newline sequence. */
1632static int
1633getsc_bn(void)
1634{
1635	int c, c2;
1636
1637	if (ignore_backslash_newline)
1638		return getsc_();
1639
1640	if (backslash_skip == 1) {
1641		backslash_skip = 2;
1642		return getsc_();
1643	}
1644
1645	backslash_skip = 0;
1646
1647	while (1) {
1648		c = getsc_();
1649		if (c == '\\') {
1650			if ((c2 = getsc_()) == '\n')
1651				/* ignore the \newline; get the next char... */
1652				continue;
1653			ungetsc(c2);
1654			backslash_skip = 1;
1655		}
1656		return c;
1657	}
1658}
1659
1660static Lex_state *
1661push_state_(State_info *si, Lex_state *old_end)
1662{
1663	Lex_state *new = areallocarray(NULL, STATE_BSIZE,
1664	    sizeof(Lex_state), ATEMP);
1665
1666	new[0].ls_info.base = old_end;
1667	si->base = &new[0];
1668	si->end = &new[STATE_BSIZE];
1669	return &new[1];
1670}
1671
1672static Lex_state *
1673pop_state_(State_info *si, Lex_state *old_end)
1674{
1675	Lex_state *old_base = si->base;
1676
1677	si->base = old_end->ls_info.base - STATE_BSIZE;
1678	si->end = old_end->ls_info.base;
1679
1680	afree(old_base, ATEMP);
1681
1682	return si->base + STATE_BSIZE - 1;
1683}