• R/O
  • SSH
  • HTTPS

yash: Commit


Commit MetaInfo

Revision: 3873 (tree)
Time: 2018-09-19 00:55:14
Author: magicant

Log Message

Rewrite parser functions (token-based parsing)

One test case is set to be skipped as it reveals an existing bug.

Change Summary

Incremental Difference

--- yash/branches/token_based_parser/parser.c (revision 3872)
+++ yash/branches/token_based_parser/parser.c (revision 3873)
@@ -807,6 +807,7 @@
807807 }
808808
809809 /* Rewind `ps->index` to `oldindex' and decrease `ps->info->lineno' accordingly.
810+ * Note that `ps->next_index' is not updated in this function.
810811 *
811812 * You MUST use this function when rewinding the index in order to correctly
812813 * rewind the line number. The following pattern of code does not work because
@@ -1182,7 +1183,8 @@
11821183 return NULL;
11831184 }
11841185
1185-/* Performs alias substitution with the given parse state. */
1186+/* Performs alias substitution with the given parse state. Proceeds to the
1187+ * next token if substitution occurred. */
11861188 bool psubstitute_alias(parsestate_T *ps, substaliasflags_T flags)
11871189 {
11881190 if (!ps->enable_alias)
@@ -1189,8 +1191,14 @@
11891191 return false;
11901192
11911193 size_t len = count_name_length(ps, is_alias_name_char);
1192- return substitute_alias_range(
1194+ bool substituted = substitute_alias_range(
11931195 &ps->src, ps->index, ps->index + len, &ps->aliases, flags);
1196+ if (substituted) {
1197+ /* parse the result of the substitution. */
1198+ ps->next_index = ps->index;
1199+ next_token(ps);
1200+ }
1201+ return substituted;
11941202 }
11951203
11961204 /* Performs alias substitution recursively. This should not be used where the
@@ -1198,8 +1206,7 @@
11981206 * be alias-substituted. */
11991207 void psubstitute_alias_recursive(parsestate_T *ps, substaliasflags_T flags)
12001208 {
1201- while (psubstitute_alias(ps, flags))
1202- skip_blanks_and_comment(ps);
1209+ while (psubstitute_alias(ps, flags)) ;
12031210 }
12041211
12051212 /* Parses commands.
@@ -1216,18 +1223,26 @@
12161223
12171224 if (!toeol && !ps->info->interactive)
12181225 ps->error = false;
1226+ if (ps->tokentype == TT_UNKNOWN)
1227+ next_token(ps);
1228+
12191229 while (!ps->error) {
12201230 if (toeol) {
1221- skip_blanks_and_comment(ps);
1222- if (ps->src.contents[ps->index] == L'\n') {
1231+ if (ps->tokentype == TT_NEWLINE) {
12231232 next_line(ps);
12241233 need_separator = false;
1225- if (ps->src.contents[ps->index] != L'\0')
1234+ if (ps->next_index != ps->src.length) {
1235+ next_token(ps);
12261236 continue;
1237+ }
1238+ wordfree(ps->token);
1239+ ps->token = NULL;
1240+ ps->index = ps->next_index;
1241+ ps->tokentype = TT_END_OF_INPUT;
12271242 }
1228- if (ps->src.contents[ps->index] == L'\0') {
1243+ if (ps->tokentype == TT_END_OF_INPUT) {
12291244 break;
1230- } else if (ps->src.contents[ps->index] == L')') {
1245+ } else if (ps->tokentype == TT_RPAREN) {
12311246 serror(ps, get_errmsg_unexpected_token(L")"), L")");
12321247 break;
12331248 } else if (need_separator) {
@@ -1235,11 +1250,10 @@
12351250 break;
12361251 }
12371252 } else {
1238- if (skip_to_next_token(ps))
1253+ if (parse_newline_list(ps))
12391254 need_separator = false;
1240- if (need_separator
1241- || ps->src.contents[ps->index] == L'\0'
1242- || check_closing_token(ps))
1255+ if (need_separator || ps->tokentype == TT_END_OF_INPUT ||
1256+ check_closing_token(ps))
12431257 break;
12441258 }
12451259
@@ -1253,13 +1267,11 @@
12531267 continue;
12541268 }
12551269
1256- need_separator = true;
1257- ensure_buffer(ps, 2);
1258- if (ps->src.contents[ps->index] == L'&'
1259- || (ps->src.contents[ps->index] == L';'
1260- && ps->src.contents[ps->index + 1] != L';')) {
1261- ps->index++;
1270+ if (ps->tokentype != TT_AMP && ps->tokentype != TT_SEMICOLON) {
1271+ need_separator = true;
1272+ } else {
12621273 need_separator = false;
1274+ next_token(ps);
12631275 }
12641276 }
12651277 if (!toeol)
@@ -1291,7 +1303,7 @@
12911303 and_or_T *result = xmalloc(sizeof *result);
12921304 result->next = NULL;
12931305 result->ao_pipelines = p;
1294- result->ao_async = (ps->src.contents[ps->index] == L'&');
1306+ result->ao_async = (ps->tokentype == TT_AMP);
12951307 return result;
12961308 }
12971309
@@ -1318,19 +1330,15 @@
13181330 break;
13191331 }
13201332
1321- ensure_buffer(ps, 2);
1322- if (ps->src.contents[ps->index] == L'&'
1323- && ps->src.contents[ps->index + 1] == L'&') {
1333+ if (ps->tokentype == TT_AMPAMP)
13241334 cond = true;
1325- } else if (ps->src.contents[ps->index] == L'|'
1326- && ps->src.contents[ps->index + 1] == L'|') {
1335+ else if (ps->tokentype == TT_PIPEPIPE)
13271336 cond = false;
1328- } else {
1337+ else
13291338 break;
1330- }
1331- ps->index += 2;
1339+ next_token(ps);
13321340 next:
1333- skip_to_next_token(ps);
1341+ parse_newline_list(ps);
13341342 }
13351343 return first;
13361344 }
@@ -1343,15 +1351,13 @@
13431351 bool neg;
13441352 command_T *c;
13451353
1346- ensure_buffer(ps, 2);
1347- if (has_token(ps, L"!")) {
1354+ if (ps->tokentype == TT_BANG) {
13481355 neg = true;
1349- ps->index += 1;
1350- if (posixly_correct && ps->src.contents[ps->index] == L'(')
1356+ if (posixly_correct && ps->src.contents[ps->next_index] == L'(')
13511357 serror(ps, Ngt("ksh-like extended glob pattern `!(...)' "
13521358 "is not supported"));
1359+ next_token(ps);
13531360 do {
1354- skip_blanks_and_comment(ps);
13551361 c = parse_commands_in_pipeline(ps);
13561362 if (ps->reparse)
13571363 assert(c == NULL);
@@ -1394,15 +1400,12 @@
13941400 break;
13951401 }
13961402
1397- ensure_buffer(ps, 2);
1398- if (ps->src.contents[ps->index] == L'|' &&
1399- ps->src.contents[ps->index + 1] != L'|') {
1400- ps->index++;
1401- } else {
1403+ if (ps->tokentype != TT_PIPE)
14021404 break;
1403- }
1405+
1406+ next_token(ps);
14041407 next:
1405- skip_to_next_token(ps);
1408+ parse_newline_list(ps);
14061409 }
14071410 return first;
14081411 }
@@ -1419,17 +1422,16 @@
14191422 if (t != NULL) {
14201423 serror(ps, get_errmsg_unexpected_token(t), t);
14211424 return NULL;
1422- } else if (has_token(ps, L"!")) {
1425+ } else if (ps->tokentype == TT_BANG) {
14231426 serror(ps, get_errmsg_unexpected_token(L"!"), L"!");
14241427 return NULL;
1425- } else if (has_token(ps, L"in")) {
1428+ } else if (ps->tokentype == TT_IN) {
14261429 serror(ps, get_errmsg_unexpected_token(L"in"), L"in");
14271430 return NULL;
1428- } else if (ps->src.contents[ps->index] == L'(') {
1431+ } else if (ps->tokentype == TT_LPAREN) {
14291432 return parse_compound_command(ps, L"(");
1430- } else if (is_command_delimiter_char(ps->src.contents[ps->index])) {
1431- if (ps->src.contents[ps->index] == L'\0' ||
1432- ps->src.contents[ps->index] == L'\n')
1433+ } else if (is_command_delimiter_tokentype(ps->tokentype)) {
1434+ if (ps->tokentype == TT_END_OF_INPUT || ps->tokentype == TT_NEWLINE)
14331435 serror(ps, Ngt("a command is missing at the end of input"));
14341436 else
14351437 serror(ps, Ngt("a command is missing before `%lc'"),
@@ -1474,8 +1476,7 @@
14741476
14751477 c->c_assigns = NULL;
14761478 c->c_redirs = NULL;
1477- while (ensure_buffer(ps, 1),
1478- !is_command_delimiter_char(ps->src.contents[ps->index])) {
1479+ while (!is_command_delimiter_tokentype(ps->tokentype)) {
14791480 if ((redir = tryparse_redirect(ps)) != NULL) {
14801481 *redirlastp = redir;
14811482 redirlastp = &redir->next;
@@ -1506,16 +1507,16 @@
15061507
15071508 assert(*redirlastp == NULL);
15081509 pl_init(&wordlist);
1509- while (ensure_buffer(ps, 1),
1510- !is_command_delimiter_char(ps->src.contents[ps->index])) {
1510+ while (!is_command_delimiter_tokentype(ps->tokentype)) {
15111511 if (!first)
15121512 psubstitute_alias_recursive(ps, 0);
15131513 if ((redir = tryparse_redirect(ps)) != NULL) {
15141514 *redirlastp = redir;
15151515 redirlastp = &redir->next;
1516- } else if ((word = parse_word(ps, false)) != NULL) {
1516+ } else if ((word = ps->token) != NULL) {
1517+ ps->token = NULL;
15171518 pl_add(&wordlist, word);
1518- skip_blanks_and_comment(ps);
1519+ next_token(ps);
15191520 first = false;
15201521 } else {
15211522 break;
@@ -1540,37 +1541,55 @@
15401541 }
15411542 }
15421543
1543-/* If there is an assignment at the current position, parses and returns it.
1544- * Otherwise, returns NULL without moving the position. */
1544+/* Re-parses the current token as an assignment word. If successful, the token
1545+ * is consumed and the assignment is returned. For an array assignment, all
1546+ * tokens up to (and including) the closing parenthesis are consumed. If
1547+ * unsuccessful, the current token is not modified and NULL is returned. */
15451548 assign_T *tryparse_assignment(parsestate_T *ps)
15461549 {
1547- if (iswdigit(ps->src.contents[ps->index]))
1550+ if (ps->token == NULL)
15481551 return NULL;
1552+ if (ps->token->wu_type != WT_STRING)
1553+ return NULL;
15491554
1550- size_t namelen = count_name_length(ps, is_name_char);
1551- if (namelen == 0 || ps->src.contents[ps->index + namelen] != L'=')
1555+ const wchar_t *nameend = skip_name(ps->token->wu_string, is_name_char);
1556+ size_t namelen = nameend - ps->token->wu_string;
1557+ if (namelen == 0 || *nameend != L'=')
15521558 return NULL;
15531559
15541560 assign_T *result = xmalloc(sizeof *result);
15551561 result->next = NULL;
1556- result->a_name = xwcsndup(&ps->src.contents[ps->index], namelen);
1557- ps->index += namelen + 1;
1562+ result->a_name = xwcsndup(ps->token->wu_string, namelen);
15581563
1559- ensure_buffer(ps, 1);
1560- if (posixly_correct || ps->src.contents[ps->index] != L'(') {
1564+ /* remove the name and '=' from the token */
1565+ size_t index_after_first_token = ps->next_index;
1566+ wordunit_T *first_token = ps->token;
1567+ ps->token = NULL;
1568+ wmemmove(first_token->wu_string, &nameend[1], wcslen(&nameend[1]) + 1);
1569+ if (first_token->wu_string[0] == L'\0') {
1570+ wordunit_T *wu = first_token->next;
1571+ wordunitfree(first_token);
1572+ first_token = wu;
1573+ }
1574+
1575+ next_token(ps);
1576+
1577+ if (posixly_correct || first_token != NULL ||
1578+ ps->index != index_after_first_token ||
1579+ ps->tokentype != TT_LPAREN) {
1580+ /* scalar assignment */
15611581 result->a_type = A_SCALAR;
1562- result->a_scalar = parse_word(ps, false);
1582+ result->a_scalar = first_token;
15631583 } else {
1564- ps->index++;
1565- skip_to_next_token(ps);
1584+ /* array assignment */
1585+ next_token(ps);
15661586 result->a_type = A_ARRAY;
15671587 result->a_array = parse_words_to_paren(ps);
1568- if (ps->src.contents[ps->index] == L')')
1569- ps->index++;
1588+ if (ps->tokentype == TT_RPAREN)
1589+ next_token(ps);
15701590 else
15711591 serror(ps, Ngt("`%ls' is missing"), L")");
15721592 }
1573- skip_blanks_and_comment(ps);
15741593 return result;
15751594 }
15761595
@@ -1582,13 +1601,16 @@
15821601 plist_T list;
15831602
15841603 pl_init(&list);
1585- while (ps->src.contents[ps->index] != L')') {
1586- wordunit_T *word = parse_word(ps, true);
1587- if (word != NULL)
1588- pl_add(&list, word);
1589- else
1604+ while (psubstitute_alias_recursive(ps, 0), ps->tokentype != TT_RPAREN) {
1605+ if (ps->tokentype == TT_NEWLINE) {
1606+ next_line(ps);
1607+ next_token(ps);
1608+ continue;
1609+ }
1610+ if (ps->token == NULL)
15901611 break;
1591- skip_to_next_token(ps);
1612+ pl_add(&list, ps->token), ps->token = NULL;
1613+ next_token(ps);
15921614 }
15931615 return pl_toary(&list);
15941616 }
@@ -1599,25 +1621,21 @@
15991621 {
16001622 int fd;
16011623
1602- ensure_buffer(ps, 2);
1603- if (iswdigit(ps->src.contents[ps->index])) {
1624+ if (ps->tokentype == TT_IO_NUMBER) {
16041625 unsigned long lfd;
16051626 wchar_t *endptr;
16061627
1607-reparse:
1628+ assert(ps->token != NULL);
1629+ assert(ps->token->wu_type == WT_STRING);
1630+ assert(ps->token->next == NULL);
16081631 errno = 0;
1609- lfd = wcstoul(&ps->src.contents[ps->index], &endptr, 10);
1632+ lfd = wcstoul(ps->token->wu_string, &endptr, 10);
16101633 if (errno != 0 || lfd > INT_MAX)
16111634 fd = -1; /* invalid fd */
16121635 else
16131636 fd = (int) lfd;
1614- if (endptr[0] == L'\\' && endptr[1] == L'\n') {
1615- line_continuation(ps, endptr - ps->src.contents);
1616- goto reparse;
1617- } else if (endptr[0] != L'<' && endptr[0] != L'>') {
1618- return NULL;
1619- }
1620- ps->index = endptr - ps->src.contents;
1637+ assert(*endptr == L'\0');
1638+ next_token(ps);
16211639 } else if (ps->src.contents[ps->index] == L'<') {
16221640 fd = STDIN_FILENO;
16231641 } else if (ps->src.contents[ps->index] == L'>') {
@@ -1629,97 +1647,87 @@
16291647 redir_T *result = xmalloc(sizeof *result);
16301648 result->next = NULL;
16311649 result->rd_fd = fd;
1632- ensure_buffer(ps, 3);
1633- switch (ps->src.contents[ps->index]) {
1634- case L'<':
1635- switch (ps->src.contents[ps->index + 1]) {
1636- case L'<':
1637- if (ps->src.contents[ps->index + 2] == L'-') {
1638- result->rd_type = RT_HERERT;
1639- ps->index += 3;
1640- } else if (!posixly_correct &&
1641- ps->src.contents[ps->index + 2] == L'<') {
1642- result->rd_type = RT_HERESTR;
1643- ps->index += 3;
1644- } else {
1645- result->rd_type = RT_HERE;
1646- ps->index += 2;
1647- }
1650+ switch (ps->tokentype) {
1651+ case TT_LESS:
1652+ result->rd_type = RT_INPUT;
16481653 break;
1649- case L'(':
1650- if (!posixly_correct) {
1651- result->rd_type = RT_PROCIN;
1652- goto parse_command;
1653- } else {
1654- result->rd_type = RT_INPUT;
1655- ps->index += 1;
1656- }
1654+ case TT_LESSGREATER:
1655+ result->rd_type = RT_INOUT;
16571656 break;
1658- case L'>': result->rd_type = RT_INOUT; ps->index += 2; break;
1659- case L'&': result->rd_type = RT_DUPIN; ps->index += 2; break;
1660- default: result->rd_type = RT_INPUT; ps->index += 1; break;
1661- }
1662- break;
1663- case L'>':
1664- switch (ps->src.contents[ps->index + 1]) {
1665- case L'(':
1666- if (!posixly_correct) {
1667- result->rd_type = RT_PROCOUT;
1668- goto parse_command;
1669- } else {
1670- result->rd_type = RT_OUTPUT;
1671- ps->index += 1;
1672- }
1657+ case TT_LESSAMP:
1658+ result->rd_type = RT_DUPIN;
16731659 break;
1674- case L'>':
1675- if (!posixly_correct && ps->src.contents[ps->index + 2] == L'|') {
1676- result->rd_type = RT_PIPE;
1677- ps->index += 3;
1678- } else {
1679- result->rd_type = RT_APPEND;
1680- ps->index += 2;
1681- }
1660+ case TT_GREATER:
1661+ result->rd_type = RT_OUTPUT;
16821662 break;
1683- case L'|': result->rd_type = RT_CLOBBER; ps->index += 2; break;
1684- case L'&': result->rd_type = RT_DUPOUT; ps->index += 2; break;
1685- default: result->rd_type = RT_OUTPUT; ps->index += 1; break;
1686- }
1687- break;
1688- default:
1689- assert(false);
1663+ case TT_GREATERGREATER:
1664+ result->rd_type = RT_APPEND;
1665+ break;
1666+ case TT_GREATERPIPE:
1667+ result->rd_type = RT_CLOBBER;
1668+ break;
1669+ case TT_GREATERAMP:
1670+ result->rd_type = RT_DUPOUT;
1671+ break;
1672+ case TT_GREATERGREATERPIPE:
1673+ if (posixly_correct)
1674+ serror(ps, Ngt("pipe redirection is not supported"));
1675+ result->rd_type = RT_PIPE;
1676+ break;
1677+ case TT_LESSLPAREN:
1678+ result->rd_type = RT_PROCIN;
1679+ goto parse_command;
1680+ case TT_GREATERLPAREN:
1681+ result->rd_type = RT_PROCOUT;
1682+ goto parse_command;
1683+ case TT_LESSLESS:
1684+ result->rd_type = RT_HERE;
1685+ goto parse_here_document_tag;
1686+ case TT_LESSLESSDASH:
1687+ result->rd_type = RT_HERERT;
1688+ goto parse_here_document_tag;
1689+ case TT_LESSLESSLESS:
1690+ if (posixly_correct)
1691+ serror(ps, Ngt("here-string is not supported"));
1692+ result->rd_type = RT_HERESTR;
1693+ break;
1694+ default:
1695+ assert(false);
16901696 }
1691- skip_blanks_and_comment(ps);
1692- if (result->rd_type != RT_HERE && result->rd_type != RT_HERERT) {
1693- result->rd_filename = parse_word(ps, true);
1694- if (result->rd_filename == NULL) {
1695- serror(ps, Ngt("the redirection target is missing"));
1696- free(result);
1697- return NULL;
1698- }
1699- } else {
1700- wchar_t *endofheredoc = parse_word_as_wcs(ps);
1701- if (endofheredoc[0] == L'\0') {
1702- serror(ps, Ngt("the end-of-here-document indicator is missing"));
1703- free(endofheredoc);
1704- free(result);
1705- return NULL;
1706- }
1707- result->rd_hereend = endofheredoc;
1708- result->rd_herecontent = NULL;
1709- pl_add(&ps->pending_heredocs, result);
1697+
1698+ /* parse redirection target file token */
1699+ next_token(ps);
1700+ psubstitute_alias_recursive(ps, 0);
1701+ result->rd_filename = ps->token, ps->token = NULL;
1702+ if (result->rd_filename != NULL)
1703+ next_token(ps);
1704+ else
1705+ serror(ps, Ngt("the redirection target is missing"));
1706+ return result;
1707+
1708+parse_here_document_tag:
1709+ next_token(ps);
1710+ psubstitute_alias_recursive(ps, 0);
1711+ if (ps->token == NULL) {
1712+ serror(ps, Ngt("the end-of-here-document indicator is missing"));
1713+ free(result);
1714+ return NULL;
17101715 }
1711- skip_blanks_and_comment(ps);
1716+ result->rd_hereend =
1717+ xwcsndup(&ps->src.contents[ps->index], ps->next_index - ps->index);
1718+ result->rd_herecontent = NULL;
1719+ pl_add(&ps->pending_heredocs, result);
1720+ next_token(ps);
17121721 return result;
17131722
17141723 parse_command:
1715- ps->index += 1;
1724+ if (posixly_correct)
1725+ serror(ps, Ngt("process redirection is not supported"));
17161726 result->rd_command = extract_command_in_paren(ps);
1717- ensure_buffer(ps, 1);
1718- if (ps->src.contents[ps->index] == L')')
1719- ps->index++;
1727+ if (ps->tokentype == TT_RPAREN)
1728+ next_token(ps);
17201729 else
17211730 serror(ps, Ngt("unclosed process redirection"));
1722- skip_blanks_and_comment(ps);
17231731 return result;
17241732 }
17251733
@@ -1863,6 +1871,7 @@
18631871 if (wu != NULL)
18641872 return wu;
18651873 }
1874+ ps->next_index = ps->index + 1;
18661875 return parse_cmdsubst_in_paren(ps);
18671876 default:
18681877 return tryparse_paramexp_raw(ps);
@@ -2101,9 +2110,9 @@
21012110 }
21022111
21032112 /* Parses a command substitution that starts with "$(".
2104- * The current position must be at the opening parenthesis L'(' when this
2105- * function is called and the position is advanced to the closing parenthesis
2106- * L')'. */
2113+ * When this function is called, `ps->next_index' must be just after the opening
2114+ * "(". When this function returns, `ps->index' is just after the closing ")".
2115+ */
21072116 wordunit_T *parse_cmdsubst_in_paren(parsestate_T *ps)
21082117 {
21092118 wordunit_T *result = xmalloc(sizeof *result);
@@ -2120,15 +2129,16 @@
21202129 }
21212130
21222131 /* Extracts commands between '(' and ')'.
2123- * The current position must be at the opening parenthesis L'(' when this
2124- * function is called. The position is advanced to the closing parenthesis
2125- * L')'. */
2132+ * When this function is called, `ps->next_index' must be just after the opening
2133+ * "(". When this function returns, the current token will be the closing ")".
2134+ */
21262135 embedcmd_T extract_command_in_paren(parsestate_T *ps)
21272136 {
21282137 plist_T save_pending_heredocs;
21292138 embedcmd_T result;
21302139
2131- assert(ps->src.contents[ps->index] == L'(');
2140+ assert(ps->next_index > 0);
2141+ assert(ps->src.contents[ps->next_index - 1] == L'(');
21322142
21332143 save_pending_heredocs = ps->pending_heredocs;
21342144 pl_init(&ps->pending_heredocs);
@@ -2137,7 +2147,7 @@
21372147 result.is_preparsed = false;
21382148 result.value.unparsed = extract_command_in_paren_unparsed(ps);
21392149 } else {
2140- ps->index++;
2150+ next_token(ps);
21412151 result.is_preparsed = true;
21422152 result.value.preparsed = parse_compound_list(ps);
21432153 }
@@ -2149,15 +2159,15 @@
21492159 }
21502160
21512161 /* Parses commands between '(' and ')'.
2152- * The current position must be at the opening parenthesis L'(' when this
2153- * function is called. The position is advanced to the closing parenthesis
2154- * L')'. */
2162+ * The current token must be the opening parenthesis L'(' when this function is
2163+ * called. The current token is advanced to the closing parenthesis L')'. */
21552164 wchar_t *extract_command_in_paren_unparsed(parsestate_T *ps)
21562165 {
21572166 bool save_enable_alias = ps->enable_alias;
21582167 ps->enable_alias = false;
21592168
2160- size_t startindex = ++ps->index;
2169+ size_t startindex = ps->next_index;
2170+ next_token(ps);
21612171 andorsfree(parse_compound_list(ps));
21622172 assert(startindex <= ps->index);
21632173
@@ -2316,8 +2326,6 @@
23162326 * `command' is the name of the command to parse such as "(" and "if". */
23172327 command_T *parse_compound_command(parsestate_T *ps, const wchar_t *command)
23182328 {
2319- /* `parse_group', `parse_if', etc. don't call `skip_blanks_and_comment'
2320- * before they return nor parse redirections. */
23212329 command_T *result;
23222330 switch (command[0]) {
23232331 case L'(':
@@ -2353,7 +2361,6 @@
23532361 default:
23542362 assert(false);
23552363 }
2356- skip_blanks_and_comment(ps);
23572364 parse_redirect_list(ps, &result->c_redirs);
23582365 return result;
23592366 }
@@ -2362,21 +2369,23 @@
23622369 * `type' must be either CT_GROUP or CT_SUBSHELL. */
23632370 command_T *parse_group(parsestate_T *ps, commandtype_T type)
23642371 {
2365- const wchar_t *start, *end;
2372+ tokentype_T starttt, endtt;
2373+ const wchar_t *starts, *ends;
23662374
23672375 switch (type) {
23682376 case CT_GROUP:
2369- start = L"{", end = L"}";
2370- assert(has_token(ps, start));
2377+ starttt = TT_LBRACE, endtt = TT_RBRACE;
2378+ starts = L"{", ends = L"}";
23712379 break;
23722380 case CT_SUBSHELL:
2373- start = L"(", end = L")";
2374- assert(ps->src.contents[ps->index] == start[0]);
2381+ starttt = TT_LPAREN, endtt = TT_RPAREN;
2382+ starts = L"(", ends = L")";
23752383 break;
23762384 default:
23772385 assert(false);
23782386 }
2379- ps->index++;
2387+ assert(ps->tokentype == starttt);
2388+ next_token(ps);
23802389
23812390 command_T *result = xmalloc(sizeof *result);
23822391 result->next = NULL;
@@ -2387,11 +2396,11 @@
23872396 result->c_subcmds = parse_compound_list(ps);
23882397 if (posixly_correct && result->c_subcmds == NULL)
23892398 serror(ps, Ngt("commands are missing between `%ls' and `%ls'"),
2390- start, end);
2391- if (ps->src.contents[ps->index] == end[0])
2392- ps->index++;
2399+ starts, ends);
2400+ if (ps->tokentype == endtt)
2401+ next_token(ps);
23932402 else
2394- print_errmsg_token_missing(ps, end);
2403+ print_errmsg_token_missing(ps, ends);
23952404 return result;
23962405 }
23972406
@@ -2398,8 +2407,8 @@
23982407 /* Parses a if command */
23992408 command_T *parse_if(parsestate_T *ps)
24002409 {
2401- assert(has_token(ps, L"if"));
2402- ps->index += 2;
2410+ assert(ps->tokentype == TT_IF);
2411+ next_token(ps);
24032412
24042413 command_T *result = xmalloc(sizeof *result);
24052414 result->next = NULL;
@@ -2422,9 +2431,8 @@
24222431 serror(ps, Ngt("commands are missing between `%ls' and `%ls'"),
24232432 (result->c_ifcmds->next == NULL) ? L"if" : L"elif",
24242433 L"then");
2425- ensure_buffer(ps, 5);
2426- if (has_token(ps, L"then"))
2427- ps->index += 4;
2434+ if (ps->tokentype == TT_THEN)
2435+ next_token(ps);
24282436 else
24292437 print_errmsg_token_missing(ps, L"then");
24302438 } else {
@@ -2434,14 +2442,13 @@
24342442 if (posixly_correct && ic->ic_commands == NULL)
24352443 serror(ps, Ngt("commands are missing after `%ls'"),
24362444 after_else ? L"else" : L"then");
2437- ensure_buffer(ps, 5);
2438- if (!after_else && has_token(ps, L"else")) {
2439- ps->index += 4;
2445+ if (!after_else && ps->tokentype == TT_ELSE) {
2446+ next_token(ps);
24402447 after_else = true;
2441- } else if (!after_else && has_token(ps, L"elif")) {
2442- ps->index += 4;
2443- } else if (has_token(ps, L"fi")) {
2444- ps->index += 2;
2448+ } else if (!after_else && ps->tokentype == TT_ELIF) {
2449+ next_token(ps);
2450+ } else if (ps->tokentype == TT_FI) {
2451+ next_token(ps);
24452452 break;
24462453 } else {
24472454 print_errmsg_token_missing(ps, L"fi");
@@ -2454,9 +2461,9 @@
24542461 /* Parses a for command. */
24552462 command_T *parse_for(parsestate_T *ps)
24562463 {
2457- assert(has_token(ps, L"for"));
2458- ps->index += 3;
2459- skip_blanks_and_comment(ps);
2464+ assert(ps->tokentype == TT_FOR);
2465+ next_token(ps);
2466+ psubstitute_alias_recursive(ps, 0);
24602467
24612468 command_T *result = xmalloc(sizeof *result);
24622469 result->next = NULL;
@@ -2465,35 +2472,35 @@
24652472 result->c_lineno = ps->info->lineno;
24662473 result->c_redirs = NULL;
24672474
2468- wchar_t *name = parse_word_as_wcs(ps);
2469- if (!(posixly_correct ? is_portable_name : is_name)(name)) {
2470- if (name[0] == L'\0')
2475+ result->c_forname =
2476+ xwcsndup(&ps->src.contents[ps->index], ps->next_index - ps->index);
2477+ if (!is_name_word(ps->token)) {
2478+ if (ps->token == NULL)
24712479 serror(ps, Ngt("an identifier is required after `for'"));
24722480 else
2473- serror(ps, Ngt("`%ls' is not a valid identifier"), name);
2481+ serror(ps, Ngt("`%ls' is not a valid identifier"),
2482+ result->c_forname);
24742483 }
2475- result->c_forname = name;
2484+ next_token(ps);
24762485
24772486 parse_in:;
2478- bool on_next_line = skip_to_next_token(ps);
2479- ensure_buffer(ps, 3);
2480- if (has_token(ps, L"in")) {
2487+ bool on_next_line = parse_newline_list(ps);
2488+ if (ps->tokentype == TT_IN) {
24812489 redir_T *redirs = NULL;
2482- ps->index += 2;
2483- skip_blanks_and_comment(ps);
2490+ next_token(ps);
24842491 result->c_forwords = parse_words_and_redirects(ps, &redirs, false);
24852492 if (redirs != NULL) {
24862493 serror(ps, Ngt("redirections are not allowed after `in'"));
24872494 redirsfree(redirs);
24882495 }
2489- if (ps->src.contents[ps->index] == L';')
2490- ps->index++;
2496+ if (ps->tokentype == TT_SEMICOLON)
2497+ next_token(ps);
24912498 } else if (psubstitute_alias(ps, 0)) {
24922499 goto parse_in;
24932500 } else {
24942501 result->c_forwords = NULL;
2495- if (ps->src.contents[ps->index] == L';') {
2496- ps->index++;
2502+ if (ps->tokentype == TT_SEMICOLON) {
2503+ next_token(ps);
24972504 if (on_next_line)
24982505 serror(ps, Ngt("`;' cannot appear on a new line"));
24992506 }
@@ -2500,10 +2507,9 @@
25002507 }
25012508
25022509 parse_do:
2503- skip_to_next_token(ps);
2504- ensure_buffer(ps, 3);
2505- if (has_token(ps, L"do"))
2506- ps->index += 2;
2510+ parse_newline_list(ps);
2511+ if (ps->tokentype == TT_DO)
2512+ next_token(ps);
25072513 else if (psubstitute_alias(ps, 0))
25082514 goto parse_do;
25092515 else
@@ -2515,11 +2521,11 @@
25152521 serror(ps, Ngt("commands are missing between `%ls' and `%ls'"),
25162522 L"do", L"done");
25172523
2518- ensure_buffer(ps, 5);
2519- if (has_token(ps, L"done"))
2520- ps->index += 4;
2524+ if (ps->tokentype == TT_DONE)
2525+ next_token(ps);
25212526 else
25222527 print_errmsg_token_missing(ps, L"done");
2528+
25232529 return result;
25242530 }
25252531
@@ -2528,8 +2534,11 @@
25282534 */
25292535 command_T *parse_while(parsestate_T *ps, bool whltype)
25302536 {
2531- assert(has_token(ps, whltype ? L"while" : L"until"));
2532- ps->index += 5;
2537+ if (whltype)
2538+ assert(ps->tokentype == TT_WHILE);
2539+ else
2540+ assert(ps->tokentype == TT_UNTIL);
2541+ next_token(ps);
25332542
25342543 command_T *result = xmalloc(sizeof *result);
25352544 result->next = NULL;
@@ -2538,24 +2547,27 @@
25382547 result->c_lineno = ps->info->lineno;
25392548 result->c_redirs = NULL;
25402549 result->c_whltype = whltype;
2550+
25412551 result->c_whlcond = parse_compound_list(ps);
25422552 if (posixly_correct && result->c_whlcond == NULL)
25432553 serror(ps, Ngt("commands are missing after `%ls'"),
25442554 whltype ? L"while" : L"until");
2545- ensure_buffer(ps, 3);
2546- if (has_token(ps, L"do"))
2547- ps->index += 2;
2555+
2556+ if (ps->tokentype == TT_DO)
2557+ next_token(ps);
25482558 else
25492559 print_errmsg_token_missing(ps, L"do");
2560+
25502561 result->c_whlcmds = parse_compound_list(ps);
25512562 if (posixly_correct && result->c_whlcmds == NULL)
25522563 serror(ps, Ngt("commands are missing between `%ls' and `%ls'"),
25532564 L"do", L"done");
2554- ensure_buffer(ps, 5);
2555- if (has_token(ps, L"done"))
2556- ps->index += 4;
2565+
2566+ if (ps->tokentype == TT_DONE)
2567+ next_token(ps);
25572568 else
25582569 print_errmsg_token_missing(ps, L"done");
2570+
25592571 return result;
25602572 }
25612573
@@ -2562,9 +2574,9 @@
25622574 /* Parses a case command. */
25632575 command_T *parse_case(parsestate_T *ps)
25642576 {
2565- assert(has_token(ps, L"case"));
2566- ps->index += 4;
2567- skip_blanks_and_comment(ps);
2577+ assert(ps->tokentype == TT_CASE);
2578+ next_token(ps);
2579+ psubstitute_alias_recursive(ps, 0);
25682580
25692581 command_T *result = xmalloc(sizeof *result);
25702582 result->next = NULL;
@@ -2572,15 +2584,16 @@
25722584 result->c_type = CT_CASE;
25732585 result->c_lineno = ps->info->lineno;
25742586 result->c_redirs = NULL;
2575- result->c_casword = parse_word(ps, true);
2576- if (result->c_casword == NULL)
2587+ result->c_casword = ps->token, ps->token = NULL;
2588+ if (result->c_casword != NULL)
2589+ next_token(ps);
2590+ else
25772591 serror(ps, Ngt("a word is required after `%ls'"), L"case");
25782592
25792593 parse_in:
2580- skip_to_next_token(ps);
2581- ensure_buffer(ps, 3);
2582- if (has_token(ps, L"in")) {
2583- ps->index += 2;
2594+ parse_newline_list(ps);
2595+ if (ps->tokentype == TT_IN) {
2596+ next_token(ps);
25842597 result->c_casitems = parse_case_list(ps);
25852598 } else if (psubstitute_alias(ps, 0)) {
25862599 goto parse_in;
@@ -2590,24 +2603,22 @@
25902603 result->c_casitems = NULL;
25912604 }
25922605
2593- ensure_buffer(ps, 5);
2594- if (has_token(ps, L"esac"))
2595- ps->index += 4;
2606+ if (ps->tokentype == TT_ESAC)
2607+ next_token(ps);
25962608 else
25972609 print_errmsg_token_missing(ps, L"esac");
2610+
25982611 return result;
25992612 }
26002613
2601-/* Parses the body of a case command (the part between "in" and "esac").
2602- * You don't have to call `skip_to_next_token' before calling this function. */
2614+/* Parses the body of a case command (the part between "in" and "esac"). */
26032615 caseitem_T *parse_case_list(parsestate_T *ps)
26042616 {
26052617 caseitem_T *first = NULL, **lastp = &first;
26062618
26072619 do {
2608- skip_to_next_token(ps);
2609- ensure_buffer(ps, 5);
2610- if (has_token(ps, L"esac"))
2620+ parse_newline_list(ps);
2621+ if (ps->tokentype == TT_ESAC)
26112622 break;
26122623 if (psubstitute_alias(ps, 0))
26132624 continue;
@@ -2619,68 +2630,62 @@
26192630 ci->ci_patterns = parse_case_patterns(ps);
26202631 ci->ci_commands = parse_compound_list(ps);
26212632 /* `ci_commands' may be NULL unlike for and while commands */
2622- ensure_buffer(ps, 2);
2623- if (ps->src.contents[ps->index] == L';' &&
2624- ps->src.contents[ps->index + 1] == L';') {
2625- ps->index += 2;
2626- } else {
2633+ if (ps->tokentype == TT_DOUBLE_SEMICOLON)
2634+ next_token(ps);
2635+ else
26272636 break;
2628- }
26292637 } while (!ps->error);
26302638 return first;
26312639 }
26322640
26332641 /* Parses patterns of a case item.
2634- * The current position is advanced to the character that just follows ')', not
2635- * to the next token.
2636- * Call `skip_to_next_token' and `ensure_buffer(ps, 1)' before calling this
2637- * function. */
2642+ * This function consumes the closing ")".
2643+ * Perform alias substitution before calling this function. */
26382644 void **parse_case_patterns(parsestate_T *ps)
26392645 {
26402646 plist_T wordlist;
26412647 pl_init(&wordlist);
26422648
2643- if (ps->src.contents[ps->index] == L'(') { /* ignore the first '(' */
2644- ps->index++;
2645- skip_blanks_and_comment(ps);
2646- if (posixly_correct) {
2647- ensure_buffer(ps, 5);
2648- if (has_token(ps, L"esac"))
2649- serror(ps, Ngt(
2650- "an unquoted `esac' cannot be the first case pattern"));
2651- }
2649+ if (ps->tokentype == TT_LPAREN) { /* ignore the first '(' */
2650+ next_token(ps);
2651+ do {
2652+ if (posixly_correct && ps->tokentype == TT_ESAC)
2653+ serror(ps,
2654+ Ngt("an unquoted `esac' cannot be the first case pattern"));
2655+ } while (psubstitute_alias(ps, 0));
26522656 }
26532657
26542658 const wchar_t *predecessor = L"(";
26552659 do {
2656- if (is_token_delimiter_char(ps->src.contents[ps->index])) {
2657- if (ps->src.contents[ps->index] != L'\0') {
2658- if (ps->src.contents[ps->index] == L'\n')
2659- serror(ps, Ngt("a word is required after `%ls'"),
2660- predecessor);
2661- else
2662- serror(ps, Ngt("encountered an invalid character `%lc' "
2663- "in the case pattern"),
2664- (wint_t) ps->src.contents[ps->index]);
2660+ if (ps->token == NULL) {
2661+ if (ps->tokentype == TT_END_OF_INPUT) {
2662+ // serror(ps, ...);
2663+ } else if (ps->tokentype == TT_NEWLINE) {
2664+ serror(ps, Ngt("a word is required after `%ls'"),
2665+ predecessor);
2666+ } else {
2667+ serror(ps, Ngt("encountered an invalid character `%lc' "
2668+ "in the case pattern"),
2669+ (wint_t) ps->src.contents[ps->index]);
26652670 }
26662671 break;
26672672 }
2668- pl_add(&wordlist, parse_word(ps, true));
2669- skip_blanks_and_comment(ps);
2673+ pl_add(&wordlist, ps->token), ps->token = NULL;
2674+
2675+ next_token(ps);
26702676 psubstitute_alias_recursive(ps, 0);
2671- ensure_buffer(ps, 1);
2672- if (ps->src.contents[ps->index] == L'|') {
2673- predecessor = L"|";
2674- ps->index++;
2675- } else if (ps->src.contents[ps->index] == L')') {
2676- ps->index++;
2677+ if (ps->tokentype != TT_PIPE) {
2678+ if (ps->tokentype == TT_RPAREN)
2679+ next_token(ps);
2680+ else
2681+ serror(ps, Ngt("`%ls' is missing"), L")");
26772682 break;
2678- } else {
2679- serror(ps, Ngt("`%ls' is missing"), L")");
2680- break;
26812683 }
2682- skip_blanks_and_comment(ps);
2684+ predecessor = L"|";
2685+ next_token(ps);
2686+ psubstitute_alias_recursive(ps, 0);
26832687 } while (!ps->error);
2688+
26842689 return pl_toary(&wordlist);
26852690 }
26862691
@@ -2690,9 +2695,9 @@
26902695 if (posixly_correct)
26912696 serror(ps, Ngt("`%ls' cannot be used as a command name"), L"function");
26922697
2693- assert(has_token(ps, L"function"));
2694- ps->index += 8;
2695- skip_blanks_and_comment(ps);
2698+ assert(ps->tokentype == TT_FUNCTION);
2699+ next_token(ps);
2700+ psubstitute_alias_recursive(ps, 0);
26962701
26972702 command_T *result = xmalloc(sizeof *result);
26982703 result->next = NULL;
@@ -2700,28 +2705,32 @@
27002705 result->c_type = CT_FUNCDEF;
27012706 result->c_lineno = ps->info->lineno;
27022707 result->c_redirs = NULL;
2703- result->c_funcname = parse_word(ps, true);
2708+ result->c_funcname = ps->token, ps->token = NULL;
27042709 if (result->c_funcname == NULL)
27052710 serror(ps, Ngt("a word is required after `%ls'"), L"function");
2706- skip_blanks_and_comment(ps);
27072711
27082712 bool paren = false;
2709-parse_parentheses:;
2710- size_t saveindex = ps->index;
2711- if (ps->src.contents[ps->index] == L'(') {
2712- ps->index++;
2713+ next_token(ps);
2714+parse_parentheses:
2715+ if (ps->tokentype == TT_LPAREN) {
2716+ size_t saveindex = ps->index;
2717+ next_token(ps);
27132718 parse_close_parenthesis:
2714- skip_blanks_and_comment(ps);
2715- if (ps->src.contents[ps->index] == L')')
2716- paren = true, ps->index++;
2717- else if (psubstitute_alias(ps, AF_NONGLOBAL))
2719+ if (ps->tokentype == TT_RPAREN) {
2720+ paren = true;
2721+ next_token(ps);
2722+ } else if (psubstitute_alias(ps, AF_NONGLOBAL)) {
27182723 goto parse_close_parenthesis;
2719- else
2724+ } else {
2725+ /* rewind to '(' */
27202726 rewind_index(ps, saveindex);
2727+ ps->next_index = ps->index;
2728+ next_token(ps);
2729+ }
27212730 }
2722- skip_to_next_token(ps);
2731+parse_function_body:
2732+ parse_newline_list(ps);
27232733
2724-parse_function_body:;
27252734 const wchar_t *t = check_opening_token(ps);
27262735 if (t != NULL) {
27272736 result->c_funcbody = parse_compound_command(ps, t);
@@ -2746,8 +2755,7 @@
27462755 * If successful, `c' is directly modified to the function definition parsed. */
27472756 command_T *try_reparse_as_function(parsestate_T *ps, command_T *c)
27482757 {
2749- // ensure_buffer(ps, 1);
2750- if (ps->src.contents[ps->index] != L'(') // not a function definition?
2758+ if (ps->tokentype != TT_LPAREN) // not a function definition?
27512759 return c;
27522760
27532761 /* If this is a function definition, there must be exactly one command word
@@ -2767,19 +2775,18 @@
27672775 }
27682776
27692777 /* Skip '('. */
2770- ps->index++;
2771- skip_blanks_and_comment(ps);
2778+ next_token(ps);
27722779
27732780 /* Parse ')'. */
27742781 psubstitute_alias_recursive(ps, 0);
2775- // ensure_buffer(ps, 1);
2776- if (ps->src.contents[ps->index] != L')') {
2782+ if (ps->tokentype != TT_RPAREN) {
27772783 serror(ps, Ngt("`(' must be followed by `)' in a function definition"));
27782784 return c;
27792785 }
2780- ps->index++;
2786+ next_token(ps);
2787+
27812788 parse_function_body:
2782- skip_to_next_token(ps);
2789+ parse_newline_list(ps);
27832790
27842791 const wchar_t *t = check_opening_token(ps);
27852792 if (t == NULL) {
Show on old repository browser