• R/O
  • SSH
  • HTTPS

yash: Commit


Commit MetaInfo

Revision: 3885 (tree)
Time: 2018-09-19 00:55:28
Author: magicant

Log Message

Reorder parser functions

Now functions are arranged in sections.

Change Summary

Incremental Difference

--- yash/branches/token_based_parser/parser.c (revision 3884)
+++ yash/branches/token_based_parser/parser.c (revision 3885)
@@ -519,6 +519,13 @@
519519
520520 static void serror(parsestate_T *restrict ps, const char *restrict format, ...)
521521 __attribute__((nonnull(1,2),format(printf,2,3)));
522+static const char *get_errmsg_unexpected_tokentype(tokentype_T tokentype)
523+ __attribute__((const));
524+static void print_errmsg_token_unexpected(parsestate_T *ps)
525+ __attribute__((nonnull));
526+static void print_errmsg_token_missing(parsestate_T *ps, const wchar_t *t)
527+ __attribute__((nonnull));
528+
522529 static inputresult_T read_more_input(parsestate_T *ps)
523530 __attribute__((nonnull));
524531 static void line_continuation(parsestate_T *ps, size_t index)
@@ -529,12 +536,35 @@
529536 __attribute__((nonnull));
530537 static size_t count_name_length(parsestate_T *ps, bool isnamechar(wchar_t c))
531538 __attribute__((nonnull));
539+
532540 static void next_token(parsestate_T *ps)
533541 __attribute__((nonnull));
542+static wordunit_T *parse_word(parsestate_T *ps, bool testfunc(wchar_t c))
543+ __attribute__((nonnull,malloc,warn_unused_result));
544+static void skip_to_next_single_quote(parsestate_T *ps)
545+ __attribute__((nonnull));
546+static wordunit_T *parse_special_word_unit(parsestate_T *ps, bool indq)
547+ __attribute__((nonnull,malloc,warn_unused_result));
548+static wordunit_T *tryparse_paramexp_raw(parsestate_T *ps)
549+ __attribute__((nonnull,malloc,warn_unused_result));
550+static wordunit_T *parse_paramexp_in_brace(parsestate_T *ps)
551+ __attribute__((nonnull,malloc,warn_unused_result));
552+static wordunit_T *parse_cmdsubst_in_paren(parsestate_T *ps)
553+ __attribute__((nonnull,malloc,warn_unused_result));
554+static embedcmd_T extract_command_in_paren(parsestate_T *ps)
555+ __attribute__((nonnull,warn_unused_result));
556+static wchar_t *extract_command_in_paren_unparsed(parsestate_T *ps)
557+ __attribute__((nonnull,malloc,warn_unused_result));
558+static wordunit_T *parse_cmdsubst_in_backquote(parsestate_T *ps, bool bsbq)
559+ __attribute__((nonnull,malloc,warn_unused_result));
560+static wordunit_T *tryparse_arith(parsestate_T *ps)
561+ __attribute__((nonnull,malloc,warn_unused_result));
562+
534563 static void next_line(parsestate_T *ps)
535564 __attribute__((nonnull));
536565 static bool parse_newline_list(parsestate_T *ps)
537566 __attribute__((nonnull));
567+
538568 static bool is_comma_or_closing_bracket(wchar_t c)
539569 __attribute__((const));
540570 static bool is_slash_or_closing_brace(wchar_t c)
@@ -541,10 +571,12 @@
541571 __attribute__((const));
542572 static bool is_closing_brace(wchar_t c)
543573 __attribute__((const));
574+
544575 static bool psubstitute_alias(parsestate_T *ps, substaliasflags_T f)
545576 __attribute__((nonnull));
546577 static void psubstitute_alias_recursive(parsestate_T *ps, substaliasflags_T f)
547578 __attribute__((nonnull));
579+
548580 static and_or_T *parse_command_list(parsestate_T *ps, bool toeol)
549581 __attribute__((nonnull,malloc,warn_unused_result));
550582 static and_or_T *parse_compound_list(parsestate_T *ps)
@@ -570,26 +602,6 @@
570602 __attribute__((nonnull,malloc,warn_unused_result));
571603 static redir_T *tryparse_redirect(parsestate_T *ps)
572604 __attribute__((nonnull,malloc,warn_unused_result));
573-static wordunit_T *parse_word(parsestate_T *ps, bool testfunc(wchar_t c))
574- __attribute__((nonnull,malloc,warn_unused_result));
575-static void skip_to_next_single_quote(parsestate_T *ps)
576- __attribute__((nonnull));
577-static wordunit_T *parse_special_word_unit(parsestate_T *ps, bool indq)
578- __attribute__((nonnull,malloc,warn_unused_result));
579-static wordunit_T *tryparse_paramexp_raw(parsestate_T *ps)
580- __attribute__((nonnull,malloc,warn_unused_result));
581-static wordunit_T *parse_paramexp_in_brace(parsestate_T *ps)
582- __attribute__((nonnull,malloc,warn_unused_result));
583-static wordunit_T *parse_cmdsubst_in_paren(parsestate_T *ps)
584- __attribute__((nonnull,malloc,warn_unused_result));
585-static embedcmd_T extract_command_in_paren(parsestate_T *ps)
586- __attribute__((nonnull,warn_unused_result));
587-static wchar_t *extract_command_in_paren_unparsed(parsestate_T *ps)
588- __attribute__((nonnull,malloc,warn_unused_result));
589-static wordunit_T *parse_cmdsubst_in_backquote(parsestate_T *ps, bool bsbq)
590- __attribute__((nonnull,malloc,warn_unused_result));
591-static wordunit_T *tryparse_arith(parsestate_T *ps)
592- __attribute__((nonnull,malloc,warn_unused_result));
593605 static command_T *parse_compound_command(parsestate_T *ps)
594606 __attribute__((nonnull,malloc,warn_unused_result));
595607 static command_T *parse_group(parsestate_T *ps)
@@ -610,6 +622,7 @@
610622 __attribute__((nonnull,malloc,warn_unused_result));
611623 static command_T *try_reparse_as_function(parsestate_T *ps, command_T *c)
612624 __attribute__((nonnull,warn_unused_result));
625+
613626 static void read_heredoc_contents(parsestate_T *ps, redir_T *redir)
614627 __attribute__((nonnull));
615628 static void read_heredoc_contents_without_expansion(
@@ -626,16 +639,12 @@
626639 parsestate_T *ps, bool backquote, bool stoponnewline,
627640 wordunit_T **lastp)
628641 __attribute__((nonnull));
629-static const char *get_errmsg_unexpected_tokentype(tokentype_T tokentype)
630- __attribute__((const));
631-static void print_errmsg_token_unexpected(parsestate_T *ps)
632- __attribute__((nonnull));
633-static void print_errmsg_token_missing(parsestate_T *ps, const wchar_t *t)
634- __attribute__((nonnull));
635642
636643 #define QUOTES L"\"'\\"
637644
638645
646+/***** Entry points *****/
647+
639648 /* The functions below may return non-NULL even on error.
640649 * The error condition must be tested by the `error' flag of the parsestate_T
641650 * structure. It is set to true when `serror' is called. */
@@ -717,6 +726,52 @@
717726 assert(false);
718727 }
719728
729+/* Parses a string recognizing parameter expansions, command substitutions of
730+ * the form "$(...)" and arithmetic expansions.
731+ * All the members of `info' except `lastinputresult' must have been initialized
732+ * beforehand.
733+ * This function reads and parses the input to the end of file.
734+ * Iff successful, the result is assigned to `*resultp' and true is returned.
735+ * If the input is empty, NULL is assigned.
736+ * On error, the value of `*resultp' is undefined. */
737+bool parse_string(parseparam_T *info, wordunit_T **restrict resultp)
738+{
739+ parsestate_T ps = {
740+ .info = info,
741+ .error = false,
742+ .index = 0,
743+ .next_index = 0,
744+ .tokentype = TT_UNKNOWN,
745+ .token = NULL,
746+ .enable_alias = false,
747+ .reparse = false,
748+ .aliases = NULL,
749+ };
750+ wb_init(&ps.src);
751+
752+ ps.info->lastinputresult = INPUT_OK;
753+ read_more_input(&ps);
754+ pl_init(&ps.pending_heredocs);
755+
756+ resultp = parse_string_without_quotes(&ps, false, false, resultp);
757+ *resultp = NULL;
758+
759+ wb_destroy(&ps.src);
760+ pl_destroy(&ps.pending_heredocs);
761+ assert(ps.aliases == NULL);
762+ //destroy_aliaslist(ps.aliases);
763+ wordfree(ps.token);
764+
765+ if (ps.info->lastinputresult != INPUT_EOF || ps.error) {
766+ wordfree(*resultp);
767+ return false;
768+ } else {
769+ return true;
770+ }
771+}
772+
773+/***** Error message utility *****/
774+
720775 /* Prints the specified error message to the standard error.
721776 * `format' is passed to `gettext' in this function.
722777 * `format' need not have a trailing newline since a newline is automatically
@@ -740,6 +795,64 @@
740795 ps->error = true;
741796 }
742797
798+const char *get_errmsg_unexpected_tokentype(tokentype_T tokentype)
799+{
800+ switch (tokentype) {
801+ case TT_RPAREN:
802+ return Ngt("encountered `%ls' without a matching `('");
803+ case TT_RBRACE:
804+ return Ngt("encountered `%ls' without a matching `{'");
805+ case TT_DOUBLE_SEMICOLON:
806+ return Ngt("`%ls' is used outside `case'");
807+ case TT_BANG:
808+ return Ngt("`%ls' cannot be used as a command name");
809+ case TT_IN:
810+ return Ngt("`%ls' cannot be used as a command name");
811+ case TT_FI:
812+ return Ngt("encountered `%ls' "
813+ "without a matching `if' and/or `then'");
814+ case TT_THEN:
815+ return Ngt("encountered `%ls' without a matching `if' or `elif'");
816+ case TT_DO:
817+ return Ngt("encountered `%ls' "
818+ "without a matching `for', `while', or `until'");
819+ case TT_DONE:
820+ return Ngt("encountered `%ls' without a matching `do'");
821+ case TT_ESAC:
822+ return Ngt("encountered `%ls' without a matching `case'");
823+ case TT_ELIF:
824+ case TT_ELSE:
825+ return Ngt("encountered `%ls' "
826+ "without a matching `if' and/or `then'");
827+ default:
828+ assert(false);
829+ }
830+}
831+
832+void print_errmsg_token_unexpected(parsestate_T *ps)
833+{
834+ assert(ps->index <= ps->next_index);
835+ size_t length = ps->next_index - ps->index;
836+ wchar_t token[length + 1];
837+ wcsncpy(token, &ps->src.contents[ps->index], length);
838+ token[length] = L'\0';
839+
840+ const char *message = get_errmsg_unexpected_tokentype(ps->tokentype);
841+ serror(ps, message, token);
842+}
843+
844+void print_errmsg_token_missing(parsestate_T *ps, const wchar_t *t)
845+{
846+ if (is_closing_tokentype(ps->tokentype)) {
847+ print_errmsg_token_unexpected(ps);
848+ serror(ps, Ngt("(maybe you missed `%ls'?)"), t);
849+ } else {
850+ serror(ps, Ngt("`%ls' is missing"), t);
851+ }
852+}
853+
854+/***** Input buffer manipulators *****/
855+
743856 /* Reads the next line of input and returns the result type, which is assigned
744857 * to `ps->info->lastinputresult'.
745858 * If `ps->info->lastinputresult' is not INPUT_OK, it is simply returned
@@ -829,6 +942,8 @@
829942 return index - ps->index;
830943 }
831944
945+/***** Tokenizer *****/
946+
832947 /* Moves to the next token, updating `index', `next_index', `tokentype', and
833948 * `token' of the parse state.
834949 * The existing `token' is freed. */
@@ -956,6 +1071,587 @@
9561071 ps->next_index = index;
9571072 }
9581073
1074+/* Parses a word at the current position.
1075+ * `testfunc' is a function that determines if a character is a word delimiter.
1076+ * The parsing proceeds up to an unescaped character for which `testfunc'
1077+ * returns true.
1078+ * It is not an error if there are no characters to form a word, in which case
1079+ * NULL is returned. */
1080+wordunit_T *parse_word(parsestate_T *ps, bool testfunc(wchar_t c))
1081+{
1082+ wordunit_T *first = NULL, **lastp = &first;
1083+ bool indq = false; /* in double quotes? */
1084+ size_t startindex = ps->index;
1085+
1086+/* appends the substring from `startindex' to `index' as a new word unit
1087+ * to `*lastp' */
1088+#define MAKE_WORDUNIT_STRING \
1089+ do { \
1090+ size_t len = ps->index - startindex; \
1091+ if (len > 0) { \
1092+ wordunit_T *w = xmalloc(sizeof *w); \
1093+ w->next = NULL; \
1094+ w->wu_type = WT_STRING; \
1095+ w->wu_string = xwcsndup(&ps->src.contents[startindex], len); \
1096+ *lastp = w; \
1097+ lastp = &w->next; \
1098+ } \
1099+ } while (0)
1100+
1101+ while (maybe_line_continuations(ps, ps->index),
1102+ indq || !testfunc(ps->src.contents[ps->index])) {
1103+
1104+ switch (ps->src.contents[ps->index]) {
1105+ case L'\0':
1106+ goto done; // reached EOF
1107+ case L'\\':
1108+ if (ps->src.contents[ps->index + 1] != L'\0') {
1109+ assert(ps->src.contents[ps->index + 1] != L'\n');
1110+ ps->index += 2;
1111+ continue;
1112+ }
1113+ break;
1114+ case L'\n':
1115+ ps->info->lineno++;
1116+ break;
1117+ case L'$':
1118+ case L'`':
1119+ MAKE_WORDUNIT_STRING;
1120+ wordunit_T *wu = parse_special_word_unit(ps, indq);
1121+ startindex = ps->index;
1122+ if (wu != NULL) {
1123+ *lastp = wu;
1124+ lastp = &wu->next;
1125+ continue;
1126+ } else if (ps->src.contents[ps->index] == L'\0') {
1127+ continue;
1128+ }
1129+ break;
1130+ case L'\'':
1131+ if (!indq) {
1132+ ps->index++;
1133+ skip_to_next_single_quote(ps);
1134+ if (ps->src.contents[ps->index] == L'\'')
1135+ ps->index++;
1136+ continue;
1137+ }
1138+ break;
1139+ case L'"':
1140+ indq = !indq;
1141+ /* falls thru! */
1142+ default:
1143+ break;
1144+ }
1145+ ps->index++;
1146+ }
1147+done:
1148+ MAKE_WORDUNIT_STRING;
1149+
1150+ if (indq)
1151+ serror(ps, Ngt("the double quotation is not closed"));
1152+
1153+ return first;
1154+}
1155+
1156+/* Skips to the next single quote.
1157+ * If the current position is already at a single quote, the position is not
1158+ * moved.
1159+ * It is an error if there is no single quote before the end of file. */
1160+void skip_to_next_single_quote(parsestate_T *ps)
1161+{
1162+ for (;;) {
1163+ switch (ps->src.contents[ps->index]) {
1164+ case L'\'':
1165+ return;
1166+ case L'\0':
1167+ if (read_more_input(ps) != INPUT_OK) {
1168+ serror(ps, Ngt("the single quotation is not closed"));
1169+ return;
1170+ }
1171+ continue;
1172+ case L'\n':
1173+ ps->info->lineno++;
1174+ break;
1175+ default:
1176+ break;
1177+ }
1178+ ps->index++;
1179+ }
1180+}
1181+
1182+/* Parses a parameter expansion or command substitution that starts with '$' or
1183+ * '`'. The character at the current position must be '$' or '`' when this
1184+ * function is called and the position is advanced to right after the expansion
1185+ * or substitution.
1186+ * If the character at the current position is '$' but it is not an expansion,
1187+ * the position is not moved and the return value is NULL. Otherwise, the
1188+ * position is advanced by at least one character.
1189+ * Between double quotes, `indq' must be true. */
1190+wordunit_T *parse_special_word_unit(parsestate_T *ps, bool indq)
1191+{
1192+ switch (ps->src.contents[ps->index++]) {
1193+ case L'$':
1194+ maybe_line_continuations(ps, ps->index);
1195+ switch (ps->src.contents[ps->index]) {
1196+ case L'{':
1197+ return parse_paramexp_in_brace(ps);
1198+ case L'(':
1199+ maybe_line_continuations(ps, ps->index + 1);
1200+ if (ps->src.contents[ps->index + 1] == L'(') {
1201+ wordunit_T *wu = tryparse_arith(ps);
1202+ if (wu != NULL)
1203+ return wu;
1204+ }
1205+ ps->next_index = ps->index + 1;
1206+ return parse_cmdsubst_in_paren(ps);
1207+ default:
1208+ return tryparse_paramexp_raw(ps);
1209+ }
1210+ case L'`':
1211+ return parse_cmdsubst_in_backquote(ps, indq);
1212+ default:
1213+ assert(false);
1214+ }
1215+}
1216+
1217+/* Parses a parameter that is not enclosed by { }.
1218+ * The current position must be at the first character of the parameter name
1219+ * that follows L'$'. The position is advanced to right after the name.
1220+ * If there is no parameter, the position is put back to L'$'. */
1221+wordunit_T *tryparse_paramexp_raw(parsestate_T *ps)
1222+{
1223+ paramexp_T *pe;
1224+ size_t namelen; /* parameter name length */
1225+
1226+ maybe_line_continuations(ps, ps->index);
1227+ switch (ps->src.contents[ps->index]) {
1228+ case L'@': case L'*': case L'#': case L'?':
1229+ case L'-': case L'$': case L'!':
1230+ namelen = 1;
1231+ goto success;
1232+ }
1233+ if (!is_portable_name_char(ps->src.contents[ps->index]))
1234+ goto error;
1235+ if (iswdigit(ps->src.contents[ps->index]))
1236+ namelen = 1;
1237+ else
1238+ namelen = count_name_length(ps, is_portable_name_char);
1239+
1240+success:
1241+ pe = xmalloc(sizeof *pe);
1242+ pe->pe_type = PT_NONE;
1243+ pe->pe_name = xwcsndup(&ps->src.contents[ps->index], namelen);
1244+ pe->pe_start = pe->pe_end = pe->pe_match = pe->pe_subst = NULL;
1245+
1246+ wordunit_T *result = xmalloc(sizeof *result);
1247+ result->next = NULL;
1248+ result->wu_type = WT_PARAM;
1249+ result->wu_param = pe;
1250+ ps->index += namelen;
1251+ return result;
1252+
1253+error:
1254+ ps->index--;
1255+ assert(ps->src.contents[ps->index] == L'$');
1256+ return NULL;
1257+}
1258+
1259+/* Parses a parameter expansion that starts with "${".
1260+ * The current position must be at the opening brace L'{' when this function is
1261+ * called and the position is advanced to just after the closing brace L'}'. */
1262+wordunit_T *parse_paramexp_in_brace(parsestate_T *ps)
1263+{
1264+ paramexp_T *pe = xmalloc(sizeof *pe);
1265+ pe->pe_type = 0;
1266+ pe->pe_name = NULL;
1267+ pe->pe_start = pe->pe_end = pe->pe_match = pe->pe_subst = NULL;
1268+
1269+ assert(ps->src.contents[ps->index] == L'{');
1270+ ps->index++;
1271+
1272+ /* parse PT_NUMBER */
1273+ maybe_line_continuations(ps, ps->index);
1274+ if (ps->src.contents[ps->index] == L'#') {
1275+ maybe_line_continuations(ps, ps->index + 1);
1276+ switch (ps->src.contents[ps->index + 1]) {
1277+ case L'\0': case L'}':
1278+ case L'+': case L'=': case L':': case L'/': case L'%':
1279+ break;
1280+ case L'-': case L'?': case L'#':
1281+ maybe_line_continuations(ps, ps->index + 2);
1282+ if (ps->src.contents[ps->index + 2] != L'}')
1283+ break;
1284+ /* falls thru! */
1285+ default:
1286+ pe->pe_type |= PT_NUMBER;
1287+ ps->index++;
1288+ break;
1289+ }
1290+ }
1291+
1292+ /* parse nested expansion */
1293+ // maybe_line_continuations(ps, ps->index); // already called above
1294+ if (!posixly_correct && ps->src.contents[ps->index] == L'{') {
1295+ pe->pe_type |= PT_NEST;
1296+ pe->pe_nest = parse_paramexp_in_brace(ps);
1297+ } else if (!posixly_correct
1298+ && (ps->src.contents[ps->index] == L'`'
1299+ || (ps->src.contents[ps->index] == L'$'
1300+ && (maybe_line_continuations(ps, ps->index + 1),
1301+ ps->src.contents[ps->index + 1] == L'{'
1302+ || ps->src.contents[ps->index + 1] == L'(')))) {
1303+ size_t neststartindex = ps->index;
1304+ pe->pe_nest = parse_special_word_unit(ps, false);
1305+ if (ps->index == neststartindex)
1306+ goto parse_name;
1307+ pe->pe_type |= PT_NEST;
1308+ maybe_line_continuations(ps, ps->index);
1309+ } else {
1310+parse_name:;
1311+ /* no nesting: parse parameter name normally */
1312+ size_t namestartindex = ps->index;
1313+ switch (ps->src.contents[ps->index]) {
1314+ case L'@': case L'*': case L'#': case L'?':
1315+ case L'-': case L'$': case L'!':
1316+ ps->index++;
1317+ break;
1318+ default:
1319+ while (maybe_line_continuations(ps, ps->index),
1320+ is_name_char(ps->src.contents[ps->index]))
1321+ ps->index++;
1322+ break;
1323+ }
1324+ size_t namelen = ps->index - namestartindex;
1325+ if (namelen == 0) {
1326+ serror(ps, Ngt("the parameter name is missing or invalid"));
1327+ goto end;
1328+ }
1329+ pe->pe_name = xwcsndup(&ps->src.contents[namestartindex], namelen);
1330+ }
1331+
1332+ /* parse indices */
1333+ // maybe_line_continuations(ps, ps->index); // already called above
1334+ if (!posixly_correct && ps->src.contents[ps->index] == L'[') {
1335+ ps->index++;
1336+ pe->pe_start = parse_word(ps, is_comma_or_closing_bracket);
1337+ if (pe->pe_start == NULL)
1338+ serror(ps, Ngt("the index is missing"));
1339+ if (ps->src.contents[ps->index] == L',') {
1340+ ps->index++;
1341+ pe->pe_end = parse_word(ps, is_comma_or_closing_bracket);
1342+ if (pe->pe_end == NULL)
1343+ serror(ps, Ngt("the index is missing"));
1344+ }
1345+ if (ps->src.contents[ps->index] == L']') {
1346+ maybe_line_continuations(ps, ++ps->index);
1347+ } else {
1348+ serror(ps, Ngt("`%ls' is missing"), L"]");
1349+ }
1350+ }
1351+
1352+ /* parse PT_COLON */
1353+ // maybe_line_continuations(ps, ps->index); // already called above
1354+ if (ps->src.contents[ps->index] == L':') {
1355+ pe->pe_type |= PT_COLON;
1356+ maybe_line_continuations(ps, ++ps->index);
1357+ }
1358+
1359+ /* parse '-', '+', '#', etc. */
1360+ // maybe_line_continuations(ps, ps->index); // already called above
1361+ switch (ps->src.contents[ps->index]) {
1362+ case L'-': pe->pe_type |= PT_MINUS; goto parse_subst;
1363+ case L'+': pe->pe_type |= PT_PLUS; goto parse_subst;
1364+ case L'=': pe->pe_type |= PT_ASSIGN; goto parse_subst;
1365+ case L'?': pe->pe_type |= PT_ERROR; goto parse_subst;
1366+ case L'#': pe->pe_type |= PT_MATCH | PT_MATCHHEAD; goto parse_match;
1367+ case L'%': pe->pe_type |= PT_MATCH | PT_MATCHTAIL; goto parse_match;
1368+ case L'/':
1369+ if (posixly_correct)
1370+ serror(ps, Ngt("invalid character `%lc' in parameter expansion"),
1371+ (wint_t) L'/');
1372+ pe->pe_type |= PT_SUBST | PT_MATCHLONGEST;
1373+ goto parse_match;
1374+ case L'\0':
1375+ case L'\n':
1376+ case L'}':
1377+ pe->pe_type |= PT_NONE;
1378+ if (pe->pe_type & PT_COLON)
1379+ serror(ps, Ngt("invalid use of `%lc' in parameter expansion"),
1380+ (wint_t) L':');
1381+ goto check_closing_brace;
1382+ default:
1383+ serror(ps, Ngt("invalid character `%lc' in parameter expansion"),
1384+ (wint_t) ps->src.contents[ps->index]);
1385+ goto end;
1386+ }
1387+
1388+parse_match:
1389+ maybe_line_continuations(ps, ps->index + 1);
1390+ if (pe->pe_type & PT_COLON) {
1391+ if ((pe->pe_type & PT_MASK) == PT_SUBST)
1392+ pe->pe_type |= PT_MATCHHEAD | PT_MATCHTAIL;
1393+ else
1394+ serror(ps, Ngt("invalid use of `%lc' in parameter expansion"),
1395+ (wint_t) L':');
1396+ maybe_line_continuations(ps, ++ps->index);
1397+ } else if (ps->src.contents[ps->index] ==
1398+ ps->src.contents[ps->index + 1]) {
1399+ if ((pe->pe_type & PT_MASK) == PT_MATCH)
1400+ pe->pe_type |= PT_MATCHLONGEST;
1401+ else
1402+ pe->pe_type |= PT_SUBSTALL;
1403+ ps->index += 2;
1404+ } else if (ps->src.contents[ps->index] == L'/') {
1405+ if (ps->src.contents[ps->index + 1] == L'#') {
1406+ pe->pe_type |= PT_MATCHHEAD;
1407+ ps->index += 2;
1408+ } else if (ps->src.contents[ps->index + 1] == L'%') {
1409+ pe->pe_type |= PT_MATCHTAIL;
1410+ ps->index += 2;
1411+ } else {
1412+ ps->index += 1;
1413+ }
1414+ } else {
1415+ ps->index += 1;
1416+ }
1417+ if ((pe->pe_type & PT_MASK) == PT_MATCH) {
1418+ pe->pe_match = parse_word(ps, is_closing_brace);
1419+ goto check_closing_brace;
1420+ } else {
1421+ pe->pe_match = parse_word(ps, is_slash_or_closing_brace);
1422+ // maybe_line_continuations(ps, ps->index); // called in parse_word
1423+ if (ps->src.contents[ps->index] != L'/')
1424+ goto check_closing_brace;
1425+ }
1426+
1427+parse_subst:
1428+ ps->index++;
1429+ pe->pe_subst = parse_word(ps, is_closing_brace);
1430+
1431+check_closing_brace:
1432+ // maybe_line_continuations(ps, ps->index); // already called above
1433+ if (ps->src.contents[ps->index] == L'}')
1434+ ps->index++;
1435+ else
1436+ serror(ps, Ngt("`%ls' is missing"), L"}");
1437+ if ((pe->pe_type & PT_NUMBER) && (pe->pe_type & PT_MASK) != PT_NONE)
1438+ serror(ps, Ngt("invalid use of `%lc' in parameter expansion"),
1439+ (wint_t) L'#');
1440+
1441+end:;
1442+ wordunit_T *result = xmalloc(sizeof *result);
1443+ result->next = NULL;
1444+ result->wu_type = WT_PARAM;
1445+ result->wu_param = pe;
1446+ return result;
1447+}
1448+
1449+/* Parses a command substitution that starts with "$(".
1450+ * When this function is called, `ps->next_index' must be just after the opening
1451+ * "(". When this function returns, `ps->index' is just after the closing ")".
1452+ */
1453+wordunit_T *parse_cmdsubst_in_paren(parsestate_T *ps)
1454+{
1455+ wordunit_T *result = xmalloc(sizeof *result);
1456+ result->next = NULL;
1457+ result->wu_type = WT_CMDSUB;
1458+ result->wu_cmdsub = extract_command_in_paren(ps);
1459+
1460+ maybe_line_continuations(ps, ps->index);
1461+ if (ps->src.contents[ps->index] == L')')
1462+ ps->index++;
1463+ else
1464+ serror(ps, Ngt("`%ls' is missing"), L")");
1465+ return result;
1466+}
1467+
1468+/* Extracts commands between '(' and ')'.
1469+ * When this function is called, `ps->next_index' must be just after the opening
1470+ * "(". When this function returns, the current token will be the closing ")".
1471+ */
1472+embedcmd_T extract_command_in_paren(parsestate_T *ps)
1473+{
1474+ plist_T save_pending_heredocs;
1475+ embedcmd_T result;
1476+
1477+ assert(ps->next_index > 0);
1478+ assert(ps->src.contents[ps->next_index - 1] == L'(');
1479+
1480+ save_pending_heredocs = ps->pending_heredocs;
1481+ pl_init(&ps->pending_heredocs);
1482+
1483+ if (posixly_correct && ps->info->enable_alias) {
1484+ result.is_preparsed = false;
1485+ result.value.unparsed = extract_command_in_paren_unparsed(ps);
1486+ } else {
1487+ next_token(ps);
1488+ result.is_preparsed = true;
1489+ result.value.preparsed = parse_compound_list(ps);
1490+ }
1491+
1492+ pl_destroy(&ps->pending_heredocs);
1493+ ps->pending_heredocs = save_pending_heredocs;
1494+
1495+ return result;
1496+}
1497+
1498+/* Parses commands between '(' and ')'.
1499+ * The current token must be the opening parenthesis L'(' when this function is
1500+ * called. The current token is advanced to the closing parenthesis L')'. */
1501+wchar_t *extract_command_in_paren_unparsed(parsestate_T *ps)
1502+{
1503+ bool save_enable_alias = ps->enable_alias;
1504+ ps->enable_alias = false;
1505+
1506+ size_t startindex = ps->next_index;
1507+ next_token(ps);
1508+ andorsfree(parse_compound_list(ps));
1509+ assert(startindex <= ps->index);
1510+
1511+ wchar_t *result = xwcsndup(
1512+ &ps->src.contents[startindex], ps->index - startindex);
1513+
1514+ ps->enable_alias = save_enable_alias;
1515+ return result;
1516+}
1517+
1518+/* Parses a command substitution enclosed by backquotes.
1519+ * When this function is called, the current position must be at the character
1520+ * that just follows the opening backquote L'`'. This function advances the
1521+ * position to the character that just follows the closing backquote L'`'.
1522+ * If `bsbq' is true, backslash-escaped double quotes are handled; otherwise,
1523+ * they are left intact. */
1524+wordunit_T *parse_cmdsubst_in_backquote(parsestate_T *ps, bool bsbq)
1525+{
1526+ xwcsbuf_T buf;
1527+ wordunit_T *result = xmalloc(sizeof *result);
1528+ result->next = NULL;
1529+ result->wu_type = WT_CMDSUB;
1530+ result->wu_cmdsub.is_preparsed = false;
1531+
1532+ assert(ps->src.contents[ps->index - 1] == L'`');
1533+ wb_init(&buf);
1534+ for (;;) {
1535+ maybe_line_continuations(ps, ps->index);
1536+ switch (ps->src.contents[ps->index]) {
1537+ case L'\0':
1538+ serror(ps,
1539+ Ngt("the backquoted command substitution is not closed"));
1540+ goto end;
1541+ case L'`':
1542+ ps->index++;
1543+ goto end;
1544+ case L'\\':
1545+ ps->index++;
1546+ switch (ps->src.contents[ps->index]) {
1547+ case L'$': case L'`': case L'\\':
1548+ goto default_;
1549+ case L'"':
1550+ if (bsbq)
1551+ goto default_;
1552+ /* falls thru! */
1553+ default:
1554+ wb_wccat(&buf, L'\\');
1555+ continue;
1556+ }
1557+ case L'\n':
1558+ ps->info->lineno++;
1559+ /* falls thru! */
1560+ default: default_:
1561+ wb_wccat(&buf, ps->src.contents[ps->index]);
1562+ ps->index++;
1563+ break;
1564+ }
1565+ }
1566+end:
1567+ result->wu_cmdsub.value.unparsed = wb_towcs(&buf);
1568+ return result;
1569+}
1570+
1571+/* Parses an arithmetic expansion.
1572+ * The current position must be at the first opening parenthesis L'(' when this
1573+ * function is called and the position is advanced to the character that just
1574+ * follows the last closing parenthesis L')'. If there is no arithmetic
1575+ * expansion, the return value is NULL and the position is not moved. */
1576+wordunit_T *tryparse_arith(parsestate_T *ps)
1577+{
1578+ size_t saveindex = ps->index;
1579+ assert(ps->src.contents[ps->index] == L'(' &&
1580+ ps->src.contents[ps->index + 1] == L'(');
1581+ ps->index += 2;
1582+
1583+ wordunit_T *first = NULL, **lastp = &first;
1584+ size_t startindex = ps->index;
1585+ int nestparen = 0;
1586+
1587+ for (;;) {
1588+ maybe_line_continuations(ps, ps->index);
1589+ switch (ps->src.contents[ps->index]) {
1590+ case L'\0':
1591+ serror(ps, Ngt("`%ls' is missing"), L"))");
1592+ goto end;
1593+ case L'\\':
1594+ if (ps->src.contents[ps->index + 1] != L'\0') {
1595+ assert(ps->src.contents[ps->index + 1] != L'\n');
1596+ ps->index += 2;
1597+ continue;
1598+ }
1599+ break;
1600+ case L'\n':
1601+ ps->info->lineno++;
1602+ break;
1603+ case L'$':
1604+ case L'`':
1605+ MAKE_WORDUNIT_STRING;
1606+ wordunit_T *wu = parse_special_word_unit(ps, false);
1607+ startindex = ps->index;
1608+ if (wu != NULL) {
1609+ *lastp = wu;
1610+ lastp = &wu->next;
1611+ continue;
1612+ } else if (ps->src.contents[ps->index] == L'\0') {
1613+ continue;
1614+ }
1615+ break;
1616+ case L'(':
1617+ nestparen++;
1618+ break;
1619+ case L')':
1620+ nestparen--;
1621+ if (nestparen >= 0)
1622+ break;
1623+ maybe_line_continuations(ps, ps->index + 1);
1624+ switch (ps->src.contents[ps->index + 1]) {
1625+ case L')':
1626+ MAKE_WORDUNIT_STRING;
1627+ ps->index += 2;
1628+ goto end;
1629+ case L'\0':
1630+ serror(ps, Ngt("`%ls' is missing"), L")");
1631+ goto end;
1632+ default:
1633+ goto not_arithmetic_expansion;
1634+ }
1635+ default:
1636+ break;
1637+ }
1638+ ps->index++;
1639+ }
1640+end:;
1641+ wordunit_T *result = xmalloc(sizeof *result);
1642+ result->next = NULL;
1643+ result->wu_type = WT_ARITH;
1644+ result->wu_arith = first;
1645+ return result;
1646+
1647+not_arithmetic_expansion:
1648+ wordfree(first);
1649+ rewind_index(ps, saveindex);
1650+ return NULL;
1651+}
1652+
1653+/***** Newline token parser *****/
1654+
9591655 /* Parses the newline token at the current position and proceeds to the next
9601656 * line. The contents of pending here-documents are read if any. The current
9611657 * token is cleared. */
@@ -988,6 +1684,8 @@
9881684 return found;
9891685 }
9901686
1687+/***** Character classifiers *****/
1688+
9911689 /* Checks if the specified character is a token separator. */
9921690 bool is_token_delimiter_char(wchar_t c)
9931691 {
@@ -1015,6 +1713,7 @@
10151713 return c == L'}';
10161714 }
10171715
1716+/***** Aliases *****/
10181717
10191718 /* Performs alias substitution with the given parse state. Proceeds to the
10201719 * next token if substitution occurred. This function does not substitute an
@@ -1046,6 +1745,8 @@
10461745 while (psubstitute_alias(ps, flags)) ;
10471746 }
10481747
1748+/***** Syntax parser functions *****/
1749+
10491750 /* Parses commands.
10501751 * If `toeol' is true, commands are parsed up to the end of the current input;
10511752 * otherwise, up to the next closing token. */
@@ -1546,585 +2247,6 @@
15462247 return result;
15472248 }
15482249
1549-/* Parses a word at the current position.
1550- * `testfunc' is a function that determines if a character is a word delimiter.
1551- * The parsing proceeds up to an unescaped character for which `testfunc'
1552- * returns true.
1553- * It is not an error if there are no characters to be a word, in which case
1554- * NULL is returned. */
1555-wordunit_T *parse_word(parsestate_T *ps, bool testfunc(wchar_t c))
1556-{
1557- wordunit_T *first = NULL, **lastp = &first;
1558- bool indq = false; /* in double quotes? */
1559- size_t startindex = ps->index;
1560-
1561-/* appends the substring from `startindex' to `index' as a new word unit
1562- * to `*lastp' */
1563-#define MAKE_WORDUNIT_STRING \
1564- do { \
1565- size_t len = ps->index - startindex; \
1566- if (len > 0) { \
1567- wordunit_T *w = xmalloc(sizeof *w); \
1568- w->next = NULL; \
1569- w->wu_type = WT_STRING; \
1570- w->wu_string = xwcsndup(&ps->src.contents[startindex], len); \
1571- *lastp = w; \
1572- lastp = &w->next; \
1573- } \
1574- } while (0)
1575-
1576- while (maybe_line_continuations(ps, ps->index),
1577- indq || !testfunc(ps->src.contents[ps->index])) {
1578-
1579- switch (ps->src.contents[ps->index]) {
1580- case L'\0':
1581- goto done; // reached EOF
1582- case L'\\':
1583- if (ps->src.contents[ps->index + 1] != L'\0') {
1584- assert(ps->src.contents[ps->index + 1] != L'\n');
1585- ps->index += 2;
1586- continue;
1587- }
1588- break;
1589- case L'\n':
1590- ps->info->lineno++;
1591- break;
1592- case L'$':
1593- case L'`':
1594- MAKE_WORDUNIT_STRING;
1595- wordunit_T *wu = parse_special_word_unit(ps, indq);
1596- startindex = ps->index;
1597- if (wu != NULL) {
1598- *lastp = wu;
1599- lastp = &wu->next;
1600- continue;
1601- } else if (ps->src.contents[ps->index] == L'\0') {
1602- continue;
1603- }
1604- break;
1605- case L'\'':
1606- if (!indq) {
1607- ps->index++;
1608- skip_to_next_single_quote(ps);
1609- if (ps->src.contents[ps->index] == L'\'')
1610- ps->index++;
1611- continue;
1612- }
1613- break;
1614- case L'"':
1615- indq = !indq;
1616- /* falls thru! */
1617- default:
1618- break;
1619- }
1620- ps->index++;
1621- }
1622-done:
1623- MAKE_WORDUNIT_STRING;
1624-
1625- if (indq)
1626- serror(ps, Ngt("the double quotation is not closed"));
1627-
1628- return first;
1629-}
1630-
1631-/* Skips to the next single quote.
1632- * If the current position is already at a single quote, the position is not
1633- * moved.
1634- * It is an error if there is no single quote before the end of file. */
1635-void skip_to_next_single_quote(parsestate_T *ps)
1636-{
1637- for (;;) {
1638- switch (ps->src.contents[ps->index]) {
1639- case L'\'':
1640- return;
1641- case L'\0':
1642- if (read_more_input(ps) != INPUT_OK) {
1643- serror(ps, Ngt("the single quotation is not closed"));
1644- return;
1645- }
1646- continue;
1647- case L'\n':
1648- ps->info->lineno++;
1649- break;
1650- default:
1651- break;
1652- }
1653- ps->index++;
1654- }
1655-}
1656-
1657-/* Parses a parameter expansion or command substitution that starts with '$' or
1658- * '`'. The character at the current position must be '$' or '`' when this
1659- * function is called and the position is advanced to right after the expansion
1660- * or substitution.
1661- * If the character at the current position is '$' but it is not an expansion,
1662- * the position is not moved and the return value is NULL. Otherwise, the
1663- * position is advanced by at least one character.
1664- * Between double quotes, `indq' must be true. */
1665-wordunit_T *parse_special_word_unit(parsestate_T *ps, bool indq)
1666-{
1667- switch (ps->src.contents[ps->index++]) {
1668- case L'$':
1669- maybe_line_continuations(ps, ps->index);
1670- switch (ps->src.contents[ps->index]) {
1671- case L'{':
1672- return parse_paramexp_in_brace(ps);
1673- case L'(':
1674- maybe_line_continuations(ps, ps->index + 1);
1675- if (ps->src.contents[ps->index + 1] == L'(') {
1676- wordunit_T *wu = tryparse_arith(ps);
1677- if (wu != NULL)
1678- return wu;
1679- }
1680- ps->next_index = ps->index + 1;
1681- return parse_cmdsubst_in_paren(ps);
1682- default:
1683- return tryparse_paramexp_raw(ps);
1684- }
1685- case L'`':
1686- return parse_cmdsubst_in_backquote(ps, indq);
1687- default:
1688- assert(false);
1689- }
1690-}
1691-
1692-/* Parses a parameter that is not enclosed by { }.
1693- * The current position must be at the first character of the parameter name
1694- * that follows L'$'. The position is advanced to right after the name.
1695- * If there is no parameter, the position is put back to L'$'. */
1696-wordunit_T *tryparse_paramexp_raw(parsestate_T *ps)
1697-{
1698- paramexp_T *pe;
1699- size_t namelen; /* parameter name length */
1700-
1701- maybe_line_continuations(ps, ps->index);
1702- switch (ps->src.contents[ps->index]) {
1703- case L'@': case L'*': case L'#': case L'?':
1704- case L'-': case L'$': case L'!':
1705- namelen = 1;
1706- goto success;
1707- }
1708- if (!is_portable_name_char(ps->src.contents[ps->index]))
1709- goto error;
1710- if (iswdigit(ps->src.contents[ps->index]))
1711- namelen = 1;
1712- else
1713- namelen = count_name_length(ps, is_portable_name_char);
1714-
1715-success:
1716- pe = xmalloc(sizeof *pe);
1717- pe->pe_type = PT_NONE;
1718- pe->pe_name = xwcsndup(&ps->src.contents[ps->index], namelen);
1719- pe->pe_start = pe->pe_end = pe->pe_match = pe->pe_subst = NULL;
1720-
1721- wordunit_T *result = xmalloc(sizeof *result);
1722- result->next = NULL;
1723- result->wu_type = WT_PARAM;
1724- result->wu_param = pe;
1725- ps->index += namelen;
1726- return result;
1727-
1728-error:
1729- ps->index--;
1730- assert(ps->src.contents[ps->index] == L'$');
1731- return NULL;
1732-}
1733-
1734-/* Parses a parameter expansion that starts with "${".
1735- * The current position must be at the opening brace L'{' when this function is
1736- * called and the position is advanced to just after the closing brace L'}'. */
1737-wordunit_T *parse_paramexp_in_brace(parsestate_T *ps)
1738-{
1739- paramexp_T *pe = xmalloc(sizeof *pe);
1740- pe->pe_type = 0;
1741- pe->pe_name = NULL;
1742- pe->pe_start = pe->pe_end = pe->pe_match = pe->pe_subst = NULL;
1743-
1744- assert(ps->src.contents[ps->index] == L'{');
1745- ps->index++;
1746-
1747- /* parse PT_NUMBER */
1748- maybe_line_continuations(ps, ps->index);
1749- if (ps->src.contents[ps->index] == L'#') {
1750- maybe_line_continuations(ps, ps->index + 1);
1751- switch (ps->src.contents[ps->index + 1]) {
1752- case L'\0': case L'}':
1753- case L'+': case L'=': case L':': case L'/': case L'%':
1754- break;
1755- case L'-': case L'?': case L'#':
1756- maybe_line_continuations(ps, ps->index + 2);
1757- if (ps->src.contents[ps->index + 2] != L'}')
1758- break;
1759- /* falls thru! */
1760- default:
1761- pe->pe_type |= PT_NUMBER;
1762- ps->index++;
1763- break;
1764- }
1765- }
1766-
1767- /* parse nested expansion */
1768- // maybe_line_continuations(ps, ps->index); // already called above
1769- if (!posixly_correct && ps->src.contents[ps->index] == L'{') {
1770- pe->pe_type |= PT_NEST;
1771- pe->pe_nest = parse_paramexp_in_brace(ps);
1772- } else if (!posixly_correct
1773- && (ps->src.contents[ps->index] == L'`'
1774- || (ps->src.contents[ps->index] == L'$'
1775- && (maybe_line_continuations(ps, ps->index + 1),
1776- ps->src.contents[ps->index + 1] == L'{'
1777- || ps->src.contents[ps->index + 1] == L'(')))) {
1778- size_t neststartindex = ps->index;
1779- pe->pe_nest = parse_special_word_unit(ps, false);
1780- if (ps->index == neststartindex)
1781- goto parse_name;
1782- pe->pe_type |= PT_NEST;
1783- maybe_line_continuations(ps, ps->index);
1784- } else {
1785-parse_name:;
1786- /* no nesting: parse parameter name normally */
1787- size_t namestartindex = ps->index;
1788- switch (ps->src.contents[ps->index]) {
1789- case L'@': case L'*': case L'#': case L'?':
1790- case L'-': case L'$': case L'!':
1791- ps->index++;
1792- break;
1793- default:
1794- while (maybe_line_continuations(ps, ps->index),
1795- is_name_char(ps->src.contents[ps->index]))
1796- ps->index++;
1797- break;
1798- }
1799- size_t namelen = ps->index - namestartindex;
1800- if (namelen == 0) {
1801- serror(ps, Ngt("the parameter name is missing or invalid"));
1802- goto end;
1803- }
1804- pe->pe_name = xwcsndup(&ps->src.contents[namestartindex], namelen);
1805- }
1806-
1807- /* parse indices */
1808- // maybe_line_continuations(ps, ps->index); // already called above
1809- if (!posixly_correct && ps->src.contents[ps->index] == L'[') {
1810- ps->index++;
1811- pe->pe_start = parse_word(ps, is_comma_or_closing_bracket);
1812- if (pe->pe_start == NULL)
1813- serror(ps, Ngt("the index is missing"));
1814- if (ps->src.contents[ps->index] == L',') {
1815- ps->index++;
1816- pe->pe_end = parse_word(ps, is_comma_or_closing_bracket);
1817- if (pe->pe_end == NULL)
1818- serror(ps, Ngt("the index is missing"));
1819- }
1820- if (ps->src.contents[ps->index] == L']') {
1821- maybe_line_continuations(ps, ++ps->index);
1822- } else {
1823- serror(ps, Ngt("`%ls' is missing"), L"]");
1824- }
1825- }
1826-
1827- /* parse PT_COLON */
1828- // maybe_line_continuations(ps, ps->index); // already called above
1829- if (ps->src.contents[ps->index] == L':') {
1830- pe->pe_type |= PT_COLON;
1831- maybe_line_continuations(ps, ++ps->index);
1832- }
1833-
1834- /* parse '-', '+', '#', etc. */
1835- // maybe_line_continuations(ps, ps->index); // already called above
1836- switch (ps->src.contents[ps->index]) {
1837- case L'-': pe->pe_type |= PT_MINUS; goto parse_subst;
1838- case L'+': pe->pe_type |= PT_PLUS; goto parse_subst;
1839- case L'=': pe->pe_type |= PT_ASSIGN; goto parse_subst;
1840- case L'?': pe->pe_type |= PT_ERROR; goto parse_subst;
1841- case L'#': pe->pe_type |= PT_MATCH | PT_MATCHHEAD; goto parse_match;
1842- case L'%': pe->pe_type |= PT_MATCH | PT_MATCHTAIL; goto parse_match;
1843- case L'/':
1844- if (posixly_correct)
1845- serror(ps, Ngt("invalid character `%lc' in parameter expansion"),
1846- (wint_t) L'/');
1847- pe->pe_type |= PT_SUBST | PT_MATCHLONGEST;
1848- goto parse_match;
1849- case L'\0':
1850- case L'\n':
1851- case L'}':
1852- pe->pe_type |= PT_NONE;
1853- if (pe->pe_type & PT_COLON)
1854- serror(ps, Ngt("invalid use of `%lc' in parameter expansion"),
1855- (wint_t) L':');
1856- goto check_closing_brace;
1857- default:
1858- serror(ps, Ngt("invalid character `%lc' in parameter expansion"),
1859- (wint_t) ps->src.contents[ps->index]);
1860- goto end;
1861- }
1862-
1863-parse_match:
1864- maybe_line_continuations(ps, ps->index + 1);
1865- if (pe->pe_type & PT_COLON) {
1866- if ((pe->pe_type & PT_MASK) == PT_SUBST)
1867- pe->pe_type |= PT_MATCHHEAD | PT_MATCHTAIL;
1868- else
1869- serror(ps, Ngt("invalid use of `%lc' in parameter expansion"),
1870- (wint_t) L':');
1871- maybe_line_continuations(ps, ++ps->index);
1872- } else if (ps->src.contents[ps->index] ==
1873- ps->src.contents[ps->index + 1]) {
1874- if ((pe->pe_type & PT_MASK) == PT_MATCH)
1875- pe->pe_type |= PT_MATCHLONGEST;
1876- else
1877- pe->pe_type |= PT_SUBSTALL;
1878- ps->index += 2;
1879- } else if (ps->src.contents[ps->index] == L'/') {
1880- if (ps->src.contents[ps->index + 1] == L'#') {
1881- pe->pe_type |= PT_MATCHHEAD;
1882- ps->index += 2;
1883- } else if (ps->src.contents[ps->index + 1] == L'%') {
1884- pe->pe_type |= PT_MATCHTAIL;
1885- ps->index += 2;
1886- } else {
1887- ps->index += 1;
1888- }
1889- } else {
1890- ps->index += 1;
1891- }
1892- if ((pe->pe_type & PT_MASK) == PT_MATCH) {
1893- pe->pe_match = parse_word(ps, is_closing_brace);
1894- goto check_closing_brace;
1895- } else {
1896- pe->pe_match = parse_word(ps, is_slash_or_closing_brace);
1897- // maybe_line_continuations(ps, ps->index); // called in parse_word
1898- if (ps->src.contents[ps->index] != L'/')
1899- goto check_closing_brace;
1900- }
1901-
1902-parse_subst:
1903- ps->index++;
1904- pe->pe_subst = parse_word(ps, is_closing_brace);
1905-
1906-check_closing_brace:
1907- // maybe_line_continuations(ps, ps->index); // already called above
1908- if (ps->src.contents[ps->index] == L'}')
1909- ps->index++;
1910- else
1911- serror(ps, Ngt("`%ls' is missing"), L"}");
1912- if ((pe->pe_type & PT_NUMBER) && (pe->pe_type & PT_MASK) != PT_NONE)
1913- serror(ps, Ngt("invalid use of `%lc' in parameter expansion"),
1914- (wint_t) L'#');
1915-
1916-end:;
1917- wordunit_T *result = xmalloc(sizeof *result);
1918- result->next = NULL;
1919- result->wu_type = WT_PARAM;
1920- result->wu_param = pe;
1921- return result;
1922-}
1923-
1924-/* Parses a command substitution that starts with "$(".
1925- * When this function is called, `ps->next_index' must be just after the opening
1926- * "(". When this function returns, `ps->index' is just after the closing ")".
1927- */
1928-wordunit_T *parse_cmdsubst_in_paren(parsestate_T *ps)
1929-{
1930- wordunit_T *result = xmalloc(sizeof *result);
1931- result->next = NULL;
1932- result->wu_type = WT_CMDSUB;
1933- result->wu_cmdsub = extract_command_in_paren(ps);
1934-
1935- maybe_line_continuations(ps, ps->index);
1936- if (ps->src.contents[ps->index] == L')')
1937- ps->index++;
1938- else
1939- serror(ps, Ngt("`%ls' is missing"), L")");
1940- return result;
1941-}
1942-
1943-/* Extracts commands between '(' and ')'.
1944- * When this function is called, `ps->next_index' must be just after the opening
1945- * "(". When this function returns, the current token will be the closing ")".
1946- */
1947-embedcmd_T extract_command_in_paren(parsestate_T *ps)
1948-{
1949- plist_T save_pending_heredocs;
1950- embedcmd_T result;
1951-
1952- assert(ps->next_index > 0);
1953- assert(ps->src.contents[ps->next_index - 1] == L'(');
1954-
1955- save_pending_heredocs = ps->pending_heredocs;
1956- pl_init(&ps->pending_heredocs);
1957-
1958- if (posixly_correct && ps->info->enable_alias) {
1959- result.is_preparsed = false;
1960- result.value.unparsed = extract_command_in_paren_unparsed(ps);
1961- } else {
1962- next_token(ps);
1963- result.is_preparsed = true;
1964- result.value.preparsed = parse_compound_list(ps);
1965- }
1966-
1967- pl_destroy(&ps->pending_heredocs);
1968- ps->pending_heredocs = save_pending_heredocs;
1969-
1970- return result;
1971-}
1972-
1973-/* Parses commands between '(' and ')'.
1974- * The current token must be the opening parenthesis L'(' when this function is
1975- * called. The current token is advanced to the closing parenthesis L')'. */
1976-wchar_t *extract_command_in_paren_unparsed(parsestate_T *ps)
1977-{
1978- bool save_enable_alias = ps->enable_alias;
1979- ps->enable_alias = false;
1980-
1981- size_t startindex = ps->next_index;
1982- next_token(ps);
1983- andorsfree(parse_compound_list(ps));
1984- assert(startindex <= ps->index);
1985-
1986- wchar_t *result = xwcsndup(
1987- &ps->src.contents[startindex], ps->index - startindex);
1988-
1989- ps->enable_alias = save_enable_alias;
1990- return result;
1991-}
1992-
1993-/* Parses a command substitution enclosed by backquotes.
1994- * When this function is called, the current position must be at the character
1995- * that just follows the opening backquote L'`'. This function advances the
1996- * position to the character that just follows the closing backquote L'`'.
1997- * If `bsbq' is true, backslash-escaped double quotes are handled; otherwise, they
1998- * are left intact. */
1999-wordunit_T *parse_cmdsubst_in_backquote(parsestate_T *ps, bool bsbq)
2000-{
2001- xwcsbuf_T buf;
2002- wordunit_T *result = xmalloc(sizeof *result);
2003- result->next = NULL;
2004- result->wu_type = WT_CMDSUB;
2005- result->wu_cmdsub.is_preparsed = false;
2006-
2007- assert(ps->src.contents[ps->index - 1] == L'`');
2008- wb_init(&buf);
2009- for (;;) {
2010- maybe_line_continuations(ps, ps->index);
2011- switch (ps->src.contents[ps->index]) {
2012- case L'\0':
2013- serror(ps,
2014- Ngt("the backquoted command substitution is not closed"));
2015- goto end;
2016- case L'`':
2017- ps->index++;
2018- goto end;
2019- case L'\\':
2020- ps->index++;
2021- switch (ps->src.contents[ps->index]) {
2022- case L'$': case L'`': case L'\\':
2023- goto default_;
2024- case L'"':
2025- if (bsbq)
2026- goto default_;
2027- /* falls thru! */
2028- default:
2029- wb_wccat(&buf, L'\\');
2030- continue;
2031- }
2032- case L'\n':
2033- ps->info->lineno++;
2034- /* falls thru! */
2035- default: default_:
2036- wb_wccat(&buf, ps->src.contents[ps->index]);
2037- ps->index++;
2038- break;
2039- }
2040- }
2041-end:
2042- result->wu_cmdsub.value.unparsed = wb_towcs(&buf);
2043- return result;
2044-}
2045-
2046-/* Parses an arithmetic expansion.
2047- * The current position must be at the first opening parenthesis L'(' when this
2048- * function is called and the position is advanced to the character that just
2049- * follows the last closing parenthesis L')'. If there is no arithmetic
2050- * expansion, the return value is NULL and the position is not moved. */
2051-wordunit_T *tryparse_arith(parsestate_T *ps)
2052-{
2053- size_t saveindex = ps->index;
2054- assert(ps->src.contents[ps->index] == L'(' &&
2055- ps->src.contents[ps->index + 1] == L'(');
2056- ps->index += 2;
2057-
2058- wordunit_T *first = NULL, **lastp = &first;
2059- size_t startindex = ps->index;
2060- int nestparen = 0;
2061-
2062- for (;;) {
2063- maybe_line_continuations(ps, ps->index);
2064- switch (ps->src.contents[ps->index]) {
2065- case L'\0':
2066- serror(ps, Ngt("`%ls' is missing"), L"))");
2067- goto end;
2068- case L'\\':
2069- if (ps->src.contents[ps->index + 1] != L'\0') {
2070- assert(ps->src.contents[ps->index + 1] != L'\n');
2071- ps->index += 2;
2072- continue;
2073- }
2074- break;
2075- case L'\n':
2076- ps->info->lineno++;
2077- break;
2078- case L'$':
2079- case L'`':
2080- MAKE_WORDUNIT_STRING;
2081- wordunit_T *wu = parse_special_word_unit(ps, false);
2082- startindex = ps->index;
2083- if (wu != NULL) {
2084- *lastp = wu;
2085- lastp = &wu->next;
2086- continue;
2087- } else if (ps->src.contents[ps->index] == L'\0') {
2088- continue;
2089- }
2090- break;
2091- case L'(':
2092- nestparen++;
2093- break;
2094- case L')':
2095- nestparen--;
2096- if (nestparen >= 0)
2097- break;
2098- maybe_line_continuations(ps, ps->index + 1);
2099- switch (ps->src.contents[ps->index + 1]) {
2100- case L')':
2101- MAKE_WORDUNIT_STRING;
2102- ps->index += 2;
2103- goto end;
2104- case L'\0':
2105- serror(ps, Ngt("`%ls' is missing"), L")");
2106- goto end;
2107- default:
2108- goto not_arithmetic_expansion;
2109- }
2110- default:
2111- break;
2112- }
2113- ps->index++;
2114- }
2115-end:;
2116- wordunit_T *result = xmalloc(sizeof *result);
2117- result->next = NULL;
2118- result->wu_type = WT_ARITH;
2119- result->wu_arith = first;
2120- return result;
2121-
2122-not_arithmetic_expansion:
2123- wordfree(first);
2124- rewind_index(ps, saveindex);
2125- return NULL;
2126-}
2127-
21282250 /* Parses a compound command.
21292251 * `command' is the name of the command to parse such as "(" and "if".
21302252 * Returns NULL iff the current token does not start a compound command. */
@@ -2596,6 +2718,8 @@
25962718 return c;
25972719 }
25982720
2721+/***** Here-document contents *****/
2722+
25992723 /* Reads the contents of a here-document. */
26002724 void read_heredoc_contents(parsestate_T *ps, redir_T *r)
26012725 {
@@ -2779,110 +2903,7 @@
27792903 return lastp;
27802904 }
27812905
2782-/* Parses a string recognizing parameter expansions, command substitutions of
2783- * the form "$(...)" and arithmetic expansions.
2784- * All the members of `info' except `lastinputresult' must have been initialized
2785- * beforehand.
2786- * This function reads and parses the input to the end of file.
2787- * Iff successful, the result is assigned to `*resultp' and true is returned.
2788- * If the input is empty, NULL is assigned.
2789- * On error, the value of `*resultp' is undefined. */
2790-bool parse_string(parseparam_T *info, wordunit_T **restrict resultp)
2791-{
2792- parsestate_T ps = {
2793- .info = info,
2794- .error = false,
2795- .index = 0,
2796- .next_index = 0,
2797- .tokentype = TT_UNKNOWN,
2798- .token = NULL,
2799- .enable_alias = false,
2800- .reparse = false,
2801- .aliases = NULL,
2802- };
2803- wb_init(&ps.src);
28042906
2805- ps.info->lastinputresult = INPUT_OK;
2806- read_more_input(&ps);
2807- pl_init(&ps.pending_heredocs);
2808-
2809- resultp = parse_string_without_quotes(&ps, false, false, resultp);
2810- *resultp = NULL;
2811-
2812- wb_destroy(&ps.src);
2813- pl_destroy(&ps.pending_heredocs);
2814- assert(ps.aliases == NULL);
2815- //destroy_aliaslist(ps.aliases);
2816- wordfree(ps.token);
2817-
2818- if (ps.info->lastinputresult != INPUT_EOF || ps.error) {
2819- wordfree(*resultp);
2820- return false;
2821- } else {
2822- return true;
2823- }
2824-}
2825-
2826-
2827-/***** Auxiliaries about Error Messages *****/
2828-
2829-const char *get_errmsg_unexpected_tokentype(tokentype_T tokentype)
2830-{
2831- switch (tokentype) {
2832- case TT_RPAREN:
2833- return Ngt("encountered `%ls' without a matching `('");
2834- case TT_RBRACE:
2835- return Ngt("encountered `%ls' without a matching `{'");
2836- case TT_DOUBLE_SEMICOLON:
2837- return Ngt("`%ls' is used outside `case'");
2838- case TT_BANG:
2839- return Ngt("`%ls' cannot be used as a command name");
2840- case TT_IN:
2841- return Ngt("`%ls' cannot be used as a command name");
2842- case TT_FI:
2843- return Ngt("encountered `%ls' "
2844- "without a matching `if' and/or `then'");
2845- case TT_THEN:
2846- return Ngt("encountered `%ls' without a matching `if' or `elif'");
2847- case TT_DO:
2848- return Ngt("encountered `%ls' "
2849- "without a matching `for', `while', or `until'");
2850- case TT_DONE:
2851- return Ngt("encountered `%ls' without a matching `do'");
2852- case TT_ESAC:
2853- return Ngt("encountered `%ls' without a matching `case'");
2854- case TT_ELIF:
2855- case TT_ELSE:
2856- return Ngt("encountered `%ls' "
2857- "without a matching `if' and/or `then'");
2858- default:
2859- assert(false);
2860- }
2861-}
2862-
2863-void print_errmsg_token_unexpected(parsestate_T *ps)
2864-{
2865- assert(ps->index <= ps->next_index);
2866- size_t length = ps->next_index - ps->index;
2867- wchar_t token[length + 1];
2868- wcsncpy(token, &ps->src.contents[ps->index], length);
2869- token[length] = L'\0';
2870-
2871- const char *message = get_errmsg_unexpected_tokentype(ps->tokentype);
2872- serror(ps, message, token);
2873-}
2874-
2875-void print_errmsg_token_missing(parsestate_T *ps, const wchar_t *t)
2876-{
2877- if (is_closing_tokentype(ps->tokentype)) {
2878- print_errmsg_token_unexpected(ps);
2879- serror(ps, Ngt("(maybe you missed `%ls'?)"), t);
2880- } else {
2881- serror(ps, Ngt("`%ls' is missing"), t);
2882- }
2883-}
2884-
2885-
28862907 /********** Functions that Convert Parse Trees into Strings **********/
28872908
28882909 struct print {
Show on old repository browser