• R/O
  • SSH
  • HTTPS

yash: Commit


Commit MetaInfo

Revision: 4151 (tree)
Time: 2020-12-02 00:16:04
Author: magicant

Log Message

Allow parentheses and vertical bars in regex (#39094, #39449)

Change Summary

Incremental Difference

--- yash/trunk/NEWS (revision 4150)
+++ yash/trunk/NEWS (revision 4151)
@@ -45,6 +45,8 @@
4545 character.
4646 * When there are no positional parameters, the nested expansion
4747 "${{@}}" now expands to nothing rather than one empty field.
48+ * Unquoted parentheses and vertical bars now can be used in a
49+ regular expression in the "[[ word =~ regex ]]" syntax.
4850
4951 ----------------------------------------------------------------------
5052 Yash 2.50
--- yash/trunk/parser.c (revision 4150)
+++ yash/trunk/parser.c (revision 4151)
@@ -681,6 +681,8 @@
681681 __attribute__((nonnull,malloc,warn_unused_result));
682682 static wordunit_T *parse_double_bracket_operand(parsestate_T *ps)
683683 __attribute__((nonnull,malloc,warn_unused_result));
684+static wordunit_T *parse_double_bracket_operand_regex(parsestate_T *ps)
685+ __attribute__((nonnull,malloc,warn_unused_result));
684686 #endif /* YASH_ENABLE_DOUBLE_BRACKET */
685687 static command_T *parse_function(parsestate_T *ps)
686688 __attribute__((nonnull,malloc,warn_unused_result));
@@ -2849,6 +2851,7 @@
28492851 dbexptype_T type;
28502852 wchar_t *op;
28512853 wordunit_T *lhs, *rhs;
2854+ bool rhs_regex = false;
28522855
28532856 if (is_single_string_word(ps->token) &&
28542857 is_unary_primary(ps->token->wu_string)) {
@@ -2867,6 +2870,8 @@
28672870 } else if (is_single_string_word(ps->token) &&
28682871 is_binary_primary(ps->token->wu_string)) {
28692872 type = DBE_BINARY;
2873+ if (wcscmp(ps->token->wu_string, L"=~") == 0)
2874+ rhs_regex = true;
28702875 parse_primary_operator:
28712876 op = ps->token->wu_string, ps->token->wu_string = NULL;
28722877 } else {
@@ -2878,7 +2883,10 @@
28782883 next_token(ps);
28792884 psubstitute_alias_recursive(ps, 0);
28802885
2881- rhs = parse_double_bracket_operand(ps);
2886+ if (rhs_regex)
2887+ rhs = parse_double_bracket_operand_regex(ps);
2888+ else
2889+ rhs = parse_double_bracket_operand(ps);
28822890
28832891 return_result:;
28842892 dbexp_T *result = xmalloc(sizeof *result);
@@ -2889,7 +2897,7 @@
28892897 return result;
28902898 }
28912899
2892-/* Parses a operand token of a primary conditional expression in the double-
2900+/* Parses an operand token of a primary conditional expression in the double-
28932901 * bracket command. Returns NULL on error. */
28942902 wordunit_T *parse_double_bracket_operand(parsestate_T *ps)
28952903 {
@@ -2916,6 +2924,107 @@
29162924 return result;
29172925 }
29182926
2927+/* Parses the right-hand-side operand of a "=~" binary operator. Returns NULL on
2928+ * error. */
2929+wordunit_T *parse_double_bracket_operand_regex(parsestate_T *ps)
2930+{
2931+ /* Unlike a normal word token, this operand token is special because
2932+ * vertical bars (`|') and parentheses (`(' and `)') can appear in the token
2933+ * as if they were normal word characters. Additionally, parentheses can
2934+ * even nest! */
2935+
2936+ size_t grandstartindex = ps->index;
2937+
2938+ /* So, before calling this function, the current token must have been parsed
2939+ * as usual by the `next_token' function. We start by examining the current
2940+ * status to decide how we continue parsing possible remainder of the token.
2941+ */
2942+ if (ps->token == NULL) {
2943+ switch (ps->src.contents[ps->index]) {
2944+ case L'(':
2945+ case L'|':
2946+ /* The current token begins with a character that should be
2947+ * parsed as part of the token. */
2948+ break;
2949+ default:
2950+ /* The current token is a normal operator. */
2951+ return parse_double_bracket_operand(ps);
2952+ }
2953+ } else {
2954+ ps->index = ps->next_index;
2955+ }
2956+
2957+ /* Find the end of the result from the previous `next_token' call. */
2958+ wordunit_T **lastp = &ps->token;
2959+ while (*lastp != NULL)
2960+ lastp = &(*lastp)->next;
2961+
2962+ /* Now parse the remainder of the token. */
2963+ int nestparen = 0;
2964+ size_t startindex = ps->index;
2965+ for (;;) {
2966+ maybe_line_continuations(ps, ps->index);
2967+ switch (ps->src.contents[ps->index]) {
2968+ case L'\0':
2969+ serror(ps, Ngt("`%ls' is missing"), L"]]");
2970+ goto end;
2971+ case L'\\':
2972+ if (ps->src.contents[ps->index + 1] != L'\0') {
2973+ assert(ps->src.contents[ps->index + 1] != L'\n');
2974+ ps->index += 2;
2975+ continue;
2976+ }
2977+ break;
2978+ case L'$':
2979+ case L'`':
2980+ MAKE_WORDUNIT_STRING;
2981+ wordunit_T *wu = parse_special_word_unit(ps, false);
2982+ startindex = ps->index;
2983+ if (wu != NULL) {
2984+ *lastp = wu;
2985+ lastp = &wu->next;
2986+ continue;
2987+ } else if (ps->src.contents[ps->index] == L'\0') {
2988+ continue;
2989+ }
2990+ break;
2991+ case L'(':
2992+ nestparen++;
2993+ break;
2994+ case L')':
2995+ if (nestparen == 0) {
2996+ serror(ps, Ngt("encountered `%ls' without a matching `('"),
2997+ L")");
2998+ goto end;
2999+ }
3000+ nestparen--;
3001+ break;
3002+ case L'|':
3003+ break;
3004+ case L'\n':
3005+ if (nestparen == 0)
3006+ goto end;
3007+ ps->info->lineno++;
3008+ break;
3009+ default:
3010+ if (nestparen > 0)
3011+ break;
3012+ if (is_token_delimiter_char(ps->src.contents[ps->index]))
3013+ goto end;
3014+ break;
3015+ }
3016+ ps->index++;
3017+ }
3018+
3019+end:;
3020+ MAKE_WORDUNIT_STRING;
3021+ ps->next_index = ps->index;
3022+ ps->index = grandstartindex;
3023+ if (ps->token != NULL)
3024+ ps->tokentype = TT_WORD;
3025+ return parse_double_bracket_operand(ps);
3026+}
3027+
29193028 #endif /* YASH_ENABLE_DOUBLE_BRACKET */
29203029
29213030 /* Parses a function definition that starts with the "function" keyword.
Show on old repository browser