Develop and Download Open Source Software

Browse CVS Repository

Contents of /netruby/netruby/Scanner.cs

Parent Directory | Revision Log | View Revision Graph


Revision 1.1.1.1 - (show annotations) (download) (vendor branch)
Mon Apr 8 13:29:58 2002 UTC (22 years ago) by arton
Branch: MAIN, vendor
CVS Tags: start, HEAD
Changes since 1.1: +0 -0 lines
initial version 0.8

1 /*
2 Copyright(C) 2001-2002 arton
3
4 Permission is granted for use, copying, modification, distribution,
5 and distribution of modified versions of this work as long as the
6 above copyright notice is included.
7 */
8
9 using System;
10 using System.Text;
11 using System.Collections;
12 using System.IO;
13 using System.Reflection;
14 using System.Text.RegularExpressions;
15
16 namespace arton.NETRuby
17 {
/// <summary>
/// Lexer states; the scanner uses the current state to decide how newlines,
/// signs and reserved words are interpreted.
/// </summary>
public enum EXPR
{
    /// <summary>Ignore newline, +/- is a sign.</summary>
    BEG,

    /// <summary>Newline significant, +/- is an operator.</summary>
    END,

    /// <summary>Newline significant, +/- is an operator.</summary>
    ARG,

    /// <summary>Newline significant, +/- is an operator.</summary>
    MID,

    /// <summary>Ignore newline, no reserved words.</summary>
    FNAME,

    /// <summary>Right after `.' or `::', no reserved words.</summary>
    DOT,

    /// <summary>Immediately after `class', no here document.</summary>
    CLASS,
}
28
/// <summary>
/// Regular-expression option bits. Each member occupies its own bit so the
/// values can be OR-ed together.
/// </summary>
enum RE_OPTION
{
    IGNORECASE      = 1 << 0,
    EXTENDED        = 1 << 1,
    MULTILINE       = 1 << 2,
    SINGLELINE      = 1 << 3,
    POSIXLINE       = 1 << 4,
    LONGEST         = 1 << 5,
    MAY_IGNORECASE  = 1 << 6,
    OPTIMIZE_ANCHOR = 1 << 7,
    OPTIMIZE_EXACTN = 1 << 8,
    OPTIMIZE_NO_BM  = 1 << 9,
    OPTIMIZE_BMATCH = 1 << 10,
}
43
44 namespace yyParser
45 {
46
47 public class Scanner : yyInput,
48 Parser.Lexer
49 {
/// <summary>
/// Creates a scanner for an anonymous script: the thread's file name is set
/// to "(eval)" and its line counter to -1.
/// </summary>
public Scanner(Parser psr, TextReader rdr, NetRuby rb, RThread th)
    : this(psr, rdr, "(eval)", 0, rb, th)
{
}

/// <summary>
/// Creates a scanner that reads script text from <paramref name="rdr"/>.
/// </summary>
/// <param name="psr">Parser that receives error reports from the scanner.</param>
/// <param name="rdr">Source of the script text.</param>
/// <param name="fname">File name stored in the thread.</param>
/// <param name="start">Starting line; the thread's line counter is set to start - 1.</param>
/// <param name="rb">Interpreter instance; its __end__seen flag is cleared.</param>
/// <param name="th">Thread whose line and file fields are initialised.</param>
public Scanner(Parser psr, TextReader rdr, string fname, int start, NetRuby rb,
               RThread th)
{
    this.parser = psr;
    this.reader = rdr;
    this.ruby = rb;
    this.thread = th;

    int initialLine = start - 1;
    this.thread.line = initialLine;
    this.thread.file = fname;

    rb.__end__seen = false;
    this.tokenbuf = new StringBuilder(128);
}
74
/// <summary>
/// Explicit Parser.Lexer implementation that lets the parser read and
/// overwrite the scanner's current lexical state.
/// </summary>
EXPR Parser.Lexer.State
{
    get
    {
        return this.lex_state;
    }
    set
    {
        this.lex_state = value;
    }
}
80
81 private RThread thread;
82 private NetRuby ruby;
83 private int cond_nest = 0;
84 private uint cond_stack = 0;
85
/// <summary>
/// Enters a conditional context: bumps the nesting counter and shifts a
/// 1 bit onto the bottom of the condition bit stack.
/// </summary>
void Parser.Lexer.COND_PUSH()
{
    cond_nest += 1;
    cond_stack <<= 1;
    cond_stack |= 1;
}
/// <summary>
/// Leaves a conditional context: decrements the nesting counter and drops
/// the lowest bit of the condition bit stack.
/// </summary>
void Parser.Lexer.COND_POP()
{
    cond_nest -= 1;
    cond_stack = cond_stack >> 1;
}
/// <summary>
/// True when at least one conditional context is open and the lowest bit
/// of the condition bit stack is set.
/// </summary>
private bool COND_P()
{
    if (cond_nest <= 0)
    {
        return false;
    }
    return (cond_stack & 1) != 0;
}
100 private uint cmdarg_stack = 0;
/// <summary>Shifts a 1 bit onto the bottom of the command-argument bit stack.</summary>
void Parser.Lexer.CMDARG_PUSH()
{
    cmdarg_stack <<= 1;
    cmdarg_stack |= 1;
}
/// <summary>Drops the lowest bit of the command-argument bit stack.</summary>
void Parser.Lexer.CMDARG_POP()
{
    cmdarg_stack = cmdarg_stack >> 1;
}
/// <summary>True when the lowest bit of the command-argument bit stack is set.</summary>
private bool CMDARG_P()
{
    uint lowBit = cmdarg_stack & 1;
    return lowBit != 0;
}
113
114 private EXPR lex_state = EXPR.BEG;
115
116 private int curr = -1;
117
/// <summary>
/// Scans the next token and stores its code in <c>curr</c>.
/// </summary>
/// <returns>true if a token with a positive code was read, false otherwise.</returns>
bool yyInput.advance ()
{
    int scanned = yylex();
    curr = scanned;
#if _SCANNER_DEBUG
    System.Console.WriteLine("token:" + tok() + ", result=" + curr.ToString() + "(" + Char.ToString((char)curr) +")");
#endif
    return curr > 0;
}
126
/// <summary>Returns the token code stored by the most recent scan.</summary>
int yyInput.token ()
{
    int current = curr;
    return current;
}
/// <summary>Returns the semantic value (<c>yylval</c>) set while scanning.</summary>
Object yyInput.value ()
{
    Object semantic = yylval;
#if _SCANNER_DEBUG
    System.Console.WriteLine("value=" + ((yylval==null)?"null":yylval.ToString()));
#endif
    return semantic;
}
138
/// <summary>
/// One slot of the reserved-word table: the keyword text, two token ids and
/// the lexer state to switch to after the keyword. The scanner returns the
/// first id when the keyword is seen in state EXPR.BEG and the second id
/// otherwise (the two differ for keywords that have a modifier form).
/// </summary>
struct kwtable
{
    /// <summary>Creates a fully specified keyword entry.</summary>
    internal kwtable(string s, int i0, int i1, EXPR st)
    {
        name = s;
        id0 = i0;
        id1 = i1;
        state = st;
    }

    /// <summary>Creates a placeholder entry with ids 0 and state EXPR.BEG.</summary>
    internal kwtable(string s)
    {
        name = s;
        id0 = id1 = 0;
        state = EXPR.BEG;
    }

    internal string name;
    int id0;
    int id1;
    internal EXPR state;

    /// <summary>
    /// Index 0 accesses the first token id; any other index accesses the second.
    /// </summary>
    public int this[int i]
    {
        get
        {
            if (i == 0)
                return id0;
            return id1;
        }
        set
        {
            // Fixed: the setter used to assign id1 unconditionally, so
            // writing slot 0 silently overwrote slot 1 as well.
            if (i == 0)
                id0 = value;
            else
                id1 = value;
        }
    }

    /// <summary>Returns the first id when <paramref name="f"/> is false, else the second.</summary>
    public int id(bool f)
    {
        if (f == false)
            return id0;
        return id1;
    }
}
178
179 internal const int TOTAL_KEYWORDS = 40;
180 internal const int MIN_WORD_LENGTH = 2;
181 internal const int MAX_WORD_LENGTH = 8;
182 internal const int MIN_HASH_VALUE = 6;
183 internal const int MAX_HASH_VALUE = 55;
184
185 static private byte[] asso_values = {
186 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
187 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
188 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
189 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
190 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
191 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
192 56, 56, 56, 11, 56, 56, 36, 56, 1, 37,
193 31, 1, 56, 56, 56, 56, 29, 56, 1, 56,
194 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
195 56, 56, 56, 56, 56, 1, 56, 32, 1, 2,
196 1, 1, 4, 23, 56, 17, 56, 20, 9, 2,
197 9, 26, 14, 56, 5, 1, 1, 16, 56, 21,
198 20, 9, 56, 56, 56, 56, 56, 56, 56, 56,
199 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
200 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
201 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
202 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
203 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
204 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
205 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
206 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
207 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
208 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
209 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
210 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
211 56, 56, 56, 56, 56, 56
212 };
/// <summary>
/// Looks up the association value for one character. Characters beyond the
/// table (anything above 255) get MAX_HASH_VALUE + 1, the same filler the
/// table uses for characters that never occur in a keyword.
/// </summary>
private static int asso_value(char ch)
{
    if (ch < asso_values.Length)
        return asso_values[ch];
    return MAX_HASH_VALUE + 1;
}

/// <summary>
/// Perfect-hash function for the reserved-word table. The key is
/// len + asso(str[0]) + asso(str[len - 1]), plus asso(str[2]) when the
/// word has three or more characters.
/// </summary>
/// <param name="str">Candidate identifier.</param>
/// <param name="len">Number of characters of <paramref name="str"/> to consider.</param>
/// <returns>
/// The hash key. A value above MAX_HASH_VALUE means the word cannot be a keyword.
/// </returns>
private int hash(string str, int len)
{
    // Guard against empty or inconsistent input instead of indexing out of range.
    if (str == null || len < 1 || len > str.Length)
        return MAX_HASH_VALUE + 1;

    int hval = len;
    if (len >= 3)
        hval += asso_value(str[2]);
    hval += asso_value(str[0]);
    // Fixed: identifiers may contain letters above U+00FF (is_identchar accepts
    // any Unicode letter); indexing the 256-entry table directly used to throw.
    return hval + asso_value(str[len - 1]);
}
231
232 private static readonly kwtable nullword = new kwtable(null, 0, 0, 0);
233 private static readonly kwtable[] wordlist = new kwtable[]
234 {
235 new kwtable(""),new kwtable(""),new kwtable(""),new kwtable(""),new kwtable(""),new kwtable(""),
236 new kwtable("end", Token.kEND, Token.kEND, EXPR.END),
237 new kwtable("else", Token.kELSE, Token.kELSE, EXPR.BEG),
238 new kwtable("case", Token.kCASE, Token.kCASE, EXPR.BEG),
239 new kwtable("ensure", Token.kENSURE, Token.kENSURE, EXPR.BEG),
240 new kwtable("module", Token.kMODULE, Token.kMODULE, EXPR.BEG),
241 new kwtable("elsif", Token.kELSIF, Token.kELSIF, EXPR.BEG),
242 new kwtable("def", Token.kDEF, Token.kDEF, EXPR.FNAME),
243 new kwtable("rescue", Token.kRESCUE, Token.kRESCUE_MOD, EXPR.END),
244 new kwtable("not", Token.kNOT, Token.kNOT, EXPR.BEG),
245 new kwtable("then", Token.kTHEN, Token.kTHEN, EXPR.BEG),
246 new kwtable("yield", Token.kYIELD, Token.kYIELD, EXPR.ARG),
247 new kwtable("for", Token.kFOR, Token.kFOR, EXPR.BEG),
248 new kwtable("self", Token.kSELF, Token.kSELF, EXPR.END),
249 new kwtable("false", Token.kFALSE, Token.kFALSE, EXPR.END),
250 new kwtable("retry", Token.kRETRY, Token.kRETRY, EXPR.END),
251 new kwtable("return", Token.kRETURN, Token.kRETURN, EXPR.MID),
252 new kwtable("true", Token.kTRUE, Token.kTRUE, EXPR.END),
253 new kwtable("if", Token.kIF, Token.kIF_MOD, EXPR.BEG),
254 new kwtable("defined?", Token.kDEFINED, Token.kDEFINED, EXPR.ARG),
255 new kwtable("super", Token.kSUPER, Token.kSUPER, EXPR.ARG),
256 new kwtable("undef", Token.kUNDEF, Token.kUNDEF, EXPR.FNAME),
257 new kwtable("break", Token.kBREAK, Token.kBREAK, EXPR.END),
258 new kwtable("in", Token.kIN, Token.kIN, EXPR.BEG),
259 new kwtable("do", Token.kDO, Token.kDO, EXPR.BEG),
260 new kwtable("nil", Token.kNIL, Token.kNIL, EXPR.END),
261 new kwtable("until", Token.kUNTIL, Token.kUNTIL_MOD, EXPR.BEG),
262 new kwtable("unless", Token.kUNLESS, Token.kUNLESS_MOD, EXPR.BEG),
263 new kwtable("or", Token.kOR, Token.kOR, EXPR.BEG),
264 new kwtable("next", Token.kNEXT, Token.kNEXT, EXPR.END),
265 new kwtable("when", Token.kWHEN, Token.kWHEN, EXPR.BEG),
266 new kwtable("redo", Token.kREDO, Token.kREDO, EXPR.END),
267 new kwtable("and", Token.kAND, Token.kAND, EXPR.BEG),
268 new kwtable("begin", Token.kBEGIN, Token.kBEGIN, EXPR.BEG),
269 new kwtable("__LINE__", Token.k__LINE__, Token.k__LINE__, EXPR.END),
270 new kwtable("class", Token.kCLASS, Token.kCLASS, EXPR.CLASS),
271 new kwtable("__FILE__", Token.k__FILE__, Token.k__FILE__, EXPR.END),
272 new kwtable("END", Token.klEND, Token.klEND, EXPR.END),
273 new kwtable("BEGIN", Token.klBEGIN, Token.klBEGIN, EXPR.END),
274 new kwtable("while", Token.kWHILE, Token.kWHILE_MOD, EXPR.BEG),
275 new kwtable(""),
276 new kwtable(""),
277 new kwtable(""),
278 new kwtable(""),
279 new kwtable(""),
280 new kwtable(""),
281 new kwtable(""),
282 new kwtable(""),
283 new kwtable(""),
284 new kwtable(""),
285 new kwtable("alias", Token.kALIAS, Token.kALIAS, EXPR.FNAME)
286 };
287
/// <summary>
/// Looks <paramref name="str"/> up in the reserved-word table.
/// </summary>
/// <param name="str">Identifier text.</param>
/// <param name="len">Length of the identifier.</param>
/// <returns>
/// The matching table entry, or <c>nullword</c> (whose name is null) when the
/// identifier is not a reserved word.
/// </returns>
kwtable reserved_word(string str, int len)
{
    // Keywords are between MIN_WORD_LENGTH and MAX_WORD_LENGTH characters long.
    if (len < MIN_WORD_LENGTH || len > MAX_WORD_LENGTH)
        return nullword;

    int slot = hash(str, len);
    if (slot < 0 || slot > MAX_HASH_VALUE)
        return nullword;

    kwtable candidate = wordlist[slot];
    if (str == candidate.name)
        return candidate;
    return nullword;
}
304
305 private object yylval;
306 private Parser parser;
307 private TextReader reader;
308 private StringBuilder tokenbuf;
/// <summary>Returns the text accumulated in the token buffer.</summary>
private string tok()
{
    return tokenbuf.ToString();
}
/// <summary>Returns the number of characters in the token buffer.</summary>
private int toklen()
{
    return tokenbuf.Length;
}
/// <summary>Returns the last character in the token buffer.</summary>
private char toklast()
{
    int lastIndex = tokenbuf.Length - 1;
    return tokenbuf[lastIndex];
}
312 private int gets_ptr;
313 private string lastline;
314 private int pbeg;
315 private int pcur;
316 private int pend;
317 private int heredoc_end;
318
/// <summary>
/// Scans the next token from the input.
/// </summary>
/// <remarks>
/// Whitespace and comments are skipped; newlines are skipped as well while the
/// lexer state is BEG, FNAME or DOT. The method updates <c>lex_state</c> for
/// the following token and stores the token's semantic value, if any, in
/// <c>yylval</c>.
/// </remarks>
/// <returns>
/// A <c>Token</c> id or the character code of a single-character token;
/// 0 at end of script (NUL, ^D, ^Z or end of input) and after some compile errors.
/// </returns>
private int yylex()
{
    bool space_seen = false;
    kwtable kw;

retry:
    int c = nextc();
    switch (c)
    {
    case '\0':      /* NUL */
    case '\x0004':  /* ^D */
    case '\x001a':  /* ^Z */
    case -1:        /* end of script. */
        return 0;

    /* white spaces */
    case ' ': case '\t': case '\f': case '\r':
    case '\v':
        space_seen = true;
        goto retry;

    case '#':       /* it's a comment */
        while ((c = nextc()) != '\n') {
            if (c == -1)
                return 0;
        }
        /* a comment ends like a newline */
        goto case '\n';

    case '\n':
        switch (lex_state)
        {
        case EXPR.BEG:
        case EXPR.FNAME:
        case EXPR.DOT:
            goto retry;
        default:
            break;
        }
        lex_state = EXPR.BEG;
        return '\n';

    case '*':
        if ((c = nextc()) == '*') {
            lex_state = EXPR.BEG;
            /* keep the peeked char in c so pushback() can recognise end of input */
            if ((c = nextc()) == '=') {
                yylval = Token.tPOW;
                return Token.tOP_ASGN;
            }
            pushback(c);
            return Token.tPOW;
        }
        if (c == '=') {
            yylval = '*';
            lex_state = EXPR.BEG;
            return Token.tOP_ASGN;
        }
        pushback(c);
        if (lex_state == EXPR.ARG && space_seen && Char.IsWhiteSpace((char)c) == false)
        {
            ruby.warning("`*' interpreted as argument prefix");
            c = Token.tSTAR;
        }
        else if (lex_state == EXPR.BEG || lex_state == EXPR.MID) {
            c = Token.tSTAR;
        }
        else {
            c = '*';
        }
        lex_state = EXPR.BEG;
        return c;

    case '!':
        lex_state = EXPR.BEG;
        if ((c = nextc()) == '=') {
            return Token.tNEQ;
        }
        if (c == '~') {
            return Token.tNMATCH;
        }
        pushback(c);
        return '!';

    case '=':
        if (pcur == pbeg + 1)
        {
            /* skip embedded rd document */
            if (String.Compare(lastline, pcur, "begin", 0, 5) == 0
                && Char.IsWhiteSpace(lastline[pcur + 5]))
            {
                for (;;) {
                    /* discard the rest of the line, then look at the next line's first char */
                    pcur = pend;
                    c = nextc();
                    if (c == -1) {
                        thread.CompileError("embedded document meets end of file");
                        return 0;
                    }
                    if (c != '=') continue;
                    if (String.Compare(lastline, pcur, "end", 0, 3) == 0
                        && Char.IsWhiteSpace(lastline[pcur + 3])) {
                        break;
                    }
                }
                pcur = pend;
                goto retry;
            }
        }

        lex_state = EXPR.BEG;
        if ((c = nextc()) == '=') {
            if ((c = nextc()) == '=') {
                return Token.tEQQ;
            }
            pushback(c);
            return Token.tEQ;
        }
        if (c == '~') {
            return Token.tMATCH;
        }
        else if (c == '>') {
            return Token.tASSOC;
        }
        pushback(c);
        return '=';

    case '<':
        c = nextc();
        if (c == '<' &&
            lex_state != EXPR.END && lex_state != EXPR.CLASS &&
            (lex_state != EXPR.ARG || space_seen)) {
            int c2 = nextc();
            int indent = 0;
            if (c2 == '-') {
                indent = 1;
                c2 = nextc();
            }
            /* Fixed: a here document starts when "<<" (or "<<-") is followed by a
               quote character or an identifier character. The previous test
               required whitespace AND a quote, which can never be true, and used
               "> 0", which also excluded the first quote character. */
            if (!Char.IsWhiteSpace((char)c2) &&
                ("\"'`".IndexOf((char)c2) >= 0 || is_identchar(c2)))
            {
                return here_document(c2, indent);
            }
            pushback(c2);
        }
        lex_state = EXPR.BEG;
        if (c == '=') {
            if ((c = nextc()) == '>') {
                return Token.tCMP;
            }
            pushback(c);
            return Token.tLEQ;
        }
        if (c == '<') {
            /* keep the peeked char in c so pushback() can recognise end of input */
            if ((c = nextc()) == '=') {
                yylval = Token.tLSHFT;
                return Token.tOP_ASGN;
            }
            pushback(c);
            return Token.tLSHFT;
        }
        pushback(c);
        return '<';

    case '>':
        lex_state = EXPR.BEG;
        if ((c = nextc()) == '=') {
            return Token.tGEQ;
        }
        if (c == '>') {
            if ((c = nextc()) == '=') {
                yylval = Token.tRSHFT;
                return Token.tOP_ASGN;
            }
            pushback(c);
            return Token.tRSHFT;
        }
        pushback(c);
        return '>';

    case '"':
        return parse_string(c,c,c);
    case '`':
        if (lex_state == EXPR.FNAME) return c;
        if (lex_state == EXPR.DOT) return c;
        return parse_string(c,c,c);

    case '\'':
        return parse_qstring(c,0);

    case '?':
        if (lex_state == EXPR.END) {
            lex_state = EXPR.BEG;
            return '?';
        }
        c = nextc();
        if (c == -1) {
            thread.CompileError("incomplete character syntax");
            return 0;
        }
        if (lex_state == EXPR.ARG && Char.IsWhiteSpace((char)c))
        {
            pushback(c);
            lex_state = EXPR.BEG;
            return '?';
        }
        if (c == '\\') {
            c = read_escape();
        }
        c &= 0xff;
        yylval = c; //INT2FIX(c);
        lex_state = EXPR.END;
        return Token.tINTEGER;

    case '&':
        if ((c = nextc()) == '&') {
            lex_state = EXPR.BEG;
            if ((c = nextc()) == '=') {
                yylval = Token.tANDOP;
                return Token.tOP_ASGN;
            }
            pushback(c);
            return Token.tANDOP;
        }
        else if (c == '=') {
            yylval = '&';
            lex_state = EXPR.BEG;
            return Token.tOP_ASGN;
        }
        pushback(c);
        if (lex_state == EXPR.ARG && space_seen && Char.IsWhiteSpace((char)c) == false)
        {
            ruby.warning("`&' interpreted as argument prefix");
            c = Token.tAMPER;
        }
        else if (lex_state == EXPR.BEG || lex_state == EXPR.MID) {
            c = Token.tAMPER;
        }
        else {
            c = '&';
        }
        lex_state = EXPR.BEG;
        return c;

    case '|':
        lex_state = EXPR.BEG;
        if ((c = nextc()) == '|') {
            if ((c = nextc()) == '=') {
                yylval = Token.tOROP;
                return Token.tOP_ASGN;
            }
            pushback(c);
            return Token.tOROP;
        }
        else if (c == '=') {
            yylval = '|';
            return Token.tOP_ASGN;
        }
        pushback(c);
        return '|';

    case '+':
        c = nextc();
        if (lex_state == EXPR.FNAME || lex_state == EXPR.DOT) {
            if (c == '@') {
                return Token.tUPLUS;
            }
            pushback(c);
            return '+';
        }
        if (c == '=') {
            lex_state = EXPR.BEG;
            yylval = '+';
            return Token.tOP_ASGN;
        }
        if (lex_state == EXPR.BEG || lex_state == EXPR.MID ||
            (lex_state == EXPR.ARG && space_seen && Char.IsWhiteSpace((char)c) == false))
        {
            if (lex_state == EXPR.ARG) arg_ambiguous();
            lex_state = EXPR.BEG;
            pushback(c);
            if (Char.IsDigit((char)c))
            {
                /* signed numeric literal: hand the sign to the number scanner */
                c = '+';
                goto start_num;
            }
            return Token.tUPLUS;
        }
        lex_state = EXPR.BEG;
        pushback(c);
        return '+';

    case '-':
        c = nextc();
        if (lex_state == EXPR.FNAME || lex_state == EXPR.DOT) {
            if (c == '@') {
                return Token.tUMINUS;
            }
            pushback(c);
            return '-';
        }
        if (c == '=') {
            lex_state = EXPR.BEG;
            yylval = '-';
            return Token.tOP_ASGN;
        }
        if (lex_state == EXPR.BEG || lex_state == EXPR.MID ||
            (lex_state == EXPR.ARG && space_seen && Char.IsWhiteSpace((char)c) == false))
        {
            if (lex_state == EXPR.ARG) arg_ambiguous();
            lex_state = EXPR.BEG;
            pushback(c);
            if (Char.IsDigit((char)c))
            {
                /* signed numeric literal: hand the sign to the number scanner */
                c = '-';
                goto start_num;
            }
            return Token.tUMINUS;
        }
        lex_state = EXPR.BEG;
        pushback(c);
        return '-';

    case '.':
        lex_state = EXPR.BEG;
        if ((c = nextc()) == '.') {
            if ((c = nextc()) == '.') {
                return Token.tDOT3;
            }
            pushback(c);
            return Token.tDOT2;
        }
        pushback(c);
        if (!Char.IsDigit((char)c))
        {
            lex_state = EXPR.DOT;
            return '.';
        }
        c = '.';
        /* fall through into the number scanner */
    start_num:
        goto case '9';

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        {
            bool is_float = false;
            bool seen_point = false;
            bool seen_e = false;
            bool seen_uc = false;

            lex_state = EXPR.END;
            newtok();
            if (c == '-' || c == '+') {
                tokadd(c);
                c = nextc();
            }
            if (c == '0') {
                c = nextc();
                if (c == 'x' || c == 'X') {
                    /* hexadecimal */
                    c = nextc();
                    do {
                        if (c == '_') {
                            seen_uc = true;
                            continue;
                        }
                        if (!ISXDIGIT((char)c)) break;
                        seen_uc = false;
                        tokadd(c);
                    } while ((c = nextc()) > 0);
                    pushback(c);
                    if (toklen() == 0) {
                        parser.yyerror("hexadecimal number without hex-digits");
                    }
                    else if (seen_uc) goto trailing_uc;
                    yylval = new LitParam(tok(), 16);
                    return Token.tINTEGER;
                }
                if (c == 'b' || c == 'B') {
                    /* binary */
                    c = nextc();
                    do {
                        if (c == '_') {
                            seen_uc = true;
                            continue;
                        }
                        if (c != '0'&& c != '1') break;
                        seen_uc = false;
                        tokadd(c);
                    } while ((c = nextc()) > 0);
                    pushback(c);
                    if (toklen() == 0) {
                        parser.yyerror("numeric literal without digits");
                    }
                    else if (seen_uc) goto trailing_uc;
                    yylval = new LitParam(tok(), 2);
                    return Token.tINTEGER;
                }
                if (c >= '0' && c <= '7' || c == '_') {
                    /* octal */
                    do {
                        if (c == '_') {
                            seen_uc = true;
                            continue;
                        }
                        if (c < '0' || c > '7') break;
                        seen_uc = false;
                        tokadd(c);
                    } while ((c = nextc()) > 0);
                    pushback(c);
                    if (seen_uc) goto trailing_uc;
                    yylval = new LitParam(tok(), 8);
                    return Token.tINTEGER;
                }
                if (c > '7' && c <= '9') {
                    parser.yyerror("Illegal octal digit");
                }
                else if (c == '.') {
                    tokadd('0');
                }
                else {
                    pushback(c);
                    yylval = 0; //INT2FIX(0);
                    return Token.tINTEGER;
                }
            }

            for (;;) {
                switch (c) {
                case '0': case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    seen_uc = false;
                    tokadd(c);
                    break;

                case '.':
                    if (seen_uc) goto trailing_uc;
                    if (seen_point || seen_e) {
                        goto decode_num;
                    }
                    else {
                        /* a '.' belongs to the number only when a digit follows */
                        int c0 = nextc();
                        if (!Char.IsDigit((char)c0)) {
                            pushback(c0);
                            goto decode_num;
                        }
                        c = c0;
                    }
                    tokadd('.');
                    tokadd(c);
                    is_float = true;
                    seen_point = true;
                    seen_uc = false;
                    break;

                case 'e':
                case 'E':
                    if (seen_e) {
                        goto decode_num;
                    }
                    tokadd(c);
                    seen_e = true;
                    is_float = true;
                    while ((c = nextc()) == '_')
                        seen_uc = true;
                    if (c == '-' || c == '+')
                        tokadd(c);
                    else
                        continue;
                    break;

                case '_':       /* `_' in number just ignored */
                    seen_uc = true;
                    break;

                default:
                    goto decode_num;
                }
                c = nextc();
            }

        decode_num:
            pushback(c);
        trailing_uc:
            if (seen_uc) {
                parser.yyerror("trailing `_' in number");
            }
            if (is_float) {
                double d = 0.0;
                try
                {
                    d = Convert.ToDouble(tok());
                }
                catch (OverflowException)
                {
                    ruby.warn("Float {0} out of range", tok());
                }
                yylval = d;
                return Token.tFLOAT;
            }
            yylval = new LitParam(tok(), 0);
            return Token.tINTEGER;
        }

    case ']':
    case '}':
        lex_state = EXPR.END;
        return c;

    case ')':
        if (cond_nest > 0) {
            cond_stack >>= 1;
        }
        lex_state = EXPR.END;
        return c;

    case ':':
        c = nextc();
        if (c == ':') {
            if (lex_state == EXPR.BEG || lex_state == EXPR.MID ||
                (lex_state == EXPR.ARG && space_seen)) {
                lex_state = EXPR.BEG;
                return Token.tCOLON3;
            }
            lex_state = EXPR.DOT;
            return Token.tCOLON2;
        }
        pushback(c);
        if (lex_state == EXPR.END || Char.IsWhiteSpace((char)c)) {
            lex_state = EXPR.BEG;
            return ':';
        }
        lex_state = EXPR.FNAME;
        return Token.tSYMBEG;

    case '/':
        if (lex_state == EXPR.BEG || lex_state == EXPR.MID) {
            return parse_regx('/', '/');
        }
        if ((c = nextc()) == '=') {
            lex_state = EXPR.BEG;
            yylval = '/';
            return Token.tOP_ASGN;
        }
        pushback(c);
        if (lex_state == EXPR.ARG && space_seen) {
            if (!Char.IsWhiteSpace((char)c))
            {
                arg_ambiguous();
                return parse_regx('/', '/');
            }
        }
        lex_state = EXPR.BEG;
        return '/';

    case '^':
        lex_state = EXPR.BEG;
        if ((c = nextc()) == '=') {
            yylval = '^';
            return Token.tOP_ASGN;
        }
        pushback(c);
        return '^';

    case ',':
    case ';':
        lex_state = EXPR.BEG;
        return c;

    case '~':
        if (lex_state == EXPR.FNAME || lex_state == EXPR.DOT) {
            if ((c = nextc()) != '@') {
                pushback(c);
            }
        }
        lex_state = EXPR.BEG;
        return '~';

    case '(':
        if (cond_nest > 0) {
            cond_stack = (cond_stack<<1)|0;
        }
        if (lex_state == EXPR.BEG || lex_state == EXPR.MID) {
            c = Token.tLPAREN;
        }
        else if (lex_state == EXPR.ARG && space_seen) {
            ruby.warning(tok() + " (...) interpreted as method call", tok());
        }
        lex_state = EXPR.BEG;
        return c;

    case '[':
        if (lex_state == EXPR.FNAME || lex_state == EXPR.DOT) {
            if ((c = nextc()) == ']') {
                if ((c = nextc()) == '=') {
                    return Token.tASET;
                }
                pushback(c);
                return Token.tAREF;
            }
            pushback(c);
            return '[';
        }
        else if (lex_state == EXPR.BEG || lex_state == EXPR.MID) {
            c = Token.tLBRACK;
        }
        else if (lex_state == EXPR.ARG && space_seen) {
            c = Token.tLBRACK;
        }
        lex_state = EXPR.BEG;
        return c;

    case '{':
        if (lex_state != EXPR.END && lex_state != EXPR.ARG)
            c = Token.tLBRACE;
        lex_state = EXPR.BEG;
        return c;

    case '\\':
        c = nextc();
        if (c == '\n') {
            space_seen = true;
            goto retry; /* skip \\n */
        }
        pushback(c);
        return '\\';

    case '%':
        if (lex_state == EXPR.BEG || lex_state == EXPR.MID) {
            c = nextc();
            return quotation(c);
        }
        if ((c = nextc()) == '=') {
            yylval = '%';
            return Token.tOP_ASGN;
        }
        if (lex_state == EXPR.ARG && space_seen && Char.IsWhiteSpace((char)c) == false) {
            return quotation(c);
        }
        lex_state = EXPR.BEG;
        pushback(c);
        return '%';

    case '$':
        lex_state = EXPR.END;
        newtok();
        c = nextc();
        switch (c) {
        case '_':       /* $_: last read line string */
            c = nextc();
            if (is_identchar(c)) {
                tokadd('$');
                tokadd('_');
                break;
            }
            pushback(c);
            c = '_';
            goto case '~';
        case '~':       /* $~: match-data */
            thread.LocalCnt((uint)c);
            goto case '*';
        case '*':       /* $*: argv */
        case '$':       /* $$: pid */
        case '?':       /* $?: last status */
        case '!':       /* $!: error string */
        case '@':       /* $@: error position */
        case '/':       /* $/: input record separator */
        case '\\':      /* $\: output record separator */
        case ';':       /* $;: field separator */
        case ',':       /* $,: output field separator */
        case '.':       /* $.: last read line number */
        case '=':       /* $=: ignorecase */
        case ':':       /* $:: load path */
        case '<':       /* $<: reading filename */
        case '>':       /* $>: default output handle */
        case '\"':      /* $": already loaded files */
            tokadd('$');
            tokadd(c);
            yylval = ruby.intern(tok());
            return Token.tGVAR;

        case '-':
            tokadd('$');
            tokadd(c);
            c = nextc();
            tokadd(c);
            yylval = ruby.intern(tok());
            /* xxx shouldn't check if valid option variable */
            return Token.tGVAR;

        case '&':       /* $&: last match */
        case '`':       /* $`: string before last match */
        case '\'':      /* $': string after last match */
        case '+':       /* $+: string matches last paren. */
            yylval = new RNBackRef(thread, c);
            return Token.tBACK_REF;

        case '1': case '2': case '3':
        case '4': case '5': case '6':
        case '7': case '8': case '9':
            tokadd('$');
            while (Char.IsDigit((char)c))
            {
                tokadd(c);
                c = nextc();
            }
            if (is_identchar(c))
                break;
            pushback(c);
            yylval = new RNNthRef(thread, Convert.ToInt32(tok().Substring(1)));
            return Token.tNTH_REF;

        default:
            if (!is_identchar(c)) {
                pushback(c);
                return '$';
            }
            goto case '0';
        case '0':
            tokadd('$');
            break;
        }
        break;

    case '@':
        c = nextc();
        newtok();
        tokadd('@');
        if (c == '@') {
            tokadd('@');
            c = nextc();
        }
        if (Char.IsDigit((char)c))
        {
            /* Fixed: format the offending character itself, not its numeric code. */
            thread.CompileError(String.Format("`@{0}' is not a valid instance variable name", (char)c));
        }
        if (!is_identchar(c)) {
            pushback(c);
            return '@';
        }
        break;

    default:
        if (!is_identchar(c) || Char.IsDigit((char)c))
        {
            thread.CompileError(String.Format("Invalid char `0x{0:x2}' in expression", c));
            goto retry;
        }

        newtok();
        break;
    }

    /* Identifier, keyword or variable name: collect the remaining name characters. */
    while (is_identchar(c)) {
        tokadd(c);
#if NONE_UCS2
        if (ismbchar(c)) {
            int i, len = mbclen(c)-1;

            for (i = 0; i < len; i++) {
                c = nextc();
                tokadd(c);
            }
        }
#endif
        c = nextc();
    }
    /* a trailing `!' or `?' is part of the name unless it starts `!=' / `?=' */
    if ((c == '!' || c == '?') && is_identchar(tok()[0]) && !peek('=')) {
        tokadd(c);
    }
    else {
        pushback(c);
    }

    {
        int result = 0;

        switch (tok()[0]) {
        case '$':
            lex_state = EXPR.END;
            result = Token.tGVAR;
            break;
        case '@':
            lex_state = EXPR.END;
            if (tok()[1] == '@')
                result = Token.tCVAR;
            else
                result = Token.tIVAR;
            break;
        default:
            if (lex_state != EXPR.DOT) {
                /* See if it is a reserved word. */
                kw = reserved_word(tok(), toklen());
                if (kw.name != null) {
                    EXPR state = lex_state;
                    lex_state = kw.state;
                    if (state == EXPR.FNAME) {
                        yylval = ruby.intern(kw.name);
                    }
                    if (kw[0] == Token.kDO) {
                        if (COND_P()) return Token.kDO_COND;
                        if (CMDARG_P()) return Token.kDO_BLOCK;
                        return Token.kDO;
                    }
                    if (state == EXPR.BEG)
                        return kw[0];
                    else {
                        /* modifier form of the keyword */
                        if (kw[0] != kw[1])
                            lex_state = EXPR.BEG;
                        return kw[1];
                    }
                }
            }

            if (toklast() == '!' || toklast() == '?') {
                result = Token.tFID;
            }
            else {
                if (lex_state == EXPR.FNAME) {
                    /* attribute-writer name such as `foo=' */
                    if ((c = nextc()) == '=' && !peek('~') && !peek('>') &&
                        (!peek('=') || pcur + 1 < pend && lastline[pcur + 1] == '>')) {
                        result = Token.tIDENTIFIER;
                        tokadd(c);
                    }
                    else {
                        pushback(c);
                    }
                }
                if (result == 0 && Char.IsUpper(tok()[0])) {
                    result = Token.tCONSTANT;
                }
                else {
                    result = Token.tIDENTIFIER;
                }
            }
            if (lex_state == EXPR.BEG ||
                lex_state == EXPR.DOT ||
                lex_state == EXPR.ARG) {
                lex_state = EXPR.ARG;
            }
            else {
                lex_state = EXPR.END;
            }
            break;
        }
        yylval = ruby.intern(tok());
        return result;
    }
}
1166
/// <summary>
/// True when <paramref name="c"/> may appear in an identifier: an underscore,
/// or a character that Char.IsLetterOrDigit accepts.
/// </summary>
internal bool is_identchar(int c)
{
    return c == '_' || Char.IsLetterOrDigit((char)c);
}
/// <summary>
/// True when <paramref name="c"/> is a hexadecimal digit (0-9, A-F, a-f).
/// </summary>
internal bool ISXDIGIT(char c)
{
    // Fixed: the previous lookup string "012345679ABCDEFabcdef" omitted '8',
    // so hex literals such as 0x8 were cut short at that digit.
    return (c >= '0' && c <= '9')
        || (c >= 'A' && c <= 'F')
        || (c >= 'a' && c <= 'f');
}
1177
/// <summary>
/// Returns the next line of <paramref name="s"/>, starting at the position
/// remembered in <c>gets_ptr</c>, without its terminating newline.
/// </summary>
/// <param name="s">The complete script text.</param>
/// <returns>
/// The next line, or null once a non-zero <c>gets_ptr</c> has reached the end of
/// <paramref name="s"/>.
/// </returns>
private string get_str(string s)
{
    if (gets_ptr > 0 && s.Length == gets_ptr)
    {
        return null;
    }

    string rs;
    int i = s.IndexOf('\n', gets_ptr);
    if (i < 0)
    {
        // Last line without a trailing newline: consume the rest of the text.
        rs = s.Substring(gets_ptr);
        gets_ptr = s.Length;
        return rs;
    }

    rs = s.Substring(gets_ptr, i - gets_ptr);
    // Fixed: step past the newline. Leaving gets_ptr on the '\n' made every
    // following call return an empty string without ever advancing.
    gets_ptr = i + 1;
    return rs;
}
1196
/// <summary>
/// Reads one line from the reader and appends "\n" to it.
/// </summary>
/// <returns>The line followed by "\n", or null when the reader has no more lines.</returns>
private string getline()
{
    string line = reader.ReadLine();
    return (line != null) ? line + "\n" : null;
}
1206
/// <summary>
/// Returns the next input character, reading a new line from the reader when
/// the current line is exhausted.
/// </summary>
/// <remarks>
/// Each newly read line increments <c>thread.line</c> (after restoring it from
/// <c>heredoc_end</c> when that is set). A line that consists of <c>__END__</c>
/// sets <c>ruby.__end__seen</c> and ends the script. A "\r\n" pair is returned
/// as a single '\n'. The <c>curr</c> field is used as scratch storage.
/// </remarks>
/// <returns>The character code, or -1 at end of script.</returns>
private int nextc()
{
    curr = 0;
    if (pcur == pend)
    {
        // Once __END__ has been seen the script is over; keep reporting end of
        // input instead of lexing whatever follows the marker.
        if (reader == null || ruby.__end__seen)
        {
            lastline = String.Empty;
            return -1;
        }

        string v = getline();
        if (v == null)
        {
            return -1;
        }
        if (heredoc_end > 0)
        {
            thread.line = heredoc_end;
            heredoc_end = 0;
        }
        thread.line++;
        pbeg = 0;
        pcur = 0;
        pend = v.Length;
        if (pend >= 7)
        {
            if (String.Compare(v, pbeg, "__END__", 0, 7) == 0
                && (v.Length == 7 || v[7] == '\n' || v[7] == '\r'))
            {
                ruby.__end__seen = true;
                lastline = String.Empty;
                // Fixed: keep the cursor consistent with the now-empty line.
                // Previously pend stayed at v.Length, so any later nextc() or
                // peek() indexed into the empty string and threw.
                pend = 0;
                return -1;
            }
        }
        lastline = v;
    }

    curr = lastline[pcur++];
    // Fixed: use '<' (was '<='), so a '\r' in the final position does not
    // read one character past the end of the line.
    if (curr == '\r' && pcur < pend && lastline[pcur] == '\n')
    {
        pcur++;
        curr = '\n';
    }
    return curr;
}
1255
/// <summary>
/// Un-reads the most recent character by moving the line cursor back one
/// position. Nothing happens when <paramref name="c"/> is -1 (end of input).
/// </summary>
private void pushback(int c)
{
    if (c != -1)
    {
        pcur -= 1;
    }
}
1261
/// <summary>
/// True when the current line has an unread character and it equals
/// <paramref name="c"/>; the cursor is not moved.
/// </summary>
private bool peek(char c)
{
    if (pcur == pend)
    {
        return false;
    }
    return lastline[pcur] == c;
}
1266
/// <summary>Empties the token buffer to start a new token.</summary>
/// <returns>Always the empty string.</returns>
private string newtok()
{
    tokenbuf.Remove(0, tokenbuf.Length);
    return String.Empty;
}
1272
/// <summary>Appends <paramref name="c"/>, converted to a char, to the token buffer.</summary>
private void tokadd(int c)
{
    char ch = (char)c;
    tokenbuf.Append(ch);
}
1277
/// <summary>
/// Returns the value of a hexadecimal digit character, or -1 when
/// <paramref name="c"/> is not a hexadecimal digit.
/// </summary>
private static int escape_hex_value(int c)
{
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return -1;
}

/// <summary>
/// Reads the escape sequence that follows an already consumed backslash and
/// returns the character code it denotes.
/// </summary>
/// <remarks>
/// Handles the single-letter escapes, up to three octal digits, <c>\x</c> with
/// up to two hex digits, <c>\M-x</c> (sets bit 0x80), and <c>\C-x</c> / <c>\cx</c>
/// (masks with 0x9f; <c>?</c> maps to 0x7f). Any other character stands for itself.
/// </remarks>
/// <returns>
/// The decoded character code; 0 after reporting "Invalid escape character syntax".
/// </returns>
private int read_escape()
{
    int c = nextc();
    switch (c)
    {
    case '\\':
        return c;

    case 'n':
        return '\n';

    case 't':
        return '\t';

    case 'r':
        return '\r';

    case 'f':
        return '\f';

    case 'v':
        return '\v';

    case 'a':
        return '\a';

    case 'e':
        return 0x1b;

    // Fixed: only 0-7 start an octal escape. '8' and '9' used to be routed
    // here and then failed on an empty digit string; they now stand for themselves.
    case '0': case '1': case '2': case '3':
    case '4': case '5': case '6': case '7':
        {
            // Fixed: digits are accumulated numerically. The old code appended
            // the int char code to a string ("49" for '1') before parsing it.
            int value = c - '0';
            for (int i = 0; i < 2; i++)
            {
                c = nextc();
                if (c == -1) goto case -1;
                if (c < '0' || '7' < c) {
                    pushback(c);
                    break;
                }
                value = (value << 3) | (c - '0');
            }
            // Keep the result within a byte instead of throwing for \400-\777.
            return value & 0xff;
        }

    case 'x':
        {
            // Fixed: consume at most two hex digits and stop at the first
            // non-hex character instead of throwing on short or invalid input.
            int value = 0;
            for (int i = 0; i < 2; i++)
            {
                c = nextc();
                int digit = escape_hex_value(c);
                if (digit < 0) {
                    pushback(c);
                    break;
                }
                value = (value << 4) | digit;
            }
            return value;
        }

    case 'b':
        return '\b';

    case 's':
        return ' ';

    case 'M':
        // Fixed: the '-' test was inverted, so a well-formed \M-x never took
        // the meta path and malformed input recursed into read_escape().
        if ((c = nextc()) != '-')
        {
            parser.yyerror("Invalid escape character syntax");
            pushback(c);
            return '\0';
        }
        if ((c = nextc()) == '\\')
        {
            return read_escape() | 0x80;
        }
        if (c == -1) goto case -1;
        return ((c & 0xff) | 0x80);

    case 'C':
        // Fixed: \C must be followed by '-' before the controlled character.
        if ((c = nextc()) != '-')
        {
            parser.yyerror("Invalid escape character syntax");
            pushback(c);
            return '\0';
        }
        goto case 'c';

    case 'c':
        if ((c = nextc()) == '\\')
        {
            c = read_escape();
        }
        else if (c == '?')
            return 0x7f;    // Fixed: was 0177, which C# reads as decimal 177
        else if (c == -1)
            goto case -1;
        return c & 0x9f;

    case -1:
        parser.yyerror("Invalid escape character syntax");
        return 0;

    default:
        return c;
    }
}
1374
/// <summary>
/// Copies the escape sequence that follows an already consumed backslash into
/// the token buffer verbatim, backslash included, without decoding it.
/// </summary>
/// <remarks>
/// A backslash immediately followed by a newline is dropped. Octal escapes copy
/// up to three digits, <c>\x</c> copies up to two hex digits, and <c>\M-</c>,
/// <c>\C-</c> and <c>\c</c> copy the prefix and then the character (or nested
/// escape) that follows.
/// </remarks>
/// <returns>0 on success; -1 after reporting an error at end of input.</returns>
private int tokadd_escape()
{
    int c = nextc();
    switch (c)
    {
    // Fixed: this case tested the letter 'n', which silently removed every
    // "\n" escape from the token; the intent is to skip backslash-newline.
    case '\n':
        return 0;

    case '0': case '1': case '2': case '3':
    case '4': case '5': case '6': case '7':
        tokadd('\\');
        tokadd(c);
        for (int i = 0; i < 2; i++)
        {
            c = nextc();
            if (c == -1) goto case -1;
            if (c < '0' || '7' < c) {
                pushback(c);
                break;
            }
            tokadd(c);
        }
        return 0;

    case 'x':
        tokadd('\\');
        tokadd(c);
        // Fixed: copy at most two hex digits and stop at the first non-hex
        // character. The old code threw on short or invalid input and
        // otherwise copied two characters unconditionally.
        for (int i = 0; i < 2; i++)
        {
            c = nextc();
            bool isHex = (c >= '0' && c <= '9')
                      || (c >= 'a' && c <= 'f')
                      || (c >= 'A' && c <= 'F');
            if (!isHex) {
                pushback(c);
                break;
            }
            tokadd(c);
        }
        return 0;

    case 'M':
        if ((c = nextc()) != '-') {
            parser.yyerror("Invalid escape character syntax");
            pushback(c);
            return 0;
        }
        tokadd('\\'); tokadd('M'); tokadd('-');
        goto escaped;

    case 'C':
        if ((c = nextc()) != '-') {
            parser.yyerror("Invalid escape character syntax");
            pushback(c);
            return 0;
        }
        tokadd('\\'); tokadd('C'); tokadd('-');
        goto escaped;

    case 'c':
        tokadd('\\'); tokadd('c');

    escaped:
        // Shared tail for \M-, \C- and \c: copy the affected character, which
        // may itself be another escape sequence.
        c = nextc();
        if (c == '\\') {
            return tokadd_escape();
        }
        if (c == -1) goto case -1;
        tokadd(c);
        return 0;

    case -1:
        parser.yyerror("Invalid escape character syntax");
        return -1;

    default:
        tokadd('\\');
        tokadd(c);
        break;
    }
    return 0;
}
1455
// Scans the body of a regexp literal up to the closing delimiter `term`,
// then the trailing option letters.  `paren` is the opening bracket when a
// bracket pair is used as delimiter (0 otherwise) and drives nesting.
//
// Sets yylval and returns Token.tREGEXP for a plain pattern or
// Token.tDREGEXP when the pattern contained "#..." interpolation.
// Returns 0 after an error has been reported.
int parse_regx(int term, int paren)
{
    RegexOptions options = RegexOptions.None;
    char kcode = '\0';
    bool once = false;
    int nest = 0;
    int re_start = thread.line;
    RNode list = null;
    int c;

    newtok();
    for (;;)
    {
        c = nextc();
        if (c == -1)
        {
            break;                      // unterminated
        }

        if (c == term && nest == 0)
        {
            // Closing delimiter reached: consume the option letters.
            bool moreOptions = true;
            while (moreOptions)
            {
                c = nextc();
                switch (c)
                {
                case 'i':
                    options |= RegexOptions.IgnoreCase;
                    break;
                case 'x':
                    options |= RegexOptions.IgnorePatternWhitespace;
                    break;
                case 'p':   /* /p is obsolete */
                    ruby.warn("/p option is obsolete; use /m\n\tnote: /m does not change ^, $ behavior");
                    break;
                case 'm':
                    // NOTE(review): RegexOptions.Multiline changes ^/$ matching,
                    // while the warning above says /m does not; Singleline may
                    // have been intended - confirm before changing.
                    options |= RegexOptions.Multiline;
                    break;
                case 'o':
                    once = true;
                    break;
                // NOTE(review): the kcode values below are hex escapes
                // (0x16, 0x32, 0x48, 0x64); they look like decimal
                // 16/32/48/64 may have been meant - confirm against the
                // code that decodes cflag.
                case 'n':
                    kcode = '\x16';
                    break;
                case 'e':
                    kcode = '\x32';
                    break;
                case 's':
                    kcode = '\x48';
                    break;
                case 'u':
                    kcode = '\x64';
                    break;
                default:
                    pushback(c);
                    moreOptions = false;
                    break;
                }
            }

            lex_state = EXPR.END;
            if (list == null)
            {
                yylval = RRegexpClass.s_new(ruby.cRegexp, tok(), options);
                return Token.tREGEXP;
            }

            list.SetLine(re_start);
            if (toklen() > 0)
            {
                RNode.list_append(thread, list, new RNStr(thread, ruby, tok()));
            }
            if (once)
            {
                list = new RNDRegxOnce(list);
            }
            else
            {
                list = new RNDRegx(list);
            }
            list.cflag = (uint)options | (uint)kcode;
            yylval = list;
            return Token.tDREGEXP;
        }

        if (c == '#')
        {
            list = str_extend(list, term);
            if (list is RNEOF)
            {
                break;                  // unterminated
            }
            continue;
        }

        if (c == '\\')
        {
            if (tokadd_escape() < 0)
            {
                return 0;
            }
            continue;
        }

        if (paren != 0)
        {
            if (c == paren) nest++;
            if (c == term) nest--;
        }
        tokadd(c);
    }

    thread.line = re_start;
    thread.CompileError("unterminated regexp meets end of file");
    return 0;
}
1557
// Scans a string literal body up to the delimiter `term`.
//   func  - '\'' delegates to parse_qstring; 0 returns the rest of the
//           current line as-is (used for here documents); '"' and '`'
//           select double-quoted / backquoted handling.
//   paren - opening bracket for bracket-pair delimiters, 0 otherwise.
// Sets yylval and returns tSTRING/tXSTRING for plain text, or
// tDSTRING/tDXSTRING when "#..." interpolation was found.  Returns 0 after
// reporting an unterminated string.
int parse_string(int func, int term, int paren)
{
    if (func == '\'')
    {
        return parse_qstring(term, paren);
    }

    if (func == 0)
    {
        /* read 1 line for heredoc; -1 for chomp */
        yylval = lastline.Substring(pbeg, pend - pbeg - 1);
        pcur = pend;
        return Token.tSTRING;
    }

    RNode list = null;
    int nest = 0;
    int strstart = thread.line;
    int c;

    newtok();
    for (;;)
    {
        c = nextc();
        if (c == term && nest <= 0)
        {
            break;
        }

        if (c == -1)
        {
            thread.line = strstart;
            thread.CompileError("unterminated string meets end of file");
            return 0;
        }

        if (c == '#')
        {
            list = str_extend(list, term);
            if (list is RNEOF)
            {
                thread.line = strstart;
                thread.CompileError("unterminated string meets end of file");
                return 0;
            }
            continue;
        }

        if (c == '\\')
        {
            c = nextc();
            if (c == '\n')
            {
                continue;               // line continuation
            }
            if (c != term)
            {
                pushback(c);
                if (func != '"') tokadd('\\');
                tokadd(read_escape());
            }
            else
            {
                tokadd(c);              // escaped delimiter
            }
            continue;
        }

        if (paren != 0)
        {
            if (c == paren) nest++;
            if (c == term && nest-- == 0) break;
        }
        tokadd(c);
    }

    lex_state = EXPR.END;

    bool backquoted = (func == '`');
    if (list == null)
    {
        yylval = tok();
        return backquoted ? Token.tXSTRING : Token.tSTRING;
    }

    list.SetLine(strstart);
    if (toklen() > 0)
    {
        RNode.list_append(thread, list, new RNStr(thread, ruby, tok()));
    }
    yylval = list;
    if (backquoted)
    {
        yylval = new RNDXStr(list);
        return Token.tDXSTRING;
    }
    return Token.tDSTRING;
}
1644
// Scans a single-quoted style literal ('...' or %q) up to `term`.
// `paren` is the opening bracket for bracket-pair delimiters, 0 otherwise.
//
// Escape handling:
//   backslash-newline        -> dropped (line continuation)
//   "\\"                     -> one backslash
//   backslash + delimiter    -> the delimiter itself (closing, or opening
//                               bracket), never counted for nesting
//   backslash + anything else-> kept as written (backslash included)
//
// Sets yylval to the collected text and returns Token.tSTRING, or returns
// 0 after reporting an unterminated string.
int parse_qstring(int term, int paren)
{
    int c;
    int nest = 0;
    int strstart = thread.line;

    newtok();
    while ((c = nextc()) != term || nest > 0)
    {
        if (c == -1)
        {
            thread.line = strstart;
            thread.CompileError("unterminated string meets end of file");
            return 0;
        }

        if (c == '\\')
        {
            c = nextc();
            if (c == '\n')
            {
                continue;
            }
            if (c == -1)
            {
                // Input ended right after the backslash.
                thread.line = strstart;
                thread.CompileError("unterminated string meets end of file");
                return 0;
            }
            if (c == term || (paren != 0 && c == paren))
            {
                // An escaped delimiter is literal text.  Previously it fell
                // through to the nesting check below, so "\)" closed a
                // %q(...) literal and "\(" bumped the nesting level.
                tokadd(c);
                continue;
            }
            if (c != '\\')
            {
                tokadd('\\');
            }
            tokadd(c);
            continue;
        }

        if (paren != 0)
        {
            if (c == paren) nest++;
            if (c == term && nest-- == 0) break;
        }
        tokadd(c);
    }

    yylval = tok();
    lex_state = EXPR.END;
    return Token.tSTRING;
}
1700
// Scans a %w word list up to `term`, splitting on unescaped whitespace.
// `paren` is the opening bracket for bracket-pair delimiters, 0 otherwise.
//
// Each word becomes an RNStr; the words are collected in an RNArray
// (RNZArray when there are none).  Sets yylval to that node and returns
// Token.tDSTRING, or returns 0 after reporting an unterminated literal.
int parse_quotedwords(int term, int paren)
{
    RNode qwords = null;
    int strstart = thread.line;
    int c;
    int nest = 0;

    newtok();

    /* skip preceding spaces (c is checked against -1 before the char cast) */
    for (c = nextc(); c != -1 && Char.IsWhiteSpace((char)c); c = nextc())
        ;
    pushback(c);

    while ((c = nextc()) != term || nest > 0)
    {
        if (c == -1)
        {
            thread.line = strstart;
            thread.CompileError("unterminated string meets end of file");
            return 0;
        }

        if (c == '\\')
        {
            c = nextc();
            if (c == '\n')
            {
                continue;
            }
            if (c == -1)
            {
                // Input ended right after the backslash.
                thread.line = strstart;
                thread.CompileError("unterminated string meets end of file");
                return 0;
            }
            if (c == term || (paren != 0 && c == paren))
            {
                // Escaped delimiters are literal.  The opening bracket used
                // to be counted as nesting, which left the literal
                // unterminated.
                tokadd(c);
                continue;
            }
            // Escaped whitespace joins the word without a backslash; any
            // other escape (except "\\") keeps its backslash.
            if (c != '\\' && !Char.IsWhiteSpace((char)c))
            {
                tokadd('\\');
            }
            tokadd(c);
            continue;
        }

        if (Char.IsWhiteSpace((char)c))
        {
            // End of a word: store it and skip the run of separators.
            RNode word = new RNStr(thread, ruby, tok());
            newtok();
            if (qwords == null) qwords = new RNArray(thread, word);
            else RNode.list_append(thread, qwords, word);

            for (c = nextc(); c != -1 && Char.IsWhiteSpace((char)c); c = nextc())
                ;
            pushback(c);
            continue;
        }

        if (paren != 0)
        {
            if (c == paren) nest++;
            if (c == term && nest-- == 0) break;
        }
        tokadd(c);
    }

    if (toklen() > 0)
    {
        RNode last = new RNStr(thread, ruby, tok());
        if (qwords == null) qwords = new RNArray(thread, last);
        else RNode.list_append(thread, qwords, last);
    }
    if (qwords == null) qwords = new RNZArray(thread);
    yylval = qwords;
    lex_state = EXPR.END;
    return Token.tDSTRING;
}
1776
// Reads a here document.
//   term   - the character following "<<" (or "<<-"): a quote character
//            (', ", `) introducing a quoted identifier, or the first
//            character of a bare identifier.
//   indent - when > 0, leading blanks/tabs are ignored when looking for the
//            terminator line.
//
// Lines are fetched with getline() until one consists of the identifier
// alone; each body line is run through parse_string and accumulated either
// in a plain string or, once interpolation is seen, in a node list.  The
// scanner position on the line that started the here document is restored
// before returning.
//
// Sets yylval and returns tSTRING/tXSTRING/tDSTRING/tDXSTRING; returns 0
// after reporting an error.
int here_document(int term, int indent)
{
    int c;
    string line = String.Empty;
    RNode list = null;
    int linesave = thread.line;

    newtok();
    switch (term)
    {
    case '\'':
    case '"':
    case '`':
        // Quoted identifier: collect up to the matching quote.  Stop at end
        // of input instead of looping forever on an unclosed quote.
        while ((c = nextc()) != term)
        {
            if (c == -1)
            {
                thread.line = linesave;
                thread.CompileError("unterminated here document identifier meets end of file");
                return 0;
            }
            tokadd(c);
        }
        // A single-quoted identifier selects raw lines (parse_string func 0).
        if (term == '\'') term = '\0';
        break;

    default:
        // Bare identifier: behaves like a double-quoted one.
        c = term;
        term = '"';
        if (!is_identchar(c))
        {
            ruby.warn("use of bare << to mean <<\"\" is deprecated");
            break;
        }
        while (is_identchar(c))
        {
            tokadd(c);
            c = nextc();
        }
        pushback(c);
        break;
    }

    // Remember where we were on the introducing line.
    string lastline_save = lastline;
    int offset_save = pcur - pbeg;
    string eos = string.Copy(tok());
    int len = eos.Length;

    string str = String.Empty;
    for (;;)
    {
        lastline = line = getline();
        if (line == null)
        {
            thread.line = linesave;
            thread.CompileError("can't find string \"" + eos + "\" anywhere before EOF");
            return 0;
        }
        thread.line++;

        string p = line;
        if (indent > 0)
        {
            p = line.TrimStart(' ', '\t');
        }

        // Terminator line: the identifier followed by a line break or by
        // nothing at all.  The length is tested first so that a final line
        // without a newline no longer indexes past the end of the string;
        // the comparison is ordinal because the identifier is program text.
        if (String.CompareOrdinal(eos, 0, p, 0, len) == 0)
        {
            if (p.Length == len || p[len] == '\n' || p[len] == '\r')
            {
                break;
            }
        }

        pbeg = pcur = 0;
        pend = pcur + line.Length;

        // parse_string may stop before the end of the line; keep going
        // until the whole line has been consumed.
        do
        {
            switch (parse_string(term, '\n', '\n'))
            {
            case Token.tSTRING:
            case Token.tXSTRING:
                yylval = (string)yylval + "\n";
                if (list == null)
                {
                    str += (string)yylval;
                }
                else
                {
                    RNode.list_append(thread, list, new RNStr(thread, ruby, (string)yylval));
                }
                break;

            case Token.tDSTRING:
                if (list == null) list = new RNDStr(thread, ruby, str);
                goto case Token.tDXSTRING;

            case Token.tDXSTRING:
                if (list == null) list = new RNDXStr(thread, ruby, str);

                RNode.list_append(thread, (RNode)yylval, new RNStr(thread, ruby, "\n"));
                RNStr val = new RNStr((RNStr)yylval);
                yylval = new RNArray(thread, val);
                ((RNode)yylval).next = ((RNode)yylval).head.next;
                RNode.list_concat(list, (RNode)yylval);
                break;

            case 0:
                thread.line = linesave;
                thread.CompileError("can't find string \"" + eos + "\" anywhere before EOF");
                return 0;
            }
        } while (pcur != pend);
    }

    // Resume scanning right after the here document introducer.
    lastline = lastline_save;
    pbeg = 0;
    pend = lastline.Length;
    pcur = offset_save;

    lex_state = EXPR.END;
    heredoc_end = thread.line;
    thread.line = linesave;
    if (list != null)
    {
        list.SetLine(linesave + 1);
        yylval = list;
    }

    switch (term)
    {
    case '\0':
    case '\'':
    case '"':
        if (list != null) return Token.tDSTRING;
        yylval = str;
        return Token.tSTRING;

    case '`':
        if (list != null) return Token.tDXSTRING;
        yylval = str;
        return Token.tXSTRING;
    }
    return 0;
}
1904
// Handles a '#' met inside an interpolating literal; the '#' itself has
// already been consumed by the caller.
//
// If the next character is not '$', '@' or '{', a literal '#' is added to
// the token buffer, the character is pushed back and `list` is returned
// unchanged.  Otherwise the text collected so far is moved into `list`
// (an RNDStr is created when `list` is null), the variable name or the
// "{...}" expression text is collected, and it is appended to the list as
// an RNEvStr.
//
//   list - node list built so far for this literal, or null.
//   term - closing delimiter of the enclosing literal.
// Returns the (possibly new) list, or an RNEOF instance when input ends
// inside the substitution.
RNode str_extend(RNode list, int term)
{
    RNode node;
    int nest;
    bool closed;

    int c = nextc();
    switch (c)
    {
    case '$':
    case '@':
    case '{':
        break;
    default:
        tokadd('#');
        pushback(c);
        return list;
    }

    // Flush the literal text that precedes the substitution.
    string ss = tok();
    if (list == null)
    {
        list = new RNDStr(thread, ruby, ss);
    }
    else if (toklen() > 0)
    {
        RNode.list_append(thread, list, new RNStr(thread, ruby, ss));
    }
    newtok();

    switch (c)
    {
    case '$':
        tokadd('$');
        c = nextc();
        if (c == -1) return new RNEOF();
        switch (c)
        {
        case '1': case '2': case '3':
        case '4': case '5': case '6':
        case '7': case '8': case '9':
            // Numbered variable: take the whole run of digits.
            while (Char.IsDigit((char)c))
            {
                tokadd(c);
                c = nextc();
            }
            pushback(c);
            goto fetch_id;

        case '&': case '+': case '_': case '~':
        case '*': case '$': case '?': case '!':
        case '@': case ',': case '.': case '=':
        case ':': case '<': case '>': case '\\':
        refetch:
            // Single-character special variable.
            tokadd(c);
            goto fetch_id;

        default:
            if (c == term)
            {
                // "#$" directly before the closing delimiter is plain text.
                RNode.list_append(thread, list, new RNStr(thread, ruby, "#$"));
                pushback(c);
                newtok();
                return list;
            }
            switch (c)
            {
            case '\"':
            case '/':
            case '\'':
            case '`':
                goto refetch;
            }
            if (!is_identchar(c))
            {
                parser.yyerror("bad global variable in string");
                newtok();
                return list;
            }
            break;
        }

        while (is_identchar(c))
        {
            tokadd(c);
            c = nextc();
        }
        pushback(c);
        break;

    case '@':
        tokadd(c);
        c = nextc();
        if (c == '@')
        {
            tokadd(c);
            c = nextc();
        }
        while (is_identchar(c))
        {
            tokadd(c);
            c = nextc();
        }
        pushback(c);
        break;

    case '{':
        // Collect the expression text up to the matching '}'.
        nest = 0;
        closed = false;
        while (!closed)
        {
            c = nextc();
            switch (c)
            {
            case -1:
                if (nest > 0)
                {
                    parser.yyerror("bad substitution in string");
                    newtok();
                    return list;
                }
                return new RNEOF();

            case '}':
                if (nest == 0)
                {
                    closed = true;      // the closing brace is not stored
                }
                else
                {
                    nest--;
                    tokadd(c);
                }
                break;

            case '\\':
                c = nextc();
                if (c == -1) return new RNEOF();
                if (c == term)
                {
                    tokadd(c);
                }
                else
                {
                    tokadd('\\');
                    tokadd(c);
                }
                // An escaped character never closes the substitution.
                // (The old loop tested c after this branch, so "\}" ended
                // the expression early.)
                break;

            case '{':
                nest++;
                goto case '`';

            case '\"':
            case '/':
            case '`':
                if (c == term)
                {
                    // Enclosing literal ends inside "#{": keep the text.
                    pushback(c);
                    RNode.list_append(thread, list, new RNStr(thread, ruby, "#"));
                    ruby.warn("bad substitution in string");
                    RNode.list_append(thread, list, new RNStr(thread, ruby, tok()));
                    newtok();
                    return list;
                }
                goto default;

            default:
                tokadd(c);
                break;
            }
        }
        break;
    }

fetch_id:
    node = new RNEvStr(thread, ruby, tok());
    RNode.list_append(thread, list, node);
    newtok();

    return list;
}
2101
// Dispatches a "%" literal.  `c` is the character after '%': either the
// type letter (Q, q, w, x, r), in which case the delimiter is read next,
// or the delimiter itself, which is treated as type Q.
// Returns the token produced by the selected parser, or 0 after an error.
private int quotation(int c)
{
    int term;
    if (Char.IsLetterOrDigit((char)c))
    {
        term = nextc();
#if CHECKQUOT
        if (ISALNUM(term) || ismbchar(term)) {
            parser.yyerror("unknown type of %string");
            return 0;
        }
#endif
    }
    else
    {
        term = c;
        c = 'Q';
    }

    if (c == -1 || term == -1)
    {
        thread.CompileError("unterminated quoted string meets end of file");
        return 0;
    }

    // Bracket delimiters close with their counterpart and may nest.
    int paren = term;
    switch (term)
    {
    case '(': term = ')'; break;
    case '[': term = ']'; break;
    case '{': term = '}'; break;
    case '<': term = '>'; break;
    default:  paren = 0;  break;
    }

    if (c == 'Q') return parse_string('"', term, paren);
    if (c == 'q') return parse_qstring(term, paren);
    if (c == 'w') return parse_quotedwords(term, paren);
    if (c == 'x') return parse_string('`', term, paren);
    if (c == 'r') return parse_regx(term, paren);

    parser.yyerror("unknown type of %string");
    return 0;
}
// Emits the "ambiguous first argument" warning through the interpreter.
private void arg_ambiguous()
{
    string message = "ambiguous first argument; make sure";
    ruby.warning(message);
}
2156 }
2157
2158 } // namespace yyParser
2159 }
2160

Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26