OILS / ysh / expr_to_ast.py

1729 lines, 1042 significant
1"""expr_to_ast.py."""
2from __future__ import print_function
3
4from _devbuild.gen.id_kind_asdl import Id, Id_t, Id_str, Kind
5from _devbuild.gen.syntax_asdl import (
6 Token,
7 SimpleVarSub,
8 loc,
9 loc_t,
10 DoubleQuoted,
11 SingleQuoted,
12 BracedVarSub,
13 CommandSub,
14 YshArrayLiteral,
15 expr,
16 expr_e,
17 expr_t,
18 expr_context_e,
19 re,
20 re_t,
21 re_repeat,
22 re_repeat_t,
23 class_literal_term,
24 class_literal_term_t,
25 PosixClass,
26 PerlClass,
27 NameType,
28 y_lhs_t,
29 Comprehension,
30 Subscript,
31 Attribute,
32 proc_sig,
33 proc_sig_t,
34 Param,
35 RestParam,
36 ParamGroup,
37 NamedArg,
38 ArgList,
39 pat,
40 pat_t,
41 TypeExpr,
42 Func,
43 Eggex,
44 EggexFlag,
45 CharCode,
46 CharRange,
47 VarDecl,
48 Mutation,
49)
50from _devbuild.gen.value_asdl import value, value_t
51from _devbuild.gen import grammar_nt
52from core.error import p_die
53from data_lang import j8
54from frontend import consts
55from frontend import lexer
56from frontend import location
57from mycpp import mops
58from mycpp import mylib
59from mycpp.mylib import log, tagswitch
60from osh import word_compile
61from ysh import expr_parse
62from ysh import regex_translate
63
64from typing import TYPE_CHECKING, Dict, List, Tuple, Optional, cast
65if TYPE_CHECKING:
66 from pgen2.grammar import Grammar
67 from pgen2.pnode import PNode
68
69_ = log
70
71PERL_CLASSES = {
72 'd': 'd',
73 'w': 'w',
74 'word': 'w',
75 's': 's',
76}
77# https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html
78POSIX_CLASSES = [
79 'alnum',
80 'cntrl',
81 'lower',
82 'space',
83 'alpha',
84 'digit',
85 'print',
86 'upper',
87 'blank',
88 'graph',
89 'punct',
90 'xdigit',
91]
92# NOTE: There are also things like \p{Greek} that we could put in the
93# "non-sigil" namespace.
94
95RANGE_POINT_TOO_LONG = "Range start/end shouldn't have more than one character"
96
97POS_ARG_MISPLACED = "Positional arg can't appear in group of named args"
98
99# Copied from pgen2/token.py to avoid dependency.
100NT_OFFSET = 256
101
102if mylib.PYTHON:
103
104 def MakeGrammarNames(ysh_grammar):
105 # type: (Grammar) -> Dict[int, str]
106
107 # TODO: Break this dependency
108 from frontend import lexer_def
109
110 names = {}
111
112 for id_name, k in lexer_def.ID_SPEC.id_str2int.items():
113 # Hm some are out of range
114 #assert k < 256, (k, id_name)
115
116 # TODO: Some tokens have values greater than NT_OFFSET
117 if k < NT_OFFSET:
118 names[k] = id_name
119
120 for k, v in ysh_grammar.number2symbol.items():
121 assert k >= NT_OFFSET, (k, v)
122 names[k] = v
123
124 return names
125
126
127class Transformer(object):
128 """Homogeneous parse tree -> heterogeneous AST ("lossless syntax tree")
129
130 pgen2 (Python's LL parser generator) doesn't have semantic actions like yacc,
131 so this "transformer" is the equivalent.
132
133 Files to refer to when modifying this function:
134
135 ysh/grammar.pgen2 (generates _devbuild/gen/grammar_nt.py)
136 frontend/syntax.asdl (generates _devbuild/gen/syntax_asdl.py)
137
138 Related examples:
139
140 opy/compiler2/transformer.py (Python's parse tree -> AST, ~1500 lines)
141 Python-2.7.13/Python/ast.c (the "real" CPython version, ~3600 lines)
142
143 Other:
144 frontend/parse_lib.py (turn on print_parse_tree)
145
146 Public methods:
147 Expr, VarDecl
148 atom, trailer, etc. are private, named after productions in grammar.pgen2.
149 """
150
151 def __init__(self, gr):
152 # type: (Grammar) -> None
153 self.number2symbol = gr.number2symbol
154 if mylib.PYTHON:
155 names = MakeGrammarNames(gr)
156 # print raw nodes
157 self.p_printer = expr_parse.ParseTreePrinter(names)
158
159 def _LeftAssoc(self, p_node):
160 # type: (PNode) -> expr_t
161 """For an associative binary operation.
162
163 Examples:
164 xor_expr: and_expr ('xor' and_expr)*
165 term: factor (('*'|'/'|'%'|'div') factor)*
166
167 3 - 1 - 2 must be grouped as ((3 - 1) - 2).
168 """
169 # Note: Compare the iterative com_binary() method in
170 # opy/compiler2/transformer.py.
171
172 # Examples:
173 # - The PNode for '3 - 1' will have 3 children
174 # - The PNode for '3 - 1 - 2' will have 5 children
175
176 #self.p_printer.Print(p_node)
177
178 i = 1 # index of the operator
179 n = p_node.NumChildren()
180
181 left = self.Expr(p_node.GetChild(0))
182 while i < n:
183 op = p_node.GetChild(i)
184 right = self.Expr(p_node.GetChild(i + 1))
185
186 # create a new left node
187 left = expr.Binary(op.tok, left, right)
188 i += 2
189
190 return left
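# Illustrative sketch of the fold above: for '3 - 1 - 2' the PNode children
# are [3, '-', 1, '-', 2], and the loop builds the left-associative tree
#   expr.Binary('-', expr.Binary('-', Const 3, Const 1), Const 2)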
191
192 def _Trailer(self, base, p_trailer):
193 # type: (expr_t, PNode) -> expr_t
194 """
195 trailer: ( '(' [arglist] ')' | '[' subscriptlist ']'
196 | '.' NAME | '->' NAME | '::' NAME
197 )
198 """
199 tok0 = p_trailer.GetChild(0).tok
200 typ0 = p_trailer.GetChild(0).typ
201
202 if typ0 == Id.Op_LParen:
203 lparen = tok0
204 rparen = p_trailer.GetChild(-1).tok
205 arglist = ArgList(lparen, [], None, [], None, None, rparen)
206 if p_trailer.NumChildren() == 2: # ()
207 return expr.FuncCall(base, arglist)
208
209 p = p_trailer.GetChild(1) # the X in ( X )
210 assert p.typ == grammar_nt.arglist # f(x, y)
211 self._ArgList(p, arglist)
212 return expr.FuncCall(base, arglist)
213
214 if typ0 == Id.Op_LBracket:
215 p_args = p_trailer.GetChild(1)
216 assert p_args.typ == grammar_nt.subscriptlist
217
218 n = p_args.NumChildren()
219 if n == 1: # a[1] a[1:2] a[:] etc.
220 subscript = self._Subscript(p_args.GetChild(0))
221 else: # a[1, 2] a[1:2, :]
222 slices = [] # type: List[expr_t]
223 for i in xrange(0, n, 2):
224 slices.append(self._Subscript(p_args.GetChild(i)))
225 # expr.Tuple evaluates to List in YSH.
226 #
227 # Note that syntactically, a[1:2, 3:4] is the only way to
228 # get a List[Slice]. [1:2, 3:4] by itself is not allowed.
229 comma_tok = p_args.GetChild(1).tok
230 subscript = expr.Tuple(comma_tok, slices, expr_context_e.Store)
231
232 return Subscript(tok0, base, subscript)
233
234 if typ0 in (Id.Expr_Dot, Id.Expr_RArrow, Id.Expr_RDArrow):
235 attr = p_trailer.GetChild(1).tok # will be Id.Expr_Name
236 return Attribute(base, tok0, attr, lexer.TokenVal(attr),
237 expr_context_e.Store)
238
239 raise AssertionError(typ0)
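# Illustrative sketch of how each trailer form maps to an AST node:
#   f(x, y)     -> expr.FuncCall(base, ArgList whose pos_args are [x, y])
#   a[i]        -> Subscript('[' token, base, i)
#   a[1:2, 3:4] -> Subscript holding an expr.Tuple of two expr.Slice nodes
#   obj.field   -> Attribute(base, '.' token, field token, "field", Store)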
240
241 def _DictPair(self, p_node):
242 # type: (PNode) -> Tuple[expr_t, expr_t]
243 """
244 dict_pair: ( Expr_Name [':' test]
245 | '[' testlist ']' ':' test
246 | sq_string ':' test
247 | dq_string ':' test )
248 """
249 assert p_node.typ == grammar_nt.dict_pair
250
251 typ = p_node.GetChild(0).typ
252
253 if typ in (grammar_nt.sq_string, grammar_nt.dq_string):
254 key = self.Expr(p_node.GetChild(0)) # type: expr_t
255 val = self.Expr(p_node.GetChild(2))
256 return key, val
257
258 tok0 = p_node.GetChild(0).tok
259 id_ = tok0.id
260
261 if id_ == Id.Expr_Name:
262 key_str = value.Str(lexer.TokenVal(tok0))
263 key = expr.Const(tok0, key_str)
264 if p_node.NumChildren() >= 3:
265 val = self.Expr(p_node.GetChild(2))
266 else:
267 val = expr.Implicit
268
269 if id_ == Id.Op_LBracket: # {[x+y]: 'val'}
270 key = self.Expr(p_node.GetChild(1))
271 val = self.Expr(p_node.GetChild(4))
272 return key, val
273
274 return key, val
275
276 def _Dict(self, parent, p_node):
277 # type: (PNode, PNode) -> expr.Dict
278 """
279 dict: dict_pair (comma_newline dict_pair)* [comma_newline]
280 """
281 if p_node.typ == Id.Op_RBrace: # {}
282 return expr.Dict(parent.tok, [], [])
283
284 assert p_node.typ == grammar_nt.dict
285
286 keys = [] # type: List[expr_t]
287 values = [] # type: List[expr_t]
288
289 n = p_node.NumChildren()
290 for i in xrange(0, n, 2):
291 key, val = self._DictPair(p_node.GetChild(i))
292 keys.append(key)
293 values.append(val)
294
295 return expr.Dict(parent.tok, keys, values)
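# Illustrative sketch: the literal {name: 42, key} becomes an expr.Dict with
# keys [Const 'name', Const 'key'] and values [Const 42, expr.Implicit]; the
# bare 'key' entry gets expr.Implicit as its value (see _DictPair above).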
296
297 def _Tuple(self, parent):
298 # type: (PNode) -> expr_t
299
300 n = parent.NumChildren()
301
302 # (x) -- not a tuple
303 if n == 1:
304 return self.Expr(parent.GetChild(0))
305
306 # x, and (x,) aren't allowed
307 if n == 2:
308 p_die('Invalid trailing comma', parent.GetChild(1).tok)
309
310 elts = [] # type: List[expr_t]
311 for i in xrange(0, n, 2): # skip commas
312 p_node = parent.GetChild(i)
313 elts.append(self.Expr(p_node))
314
315 return expr.Tuple(parent.tok, elts,
316 expr_context_e.Store) # unused expr_context_e
317
318 def _TestlistComp(self, parent, p_node, id0):
319 # type: (PNode, PNode, Id_t) -> expr_t
320 """
321 testlist_comp:
322 (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
323 """
324 assert p_node.typ == grammar_nt.testlist_comp
325
326 n = p_node.NumChildren()
327 if n > 1 and p_node.GetChild(1).typ == grammar_nt.comp_for:
328 child0 = p_node.GetChild(0)
329 if child0.typ == grammar_nt.splat_expr:
330 p_die('Splat not currently supported', child0.tok)
331 elt = self.Expr(child0)
332
333 comp = self._CompFor(p_node.GetChild(1))
334 if id0 == Id.Op_LParen: # (x+1 for x in y)
335 return expr.GeneratorExp(elt, [comp])
336 if id0 == Id.Op_LBracket: # [x+1 for x in y]
337 return expr.ListComp(parent.tok, elt, [comp])
338 raise AssertionError()
339
340 if id0 == Id.Op_LParen:
341 # Parenthesized expression like (x+1) or (x)
342 if n == 1:
343 return self.Expr(p_node.GetChild(0))
344
345 # Tuples (1,) (1, 2) etc. - TODO: should be a list literal?
346 if p_node.GetChild(1).typ == Id.Arith_Comma:
347 return self._Tuple(p_node)
348
349 raise AssertionError()
350
351 if id0 == Id.Op_LBracket: # List [1,2,3]
352 elts = [] # type: List[expr_t]
353 for i in xrange(0, n, 2): # skip commas
354 child = p_node.GetChild(i)
355 if child.typ == grammar_nt.splat_expr:
356 p_die('Splat not currently supported', child.tok)
357 elts.append(self.Expr(child))
358
359 return expr.List(parent.tok, elts,
360 expr_context_e.Store) # unused expr_context_e
361
362 raise AssertionError(Id_str(id0))
363
364 def _Atom(self, parent):
365 # type: (PNode) -> expr_t
366 """Handle alternatives of 'atom' where there's more than one child."""
367
368 tok = parent.GetChild(0).tok
369 id_ = tok.id
370 n = parent.NumChildren()
371
372 if id_ == Id.Op_LParen:
373 # atom: '(' [yield_expr|testlist_comp] ')' | ...
374 if n == 2: # () is a tuple
375 assert (
376 parent.GetChild(1).typ == Id.Op_RParen), parent.GetChild(1)
377 return expr.Tuple(tok, [], expr_context_e.Store)
378
379 return self._TestlistComp(parent, parent.GetChild(1), id_)
380
381 if id_ == Id.Op_LBracket:
382 # atom: ... | '[' [testlist_comp] ']' | ...
383
384 if n == 2: # []
385 assert (parent.GetChild(1).typ == Id.Op_RBracket
386 ), parent.GetChild(1)
387 return expr.List(tok, [],
388 expr_context_e.Store) # unused expr_context_e
389
390 return self._TestlistComp(parent, parent.GetChild(1), id_)
391
392 if id_ == Id.Left_CaretBracket: # ^[42 + x]
393 child = self.Expr(parent.GetChild(1))
394 return expr.Literal(child)
395
396 if id_ == Id.Op_LBrace:
397 # atom: ... | '{' [Op_Newline] [dict] '}'
398 i = 1
399 if parent.GetChild(i).typ == Id.Op_Newline:
400 i += 1
401 return self._Dict(parent, parent.GetChild(i))
402
403 if id_ == Id.Arith_Amp:
404 n = parent.NumChildren()
405 if n >= 3:
406 p_die("Places in containers not implemented yet",
407 parent.GetChild(2).tok)
408
409 name_tok = parent.GetChild(1).tok
410 return expr.Place(name_tok, lexer.TokenVal(name_tok), [])
411
412 if id_ == Id.Expr_Func:
413 # STUB. This should really be a Func, not Lambda.
414 return expr.Lambda([], expr.Implicit)
415
416 # 100 M
417 # Ignoring the suffix for now
418 if id_ == Id.Expr_DecInt:
419 assert n > 1
420 p_die("Units suffix not implemented", parent.GetChild(1).tok)
421 #return self.Expr(parent.GetChild(0))
422
423 # 100.5 M
424 # Ignoring the suffix for now
425 if id_ == Id.Expr_Float:
426 assert n > 1
427 p_die("unix suffix implemented", parent.GetChild(1).tok)
428 #return self.Expr(parent.GetChild(0))
429
430 raise AssertionError(Id_str(id_))
431
432 def _NameType(self, p_node):
433 # type: (PNode) -> NameType
434 """ name_type: Expr_Name [':'] [type_expr] """
435 name_tok = p_node.GetChild(0).tok
436 typ = None # type: Optional[TypeExpr]
437
438 n = p_node.NumChildren()
439 if n == 2:
440 typ = self._TypeExpr(p_node.GetChild(1))
441 if n == 3:
442 typ = self._TypeExpr(p_node.GetChild(2))
443
444 return NameType(name_tok, lexer.TokenVal(name_tok), typ)
445
446 def _NameTypeList(self, p_node):
447 # type: (PNode) -> List[NameType]
448 """ name_type_list: name_type (',' name_type)* """
449 assert p_node.typ == grammar_nt.name_type_list
450 results = [] # type: List[NameType]
451
452 n = p_node.NumChildren()
453 for i in xrange(0, n, 2): # was children[::2]
454 results.append(self._NameType(p_node.GetChild(i)))
455 return results
456
457 def _CompFor(self, p_node):
458 # type: (PNode) -> Comprehension
459 """comp_for: 'for' exprlist 'in' or_test ['if' or_test]"""
460 lhs = self._NameTypeList(p_node.GetChild(1))
461 iterable = self.Expr(p_node.GetChild(3))
462
463 if p_node.NumChildren() >= 6:
464 cond = self.Expr(p_node.GetChild(5))
465 else:
466 cond = None
467
468 return Comprehension(lhs, iterable, cond)
469
470 def _CompareChain(self, parent):
471 # type: (PNode) -> expr_t
472 """comparison: expr (comp_op expr)*"""
473 cmp_ops = [] # type: List[Token]
474 comparators = [] # type: List[expr_t]
475 left = self.Expr(parent.GetChild(0))
476
477 i = 1
478 n = parent.NumChildren()
479 while i < n:
480 p = parent.GetChild(i)
481 op = p.GetChild(0).tok
482 if p.NumChildren() == 2:
483 # Blame the first token, and change its type
484 if op.id == Id.Expr_Not: # not in
485 op.id = Id.Node_NotIn
486 elif op.id == Id.Expr_Is: # is not
487 op.id = Id.Node_IsNot
488 else:
489 raise AssertionError()
490 else:
491 # is, <, ==, etc.
492 pass
493
494 cmp_ops.append(op)
495 i += 1
496 comparators.append(self.Expr(parent.GetChild(i)))
497 i += 1
498 return expr.Compare(left, cmp_ops, comparators)
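# Illustrative sketch: '1 < x <= 3' is parsed as a single chain, giving
#   expr.Compare(left=Const 1, ops=['<' token, '<=' token],
#                comparators=[Var x, Const 3])
# Two-token operators are collapsed onto their first token: 'not in' becomes
# Id.Node_NotIn and 'is not' becomes Id.Node_IsNot.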
499
500 def _Subscript(self, parent):
501 # type: (PNode) -> expr_t
502 """subscript: expr | [expr] ':' [expr]"""
503 typ0 = parent.GetChild(0).typ
504
505 n = parent.NumChildren()
506
507 if typ0 == grammar_nt.expr:
508 if n == 3: # a[1:2]
509 lower = self.Expr(parent.GetChild(0))
510 op_tok = parent.GetChild(1).tok
511 upper = self.Expr(parent.GetChild(2))
512
513 elif n == 2: # a[1:]
514 lower = self.Expr(parent.GetChild(0))
515 op_tok = parent.GetChild(1).tok
516 upper = None
517 else: # a[1]
518 return self.Expr(parent.GetChild(0))
519 else:
520 assert typ0 == Id.Arith_Colon
521 lower = None
522 if n == 1: # a[:]
523 op_tok = parent.GetChild(0).tok
524 upper = None
525 else: # a[:3]
526 op_tok = parent.GetChild(0).tok
527 upper = self.Expr(parent.GetChild(1))
528
529 return expr.Slice(lower, op_tok, upper)
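# Illustrative sketch of the subscript forms handled above:
#   a[1]   -> the index expression itself (no Slice node)
#   a[1:2] -> expr.Slice(lower=Const 1, ':' token, upper=Const 2)
#   a[1:]  -> expr.Slice(lower=Const 1, ':' token, upper=None)
#   a[:3]  -> expr.Slice(lower=None, ':' token, upper=Const 3)
#   a[:]   -> expr.Slice(None, ':' token, None)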
530
531 def Expr(self, pnode):
532 # type: (PNode) -> expr_t
533 """Transform expressions (as opposed to statements)"""
534 typ = pnode.typ
535
536 #
537 # YSH Entry Points / Additions
538 #
539
540 if typ == grammar_nt.ysh_expr: # for if/while
541 # ysh_expr: '(' testlist ')'
542 return self.Expr(pnode.GetChild(1))
543
544 if typ == grammar_nt.command_expr:
545 # return_expr: testlist end_stmt
546 return self.Expr(pnode.GetChild(0))
547
548 #
549 # Python-like Expressions / Operators
550 #
551
552 if typ == grammar_nt.atom:
553 if pnode.NumChildren() == 1:
554 return self.Expr(pnode.GetChild(0))
555 return self._Atom(pnode)
556
557 if typ == grammar_nt.testlist:
558 # testlist: test (',' test)* [',']
559 return self._Tuple(pnode)
560
561 if typ == grammar_nt.test:
562 # test: or_test ['if' or_test 'else' test] | lambdef
563 if pnode.NumChildren() == 1:
564 return self.Expr(pnode.GetChild(0))
565
566 # TODO: Handle lambdef
567
568 test = self.Expr(pnode.GetChild(2))
569 body = self.Expr(pnode.GetChild(0))
570 orelse = self.Expr(pnode.GetChild(4))
571 return expr.IfExp(test, body, orelse)
572
573 if typ == grammar_nt.lambdef:
574 # lambdef: '|' [name_type_list] '|' test
575
576 n = pnode.NumChildren()
577 if n == 4:
578 params = self._NameTypeList(pnode.GetChild(1))
579 else:
580 params = []
581
582 body = self.Expr(pnode.GetChild(n - 1))
583 return expr.Lambda(params, body)
584
585 #
586 # Operators with Precedence
587 #
588
589 if typ == grammar_nt.or_test:
590 # or_test: and_test ('or' and_test)*
591 return self._LeftAssoc(pnode)
592
593 if typ == grammar_nt.and_test:
594 # and_test: not_test ('and' not_test)*
595 return self._LeftAssoc(pnode)
596
597 if typ == grammar_nt.not_test:
598 # not_test: 'not' not_test | comparison
599 if pnode.NumChildren() == 1:
600 return self.Expr(pnode.GetChild(0))
601
602 op_tok = pnode.GetChild(0).tok # not
603 return expr.Unary(op_tok, self.Expr(pnode.GetChild(1)))
604
605 elif typ == grammar_nt.comparison:
606 if pnode.NumChildren() == 1:
607 return self.Expr(pnode.GetChild(0))
608
609 return self._CompareChain(pnode)
610
611 elif typ == grammar_nt.range_expr:
612 n = pnode.NumChildren()
613 if n == 1:
614 return self.Expr(pnode.GetChild(0))
615
616 if n == 3:
617 return expr.Range(self.Expr(pnode.GetChild(0)),
618 pnode.GetChild(1).tok,
619 self.Expr(pnode.GetChild(2)))
620
621 raise AssertionError(n)
622
623 elif typ == grammar_nt.expr:
624 # expr: xor_expr ('|' xor_expr)*
625 return self._LeftAssoc(pnode)
626
627 if typ == grammar_nt.xor_expr:
628 # xor_expr: and_expr ('xor' and_expr)*
629 return self._LeftAssoc(pnode)
630
631 if typ == grammar_nt.and_expr: # a & b
632 # and_expr: shift_expr ('&' shift_expr)*
633 return self._LeftAssoc(pnode)
634
635 elif typ == grammar_nt.shift_expr:
636 # shift_expr: arith_expr (('<<'|'>>') arith_expr)*
637 return self._LeftAssoc(pnode)
638
639 elif typ == grammar_nt.arith_expr:
640 # arith_expr: term (('+'|'-') term)*
641 return self._LeftAssoc(pnode)
642
643 elif typ == grammar_nt.term:
644 # term: factor (('*'|'/'|'div'|'mod') factor)*
645 return self._LeftAssoc(pnode)
646
647 elif typ == grammar_nt.factor:
648 # factor: ('+'|'-'|'~') factor | power
649 # the power would have already been reduced
650 if pnode.NumChildren() == 1:
651 return self.Expr(pnode.GetChild(0))
652
653 assert pnode.NumChildren() == 2
654 op = pnode.GetChild(0)
655 e = pnode.GetChild(1)
656
657 assert isinstance(op.tok, Token)
658 return expr.Unary(op.tok, self.Expr(e))
659
660 elif typ == grammar_nt.power:
661 # power: atom trailer* ['**' factor]
662
663 node = self.Expr(pnode.GetChild(0))
664 if pnode.NumChildren() == 1: # No trailers
665 return node
666
667 # Support a->startswith(b) and mydict.key
668 n = pnode.NumChildren()
669 i = 1
670 while i < n and pnode.GetChild(i).typ == grammar_nt.trailer:
671 node = self._Trailer(node, pnode.GetChild(i))
672 i += 1
673
674 if i != n: # ['**' factor]
675 op_tok = pnode.GetChild(i).tok
676 assert op_tok.id == Id.Arith_DStar, op_tok
677 factor = self.Expr(pnode.GetChild(i + 1))
678 node = expr.Binary(op_tok, node, factor)
679
680 return node
681
682 elif typ == grammar_nt.eggex:
683 return self._Eggex(pnode)
684
685 elif typ == grammar_nt.ysh_expr_sub:
686 return self.Expr(pnode.GetChild(0))
687
688 #
689 # YSH Lexer Modes
690 #
691
692 elif typ == grammar_nt.sh_array_literal:
693 return cast(YshArrayLiteral, pnode.GetChild(1).tok)
694
695 elif typ == grammar_nt.old_sh_array_literal:
696 return cast(YshArrayLiteral, pnode.GetChild(1).tok)
697
698 elif typ == grammar_nt.sh_command_sub:
699 return cast(CommandSub, pnode.GetChild(1).tok)
700
701 elif typ == grammar_nt.braced_var_sub:
702 return cast(BracedVarSub, pnode.GetChild(1).tok)
703
704 elif typ == grammar_nt.dq_string:
705 dq = cast(DoubleQuoted, pnode.GetChild(1).tok)
706 # sugar: ^"..." is short for ^["..."]
707 if pnode.GetChild(0).typ == Id.Left_CaretDoubleQuote:
708 return expr.Literal(dq)
709 return dq
710
711 elif typ == grammar_nt.sq_string:
712 return cast(SingleQuoted, pnode.GetChild(1).tok)
713
714 elif typ == grammar_nt.simple_var_sub:
715 tok = pnode.GetChild(0).tok
716
717 if tok.id == Id.VSub_DollarName: # $foo is disallowed
718 bare = lexer.TokenSliceLeft(tok, 1)
719 p_die(
720 'In expressions, remove $ and use `%s`, or sometimes "$%s"'
721 % (bare, bare), tok)
722
723 # $? is allowed
724 return SimpleVarSub(tok)
725
726 #
727 # Terminals
728 #
729
730 tok = pnode.tok
731 if typ == Id.Expr_Name:
732 return expr.Var(tok, lexer.TokenVal(tok))
733
734 # Everything else is an expr.Const
735 tok_str = lexer.TokenVal(tok)
736 # Remove underscores from 1_000_000. The lexer is responsible for
737 # validation.
738 c_under = tok_str.replace('_', '')
739
740 if typ == Id.Expr_DecInt:
741 ok, big_int = mops.FromStr2(c_under)
742 if not ok:
743 p_die('Decimal int constant is too large', tok)
744 cval = value.Int(big_int) # type: value_t
745
746 elif typ == Id.Expr_BinInt:
747 assert c_under[:2] in ('0b', '0B'), c_under
748 ok, big_int = mops.FromStr2(c_under[2:], 2)
749 if not ok:
750 p_die('Binary int constant is too large', tok)
751 cval = value.Int(big_int)
752
753 elif typ == Id.Expr_OctInt:
754 assert c_under[:2] in ('0o', '0O'), c_under
755 ok, big_int = mops.FromStr2(c_under[2:], 8)
756 if not ok:
757 p_die('Octal int constant is too large', tok)
758 cval = value.Int(big_int)
759
760 elif typ == Id.Expr_HexInt:
761 assert c_under[:2] in ('0x', '0X'), c_under
762 ok, big_int = mops.FromStr2(c_under[2:], 16)
763 if not ok:
764 p_die('Hex int constant is too large', tok)
765 cval = value.Int(big_int)
766
767 elif typ == Id.Expr_Float:
768 # Note: float() in mycpp/gc_builtins.cc currently uses strtod
769 # I think this never raises ValueError, because the lexer
770 # should only accept strings that strtod() does?
771 cval = value.Float(float(c_under))
772
773 elif typ == Id.Expr_Null:
774 cval = value.Null
775
776 elif typ == Id.Expr_True:
777 cval = value.Bool(True)
778
779 elif typ == Id.Expr_False:
780 cval = value.Bool(False)
781
782 elif typ == Id.Char_OneChar: # \n
783 assert len(tok_str) == 2, tok_str
784 s = consts.LookupCharC(lexer.TokenSliceLeft(tok, 1))
785 cval = value.Str(s)
786
787 elif typ == Id.Char_YHex: # \yff
788 assert len(tok_str) == 4, tok_str
789 hex_str = lexer.TokenSliceLeft(tok, 2)
790 s = chr(int(hex_str, 16))
791 cval = value.Str(s)
792
793 elif typ == Id.Char_UBraced: # \u{123}
794 hex_str = lexer.TokenSlice(tok, 3, -1)
795 code_point = int(hex_str, 16)
796 s = j8.Utf8Encode(code_point)
797 cval = value.Str(s)
798
799 else:
800 raise AssertionError(typ)
801
802 return expr.Const(tok, cval)
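# Illustrative sketch of the terminal handling above: '1_000_000'
# (Id.Expr_DecInt) becomes expr.Const(tok, value.Int(1000000)) after the
# underscores are stripped, and '\u{03bc}' (Id.Char_UBraced) becomes
# expr.Const(tok, value.Str(...)) holding the UTF-8 encoding of U+03BC.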
803
804 def _CheckLhs(self, lhs):
805 # type: (expr_t) -> None
806
807 UP_lhs = lhs
808 with tagswitch(lhs) as case:
809 if case(expr_e.Var):
810 # OK - e.g. setvar a.b.c[i] = 42
811 pass
812
813 elif case(expr_e.Subscript):
814 lhs = cast(Subscript, UP_lhs)
815 self._CheckLhs(lhs.obj) # recurse on LHS
816
817 elif case(expr_e.Attribute):
818 lhs = cast(Attribute, UP_lhs)
819 self._CheckLhs(lhs.obj) # recurse on LHS
820
821 else:
822 # Illegal - e.g. setglobal {}["key"] = 42
823 p_die("Subscript/Attribute not allowed on this LHS expression",
824 location.TokenForExpr(lhs))
825
826 def _LhsExprList(self, p_node):
827 # type: (PNode) -> List[y_lhs_t]
828 """lhs_list: expr (',' expr)*"""
829 assert p_node.typ == grammar_nt.lhs_list
830
831 lhs_list = [] # type: List[y_lhs_t]
832 n = p_node.NumChildren()
833 for i in xrange(0, n, 2):
834 p = p_node.GetChild(i)
835 #self.p_printer.Print(p)
836
837 e = self.Expr(p)
838 UP_e = e
839 with tagswitch(e) as case:
840 if case(expr_e.Var):
841 e = cast(expr.Var, UP_e)
842 lhs_list.append(e.left)
843
844 elif case(expr_e.Subscript):
845 e = cast(Subscript, UP_e)
846 self._CheckLhs(e)
847 lhs_list.append(e)
848
849 elif case(expr_e.Attribute):
850 e = cast(Attribute, UP_e)
851 self._CheckLhs(e)
852 if e.op.id != Id.Expr_Dot:
853 # e.g. setvar obj->method is not valid
854 p_die("Can't assign to this attribute expr", e.op)
855 lhs_list.append(e)
856
857 else:
858 pass # work around mycpp bug
859
860 # TODO: could blame arbitrary expr_t, but this works most of
861 # the time
862 if p.tok:
863 blame = p.tok # type: loc_t
864 else:
865 blame = loc.Missing
866 p_die("Can't assign to this expression", blame)
867
868 return lhs_list
869
870 def MakeVarDecl(self, p_node):
871 # type: (PNode) -> VarDecl
872 """
873 ysh_var_decl: name_type_list ['=' testlist] end_stmt
874 """
875 assert p_node.typ == grammar_nt.ysh_var_decl
876
877 lhs = self._NameTypeList(p_node.GetChild(0)) # could be a tuple
878
879 # This syntax is confusing, and different from JavaScript
880 # var x, y = 1, 2
881 # But this is useful:
882 # var flag, i = parseArgs(spec, argv)
883
884 n = p_node.NumChildren()
885 if n >= 3:
886 rhs = self.Expr(p_node.GetChild(2))
887 else:
888 rhs = None
889
890 # The caller should fill in the keyword token.
891 return VarDecl(None, lhs, rhs)
892
893 def MakeMutation(self, p_node):
894 # type: (PNode) -> Mutation
895 """
896 ysh_mutation: lhs_list (augassign | '=') testlist end_stmt
897 """
898 assert p_node.typ == grammar_nt.ysh_mutation
899
900 lhs_list = self._LhsExprList(p_node.GetChild(0)) # could be a tuple
901 op_tok = p_node.GetChild(1).tok
902 if len(lhs_list) > 1 and op_tok.id != Id.Arith_Equal:
903 p_die('Multiple assignment must use =', op_tok)
904 rhs = self.Expr(p_node.GetChild(2))
905 return Mutation(None, lhs_list, op_tok, rhs)
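# Illustrative sketch: 'setvar x, y = f()' yields
#   Mutation(None, [x token, y token], '=' token, FuncCall of f)
# with the keyword token filled in by the caller, while a single LHS may use
# an augassign token (e.g. '+='). Multiple LHS places with any operator other
# than '=' hit the p_die above.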
906
907 def _EggexFlag(self, p_node):
908 # type: (PNode) -> EggexFlag
909 n = p_node.NumChildren()
910 if n == 1:
911 return EggexFlag(False, p_node.GetChild(0).tok)
912 elif n == 2:
913 return EggexFlag(True, p_node.GetChild(1).tok)
914 else:
915 raise AssertionError()
916
917 def _Eggex(self, p_node):
918 # type: (PNode) -> Eggex
919 """
920 eggex: '/' regex [';' re_flag* [';' Expr_Name] ] '/'
921 """
922 left = p_node.GetChild(0).tok
923 regex = self._Regex(p_node.GetChild(1))
924
925 flags = [] # type: List[EggexFlag]
926 trans_pref = None # type: Optional[Token]
927
928 i = 2
929 current = p_node.GetChild(i)
930 if current.typ == Id.Op_Semi:
931 i += 1
932 while True:
933 current = p_node.GetChild(i)
934 if current.typ != grammar_nt.re_flag:
935 break
936 flags.append(self._EggexFlag(current))
937 i += 1
938
939 if current.typ == Id.Op_Semi:
940 i += 1
941 trans_pref = p_node.GetChild(i).tok
942
943 # Canonicalize and validate flags for ERE only. Default is ERE.
944 if trans_pref is None or lexer.TokenVal(trans_pref) == 'ERE':
945 canonical_flags = regex_translate.CanonicalFlags(flags)
946 else:
947 canonical_flags = None
948
949 return Eggex(left, regex, flags, trans_pref, canonical_flags)
950
951 def YshCasePattern(self, pnode):
952 # type: (PNode) -> pat_t
953 assert pnode.typ == grammar_nt.ysh_case_pat, pnode
954
955 pattern = pnode.GetChild(0)
956 typ = pattern.typ
957 if typ == Id.Op_LParen:
958 # pat_expr or pat_else
959 pattern = pnode.GetChild(1)
960 typ = pattern.typ
961
962 if typ == grammar_nt.pat_else:
963 return pat.Else
964
965 if typ == grammar_nt.pat_exprs:
966 exprs = [] # type: List[expr_t]
967 for i in xrange(pattern.NumChildren()):
968 child = pattern.GetChild(i)
969 if child.typ == grammar_nt.expr:
970 expr = self.Expr(child)
971 exprs.append(expr)
972 return pat.YshExprs(exprs)
973
974 if typ == grammar_nt.eggex:
975 return self._Eggex(pattern)
976
977 raise AssertionError()
978
979 def _BlockArg(self, p_node):
980 # type: (PNode) -> expr_t
981
982 n = p_node.NumChildren()
983 if n == 1:
984 child = p_node.GetChild(0)
985 return self.Expr(child)
986
987 # It can only be an expression, not a=42, or ...expr
988 p_die('Invalid block expression argument', p_node.tok)
989
990 def _Argument(self, p_node, after_semi, arglist):
991 # type: (PNode, bool, ArgList) -> None
992 """
993 argument: (
994 test [comp_for]
995 | test '=' test # named arg
996 | '...' test # var args
997 )
998 """
999 pos_args = arglist.pos_args
1000 named_args = arglist.named_args
1001
1002 assert p_node.typ == grammar_nt.argument, p_node
1003 n = p_node.NumChildren()
1004 if n == 1:
1005 child = p_node.GetChild(0)
1006 if after_semi:
1007 p_die(POS_ARG_MISPLACED, child.tok)
1008 arg = self.Expr(child)
1009 pos_args.append(arg)
1010 return
1011
1012 if n == 2:
1013 # Note: We allow multiple spreads, just like Julia. They are
1014 # concatenated as in lists and dicts.
1015 tok0 = p_node.GetChild(0).tok
1016 if tok0.id == Id.Expr_Ellipsis:
1017 spread_expr = expr.Spread(tok0, self.Expr(p_node.GetChild(1)))
1018 if after_semi: # f(; ... named)
1019 named_args.append(NamedArg(None, spread_expr))
1020 else: # f(...named)
1021 pos_args.append(spread_expr)
1022 return
1023
1024 # Note: generator expression not implemented
1025 if p_node.GetChild(1).typ == grammar_nt.comp_for:
1026 child = p_node.GetChild(0)
1027 if after_semi:
1028 p_die(POS_ARG_MISPLACED, child.tok)
1029
1030 elt = self.Expr(child)
1031 comp = self._CompFor(p_node.GetChild(1))
1032 arg = expr.GeneratorExp(elt, [comp])
1033 pos_args.append(arg)
1034 return
1035
1036 raise AssertionError()
1037
1038 if n == 3: # named args can come before or after the semicolon
1039 n1 = NamedArg(
1040 p_node.GetChild(0).tok, self.Expr(p_node.GetChild(2)))
1041 named_args.append(n1)
1042 return
1043
1044 raise AssertionError()
1045
1046 def _ArgGroup(self, p_node, after_semi, arglist):
1047 # type: (PNode, bool, ArgList) -> None
1048 """
1049 arg_group: argument (',' argument)* [',']
1050 """
1051 for i in xrange(p_node.NumChildren()):
1052 p_child = p_node.GetChild(i)
1053 if p_child.typ == grammar_nt.argument:
1054 self._Argument(p_child, after_semi, arglist)
1055
1056 def _ArgList(self, p_node, arglist):
1057 # type: (PNode, ArgList) -> None
1058 """For both funcs and procs
1059
1060 arglist: (
1061 [arg_group]
1062 [';' [arg_group]]
1063 )
1064
1065 arglist3: ...
1066 """
1067 n = p_node.NumChildren()
1068 if n == 0:
1069 return
1070
1071 i = 0
1072
1073 if i >= n:
1074 return
1075 child = p_node.GetChild(i)
1076 if child.typ == grammar_nt.arg_group:
1077 self._ArgGroup(child, False, arglist)
1078 i += 1
1079
1080 if i >= n:
1081 return
1082 child = p_node.GetChild(i)
1083 if child.typ == Id.Op_Semi:
1084 arglist.semi_tok = child.tok
1085 i += 1
1086
1087 # Named args after first semi-colon
1088 if i >= n:
1089 return
1090 child = p_node.GetChild(i)
1091 if child.typ == grammar_nt.arg_group:
1092 self._ArgGroup(child, True, arglist)
1093 i += 1
1094
1095 #
1096 # Special third group may have block expression - only for arglist3,
1097 # used for procs!
1098 #
1099
1100 if i >= n:
1101 return
1102 assert p_node.typ == grammar_nt.arglist3, p_node
1103
1104 child = p_node.GetChild(i)
1105 if child.typ == Id.Op_Semi:
1106 arglist.semi_tok2 = child.tok
1107 i += 1
1108
1109 if i >= n:
1110 return
1111 child = p_node.GetChild(i)
1112 if child.typ == grammar_nt.argument:
1113 arglist.block_expr = self._BlockArg(child)
1114 i += 1
1115
1116 def ProcCallArgs(self, pnode, arglist):
1117 # type: (PNode, ArgList) -> None
1118 """
1119 ysh_eager_arglist: '(' [arglist3] ')'
1120 ysh_lazy_arglist: '[' [arglist] ']'
1121 """
1122 n = pnode.NumChildren()
1123 if n == 2: # f()
1124 return
1125
1126 if n == 3:
1127 child1 = pnode.GetChild(1) # the X in '( X )'
1128
1129 self._ArgList(child1, arglist)
1130 return
1131
1132 raise AssertionError()
1133
1134 def _TypeExpr(self, pnode):
1135 # type: (PNode) -> TypeExpr
1136 """
1137 type_expr: Expr_Name [ '[' type_expr (',' type_expr)* ']' ]
1138 """
1139 assert pnode.typ == grammar_nt.type_expr, pnode.typ
1140
1141 ty = TypeExpr.CreateNull() # don't allocate children
1142
1143 ty.tok = pnode.GetChild(0).tok
1144 ty.name = lexer.TokenVal(ty.tok)
1145
1146 n = pnode.NumChildren()
1147 if n == 1:
1148 return ty
1149
1150 ty.params = []
1151 i = 2
1152 while i < n:
1153 p = self._TypeExpr(pnode.GetChild(i))
1154 ty.params.append(p)
1155 i += 2 # skip comma
1156
1157 return ty
1158
1159 def _Param(self, pnode):
1160 # type: (PNode) -> Param
1161 """
1162 param: Expr_Name [type_expr] ['=' expr]
1163 """
1164 assert pnode.typ == grammar_nt.param
1165
1166 name_tok = pnode.GetChild(0).tok
1167 n = pnode.NumChildren()
1168
1169 assert name_tok.id == Id.Expr_Name, name_tok
1170
1171 default_val = None # type: expr_t
1172 type_ = None # type: TypeExpr
1173
1174 if n == 1:
1175 # proc p(a)
1176 pass
1177
1178 elif n == 2:
1179 # proc p(a Int)
1180 type_ = self._TypeExpr(pnode.GetChild(1))
1181
1182 elif n == 3:
1183 # proc p(a = 3)
1184 default_val = self.Expr(pnode.GetChild(2))
1185
1186 elif n == 4:
1187 # proc p(a Int = 3)
1188 type_ = self._TypeExpr(pnode.GetChild(1))
1189 default_val = self.Expr(pnode.GetChild(3))
1190
1191 return Param(name_tok, lexer.TokenVal(name_tok), type_, default_val)
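# Illustrative sketch: the param 'count Int = 3' produces
#   Param(name token, "count", TypeExpr for Int, Const 3)
# while a bare 'count' leaves both type_ and default_val as None.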
1192
1193 def _ParamGroup(self, p_node):
1194 # type: (PNode) -> ParamGroup
1195 """
1196 param_group:
1197 (param ',')*
1198 [ (param | '...' Expr_Name) [,] ]
1199 """
1200 assert p_node.typ == grammar_nt.param_group, p_node
1201
1202 params = [] # type: List[Param]
1203 rest_of = None # type: Optional[RestParam]
1204
1205 n = p_node.NumChildren()
1206 i = 0
1207 while i < n:
1208 child = p_node.GetChild(i)
1209 if child.typ == grammar_nt.param:
1210 params.append(self._Param(child))
1211
1212 elif child.typ == Id.Expr_Ellipsis:
1213 tok = p_node.GetChild(i + 1).tok
1214 rest_of = RestParam(tok, lexer.TokenVal(tok))
1215
1216 i += 2
1217
1218 return ParamGroup(params, rest_of)
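# Illustrative sketch: the group 'a, b Str, ...rest' produces
#   ParamGroup(params=[Param a, Param b], rest_of=RestParam(tok, "rest"))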
1219
1220 def Proc(self, p_node):
1221 # type: (PNode) -> proc_sig_t
1222 """
1223 ysh_proc: (
1224 [ '('
1225 [ param_group ] # word params, with defaults
1226 [ ';' [ param_group ] ] # positional typed params, with defaults
1227 [ ';' [ param_group ] ] # named params, with defaults
1228 [ ';' Expr_Name ] # optional block param, with no type or default
1229 ')'
1230 ]
1231 '{' # opening { for pgen2
1232 )
1233 """
1234 assert p_node.typ == grammar_nt.ysh_proc
1235
1236 n = p_node.NumChildren()
1237 if n == 1: # proc f {
1238 return proc_sig.Open
1239
1240 if n == 3: # proc f () {
1241 return proc_sig.Closed.CreateNull(alloc_lists=True) # no params
1242
1243 # proc f( three param groups, and block group )
1244 sig = proc_sig.Closed.CreateNull(alloc_lists=True) # no params
1245
1246 # Word args
1247 i = 1
1248 child = p_node.GetChild(i)
1249 if child.typ == grammar_nt.param_group:
1250 sig.word = self._ParamGroup(p_node.GetChild(i))
1251
1252 # Validate word args
1253 for word in sig.word.params:
1254 if word.type:
1255 if word.type.name not in ('Str', 'Ref'):
1256 p_die('Word params may only have type Str or Ref',
1257 word.type.tok)
1258 if word.type.params is not None:
1259 p_die('Unexpected type parameters', word.type.tok)
1260
1261 i += 2
1262 else:
1263 i += 1
1264
1265 #log('i %d n %d', i, n)
1266 if i >= n:
1267 return sig
1268
1269 # Positional args
1270 child = p_node.GetChild(i)
1271 if child.typ == grammar_nt.param_group:
1272 sig.positional = self._ParamGroup(p_node.GetChild(i))
1273 i += 2
1274 else:
1275 i += 1
1276
1277 #log('i %d n %d', i, n)
1278 if i >= n:
1279 return sig
1280
1281 # Keyword args
1282 child = p_node.GetChild(i)
1283 if child.typ == grammar_nt.param_group:
1284 sig.named = self._ParamGroup(p_node.GetChild(i))
1285 i += 2
1286 else:
1287 i += 1
1288
1289 #log('i %d n %d', i, n)
1290 if i >= n:
1291 return sig
1292
1293 child = p_node.GetChild(i)
1294 if child.typ == grammar_nt.param_group:
1295 group = self._ParamGroup(p_node.GetChild(i))
1296 params = group.params
1297 if len(params) > 1:
1298 p_die('Only 1 block param is allowed', params[1].blame_tok)
1299 if group.rest_of:
1300 p_die("Rest param isn't allowed for blocks",
1301 group.rest_of.blame_tok)
1302
1303 if len(params) == 1:
1304 if params[0].type:
1305 if params[0].type.name != 'Command':
1306 p_die('Block param must have type Command',
1307 params[0].type.tok)
1308 if params[0].type.params is not None:
1309 p_die('Unexpected type parameters', params[0].type.tok)
1310
1311 sig.block_param = params[0]
1312
1313 return sig
1314
1315 def YshFunc(self, p_node, out):
1316 # type: (PNode, Func) -> None
1317 """
1318 ysh_func: Expr_Name '(' [param_group] [';' param_group] ')'
1319 """
1320 assert p_node.typ == grammar_nt.ysh_func
1321
1322 #self.p_printer.Print(p_node)
1323
1324 out.name = p_node.GetChild(0).tok
1325
1326 n = p_node.NumChildren()
1327 i = 2 # after (
1328
1329 child = p_node.GetChild(i)
1330 if child.typ == grammar_nt.param_group:
1331 out.positional = self._ParamGroup(child)
1332 i += 2 # skip past ;
1333 else:
1334 i += 1
1335
1336 if i >= n:
1337 return
1338
1339 child = p_node.GetChild(i)
1340 if child.typ == grammar_nt.param_group:
1341 out.named = self._ParamGroup(child)
1342
1343 #
1344 # Eggex Language
1345 #
1346
1347 def _RangeCharSingleQuoted(self, p_node):
1348 # type: (PNode) -> Optional[CharCode]
1349
1350 assert p_node.typ == grammar_nt.range_char, p_node
1351
1352 # 'a' in 'a'-'b'
1353
1354 child0 = p_node.GetChild(0)
1355 if child0.typ == grammar_nt.sq_string:
1356 sq_part = cast(SingleQuoted, child0.GetChild(1).tok)
1357 n = len(sq_part.sval)
1358 if n == 0:
1359 p_die("Quoted range char can't be empty",
1360 loc.WordPart(sq_part))
1361 elif n == 1:
1362 return CharCode(sq_part.left, ord(sq_part.sval[0]), False)
1363 else:
1364 p_die(RANGE_POINT_TOO_LONG, loc.WordPart(sq_part))
1365 return None
1366
1367 def _OtherRangeToken(self, p_node):
1368 # type: (PNode) -> Token
1369 """An endpoint of a range (single char)
1370
1371 range_char: Expr_Name | Expr_DecInt | sq_string | char_literal
1372 a-z 0-9 'a'-'z' \x00-\xff
1373 """
1374 assert p_node.typ == grammar_nt.range_char, p_node
1375
1376 child0 = p_node.GetChild(0)
1377 if child0.typ == grammar_nt.char_literal:
1378 # \x00 in /[\x00 - \x20]/
1379 tok = child0.GetChild(0).tok
1380 return tok
1381
1382 tok = p_node.tok
1383 # a in a-z is Expr_Name
1384 # 0 in 0-9 is Expr_DecInt
1385 assert tok.id in (Id.Expr_Name, Id.Expr_DecInt), tok
1386
1387 if tok.length != 1:
1388 p_die(RANGE_POINT_TOO_LONG, tok)
1389 return tok
1390
1391 def _NonRangeChars(self, p_node):
1392 # type: (PNode) -> class_literal_term_t
1393 """
1394 \" \u1234 '#'
1395 """
1396 assert p_node.typ == grammar_nt.range_char, p_node
1397
1398 child0 = p_node.GetChild(0)
1399 typ0 = p_node.GetChild(0).typ
1400
1401 if typ0 == grammar_nt.sq_string:
1402 return cast(SingleQuoted, child0.GetChild(1).tok)
1403
1404 if typ0 == grammar_nt.char_literal:
1405 return word_compile.EvalCharLiteralForRegex(child0.tok)
1406
1407 if typ0 == Id.Expr_Name:
1408 # Look up PerlClass and PosixClass
1409 return self._NameInClass(None, child0.tok)
1410
1411 raise AssertionError()
1412
1413 def _ClassLiteralTerm(self, p_node):
1414 # type: (PNode) -> class_literal_term_t
1415 """
1416 class_literal_term:
1417 range_char ['-' range_char ]
1418 | '@' Expr_Name # splice
1419 | '!' Expr_Name # negate char class
1420 ...
1421 """
1422 assert p_node.typ == grammar_nt.class_literal_term, p_node
1423
1424 typ0 = p_node.GetChild(0).typ
1425
1426 if typ0 == grammar_nt.range_char:
1427 n = p_node.NumChildren()
1428
1429 if n == 1:
1430 return self._NonRangeChars(p_node.GetChild(0))
1431
1432 # 'a'-'z' etc.
1433 if n == 3:
1434 assert p_node.GetChild(1).typ == Id.Arith_Minus, p_node
1435
1436 left = p_node.GetChild(0)
1437 right = p_node.GetChild(2)
1438
1439 code1 = self._RangeCharSingleQuoted(left)
1440 if code1 is None:
1441 tok1 = self._OtherRangeToken(left)
1442 code1 = word_compile.EvalCharLiteralForRegex(tok1)
1443
1444 code2 = self._RangeCharSingleQuoted(right)
1445 if code2 is None:
1446 tok2 = self._OtherRangeToken(right)
1447 code2 = word_compile.EvalCharLiteralForRegex(tok2)
1448 return CharRange(code1, code2)
1449
1450 raise AssertionError()
1451
1452 if typ0 == Id.Expr_At:
1453 tok1 = p_node.GetChild(1).tok
1454 return class_literal_term.Splice(tok1, lexer.TokenVal(tok1))
1455
1456 if typ0 == Id.Expr_Bang:
1457 return self._NameInClass(
1458 p_node.GetChild(0).tok,
1459 p_node.GetChild(1).tok)
1460
1461 p_die("This kind of class literal term isn't implemented",
1462 p_node.GetChild(0).tok)
1463
1464 def _ClassLiteral(self, p_node):
1465 # type: (PNode) -> List[class_literal_term_t]
1466 """class_literal: '[' class_literal_term+ ']'."""
1467 assert p_node.typ == grammar_nt.class_literal
1468 # skip [ and ]
1469 terms = [] # type: List[class_literal_term_t]
1470 for i in xrange(1, p_node.NumChildren() - 1):
1471 terms.append(self._ClassLiteralTerm(p_node.GetChild(i)))
1472
1473 return terms
1474
1475 def _NameInRegex(self, negated_tok, tok):
1476 # type: (Token, Token) -> re_t
1477 tok_str = lexer.TokenVal(tok)
1478 if tok_str == 'dot':
1479 if negated_tok:
1480 p_die("Can't negate this symbol", tok)
1481 return re.Primitive(tok, Id.Eggex_Dot)
1482
1483 if tok_str in POSIX_CLASSES:
1484 return PosixClass(negated_tok, tok_str)
1485
1486 perl = PERL_CLASSES.get(tok_str)
1487 if perl is not None:
1488 return PerlClass(negated_tok, perl)
1489
1490 if tok_str[0].isupper(): # e.g. HexDigit
1491 return re.Splice(tok, lexer.TokenVal(tok))
1492
1493 p_die("%r isn't a character class" % tok_str, tok)
1494
1495 def _NameInClass(self, negated_tok, tok):
1496 # type: (Token, Token) -> class_literal_term_t
1497 """Like the above, but 'dot' and 'd' don't mean anything within []"""
1498 tok_str = lexer.TokenVal(tok)
1499
1500 # A bare, unquoted character literal. In the grammar, this is expressed as
1501 # range_char without an ending.
1502
1503 # d is NOT 'digit', it's a literal 'd'!
1504 if len(tok_str) == 1:
1505 # Expr_Name matches VAR_NAME_RE, which starts with [a-zA-Z_]
1506 assert tok.id in (Id.Expr_Name, Id.Expr_DecInt)
1507
1508 if negated_tok: # [~d] is not allowed, only [~digit]
1509 p_die("Can't negate this symbol", tok)
1510 return word_compile.EvalCharLiteralForRegex(tok)
1511
1512 # digit, word, but not d, w, etc.
1513 if tok_str in POSIX_CLASSES:
1514 return PosixClass(negated_tok, tok_str)
1515
1516 perl = PERL_CLASSES.get(tok_str)
1517 if perl is not None:
1518 return PerlClass(negated_tok, perl)
1519 p_die("%r isn't a character class" % tok_str, tok)
1520
1521 def _ReAtom(self, p_atom):
1522 # type: (PNode) -> re_t
1523 """
1524 re_atom: ( char_literal | ...
1525 """
1526 assert p_atom.typ == grammar_nt.re_atom, p_atom.typ
1527
1528 child0 = p_atom.GetChild(0)
1529
1530 typ0 = p_atom.GetChild(0).typ
1531 tok0 = p_atom.GetChild(0).tok
1532
1533 # Non-terminals
1534
1535 if typ0 == grammar_nt.class_literal:
1536 return re.CharClassLiteral(False, self._ClassLiteral(child0))
1537
1538 if typ0 == grammar_nt.sq_string:
1539 return cast(SingleQuoted, child0.GetChild(1).tok)
1540
1541 if typ0 == grammar_nt.char_literal:
1542 # Note: ERE doesn't seem to support escapes like Python
1543 # https://docs.python.org/3/library/re.html
1544 # We might want to do a translation like this:
1545 #
1546 # \u{03bc} -> \u03bc
1547 # \x00 -> \x00
1548 # \n -> \n
1549
1550 # Must be Id.Char_{OneChar,Hex,UBraced}
1551 assert consts.GetKind(tok0.id) == Kind.Char
1552 s = word_compile.EvalCStringToken(tok0.id, lexer.TokenVal(tok0))
1553 return re.LiteralChars(tok0, s)
1554
1555 # Special punctuation
1556 if typ0 == Id.Expr_Dot: # .
1557 return re.Primitive(tok0, Id.Eggex_Dot)
1558
1559 if typ0 == Id.Arith_Caret: # ^
1560 return re.Primitive(tok0, Id.Eggex_Start)
1561
1562 if typ0 == Id.Expr_Dollar: # $
1563 return re.Primitive(tok0, Id.Eggex_End)
1564
1565 if typ0 == Id.Expr_Name:
1566 # d digit -> PosixClass PerlClass etc.
1567 return self._NameInRegex(None, tok0)
1568
1569 if typ0 == Id.Expr_Symbol:
1570 # Validate symbols here, like we validate PerlClass, etc.
1571 tok_str = lexer.TokenVal(tok0)
1572 if tok_str == '%start':
1573 return re.Primitive(tok0, Id.Eggex_Start)
1574 if tok_str == '%end':
1575 return re.Primitive(tok0, Id.Eggex_End)
1576 p_die("Unexpected token %r in regex" % tok_str, tok0)
1577
1578 if typ0 == Id.Expr_At:
1579 # | '@' Expr_Name
1580 tok1 = p_atom.GetChild(1).tok
1581 return re.Splice(tok0, lexer.TokenVal(tok1))
1582
1583 if typ0 == Id.Expr_Bang:
1584 # | '!' (Expr_Name | class_literal)
1585 # | '!' '!' Expr_Name (Expr_Name | Expr_DecInt | '(' regex ')')
1586 n = p_atom.NumChildren()
1587 if n == 2:
1588 child1 = p_atom.GetChild(1)
1589 if child1.typ == grammar_nt.class_literal:
1590 return re.CharClassLiteral(True,
1591 self._ClassLiteral(child1))
1592 else:
1593 return self._NameInRegex(tok0, p_atom.GetChild(1).tok)
1594 else:
1595 # Note: !! conflicts with shell history
1596 p_die(
1597 "Backtracking with !! isn't implemented (requires Python/PCRE)",
1598 p_atom.GetChild(1).tok)
1599
1600 if typ0 == Id.Op_LParen:
1601 # | '(' regex ')'
1602
1603 # Note: in ERE (d+) is the same as <d+>. That is, Group becomes
1604 # Capture.
1605 return re.Group(self._Regex(p_atom.GetChild(1)))
1606
1607 if typ0 == Id.Arith_Less:
1608 # | '<' 'capture' regex ['as' Expr_Name] [':' Expr_Name] '>'
1609
1610 n = p_atom.NumChildren()
1611 assert n == 4 or n == 6 or n == 8, n
1612
1613 # < capture d+ >
1614 regex = self._Regex(p_atom.GetChild(2))
1615
1616 as_name = None # type: Optional[Token]
1617 func_name = None # type: Optional[Token]
1618
1619 i = 3 # points at any of > as :
1620
1621 typ = p_atom.GetChild(i).typ
1622 if typ == Id.Expr_As:
1623 as_name = p_atom.GetChild(i + 1).tok
1624 i += 2
1625
1626 typ = p_atom.GetChild(i).typ
1627 if typ == Id.Arith_Colon:
1628 func_name = p_atom.GetChild(i + 1).tok
1629
1630 return re.Capture(regex, as_name, func_name)
1631
1632 raise AssertionError(typ0)
1633
1634 def _RepeatOp(self, p_repeat):
1635 # type: (PNode) -> re_repeat_t
1636 """
1637 repeat_op: '+' | '*' | '?'
1638 | '{' [Expr_Name] ('+' | '*' | '?' | repeat_range) '}'
1639 """
1640 assert p_repeat.typ == grammar_nt.repeat_op, p_repeat
1641
1642 tok = p_repeat.GetChild(0).tok
1643 id_ = tok.id
1644
1645 if id_ in (Id.Arith_Plus, Id.Arith_Star, Id.Arith_QMark):
1646 return tok # a+ a* a?
1647
1648 if id_ == Id.Op_LBrace:
1649 child1 = p_repeat.GetChild(1)
1650 if child1.typ != grammar_nt.repeat_range:
1651 # e.g. dot{N *} is .*?
1652 p_die("Perl-style repetition isn't implemented with libc",
1653 child1.tok)
1654
1655 # repeat_range: (
1656 # Expr_DecInt [',']
1657 # | ',' Expr_DecInt
1658 # | Expr_DecInt ',' Expr_DecInt
1659 # )
1660
1661 n = child1.NumChildren()
1662 if n == 1: # {3}
1663 tok = child1.GetChild(0).tok
1664 return tok # different operator than + * ?
1665
1666 if n == 2:
1667 if child1.GetChild(0).typ == Id.Expr_DecInt: # {3,}
1668 left = child1.GetChild(0).tok
1669 return re_repeat.Range(left, lexer.TokenVal(left), '',
1670 None)
1671 else: # {,3}
1672 right = child1.GetChild(1).tok
1673 return re_repeat.Range(None, '', lexer.TokenVal(right),
1674 right)
1675
1676 if n == 3: # {1,3}
1677 left = child1.GetChild(0).tok
1678 right = child1.GetChild(2).tok
1679 return re_repeat.Range(left, lexer.TokenVal(left),
1680 lexer.TokenVal(right), right)
1681
1682 raise AssertionError(n)
1683
1684 raise AssertionError(id_)
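# Illustrative sketch of the repetition suffixes handled above:
#   d+     -> the '+' token itself
#   d{3}   -> the '3' token (exact count)
#   d{2,}  -> re_repeat.Range('2' token, "2", '', None)
#   d{,3}  -> re_repeat.Range(None, '', "3", '3' token)
#   d{1,3} -> re_repeat.Range('1' token, "1", "3", '3' token)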
1685
1686 def _ReAlt(self, p_node):
1687 # type: (PNode) -> re_t
1688 """
1689 re_alt: (re_atom [repeat_op])+
1690 """
1691 assert p_node.typ == grammar_nt.re_alt
1692
1693 i = 0
1694 n = p_node.NumChildren()
1695 seq = [] # type: List[re_t]
1696 while i < n:
1697 r = self._ReAtom(p_node.GetChild(i))
1698 i += 1
1699 if i < n and p_node.GetChild(i).typ == grammar_nt.repeat_op:
1700 repeat_op = self._RepeatOp(p_node.GetChild(i))
1701 r = re.Repeat(r, repeat_op)
1702 i += 1
1703 seq.append(r)
1704
1705 if len(seq) == 1:
1706 return seq[0]
1707 else:
1708 return re.Seq(seq)
1709
1710 def _Regex(self, p_node):
1711 # type: (PNode) -> re_t
1712 """
1713 regex: [re_alt] (('|'|'or') re_alt)*
1714 """
1715 assert p_node.typ == grammar_nt.regex
1716
1717 n = p_node.NumChildren()
1718 alts = [] # type: List[re_t]
1719 for i in xrange(0, n, 2): # was children[::2]
1720 c = p_node.GetChild(i)
1721 alts.append(self._ReAlt(c))
1722
1723 if len(alts) == 1:
1724 return alts[0]
1725 else:
1726 return re.Alt(alts)
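# Illustrative sketch: the eggex body 'd+ | s*' has two re_alt children, so
# this returns re.Alt([re.Repeat(PerlClass 'd', '+'), re.Repeat(PerlClass 's', '*')]).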
1727
1728
1729# vim: sw=4