| 1 | """expr_to_ast.py."""
|
| 2 | from __future__ import print_function
|
| 3 |
|
| 4 | from _devbuild.gen.id_kind_asdl import Id, Id_t, Id_str, Kind
|
| 5 | from _devbuild.gen.syntax_asdl import (
|
| 6 | Token,
|
| 7 | SimpleVarSub,
|
| 8 | loc,
|
| 9 | loc_t,
|
| 10 | DoubleQuoted,
|
| 11 | SingleQuoted,
|
| 12 | BracedVarSub,
|
| 13 | CommandSub,
|
| 14 | YshArrayLiteral,
|
| 15 | expr,
|
| 16 | expr_e,
|
| 17 | expr_t,
|
| 18 | expr_context_e,
|
| 19 | re,
|
| 20 | re_t,
|
| 21 | re_repeat,
|
| 22 | re_repeat_t,
|
| 23 | class_literal_term,
|
| 24 | class_literal_term_t,
|
| 25 | PosixClass,
|
| 26 | PerlClass,
|
| 27 | NameType,
|
| 28 | y_lhs_t,
|
| 29 | Comprehension,
|
| 30 | Subscript,
|
| 31 | Attribute,
|
| 32 | proc_sig,
|
| 33 | proc_sig_t,
|
| 34 | Param,
|
| 35 | RestParam,
|
| 36 | ParamGroup,
|
| 37 | NamedArg,
|
| 38 | ArgList,
|
| 39 | pat,
|
| 40 | pat_t,
|
| 41 | TypeExpr,
|
| 42 | Func,
|
| 43 | Eggex,
|
| 44 | EggexFlag,
|
| 45 | CharCode,
|
| 46 | CharRange,
|
| 47 | VarDecl,
|
| 48 | Mutation,
|
| 49 | )
|
| 50 | from _devbuild.gen.value_asdl import value, value_t
|
| 51 | from _devbuild.gen import grammar_nt
|
| 52 | from core.error import p_die
|
| 53 | from data_lang import j8
|
| 54 | from frontend import consts
|
| 55 | from frontend import lexer
|
| 56 | from frontend import location
|
| 57 | from mycpp import mops
|
| 58 | from mycpp import mylib
|
| 59 | from mycpp.mylib import log, tagswitch
|
| 60 | from osh import word_compile
|
| 61 | from ysh import expr_parse
|
| 62 | from ysh import regex_translate
|
| 63 |
|
| 64 | from typing import TYPE_CHECKING, Dict, List, Tuple, Optional, cast
|
| 65 | if TYPE_CHECKING:
|
| 66 | from pgen2.grammar import Grammar
|
| 67 | from pgen2.pnode import PNode
|
| 68 |
|
| 69 | _ = log
|
| 70 |
|
| 71 | PERL_CLASSES = {
|
| 72 | 'd': 'd',
|
| 73 | 'w': 'w',
|
| 74 | 'word': 'w',
|
| 75 | 's': 's',
|
| 76 | }
|
| 77 | # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html
|
| 78 | POSIX_CLASSES = [
|
| 79 | 'alnum',
|
| 80 | 'cntrl',
|
| 81 | 'lower',
|
| 82 | 'space',
|
| 83 | 'alpha',
|
| 84 | 'digit',
|
| 85 | 'print',
|
| 86 | 'upper',
|
| 87 | 'blank',
|
| 88 | 'graph',
|
| 89 | 'punct',
|
| 90 | 'xdigit',
|
| 91 | ]
|
| 92 | # NOTE: There are also things like \p{Greek} that we could put in the
|
| 93 | # "non-sigil" namespace.
|
| 94 |
|
| 95 | RANGE_POINT_TOO_LONG = "Range start/end shouldn't have more than one character"
|
| 96 |
|
| 97 | POS_ARG_MISPLACED = "Positional arg can't appear in group of named args"
|
| 98 |
|
| 99 | # Copied from pgen2/token.py to avoid dependency.
|
| 100 | NT_OFFSET = 256
|
| 101 |
|
| 102 | if mylib.PYTHON:
|
| 103 |
|
| 104 | def MakeGrammarNames(ysh_grammar):
|
| 105 | # type: (Grammar) -> Dict[int, str]
|
| 106 |
|
| 107 | # TODO: Break this dependency
|
| 108 | from frontend import lexer_def
|
| 109 |
|
| 110 | names = {}
|
| 111 |
|
| 112 | for id_name, k in lexer_def.ID_SPEC.id_str2int.items():
|
| 113 | # Hm some are out of range
|
| 114 | #assert k < 256, (k, id_name)
|
| 115 |
|
| 116 | # TODO: Some tokens have values greater than NT_OFFSET
|
| 117 | if k < NT_OFFSET:
|
| 118 | names[k] = id_name
|
| 119 |
|
| 120 | for k, v in ysh_grammar.number2symbol.items():
|
| 121 | assert k >= NT_OFFSET, (k, v)
|
| 122 | names[k] = v
|
| 123 |
|
| 124 | return names
|
| 125 |
|
| 126 |
|
| 127 | class Transformer(object):
|
| 128 | """Homogeneous parse tree -> heterogeneous AST ("lossless syntax tree")
|
| 129 |
|
| 130 | pgen2 (Python's LL parser generator) doesn't have semantic actions like yacc,
|
| 131 | so this "transformer" is the equivalent.
|
| 132 |
|
| 133 | Files to refer to when modifying this function:
|
| 134 |
|
| 135 | ysh/grammar.pgen2 (generates _devbuild/gen/grammar_nt.py)
|
| 136 | frontend/syntax.asdl (generates _devbuild/gen/syntax_asdl.py)
|
| 137 |
|
| 138 | Related examples:
|
| 139 |
|
| 140 | opy/compiler2/transformer.py (Python's parse tree -> AST, ~1500 lines)
|
| 141 | Python-2.7.13/Python/ast.c (the "real" CPython version, ~3600 lines)
|
| 142 |
|
| 143 | Other:
|
| 144 | frontend/parse_lib.py (turn on print_parse_tree)
|
| 145 |
|
| 146 | Public methods:
|
| 147 | Expr, VarDecl
|
| 148 | atom, trailer, etc. are private, named after productions in grammar.pgen2.
|
| 149 | """
|
| 150 |
|
| 151 | def __init__(self, gr):
|
| 152 | # type: (Grammar) -> None
|
| 153 | self.number2symbol = gr.number2symbol
|
| 154 | if mylib.PYTHON:
|
| 155 | names = MakeGrammarNames(gr)
|
| 156 | # print raw nodes
|
| 157 | self.p_printer = expr_parse.ParseTreePrinter(names)
|
| 158 |
|
| 159 | def _LeftAssoc(self, p_node):
|
| 160 | # type: (PNode) -> expr_t
|
| 161 | """For an associative binary operation.
|
| 162 |
|
| 163 | Examples:
|
| 164 | xor_expr: and_expr ('xor' and_expr)*
|
| 165 | term: factor (('*'|'/'|'%'|'div') factor)*
|
| 166 |
|
| 167 | 3 - 1 - 2 must be grouped as ((3 - 1) - 2).
|
| 168 | """
|
| 169 | # Note: Compare the iteractive com_binary() method in
|
| 170 | # opy/compiler2/transformer.py.
|
| 171 |
|
| 172 | # Examples:
|
| 173 | # - The PNode for '3 - 1' will have 3 children
|
| 174 | # - The PNode for '3 - 1 - 2' will have 5 children
|
| 175 |
|
| 176 | #self.p_printer.Print(p_node)
|
| 177 |
|
| 178 | i = 1 # index of the operator
|
| 179 | n = p_node.NumChildren()
|
| 180 |
|
| 181 | left = self.Expr(p_node.GetChild(0))
|
| 182 | while i < n:
|
| 183 | op = p_node.GetChild(i)
|
| 184 | right = self.Expr(p_node.GetChild(i + 1))
|
| 185 |
|
| 186 | # create a new left node
|
| 187 | left = expr.Binary(op.tok, left, right)
|
| 188 | i += 2
|
| 189 |
|
| 190 | return left
|
| 191 |
|
| 192 | def _Trailer(self, base, p_trailer):
|
| 193 | # type: (expr_t, PNode) -> expr_t
|
| 194 | """
|
| 195 | trailer: ( '(' [arglist] ')' | '[' subscriptlist ']'
|
| 196 | | '.' NAME | '->' NAME | '::' NAME
|
| 197 | )
|
| 198 | """
|
| 199 | tok0 = p_trailer.GetChild(0).tok
|
| 200 | typ0 = p_trailer.GetChild(0).typ
|
| 201 |
|
| 202 | if typ0 == Id.Op_LParen:
|
| 203 | lparen = tok0
|
| 204 | rparen = p_trailer.GetChild(-1).tok
|
| 205 | arglist = ArgList(lparen, [], None, [], None, None, rparen)
|
| 206 | if p_trailer.NumChildren() == 2: # ()
|
| 207 | return expr.FuncCall(base, arglist)
|
| 208 |
|
| 209 | p = p_trailer.GetChild(1) # the X in ( X )
|
| 210 | assert p.typ == grammar_nt.arglist # f(x, y)
|
| 211 | self._ArgList(p, arglist)
|
| 212 | return expr.FuncCall(base, arglist)
|
| 213 |
|
| 214 | if typ0 == Id.Op_LBracket:
|
| 215 | p_args = p_trailer.GetChild(1)
|
| 216 | assert p_args.typ == grammar_nt.subscriptlist
|
| 217 |
|
| 218 | n = p_args.NumChildren()
|
| 219 | if n == 1: # a[1] a[1:2] a[:] etc.
|
| 220 | subscript = self._Subscript(p_args.GetChild(0))
|
| 221 | else: # a[1, 2] a[1:2, :]
|
| 222 | slices = [] # type: List[expr_t]
|
| 223 | for i in xrange(0, n, 2):
|
| 224 | slices.append(self._Subscript(p_args.GetChild(i)))
|
| 225 | # expr.Tuple evaluates to List in YSH.
|
| 226 | #
|
| 227 | # Note that syntactically, a[1:2, 3:4] is the the only way to
|
| 228 | # get a List[Slice]. [1:2, 3:4] by itself is not allowed.
|
| 229 | comma_tok = p_args.GetChild(1).tok
|
| 230 | subscript = expr.Tuple(comma_tok, slices, expr_context_e.Store)
|
| 231 |
|
| 232 | return Subscript(tok0, base, subscript)
|
| 233 |
|
| 234 | if typ0 in (Id.Expr_Dot, Id.Expr_RArrow, Id.Expr_RDArrow):
|
| 235 | attr = p_trailer.GetChild(1).tok # will be Id.Expr_Name
|
| 236 | return Attribute(base, tok0, attr, lexer.TokenVal(attr),
|
| 237 | expr_context_e.Store)
|
| 238 |
|
| 239 | raise AssertionError(typ0)
|
| 240 |
|
| 241 | def _DictPair(self, p_node):
|
| 242 | # type: (PNode) -> Tuple[expr_t, expr_t]
|
| 243 | """
|
| 244 | dict_pair: ( Expr_Name [':' test]
|
| 245 | | '[' testlist ']' ':' test )
|
| 246 | | sq_string ':' test
|
| 247 | | dq_string ':' test )
|
| 248 | """
|
| 249 | assert p_node.typ == grammar_nt.dict_pair
|
| 250 |
|
| 251 | typ = p_node.GetChild(0).typ
|
| 252 |
|
| 253 | if typ in (grammar_nt.sq_string, grammar_nt.dq_string):
|
| 254 | key = self.Expr(p_node.GetChild(0)) # type: expr_t
|
| 255 | val = self.Expr(p_node.GetChild(2))
|
| 256 | return key, val
|
| 257 |
|
| 258 | tok0 = p_node.GetChild(0).tok
|
| 259 | id_ = tok0.id
|
| 260 |
|
| 261 | if id_ == Id.Expr_Name:
|
| 262 | key_str = value.Str(lexer.TokenVal(tok0))
|
| 263 | key = expr.Const(tok0, key_str)
|
| 264 | if p_node.NumChildren() >= 3:
|
| 265 | val = self.Expr(p_node.GetChild(2))
|
| 266 | else:
|
| 267 | val = expr.Implicit
|
| 268 |
|
| 269 | if id_ == Id.Op_LBracket: # {[x+y]: 'val'}
|
| 270 | key = self.Expr(p_node.GetChild(1))
|
| 271 | val = self.Expr(p_node.GetChild(4))
|
| 272 | return key, val
|
| 273 |
|
| 274 | return key, val
|
| 275 |
|
| 276 | def _Dict(self, parent, p_node):
|
| 277 | # type: (PNode, PNode) -> expr.Dict
|
| 278 | """
|
| 279 | dict: dict_pair (comma_newline dict_pair)* [comma_newline]
|
| 280 | """
|
| 281 | if p_node.typ == Id.Op_RBrace: # {}
|
| 282 | return expr.Dict(parent.tok, [], [])
|
| 283 |
|
| 284 | assert p_node.typ == grammar_nt.dict
|
| 285 |
|
| 286 | keys = [] # type: List[expr_t]
|
| 287 | values = [] # type: List[expr_t]
|
| 288 |
|
| 289 | n = p_node.NumChildren()
|
| 290 | for i in xrange(0, n, 2):
|
| 291 | key, val = self._DictPair(p_node.GetChild(i))
|
| 292 | keys.append(key)
|
| 293 | values.append(val)
|
| 294 |
|
| 295 | return expr.Dict(parent.tok, keys, values)
|
| 296 |
|
| 297 | def _Tuple(self, parent):
|
| 298 | # type: (PNode) -> expr_t
|
| 299 |
|
| 300 | n = parent.NumChildren()
|
| 301 |
|
| 302 | # (x) -- not a tuple
|
| 303 | if n == 1:
|
| 304 | return self.Expr(parent.GetChild(0))
|
| 305 |
|
| 306 | # x, and (x,) aren't allowed
|
| 307 | if n == 2:
|
| 308 | p_die('Invalid trailing comma', parent.GetChild(1).tok)
|
| 309 |
|
| 310 | elts = [] # type: List[expr_t]
|
| 311 | for i in xrange(0, n, 2): # skip commas
|
| 312 | p_node = parent.GetChild(i)
|
| 313 | elts.append(self.Expr(p_node))
|
| 314 |
|
| 315 | return expr.Tuple(parent.tok, elts,
|
| 316 | expr_context_e.Store) # unused expr_context_e
|
| 317 |
|
| 318 | def _TestlistComp(self, parent, p_node, id0):
|
| 319 | # type: (PNode, PNode, Id_t) -> expr_t
|
| 320 | """
|
| 321 | testlist_comp:
|
| 322 | (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
| 323 | """
|
| 324 | assert p_node.typ == grammar_nt.testlist_comp
|
| 325 |
|
| 326 | n = p_node.NumChildren()
|
| 327 | if n > 1 and p_node.GetChild(1).typ == grammar_nt.comp_for:
|
| 328 | child0 = p_node.GetChild(0)
|
| 329 | if child0.typ == grammar_nt.splat_expr:
|
| 330 | p_die('Splat not currently supported', child0.tok)
|
| 331 | elt = self.Expr(child0)
|
| 332 |
|
| 333 | comp = self._CompFor(p_node.GetChild(1))
|
| 334 | if id0 == Id.Op_LParen: # (x+1 for x in y)
|
| 335 | return expr.GeneratorExp(elt, [comp])
|
| 336 | if id0 == Id.Op_LBracket: # [x+1 for x in y]
|
| 337 | return expr.ListComp(parent.tok, elt, [comp])
|
| 338 | raise AssertionError()
|
| 339 |
|
| 340 | if id0 == Id.Op_LParen:
|
| 341 | # Parenthesized expression like (x+1) or (x)
|
| 342 | if n == 1:
|
| 343 | return self.Expr(p_node.GetChild(0))
|
| 344 |
|
| 345 | # Tuples (1,) (1, 2) etc. - TODO: should be a list literal?
|
| 346 | if p_node.GetChild(1).typ == Id.Arith_Comma:
|
| 347 | return self._Tuple(p_node)
|
| 348 |
|
| 349 | raise AssertionError()
|
| 350 |
|
| 351 | if id0 == Id.Op_LBracket: # List [1,2,3]
|
| 352 | elts = [] # type: List[expr_t]
|
| 353 | for i in xrange(0, n, 2): # skip commas
|
| 354 | child = p_node.GetChild(i)
|
| 355 | if child.typ == grammar_nt.splat_expr:
|
| 356 | p_die('Splat not currently supported', child.tok)
|
| 357 | elts.append(self.Expr(child))
|
| 358 |
|
| 359 | return expr.List(parent.tok, elts,
|
| 360 | expr_context_e.Store) # unused expr_context_e
|
| 361 |
|
| 362 | raise AssertionError(Id_str(id0))
|
| 363 |
|
| 364 | def _Atom(self, parent):
|
| 365 | # type: (PNode) -> expr_t
|
| 366 | """Handle alternatives of 'atom' where there's more than one child."""
|
| 367 |
|
| 368 | tok = parent.GetChild(0).tok
|
| 369 | id_ = tok.id
|
| 370 | n = parent.NumChildren()
|
| 371 |
|
| 372 | if id_ == Id.Op_LParen:
|
| 373 | # atom: '(' [yield_expr|testlist_comp] ')' | ...
|
| 374 | if n == 2: # () is a tuple
|
| 375 | assert (
|
| 376 | parent.GetChild(1).typ == Id.Op_RParen), parent.GetChild(1)
|
| 377 | return expr.Tuple(tok, [], expr_context_e.Store)
|
| 378 |
|
| 379 | return self._TestlistComp(parent, parent.GetChild(1), id_)
|
| 380 |
|
| 381 | if id_ == Id.Op_LBracket:
|
| 382 | # atom: ... | '[' [testlist_comp] ']' | ...
|
| 383 |
|
| 384 | if n == 2: # []
|
| 385 | assert (parent.GetChild(1).typ == Id.Op_RBracket
|
| 386 | ), parent.GetChild(1)
|
| 387 | return expr.List(tok, [],
|
| 388 | expr_context_e.Store) # unused expr_context_e
|
| 389 |
|
| 390 | return self._TestlistComp(parent, parent.GetChild(1), id_)
|
| 391 |
|
| 392 | if id_ == Id.Left_CaretBracket: # ^[42 + x]
|
| 393 | child = self.Expr(parent.GetChild(1))
|
| 394 | return expr.Literal(child)
|
| 395 |
|
| 396 | if id_ == Id.Op_LBrace:
|
| 397 | # atom: ... | '{' [Op_Newline] [dict] '}'
|
| 398 | i = 1
|
| 399 | if parent.GetChild(i).typ == Id.Op_Newline:
|
| 400 | i += 1
|
| 401 | return self._Dict(parent, parent.GetChild(i))
|
| 402 |
|
| 403 | if id_ == Id.Arith_Amp:
|
| 404 | n = parent.NumChildren()
|
| 405 | if n >= 3:
|
| 406 | p_die("Places in containers not implemented yet",
|
| 407 | parent.GetChild(2).tok)
|
| 408 |
|
| 409 | name_tok = parent.GetChild(1).tok
|
| 410 | return expr.Place(name_tok, lexer.TokenVal(name_tok), [])
|
| 411 |
|
| 412 | if id_ == Id.Expr_Func:
|
| 413 | # STUB. This should really be a Func, not Lambda.
|
| 414 | return expr.Lambda([], expr.Implicit)
|
| 415 |
|
| 416 | # 100 M
|
| 417 | # Ignoring the suffix for now
|
| 418 | if id_ == Id.Expr_DecInt:
|
| 419 | assert n > 1
|
| 420 | p_die("Units suffix not implemented", parent.GetChild(1).tok)
|
| 421 | #return self.Expr(parent.GetChild(0))
|
| 422 |
|
| 423 | # 100.5 M
|
| 424 | # Ignoring the suffix for now
|
| 425 | if id_ == Id.Expr_Float:
|
| 426 | assert n > 1
|
| 427 | p_die("unix suffix implemented", parent.GetChild(1).tok)
|
| 428 | #return self.Expr(parent.GetChild(0))
|
| 429 |
|
| 430 | raise AssertionError(Id_str(id_))
|
| 431 |
|
| 432 | def _NameType(self, p_node):
|
| 433 | # type: (PNode) -> NameType
|
| 434 | """ name_type: Expr_Name [':'] [type_expr] """
|
| 435 | name_tok = p_node.GetChild(0).tok
|
| 436 | typ = None # type: Optional[TypeExpr]
|
| 437 |
|
| 438 | n = p_node.NumChildren()
|
| 439 | if n == 2:
|
| 440 | typ = self._TypeExpr(p_node.GetChild(1))
|
| 441 | if n == 3:
|
| 442 | typ = self._TypeExpr(p_node.GetChild(2))
|
| 443 |
|
| 444 | return NameType(name_tok, lexer.TokenVal(name_tok), typ)
|
| 445 |
|
| 446 | def _NameTypeList(self, p_node):
|
| 447 | # type: (PNode) -> List[NameType]
|
| 448 | """ name_type_list: name_type (',' name_type)* """
|
| 449 | assert p_node.typ == grammar_nt.name_type_list
|
| 450 | results = [] # type: List[NameType]
|
| 451 |
|
| 452 | n = p_node.NumChildren()
|
| 453 | for i in xrange(0, n, 2): # was children[::2]
|
| 454 | results.append(self._NameType(p_node.GetChild(i)))
|
| 455 | return results
|
| 456 |
|
| 457 | def _CompFor(self, p_node):
|
| 458 | # type: (PNode) -> Comprehension
|
| 459 | """comp_for: 'for' exprlist 'in' or_test ['if' or_test]"""
|
| 460 | lhs = self._NameTypeList(p_node.GetChild(1))
|
| 461 | iterable = self.Expr(p_node.GetChild(3))
|
| 462 |
|
| 463 | if p_node.NumChildren() >= 6:
|
| 464 | cond = self.Expr(p_node.GetChild(5))
|
| 465 | else:
|
| 466 | cond = None
|
| 467 |
|
| 468 | return Comprehension(lhs, iterable, cond)
|
| 469 |
|
| 470 | def _CompareChain(self, parent):
|
| 471 | # type: (PNode) -> expr_t
|
| 472 | """comparison: expr (comp_op expr)*"""
|
| 473 | cmp_ops = [] # type: List[Token]
|
| 474 | comparators = [] # type: List[expr_t]
|
| 475 | left = self.Expr(parent.GetChild(0))
|
| 476 |
|
| 477 | i = 1
|
| 478 | n = parent.NumChildren()
|
| 479 | while i < n:
|
| 480 | p = parent.GetChild(i)
|
| 481 | op = p.GetChild(0).tok
|
| 482 | if p.NumChildren() == 2:
|
| 483 | # Blame the first token, and change its type
|
| 484 | if op.id == Id.Expr_Not: # not in
|
| 485 | op.id = Id.Node_NotIn
|
| 486 | elif op.id == Id.Expr_Is: # is not
|
| 487 | op.id = Id.Node_IsNot
|
| 488 | else:
|
| 489 | raise AssertionError()
|
| 490 | else:
|
| 491 | # is, <, ==, etc.
|
| 492 | pass
|
| 493 |
|
| 494 | cmp_ops.append(op)
|
| 495 | i += 1
|
| 496 | comparators.append(self.Expr(parent.GetChild(i)))
|
| 497 | i += 1
|
| 498 | return expr.Compare(left, cmp_ops, comparators)
|
| 499 |
|
| 500 | def _Subscript(self, parent):
|
| 501 | # type: (PNode) -> expr_t
|
| 502 | """subscript: expr | [expr] ':' [expr]"""
|
| 503 | typ0 = parent.GetChild(0).typ
|
| 504 |
|
| 505 | n = parent.NumChildren()
|
| 506 |
|
| 507 | if typ0 == grammar_nt.expr:
|
| 508 | if n == 3: # a[1:2]
|
| 509 | lower = self.Expr(parent.GetChild(0))
|
| 510 | op_tok = parent.GetChild(1).tok
|
| 511 | upper = self.Expr(parent.GetChild(2))
|
| 512 |
|
| 513 | elif n == 2: # a[1:]
|
| 514 | lower = self.Expr(parent.GetChild(0))
|
| 515 | op_tok = parent.GetChild(1).tok
|
| 516 | upper = None
|
| 517 | else: # a[1]
|
| 518 | return self.Expr(parent.GetChild(0))
|
| 519 | else:
|
| 520 | assert typ0 == Id.Arith_Colon
|
| 521 | lower = None
|
| 522 | if n == 1: # a[:]
|
| 523 | op_tok = parent.GetChild(0).tok
|
| 524 | upper = None
|
| 525 | else: # a[:3]
|
| 526 | op_tok = parent.GetChild(0).tok
|
| 527 | upper = self.Expr(parent.GetChild(1))
|
| 528 |
|
| 529 | return expr.Slice(lower, op_tok, upper)
|
| 530 |
|
| 531 | def Expr(self, pnode):
|
| 532 | # type: (PNode) -> expr_t
|
| 533 | """Transform expressions (as opposed to statements)"""
|
| 534 | typ = pnode.typ
|
| 535 |
|
| 536 | #
|
| 537 | # YSH Entry Points / Additions
|
| 538 | #
|
| 539 |
|
| 540 | if typ == grammar_nt.ysh_expr: # for if/while
|
| 541 | # ysh_expr: '(' testlist ')'
|
| 542 | return self.Expr(pnode.GetChild(1))
|
| 543 |
|
| 544 | if typ == grammar_nt.command_expr:
|
| 545 | # return_expr: testlist end_stmt
|
| 546 | return self.Expr(pnode.GetChild(0))
|
| 547 |
|
| 548 | #
|
| 549 | # Python-like Expressions / Operators
|
| 550 | #
|
| 551 |
|
| 552 | if typ == grammar_nt.atom:
|
| 553 | if pnode.NumChildren() == 1:
|
| 554 | return self.Expr(pnode.GetChild(0))
|
| 555 | return self._Atom(pnode)
|
| 556 |
|
| 557 | if typ == grammar_nt.testlist:
|
| 558 | # testlist: test (',' test)* [',']
|
| 559 | return self._Tuple(pnode)
|
| 560 |
|
| 561 | if typ == grammar_nt.test:
|
| 562 | # test: or_test ['if' or_test 'else' test] | lambdef
|
| 563 | if pnode.NumChildren() == 1:
|
| 564 | return self.Expr(pnode.GetChild(0))
|
| 565 |
|
| 566 | # TODO: Handle lambdef
|
| 567 |
|
| 568 | test = self.Expr(pnode.GetChild(2))
|
| 569 | body = self.Expr(pnode.GetChild(0))
|
| 570 | orelse = self.Expr(pnode.GetChild(4))
|
| 571 | return expr.IfExp(test, body, orelse)
|
| 572 |
|
| 573 | if typ == grammar_nt.lambdef:
|
| 574 | # lambdef: '|' [name_type_list] '|' test
|
| 575 |
|
| 576 | n = pnode.NumChildren()
|
| 577 | if n == 4:
|
| 578 | params = self._NameTypeList(pnode.GetChild(1))
|
| 579 | else:
|
| 580 | params = []
|
| 581 |
|
| 582 | body = self.Expr(pnode.GetChild(n - 1))
|
| 583 | return expr.Lambda(params, body)
|
| 584 |
|
| 585 | #
|
| 586 | # Operators with Precedence
|
| 587 | #
|
| 588 |
|
| 589 | if typ == grammar_nt.or_test:
|
| 590 | # or_test: and_test ('or' and_test)*
|
| 591 | return self._LeftAssoc(pnode)
|
| 592 |
|
| 593 | if typ == grammar_nt.and_test:
|
| 594 | # and_test: not_test ('and' not_test)*
|
| 595 | return self._LeftAssoc(pnode)
|
| 596 |
|
| 597 | if typ == grammar_nt.not_test:
|
| 598 | # not_test: 'not' not_test | comparison
|
| 599 | if pnode.NumChildren() == 1:
|
| 600 | return self.Expr(pnode.GetChild(0))
|
| 601 |
|
| 602 | op_tok = pnode.GetChild(0).tok # not
|
| 603 | return expr.Unary(op_tok, self.Expr(pnode.GetChild(1)))
|
| 604 |
|
| 605 | elif typ == grammar_nt.comparison:
|
| 606 | if pnode.NumChildren() == 1:
|
| 607 | return self.Expr(pnode.GetChild(0))
|
| 608 |
|
| 609 | return self._CompareChain(pnode)
|
| 610 |
|
| 611 | elif typ == grammar_nt.range_expr:
|
| 612 | n = pnode.NumChildren()
|
| 613 | if n == 1:
|
| 614 | return self.Expr(pnode.GetChild(0))
|
| 615 |
|
| 616 | if n == 3:
|
| 617 | return expr.Range(self.Expr(pnode.GetChild(0)),
|
| 618 | pnode.GetChild(1).tok,
|
| 619 | self.Expr(pnode.GetChild(2)))
|
| 620 |
|
| 621 | raise AssertionError(n)
|
| 622 |
|
| 623 | elif typ == grammar_nt.expr:
|
| 624 | # expr: xor_expr ('|' xor_expr)*
|
| 625 | return self._LeftAssoc(pnode)
|
| 626 |
|
| 627 | if typ == grammar_nt.xor_expr:
|
| 628 | # xor_expr: and_expr ('xor' and_expr)*
|
| 629 | return self._LeftAssoc(pnode)
|
| 630 |
|
| 631 | if typ == grammar_nt.and_expr: # a & b
|
| 632 | # and_expr: shift_expr ('&' shift_expr)*
|
| 633 | return self._LeftAssoc(pnode)
|
| 634 |
|
| 635 | elif typ == grammar_nt.shift_expr:
|
| 636 | # shift_expr: arith_expr (('<<'|'>>') arith_expr)*
|
| 637 | return self._LeftAssoc(pnode)
|
| 638 |
|
| 639 | elif typ == grammar_nt.arith_expr:
|
| 640 | # arith_expr: term (('+'|'-') term)*
|
| 641 | return self._LeftAssoc(pnode)
|
| 642 |
|
| 643 | elif typ == grammar_nt.term:
|
| 644 | # term: factor (('*'|'/'|'div'|'mod') factor)*
|
| 645 | return self._LeftAssoc(pnode)
|
| 646 |
|
| 647 | elif typ == grammar_nt.factor:
|
| 648 | # factor: ('+'|'-'|'~') factor | power
|
| 649 | # the power would have already been reduced
|
| 650 | if pnode.NumChildren() == 1:
|
| 651 | return self.Expr(pnode.GetChild(0))
|
| 652 |
|
| 653 | assert pnode.NumChildren() == 2
|
| 654 | op = pnode.GetChild(0)
|
| 655 | e = pnode.GetChild(1)
|
| 656 |
|
| 657 | assert isinstance(op.tok, Token)
|
| 658 | return expr.Unary(op.tok, self.Expr(e))
|
| 659 |
|
| 660 | elif typ == grammar_nt.power:
|
| 661 | # power: atom trailer* ['**' factor]
|
| 662 |
|
| 663 | node = self.Expr(pnode.GetChild(0))
|
| 664 | if pnode.NumChildren() == 1: # No trailers
|
| 665 | return node
|
| 666 |
|
| 667 | # Support a->startswith(b) and mydict.key
|
| 668 | n = pnode.NumChildren()
|
| 669 | i = 1
|
| 670 | while i < n and pnode.GetChild(i).typ == grammar_nt.trailer:
|
| 671 | node = self._Trailer(node, pnode.GetChild(i))
|
| 672 | i += 1
|
| 673 |
|
| 674 | if i != n: # ['**' factor]
|
| 675 | op_tok = pnode.GetChild(i).tok
|
| 676 | assert op_tok.id == Id.Arith_DStar, op_tok
|
| 677 | factor = self.Expr(pnode.GetChild(i + 1))
|
| 678 | node = expr.Binary(op_tok, node, factor)
|
| 679 |
|
| 680 | return node
|
| 681 |
|
| 682 | elif typ == grammar_nt.eggex:
|
| 683 | return self._Eggex(pnode)
|
| 684 |
|
| 685 | elif typ == grammar_nt.ysh_expr_sub:
|
| 686 | return self.Expr(pnode.GetChild(0))
|
| 687 |
|
| 688 | #
|
| 689 | # YSH Lexer Modes
|
| 690 | #
|
| 691 |
|
| 692 | elif typ == grammar_nt.sh_array_literal:
|
| 693 | return cast(YshArrayLiteral, pnode.GetChild(1).tok)
|
| 694 |
|
| 695 | elif typ == grammar_nt.old_sh_array_literal:
|
| 696 | return cast(YshArrayLiteral, pnode.GetChild(1).tok)
|
| 697 |
|
| 698 | elif typ == grammar_nt.sh_command_sub:
|
| 699 | return cast(CommandSub, pnode.GetChild(1).tok)
|
| 700 |
|
| 701 | elif typ == grammar_nt.braced_var_sub:
|
| 702 | return cast(BracedVarSub, pnode.GetChild(1).tok)
|
| 703 |
|
| 704 | elif typ == grammar_nt.dq_string:
|
| 705 | dq = cast(DoubleQuoted, pnode.GetChild(1).tok)
|
| 706 | # sugar: ^"..." is short for ^["..."]
|
| 707 | if pnode.GetChild(0).typ == Id.Left_CaretDoubleQuote:
|
| 708 | return expr.Literal(dq)
|
| 709 | return dq
|
| 710 |
|
| 711 | elif typ == grammar_nt.sq_string:
|
| 712 | return cast(SingleQuoted, pnode.GetChild(1).tok)
|
| 713 |
|
| 714 | elif typ == grammar_nt.simple_var_sub:
|
| 715 | tok = pnode.GetChild(0).tok
|
| 716 |
|
| 717 | if tok.id == Id.VSub_DollarName: # $foo is disallowed
|
| 718 | bare = lexer.TokenSliceLeft(tok, 1)
|
| 719 | p_die(
|
| 720 | 'In expressions, remove $ and use `%s`, or sometimes "$%s"'
|
| 721 | % (bare, bare), tok)
|
| 722 |
|
| 723 | # $? is allowed
|
| 724 | return SimpleVarSub(tok)
|
| 725 |
|
| 726 | #
|
| 727 | # Terminals
|
| 728 | #
|
| 729 |
|
| 730 | tok = pnode.tok
|
| 731 | if typ == Id.Expr_Name:
|
| 732 | return expr.Var(tok, lexer.TokenVal(tok))
|
| 733 |
|
| 734 | # Everything else is an expr.Const
|
| 735 | tok_str = lexer.TokenVal(tok)
|
| 736 | # Remove underscores from 1_000_000. The lexer is responsible for
|
| 737 | # validation.
|
| 738 | c_under = tok_str.replace('_', '')
|
| 739 |
|
| 740 | if typ == Id.Expr_DecInt:
|
| 741 | ok, big_int = mops.FromStr2(c_under)
|
| 742 | if not ok:
|
| 743 | p_die('Decimal int constant is too large', tok)
|
| 744 | cval = value.Int(big_int) # type: value_t
|
| 745 |
|
| 746 | elif typ == Id.Expr_BinInt:
|
| 747 | assert c_under[:2] in ('0b', '0B'), c_under
|
| 748 | ok, big_int = mops.FromStr2(c_under[2:], 2)
|
| 749 | if not ok:
|
| 750 | p_die('Binary int constant is too large', tok)
|
| 751 | cval = value.Int(big_int)
|
| 752 |
|
| 753 | elif typ == Id.Expr_OctInt:
|
| 754 | assert c_under[:2] in ('0o', '0O'), c_under
|
| 755 | ok, big_int = mops.FromStr2(c_under[2:], 8)
|
| 756 | if not ok:
|
| 757 | p_die('Octal int constant is too large', tok)
|
| 758 | cval = value.Int(big_int)
|
| 759 |
|
| 760 | elif typ == Id.Expr_HexInt:
|
| 761 | assert c_under[:2] in ('0x', '0X'), c_under
|
| 762 | ok, big_int = mops.FromStr2(c_under[2:], 16)
|
| 763 | if not ok:
|
| 764 | p_die('Hex int constant is too large', tok)
|
| 765 | cval = value.Int(big_int)
|
| 766 |
|
| 767 | elif typ == Id.Expr_Float:
|
| 768 | # Note: float() in mycpp/gc_builtins.cc currently uses strtod
|
| 769 | # I think this never raises ValueError, because the lexer
|
| 770 | # should only accept strings that strtod() does?
|
| 771 | cval = value.Float(float(c_under))
|
| 772 |
|
| 773 | elif typ == Id.Expr_Null:
|
| 774 | cval = value.Null
|
| 775 |
|
| 776 | elif typ == Id.Expr_True:
|
| 777 | cval = value.Bool(True)
|
| 778 |
|
| 779 | elif typ == Id.Expr_False:
|
| 780 | cval = value.Bool(False)
|
| 781 |
|
| 782 | elif typ == Id.Char_OneChar: # \n
|
| 783 | assert len(tok_str) == 2, tok_str
|
| 784 | s = consts.LookupCharC(lexer.TokenSliceLeft(tok, 1))
|
| 785 | cval = value.Str(s)
|
| 786 |
|
| 787 | elif typ == Id.Char_YHex: # \yff
|
| 788 | assert len(tok_str) == 4, tok_str
|
| 789 | hex_str = lexer.TokenSliceLeft(tok, 2)
|
| 790 | s = chr(int(hex_str, 16))
|
| 791 | cval = value.Str(s)
|
| 792 |
|
| 793 | elif typ == Id.Char_UBraced: # \u{123}
|
| 794 | hex_str = lexer.TokenSlice(tok, 3, -1)
|
| 795 | code_point = int(hex_str, 16)
|
| 796 | s = j8.Utf8Encode(code_point)
|
| 797 | cval = value.Str(s)
|
| 798 |
|
| 799 | else:
|
| 800 | raise AssertionError(typ)
|
| 801 |
|
| 802 | return expr.Const(tok, cval)
|
| 803 |
|
| 804 | def _CheckLhs(self, lhs):
|
| 805 | # type: (expr_t) -> None
|
| 806 |
|
| 807 | UP_lhs = lhs
|
| 808 | with tagswitch(lhs) as case:
|
| 809 | if case(expr_e.Var):
|
| 810 | # OK - e.g. setvar a.b.c[i] = 42
|
| 811 | pass
|
| 812 |
|
| 813 | elif case(expr_e.Subscript):
|
| 814 | lhs = cast(Subscript, UP_lhs)
|
| 815 | self._CheckLhs(lhs.obj) # recurse on LHS
|
| 816 |
|
| 817 | elif case(expr_e.Attribute):
|
| 818 | lhs = cast(Attribute, UP_lhs)
|
| 819 | self._CheckLhs(lhs.obj) # recurse on LHS
|
| 820 |
|
| 821 | else:
|
| 822 | # Illegal - e.g. setglobal {}["key"] = 42
|
| 823 | p_die("Subscript/Attribute not allowed on this LHS expression",
|
| 824 | location.TokenForExpr(lhs))
|
| 825 |
|
| 826 | def _LhsExprList(self, p_node):
|
| 827 | # type: (PNode) -> List[y_lhs_t]
|
| 828 | """lhs_list: expr (',' expr)*"""
|
| 829 | assert p_node.typ == grammar_nt.lhs_list
|
| 830 |
|
| 831 | lhs_list = [] # type: List[y_lhs_t]
|
| 832 | n = p_node.NumChildren()
|
| 833 | for i in xrange(0, n, 2):
|
| 834 | p = p_node.GetChild(i)
|
| 835 | #self.p_printer.Print(p)
|
| 836 |
|
| 837 | e = self.Expr(p)
|
| 838 | UP_e = e
|
| 839 | with tagswitch(e) as case:
|
| 840 | if case(expr_e.Var):
|
| 841 | e = cast(expr.Var, UP_e)
|
| 842 | lhs_list.append(e.left)
|
| 843 |
|
| 844 | elif case(expr_e.Subscript):
|
| 845 | e = cast(Subscript, UP_e)
|
| 846 | self._CheckLhs(e)
|
| 847 | lhs_list.append(e)
|
| 848 |
|
| 849 | elif case(expr_e.Attribute):
|
| 850 | e = cast(Attribute, UP_e)
|
| 851 | self._CheckLhs(e)
|
| 852 | if e.op.id != Id.Expr_Dot:
|
| 853 | # e.g. setvar obj->method is not valid
|
| 854 | p_die("Can't assign to this attribute expr", e.op)
|
| 855 | lhs_list.append(e)
|
| 856 |
|
| 857 | else:
|
| 858 | pass # work around mycpp bug
|
| 859 |
|
| 860 | # TODO: could blame arbitary expr_t, bu this works most of
|
| 861 | # the time
|
| 862 | if p.tok:
|
| 863 | blame = p.tok # type: loc_t
|
| 864 | else:
|
| 865 | blame = loc.Missing
|
| 866 | p_die("Can't assign to this expression", blame)
|
| 867 |
|
| 868 | return lhs_list
|
| 869 |
|
| 870 | def MakeVarDecl(self, p_node):
|
| 871 | # type: (PNode) -> VarDecl
|
| 872 | """
|
| 873 | ysh_var_decl: name_type_list ['=' testlist] end_stmt
|
| 874 | """
|
| 875 | assert p_node.typ == grammar_nt.ysh_var_decl
|
| 876 |
|
| 877 | lhs = self._NameTypeList(p_node.GetChild(0)) # could be a tuple
|
| 878 |
|
| 879 | # This syntax is confusing, and different than JavaScript
|
| 880 | # var x, y = 1, 2
|
| 881 | # But this is useful:
|
| 882 | # var flag, i = parseArgs(spec, argv)
|
| 883 |
|
| 884 | n = p_node.NumChildren()
|
| 885 | if n >= 3:
|
| 886 | rhs = self.Expr(p_node.GetChild(2))
|
| 887 | else:
|
| 888 | rhs = None
|
| 889 |
|
| 890 | # The caller should fill in the keyword token.
|
| 891 | return VarDecl(None, lhs, rhs)
|
| 892 |
|
| 893 | def MakeMutation(self, p_node):
|
| 894 | # type: (PNode) -> Mutation
|
| 895 | """
|
| 896 | ysh_mutation: lhs_list (augassign | '=') testlist end_stmt
|
| 897 | """
|
| 898 | assert p_node.typ == grammar_nt.ysh_mutation
|
| 899 |
|
| 900 | lhs_list = self._LhsExprList(p_node.GetChild(0)) # could be a tuple
|
| 901 | op_tok = p_node.GetChild(1).tok
|
| 902 | if len(lhs_list) > 1 and op_tok.id != Id.Arith_Equal:
|
| 903 | p_die('Multiple assignment must use =', op_tok)
|
| 904 | rhs = self.Expr(p_node.GetChild(2))
|
| 905 | return Mutation(None, lhs_list, op_tok, rhs)
|
| 906 |
|
| 907 | def _EggexFlag(self, p_node):
|
| 908 | # type: (PNode) -> EggexFlag
|
| 909 | n = p_node.NumChildren()
|
| 910 | if n == 1:
|
| 911 | return EggexFlag(False, p_node.GetChild(0).tok)
|
| 912 | elif n == 2:
|
| 913 | return EggexFlag(True, p_node.GetChild(1).tok)
|
| 914 | else:
|
| 915 | raise AssertionError()
|
| 916 |
|
| 917 | def _Eggex(self, p_node):
|
| 918 | # type: (PNode) -> Eggex
|
| 919 | """
|
| 920 | eggex: '/' regex [';' re_flag* [';' Expr_Name] ] '/'
|
| 921 | """
|
| 922 | left = p_node.GetChild(0).tok
|
| 923 | regex = self._Regex(p_node.GetChild(1))
|
| 924 |
|
| 925 | flags = [] # type: List[EggexFlag]
|
| 926 | trans_pref = None # type: Optional[Token]
|
| 927 |
|
| 928 | i = 2
|
| 929 | current = p_node.GetChild(i)
|
| 930 | if current.typ == Id.Op_Semi:
|
| 931 | i += 1
|
| 932 | while True:
|
| 933 | current = p_node.GetChild(i)
|
| 934 | if current.typ != grammar_nt.re_flag:
|
| 935 | break
|
| 936 | flags.append(self._EggexFlag(current))
|
| 937 | i += 1
|
| 938 |
|
| 939 | if current.typ == Id.Op_Semi:
|
| 940 | i += 1
|
| 941 | trans_pref = p_node.GetChild(i).tok
|
| 942 |
|
| 943 | # Canonicalize and validate flags for ERE only. Default is ERE.
|
| 944 | if trans_pref is None or lexer.TokenVal(trans_pref) == 'ERE':
|
| 945 | canonical_flags = regex_translate.CanonicalFlags(flags)
|
| 946 | else:
|
| 947 | canonical_flags = None
|
| 948 |
|
| 949 | return Eggex(left, regex, flags, trans_pref, canonical_flags)
|
| 950 |
|
| 951 | def YshCasePattern(self, pnode):
|
| 952 | # type: (PNode) -> pat_t
|
| 953 | assert pnode.typ == grammar_nt.ysh_case_pat, pnode
|
| 954 |
|
| 955 | pattern = pnode.GetChild(0)
|
| 956 | typ = pattern.typ
|
| 957 | if typ == Id.Op_LParen:
|
| 958 | # pat_expr or pat_else
|
| 959 | pattern = pnode.GetChild(1)
|
| 960 | typ = pattern.typ
|
| 961 |
|
| 962 | if typ == grammar_nt.pat_else:
|
| 963 | return pat.Else
|
| 964 |
|
| 965 | if typ == grammar_nt.pat_exprs:
|
| 966 | exprs = [] # type: List[expr_t]
|
| 967 | for i in xrange(pattern.NumChildren()):
|
| 968 | child = pattern.GetChild(i)
|
| 969 | if child.typ == grammar_nt.expr:
|
| 970 | expr = self.Expr(child)
|
| 971 | exprs.append(expr)
|
| 972 | return pat.YshExprs(exprs)
|
| 973 |
|
| 974 | if typ == grammar_nt.eggex:
|
| 975 | return self._Eggex(pattern)
|
| 976 |
|
| 977 | raise AssertionError()
|
| 978 |
|
| 979 | def _BlockArg(self, p_node):
|
| 980 | # type: (PNode) -> expr_t
|
| 981 |
|
| 982 | n = p_node.NumChildren()
|
| 983 | if n == 1:
|
| 984 | child = p_node.GetChild(0)
|
| 985 | return self.Expr(child)
|
| 986 |
|
| 987 | # It can only be an expression, not a=42, or ...expr
|
| 988 | p_die('Invalid block expression argument', p_node.tok)
|
| 989 |
|
| 990 | def _Argument(self, p_node, after_semi, arglist):
|
| 991 | # type: (PNode, bool, ArgList) -> None
|
| 992 | """
|
| 993 | argument: (
|
| 994 | test [comp_for]
|
| 995 | | test '=' test # named arg
|
| 996 | | '...' test # var args
|
| 997 | )
|
| 998 | """
|
| 999 | pos_args = arglist.pos_args
|
| 1000 | named_args = arglist.named_args
|
| 1001 |
|
| 1002 | assert p_node.typ == grammar_nt.argument, p_node
|
| 1003 | n = p_node.NumChildren()
|
| 1004 | if n == 1:
|
| 1005 | child = p_node.GetChild(0)
|
| 1006 | if after_semi:
|
| 1007 | p_die(POS_ARG_MISPLACED, child.tok)
|
| 1008 | arg = self.Expr(child)
|
| 1009 | pos_args.append(arg)
|
| 1010 | return
|
| 1011 |
|
| 1012 | if n == 2:
|
| 1013 | # Note: We allow multiple spreads, just like Julia. They are
|
| 1014 | # concatenated as in lists and dicts.
|
| 1015 | tok0 = p_node.GetChild(0).tok
|
| 1016 | if tok0.id == Id.Expr_Ellipsis:
|
| 1017 | spread_expr = expr.Spread(tok0, self.Expr(p_node.GetChild(1)))
|
| 1018 | if after_semi: # f(; ... named)
|
| 1019 | named_args.append(NamedArg(None, spread_expr))
|
| 1020 | else: # f(...named)
|
| 1021 | pos_args.append(spread_expr)
|
| 1022 | return
|
| 1023 |
|
| 1024 | # Note: generator expression not implemented
|
| 1025 | if p_node.GetChild(1).typ == grammar_nt.comp_for:
|
| 1026 | child = p_node.GetChild(0)
|
| 1027 | if after_semi:
|
| 1028 | p_die(POS_ARG_MISPLACED, child.tok)
|
| 1029 |
|
| 1030 | elt = self.Expr(child)
|
| 1031 | comp = self._CompFor(p_node.GetChild(1))
|
| 1032 | arg = expr.GeneratorExp(elt, [comp])
|
| 1033 | pos_args.append(arg)
|
| 1034 | return
|
| 1035 |
|
| 1036 | raise AssertionError()
|
| 1037 |
|
| 1038 | if n == 3: # named args can come before or after the semicolon
|
| 1039 | n1 = NamedArg(
|
| 1040 | p_node.GetChild(0).tok, self.Expr(p_node.GetChild(2)))
|
| 1041 | named_args.append(n1)
|
| 1042 | return
|
| 1043 |
|
| 1044 | raise AssertionError()
|
| 1045 |
|
| 1046 | def _ArgGroup(self, p_node, after_semi, arglist):
|
| 1047 | # type: (PNode, bool, ArgList) -> None
|
| 1048 | """
|
| 1049 | arg_group: argument (',' argument)* [',']
|
| 1050 | """
|
| 1051 | for i in xrange(p_node.NumChildren()):
|
| 1052 | p_child = p_node.GetChild(i)
|
| 1053 | if p_child.typ == grammar_nt.argument:
|
| 1054 | self._Argument(p_child, after_semi, arglist)
|
| 1055 |
|
| 1056 | def _ArgList(self, p_node, arglist):
|
| 1057 | # type: (PNode, ArgList) -> None
|
| 1058 | """For both funcs and procs
|
| 1059 |
|
| 1060 | arglist: (
|
| 1061 | [arg_group]
|
| 1062 | [';' [arg_group]]
|
| 1063 | )
|
| 1064 |
|
| 1065 | arglist3: ...
|
| 1066 | """
|
| 1067 | n = p_node.NumChildren()
|
| 1068 | if n == 0:
|
| 1069 | return
|
| 1070 |
|
| 1071 | i = 0
|
| 1072 |
|
| 1073 | if i >= n:
|
| 1074 | return
|
| 1075 | child = p_node.GetChild(i)
|
| 1076 | if child.typ == grammar_nt.arg_group:
|
| 1077 | self._ArgGroup(child, False, arglist)
|
| 1078 | i += 1
|
| 1079 |
|
| 1080 | if i >= n:
|
| 1081 | return
|
| 1082 | child = p_node.GetChild(i)
|
| 1083 | if child.typ == Id.Op_Semi:
|
| 1084 | arglist.semi_tok = child.tok
|
| 1085 | i += 1
|
| 1086 |
|
| 1087 | # Named args after first semi-colon
|
| 1088 | if i >= n:
|
| 1089 | return
|
| 1090 | child = p_node.GetChild(i)
|
| 1091 | if child.typ == grammar_nt.arg_group:
|
| 1092 | self._ArgGroup(child, True, arglist)
|
| 1093 | i += 1
|
| 1094 |
|
| 1095 | #
|
| 1096 | # Special third group may have block expression - only for arglist3,
|
| 1097 | # used for procs!
|
| 1098 | #
|
| 1099 |
|
| 1100 | if i >= n:
|
| 1101 | return
|
| 1102 | assert p_node.typ == grammar_nt.arglist3, p_node
|
| 1103 |
|
| 1104 | child = p_node.GetChild(i)
|
| 1105 | if child.typ == Id.Op_Semi:
|
| 1106 | arglist.semi_tok2 = child.tok
|
| 1107 | i += 1
|
| 1108 |
|
| 1109 | if i >= n:
|
| 1110 | return
|
| 1111 | child = p_node.GetChild(i)
|
| 1112 | if child.typ == grammar_nt.argument:
|
| 1113 | arglist.block_expr = self._BlockArg(child)
|
| 1114 | i += 1
|
| 1115 |
|
| 1116 | def ProcCallArgs(self, pnode, arglist):
|
| 1117 | # type: (PNode, ArgList) -> None
|
| 1118 | """
|
| 1119 | ysh_eager_arglist: '(' [arglist3] ')'
|
| 1120 | ysh_lazy_arglist: '[' [arglist] ']'
|
| 1121 | """
|
| 1122 | n = pnode.NumChildren()
|
| 1123 | if n == 2: # f()
|
| 1124 | return
|
| 1125 |
|
| 1126 | if n == 3:
|
| 1127 | child1 = pnode.GetChild(1) # the X in '( X )'
|
| 1128 |
|
| 1129 | self._ArgList(child1, arglist)
|
| 1130 | return
|
| 1131 |
|
| 1132 | raise AssertionError()
|
| 1133 |
|
| 1134 | def _TypeExpr(self, pnode):
|
| 1135 | # type: (PNode) -> TypeExpr
|
| 1136 | """
|
| 1137 | type_expr: Expr_Name [ '[' type_expr (',' type_expr)* ']' ]
|
| 1138 | """
|
| 1139 | assert pnode.typ == grammar_nt.type_expr, pnode.typ
|
| 1140 |
|
| 1141 | ty = TypeExpr.CreateNull() # don't allocate children
|
| 1142 |
|
| 1143 | ty.tok = pnode.GetChild(0).tok
|
| 1144 | ty.name = lexer.TokenVal(ty.tok)
|
| 1145 |
|
| 1146 | n = pnode.NumChildren()
|
| 1147 | if n == 1:
|
| 1148 | return ty
|
| 1149 |
|
| 1150 | ty.params = []
|
| 1151 | i = 2
|
| 1152 | while i < n:
|
| 1153 | p = self._TypeExpr(pnode.GetChild(i))
|
| 1154 | ty.params.append(p)
|
| 1155 | i += 2 # skip comma
|
| 1156 |
|
| 1157 | return ty
|
| 1158 |
|
| 1159 | def _Param(self, pnode):
|
| 1160 | # type: (PNode) -> Param
|
| 1161 | """
|
| 1162 | param: Expr_Name [type_expr] ['=' expr]
|
| 1163 | """
|
| 1164 | assert pnode.typ == grammar_nt.param
|
| 1165 |
|
| 1166 | name_tok = pnode.GetChild(0).tok
|
| 1167 | n = pnode.NumChildren()
|
| 1168 |
|
| 1169 | assert name_tok.id == Id.Expr_Name, name_tok
|
| 1170 |
|
| 1171 | default_val = None # type: expr_t
|
| 1172 | type_ = None # type: TypeExpr
|
| 1173 |
|
| 1174 | if n == 1:
|
| 1175 | # proc p(a)
|
| 1176 | pass
|
| 1177 |
|
| 1178 | elif n == 2:
|
| 1179 | # proc p(a Int)
|
| 1180 | type_ = self._TypeExpr(pnode.GetChild(1))
|
| 1181 |
|
| 1182 | elif n == 3:
|
| 1183 | # proc p(a = 3)
|
| 1184 | default_val = self.Expr(pnode.GetChild(2))
|
| 1185 |
|
| 1186 | elif n == 4:
|
| 1187 | # proc p(a Int = 3)
|
| 1188 | type_ = self._TypeExpr(pnode.GetChild(1))
|
| 1189 | default_val = self.Expr(pnode.GetChild(3))
|
| 1190 |
|
| 1191 | return Param(name_tok, lexer.TokenVal(name_tok), type_, default_val)
|
| 1192 |
|
| 1193 | def _ParamGroup(self, p_node):
|
| 1194 | # type: (PNode) -> ParamGroup
|
| 1195 | """
|
| 1196 | param_group:
|
| 1197 | (param ',')*
|
| 1198 | [ (param | '...' Expr_Name) [,] ]
|
| 1199 | """
|
| 1200 | assert p_node.typ == grammar_nt.param_group, p_node
|
| 1201 |
|
| 1202 | params = [] # type: List[Param]
|
| 1203 | rest_of = None # type: Optional[RestParam]
|
| 1204 |
|
| 1205 | n = p_node.NumChildren()
|
| 1206 | i = 0
|
| 1207 | while i < n:
|
| 1208 | child = p_node.GetChild(i)
|
| 1209 | if child.typ == grammar_nt.param:
|
| 1210 | params.append(self._Param(child))
|
| 1211 |
|
| 1212 | elif child.typ == Id.Expr_Ellipsis:
|
| 1213 | tok = p_node.GetChild(i + 1).tok
|
| 1214 | rest_of = RestParam(tok, lexer.TokenVal(tok))
|
| 1215 |
|
| 1216 | i += 2
|
| 1217 |
|
| 1218 | return ParamGroup(params, rest_of)
|
| 1219 |
|
| 1220 | def Proc(self, p_node):
|
| 1221 | # type: (PNode) -> proc_sig_t
|
| 1222 | """
|
| 1223 | ysh_proc: (
|
| 1224 | [ '('
|
| 1225 | [ param_group ] # word params, with defaults
|
| 1226 | [ ';' [ param_group ] ] # positional typed params, with defaults
|
| 1227 | [ ';' [ param_group ] ] # named params, with defaults
|
| 1228 | [ ';' Expr_Name ] # optional block param, with no type or default
|
| 1229 | ')'
|
| 1230 | ]
|
| 1231 | '{' # opening { for pgen2
|
| 1232 | )
|
| 1233 | """
|
| 1234 | assert p_node.typ == grammar_nt.ysh_proc
|
| 1235 |
|
| 1236 | n = p_node.NumChildren()
|
| 1237 | if n == 1: # proc f {
|
| 1238 | return proc_sig.Open
|
| 1239 |
|
| 1240 | if n == 3: # proc f () {
|
| 1241 | sig = proc_sig.Closed.CreateNull(alloc_lists=True) # no params
|
| 1242 |
|
| 1243 | # proc f( three param groups, and block group )
|
| 1244 | sig = proc_sig.Closed.CreateNull(alloc_lists=True) # no params
|
| 1245 |
|
| 1246 | # Word args
|
| 1247 | i = 1
|
| 1248 | child = p_node.GetChild(i)
|
| 1249 | if child.typ == grammar_nt.param_group:
|
| 1250 | sig.word = self._ParamGroup(p_node.GetChild(i))
|
| 1251 |
|
| 1252 | # Validate word args
|
| 1253 | for word in sig.word.params:
|
| 1254 | if word.type:
|
| 1255 | if word.type.name not in ('Str', 'Ref'):
|
| 1256 | p_die('Word params may only have type Str or Ref',
|
| 1257 | word.type.tok)
|
| 1258 | if word.type.params is not None:
|
| 1259 | p_die('Unexpected type parameters', word.type.tok)
|
| 1260 |
|
| 1261 | i += 2
|
| 1262 | else:
|
| 1263 | i += 1
|
| 1264 |
|
| 1265 | #log('i %d n %d', i, n)
|
| 1266 | if i >= n:
|
| 1267 | return sig
|
| 1268 |
|
| 1269 | # Positional args
|
| 1270 | child = p_node.GetChild(i)
|
| 1271 | if child.typ == grammar_nt.param_group:
|
| 1272 | sig.positional = self._ParamGroup(p_node.GetChild(i))
|
| 1273 | i += 2
|
| 1274 | else:
|
| 1275 | i += 1
|
| 1276 |
|
| 1277 | #log('i %d n %d', i, n)
|
| 1278 | if i >= n:
|
| 1279 | return sig
|
| 1280 |
|
| 1281 | # Keyword args
|
| 1282 | child = p_node.GetChild(i)
|
| 1283 | if child.typ == grammar_nt.param_group:
|
| 1284 | sig.named = self._ParamGroup(p_node.GetChild(i))
|
| 1285 | i += 2
|
| 1286 | else:
|
| 1287 | i += 1
|
| 1288 |
|
| 1289 | #log('i %d n %d', i, n)
|
| 1290 | if i >= n:
|
| 1291 | return sig
|
| 1292 |
|
| 1293 | child = p_node.GetChild(i)
|
| 1294 | if child.typ == grammar_nt.param_group:
|
| 1295 | group = self._ParamGroup(p_node.GetChild(i))
|
| 1296 | params = group.params
|
| 1297 | if len(params) > 1:
|
| 1298 | p_die('Only 1 block param is allowed', params[1].blame_tok)
|
| 1299 | if group.rest_of:
|
| 1300 | p_die("Rest param isn't allowed for blocks",
|
| 1301 | group.rest_of.blame_tok)
|
| 1302 |
|
| 1303 | if len(params) == 1:
|
| 1304 | if params[0].type:
|
| 1305 | if params[0].type.name != 'Command':
|
| 1306 | p_die('Block param must have type Command',
|
| 1307 | params[0].type.tok)
|
| 1308 | if params[0].type.params is not None:
|
| 1309 | p_die('Unexpected type parameters', params[0].type.tok)
|
| 1310 |
|
| 1311 | sig.block_param = params[0]
|
| 1312 |
|
| 1313 | return sig
|
| 1314 |
|
| 1315 | def YshFunc(self, p_node, out):
|
| 1316 | # type: (PNode, Func) -> None
|
| 1317 | """
|
| 1318 | ysh_func: Expr_Name '(' [param_group] [';' param_group] ')'
|
| 1319 | """
|
| 1320 | assert p_node.typ == grammar_nt.ysh_func
|
| 1321 |
|
| 1322 | #self.p_printer.Print(p_node)
|
| 1323 |
|
| 1324 | out.name = p_node.GetChild(0).tok
|
| 1325 |
|
| 1326 | n = p_node.NumChildren()
|
| 1327 | i = 2 # after (
|
| 1328 |
|
| 1329 | child = p_node.GetChild(i)
|
| 1330 | if child.typ == grammar_nt.param_group:
|
| 1331 | out.positional = self._ParamGroup(child)
|
| 1332 | i += 2 # skip past ;
|
| 1333 | else:
|
| 1334 | i += 1
|
| 1335 |
|
| 1336 | if i >= n:
|
| 1337 | return
|
| 1338 |
|
| 1339 | child = p_node.GetChild(i)
|
| 1340 | if child.typ == grammar_nt.param_group:
|
| 1341 | out.named = self._ParamGroup(child)
|
| 1342 |
|
| 1343 | #
|
| 1344 | # Eggex Language
|
| 1345 | #
|
| 1346 |
|
| 1347 | def _RangeCharSingleQuoted(self, p_node):
|
| 1348 | # type: (PNode) -> Optional[CharCode]
|
| 1349 |
|
| 1350 | assert p_node.typ == grammar_nt.range_char, p_node
|
| 1351 |
|
| 1352 | # 'a' in 'a'-'b'
|
| 1353 |
|
| 1354 | child0 = p_node.GetChild(0)
|
| 1355 | if child0.typ == grammar_nt.sq_string:
|
| 1356 | sq_part = cast(SingleQuoted, child0.GetChild(1).tok)
|
| 1357 | n = len(sq_part.sval)
|
| 1358 | if n == 0:
|
| 1359 | p_die("Quoted range char can't be empty",
|
| 1360 | loc.WordPart(sq_part))
|
| 1361 | elif n == 1:
|
| 1362 | return CharCode(sq_part.left, ord(sq_part.sval[0]), False)
|
| 1363 | else:
|
| 1364 | p_die(RANGE_POINT_TOO_LONG, loc.WordPart(sq_part))
|
| 1365 | return None
|
| 1366 |
|
| 1367 | def _OtherRangeToken(self, p_node):
|
| 1368 | # type: (PNode) -> Token
|
| 1369 | """An endpoint of a range (single char)
|
| 1370 |
|
| 1371 | range_char: Expr_Name | Expr_DecInt | sq_string | char_literal
|
| 1372 | a-z 0-9 'a'-'z' \x00-\xff
|
| 1373 | """
|
| 1374 | assert p_node.typ == grammar_nt.range_char, p_node
|
| 1375 |
|
| 1376 | child0 = p_node.GetChild(0)
|
| 1377 | if child0.typ == grammar_nt.char_literal:
|
| 1378 | # \x00 in /[\x00 - \x20]/
|
| 1379 | tok = child0.GetChild(0).tok
|
| 1380 | return tok
|
| 1381 |
|
| 1382 | tok = p_node.tok
|
| 1383 | # a in a-z is Expr_Name
|
| 1384 | # 0 in 0-9 is Expr_DecInt
|
| 1385 | assert tok.id in (Id.Expr_Name, Id.Expr_DecInt), tok
|
| 1386 |
|
| 1387 | if tok.length != 1:
|
| 1388 | p_die(RANGE_POINT_TOO_LONG, tok)
|
| 1389 | return tok
|
| 1390 |
|
| 1391 | def _NonRangeChars(self, p_node):
|
| 1392 | # type: (PNode) -> class_literal_term_t
|
| 1393 | """
|
| 1394 | \" \u1234 '#'
|
| 1395 | """
|
| 1396 | assert p_node.typ == grammar_nt.range_char, p_node
|
| 1397 |
|
| 1398 | child0 = p_node.GetChild(0)
|
| 1399 | typ0 = p_node.GetChild(0).typ
|
| 1400 |
|
| 1401 | if typ0 == grammar_nt.sq_string:
|
| 1402 | return cast(SingleQuoted, child0.GetChild(1).tok)
|
| 1403 |
|
| 1404 | if typ0 == grammar_nt.char_literal:
|
| 1405 | return word_compile.EvalCharLiteralForRegex(child0.tok)
|
| 1406 |
|
| 1407 | if typ0 == Id.Expr_Name:
|
| 1408 | # Look up PerlClass and PosixClass
|
| 1409 | return self._NameInClass(None, child0.tok)
|
| 1410 |
|
| 1411 | raise AssertionError()
|
| 1412 |
|
| 1413 | def _ClassLiteralTerm(self, p_node):
|
| 1414 | # type: (PNode) -> class_literal_term_t
|
| 1415 | """
|
| 1416 | class_literal_term:
|
| 1417 | range_char ['-' range_char ]
|
| 1418 | | '@' Expr_Name # splice
|
| 1419 | | '!' Expr_Name # negate char class
|
| 1420 | ...
|
| 1421 | """
|
| 1422 | assert p_node.typ == grammar_nt.class_literal_term, p_node
|
| 1423 |
|
| 1424 | typ0 = p_node.GetChild(0).typ
|
| 1425 |
|
| 1426 | if typ0 == grammar_nt.range_char:
|
| 1427 | n = p_node.NumChildren()
|
| 1428 |
|
| 1429 | if n == 1:
|
| 1430 | return self._NonRangeChars(p_node.GetChild(0))
|
| 1431 |
|
| 1432 | # 'a'-'z' etc.
|
| 1433 | if n == 3:
|
| 1434 | assert p_node.GetChild(1).typ == Id.Arith_Minus, p_node
|
| 1435 |
|
| 1436 | left = p_node.GetChild(0)
|
| 1437 | right = p_node.GetChild(2)
|
| 1438 |
|
| 1439 | code1 = self._RangeCharSingleQuoted(left)
|
| 1440 | if code1 is None:
|
| 1441 | tok1 = self._OtherRangeToken(left)
|
| 1442 | code1 = word_compile.EvalCharLiteralForRegex(tok1)
|
| 1443 |
|
| 1444 | code2 = self._RangeCharSingleQuoted(right)
|
| 1445 | if code2 is None:
|
| 1446 | tok2 = self._OtherRangeToken(right)
|
| 1447 | code2 = word_compile.EvalCharLiteralForRegex(tok2)
|
| 1448 | return CharRange(code1, code2)
|
| 1449 |
|
| 1450 | raise AssertionError()
|
| 1451 |
|
| 1452 | if typ0 == Id.Expr_At:
|
| 1453 | tok1 = p_node.GetChild(1).tok
|
| 1454 | return class_literal_term.Splice(tok1, lexer.TokenVal(tok1))
|
| 1455 |
|
| 1456 | if typ0 == Id.Expr_Bang:
|
| 1457 | return self._NameInClass(
|
| 1458 | p_node.GetChild(0).tok,
|
| 1459 | p_node.GetChild(1).tok)
|
| 1460 |
|
| 1461 | p_die("This kind of class literal term isn't implemented",
|
| 1462 | p_node.GetChild(0).tok)
|
| 1463 |
|
| 1464 | def _ClassLiteral(self, p_node):
|
| 1465 | # type: (PNode) -> List[class_literal_term_t]
|
| 1466 | """class_literal: '[' class_literal_term+ ']'."""
|
| 1467 | assert p_node.typ == grammar_nt.class_literal
|
| 1468 | # skip [ and ]
|
| 1469 | terms = [] # type: List[class_literal_term_t]
|
| 1470 | for i in xrange(1, p_node.NumChildren() - 1):
|
| 1471 | terms.append(self._ClassLiteralTerm(p_node.GetChild(i)))
|
| 1472 |
|
| 1473 | return terms
|
| 1474 |
|
| 1475 | def _NameInRegex(self, negated_tok, tok):
|
| 1476 | # type: (Token, Token) -> re_t
|
| 1477 | tok_str = lexer.TokenVal(tok)
|
| 1478 | if tok_str == 'dot':
|
| 1479 | if negated_tok:
|
| 1480 | p_die("Can't negate this symbol", tok)
|
| 1481 | return re.Primitive(tok, Id.Eggex_Dot)
|
| 1482 |
|
| 1483 | if tok_str in POSIX_CLASSES:
|
| 1484 | return PosixClass(negated_tok, tok_str)
|
| 1485 |
|
| 1486 | perl = PERL_CLASSES.get(tok_str)
|
| 1487 | if perl is not None:
|
| 1488 | return PerlClass(negated_tok, perl)
|
| 1489 |
|
| 1490 | if tok_str[0].isupper(): # e.g. HexDigit
|
| 1491 | return re.Splice(tok, lexer.TokenVal(tok))
|
| 1492 |
|
| 1493 | p_die("%r isn't a character class" % tok_str, tok)
|
| 1494 |
|
| 1495 | def _NameInClass(self, negated_tok, tok):
|
| 1496 | # type: (Token, Token) -> class_literal_term_t
|
| 1497 | """Like the above, but 'dot' and 'd' don't mean anything within []"""
|
| 1498 | tok_str = lexer.TokenVal(tok)
|
| 1499 |
|
| 1500 | # A bare, unquoted character literal. In the grammar, this is expressed as
|
| 1501 | # range_char without an ending.
|
| 1502 |
|
| 1503 | # d is NOT 'digit', it's a literal 'd'!
|
| 1504 | if len(tok_str) == 1:
|
| 1505 | # Expr_Name matches VAR_NAME_RE, which starts with [a-zA-Z_]
|
| 1506 | assert tok.id in (Id.Expr_Name, Id.Expr_DecInt)
|
| 1507 |
|
| 1508 | if negated_tok: # [~d] is not allowed, only [~digit]
|
| 1509 | p_die("Can't negate this symbol", tok)
|
| 1510 | return word_compile.EvalCharLiteralForRegex(tok)
|
| 1511 |
|
| 1512 | # digit, word, but not d, w, etc.
|
| 1513 | if tok_str in POSIX_CLASSES:
|
| 1514 | return PosixClass(negated_tok, tok_str)
|
| 1515 |
|
| 1516 | perl = PERL_CLASSES.get(tok_str)
|
| 1517 | if perl is not None:
|
| 1518 | return PerlClass(negated_tok, perl)
|
| 1519 | p_die("%r isn't a character class" % tok_str, tok)
|
| 1520 |
|
| 1521 | def _ReAtom(self, p_atom):
|
| 1522 | # type: (PNode) -> re_t
|
| 1523 | """
|
| 1524 | re_atom: ( char_literal | ...
|
| 1525 | """
|
| 1526 | assert p_atom.typ == grammar_nt.re_atom, p_atom.typ
|
| 1527 |
|
| 1528 | child0 = p_atom.GetChild(0)
|
| 1529 |
|
| 1530 | typ0 = p_atom.GetChild(0).typ
|
| 1531 | tok0 = p_atom.GetChild(0).tok
|
| 1532 |
|
| 1533 | # Non-terminals
|
| 1534 |
|
| 1535 | if typ0 == grammar_nt.class_literal:
|
| 1536 | return re.CharClassLiteral(False, self._ClassLiteral(child0))
|
| 1537 |
|
| 1538 | if typ0 == grammar_nt.sq_string:
|
| 1539 | return cast(SingleQuoted, child0.GetChild(1).tok)
|
| 1540 |
|
| 1541 | if typ0 == grammar_nt.char_literal:
|
| 1542 | # Note: ERE doesn't seem to support escapes like Python
|
| 1543 | # https://docs.python.org/3/library/re.html
|
| 1544 | # We might want to do a translation like this;
|
| 1545 | #
|
| 1546 | # \u{03bc} -> \u03bc
|
| 1547 | # \x00 -> \x00
|
| 1548 | # \n -> \n
|
| 1549 |
|
| 1550 | # Must be Id.Char_{OneChar,Hex,UBraced}
|
| 1551 | assert consts.GetKind(tok0.id) == Kind.Char
|
| 1552 | s = word_compile.EvalCStringToken(tok0.id, lexer.TokenVal(tok0))
|
| 1553 | return re.LiteralChars(tok0, s)
|
| 1554 |
|
| 1555 | # Special punctuation
|
| 1556 | if typ0 == Id.Expr_Dot: # .
|
| 1557 | return re.Primitive(tok0, Id.Eggex_Dot)
|
| 1558 |
|
| 1559 | if typ0 == Id.Arith_Caret: # ^
|
| 1560 | return re.Primitive(tok0, Id.Eggex_Start)
|
| 1561 |
|
| 1562 | if typ0 == Id.Expr_Dollar: # $
|
| 1563 | return re.Primitive(tok0, Id.Eggex_End)
|
| 1564 |
|
| 1565 | if typ0 == Id.Expr_Name:
|
| 1566 | # d digit -> PosixClass PerlClass etc.
|
| 1567 | return self._NameInRegex(None, tok0)
|
| 1568 |
|
| 1569 | if typ0 == Id.Expr_Symbol:
|
| 1570 | # Validate symbols here, like we validate PerlClass, etc.
|
| 1571 | tok_str = lexer.TokenVal(tok0)
|
| 1572 | if tok_str == '%start':
|
| 1573 | return re.Primitive(tok0, Id.Eggex_Start)
|
| 1574 | if tok_str == '%end':
|
| 1575 | return re.Primitive(tok0, Id.Eggex_End)
|
| 1576 | p_die("Unexpected token %r in regex" % tok_str, tok0)
|
| 1577 |
|
| 1578 | if typ0 == Id.Expr_At:
|
| 1579 | # | '@' Expr_Name
|
| 1580 | tok1 = p_atom.GetChild(1).tok
|
| 1581 | return re.Splice(tok0, lexer.TokenVal(tok1))
|
| 1582 |
|
| 1583 | if typ0 == Id.Expr_Bang:
|
| 1584 | # | '!' (Expr_Name | class_literal)
|
| 1585 | # | '!' '!' Expr_Name (Expr_Name | Expr_DecInt | '(' regex ')')
|
| 1586 | n = p_atom.NumChildren()
|
| 1587 | if n == 2:
|
| 1588 | child1 = p_atom.GetChild(1)
|
| 1589 | if child1.typ == grammar_nt.class_literal:
|
| 1590 | return re.CharClassLiteral(True,
|
| 1591 | self._ClassLiteral(child1))
|
| 1592 | else:
|
| 1593 | return self._NameInRegex(tok0, p_atom.GetChild(1).tok)
|
| 1594 | else:
|
| 1595 | # Note: !! conflicts with shell history
|
| 1596 | p_die(
|
| 1597 | "Backtracking with !! isn't implemented (requires Python/PCRE)",
|
| 1598 | p_atom.GetChild(1).tok)
|
| 1599 |
|
| 1600 | if typ0 == Id.Op_LParen:
|
| 1601 | # | '(' regex ')'
|
| 1602 |
|
| 1603 | # Note: in ERE (d+) is the same as <d+>. That is, Group becomes
|
| 1604 | # Capture.
|
| 1605 | return re.Group(self._Regex(p_atom.GetChild(1)))
|
| 1606 |
|
| 1607 | if typ0 == Id.Arith_Less:
|
| 1608 | # | '<' 'capture' regex ['as' Expr_Name] [':' Expr_Name] '>'
|
| 1609 |
|
| 1610 | n = p_atom.NumChildren()
|
| 1611 | assert n == 4 or n == 6 or n == 8, n
|
| 1612 |
|
| 1613 | # < capture d+ >
|
| 1614 | regex = self._Regex(p_atom.GetChild(2))
|
| 1615 |
|
| 1616 | as_name = None # type: Optional[Token]
|
| 1617 | func_name = None # type: Optional[Token]
|
| 1618 |
|
| 1619 | i = 3 # points at any of > as :
|
| 1620 |
|
| 1621 | typ = p_atom.GetChild(i).typ
|
| 1622 | if typ == Id.Expr_As:
|
| 1623 | as_name = p_atom.GetChild(i + 1).tok
|
| 1624 | i += 2
|
| 1625 |
|
| 1626 | typ = p_atom.GetChild(i).typ
|
| 1627 | if typ == Id.Arith_Colon:
|
| 1628 | func_name = p_atom.GetChild(i + 1).tok
|
| 1629 |
|
| 1630 | return re.Capture(regex, as_name, func_name)
|
| 1631 |
|
| 1632 | raise AssertionError(typ0)
|
| 1633 |
|
| 1634 | def _RepeatOp(self, p_repeat):
|
| 1635 | # type: (PNode) -> re_repeat_t
|
| 1636 | """
|
| 1637 | repeat_op: '+' | '*' | '?'
|
| 1638 | | '{' [Expr_Name] ('+' | '*' | '?' | repeat_range) '}'
|
| 1639 | """
|
| 1640 | assert p_repeat.typ == grammar_nt.repeat_op, p_repeat
|
| 1641 |
|
| 1642 | tok = p_repeat.GetChild(0).tok
|
| 1643 | id_ = tok.id
|
| 1644 |
|
| 1645 | if id_ in (Id.Arith_Plus, Id.Arith_Star, Id.Arith_QMark):
|
| 1646 | return tok # a+ a* a?
|
| 1647 |
|
| 1648 | if id_ == Id.Op_LBrace:
|
| 1649 | child1 = p_repeat.GetChild(1)
|
| 1650 | if child1.typ != grammar_nt.repeat_range:
|
| 1651 | # e.g. dot{N *} is .*?
|
| 1652 | p_die("Perl-style repetition isn't implemented with libc",
|
| 1653 | child1.tok)
|
| 1654 |
|
| 1655 | # repeat_range: (
|
| 1656 | # Expr_DecInt [',']
|
| 1657 | # | ',' Expr_DecInt
|
| 1658 | # | Expr_DecInt ',' Expr_DecInt
|
| 1659 | # )
|
| 1660 |
|
| 1661 | n = child1.NumChildren()
|
| 1662 | if n == 1: # {3}
|
| 1663 | tok = child1.GetChild(0).tok
|
| 1664 | return tok # different operator than + * ?
|
| 1665 |
|
| 1666 | if n == 2:
|
| 1667 | if child1.GetChild(0).typ == Id.Expr_DecInt: # {,3}
|
| 1668 | left = child1.GetChild(0).tok
|
| 1669 | return re_repeat.Range(left, lexer.TokenVal(left), '',
|
| 1670 | None)
|
| 1671 | else: # {1,}
|
| 1672 | right = child1.GetChild(1).tok
|
| 1673 | return re_repeat.Range(None, '', lexer.TokenVal(right),
|
| 1674 | right)
|
| 1675 |
|
| 1676 | if n == 3: # {1,3}
|
| 1677 | left = child1.GetChild(0).tok
|
| 1678 | right = child1.GetChild(2).tok
|
| 1679 | return re_repeat.Range(left, lexer.TokenVal(left),
|
| 1680 | lexer.TokenVal(right), right)
|
| 1681 |
|
| 1682 | raise AssertionError(n)
|
| 1683 |
|
| 1684 | raise AssertionError(id_)
|
| 1685 |
|
| 1686 | def _ReAlt(self, p_node):
|
| 1687 | # type: (PNode) -> re_t
|
| 1688 | """
|
| 1689 | re_alt: (re_atom [repeat_op])+
|
| 1690 | """
|
| 1691 | assert p_node.typ == grammar_nt.re_alt
|
| 1692 |
|
| 1693 | i = 0
|
| 1694 | n = p_node.NumChildren()
|
| 1695 | seq = [] # type: List[re_t]
|
| 1696 | while i < n:
|
| 1697 | r = self._ReAtom(p_node.GetChild(i))
|
| 1698 | i += 1
|
| 1699 | if i < n and p_node.GetChild(i).typ == grammar_nt.repeat_op:
|
| 1700 | repeat_op = self._RepeatOp(p_node.GetChild(i))
|
| 1701 | r = re.Repeat(r, repeat_op)
|
| 1702 | i += 1
|
| 1703 | seq.append(r)
|
| 1704 |
|
| 1705 | if len(seq) == 1:
|
| 1706 | return seq[0]
|
| 1707 | else:
|
| 1708 | return re.Seq(seq)
|
| 1709 |
|
| 1710 | def _Regex(self, p_node):
|
| 1711 | # type: (PNode) -> re_t
|
| 1712 | """
|
| 1713 | regex: [re_alt] (('|'|'or') re_alt)*
|
| 1714 | """
|
| 1715 | assert p_node.typ == grammar_nt.regex
|
| 1716 |
|
| 1717 | n = p_node.NumChildren()
|
| 1718 | alts = [] # type: List[re_t]
|
| 1719 | for i in xrange(0, n, 2): # was children[::2]
|
| 1720 | c = p_node.GetChild(i)
|
| 1721 | alts.append(self._ReAlt(c))
|
| 1722 |
|
| 1723 | if len(alts) == 1:
|
| 1724 | return alts[0]
|
| 1725 | else:
|
| 1726 | return re.Alt(alts)
|
| 1727 |
|
| 1728 |
|
| 1729 | # vim: sw=4
|