| 1 | """
|
| 2 | parse_lib.py - Consolidate various parser instantiations here.
|
| 3 | """
|
| 4 |
|
| 5 | from _devbuild.gen.id_kind_asdl import Id_t
|
| 6 | from _devbuild.gen.syntax_asdl import (Token, CompoundWord, expr_t, Redir,
|
| 7 | ArgList, Proc, Func, pat_t, VarDecl,
|
| 8 | Mutation, source, loc, loc_t)
|
| 9 | from _devbuild.gen.types_asdl import lex_mode_e
|
| 10 | from _devbuild.gen import grammar_nt
|
| 11 |
|
| 12 | from asdl import format as fmt
|
| 13 | from core import alloc
|
| 14 | from core import state
|
| 15 | from frontend import lexer
|
| 16 | from frontend import reader
|
| 17 | from osh import tdop
|
| 18 | from osh import arith_parse
|
| 19 | from osh import cmd_parse
|
| 20 | from osh import word_parse
|
| 21 | from mycpp import mylib
|
| 22 | from mycpp.mylib import log
|
| 23 | from ysh import expr_parse
|
| 24 | from ysh import expr_to_ast
|
| 25 | from ysh.expr_parse import ctx_PNodeAllocator
|
| 26 |
|
| 27 | _ = log
|
| 28 |
|
| 29 | from typing import Any, List, Tuple, Dict, TYPE_CHECKING
|
| 30 | if TYPE_CHECKING:
|
| 31 | from core.util import _DebugFile
|
| 32 | from core import optview
|
| 33 | from frontend.lexer import Lexer
|
| 34 | from frontend.reader import _Reader
|
| 35 | from osh.tdop import TdopParser
|
| 36 | from osh.word_parse import WordParser
|
| 37 | from osh.cmd_parse import CommandParser
|
| 38 | from pgen2.grammar import Grammar
|
| 39 |
|
| 40 |
|
class _BaseTrail(object):
    """Base class has members, but no-ops for methods.

    ParseContext holds one of these and the parsers call into it
    unconditionally; this base class makes those calls no-ops unless a real
    Trail is installed via ParseContext.Init_Trail() (used for interactive
    completion and history expansion).
    """

    def __init__(self):
        # type: () -> None
        # word from a partially completed command.
        # Filled in by _ScanSimpleCommand in osh/cmd_parse.py.
        self.words = []  # type: List[CompoundWord]
        self.redirects = []  # type: List[Redir]
        # TODO: We should maintain the LST invariant and have a single list,
        # but I ran into the "cases classes are better than variants" problem.

        # Non-ignored tokens, after PushHint translation. Used for variable name
        # completion. Filled in by _Peek() in osh/word_parse.py.
        #
        # Example:
        # $ echo $\
        # f<TAB>
        # This could complete $foo.
        # Problem: readline doesn't even allow that, because it spans more than one
        # line!
        self.tokens = []  # type: List[Token]

        # Words captured INSIDE an alias expansion (kept separate from
        # self.words; see SetLatestWords in Trail).
        self.alias_words = []  # type: List[CompoundWord]
        # True while an alias expansion is in progress; toggled by ctx_Alias.
        self._expanding_alias = False

    def Clear(self):
        # type: () -> None
        # No-op in the base class; Trail overrides it.
        pass

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        # No-op in the base class; Trail overrides it.
        pass

    def AppendToken(self, token):
        # type: (Token) -> None
        # No-op in the base class; Trail overrides it.
        pass

    def BeginAliasExpansion(self):
        # type: () -> None
        pass

    def EndAliasExpansion(self):
        # type: () -> None
        pass

    if mylib.PYTHON:

        def PrintDebugString(self, debug_f):
            # type: (_DebugFile) -> None
            # Dump all captured state to the debug file, for interactive
            # troubleshooting of completion.

            # note: could cast DebugFile to IO[str] instead of ignoring?
            debug_f.writeln(' words:')
            for w in self.words:
                fmt.PrettyPrint(w, f=debug_f)  # type: ignore
            debug_f.writeln('')

            debug_f.writeln(' redirects:')
            for r in self.redirects:
                fmt.PrettyPrint(r, f=debug_f)  # type: ignore
            debug_f.writeln('')

            debug_f.writeln(' tokens:')
            for p in self.tokens:
                fmt.PrettyPrint(p, f=debug_f)  # type: ignore
            debug_f.writeln('')

            debug_f.writeln(' alias_words:')
            for w in self.alias_words:
                fmt.PrettyPrint(w, f=debug_f)  # type: ignore
            debug_f.writeln('')

        def __repr__(self):
            # type: () -> str
            return '<Trail %s %s %s %s>' % (self.words, self.redirects,
                                            self.tokens, self.alias_words)
|
| 118 |
|
| 119 |
|
class ctx_Alias(object):
    """Marks the trail as 'expanding an alias' for the duration of a block.

    CommandParser enters this so the trail knows to be ready for the FIRST
    word of an alias expansion.  For example, given

        alias ll='ls -l'

    we want to capture 'ls' as the first word, but we do NOT want
    SetLatestWords or AppendToken to stay active, because the other tokens
    from 'ls -l' aren't needed.  Leaving them active would also likely break
    history expansion: 'echo !1' should refer to the first word the user
    typed, not the first word after alias expansion.

    Note: the flag is raised in __init__, not __enter__, and lowered again
    in __exit__.
    """

    def __init__(self, trail):
        # type: (_BaseTrail) -> None
        self.trail = trail
        trail._expanding_alias = True

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.trail._expanding_alias = False
|
| 148 |
|
| 149 |
|
class Trail(_BaseTrail):
    """Info left by the parser to help us complete shell syntax and commands.

    It's also used for history expansion.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        _BaseTrail.__init__(self)

    def Clear(self):
        # type: () -> None
        # Clear in place with del so existing references to the lists stay
        # valid.  (Do tokens/alias_words actually need to be reset?)
        del self.words[:]
        del self.redirects[:]
        del self.tokens[:]
        del self.alias_words[:]

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        if self._expanding_alias:
            # Words from inside an alias expansion are saved separately.
            self.alias_words = words
        else:
            self.words = words
            self.redirects = redirects

    def AppendToken(self, token):
        # type: (Token) -> None
        # Don't record tokens that come from inside an alias expansion.
        if not self._expanding_alias:
            self.tokens.append(token)
|
| 182 |
|
| 183 |
|
if TYPE_CHECKING:
    # Type-only alias: tracks alias expansions currently in progress.
    # Each entry pairs the alias expansion text with an int (presumably a
    # position/offset -- confirm at the use sites in osh/cmd_parse.py).
    AliasesInFlight = List[Tuple[str, int]]
|
| 186 |
|
| 187 |
|
class ParseContext(object):
    """Context shared between the mutually recursive Command and Word parsers.

    In contrast, STATE is stored in the CommandParser and WordParser
    instances.
    """

    def __init__(self,
                 arena,
                 parse_opts,
                 aliases,
                 ysh_grammar,
                 do_lossless=False):
        # type: (alloc.Arena, optview.Parse, Dict[str, str], Grammar, bool) -> None
        self.arena = arena
        self.parse_opts = parse_opts
        self.aliases = aliases
        self.ysh_grammar = ysh_grammar
        self.do_lossless = do_lossless

        # NOTE: The transformer is really a pure function.
        if ysh_grammar:
            self.tr = expr_to_ast.Transformer(ysh_grammar)
        else:  # hack for unit tests, which pass None
            self.tr = None

        if mylib.PYTHON:
            # p_printer is debug-only (used under 'if 0:' below); only
            # available when we have a transformer.
            if self.tr:
                self.p_printer = self.tr.p_printer
            else:
                self.p_printer = None

        # Completion state lives here since it may span multiple parsers.
        self.trail = _BaseTrail()  # no-op by default

    def Init_Trail(self, trail):
        # type: (_BaseTrail) -> None
        # Install a real Trail (e.g. for interactive completion / history),
        # replacing the no-op default.
        self.trail = trail

    def MakeLexer(self, line_reader):
        # type: (_Reader) -> Lexer
        """Helper function.

        NOTE: I tried to combine the LineLexer and Lexer, and it didn't perform
        better.
        """
        # Take Arena from LineReader
        line_lexer = lexer.LineLexer(line_reader.arena)
        return lexer.Lexer(line_lexer, line_reader)

    def MakeOshParser(self, line_reader, emit_comp_dummy=False):
        # type: (_Reader, bool) -> CommandParser
        # The main entry point for parsing OSH: builds Lexer -> WordParser ->
        # CommandParser over the given line reader.
        lx = self.MakeLexer(line_reader)
        if emit_comp_dummy:
            lx.EmitCompDummy()  # A special token before EOF!

        w_parser = word_parse.WordParser(self, lx, line_reader)
        c_parser = cmd_parse.CommandParser(self, self.parse_opts, w_parser, lx,
                                           line_reader)
        return c_parser

    def MakeConfigParser(self, line_reader):
        # type: (_Reader) -> CommandParser
        # Like MakeOshParser, but forces YSH parse options (for config files),
        # ignoring self.parse_opts.
        lx = self.MakeLexer(line_reader)
        parse_opts = state.MakeYshParseOpts()
        w_parser = word_parse.WordParser(self, lx, line_reader)
        c_parser = cmd_parse.CommandParser(self, parse_opts, w_parser, lx,
                                           line_reader)
        return c_parser

    def MakeWordParserForHereDoc(self, line_reader):
        # type: (_Reader) -> WordParser
        # Fresh word parser state for parsing here-doc bodies.
        lx = self.MakeLexer(line_reader)
        return word_parse.WordParser(self, lx, line_reader)

    def MakeWordParser(self, lx, line_reader):
        # type: (Lexer, _Reader) -> WordParser
        # Word parser over an existing lexer (caller controls lexer state).
        return word_parse.WordParser(self, lx, line_reader)

    def MakeArithParser(self, code_str, blame_loc=loc.Missing):
        # type: (str, loc_t) -> TdopParser
        """Used for a[x+1]=foo in the CommandParser, unset, printf -v"""
        # Save lines into temp arena, for dynamic parsing
        arena = alloc.Arena()
        arena.PushSource(source.Dynamic('sh arith expr', blame_loc))
        line_reader = reader.StringLineReader(code_str, arena)
        lx = self.MakeLexer(line_reader)
        w_parser = word_parse.WordParser(self, lx, line_reader)
        w_parser.Init(lex_mode_e.Arith)  # Special initialization
        a_parser = tdop.TdopParser(arith_parse.Spec(), w_parser,
                                   self.parse_opts)
        return a_parser

    def MakeParserForCommandSub(self, line_reader, lexer, eof_id):
        # type: (_Reader, Lexer, Id_t) -> CommandParser
        """To parse command sub, we want a fresh word parser state."""
        # NOTE: the 'lexer' parameter shadows the module import of the same
        # name; only the parameter is used in this method.
        w_parser = word_parse.WordParser(self, lexer, line_reader)
        c_parser = cmd_parse.CommandParser(self,
                                           self.parse_opts,
                                           w_parser,
                                           lexer,
                                           line_reader,
                                           eof_id=eof_id)
        return c_parser

    def MakeWordParserForPlugin(self, code_str):
        # type: (str) -> WordParser
        """For $PS1, $PS4, etc."""
        line_reader = reader.StringLineReader(code_str, self.arena)
        lx = self.MakeLexer(line_reader)
        return word_parse.WordParser(self, lx, line_reader)

    def _YshParser(self):
        # type: () -> expr_parse.ExprParser
        # Fresh YSH expression parser over the shared pgen2 grammar.
        return expr_parse.ExprParser(self, self.ysh_grammar)

    def ParseVarDecl(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[VarDecl, Token]
        """ var mylist = [1, 2, 3] """
        e_parser = self._YshParser()
        # NOTE(review): the parse tree (pnodes) appears to be owned by
        # ctx_PNodeAllocator, so transformation happens inside the 'with'
        # block -- confirm in ysh/expr_parse.py.
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_var_decl)

            if 0:  # debugging aid
                self.p_printer.Print(pnode)

            ast_node = self.tr.MakeVarDecl(pnode)
        ast_node.keyword = kw_token  # VarDecl didn't fill this in

        return ast_node, last_token

    def ParseMutation(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[Mutation, Token]
        """ setvar d['a'] += 1 """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_mutation)
            if 0:  # debugging aid
                self.p_printer.Print(pnode)
            ast_node = self.tr.MakeMutation(pnode)
        ast_node.keyword = kw_token  # Mutation didn't fill this in

        return ast_node, last_token

    def ParseProcCallArgs(self, lx, out, start_symbol):
        # type: (Lexer, ArgList, int) -> None
        """ json write (x, foo=1) and assert [42 === x] """
        # Fills in 'out' (an ArgList) rather than returning a value.
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lx, start_symbol)

            if 0:  # debugging aid
                self.p_printer.Print(pnode)

            self.tr.ProcCallArgs(pnode, out)
        out.right = last_token

    def ParseYshExpr(self, lx, start_symbol):
        # type: (Lexer, int) -> Tuple[expr_t, Token]
        """if (x > 0) { ...

        }, while, etc.
        """

        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lx, start_symbol)
            if 0:  # debugging aid
                self.p_printer.Print(pnode)

            ast_node = self.tr.Expr(pnode)

        return ast_node, last_token

    def ParseYshCasePattern(self, lexer):
        # type: (Lexer) -> Tuple[pat_t, Token, Token]
        """(6) | (7), / dot* '.py' /, (else), etc.

        Alongside the pattern, this returns the first token in the pattern and
        the LBrace token at the start of the case arm body.
        """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_case_pat)

            left_tok = pnode.GetChild(0).tok
            pattern = self.tr.YshCasePattern(pnode)

        return pattern, left_tok, last_token

    def ParseProc(self, lexer, out):
        # type: (Lexer, Proc) -> Token
        """proc f(x, y, @args) {"""
        # Fills in out.sig; returns the last token consumed.
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_proc)

            if 0:  # debugging aid
                self.p_printer.Print(pnode)

            out.sig = self.tr.Proc(pnode)

        return last_token

    def ParseFunc(self, lexer, out):
        # type: (Lexer, Func) -> Token
        """ func f(x Int, y Int = 0, ...args; z Int = 3, ...named) => Int """
        # Fills in 'out' (a Func node); returns the last token consumed.
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_func)

            if 0:  # debugging aid
                self.p_printer.Print(pnode)

            self.tr.YshFunc(pnode, out)
        return last_token
|
| 405 |
|
| 406 |
|
| 407 | # Another parser instantiation:
|
| 408 | # - For Array Literal in word_parse.py WordParser:
|
| 409 | # w_parser = WordParser(self.lexer, self.line_reader)
|