1 | """
|
2 | parse_lib.py - Consolidate various parser instantiations here.
|
3 | """
|
4 |
|
5 | from _devbuild.gen.id_kind_asdl import Id_t
|
6 | from _devbuild.gen.syntax_asdl import (Token, CompoundWord, expr_t, Redir,
|
7 | ArgList, Proc, Func, pat_t, VarDecl,
|
8 | Mutation, source, loc, loc_t)
|
9 | from _devbuild.gen.types_asdl import lex_mode_e
|
10 | from _devbuild.gen import grammar_nt
|
11 |
|
12 | from asdl import format as fmt
|
13 | from core import alloc
|
14 | from core import state
|
15 | from frontend import lexer
|
16 | from frontend import reader
|
17 | from osh import tdop
|
18 | from osh import arith_parse
|
19 | from osh import cmd_parse
|
20 | from osh import word_parse
|
21 | from mycpp import mylib
|
22 | from mycpp.mylib import log
|
23 | from ysh import expr_parse
|
24 | from ysh import expr_to_ast
|
25 | from ysh.expr_parse import ctx_PNodeAllocator
|
26 |
|
_ = log  # Keep 'log' imported for ad-hoc debugging; assignment silences unused-import warnings.
|
28 |
|
29 | from typing import Any, List, Tuple, Dict, TYPE_CHECKING
|
30 | if TYPE_CHECKING:
|
31 | from core.util import _DebugFile
|
32 | from core import optview
|
33 | from frontend.lexer import Lexer
|
34 | from frontend.reader import _Reader
|
35 | from osh.tdop import TdopParser
|
36 | from osh.word_parse import WordParser
|
37 | from osh.cmd_parse import CommandParser
|
38 | from pgen2.grammar import Grammar
|
39 |
|
40 |
|
class _BaseTrail(object):
    """Defines the Trail interface; every method is a no-op in this base."""

    def __init__(self):
        # type: () -> None
        # Words from a partially completed command.
        # Filled in by _ScanSimpleCommand in osh/cmd_parse.py.
        self.words = []  # type: List[CompoundWord]
        self.redirects = []  # type: List[Redir]
        # TODO: We should maintain the LST invariant and have a single list, but
        # that I ran into the "cases classes are better than variants" problem.

        # Non-ignored tokens, after PushHint translation.  Used for variable name
        # completion.  Filled in by _Peek() in osh/word_parse.py.
        #
        # Example:
        # $ echo $\
        # f<TAB>
        # This could complete $foo.
        # Problem: readline doesn't even allow that, because it spans more than
        # one line!
        self.tokens = []  # type: List[Token]

        # Words INSIDE an alias expansion, kept separate from self.words.
        self.alias_words = []  # type: List[CompoundWord]
        self._expanding_alias = False

    def Clear(self):
        # type: () -> None
        pass

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        pass

    def AppendToken(self, token):
        # type: (Token) -> None
        pass

    def BeginAliasExpansion(self):
        # type: () -> None
        pass

    def EndAliasExpansion(self):
        # type: () -> None
        pass

    if mylib.PYTHON:

        def PrintDebugString(self, debug_f):
            # type: (_DebugFile) -> None

            # note: could cast DebugFile to IO[str] instead of ignoring?
            # Same output as printing each section by hand: a heading, then
            # each node pretty-printed followed by a blank line.
            sections = [
                (' words:', self.words),
                (' redirects:', self.redirects),
                (' tokens:', self.tokens),
                (' alias_words:', self.alias_words),
            ]
            for heading, nodes in sections:
                debug_f.writeln(heading)
                for node in nodes:
                    fmt.PrettyPrint(node, f=debug_f)  # type: ignore
                    debug_f.writeln('')

    def __repr__(self):
        # type: () -> str
        return '<Trail %s %s %s %s>' % (self.words, self.redirects,
                                        self.tokens, self.alias_words)
|
118 |
|
119 |
|
class ctx_Alias(object):
    """Tells the Trail to capture only the FIRST word of an alias expansion.

    For example, given

        alias ll='ls -l'

    we want to capture 'ls' as the first word.

    While this context is active, SetLatestWords and AppendToken must be
    inactive, because we don't need the other tokens from 'ls -l'.  Recording
    them would also probably cause bugs in history expansion, e.g. echo !1
    should be the first word the user typed, not the first word after alias
    expansion.
    """

    def __init__(self, trail):
        # type: (_BaseTrail) -> None
        self.trail = trail
        trail._expanding_alias = True

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        trail = self.trail
        trail._expanding_alias = False
|
148 |
|
149 |
|
class Trail(_BaseTrail):
    """Info left by the parser to help us complete shell syntax and commands.

    It's also used for history expansion.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        _BaseTrail.__init__(self)

    def Clear(self):
        # type: () -> None
        # Reset all four lists in place.  (Arguably tokens and alias_words
        # don't strictly need to be reset.)
        del self.words[:]
        del self.redirects[:]
        del self.tokens[:]
        del self.alias_words[:]

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        if self._expanding_alias:
            self.alias_words = words  # Save these separately
        else:
            self.words = words
            self.redirects = redirects

    def AppendToken(self, token):
        # type: (Token) -> None
        # We don't want tokens inside aliases
        if not self._expanding_alias:
            self.tokens.append(token)
|
182 |
|
183 |
|
if TYPE_CHECKING:
    # Aliases currently being expanded.  Each entry pairs a name with an int;
    # presumably this detects recursive alias expansion — TODO confirm against
    # the usage in osh/cmd_parse.py.
    AliasesInFlight = List[Tuple[str, int]]
|
186 |
|
187 |
|
class ParseContext(object):
    """Context shared between the mutually recursive Command and Word parsers.

    In contrast, STATE is stored in the CommandParser and WordParser
    instances.
    """

    def __init__(self,
                 arena,
                 parse_opts,
                 aliases,
                 ysh_grammar,
                 do_lossless=False):
        # type: (alloc.Arena, optview.Parse, Dict[str, str], Grammar, bool) -> None
        self.arena = arena
        self.parse_opts = parse_opts
        self.aliases = aliases
        self.ysh_grammar = ysh_grammar
        self.do_lossless = do_lossless

        # NOTE: The transformer is really a pure function.
        if not ysh_grammar:  # hack for unit tests, which pass None
            self.tr = None
        else:
            self.tr = expr_to_ast.Transformer(ysh_grammar)

        if mylib.PYTHON:
            self.p_printer = None
            if self.tr:
                self.p_printer = self.tr.p_printer

        # Completion state lives here since it may span multiple parsers.
        self.trail = _BaseTrail()  # no-op by default

    def Init_Trail(self, trail):
        # type: (_BaseTrail) -> None
        # Install a real Trail, replacing the no-op default.
        self.trail = trail

    def MakeLexer(self, line_reader):
        # type: (_Reader) -> Lexer
        """Helper: build a Lexer on top of the given line reader.

        NOTE: I tried to combine the LineLexer and Lexer, and it didn't
        perform better.
        """
        # The LineLexer takes its Arena from the LineReader.
        return lexer.Lexer(lexer.LineLexer(line_reader.arena), line_reader)

    def MakeOshParser(self, line_reader, emit_comp_dummy=False):
        # type: (_Reader, bool) -> CommandParser
        lex = self.MakeLexer(line_reader)
        if emit_comp_dummy:
            lex.EmitCompDummy()  # A special token before EOF!
        wp = word_parse.WordParser(self, lex, line_reader)
        return cmd_parse.CommandParser(self, self.parse_opts, wp, lex,
                                       line_reader)

    def MakeConfigParser(self, line_reader):
        # type: (_Reader) -> CommandParser
        # Config files are parsed with YSH options, regardless of self.parse_opts.
        lex = self.MakeLexer(line_reader)
        opts = state.MakeYshParseOpts()
        wp = word_parse.WordParser(self, lex, line_reader)
        return cmd_parse.CommandParser(self, opts, wp, lex, line_reader)

    def MakeWordParserForHereDoc(self, line_reader):
        # type: (_Reader) -> WordParser
        return word_parse.WordParser(self, self.MakeLexer(line_reader),
                                     line_reader)

    def MakeWordParser(self, lx, line_reader):
        # type: (Lexer, _Reader) -> WordParser
        return word_parse.WordParser(self, lx, line_reader)

    def MakeArithParser(self, code_str, blame_loc=loc.Missing):
        # type: (str, loc_t) -> TdopParser
        """Used for a[x+1]=foo in the CommandParser, unset, printf -v"""
        # This is dynamic parsing, so save lines into a temporary arena.
        arena = alloc.Arena()
        arena.PushSource(source.Dynamic('sh arith expr', blame_loc))
        line_reader = reader.StringLineReader(code_str, arena)
        lex = self.MakeLexer(line_reader)
        wp = word_parse.WordParser(self, lex, line_reader)
        wp.Init(lex_mode_e.Arith)  # Special initialization
        return tdop.TdopParser(arith_parse.Spec(), wp, self.parse_opts)

    def MakeParserForCommandSub(self, line_reader, lexer, eof_id):
        # type: (_Reader, Lexer, Id_t) -> CommandParser
        """To parse command sub, we want a fresh word parser state."""
        wp = word_parse.WordParser(self, lexer, line_reader)
        return cmd_parse.CommandParser(self,
                                       self.parse_opts,
                                       wp,
                                       lexer,
                                       line_reader,
                                       eof_id=eof_id)

    def MakeWordParserForPlugin(self, code_str):
        # type: (str) -> WordParser
        """For $PS1, $PS4, etc."""
        line_reader = reader.StringLineReader(code_str, self.arena)
        return word_parse.WordParser(self, self.MakeLexer(line_reader),
                                     line_reader)

    def _YshParser(self):
        # type: () -> expr_parse.ExprParser
        # Fresh expression parser over the shared YSH grammar.
        return expr_parse.ExprParser(self, self.ysh_grammar)

    def ParseVarDecl(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[VarDecl, Token]
        """ var mylist = [1, 2, 3] """
        ep = self._YshParser()
        with ctx_PNodeAllocator(ep):
            pnode, last_token = ep.Parse(lexer, grammar_nt.ysh_var_decl)

            if 0:  # flip to debug the parse tree
                self.p_printer.Print(pnode)

            ast_node = self.tr.MakeVarDecl(pnode)
            ast_node.keyword = kw_token  # VarDecl didn't fill this in

            return ast_node, last_token

    def ParseMutation(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[Mutation, Token]
        """ setvar d['a'] += 1 """
        ep = self._YshParser()
        with ctx_PNodeAllocator(ep):
            pnode, last_token = ep.Parse(lexer, grammar_nt.ysh_mutation)

            if 0:  # flip to debug the parse tree
                self.p_printer.Print(pnode)

            ast_node = self.tr.MakeMutation(pnode)
            ast_node.keyword = kw_token  # Mutation didn't fill this in

            return ast_node, last_token

    def ParseProcCallArgs(self, lx, out, start_symbol):
        # type: (Lexer, ArgList, int) -> None
        """ json write (x, foo=1) and assert [42 === x] """
        ep = self._YshParser()
        with ctx_PNodeAllocator(ep):
            pnode, last_token = ep.Parse(lx, start_symbol)

            if 0:  # flip to debug the parse tree
                self.p_printer.Print(pnode)

            self.tr.ProcCallArgs(pnode, out)
            out.right = last_token

    def ParseYshExpr(self, lx, start_symbol):
        # type: (Lexer, int) -> Tuple[expr_t, Token]
        """if (x > 0) { ...

        }, while, etc.
        """
        ep = self._YshParser()
        with ctx_PNodeAllocator(ep):
            pnode, last_token = ep.Parse(lx, start_symbol)

            if 0:  # flip to debug the parse tree
                self.p_printer.Print(pnode)

            ast_node = self.tr.Expr(pnode)

            return ast_node, last_token

    def ParseYshCasePattern(self, lexer):
        # type: (Lexer) -> Tuple[pat_t, Token, Token]
        """(6) | (7), / dot* '.py' /, (else), etc.

        Alongside the pattern, this returns the first token in the pattern and
        the LBrace token at the start of the case arm body.
        """
        ep = self._YshParser()
        with ctx_PNodeAllocator(ep):
            pnode, last_token = ep.Parse(lexer, grammar_nt.ysh_case_pat)

            left_tok = pnode.GetChild(0).tok
            pattern = self.tr.YshCasePattern(pnode)

            return pattern, left_tok, last_token

    def ParseProc(self, lexer, out):
        # type: (Lexer, Proc) -> Token
        """proc f(x, y, @args) {"""
        ep = self._YshParser()
        with ctx_PNodeAllocator(ep):
            pnode, last_token = ep.Parse(lexer, grammar_nt.ysh_proc)

            if 0:  # flip to debug the parse tree
                self.p_printer.Print(pnode)

            out.sig = self.tr.Proc(pnode)

            return last_token

    def ParseFunc(self, lexer, out):
        # type: (Lexer, Func) -> Token
        """ func f(x Int, y Int = 0, ...args; z Int = 3, ...named) => Int """
        ep = self._YshParser()
        with ctx_PNodeAllocator(ep):
            pnode, last_token = ep.Parse(lexer, grammar_nt.ysh_func)

            if 0:  # flip to debug the parse tree
                self.p_printer.Print(pnode)

            self.tr.YshFunc(pnode, out)
            return last_token
|
405 |
|
406 |
|
407 | # Another parser instantiation:
|
408 | # - For Array Literal in word_parse.py WordParser:
|
409 | # w_parser = WordParser(self.lexer, self.line_reader)
|