# OILS / frontend / parse_lib.py  (source mirrored from oils.pub; view on GitHub)
# 409 lines, 227 significant
1"""
2parse_lib.py - Consolidate various parser instantiations here.
3"""
4
5from _devbuild.gen.id_kind_asdl import Id_t
6from _devbuild.gen.syntax_asdl import (Token, CompoundWord, expr_t, Redir,
7 ArgList, Proc, Func, pat_t, VarDecl,
8 Mutation, source, loc, loc_t)
9from _devbuild.gen.types_asdl import lex_mode_e
10from _devbuild.gen import grammar_nt
11
12from asdl import format as fmt
13from core import alloc
14from core import state
15from frontend import lexer
16from frontend import reader
17from osh import tdop
18from osh import arith_parse
19from osh import cmd_parse
20from osh import word_parse
21from mycpp import mylib
22from mycpp.mylib import log
23from ysh import expr_parse
24from ysh import expr_to_ast
25from ysh.expr_parse import ctx_PNodeAllocator
26
27_ = log
28
29from typing import Any, List, Tuple, Dict, TYPE_CHECKING
30if TYPE_CHECKING:
31 from core.util import _DebugFile
32 from core import optview
33 from frontend.lexer import Lexer
34 from frontend.reader import _Reader
35 from osh.tdop import TdopParser
36 from osh.word_parse import WordParser
37 from osh.cmd_parse import CommandParser
38 from pgen2.grammar import Grammar
39
40
class _BaseTrail(object):
    """Base class has members, but no-ops for methods."""

    def __init__(self):
        # type: () -> None
        # Words from a partially completed command.
        # Filled in by _ScanSimpleCommand in osh/cmd_parse.py.
        self.words = []  # type: List[CompoundWord]
        self.redirects = []  # type: List[Redir]
        # TODO: We should maintain the LST invariant and have a single list,
        # but that I ran into the "cases classes are better than variants"
        # problem.

        # Non-ignored tokens, after PushHint translation.  Used for variable
        # name completion.  Filled in by _Peek() in osh/word_parse.py.
        #
        # Example:
        #   $ echo $\
        #   f<TAB>
        # This could complete $foo.
        # Problem: readline doesn't even allow that, because it spans more
        # than one line!
        self.tokens = []  # type: List[Token]

        # Words INSIDE an alias expansion, kept separate from self.words.
        self.alias_words = []  # type: List[CompoundWord]
        self._expanding_alias = False

    def Clear(self):
        # type: () -> None
        pass

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        pass

    def AppendToken(self, token):
        # type: (Token) -> None
        pass

    def BeginAliasExpansion(self):
        # type: () -> None
        pass

    def EndAliasExpansion(self):
        # type: () -> None
        pass

    if mylib.PYTHON:

        def PrintDebugString(self, debug_f):
            # type: (_DebugFile) -> None

            # note: could cast DebugFile to IO[str] instead of ignoring?
            sections = [
                (' words:', self.words),
                (' redirects:', self.redirects),
                (' tokens:', self.tokens),
                (' alias_words:', self.alias_words),
            ]
            for heading, nodes in sections:
                debug_f.writeln(heading)
                for node in nodes:
                    fmt.PrettyPrint(node, f=debug_f)  # type: ignore
                debug_f.writeln('')

        def __repr__(self):
            # type: () -> str
            parts = (self.words, self.redirects, self.tokens,
                     self.alias_words)
            return '<Trail %s %s %s %s>' % parts
119
class ctx_Alias(object):
    """Used by CommandParser so we know to be ready for FIRST alias word.

    For example, for

        alias ll='ls -l'

    we want to capture 'ls' as the first word.

    We do NOT want SetLatestWords or AppendToken to be active, because we
    don't need other tokens from 'ls -l'.

    It would also probably cause bugs in history expansion, e.g. echo !1
    should be the first word the user typed, not the first word after alias
    expansion.
    """

    def __init__(self, trail):
        # type: (_BaseTrail) -> None
        # Flip the flag immediately, not in __enter__, so the very first
        # word of the expansion is already treated as part of the alias.
        self.trail = trail
        trail._expanding_alias = True

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.trail._expanding_alias = False
149
class Trail(_BaseTrail):
    """Info left by the parser to help us complete shell syntax and commands.

    It's also used for history expansion.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        _BaseTrail.__init__(self)

    def Clear(self):
        # type: () -> None
        del self.words[:]
        del self.redirects[:]
        # The other ones don't need to be reset?
        del self.tokens[:]
        del self.alias_words[:]

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        if self._expanding_alias:
            self.alias_words = words  # Save these separately
        else:
            self.words = words
            self.redirects = redirects

    def AppendToken(self, token):
        # type: (Token) -> None
        # We don't want tokens inside aliases.
        if not self._expanding_alias:
            self.tokens.append(token)
183
if TYPE_CHECKING:
    # Type-check-only alias shared with the command parser.
    # NOTE(review): presumably (alias name, position) pairs for alias
    # expansions currently in progress -- confirm against osh/cmd_parse.py;
    # only the alias itself is visible here.
    AliasesInFlight = List[Tuple[str, int]]
187
class ParseContext(object):
    """Context shared between the mutually recursive Command and Word parsers.

    In contrast, STATE is stored in the CommandParser and WordParser
    instances.

    This object owns the arena, parse options, alias table, and the YSH
    grammar, and acts as a factory for every parser variant in the front end.
    """

    def __init__(self,
                 arena,
                 parse_opts,
                 aliases,
                 ysh_grammar,
                 do_lossless=False):
        # type: (alloc.Arena, optview.Parse, Dict[str, str], Grammar, bool) -> None
        self.arena = arena
        self.parse_opts = parse_opts
        self.aliases = aliases
        self.ysh_grammar = ysh_grammar
        self.do_lossless = do_lossless

        # NOTE: The transformer is really a pure function.
        if ysh_grammar:
            self.tr = expr_to_ast.Transformer(ysh_grammar)
        else:  # hack for unit tests, which pass None
            self.tr = None

        if mylib.PYTHON:
            if self.tr:
                self.p_printer = self.tr.p_printer
            else:
                self.p_printer = None

        # Completion state lives here since it may span multiple parsers.
        self.trail = _BaseTrail()  # no-op by default

    def Init_Trail(self, trail):
        # type: (_BaseTrail) -> None
        # Install a real Trail (e.g. for interactive completion/history);
        # the default _BaseTrail ignores everything.
        self.trail = trail

    def MakeLexer(self, line_reader):
        # type: (_Reader) -> Lexer
        """Helper function.

        NOTE: I tried to combine the LineLexer and Lexer, and it didn't perform
        better.
        """
        # Take Arena from LineReader
        line_lexer = lexer.LineLexer(line_reader.arena)
        return lexer.Lexer(line_lexer, line_reader)

    def MakeOshParser(self, line_reader, emit_comp_dummy=False):
        # type: (_Reader, bool) -> CommandParser
        """Make the main shell parser; uses self.parse_opts."""
        lx = self.MakeLexer(line_reader)
        if emit_comp_dummy:
            lx.EmitCompDummy()  # A special token before EOF!

        w_parser = word_parse.WordParser(self, lx, line_reader)
        c_parser = cmd_parse.CommandParser(self, self.parse_opts, w_parser, lx,
                                           line_reader)
        return c_parser

    def MakeConfigParser(self, line_reader):
        # type: (_Reader) -> CommandParser
        # Like MakeOshParser, but with fresh YSH parse options instead of
        # self.parse_opts.
        lx = self.MakeLexer(line_reader)
        parse_opts = state.MakeYshParseOpts()
        w_parser = word_parse.WordParser(self, lx, line_reader)
        c_parser = cmd_parse.CommandParser(self, parse_opts, w_parser, lx,
                                           line_reader)
        return c_parser

    def MakeWordParserForHereDoc(self, line_reader):
        # type: (_Reader) -> WordParser
        lx = self.MakeLexer(line_reader)
        return word_parse.WordParser(self, lx, line_reader)

    def MakeWordParser(self, lx, line_reader):
        # type: (Lexer, _Reader) -> WordParser
        return word_parse.WordParser(self, lx, line_reader)

    def MakeArithParser(self, code_str, blame_loc=loc.Missing):
        # type: (str, loc_t) -> TdopParser
        """Used for a[x+1]=foo in the CommandParser, unset, printf -v"""
        # Save lines into temp arena, for dynamic parsing
        arena = alloc.Arena()
        arena.PushSource(source.Dynamic('sh arith expr', blame_loc))
        line_reader = reader.StringLineReader(code_str, arena)
        lx = self.MakeLexer(line_reader)
        w_parser = word_parse.WordParser(self, lx, line_reader)
        w_parser.Init(lex_mode_e.Arith)  # Special initialization
        a_parser = tdop.TdopParser(arith_parse.Spec(), w_parser,
                                   self.parse_opts)
        return a_parser

    def MakeParserForCommandSub(self, line_reader, lexer, eof_id):
        # type: (_Reader, Lexer, Id_t) -> CommandParser
        """To parse command sub, we want a fresh word parser state."""
        w_parser = word_parse.WordParser(self, lexer, line_reader)
        c_parser = cmd_parse.CommandParser(self,
                                           self.parse_opts,
                                           w_parser,
                                           lexer,
                                           line_reader,
                                           eof_id=eof_id)
        return c_parser

    def MakeWordParserForPlugin(self, code_str):
        # type: (str) -> WordParser
        """For $PS1, $PS4, etc."""
        line_reader = reader.StringLineReader(code_str, self.arena)
        lx = self.MakeLexer(line_reader)
        return word_parse.WordParser(self, lx, line_reader)

    def _YshParser(self):
        # type: () -> expr_parse.ExprParser
        # Fresh expression parser over the shared YSH grammar.
        return expr_parse.ExprParser(self, self.ysh_grammar)

    def ParseVarDecl(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[VarDecl, Token]
        """ var mylist = [1, 2, 3] """
        e_parser = self._YshParser()
        # ctx_PNodeAllocator frees the parse nodes on exit, so the
        # transformation to an AST happens inside the 'with' block.
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_var_decl)

            if 0:  # Flip to 1 to print the parse tree when debugging
                self.p_printer.Print(pnode)

            ast_node = self.tr.MakeVarDecl(pnode)
        ast_node.keyword = kw_token  # VarDecl didn't fill this in

        return ast_node, last_token

    def ParseMutation(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[Mutation, Token]
        """ setvar d['a'] += 1 """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_mutation)
            if 0:  # Flip to 1 to print the parse tree when debugging
                self.p_printer.Print(pnode)
            ast_node = self.tr.MakeMutation(pnode)
        ast_node.keyword = kw_token  # Mutation didn't fill this in

        return ast_node, last_token

    def ParseProcCallArgs(self, lx, out, start_symbol):
        # type: (Lexer, ArgList, int) -> None
        """ json write (x, foo=1) and assert [42 === x] """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lx, start_symbol)

            if 0:  # Flip to 1 to print the parse tree when debugging
                self.p_printer.Print(pnode)

            # Mutates 'out' in place rather than returning a new node.
            self.tr.ProcCallArgs(pnode, out)
        out.right = last_token

    def ParseYshExpr(self, lx, start_symbol):
        # type: (Lexer, int) -> Tuple[expr_t, Token]
        """if (x > 0) { ...

        }, while, etc.
        """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lx, start_symbol)
            if 0:  # Flip to 1 to print the parse tree when debugging
                self.p_printer.Print(pnode)

            ast_node = self.tr.Expr(pnode)

        return ast_node, last_token

    def ParseYshCasePattern(self, lexer):
        # type: (Lexer) -> Tuple[pat_t, Token, Token]
        """(6) | (7), / dot* '.py' /, (else), etc.

        Alongside the pattern, this returns the first token in the pattern and
        the LBrace token at the start of the case arm body.
        """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_case_pat)

            left_tok = pnode.GetChild(0).tok
            pattern = self.tr.YshCasePattern(pnode)

        return pattern, left_tok, last_token

    def ParseProc(self, lexer, out):
        # type: (Lexer, Proc) -> Token
        """proc f(x, y, @args) {"""
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_proc)

            if 0:  # Flip to 1 to print the parse tree when debugging
                self.p_printer.Print(pnode)

            out.sig = self.tr.Proc(pnode)  # fills in the signature only

        return last_token

    def ParseFunc(self, lexer, out):
        # type: (Lexer, Func) -> Token
        """ func f(x Int, y Int = 0, ...args; z Int = 3, ...named) => Int """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_func)

            if 0:  # Flip to 1 to print the parse tree when debugging
                self.p_printer.Print(pnode)

            # Mutates 'out' in place rather than returning a new node.
            self.tr.YshFunc(pnode, out)
        return last_token
405

# Another parser instantiation:
# - For Array Literal in word_parse.py WordParser:
#   w_parser = WordParser(self.lexer, self.line_reader)