1 | """
|
2 | parse_lib.py - Consolidate various parser instantiations here.
|
3 | """
|
4 |
|
5 | from _devbuild.gen.id_kind_asdl import Id_t
|
6 | from _devbuild.gen.syntax_asdl import (Token, CompoundWord, expr_t, Redir,
|
7 | ArgList, Proc, Func, pat_t, VarDecl,
|
8 | Mutation, source, loc, loc_t)
|
9 | from _devbuild.gen.types_asdl import lex_mode_e
|
10 | from _devbuild.gen import grammar_nt
|
11 |
|
12 | from asdl import format as fmt
|
13 | from core import alloc
|
14 | from core import state
|
15 | from frontend import lexer
|
16 | from frontend import reader
|
17 | from osh import tdop
|
18 | from osh import arith_parse
|
19 | from osh import cmd_parse
|
20 | from osh import word_parse
|
21 | from mycpp import mylib
|
22 | from mycpp.mylib import log
|
23 | from ysh import expr_parse
|
24 | from ysh import expr_to_ast
|
25 | from ysh.expr_parse import ctx_PNodeAllocator
|
26 |
|
_ = log  # Keep 'log' imported for ad-hoc debugging; assignment silences unused-import warnings.
|
28 |
|
29 | from typing import Any, List, Tuple, Dict, TYPE_CHECKING
|
30 | if TYPE_CHECKING:
|
31 | from core.util import _DebugFile
|
32 | from core import optview
|
33 | from frontend.lexer import Lexer
|
34 | from frontend.reader import _Reader
|
35 | from osh.tdop import TdopParser
|
36 | from osh.word_parse import WordParser
|
37 | from osh.cmd_parse import CommandParser
|
38 | from pgen2.grammar import Grammar
|
39 |
|
40 |
|
class _BaseTrail(object):
    """Defines the Trail interface; every method is a no-op in this base."""

    def __init__(self):
        # type: () -> None
        # Words from a partially completed command.
        # Filled in by _ScanSimpleCommand in osh/cmd_parse.py.
        self.words = []  # type: List[CompoundWord]
        self.redirects = []  # type: List[Redir]
        # TODO: We should maintain the LST invariant and have a single list, but
        # that I ran into the "cases classes are better than variants" problem.

        # Non-ignored tokens, after PushHint translation.  Used for variable name
        # completion.  Filled in by _Peek() in osh/word_parse.py.
        #
        # Example:
        # $ echo $\
        # f<TAB>
        # This could complete $foo.
        # Problem: readline doesn't even allow that, because it spans more than
        # one line!
        self.tokens = []  # type: List[Token]

        # Words INSIDE an alias expansion, kept separate from self.words.
        self.alias_words = []  # type: List[CompoundWord]
        self._expanding_alias = False

    def Clear(self):
        # type: () -> None
        pass

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        pass

    def AppendToken(self, token):
        # type: (Token) -> None
        pass

    def BeginAliasExpansion(self):
        # type: () -> None
        pass

    def EndAliasExpansion(self):
        # type: () -> None
        pass

    if mylib.PYTHON:

        def PrintDebugString(self, debug_f):
            # type: (_DebugFile) -> None

            # note: could cast DebugFile to IO[str] instead of ignoring?
            # Same output as printing each section by hand: a heading, then
            # each node pretty-printed followed by a blank line.
            sections = [
                (' words:', self.words),
                (' redirects:', self.redirects),
                (' tokens:', self.tokens),
                (' alias_words:', self.alias_words),
            ]
            for heading, nodes in sections:
                debug_f.writeln(heading)
                for node in nodes:
                    fmt.PrettyPrint(node, f=debug_f)  # type: ignore
                    debug_f.writeln('')

    def __repr__(self):
        # type: () -> str
        return '<Trail %s %s %s %s>' % (self.words, self.redirects,
                                        self.tokens, self.alias_words)
|
118 |
|
119 |
|
class ctx_Alias(object):
    """Tells the Trail to capture only the FIRST word of an alias expansion.

    For example, given

        alias ll='ls -l'

    we want to capture 'ls' as the first word.

    While this context is active, SetLatestWords and AppendToken must be
    inactive, because we don't need the other tokens from 'ls -l'.  Recording
    them would also probably cause bugs in history expansion, e.g. echo !1
    should be the first word the user typed, not the first word after alias
    expansion.
    """

    def __init__(self, trail):
        # type: (_BaseTrail) -> None
        self.trail = trail
        trail._expanding_alias = True

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        trail = self.trail
        trail._expanding_alias = False
|
148 |
|
149 |
|
class Trail(_BaseTrail):
    """Info left by the parser to help us complete shell syntax and commands.

    It's also used for history expansion.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        _BaseTrail.__init__(self)

    def Clear(self):
        # type: () -> None
        # Reset all four lists in place.  (Arguably tokens and alias_words
        # don't strictly need to be reset.)
        del self.words[:]
        del self.redirects[:]
        del self.tokens[:]
        del self.alias_words[:]

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        if self._expanding_alias:
            self.alias_words = words  # Save these separately
        else:
            self.words = words
            self.redirects = redirects

    def AppendToken(self, token):
        # type: (Token) -> None
        # We don't want tokens inside aliases
        if not self._expanding_alias:
            self.tokens.append(token)
|
182 |
|
183 |
|
if TYPE_CHECKING:
    # Aliases currently being expanded.  Each entry pairs a name with an int;
    # presumably this detects recursive alias expansion — TODO confirm against
    # the usage in osh/cmd_parse.py.
    AliasesInFlight = List[Tuple[str, int]]
|
186 |
|
187 |
|
class ParseContext(object):
    """Context shared between the mutually recursive Command and Word parsers.

    In contrast, STATE is stored in the CommandParser and WordParser
    instances.
    """

    def __init__(self,
                 arena,
                 parse_opts,
                 aliases,
                 ysh_grammar,
                 do_lossless=False):
        # type: (alloc.Arena, optview.Parse, Dict[str, str], Grammar, bool) -> None
        self.arena = arena
        self.parse_opts = parse_opts
        self.aliases = aliases
        self.ysh_grammar = ysh_grammar
        self.do_lossless = do_lossless

        # NOTE: The transformer is really a pure function.
        if not ysh_grammar:  # hack for unit tests, which pass None
            self.tr = None
        else:
            self.tr = expr_to_ast.Transformer(ysh_grammar)

        if mylib.PYTHON:
            self.p_printer = None
            if self.tr:
                self.p_printer = self.tr.p_printer

        # Completion state lives here since it may span multiple parsers.
        self.trail = _BaseTrail()  # no-op by default

    def Init_Trail(self, trail):
        # type: (_BaseTrail) -> None
        # Install a real Trail, replacing the no-op default.
        self.trail = trail

    def MakeLexer(self, line_reader):
        # type: (_Reader) -> Lexer
        """Helper: build a Lexer on top of the given line reader.

        NOTE: I tried to combine the LineLexer and Lexer, and it didn't
        perform better.
        """
        # The LineLexer takes its Arena from the LineReader.
        return lexer.Lexer(lexer.LineLexer(line_reader.arena), line_reader)

    def MakeOshParser(self, line_reader, emit_comp_dummy=False):
        # type: (_Reader, bool) -> CommandParser
        lex = self.MakeLexer(line_reader)
        if emit_comp_dummy:
            lex.EmitCompDummy()  # A special token before EOF!
        wp = word_parse.WordParser(self, lex, line_reader)
        return cmd_parse.CommandParser(self, self.parse_opts, wp, lex,
                                       line_reader)

    def MakeConfigParser(self, line_reader):
        # type: (_Reader) -> CommandParser
        # Config files are parsed with YSH options, regardless of self.parse_opts.
        lex = self.MakeLexer(line_reader)
        opts = state.MakeYshParseOpts()
        wp = word_parse.WordParser(self, lex, line_reader)
        return cmd_parse.CommandParser(self, opts, wp, lex, line_reader)

    def MakeWordParserForHereDoc(self, line_reader):
        # type: (_Reader) -> WordParser
        return word_parse.WordParser(self, self.MakeLexer(line_reader),
                                     line_reader)

    def MakeWordParser(self, lx, line_reader):
        # type: (Lexer, _Reader) -> WordParser
        return word_parse.WordParser(self, lx, line_reader)

    def MakeArithParser(self, code_str, blame_loc=loc.Missing):
        # type: (str, loc_t) -> TdopParser
        """Used for a[x+1]=foo in the CommandParser, unset, printf -v"""
        # This is dynamic parsing, so save lines into a temporary arena.
        arena = alloc.Arena()
        arena.PushSource(source.Dynamic('sh arith expr', blame_loc))
        line_reader = reader.StringLineReader(code_str, arena)
        lex = self.MakeLexer(line_reader)
        wp = word_parse.WordParser(self, lex, line_reader)
        wp.Init(lex_mode_e.Arith)  # Special initialization
        return tdop.TdopParser(arith_parse.Spec(), wp, self.parse_opts)

    def MakeParserForCommandSub(self, line_reader, lexer, eof_id):
        # type: (_Reader, Lexer, Id_t) -> CommandParser
        """To parse command sub, we want a fresh word parser state."""
        wp = word_parse.WordParser(self, lexer, line_reader)
        return cmd_parse.CommandParser(self,
                                       self.parse_opts,
                                       wp,
                                       lexer,
                                       line_reader,
                                       eof_id=eof_id)

    def MakeWordParserForPlugin(self, code_str):
        # type: (str) -> WordParser
        """For $PS1, $PS4, etc."""
        line_reader = reader.StringLineReader(code_str, self.arena)
        return word_parse.WordParser(self, self.MakeLexer(line_reader),
                                     line_reader)

    def _YshParser(self):
        # type: () -> expr_parse.ExprParser
        # Fresh expression parser over the shared YSH grammar.
        return expr_parse.ExprParser(self, self.ysh_grammar)

    def ParseVarDecl(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[VarDecl, Token]
        """ var mylist = [1, 2, 3] """
        ep = self._YshParser()
        with ctx_PNodeAllocator(ep):
            pnode, last_token = ep.Parse(lexer, grammar_nt.ysh_var_decl)

            if 0:  # flip to debug the parse tree
                self.p_printer.Print(pnode)

            ast_node = self.tr.MakeVarDecl(pnode)
            ast_node.keyword = kw_token  # VarDecl didn't fill this in

            return ast_node, last_token

    def ParseMutation(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[Mutation, Token]
        """ setvar d['a'] += 1 """
        ep = self._YshParser()
        with ctx_PNodeAllocator(ep):
            pnode, last_token = ep.Parse(lexer, grammar_nt.ysh_mutation)

            if 0:  # flip to debug the parse tree
                self.p_printer.Print(pnode)

            ast_node = self.tr.MakeMutation(pnode)
            ast_node.keyword = kw_token  # Mutation didn't fill this in

            return ast_node, last_token

    def ParseProcCallArgs(self, lx, out, start_symbol):
        # type: (Lexer, ArgList, int) -> None
        """ json write (x, foo=1) and assert [42 === x] """
        ep = self._YshParser()
        with ctx_PNodeAllocator(ep):
            pnode, last_token = ep.Parse(lx, start_symbol)

            if 0:  # flip to debug the parse tree
                self.p_printer.Print(pnode)

            self.tr.ProcCallArgs(pnode, out)
            out.right = last_token

    def ParseYshExpr(self, lx, start_symbol):
        # type: (Lexer, int) -> Tuple[expr_t, Token]
        """if (x > 0) { ...

        }, while, etc.
        """
        ep = self._YshParser()
        with ctx_PNodeAllocator(ep):
            pnode, last_token = ep.Parse(lx, start_symbol)

            if 0:  # flip to debug the parse tree
                self.p_printer.Print(pnode)

            ast_node = self.tr.Expr(pnode)

            return ast_node, last_token

    def ParseYshCasePattern(self, lexer):
        # type: (Lexer) -> Tuple[pat_t, Token, Token]
        """(6) | (7), / dot* '.py' /, (else), etc.

        Alongside the pattern, this returns the first token in the pattern and
        the LBrace token at the start of the case arm body.
        """
        ep = self._YshParser()
        with ctx_PNodeAllocator(ep):
            pnode, last_token = ep.Parse(lexer, grammar_nt.ysh_case_pat)

            left_tok = pnode.GetChild(0).tok
            pattern = self.tr.YshCasePattern(pnode)

            return pattern, left_tok, last_token

    def ParseProc(self, lexer, out):
        # type: (Lexer, Proc) -> Token
        """proc f(x, y, @args) {"""
        ep = self._YshParser()
        with ctx_PNodeAllocator(ep):
            pnode, last_token = ep.Parse(lexer, grammar_nt.ysh_proc)

            if 0:  # flip to debug the parse tree
                self.p_printer.Print(pnode)

            out.sig = self.tr.Proc(pnode)

            return last_token

    def ParseFunc(self, lexer, out):
        # type: (Lexer, Func) -> Token
        """ func f(x Int, y Int = 0, ...args; z Int = 3, ...named) => Int """
        ep = self._YshParser()
        with ctx_PNodeAllocator(ep):
            pnode, last_token = ep.Parse(lexer, grammar_nt.ysh_func)

            if 0:  # flip to debug the parse tree
                self.p_printer.Print(pnode)

            self.tr.YshFunc(pnode, out)
            return last_token
|
405 |
|
406 |
|
407 | # Another parser instantiation:
|
408 | # - For Array Literal in word_parse.py WordParser:
|
409 | # w_parser = WordParser(self.lexer, self.line_reader)
|