frontend/syntax.asdl

OILS / frontend / syntax.asdl View on Github | oils.pub

702 lines, 314 significant

1	# Data types for the Oils AST, aka "Lossless Syntax Tree".
2	#
3	# Invariant: the source text can be reconstructed byte-for-byte from this tree.
4	# The test/lossless.sh suite verifies this.
5
6	# We usually try to preserve the physical order of the source in the ASDL
7	# fields. One exception is the order of redirects:
8	#
9	# echo >out.txt hi
10	# # versus
11	# echo hi >out.txt
12
13	# Unrepresented:
14	# - let arithmetic (rarely used)
15	# - coprocesses # one with arg and one without
16	# - select block
17
18	# Possible refactorings:
19	#
20	# # %CompoundWord as first class variant:
21	# bool_expr = WordTest %CompoundWord \| ...
22	#
23	# # Can DoubleQuoted have a subset of parts compared with CompoundWord?
24	# string_part = ... # subset of word_part
25	#
26	# - Distinguish word_t with BracedTree vs. those without? seq_word_t?
27
28	module syntax
29	{
30	use core value {
31	value LiteralBlock
32	}
33
34	# More efficient than the List[bool] pattern we've been using
35	BoolParamBox = (bool b)
36	IntParamBox = (int i)
37
38	# core/main_loop.py
39	parse_result = EmptyLine \| Eof \| Node(command cmd)
40
41	# 'source' represents the location of a line / token.
42	source =
43	Interactive
44	\| Headless
45	\| Unused(str comment) # completion and history never show parse errors?
46	\| CFlag
47	\| Stdin(str comment)
48
49	# MainFile is for main.{osh,ysh}, --eval oshrc/yshrc. They're files loaded
50	# directly by the shell.
51	\| MainFile(str path)
52	# A file loaded by 'source' or 'use'.
53	# TODO: we probably don't need this location? The debug stack provides a
54	# chain of locations back to the sourced script. Maybe we need to point to
55	# a debug_frame instead?
56	# It could be DiskFileShell and DiskFileUser, or just DiskFile.
57	\| OtherFile(str path, loc location)
58
59	# Code parsed from a word. (TODO: rename source.Word?)
60	# used for 'eval arg', 'trap arg', 'printf arg',
61	# parseCommand() - this is a string?
62	# dynamic LHS - move this to Reparsed?
63	# complete -W
64	\| Dynamic(str what, loc location)
65
66	# Point to the original variable reference
67	\| VarRef(Token orig_tok)
68
69	# code parsed from the value of a variable
70	# used for $PS1 $PROMPT_COMMAND
71	\| Variable(str var_name, loc location)
72
73	# alias expansion (location of first word)
74	\| Alias(str argv0, loc argv0_loc)
75
76	# 2 kinds of reparsing: backticks, and x+1 in a[x+1]=y
77	\| Reparsed(str what, Token left_token, Token right_token)
78
79	# For --location-str
80	\| Synthetic(str s)
81
82	SourceLine = (int line_num, str content, source src)
83
84	# Note that ASDL generates:
85	# typedef uint16_t Id_t;
86	# So Token is
87	# 8 bytes GC header + 2 + 2 + 4 + 8 + 8 = 32 bytes on 64-bit machines
88	#
89	# We transpose (id, col, length) -> (id, length, col) for C struct packing.
90	Token = (id id, uint16 length, int col, SourceLine? line, str? tval)
91
92	# I wanted to get rid of Token.tval with this separate WideToken type, but it
93	# is more efficient if word_part.Literal %Token literally is the same thing
94	# that comes out of the lexer. Otherwise we have extra garbage.
95
96	# WideToken = (id id, int length, int col, SourceLine? line, str? tval)
97
98	# Slight ASDL bug: CompoundWord has to be defined before using it as a shared
99	# variant. The _product_counter algorithm should be moved into a separate
100	# tag-assigning pass, and shared between gen_python.py and gen_cpp.py.
101	CompoundWord = (List[word_part] parts)
102
103	# Source location for errors
104	loc =
105	Missing # equivalent of runtime.NO_SPID
106	\| Token %Token
107	# Very common case: argv arrays need original location
108	\| ArgWord %CompoundWord
109	\| WordPart(word_part p)
110	\| Word(word w)
111	\| Arith(arith_expr a)
112	# e.g. for errexit blaming
113	\| Command(command c)
114	# the location of a token that's too long
115	\| TokenTooLong(SourceLine line, id id, int length, int col)
116
117	# debug_frame_t is an EXECUTION stack (proc func source use eval), while
118	# source_t (in some cases) is like a PARSING stack (files, strings from vars,
119	# etc.)
120	debug_frame =
121	# OSH: main_filename => BASH_SOURCE
122	MainFile(str main_filename)
123	# YSH
124	\| Dummy # -c or stdin, not used by BASH_* vars
125	# Note: we could have more "frame 0" than MainFile and Dummy -
126	# - Completion hooks - dev.Tracer is missing these
127	# - PS1
128	# - PROMPT_COMMAND
129
130	# OSH: source_loc => BASH_LINENO, source_name => BASH_SOURCE
131	\| Source(CompoundWord source_loc, str source_name)
132
133	# OSH: invoke_loc => BASH_LINENO, def_tok => BASH_SOURCE
134	# YSH: procs
135	\| ProcLike(CompoundWord invoke_loc, Token def_tok, str proc_name)
136
137	# for io->eval, myfunc()
138	\| Token %Token
139
140	# For 'eval', 'use', ...
141	\| CompoundWord %CompoundWord
142
143	# Special frame added when running 'trap ERR', for more info, and as a sentinel
144	\| BeforeErrTrap(Token tok)
145
146	#
147	# Shell language
148	#
149
150	bracket_op =
151	WholeArray(id op_id) # * or @
152	\| ArrayIndex(arith_expr expr)
153
154	suffix_op =
155	Nullary %Token # ${x@Q} or ${!prefix@} (which also has prefix_op)
156	\| Unary(Token op, rhs_word arg_word) # e.g. ${v:-default}
157	# TODO: Implement YSH ${x\|html} and ${x %.3f}
158	\| Static(Token tok, str arg)
159	\| PatSub(CompoundWord pat, rhs_word replace, id replace_mode, Token slash_tok)
160	# optional begin is arith_expr.EmptyZero
161	# optional length is None, because it's handled in a special way
162	\| Slice(arith_expr begin, arith_expr? length)
163
164	BracedVarSub = (
165	Token left, # in dynamic ParseVarRef, same as name_tok
166	Token name_tok, # location for the name
167	str var_name, # the name - TODO: remove this, use LazyStr() instead
168	Token? prefix_op, # prefix # or ! operators
169	bracket_op? bracket_op,
170	suffix_op? suffix_op,
171	Token right # in dynamic ParseVarRef, same as name_tok
172	)
173
174	# Variants:
175	# - Look at left token ID for $'' c'' vs r'' '' e.g. Id.Left_DollarSingleQuote
176	# - And """ and ''' e.g. Id.Left_TDoubleQuote
177	DoubleQuoted = (Token left, List[word_part] parts, Token right)
178
179	# Consider making str? sval LAZY, like lexer.LazyStr(tok)
180	SingleQuoted = (Token left, str sval, Token right)
181
182	# e.g. Id.VSub_QMark, Id.VSub_DollarName $foo with lexer.LazyStr()
183	SimpleVarSub = (Token tok)
184
185	CommandSub = (Token left_token, command child, Token right)
186
187	# - can contain word.BracedTree
188	# - 'Token right' is recorded (an older note here said it was omitted)
189	YshArrayLiteral = (Token left, List[word] words, Token right)
190
191	# Unevaluated, typed arguments for func and proc.
192	# Note that ...arg is expr.Spread.
193	ArgList = (
194	Token left, List[expr] pos_args,
195	Token? semi_tok, List[NamedArg] named_args,
196	Token? semi_tok2, expr? block_expr,
197	Token right
198	)
199
200	AssocPair = (CompoundWord key, CompoundWord value, bool has_plus)
201
202	InitializerWord =
203	ArrayWord(word w)
204	\| AssocPair %AssocPair
205
206	word_part =
207	YshArrayLiteral %YshArrayLiteral
208	\| InitializerLiteral(Token left, List[InitializerWord] pairs, Token right)
209	\| Literal %Token
210	# escaped case is separate so the evaluator doesn't have to check token ID
211	\| EscapedLiteral(Token token, str ch)
212	\| SingleQuoted %SingleQuoted
213	\| DoubleQuoted %DoubleQuoted
214	# Could be SimpleVarSub %Token that's VSub_DollarName, but let's not
215	# confuse it with word_part.Literal, which is the common case for now
216	\| SimpleVarSub %SimpleVarSub
217	\| BracedVarSub %BracedVarSub
218	\| ZshVarSub (Token left, CompoundWord ignored, Token right)
219	# For command sub and process sub: $(...) <(...) >(...)
220	\| CommandSub %CommandSub
221	# ~ or ~bob
222	\| TildeSub(Token left, # always the tilde
223	Token? name, str? user_name)
224	\| ArithSub(Token left, arith_expr anode, Token right)
225	# {a,b,c}
226	\| BracedTuple(List[CompoundWord] words)
227	# {1..10} or {-5..10..2} or {01..10} (leading zeros matter)
228	# {a..f} or {a..f..2} or {a..f..-2}
229	# the whole range is one Token,
230	\| BracedRange(Token blame_tok, id kind, str start, str end, int step)
231	# expanded version of {1..10}
232	\| BracedRangeDigit(str s, Token orig_tok)
233	# extended globs are parsed statically, unlike globs
234	\| ExtGlob(Token op, List[CompoundWord] arms, Token right)
235	# a regex group is similar to an extended glob part
236	\| BashRegexGroup(Token left, CompoundWord? child, Token right)
237
238	# YSH word_part extensions
239
240	# @myarray - Id.Lit_Splice (could be optimized to %Token)
241	\| Splice(Token blame_tok, str var_name)
242	# $[d.key], etc.
243	\| ExprSub(Token left, expr child, Token right)
244
245	# Use cases for Empty: RHS of 'x=', the argument in "${x:-}".
246	# The latter is semantically necessary. (See osh/word_parse.py).
247	# At runtime: RHS of 'declare x='.
248	rhs_word = Empty \| Compound %CompoundWord
249
250	word =
251	# Returns from WordParser, but not generally stored in LST
252	Operator %Token
253	# A Compound word can contain any word_part except the Braced*Part.
254	# We could model this with another variant type but it incurs runtime
255	# overhead and seems like overkill. Note that DoubleQuoted can't
256	# contain a SingleQuoted, etc. either.
257	\| Compound %CompoundWord
258	# For word sequences command.Simple, YshArrayLiteral, for_iter.Words
259	# Could be its own type
260	\| BracedTree(List[word_part] parts)
261	# For dynamic parsing of test aka [ - the string is already evaluated.
262	\| String(id id, str s, CompoundWord? blame_loc)
263
264	# Note: the name 'foo' is derived from token value 'foo=' or 'foo+='
265	sh_lhs =
266	Name(Token left, str name) # Lit_VarLike foo=
267	# TODO: Could be Name %Token
268	\| IndexedName(Token left, str name, arith_expr index)
269	\| UnparsedIndex(Token left, str name, str index) # for translation
270
271	arith_expr =
272	EmptyZero # these are valid: $(( )) (( )) ${a[@]: : }
273	\| EmptyOne # condition is 1 for infinite loop: for (( ; ; ))
274	\| VarSub %Token # e.g. $(( x )) Id.Arith_VarLike
275	\| Word %CompoundWord # e.g. $(( 123'456'$y ))
276
277	\| UnaryAssign(id op_id, arith_expr child)
278	\| BinaryAssign(id op_id, arith_expr left, arith_expr right)
279
280	\| Unary(id op_id, arith_expr child)
281	\| Binary(Token op, arith_expr left, arith_expr right)
282	\| TernaryOp(arith_expr cond, arith_expr true_expr, arith_expr false_expr)
283
284	bool_expr =
285	WordTest(word w) # e.g. [[ myword ]]
286	\| Binary(id op_id, word left, word right)
287	\| Unary(id op_id, word child)
288	\| LogicalNot(bool_expr child)
289	\| LogicalAnd(bool_expr left, bool_expr right)
290	\| LogicalOr(bool_expr left, bool_expr right)
291
292	redir_loc =
293	Fd(int fd) \| VarName(str name)
294
295	redir_param =
296	Word %CompoundWord
297	\| HereWord(CompoundWord w, bool is_multiline)
298	\| HereDoc(word here_begin, # e.g. EOF or 'EOF'
299	Token? here_end_tok, # Token consisting of the whole line
300	# It's always filled in AFTER creation, but
301	# temporarily so optional
302	List[word_part] stdin_parts # one for each line
303	)
304
305	Redir = (Token op, redir_loc loc, redir_param arg)
306
307	assign_op = Equal \| PlusEqual
308	AssignPair = (Token left, sh_lhs lhs, assign_op op, rhs_word rhs)
309	# TODO: could put Id.Lit_VarLike foo= into LazyStr() with -1 slice
310	EnvPair = (Token left, str name, rhs_word val)
311
312	List_of_command < List[command]
313
314	condition =
315	Shell %List_of_command # if false; true; then echo hi; fi
316	\| YshExpr(expr e) # if (x > 0) { echo hi }
317	# TODO: add more specific blame location
318
319	# Each arm tests one word against multiple words
320	# shell: .cc\|.h) echo C++ ;;
321	# YSH: .cc\|.h { echo C++ }
322	#
323	# Three location tokens:
324	# 1. left - shell has ( or .cc ysh has .cc
325	# 2. middle - shell has ) ysh has {
326	# 3. right - shell has optional ;; ysh has required }
327	#
328	# For YSH typed case, left can be ( and /
329	# And pat may contain more details
330	CaseArm = (
331	Token left, pat pattern, Token middle, List[command] action,
332	Token? right
333	)
334
335	# The argument to match against in a case command
336	# In YSH-style case commands we match against an `expr`, but in sh-style case
337	# commands we match against a word.
338	case_arg =
339	Word(word w)
340	\| YshExpr(expr e)
341
342	EggexFlag = (bool negated, Token flag)
343
344	# canonical_flags can be compared for equality. This is needed to splice
345	# eggexes correctly, e.g. / 'abc' @pat ; i /
346	Eggex = (
347	Token left, re regex, List[EggexFlag] flags, Token? trans_pref,
348	str? canonical_flags)
349
350	pat =
351	Else
352	\| Words(List[word] words)
353	\| YshExprs(List[expr] exprs)
354	\| Eggex %Eggex
355
356	# Each if arm starts with either an "if" or "elif" keyword
357	# In YSH, the then keyword is not used (replaced by braces {})
358	IfArm = (
359	Token keyword, condition cond, Token? then_kw, List[command] action,
360	# then_tok used in ysh-ify
361	Token? then_tok)
362
363	for_iter =
364	Args # for x; do echo $x; done # implicit "$@"
365	\| Words(List[word] words) # for x in 'foo' *.py { echo $x }
366	# Words is like YshArrayLiteral, but has no location for %(
367	\| YshExpr(expr e, Token blame) # for x in (mylist) { echo $x }
368	#\| Files(Token left, List[word] words)
369	# for x in <> {
370	# for x in < @myfiles > {
371
372	BraceGroup = (
373	Token left, Token? doc_token, List[command] children, Token right
374	)
375
376	Param = (Token blame_tok, str name, TypeExpr? type, expr? default_val)
377	RestParam = (Token blame_tok, str name)
378
379	ParamGroup = (List[Param] params, RestParam? rest_of)
380
381	# 'open' is for proc p { }; closed is for proc p () { }
382	proc_sig =
383	Open
384	\| Closed(ParamGroup? word, ParamGroup? positional, ParamGroup? named,
385	Param? block_param)
386
387	Proc = (Token keyword, Token name, proc_sig sig, command body)
388
389	Func = (
390	Token keyword, Token name,
391	ParamGroup? positional, ParamGroup? named,
392	command body
393	)
394
395	# Represents all these cases: s=1 s+=1 s[x]=1 ...
396	ParsedAssignment = (Token? left, Token? close, int part_offset, CompoundWord w)
397
398	# var, const.
399	# - Keyword is None for hay blocks. TODO: consider using BareDecl?
400	# - 'var x' allowed - RHS is None; idiomatic with value.Place
401	VarDecl = (Token? keyword, List[NameType] lhs, expr? rhs)
402
403	# setvar, maybe 'auto' later
404	Mutation = (Token keyword, List[y_lhs] lhs, Token op, expr rhs)
405
406	# call f(x) = 42
407	ExprCommand = (Token keyword, expr e)
408
409	ShFunction = (
410	Token? keyword, Token name_tok, str name, command body,
411	str? code_str
412	)
413
414	command =
415	NoOp
416
417	# can wrap many children, e.g. { }, loops, functions
418	\| Redirect(command child, List[Redir] redirects)
419
420	\| Simple(Token? blame_tok, # TODO: make required (BracedTuple?)
421	List[EnvPair] more_env,
422	List[word] words,
423	ArgList? typed_args, LiteralBlock? block,
424	# is_last_cmd is used for fork() optimizations
425	bool is_last_cmd)
426
427	# This doesn't technically belong in the LST, but it's convenient for
428	# execution
429	\| ExpandedAlias(command child, List[EnvPair] more_env)
430	\| Sentence(command child, Token terminator)
431	# Represents "bare assignment"
432	# Token left is redundant with pairs[0].left
433	\| ShAssignment(Token left, List[AssignPair] pairs)
434
435	\| ControlFlow(Token keyword, CompoundWord? arg_word)
436
437	# ops are \| \|&
438	\| Pipeline(Token? negated, List[command] children, List[Token] ops)
439	# ops are && \|\|
440	\| AndOr(List[command] children, List[Token] ops)
441
442	# Part of for, while, until (but not if, case, ShFunction). No redirects.
443	\| DoGroup(Token left, List[command] children, Token right)
444	# A brace group is a compound command; redirects wrap it via command.Redirect.
445	\| BraceGroup %BraceGroup
446	# Contains a single child, like CommandSub
447	\| Subshell(Token left, command child, Token right, bool is_last_cmd)
448	\| DParen(Token left, arith_expr child, Token right)
449	\| DBracket(Token left, bool_expr expr, Token right)
450
451	# up to 3 iteration variables
452	\| ForEach(Token keyword, List[str] iter_names, for_iter iterable,
453	Token? semi_tok, command body)
454	# C-style for loop. Any of the 3 expressions can be omitted.
455	# Note: body is required, but only optional here because of initialization
456	# order.
457	\| ForExpr(Token keyword, arith_expr? init, arith_expr? cond,
458	arith_expr? update, command? body)
459	\| WhileUntil(Token keyword, condition cond, command body)
460
461	\| If(Token if_kw, List[IfArm] arms, Token? else_kw, List[command] else_action,
462	Token? fi_kw)
463	\| Case(Token case_kw, case_arg to_match, Token arms_start, List[CaseArm] arms,
464	Token arms_end)
465
466	# The keyword is optional in the case of bash-style functions
467	# (ie. "foo() { ... }") which do not have one.
468	\| ShFunction %ShFunction
469
470	\| TimeBlock(Token keyword, command pipeline)
471	# Some nodes optimize it out as List[command], but we use CommandList for
472	# 1. the top level
473	# 2. ls ; ls & ls (same line)
474	# 3. CommandSub # single child that's a CommandList
475	# 4. Subshell # single child that's a CommandList
476
477	# TODO: Use List_of_command
478	\| CommandList(List[command] children)
479
480	# YSH command constructs
481
482	\| VarDecl %VarDecl
483
484	# this can behave like 'var', can be desugared
485	\| BareDecl(Token lhs, expr rhs)
486
487	\| Mutation %Mutation
488	\| Expr %ExprCommand
489	\| Proc %Proc
490	\| Func %Func
491	\| Retval(Token keyword, expr val)
492
493	# bytecode
494	b_command =
495	VarDecl %VarDecl
496	\| Mutation %Mutation
497
498	#
499	# Glob representation, for converting ${x//} to extended regexes.
500	#
501
502	# Example: *.[ch] is:
503	# Operator(Glob_Star), # the '*'
504	# Literal(Glob_OtherLiteral, '.'),
505	# CharClass(False, ['ch']) # from Glob_CleanLiterals token
506
507	glob_part =
508	Literal(id id, str s)
509	\| Operator(id op_id) # * or ?
510	\| CharClass(bool negated, List[str] strs)
511
512	# Char classes are opaque for now. If we ever need them:
513	# - Collating symbols are [. .]
514	# - Equivalence classes are [= =]
515
516	printf_part =
517	Literal %Token
518	# flags are 0 hyphen space + #
519	# type is 's' for %s, etc.
520	\| Percent(List[Token] flags, Token? width, Token? precision, Token type)
521
522	#
523	# YSH Language
524	#
525	# Copied and modified from Python-3.7/Parser/Python.asdl !
526
527	expr_context = Load \| Store \| Del \| AugLoad \| AugStore \| Param
528
529	# Type expressions: Int List[Int] Dict[Str, Any]
530	# Do we have Func[Int, Int => Int] ? I guess we can parse that into this
531	# system.
532	TypeExpr = (Token tok, str name, List[TypeExpr] params)
533
534	# LHS bindings in var/const, and eggex
535	NameType = (Token left, str name, TypeExpr? typ)
536
537	# TODO: Inline this into GenExp and ListComp? Just use a flag there?
538	Comprehension = (List[NameType] lhs, expr iter, expr? cond)
539
540	# Named arguments supplied to call. Token is null for f(; ...named).
541	NamedArg = (Token? name, expr value)
542
543	# Subscripts are lists of expressions
544	# a[:i, n] (we don't have matrices, but we have data frames)
545	Subscript = (Token left, expr obj, expr index)
546
547	# Attributes are obj.attr, d->key, name::scope,
548	Attribute = (expr obj, Token op, Token attr, str attr_name, expr_context ctx)
549
550	y_lhs =
551	Var %Token # Id.Expr_Name
552	\| Subscript %Subscript
553	\| Attribute %Attribute
554
555	place_op =
556	# &a[i+1]
557	Subscript(Token op, expr index)
558	# &d.mykey
559	\| Attribute(Token op, Token attr)
560
561	expr =
562	Var(Token left, str name) # a variable name to evaluate
563	# Constants are typically Null, Bool, Int, Float
564	# and also Str for key in {key: 42}
565	# But string literals are SingleQuoted or DoubleQuoted
566	# Python uses Num(object n), which doesn't respect our "LST" invariant.
567	\| Const(Token c, value val)
568
569	# read(&x) json read (&x[0])
570	\| Place(Token blame_tok, str var_name, place_op* ops)
571
572	# :\| one 'two' "$three" \|
573	\| YshArrayLiteral %YshArrayLiteral
574
575	# / d+ ; ignorecase; %python /
576	\| Eggex %Eggex
577
578	# $name is not an expr, but $? is, e.g. Id.VSub_QMark
579	\| SimpleVarSub %SimpleVarSub
580	\| BracedVarSub %BracedVarSub
581	\| CommandSub %CommandSub
582	\| SingleQuoted %SingleQuoted
583	\| DoubleQuoted %DoubleQuoted
584
585	\| Literal(expr inner)
586	\| Lambda(List[NameType] params, expr body)
587
588	\| Unary(Token op, expr child)
589	\| Binary(Token op, expr left, expr right)
590	# x < 4 < 3 and (x < 4) < 3
591	\| Compare(expr left, List[Token] ops, List[expr] comparators)
592	\| FuncCall(expr func, ArgList args)
593
594	# TODO: Need a representation for method call. We don't just want
595	# Attribute() and then Call()
596
597	\| IfExp(expr test, expr body, expr orelse)
598	\| Tuple(Token left, List[expr] elts, expr_context ctx)
599
600	\| List(Token left, List[expr] elts, expr_context ctx)
601	\| Dict(Token left, List[expr] keys, List[expr] values)
602	# For the values in {n1, n2}
603	\| Implicit
604
605	\| ListComp(Token left, expr elt, List[Comprehension] generators)
606	# not implemented
607	\| DictComp(Token left, expr key, expr value, List[Comprehension] generators)
608	\| GeneratorExp(expr elt, List[Comprehension] generators)
609
610	# Ranges are written 1:2, with first class expression syntax. There is no
611	# step as in Python. Use range(0, 10, step=2) for that.
612	\| Range(expr lower, Token op, expr upper)
613
614	# Slices occur within [] only. Unlike ranges, the start/end can be
615	# implicit. Like ranges, denote a step with slice(0, 10, step=2).
616	# a[3:] a[:i]
617	\| Slice(expr? lower, Token op, expr? upper)
618
619	\| Subscript %Subscript
620	\| Attribute %Attribute
621
622	# Ellipsis is like 'Starred' within Python, which are valid on the LHS in
623	# Python for unpacking, and within list literals for splicing.
624	# (Starred is NOT used for {k:v, **a}. That used a blank "keys"
625	# attribute.)
626
627	# I think we can use { **pairs } like Python
628	\| Spread(Token left, expr child)
629
630	#
631	# Regex Language (Eggex)
632	#
633
634	# e.g. alnum digit
635	PosixClass = (Token? negated, str name)
636	# e.g. d w s
637	PerlClass = (Token? negated, str name)
638
639	# Char Sets and Ranges both use Char Codes
640	# with u_braced == true : \u{ff}
641	# with u_braced == false: \xff \\ 'a' a '0' 0
642	# ERE doesn't make a distinction, but compiling to Python/PCRE can use it
643	CharCode = (Token blame_tok, int i, bool u_braced)
644	CharRange = (CharCode start, CharCode end)
645
646	# Note: .NET has && in character classes, making it a recursive language
647
648	class_literal_term =
649	PosixClass %PosixClass
650	\| PerlClass %PerlClass
651	\| CharRange %CharRange
652	\| CharCode %CharCode
653
654	\| SingleQuoted %SingleQuoted
655	# @chars
656	\| Splice(Token name, str var_name) # could be Splice %Token
657
658	# evaluated version of class_literal_term (could be in runtime.asdl)
659	char_class_term =
660	PosixClass %PosixClass
661	\| PerlClass %PerlClass
662
663	\| CharRange %CharRange
664	# For [ \x00 \\ ]
665	\| CharCode %CharCode
666
667	# NOTE: modifier is unused now, can represent L or P
668	re_repeat =
669	Op %Token # + * ? or Expr_DecInt for x{3}
670	\| Range(Token? left, str lower, str upper, Token? right) # dot{1,2}
671	# Haven't implemented the modifier, e.g. x{+ P}
672	# \| Num(Token times, id modifier)
673	# \| Range(Token? lower, Token? upper, id modifier)
674
675	re =
676	Primitive(Token blame_tok, id id) # . ^ $ dot %start %end
677	\| PosixClass %PosixClass
678	\| PerlClass %PerlClass
679	# syntax [ $x \n ]
680	\| CharClassLiteral(bool negated, List[class_literal_term] terms)
681	# evaluated [ 'abc' \n ]
682	\| CharClass(bool negated, List[char_class_term] terms)
683
684	# @D
685	\| Splice(Token name, str var_name) # TODO: Splice %Token ?
686
687	\| SingleQuoted %SingleQuoted
688
689	# Compound:
690	\| Repeat(re child, re_repeat op)
691	\| Seq(List[re] children)
692	\| Alt(List[re] children)
693
694	\| Group(re child)
695	# NOTE(review): "convert_func" is not a field here (see func_name); said to be filled in on evaluation - confirm
696	# TODO: name and func_name can be expanded to strings
697	\| Capture(re child, Token? name, Token? func_name)
698	\| Backtracking(bool negated, Token name, re child)
699
700	# \u{ff} is parsed as this, but SingleQuoted also evaluates to it
701	\| LiteralChars(Token blame_tok, str s)
702	}