frontend/id_kind

OILS / frontend / id_kind_def.py View on Github | oils.pub

813 lines, 551 significant

1	#!/usr/bin/env python2
2	# Copyright 2016 Andy Chu. All rights reserved.
3	# Licensed under the Apache License, Version 2.0 (the "License");
4	# you may not use this file except in compliance with the License.
5	# You may obtain a copy of the License at
6	#
7	# http://www.apache.org/licenses/LICENSE-2.0
8	"""
9	id_kind_def.py - Id and Kind definitions, stored in Token
10
11	NOTE: If this file changes, rebuild it with build/py.sh all
12	"""
13	from __future__ import print_function
14
15	from _devbuild.gen.types_asdl import (bool_arg_type_e, bool_arg_type_t)
16	#from mycpp.mylib import log
17
18	from typing import List, Tuple, Dict, Optional, TYPE_CHECKING
19	if TYPE_CHECKING: # avoid circular build deps
20	from _devbuild.gen.id_kind_asdl import Id_t, Kind_t
21
22
class IdSpec(object):
    """Registry that hands out integer Ids and Kinds as they are declared.

    Ids and Kinds are both numbered sequentially from 1, in the order that
    the Add*() methods are called.  The results are recorded in the
    kind_lookup and bool_ops dicts passed to the constructor, and in the
    name -> int tables kept on the instance.
    """

    def __init__(self, kind_lookup, bool_ops):
        # type: (Dict[int, int], Dict[int, bool_arg_type_t]) -> None
        # Caller-owned tables, filled in as Ids are registered.
        self.kind_lookup = kind_lookup  # Id int -> Kind int
        self.bool_ops = bool_ops  # type: Dict[int, bool_arg_type_t]

        # Name -> integer tables
        self.id_str2int = {}  # type: Dict[str, int]
        self.kind_str2int = {}  # type: Dict[str, int]

        # Parallel lists, one entry per registered Kind
        self.kind_name_list = []  # type: List[str]
        self.kind_sizes = []  # type: List[int]  # optional stats

        # Kind int -> (is_regex, pattern, Id int) triples
        self.lexer_pairs = {}  # type: Dict[int, List[Tuple[bool, str, int]]]

        # Next free integers.  Bumped by _AddId() and _AddKind().
        # IMPORTANT: 1-based indices match what asdl/gen_python.py does!!!
        self.id_index = 1
        self.kind_index = 1

    def LexerPairs(self, kind):
        # type: (Kind_t) -> List[Tuple[bool, str, Id_t]]
        """Return a new list of the (is_regex, pattern, id) triples for kind.

        Raises KeyError if no lexer pairs were recorded for that kind.
        """
        return [(is_regex, pat, id_)
                for is_regex, pat, id_ in self.lexer_pairs[kind]]

    def _AddId(self, id_name, kind=None):
        # type: (str, Optional[int]) -> int
        """Allocate the next Id integer for id_name and record its Kind.

        Args:
          id_name: e.g. BoolBinary_Equal
          kind: Kind integer to file the Id under.  When None, the Id goes
                under the kind currently being built (self.kind_index).
                The override is for AddBoolBinaryForBuiltin.

        Returns:
          The integer assigned to id_name.
        """
        new_id = self.id_index
        self.id_str2int[id_name] = new_id
        self.kind_lookup[new_id] = self.kind_index if kind is None else kind

        self.id_index = new_id + 1  # advance only after the tables are updated
        return new_id

    def _AddKind(self, kind_name):
        # type: (str) -> None
        """Give kind_name the current kind index, then advance the index."""
        new_kind = self.kind_index
        self.kind_str2int[kind_name] = new_kind
        self.kind_name_list.append(kind_name)
        self.kind_index = new_kind + 1

    def AddKind(self, kind_name, tokens):
        # type: (str, List[str]) -> None
        """Register a Kind whose Ids are named '<kind_name>_<token>'.

        These Ids get no lexer pairs.
        """
        assert isinstance(tokens, list), tokens

        for suffix in tokens:
            self._AddId('%s_%s' % (kind_name, suffix))

        # The Kind is closed only after its Ids exist, because _AddId() files
        # each Id under the index that _AddKind() is about to hand out.
        self._AddKind(kind_name)
        self.kind_sizes.append(len(tokens))  # debug info

    def AddKindPairs(self, kind_name, pairs):
        # type: (str, List[Tuple[str, str]]) -> None
        """Register a Kind from (name, constant string) pairs.

        Each pair creates the Id '<kind_name>_<name>' and a non-regex lexer
        pair for its constant string.
        """
        assert isinstance(pairs, list), pairs

        constants = []
        for suffix, literal in pairs:
            id_int = self._AddId('%s_%s' % (kind_name, suffix))
            constants.append((False, literal, id_int))  # False: not a regex

        # Keyed by the index this Kind receives in _AddKind() just below.
        self.lexer_pairs[self.kind_index] = constants

        self._AddKind(kind_name)
        self.kind_sizes.append(len(pairs))  # debug info

    def AddBoolKind(
            self,
            kind_name,  # type: str
            arg_type_pairs,  # type: List[Tuple[bool_arg_type_t, List[Tuple[str, str]]]]
    ):
        # type: (...) -> None
        """Register a Kind of boolean operators, grouped by argument type.

        Args:
          kind_name: string
          arg_type_pairs: list of (bool_arg_type_e value, pairs), where pairs
                          is a list of (name, constant string) as accepted by
                          AddKindPairs
        """
        constants = []
        for arg_type, pairs in arg_type_pairs:
            for suffix, literal in pairs:
                # BoolUnary_f, BoolBinary_eq, BoolBinary_NEqual
                id_int = self._AddId('%s_%s' % (kind_name, suffix))
                self.AddBoolOp(id_int, arg_type)  # register type
                constants.append((False, literal, id_int))  # constant

        self.lexer_pairs[self.kind_index] = constants

        # Must come after the _AddId() calls above
        self._AddKind(kind_name)
        # One lexer pair was appended per token, so this is the token count.
        self.kind_sizes.append(len(constants))  # debug info

    def AddBoolBinaryForBuiltin(self, id_name, kind):
        # type: (str, int) -> int
        """For [ = ] [ == ] and [ != ].

        These operators are NOT added to the lexer.  They are "lexed" as
        word.String.

        Args:
          id_name: suffix; the Id is named 'BoolBinary_<id_name>'
          kind: Kind integer to file the new Id under

        Returns:
          The integer of the new Id.
        """
        full_name = 'BoolBinary_%s' % id_name
        id_int = self._AddId(full_name, kind=kind)
        self.AddBoolOp(id_int, bool_arg_type_e.Str)
        return id_int

    def AddBoolOp(self, id_int, arg_type):
        # type: (int, bool_arg_type_t) -> None
        """Associate an Id integer with a bool_arg_type_e value."""
        self.bool_ops[id_int] = arg_type
152
153
def AddKinds(spec):
    # type: (IdSpec) -> None
    """Register every non-boolean Kind, and the Ids inside it, on spec.

    Only spec.AddKind() and spec.AddKindPairs() are called.  The order of the
    calls, and of the names inside each list, is significant and should not
    be rearranged casually.

    Args:
      spec: the IdSpec to populate
    """

    # A compound word, in arith context, boolean context, or command context.
    # A['foo'] A["foo"] A[$foo] A["$foo"] A[${foo}] A["${foo}"]
    spec.AddKind('Word', ['Compound'])

    # Token IDs in Kind.Arith are first to make the TDOP precedence table
    # small.
    #
    # NOTE: Could share Op_Pipe, Op_Amp, Op_DAmp, Op_Semi, Op_LParen, etc.
    # Actually all of Arith could be folded into Op, because we are using
    # WordParser._ReadArithWord vs. WordParser._ReadWord.
    #
    # The second element of each pair is a constant string, not a regex, so
    # it must be spelled exactly as it appears in shell source: '|', not '\|'.
    spec.AddKindPairs(
        'Arith',
        [
            ('Semi', ';'),  # ternary for loop only
            ('Comma', ','),  # function call and C comma operator
            ('Plus', '+'),
            ('Minus', '-'),
            ('Star', '*'),
            ('Slash', '/'),
            ('Percent', '%'),
            ('DPlus', '++'),
            ('DMinus', '--'),
            ('DStar', '**'),
            ('LParen', '('),
            ('RParen', ')'),  # grouping and function call extension
            ('LBracket', '['),
            ('RBracket', ']'),  # array and assoc array subscript
            ('RBrace', '}'),  # for end of var sub

            # Logical Ops
            ('QMark', '?'),
            ('Colon', ':'),  # Ternary Op: a < b ? 0 : 1
            ('LessEqual', '<='),
            ('Less', '<'),
            ('GreatEqual', '>='),
            ('Great', '>'),
            ('DEqual', '=='),
            ('NEqual', '!='),
            # note: these 3 are not in YSH Expr.  (Could be used in find
            # dialect.)
            ('DAmp', '&&'),
            ('DPipe', '||'),
            ('Bang', '!'),

            # Bitwise ops
            ('DGreat', '>>'),
            ('DLess', '<<'),
            # YSH: ^ is exponent
            ('Amp', '&'),
            ('Pipe', '|'),
            ('Caret', '^'),
            ('Tilde', '~'),
            ('Equal', '='),

            # Augmented Assignment for $(( ))
            # Must match the list in osh/arith_parse.py
            # YSH has **= //= like Python
            ('PlusEqual', '+='),
            ('MinusEqual', '-='),
            ('StarEqual', '*='),
            ('SlashEqual', '/='),
            ('PercentEqual', '%='),
            ('DGreatEqual', '>>='),
            ('DLessEqual', '<<='),
            ('AmpEqual', '&='),
            ('CaretEqual', '^='),
            ('PipeEqual', '|='),
        ])

    spec.AddKind('Eof', ['Real', 'RParen', 'Backtick'])

    spec.AddKind('Undefined', ['Tok'])  # for initial state

    # The Unknown kind is used when we lex something, but it's invalid.
    # Examples:
    #   ${^}
    #   $'\z'  Such bad codes are accepted when parse_backslash is on
    #          (default in OSH), so we have to lex them.
    #   (x == y) should use === or ~==
    spec.AddKind('Unknown',
                 ['Tok', 'Backslash', 'DEqual', 'DAmp', 'DPipe', 'DDot'])

    spec.AddKind('Eol', ['Tok'])  # no more tokens on line (\0)

    # Ignored_Newline is for J8 lexing to count lines
    spec.AddKind('Ignored', ['LineCont', 'Space', 'Comment', 'Newline'])

    # Id.WS_Space is for lex_mode_e.ShCommand; Id.Ignored_Space is for
    # lex_mode_e.Arith
    spec.AddKind('WS', ['Space'])

    spec.AddKind(
        'Lit',
        [
            'Chars',
            'CharsWithoutPrefix',  # for stripping leading whitespace
            'VarLike',
            'ArrayLhsOpen',
            'ArrayLhsClose',
            'Splice',  # @func(a, b)
            'AtLBracket',  # @[split(x)]
            'AtLBraceDot',  # @{.myproc arg1} should be builtin_sub
            'Other',
            'EscapedChar',  # \* is escaped
            'LBracket',
            'RBracket',  # for assoc array literals, static globs
            'Star',
            'QMark',
            # Either brace expansion or keyword for { and }
            'LBrace',
            'RBrace',
            'Comma',
            'Equals',  # For = f()
            'Dollar',  # detecting 'echo $'
            'DRightBracket',  # the ]] that matches [[, NOT a keyword
            'Tilde',  # tilde expansion
            'Pound',  # for comment or VarOp state
            'TPound',  # for doc comments like ###
            'TDot',  # for multiline commands ...
            'Slash',
            'Percent',  # / # % for patsub, NOT unary op
            'Colon',  # x=foo:~:~root needs tilde expansion
            'Digits',  # for lex_mode_e.Arith
            'At',  # for ${a[@]} in lex_mode_e.Arith, and detecting @[]
            'ArithVarLike',  # for $((var+1)).  Distinct from Lit_VarLike 'var='
            'BadBackslash',  # for "\z", not Id.Unknown_Backslash because it's a
            # syntax error in YSH, but NOT OSH
            'CompDummy',  # A fake Lit_* token to get partial words during
            # completion
        ])

    # For recognizing \` and \" and \\ within backticks.  There's an extra layer
    # of backslash quoting.
    spec.AddKind('Backtick', ['Right', 'Quoted', 'DoubleQuote', 'Other'])

    spec.AddKind('History', ['Op', 'Num', 'Search', 'Other'])

    spec.AddKind(
        'Op',
        [
            'Newline',  # mostly equivalent to SEMI
            'Amp',  # &
            'Pipe',  # |
            'PipeAmp',  # |& -- bash extension for stderr
            'DAmp',  # &&
            'DPipe',  # ||
            'Semi',  # ;
            'DSemi',  # ;; for case
            'SemiAmp',  # ;& for case
            'DSemiAmp',  # ;;& for case
            'LParen',  # For subshell.  Not Kind.Left because it's NOT a WordPart.
            'RParen',  # Default, will be translated to Id.Right_*
            'DLeftParen',
            'DRightParen',

            # for [[ ]] language
            'Less',  # <
            'Great',  # >
            'Bang',  # !

            # YSH [] {}
            'LBracket',
            'RBracket',
            'LBrace',
            'RBrace',
        ])

    # YSH expressions use Kind.Expr and Kind.Arith (further below)
    spec.AddKind(
        'Expr',
        [
            'Reserved',  # <- means nothing but it's reserved now
            'Symbol',  # %foo
            'Name',
            'DecInt',
            'BinInt',
            'OctInt',
            'HexInt',
            'Float',
            'Bang',  # eggex !digit, ![a-z]
            'Dot',
            'DDotLessThan',
            'DDotEqual',
            'Colon',  # mylist:pop()
            'RArrow',
            'RDArrow',
            'DSlash',  # integer division
            'TEqual',
            'NotDEqual',
            'TildeDEqual',  # === !== ~==
            'At',
            'DoubleAt',  # splice operators
            'Ellipsis',  # for varargs
            'Dollar',  # legacy regex
            'NotTilde',  # !~
            'DTilde',
            'NotDTilde',  # ~~ !~~
            'DStarEqual',  # **=, which bash doesn't have
            'DSlashEqual',  # //=, which bash doesn't have
            'CastedDummy',  # Used for @() $() (words in lex_mode_e.ShCommand)
            # and ${} '' "" (and all other strings)

            # Constants
            'Null',
            'True',
            'False',

            # Keywords are resolved after lexing, but otherwise behave like
            # tokens.
            'And',
            'Or',
            'Not',

            # List comprehensions
            'For',
            'Is',
            'In',
            'If',
            'Else',
            'Capture',
            'As',

            # Unused
            'Func',
            'Proc',
        ])

    # For C-escaped strings.
    spec.AddKind(
        'Char',
        [
            'OneChar',
            'Stop',
            'Hex',  # \xff
            'YHex',  # \yff for J8 notation

            # Two variants of Octal: \377, and \0377.
            'Octal3',
            'Octal4',
            'Unicode4',
            'SurrogatePair',  # JSON
            'Unicode8',  # bash
            'UBraced',
            'Pound',  # YSH
            'AsciiControl',  # \x01-\x1f, what's disallowed in JSON
        ])

    # For lex_mode_e.BashRegex
    # Bash treats ( | ) as special, and space is allowed within ()
    # Note Id.Op_RParen -> Id.Right_BashRegex with lexer hint
    spec.AddKind('BashRegex', ['LParen', 'AllowedInParens'])

    spec.AddKind(
        'Eggex',
        [
            'Start',  # ^ or %start
            'End',  # $ or %end
            'Dot',  # . or dot
            # Future: %boundary generates \b in Python/Perl, etc.
        ])

    spec.AddKind(
        'Redir',
        [
            'Less',  # < stdin
            'Great',  # > stdout
            'DLess',  # << here doc redirect
            'TLess',  # <<< bash only here string
            'DGreat',  # >> append stdout
            'GreatAnd',  # >& descriptor redirect
            'LessAnd',  # <& descriptor redirect
            'DLessDash',  # <<- here doc redirect for tabs?
            'LessGreat',  # <>
            'Clobber',  # >| POSIX?
            'AndGreat',  # bash &> stdout/stderr to file
            'AndDGreat',  # bash &>> stdout/stderr append to file

            #'GreatPlus',  # >+ is append in YSH
            #'DGreatPlus',  # >>+ is append to string in YSH
        ])

    # NOTE: This is for left/right WORDS only.  (( is not a word so it doesn't
    # get that.
    spec.AddKind(
        'Left',
        [
            'DoubleQuote',
            'JDoubleQuote',  # j" for J8 notation
            'SingleQuote',  # ''
            'DollarSingleQuote',  # $'' for \n escapes
            'RSingleQuote',  # r''
            'USingleQuote',  # u''
            'BSingleQuote',  # b''

            # Multiline versions
            'TDoubleQuote',  # """ """
            'DollarTDoubleQuote',  # $""" """
            'TSingleQuote',  # ''' '''
            'RTSingleQuote',  # r''' '''
            'UTSingleQuote',  # u''' '''
            'BTSingleQuote',  # b''' '''
            'Backtick',  # `
            'DollarParen',  # $(
            'DollarBrace',  # ${
            'DollarBraceZsh',  # ${(foo)
            'DollarDParen',  # $((
            'DollarBracket',  # $[ - synonym for $(( in bash and zsh
            'DollarDoubleQuote',  # $" for bash localized strings
            'ProcSubIn',  # <( )
            'ProcSubOut',  # >( )
            'AtParen',  # @( for split command sub
            'CaretParen',  # ^( for Block literal in expression mode
            'CaretBracket',  # ^[ for Expr literal
            'CaretBrace',  # ^{ for Arglist
            'CaretDoubleQuote',  # ^" for Template
            'ColonPipe',  # :| for word arrays
            'PercentParen',  # legacy %( for word arrays
        ])

    spec.AddKind(
        'Right',
        [
            'DoubleQuote',
            'SingleQuote',
            'Backtick',  # `
            'DollarBrace',  # }
            'DollarDParen',  # )) -- really the second one is a PushHint()
            # ArithSub2 is just Id.Arith_RBracket
            'DollarDoubleQuote',  # "
            'DollarSingleQuote',  # '

            # Disambiguated right parens
            'Subshell',  # )
            'ShFunction',  # )
            'CasePat',  # )
            'Initializer',  # )
            'ExtGlob',  # )
            'BashRegexGroup',  # )
            'BlockLiteral',  # } that matches &{ echo hi }
        ])

    spec.AddKind('ExtGlob', ['Comma', 'At', 'Star', 'Plus', 'QMark', 'Bang'])

    # First position of var sub ${
    # Id.VOp2_Pound -- however you can't tell the difference at first!  It could
    # be an op or a name.  So it makes sense to base it on the state.
    # Id.VOp2_At
    # But then you have AS_STAR, or Id.Arith_Star maybe

    spec.AddKind(
        'VSub',
        [
            'DollarName',  # $foo
            'Name',  # 'foo' in ${foo}
            'Number',  # $0 .. $9
            'Bang',  # $!
            'At',  # $@  or  [@] for array subscripting
            'Pound',  # $#  or  ${#var} for length
            'Dollar',  # $$
            'Star',  # $*
            'Hyphen',  # $-
            'QMark',  # $?
            'Dot',  # ${.myproc builtin sub}
        ])

    spec.AddKindPairs('VTest', [
        ('ColonHyphen', ':-'),
        ('Hyphen', '-'),
        ('ColonEquals', ':='),
        ('Equals', '='),
        ('ColonQMark', ':?'),
        ('QMark', '?'),
        ('ColonPlus', ':+'),
        ('Plus', '+'),
    ])

    # Statically parse @P, so @x etc. is an error.
    spec.AddKindPairs(
        'VOp0',
        [
            ('Q', '@Q'),  # ${x@Q} for quoting
            ('E', '@E'),
            ('P', '@P'),  # ${PS1@P} for prompt eval
            ('A', '@A'),
            ('a', '@a'),
        ])

    # String removal ops
    spec.AddKindPairs(
        'VOp1',
        [
            ('Percent', '%'),
            ('DPercent', '%%'),
            ('Pound', '#'),
            ('DPound', '##'),
            # Case ops, in bash.  At least parse them.  Execution might require
            # unicode stuff.
            ('Caret', '^'),
            ('DCaret', '^^'),
            ('Comma', ','),
            ('DComma', ',,'),
        ])

    spec.AddKindPairs(
        'VOpYsh',
        [
            ('Pipe', '|'),  # ${x|html}
            ('Space', ' '),  # ${x %.3f}
        ])

    # Not in POSIX, but in Bash
    spec.AddKindPairs(
        'VOp2',
        [
            ('Slash', '/'),  # / for replacement
            ('Colon', ':'),  # : for slicing
            ('LBracket', '['),  # [ for indexing
            ('RBracket', ']'),  # ] for indexing
        ])

    # Can only occur after ${!prefix@}
    spec.AddKindPairs('VOp3', [
        ('At', '@'),
        ('Star', '*'),
    ])

    # This kind is for Node types that are NOT tokens.
    spec.AddKind(
        'Node',
        [
            # Arithmetic nodes
            'PostDPlus',
            'PostDMinus',  # Postfix inc/dec.
            # Prefix inc/dec use Arith_DPlus/Arith_DMinus.
            'UnaryPlus',
            'UnaryMinus',  # +1 and -1, to distinguish from infix.
            # Actually we don't need this because they
            # will be under Expr1/Plus vs Expr2/Plus.
            'NotIn',
            'IsNot',  # For YSH comparisons
        ])

    # NOTE: Not doing AddKindPairs() here because oil will have a different set
    # of keywords.  It will probably have for/in/while/until/case/if/else/elif,
    # and then func/proc.
    spec.AddKind(
        'KW',
        [
            'DLeftBracket',
            'Bang',
            'For',
            'While',
            'Until',
            'Do',
            'Done',
            'In',
            'Case',
            'Esac',
            'If',
            'Fi',
            'Then',
            'Else',
            'Elif',
            'Function',
            'Time',

            # YSH keywords.
            'Const',
            'Var',
            'SetVar',
            'SetGlobal',
            # later: Auto?
            'Call',
            'Proc',
            'Typed',
            'Func',

            # builtins, NOT keywords: use, fork, wait, etc.
            # Things that don't affect parsing shouldn't be keywords.
        ])

    # Unlike bash, we parse control flow statically.  They're not
    # dynamically-resolved builtins.
    spec.AddKind('ControlFlow', ['Break', 'Continue', 'Return', 'Exit'])

    # Special Kind for lookahead in the lexer.  It's never seen by anything
    # else.
    spec.AddKind('LookAhead', ['FuncParens'])

    # For parsing globs and converting them to regexes.
    spec.AddKind('Glob', [
        'LBracket',
        'RBracket',
        'Star',
        'QMark',
        'Bang',
        'Caret',
        'EscapedChar',
        'BadBackslash',
        'CleanLiterals',
        'OtherLiteral',
    ])

    # For C-escaped strings.
    # NOTE(review): this comment duplicates the one on Kind.Char; the token
    # names here look like pieces of a printf-style format string -- confirm.
    spec.AddKind(
        'Format',
        [
            'EscapedPercent',
            'Percent',  # starts another lexer mode
            'Flag',
            'Num',
            'Dot',
            'Type',
            'Star',
            'Time',
            'Zero',
        ])

    # For parsing prompt strings like PS1.
    spec.AddKind('PS', [
        'Subst',
        'Octal3',
        'LBrace',
        'RBrace',
        'Literals',
        'BadBackslash',
    ])

    spec.AddKind('Range', ['Int', 'Char', 'Dots', 'Other'])

    spec.AddKind(
        'J8',
        [
            'LBracket',
            'RBracket',
            'LBrace',
            'RBrace',
            'Comma',
            'Colon',
            'Null',
            'Bool',
            'Int',  # Number
            'Float',  # Number

            # High level tokens for "" b'' u''
            # We don't distinguish them in the parser, because we recognize
            # strings in the lexer.
            'String',

            # JSON8 and NIL8
            'Identifier',
            'Newline',  # J8 Lines only, similar to Op_Newline
            'Tab',  # Reserved for TSV8

            # NIL8 only
            'LParen',
            'RParen',
            #'Symbol',
            'Operator',
        ])

    spec.AddKind('ShNumber', ['Dec', 'Hex', 'Oct', 'BaseN'])
716
717
# Operator tables shared between [[ and test/[.
#
# Each character in the three strings below names one single-letter unary
# operator, which is matched with a leading '-' (e.g. 'z' is -z).  They are
# split by the bool_arg_type_e value that AddBoolKinds() registers for them:
# Str, Other and Path respectively.
_UNARY_STR_CHARS = 'zn'  # -z -n
_UNARY_OTHER_CHARS = 'otvR'  # -o is overloaded
_UNARY_PATH_CHARS = 'abcdefghkLprsSuwxOGN'  # -a is overloaded

# Binary operators, also matched with a leading '-' (e.g. 'nt' is -nt).
# AddBoolKinds() registers these as Path and Int operators respectively.
_BINARY_PATH = ['ef', 'nt', 'ot']
_BINARY_INT = ['eq', 'ne', 'gt', 'ge', 'lt', 'le']
725
726
727	def _Dash(strs):
728	# type: (List[str]) -> List[Tuple[str, str]]
729	# Gives a pair of (token name, string to match)
730	return [(s, '-' + s) for s in strs]
731
732
def AddBoolKinds(spec):
    # type: (IdSpec) -> None
    """Register the BoolUnary and BoolBinary kinds and their argument types.

    Also records argument types for a few Ids from other kinds (Op_DAmp,
    Op_DPipe, KW_Bang, Op_Less, Op_Great), so those Ids must already be
    registered on spec when this is called.

    Args:
      spec: the IdSpec to populate
    """
    spec.AddBoolKind('BoolUnary', [
        (bool_arg_type_e.Str, _Dash(list(_UNARY_STR_CHARS))),
        (bool_arg_type_e.Other, _Dash(list(_UNARY_OTHER_CHARS))),
        (bool_arg_type_e.Path, _Dash(list(_UNARY_PATH_CHARS))),
    ])

    Id = spec.id_str2int

    # test --true and test --false have no single letter flags.  They need no
    # lexing.
    #
    # AddBoolKind() above has already closed the BoolUnary kind and advanced
    # spec.kind_index, so the kind is passed explicitly.  Without it, _AddId()
    # would file these two Ids under whichever kind is registered next
    # (BoolBinary), despite their BoolUnary_ names.
    bool_unary_kind = spec.kind_str2int['BoolUnary']
    for long_flag in ('true', 'false'):
        id_int = spec._AddId('BoolUnary_%s' % long_flag, kind=bool_unary_kind)
        spec.AddBoolOp(id_int, bool_arg_type_e.Str)

    spec.AddBoolKind('BoolBinary', [
        (bool_arg_type_e.Str, [
            ('GlobEqual', '='),
            ('GlobDEqual', '=='),
            ('GlobNEqual', '!='),
            ('EqualTilde', '=~'),
        ]),
        (bool_arg_type_e.Path, _Dash(_BINARY_PATH)),
        (bool_arg_type_e.Int, _Dash(_BINARY_INT)),
    ])

    # Logical operators are recorded with an Undefined argument type.
    spec.AddBoolOp(Id['Op_DAmp'], bool_arg_type_e.Undefined)
    spec.AddBoolOp(Id['Op_DPipe'], bool_arg_type_e.Undefined)
    spec.AddBoolOp(Id['KW_Bang'], bool_arg_type_e.Undefined)

    # < and > are recorded as string operators.
    spec.AddBoolOp(Id['Op_Less'], bool_arg_type_e.Str)
    spec.AddBoolOp(Id['Op_Great'], bool_arg_type_e.Str)
768
769
def SetupTestBuiltin(
        id_spec,  # type: IdSpec
        unary_lookup,  # type: Dict[str, int]
        binary_lookup,  # type: Dict[str, int]
        other_lookup,  # type: Dict[str, int]
):
    # type: (...) -> None
    """Fill in the operator lookup tables for test/[.

    Similar to AddBoolKinds above.  Differences:
    - =~ doesn't exist
    - && -> -a, || -> -o
    - ( ) -> Op_LParen (they don't appear above)

    Args:
      id_spec: IdSpec with the Ids and Kinds already registered.  Three more
               BoolBinary Ids (Equal, DEqual, NEqual) are added to it.
      unary_lookup: receives '-x' -> Id for every single-letter unary operator
      binary_lookup: receives the binary operator strings -> Id
      other_lookup: receives ! ( ) ] -> Id
    """
    ids = id_spec.id_str2int
    kinds = id_spec.kind_str2int

    # Single-letter unary operators: -z, -n, -o, ...
    unary_letters = _UNARY_STR_CHARS + _UNARY_OTHER_CHARS + _UNARY_PATH_CHARS
    for ch in unary_letters:
        unary_lookup['-%s' % ch] = ids['BoolUnary_%s' % ch]

    # Dashed binary operators: -ef, -nt, -eq, ...
    for op in _BINARY_PATH + _BINARY_INT:
        binary_lookup['-%s' % op] = ids['BoolBinary_%s' % op]

    # Like the [[ definition above, but without globbing and without =~ .
    # These Ids are created here rather than looked up.
    bool_binary_kind = kinds['BoolBinary']
    equality_ops = (('Equal', '='), ('DEqual', '=='), ('NEqual', '!='))
    for suffix, token_str in equality_ops:
        binary_lookup[token_str] = id_spec.AddBoolBinaryForBuiltin(
            suffix, bool_binary_kind)

    # Some of these names don't quite match, but it keeps the BoolParser
    # simple.
    for token_str, id_name in (('<', 'Op_Less'), ('>', 'Op_Great')):
        binary_lookup[token_str] = ids[id_name]

    # NOTE: -a and -o overloaded as unary prefix operators BoolUnary_a and
    # BoolUnary_o.  The parser rather than the tokenizer handles this.
    other_tokens = (
        ('!', 'KW_Bang'),  # like [[ !
        ('(', 'Op_LParen'),
        (')', 'Op_RParen'),
        (']', 'Arith_RBracket'),  # For closing ]
    )
    for token_str, id_name in other_tokens:
        other_lookup[token_str] = ids[id_name]