OILS / frontend / consts.py View on Github | oils.pub

409 lines, 197 significant
1#!/usr/bin/env python2
2"""Consts.py."""
3from __future__ import print_function
4
5from _devbuild.gen.types_asdl import (redir_arg_type_e, redir_arg_type_t,
6 bool_arg_type_t, opt_group_i)
7from _devbuild.gen.id_kind_asdl import Id, Id_t, Kind_t
8from frontend import builtin_def
9from frontend import lexer_def
10from frontend import option_def
11
12from typing import Tuple, Optional, TYPE_CHECKING
13if TYPE_CHECKING:
14 from _devbuild.gen.option_asdl import option_t, builtin_t
15
# "Not found" sentinel: the Lookup*Builtin() helpers and OptionGroupNum() in
# this module return it when a name does not resolve.
NO_INDEX = 0  # for Resolve

# Used as consts::STRICT_ALL, etc.  Do it explicitly to satisfy MyPy.
# Each name below is a plain re-export of the same-named value in option_def.
STRICT_ALL = option_def.STRICT_ALL
YSH_UPGRADE = option_def.YSH_UPGRADE
YSH_ALL = option_def.YSH_ALL
DEFAULT_TRUE = option_def.DEFAULT_TRUE

PARSE_OPTION_NUMS = option_def.PARSE_OPTION_NUMS
25
# Options whose 'builtin' field is 'set', in option_def._SORTED order.  The
# NUMS and NAMES lists are parallel: entry i of each describes the same option.
_SET_OPTS = [opt for opt in option_def._SORTED if opt.builtin == 'set']
SET_OPTION_NUMS = [opt.index for opt in _SET_OPTS]
SET_OPTION_NAMES = [opt.name for opt in _SET_OPTS]

# Same pair of parallel lists for options whose 'builtin' field is 'shopt'.
_SHOPT_OPTS = [opt for opt in option_def._SORTED if opt.builtin == 'shopt']
SHOPT_OPTION_NUMS = [opt.index for opt in _SHOPT_OPTS]
SHOPT_OPTION_NAMES = [opt.name for opt in _SHOPT_OPTS]

VISIBLE_SHOPT_NUMS = option_def.VISIBLE_SHOPT_NUMS  # used to print

BUILTIN_NAMES = builtin_def.BUILTIN_NAMES  # Used by builtin_comp.py
43
# Keywords for introspection with bash 'compgen' and 'type'.
# Start from the middle field of every lexer_def.KEYWORDS triple, then add the
# names that the lexer table does not provide.
OSH_KEYWORD_NAMES = [name for _, name, _ in lexer_def.KEYWORDS]
OSH_KEYWORD_NAMES.extend([
    '{',  # not handled by our lexer
    '=',  # YSH keyword not handled by our lexer
    # bash considers these closing delimiters keywords
    '}',
    ']]',
])
52
53
def GetKind(id_):
    # type: (Id_t) -> Kind_t
    """Return the Kind recorded for token id_ in ID_TO_KIND.

    Callers use the result to make coarse-grained parsing decisions.
    """
    # Imported at call time rather than module level to break a circular
    # dependency.
    from _devbuild.gen.id_kind import ID_TO_KIND
    kind = ID_TO_KIND[id_]
    return kind
60
61
def BoolArgType(id_):
    # type: (Id_t) -> bool_arg_type_t
    """Return the entry recorded for id_ in BOOL_ARG_TYPES."""
    # Imported at call time rather than module level to break a circular
    # dependency.
    from _devbuild.gen.id_kind import BOOL_ARG_TYPES
    arg_type = BOOL_ARG_TYPES[id_]
    return arg_type
67
68
69#
70# Redirect Tables associated with IDs
71#
72
# Redirect operator Id -> the file descriptor it applies to when the command
# does not name one explicitly: 0 for input-style operators, 1 for
# output-style operators.
REDIR_DEFAULT_FD = {
    # filename
    Id.Redir_Less: 0,  # cat <input.txt means cat 0<input.txt
    Id.Redir_Great: 1,
    Id.Redir_DGreat: 1,
    Id.Redir_Clobber: 1,
    Id.Redir_LessGreat: 0,  # 'exec <> foo' opens a file with read/write
    # bash &> and &>>
    Id.Redir_AndGreat: 1,
    Id.Redir_AndDGreat: 1,

    # descriptor
    Id.Redir_GreatAnd: 1,  # echo >&2 means echo 1>&2
    Id.Redir_LessAnd: 0,  # echo <&3 means echo 0<&3, I think
    Id.Redir_TLess: 0,  # here word

    # here docs included
    Id.Redir_DLess: 0,
    Id.Redir_DLessDash: 0,
}
93
# Redirect operator Id -> whether its argument is a filename
# (redir_arg_type_e.Path) or a file descriptor (redir_arg_type_e.Desc).
#
# The here word and here doc operators (Redir_TLess, Redir_DLess,
# Redir_DLessDash) have no entry, so RedirArgType() raises KeyError for them.
REDIR_ARG_TYPES = {
    # filename
    Id.Redir_Less: redir_arg_type_e.Path,
    Id.Redir_Great: redir_arg_type_e.Path,
    Id.Redir_DGreat: redir_arg_type_e.Path,
    Id.Redir_Clobber: redir_arg_type_e.Path,
    Id.Redir_LessGreat: redir_arg_type_e.Path,
    # bash &> and &>>
    Id.Redir_AndGreat: redir_arg_type_e.Path,
    Id.Redir_AndDGreat: redir_arg_type_e.Path,

    # descriptor
    Id.Redir_GreatAnd: redir_arg_type_e.Desc,
    Id.Redir_LessAnd: redir_arg_type_e.Desc,

    # Note: here docs aren't included
}
111
112
def RedirArgType(id_):
    # type: (Id_t) -> redir_arg_type_t
    """Return the REDIR_ARG_TYPES entry for redirect operator id_.

    Raises KeyError when id_ is not a key of REDIR_ARG_TYPES.
    """
    arg_type = REDIR_ARG_TYPES[id_]
    return arg_type
116
117
def RedirDefaultFd(id_):
    # type: (Id_t) -> int
    """Return the REDIR_DEFAULT_FD entry for redirect operator id_.

    Raises KeyError when id_ is not a key of REDIR_DEFAULT_FD.
    """
    default_fd = REDIR_DEFAULT_FD[id_]
    return default_fd
121
122
123#
124# Builtins
125#
126
# Keyed by builtin name (argv0).  The Lookup*Builtin() functions read the
# .kind and .index attributes of the values.
_BUILTIN_DICT = builtin_def.BuiltinDict()
128
129
def LookupSpecialBuiltin(argv0):
    # type: (str) -> builtin_t
    """Return the index of the special builtin named argv0, else NO_INDEX.

    NO_INDEX covers both unknown names and builtins whose kind is not
    'special'.
    """
    spec = _BUILTIN_DICT.get(argv0)
    is_special = spec and spec.kind == 'special'
    return spec.index if is_special else NO_INDEX
138
139
def LookupAssignBuiltin(argv0):
    # type: (str) -> builtin_t
    """Return the index of the assignment builtin named argv0, else NO_INDEX.

    NO_INDEX covers both unknown names and builtins whose kind is not
    'assign'.
    """
    spec = _BUILTIN_DICT.get(argv0)
    is_assign = spec and spec.kind == 'assign'
    return spec.index if is_assign else NO_INDEX
148
149
def LookupPrivateBuiltin(argv0):
    # type: (str) -> builtin_t
    """Return the index of the private builtin named argv0, else NO_INDEX.

    NO_INDEX covers both unknown names and builtins whose kind is not
    'private'.
    """
    spec = _BUILTIN_DICT.get(argv0)
    is_private = spec and spec.kind == 'private'
    return spec.index if is_private else NO_INDEX
158
159
def LookupNormalBuiltin(argv0):
    # type: (str) -> builtin_t
    """Return the index of the normal builtin named argv0, else NO_INDEX.

    NO_INDEX covers both unknown names and builtins whose kind is not
    'normal'.
    """
    spec = _BUILTIN_DICT.get(argv0)
    is_normal = spec and spec.kind == 'normal'
    return spec.index if is_normal else NO_INDEX
168
169
def OptionName(opt_num):
    # type: (option_t) -> str
    """Return the option_def.OPTION_NAMES entry at index opt_num."""
    opt_name = option_def.OPTION_NAMES[opt_num]
    return opt_name
174
175
# Option group name -> opt_group_i value.  OptionGroupNum() looks names up
# here.  The 'oil:' spellings map to the same values as their 'ysh:'
# counterparts.
OPTION_GROUPS = {
    'strict:all': opt_group_i.StrictAll,
    'ysh:upgrade': opt_group_i.YshUpgrade,
    'ysh:all': opt_group_i.YshAll,

    # Aliases to deprecate
    'oil:upgrade': opt_group_i.YshUpgrade,
    'oil:all': opt_group_i.YshAll,
}
185
186
def OptionGroupNum(s):
    # type: (str) -> int
    """Return the OPTION_GROUPS value for group name s, or NO_INDEX (0)."""
    try:
        return OPTION_GROUPS[s]
    except KeyError:
        return NO_INDEX  # 0 for not found
190
191
# Keyed by option name.  OptionNum() and UnimplOptionNum() unpack each value
# as a (num, impl) pair, where impl is truthy for implemented options.
_OPTION_DICT = option_def.OptionDict()
193
194
def OptionNum(s):
    # type: (str) -> int
    """Return the number of option s if it is implemented, else 0.

    0 means "not found": the name is either unknown or refers to an
    unimplemented option.
    """
    entry = _OPTION_DICT.get(s)
    if entry is not None:
        num, impl = entry
        if impl:
            return num
    return 0
205
206
def UnimplOptionNum(s):
    # type: (str) -> int
    """Return the number of option s if it is NOT implemented, else 0.

    0 means "not found": the name is either unknown or refers to an
    implemented option.
    """
    entry = _OPTION_DICT.get(s)
    if entry is not None:
        num, impl = entry
        if not impl:
            return num
    return 0
214
215
# Built from the (_, name, id) triples in lexer_def.CONTROL_FLOW:
# - _CONTROL_FLOW_NAMES: the names, in table order
# - _CONTROL_FLOW_LOOKUP: id -> name
_CONTROL_FLOW_NAMES = []
_CONTROL_FLOW_LOOKUP = {}
for _, name, id_ in lexer_def.CONTROL_FLOW:
    _CONTROL_FLOW_NAMES.append(name)
    _CONTROL_FLOW_LOOKUP[id_] = name
220
221
def ControlFlowName(id_):
    # type: (int) -> str
    """Return the control flow name recorded for id_ (used for tracing).

    Raises KeyError when id_ is not in _CONTROL_FLOW_LOOKUP.
    """
    cf_name = _CONTROL_FLOW_LOOKUP[id_]
    return cf_name
226
227
def IsControlFlow(name):
    # type: (str) -> bool
    """Return True if name is one of the names in _CONTROL_FLOW_NAMES."""
    found = name in _CONTROL_FLOW_NAMES
    return found
231
232
def IsKeyword(name):
    # type: (str) -> bool
    """Return True if name is one of the names in OSH_KEYWORD_NAMES."""
    found = name in OSH_KEYWORD_NAMES
    return found
236
237
238#
239# osh/prompt.py and osh/word_compile.py
240#
241
# Character following a backslash -> the character the escape stands for.
_ONE_CHAR_C = {
    '0': '\0',
    'a': '\a',
    'b': '\b',
    'e': '\x1b',
    'E': '\x1b',
    'f': '\f',
    'n': '\n',
    'r': '\r',
    't': '\t',
    'v': '\v',
    '\\': '\\',
    "'": "'",  # for $'' only, not echo -e
    '"': '"',  # not sure why this is escaped within $''
    '/': '/',  # for JSON \/ only
}


def LookupCharC(c):
    # type: (str) -> str
    """Translate the escape character c using _ONE_CHAR_C.

    Fatal if c is not in the table: the lookup raises KeyError.
    """
    translated = _ONE_CHAR_C[c]
    return translated
264
265
266# NOTE: Prompts chars and printf are inconsistent, e.g. \E is \e in printf, but
267# not in PS1.
# Prompt escape character -> the character it stands for.
_ONE_CHAR_PROMPT = {
    'a': '\a',
    'e': '\x1b',
    'r': '\r',
    'n': '\n',
    '\\': '\\',
}


def LookupCharPrompt(c):
    # type: (str) -> Optional[str]
    """Translate the prompt escape character c using _ONE_CHAR_PROMPT.

    Returns None if c is not in the table.
    """
    translated = _ONE_CHAR_PROMPT.get(c)
    return translated
281
282
283#
284# Constants used by osh/split.py
285#
286
287# IFS splitting is complicated in general. We handle it with three concepts:
288#
289# - CH.* - Kinds of characters (edge labels)
290# - ST.* - States (node labels)
291# - EMIT.* Actions
292#
# The Split() loop that consumes this table (see osh/split.py) classifies
# characters, follows state transitions, and emits spans.  A span is an
# (ignored Bool, end_index Int) pair.
295
296# As an example, consider this string:
297# 'a _ b'
298#
299# The character classes are:
300#
301# a ' ' _ ' ' b
302# Black DE_White DE_Gray DE_White Black
303#
304# The states are:
305#
306# a ' ' _ ' ' b
307# Black DE_White1 DE_Gray DE_White2 Black
308#
309# DE_White2 is whitespace that follows a "gray" non-whitespace IFS character.
310#
311# The spans emitted are:
312#
313# (part 'a', ignored ' _ ', part 'b')
314
315# SplitForRead() will check if the last two spans are a \ and \\n. Easy.
316
317# Shorter names for state machine enums
318from _devbuild.gen.runtime_asdl import state_t, emit_t, char_kind_t
319from _devbuild.gen.runtime_asdl import emit_i as EMIT
320from _devbuild.gen.runtime_asdl import char_kind_i as CH
321from _devbuild.gen.runtime_asdl import state_i as ST
322
# Transition table for the IFS splitting state machine.
#
#   key:   (current state, kind of the next character)
#   value: (next state, action to emit)
#
# IfsEdge() subscripts this dict directly, so a (state, character kind) pair
# with no entry here raises KeyError.  The trailing comment on each row is an
# example input that takes that edge.
_IFS_EDGES = {
    # Whitespace should have been stripped
    (ST.Start, CH.DE_White): (ST.Invalid, EMIT.Nothing),  # ' '
    (ST.Start, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '_'
    (ST.Start, CH.Black): (ST.Black, EMIT.Nothing),  # 'a'
    (ST.Start, CH.Backslash): (ST.Backslash, EMIT.Nothing),  # '\'
    (ST.Start, CH.Sentinel): (ST.Done, EMIT.Nothing),  # ''
    (ST.DE_White1, CH.DE_White): (ST.DE_White1, EMIT.Nothing),  # ' '
    (ST.DE_White1, CH.DE_Gray): (ST.DE_Gray, EMIT.Nothing),  # ' _'
    (ST.DE_White1, CH.Black): (ST.Black, EMIT.Delim),  # ' a'
    (ST.DE_White1, CH.Backslash): (ST.Backslash, EMIT.Delim),  # ' \'
    # Ignore trailing IFS whitespace too.  This is necessary for the case:
    # IFS=':' ; read x y z <<< 'a : b : c :'.
    (ST.DE_White1, CH.Sentinel): (ST.Done, EMIT.Nothing),  # 'zz '
    (ST.DE_Gray, CH.DE_White): (ST.DE_White2, EMIT.Nothing),  # '_ '
    (ST.DE_Gray, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '__'
    (ST.DE_Gray, CH.Black): (ST.Black, EMIT.Delim),  # '_a'
    # NOTE(review): the Start, DE_White1, DE_White2 and Black rows all send
    # CH.Backslash to ST.Backslash; this row goes to ST.Black instead.
    # Confirm that is intentional.
    (ST.DE_Gray, CH.Backslash): (ST.Black, EMIT.Delim),  # '_\'
    (ST.DE_Gray, CH.Sentinel): (ST.Done, EMIT.Delim),  # 'zz:' IFS=': '
    (ST.DE_White2, CH.DE_White): (ST.DE_White2, EMIT.Nothing),  # '_ '
    (ST.DE_White2, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '_ _'
    (ST.DE_White2, CH.Black): (ST.Black, EMIT.Delim),  # '_ a'
    (ST.DE_White2, CH.Backslash): (ST.Backslash, EMIT.Delim),  # '_ \'
    (ST.DE_White2, CH.Sentinel): (ST.Done, EMIT.Delim),  # 'zz: ' IFS=': '
    (ST.Black, CH.DE_White): (ST.DE_White1, EMIT.Part),  # 'a '
    (ST.Black, CH.DE_Gray): (ST.DE_Gray, EMIT.Part),  # 'a_'
    (ST.Black, CH.Black): (ST.Black, EMIT.Nothing),  # 'aa'
    (ST.Black, CH.Backslash): (ST.Backslash, EMIT.Part),  # 'a\'
    (ST.Black, CH.Sentinel): (ST.Done, EMIT.Part),  # 'zz' IFS=': '

    # Here we emit an ignored \ and the second character as well.
    # We're emitting TWO spans here; we don't wait until the subsequent
    # character.  That is OK.
    #
    # Problem: if '\ ' is the last one, we don't want to emit a trailing span?
    # In all other cases we do.
    (ST.Backslash, CH.DE_White): (ST.Black, EMIT.Escape),  # '\ '
    (ST.Backslash, CH.DE_Gray): (ST.Black, EMIT.Escape),  # '\_'
    (ST.Backslash, CH.Black): (ST.Black, EMIT.Escape),  # '\a'
    # NOTE: second character is a backslash, but new state is ST.Black!
    (ST.Backslash, CH.Backslash): (ST.Black, EMIT.Escape),  # '\\'
    (ST.Backslash, CH.Sentinel): (ST.Done, EMIT.Escape),  # 'zz\'
}
366
367
def IfsEdge(state, ch):
    # type: (state_t, char_kind_t) -> Tuple[state_t, emit_t]
    """Take one step of the IFS state machine.

    Returns the (next state, action) pair stored in _IFS_EDGES for the given
    state and character kind.  Raises KeyError if the table has no such edge.
    """
    edge_key = (state, ch)
    return _IFS_EDGES[edge_key]
372
373
# Pattern to use libc regexec() to parse NAME, NAME=value, and NAME+=value.
#
# We want submatch extraction, which would need a new type of binding, and
# doing it with libc seems easy enough.
#
# Capture groups (assuming VAR_NAME_RE adds no groups of its own -- TODO
# confirm):
#   1: the variable name
#   2: the optional remainder, operator plus value
#   3: the operator, '=' or '+='
#   4: the value, which may be empty

ASSIGN_ARG_RE = '^(' + lexer_def.VAR_NAME_RE + r')((=|\+=)(.*))?$'
380
381# Eggex equivalent:
382#
383# VarName = /
384# [a-z A-Z _ ]
385# [a-z A-Z 0-9 _ ]*
386# /
387#
388# SplitArg = /
389# %begin
390# <capture VarName>
391# (
392# <capture '=' | '+='> <capture dot*>
393# )?
# %end
# /
395
# Weird rules for brackets: put ] first.  Inside a bracket expression a
# literal ']' has to be the first member, so this matches any run of
# characters that contains neither '[' nor ']'.
NOT_BRACKETS = '[^][]*'

# Matches NAME or NAME[subscript].  Capture groups (assuming VAR_NAME_RE adds
# no groups of its own -- TODO confirm):
#   1: the variable name
#   2: the optional '[...]' suffix
#   3: the text between the brackets
#
# Every fragment containing a backslash is a raw string, so '\[' and '\]'
# reach the regex engine as written rather than relying on Python passing an
# unrecognized escape sequence through unchanged.
TEST_V_RE = '^(' + lexer_def.VAR_NAME_RE + r')(\[(' + NOT_BRACKETS + r')\])?$'
399
# NotBrackets = / ![ ']' '[' ]* /
401#
402# TestV = /
403# %begin
404# <capture VarName>
405# (
406# '[' <capture NotBrackets> ']'
407# )?
408# %end
409# /