1 | """
|
2 | word.py - Utility functions for words, e.g. treating them as "tokens".
|
3 | """
|
4 |
|
5 | from _devbuild.gen.id_kind_asdl import Id, Kind, Id_t, Kind_t
|
6 | from _devbuild.gen.syntax_asdl import (
|
7 | Token,
|
8 | CompoundWord,
|
9 | DoubleQuoted,
|
10 | SingleQuoted,
|
11 | word,
|
12 | word_e,
|
13 | word_t,
|
14 | word_str,
|
15 | word_part,
|
16 | word_part_t,
|
17 | word_part_e,
|
18 | AssocPair,
|
19 | )
|
20 | from frontend import consts
|
21 | from frontend import lexer
|
22 | from mycpp import mylib
|
23 | from mycpp.mylib import tagswitch, log
|
24 |
|
25 | from typing import Tuple, Optional, List, Any, cast, TYPE_CHECKING
|
26 | if TYPE_CHECKING:
|
27 | from osh.word_parse import WordParser
|
28 |
|
29 | _ = log
|
30 |
|
31 |
|
def LiteralId(part):
    # type: (word_part_t) -> Id_t
    """If the WordPart consists of a single literal token, return its Id.

    Used for Id.KW_For, or Id.RBrace, etc.
    """
    if part.tag() != word_part_e.Literal:
        return Id.Undefined_Tok  # unequal to any other Id

    return cast(Token, part).id


def CheckLiteralId(part, tok_id):
    # type: (word_part_t, Id_t) -> Optional[Token]
    """If the WordPart is a Token of a given Id, return the Token."""
    if part.tag() != word_part_e.Literal:
        return None

    tok = cast(Token, part)
    if tok.id == tok_id:
        return tok

    return None


def LiteralToken(UP_w):
    # type: (word_t) -> Optional[Token]
    """If a word consists of a literal token, return it.

    Otherwise return None.
    """
    # We're casting here because this function is called by the CommandParser
    # for var, setvar, '...', etc.  It's easier to cast in one place.
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) != 1:
        return None

    part0 = w.parts[0]
    if part0.tag() != word_part_e.Literal:
        return None

    return cast(Token, part0)


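# Illustrative shape (my example, not from the original source): a keyword
# word like `var` is CompoundWord([Token]) with a single Literal part, so
# LiteralToken() returns that token, while a two-part word like var$x
# returns None.

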
def _EvalWordPart(part):
    # type: (word_part_t) -> Tuple[bool, str, bool]
    """Evaluate a WordPart at PARSE TIME.

    Used for:

    1. here doc delimiters
    2. function names
    3. for loop variable names
    4. Compiling constant regex words at parse time
    5. a special case for ${a////c} to see if we got a leading slash in the
       pattern.

    Returns:
      3-tuple of
        ok: bool, success.  If there are parts that can't be statically
            evaluated, then we return false.
        value: a string (not Value)
        quoted: whether any part of the word was quoted
    """
    UP_part = part
    with tagswitch(part) as case:
        if case(word_part_e.Literal):
            tok = cast(Token, UP_part)
            # Weird performance issue: if we change this to lexer.LazyStr(),
            # the parser slows down, e.g. on configure-coreutils from 805 B
            # irefs to ~830 B.  The real issue is that we should avoid calling
            # this from CommandParser - for the Hay node.
            return True, lexer.TokenVal(tok), False
            #return True, lexer.LazyStr(tok), False

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            if mylib.PYTHON:
                val = lexer.TokenVal(part.token)
                assert len(val) == 2, val  # e.g. \*
                assert val[0] == '\\'
            s = lexer.TokenSliceLeft(part.token, 1)
            return True, s, True

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            return True, part.sval, True

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            strs = []  # type: List[str]
            for p in part.parts:
                ok, s, _ = _EvalWordPart(p)
                if not ok:
                    return False, '', True
                strs.append(s)

            return True, ''.join(strs), True  # At least one part was quoted!

        elif case(word_part_e.YshArrayLiteral, word_part_e.InitializerLiteral,
                  word_part_e.ZshVarSub, word_part_e.CommandSub,
                  word_part_e.SimpleVarSub, word_part_e.BracedVarSub,
                  word_part_e.TildeSub, word_part_e.ArithSub,
                  word_part_e.ExtGlob, word_part_e.Splice,
                  word_part_e.ExprSub):
            return False, '', False

        else:
            raise AssertionError(part.tag())


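# Illustrative contract for _EvalWordPart() (my examples, not from the
# original source): a Literal part 'foo' yields (True, 'foo', False); a
# SingleQuoted part 'foo' yields (True, 'foo', True); and a DoubleQuoted
# part containing "$x" yields (False, '', True), since a var sub can't be
# statically evaluated.

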
def FastStrEval(w):
    # type: (CompoundWord) -> Optional[str]
    """
    Detects common cases:

    (1) CompoundWord([LiteralPart(Id.Lit_Chars)])
        For echo -e, test x -lt 0, etc.
    (2) single quoted word like 'foo'

    Other patterns we could detect are:
    (1) "foo"
    (2) "$var" and "${var}" - I think these are very common in OSH code (but
        not YSH)
        - I think val_ops.Stringify() can handle all the errors
    """
    if len(w.parts) != 1:
        return None

    part0 = w.parts[0]
    UP_part0 = part0
    with tagswitch(part0) as case:
        if case(word_part_e.Literal):
            part0 = cast(Token, UP_part0)

            if part0.id in (Id.Lit_Chars, Id.Lit_LBracket, Id.Lit_RBracket):
                # Could add more tokens in this case
                # e.g. + is Lit_Other, and it's a Token in 'expr'
                # Right now it's Lit_Chars (e.g. ls -l) and [ and ] because I
                # know those are common
                # { } are not as common
                return lexer.LazyStr(part0)

            else:
                # e.g. Id.Lit_Star needs to be glob expanded
                # TODO: Consider moving Id.Lit_Star etc. to Kind.MaybeGlob?
                return None

        elif case(word_part_e.SingleQuoted):
            part0 = cast(SingleQuoted, UP_part0)
            # TODO: SingleQuoted should have lazy (str? sval) field
            # This would only affect multi-line strings though?
            return part0.sval

        else:
            # e.g. DoubleQuoted can't be optimized to a string, because it
            # might have "$@" and such
            return None


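# Note on usage (my summary, not from the original source): a None return
# here just means "not one of the fast cases"; callers presumably fall back
# to the general word evaluator.

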
def StaticEval(UP_w):
    # type: (word_t) -> Tuple[bool, str, bool]
    """Evaluate a Compound at PARSE TIME."""
    quoted = False

    # e.g. for ( instead of for (( is a token word
    if UP_w.tag() != word_e.Compound:
        return False, '', quoted

    w = cast(CompoundWord, UP_w)

    strs = []  # type: List[str]
    for part in w.parts:
        ok, s, q = _EvalWordPart(part)
        if not ok:
            return False, '', quoted
        if q:
            quoted = True  # at least one part was quoted
        strs.append(s)
    #log('StaticEval parts %s', w.parts)
    return True, ''.join(strs), quoted


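# Illustrative examples of StaticEval() (mine, derived from the contract
# above):
#
#   word           ->  (ok, value, quoted)
#   foo            ->  (True, 'foo', False)
#   'foo'"bar"     ->  (True, 'foobar', True)
#   $x             ->  (False, '', False)    # not statically evaluable

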
# From bash, general.c, unquoted_tilde_word():
# POSIX.2, 3.6.1: A tilde-prefix consists of an unquoted tilde character at
# the beginning of the word, followed by all of the characters preceding the
# first unquoted slash in the word, or all the characters in the word if there
# is no slash...If none of the characters in the tilde-prefix are quoted, the
# characters in the tilde-prefix following the tilde shall be treated as a
# possible login name.
#define TILDE_END(c) ((c) == '\0' || (c) == '/' || (c) == ':')
#
# So an unquoted tilde can ALWAYS start a new lex mode?  You respect quotes
# and substitutions.
#
# We only detect ~Lit_Chars and split.  So we might as well just write a
# regex.


def TildeDetect(UP_w):
    # type: (word_t) -> Optional[CompoundWord]
    """Detect tilde expansion in a word.

    It might begin with a Literal that needs to be turned into a TildeSub.
    (It depends on whether the second token begins with a slash.)

    If so, it returns a new word.  Otherwise it returns None.

    NOTE:
    - The regex for Lit_TildeLike could be expanded.  Right now it's
      conservative, like Lit_Chars without the /.
    - It's possible to write this in a mutating style, since only the first
      token is changed.  But note that we CANNOT know this during lexing.
    """
    # BracedTree can't be tilde expanded
    if UP_w.tag() != word_e.Compound:
        return None

    w = cast(CompoundWord, UP_w)
    return TildeDetect2(w)


def TildeDetect2(w):
    # type: (CompoundWord) -> Optional[CompoundWord]
    """If tilde sub is detected, returns a new CompoundWord.

    Accepts CompoundWord, not word_t.  After brace expansion, we know we have
    a List[CompoundWord].

    Tilde detection:

    YES:
      ~  ~/
      ~bob  ~bob/

    NO:
      ~bob#  ~bob#/
      ~bob$x
      ~$x

    Pattern to match (all must be word_part_e.Literal):

    Lit_Tilde Lit_Chars? (Lit_Slash | %end)
    """
    if len(w.parts) == 0:  # ${a-} has no parts
        return None

    tok0 = CheckLiteralId(w.parts[0], Id.Lit_Tilde)
    if tok0 is None:
        return None

    new_parts = []  # type: List[word_part_t]

    if len(w.parts) == 1:  # ~
        new_parts.append(word_part.TildeSub(tok0, None, None))
        return CompoundWord(new_parts)

    id1 = LiteralId(w.parts[1])
    if id1 == Id.Lit_Slash:  # ~/
        new_parts.append(word_part.TildeSub(tok0, None, None))
        new_parts.extend(w.parts[1:])
        return CompoundWord(new_parts)

    if id1 != Id.Lit_Chars:
        return None  # ~$x is not TildeSub

    tok1 = cast(Token, w.parts[1])

    if len(w.parts) == 2:  # ~foo
        new_parts.append(word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
        return CompoundWord(new_parts)

    id2 = LiteralId(w.parts[2])
    if id2 != Id.Lit_Slash:  # ~foo$x is not TildeSub
        return None

    new_parts.append(word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
    new_parts.extend(w.parts[2:])
    return CompoundWord(new_parts)


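# Hypothetical trace of TildeDetect2() (the exact token spellings are my
# assumption): ~bob/src lexes to [Lit_Tilde '~', Lit_Chars 'bob',
# Lit_Slash '/', ...], so the first two parts are rewritten into
# TildeSub(tok0, tok1, 'bob') and the remaining parts are kept as-is.

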
def TildeDetectAssign(w):
    # type: (CompoundWord) -> None
    """Detects multiple tilde subs, like a=~:~/src:~bob

    MUTATES its argument.

    Pattern to match (all must be word_part_e.Literal):

    Lit_Tilde Lit_Chars? (Lit_Slash | Lit_Colon | %end)
    """
    parts = w.parts

    # Bail out EARLY if there are no ~ at all
    has_tilde = False
    for part in parts:
        if LiteralId(part) == Id.Lit_Tilde:
            has_tilde = True
            break
    if not has_tilde:
        return  # Avoid further work and allocations

    # Avoid IndexError, since we have to look ahead up to 2 tokens
    parts.append(None)
    parts.append(None)

    new_parts = []  # type: List[word_part_t]

    tilde_could_be_next = True  # true at first, and true after :

    i = 0
    n = len(parts)

    while i < n:
        part0 = parts[i]
        if part0 is None:
            break

        #log('i = %d', i)
        #log('part0 %s', part0)

        # Skip tilde in middle of word, like a=foo~bar
        if tilde_could_be_next and LiteralId(part0) == Id.Lit_Tilde:
            # Look ahead up to 2 parts; if ~ ends the string, these are the
            # None sentinels appended above.
            part1 = parts[i + 1]
            part2 = parts[i + 2]

            tok0 = cast(Token, part0)

            if part1 is None:  # x=foo:~
                new_parts.append(word_part.TildeSub(tok0, None, None))
                break  # at end

            id1 = LiteralId(part1)

            if id1 in (Id.Lit_Slash, Id.Lit_Colon):  # x=foo:~/ or x=foo:~:
                new_parts.append(word_part.TildeSub(tok0, None, None))
                new_parts.append(part1)
                i += 2
                continue

            if id1 != Id.Lit_Chars:
                new_parts.append(part0)  # unchanged
                new_parts.append(part1)  # ...
                i += 2
                continue  # x=foo:~$x is not tilde sub

            tok1 = cast(Token, part1)

            if part2 is None:  # x=foo:~foo
                # consume both
                new_parts.append(
                    word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
                break  # at end

            id2 = LiteralId(part2)
            if id2 not in (Id.Lit_Slash, Id.Lit_Colon):  # x=foo:~foo$x
                new_parts.append(part0)  # unchanged
                new_parts.append(part1)  # ...
                new_parts.append(part2)  # ...
                i += 3
                continue

            new_parts.append(
                word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
            new_parts.append(part2)
            i += 3

            tilde_could_be_next = (id2 == Id.Lit_Colon)

        else:
            new_parts.append(part0)
            i += 1

            tilde_could_be_next = (LiteralId(part0) == Id.Lit_Colon)

    # Remove the None sentinels
    parts.pop()
    parts.pop()

    # Mutate argument
    w.parts = new_parts


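# Hypothetical trace of TildeDetectAssign() (the token spellings are my
# assumption): for a=~:~/src, the value word's parts are
# [Lit_Tilde, Lit_Colon, Lit_Tilde, Lit_Slash, Lit_Chars 'src'], and both
# Lit_Tilde tokens become TildeSub parts, since each is followed by : or /
# (or ends the word).

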
def TildeDetectAll(words):
    # type: (List[word_t]) -> List[word_t]
    out = []  # type: List[word_t]
    for w in words:
        t = TildeDetect(w)
        if t:
            out.append(t)
        else:
            out.append(w)
    return out


def HasArrayPart(w):
    # type: (CompoundWord) -> bool
    """Used in cmd_parse."""
    for part in w.parts:
        if part.tag() == word_part_e.InitializerLiteral:
            return True
    return False


def ShFunctionName(w):
    # type: (CompoundWord) -> str
    """Returns a valid shell function name, or the empty string.

    TODO: Maybe use this regex to validate:

    FUNCTION_NAME_RE = r'[^{}\[\]=]*'

    Bash is very lenient, but that would disallow confusing characters, for
    better error messages on a[x]=(), etc.
    """
    ok, s, quoted = StaticEval(w)
    # Function names should not have quotes
    if not ok or quoted:
        return ''
    return s


def IsVarLike(w):
    # type: (CompoundWord) -> bool
    """Tests whether a word looks like FOO=bar.

    This is a quick test for the command parser to distinguish:

    func() { echo hi; }
    func=(1 2 3)
    """
    if len(w.parts) == 0:
        return False

    return LiteralId(w.parts[0]) == Id.Lit_VarLike


def LooksLikeArithVar(UP_w):
    # type: (word_t) -> Optional[Token]
    """Return a token if this word looks like an arith var.

    NOTE: This can't be combined with DetectShAssignment because VarLike and
    ArithVarLike must be different tokens.  Otherwise _ReadCompoundWord will
    be confused between array assignments foo=(1 2) and function calls
    foo(1, 2).
    """
    if UP_w.tag() != word_e.Compound:
        return None

    w = cast(CompoundWord, UP_w)
    if len(w.parts) != 1:
        return None

    return CheckLiteralId(w.parts[0], Id.Lit_ArithVarLike)


def CheckLeadingEquals(w):
    # type: (CompoundWord) -> Optional[Token]
    """Test whether a word looks like =word.

    For shopt --set strict_parse_equals
    """
    if len(w.parts) == 0:
        return None

    return CheckLiteralId(w.parts[0], Id.Lit_Equals)


def DetectShAssignment(w):
    # type: (CompoundWord) -> Tuple[Optional[Token], Optional[Token], int]
    """Detects whether a word looks like FOO=bar or FOO[x]=bar.

    Returns:
      left_token or None  # Lit_VarLike, Lit_ArrayLhsOpen, or None if it's
                          # not an assignment
      close_token         # Lit_ArrayLhsClose if it was detected, or None
      part_offset         # where to start the value word, 0 if not an
                          # assignment

    Cases:

    s=1
    s+=1
    s[x]=1
    s[x]+=1

    a=()
    a+=()
    a[x]=()
    a[x]+=()  # We parse this (as bash does), but it's never valid because
              # arrays can't be nested.
    """
    no_token = None  # type: Optional[Token]

    n = len(w.parts)
    if n == 0:
        return no_token, no_token, 0

    part0 = w.parts[0]
    if part0.tag() != word_part_e.Literal:
        return no_token, no_token, 0

    tok0 = cast(Token, part0)

    if tok0.id == Id.Lit_VarLike:
        return tok0, no_token, 1  # everything after first token is the value

    if tok0.id == Id.Lit_ArrayLhsOpen:
        # NOTE that a[]=x should be an error.  We don't want to silently
        # decay.
        if n < 2:
            return no_token, no_token, 0
        for i in xrange(1, n):
            part = w.parts[i]
            tok_close = CheckLiteralId(part, Id.Lit_ArrayLhsClose)
            if tok_close:
                return tok0, tok_close, i + 1

    # Nothing detected.  Could be 'foobar', or 'a[x+1+2' without the
    # closing ].
    return no_token, no_token, 0


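# Illustrative returns (my examples; the exact tokenization of the index is
# an assumption):
#
#   s=1     -> (Lit_VarLike 's=', None, 1)      # value is parts[1:]
#   a[i]=x  -> (Lit_ArrayLhsOpen 'a[', Lit_ArrayLhsClose ']=', 3)
#   foobar  -> (None, None, 0)                  # not an assignment

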
def DetectAssocPair(w):
    # type: (CompoundWord) -> Optional[AssocPair]
    """Like DetectShAssignment, but for A=(['k']=v ['k2']=v)

    The key and the value are both strings.  So we just pick out the word
    parts.  Unlike a[k]=v, A=([k]=v) is NOT ambiguous, because the [k]
    syntax is only used for associative array literals, as opposed to
    indexed array literals.
    """
    parts = w.parts
    if LiteralId(parts[0]) != Id.Lit_LBracket:
        return None

    n = len(parts)
    for i in xrange(n):
        id_ = LiteralId(parts[i])
        if id_ == Id.Lit_ArrayLhsClose:  # ]=
            # e.g. if we have [$x$y]=$a$b
            key = CompoundWord(parts[1:i])  # $x$y
            value = CompoundWord(parts[i + 1:])  # $a$b

            has_plus = lexer.IsPlusEquals(cast(Token, parts[i]))

            # Type-annotated intermediate value for mycpp translation
            return AssocPair(key, value, has_plus)

    return None


def IsControlFlow(w):
    # type: (CompoundWord) -> Tuple[Kind_t, Optional[Token]]
    """Tests if a word is a control flow word."""
    no_token = None  # type: Optional[Token]

    if len(w.parts) != 1:
        return Kind.Undefined, no_token

    UP_part0 = w.parts[0]
    token_type = LiteralId(UP_part0)
    if token_type == Id.Undefined_Tok:
        return Kind.Undefined, no_token

    token_kind = consts.GetKind(token_type)
    if token_kind == Kind.ControlFlow:
        return token_kind, cast(Token, UP_part0)

    return Kind.Undefined, no_token


def BraceToken(UP_w):
    # type: (word_t) -> Optional[Token]
    """If a word has Id.Lit_LBrace or Lit_RBrace, return a Token.

    This is a special case for osh/cmd_parse.py.

    The WordParser changes Id.Op_LBrace from ExprParser into Id.Lit_LBrace,
    so we may get a token, not a word.
    """
    with tagswitch(UP_w) as case:
        if case(word_e.Operator):
            tok = cast(Token, UP_w)
            assert tok.id in (Id.Lit_LBrace, Id.Lit_RBrace), tok
            return tok

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)
            return LiteralToken(w)

        else:
            raise AssertionError()


def AsKeywordToken(UP_w):
    # type: (word_t) -> Token
    """
    Given a word that IS a CompoundWord containing just a keyword, return
    the single token at the start.
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    part = w.parts[0]
    assert part.tag() == word_part_e.Literal, part
    tok = cast(Token, part)
    assert consts.GetKind(tok.id) == Kind.KW, tok
    return tok


def AsOperatorToken(word):
    # type: (word_t) -> Token
    """For a word that IS an operator (word.Token), return that token.

    This must only be called on a word which is known to be an operator
    (word.Token).
    """
    assert word.tag() == word_e.Operator, word
    return cast(Token, word)


#
# Polymorphic between Token and Compound
#


def ArithId(w):
    # type: (word_t) -> Id_t
    """Used by shell arithmetic parsing."""
    if w.tag() == word_e.Operator:
        tok = cast(Token, w)
        return tok.id

    assert isinstance(w, CompoundWord)
    return Id.Word_Compound


def BoolId(w):
    # type: (word_t) -> Id_t
    UP_w = w
    with tagswitch(w) as case:
        if case(word_e.String):  # for test/[
            w = cast(word.String, UP_w)
            return w.id

        elif case(word_e.Operator):
            tok = cast(Token, UP_w)
            return tok.id

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)

            if len(w.parts) != 1:
                return Id.Word_Compound

            token_type = LiteralId(w.parts[0])
            if token_type == Id.Undefined_Tok:
                return Id.Word_Compound  # It's a regular word

            # This is outside the BoolUnary/BoolBinary namespace, but works
            # the same.
            if token_type in (Id.KW_Bang, Id.Lit_DRightBracket):
                return token_type  # special boolean "tokens"

            token_kind = consts.GetKind(token_type)
            if token_kind in (Kind.BoolUnary, Kind.BoolBinary):
                return token_type  # boolean operators

            return Id.Word_Compound

        else:
            # I think Empty never happens in this context?
            raise AssertionError(w.tag())


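# Illustrative mapping for BoolId() (my examples; the exact Id names are
# assumptions): inside [[ ]], the word -f maps to its Kind.BoolUnary token
# id, == maps to its Kind.BoolBinary token id, and a regular word like foo
# maps to Id.Word_Compound.

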
def CommandId(w):
    # type: (word_t) -> Id_t
    """Used by CommandParser."""
    UP_w = w
    with tagswitch(w) as case:
        if case(word_e.Operator):
            tok = cast(Token, UP_w)
            return tok.id

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)

            # Fine-grained categorization of SINGLE literal parts
            if len(w.parts) != 1:
                return Id.Word_Compound  # generic word

            token_type = LiteralId(w.parts[0])
            if token_type == Id.Undefined_Tok:
                return Id.Word_Compound  # Not Kind.Lit, generic word

            if token_type in (Id.Lit_LBrace, Id.Lit_RBrace, Id.Lit_Equals,
                              Id.Lit_TDot):
                # - { } are for YSH braces
                # - = is for the = keyword
                # - ... is to start multiline mode
                #
                # TODO: Should we use Op_{LBrace,RBrace} and Kind.Op when
                # parse_brace?  Lit_Equals could be KW_Equals?
                return token_type

            token_kind = consts.GetKind(token_type)
            if token_kind == Kind.KW:
                return token_type  # Id.KW_Var, etc.

            return Id.Word_Compound  # generic word

        else:
            raise AssertionError(w.tag())


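# Illustrative mapping for CommandId() (my examples, derived from the
# branches above): the word { maps to Id.Lit_LBrace, = to Id.Lit_Equals, a
# keyword like var to Id.KW_Var, and anything else (e.g. echo, $x) to
# Id.Word_Compound.

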
742 |
|
743 | def CommandKind(w):
|
744 | # type: (word_t) -> Kind_t
|
745 | """The CommandKind is for coarse-grained decisions in the CommandParser.
|
746 |
|
747 | NOTE: This is inconsistent with CommandId(), because we never return
|
748 | Kind.KW or Kind.Lit. But the CommandParser is easier to write this way.
|
749 |
|
750 | For example, these are valid redirects to a Kind.Word, and the parser
|
751 | checks:
|
752 |
|
753 | echo hi > =
|
754 | echo hi > {
|
755 |
|
756 | Invalid:
|
757 | echo hi > (
|
758 | echo hi > ;
|
759 | """
|
760 | if w.tag() == word_e.Operator:
|
761 | tok = cast(Token, w)
|
762 | # CommandParser uses Kind.Redir, Kind.Op, Kind.Eof, etc.
|
763 | return consts.GetKind(tok.id)
|
764 |
|
765 | return Kind.Word
|
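# Illustrative mapping for CommandKind() (my examples, per the docstring):
# the operator tokens ( and ; have Kind.Op, so `echo hi > ;` is rejected,
# while the compound words = and { are Kind.Word, so `echo hi > =` parses.

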
766 |
|
767 |
|
768 | # Stubs for converting RHS of assignment to expression mode.
|
769 | # For ysh_ify.py
|
770 | def IsVarSub(w):
|
771 | # type: (word_t) -> bool
|
772 | """Return whether it's any var sub, or a double quoted one."""
|
773 | return False
|
774 |
|
775 |
|
# Doesn't translate with mycpp because of dynamic %
def ErrorWord(error_str):
    # type: (str) -> CompoundWord
    t = lexer.DummyToken(Id.Lit_Chars, error_str)
    return CompoundWord([t])


def Pretty(w):
    # type: (word_t) -> str
    """Return a string to display to the user."""
    UP_w = w
    if w.tag() == word_e.String:
        w = cast(word.String, UP_w)
        if w.id == Id.Eof_Real:
            return 'EOF'
        else:
            return repr(w.s)
    else:
        return word_str(w.tag())  # tag name


class ctx_EmitDocToken(object):
    """For doc comments."""

    def __init__(self, w_parser):
        # type: (WordParser) -> None
        w_parser.EmitDocToken(True)
        self.w_parser = w_parser

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.w_parser.EmitDocToken(False)


class ctx_Multiline(object):
    """For multiline commands."""

    def __init__(self, w_parser):
        # type: (WordParser) -> None
        w_parser.Multiline(True)
        self.w_parser = w_parser

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.w_parser.Multiline(False)
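

# Usage sketch for the two context managers above (mine, not from the
# original source): each toggles a WordParser flag for the duration of a
# block, e.g.
#
#   with ctx_Multiline(w_parser):
#       ...  # parse in multiline mode; the flag is reset on exit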