1 | from __future__ import print_function
|
2 |
|
3 | from errno import EINTR
|
4 |
|
5 | from _devbuild.gen import arg_types
|
6 | from _devbuild.gen.runtime_asdl import (span_e, cmd_value)
|
7 | from _devbuild.gen.syntax_asdl import source, loc_t
|
8 | from _devbuild.gen.value_asdl import value, LeftName
|
9 | from core import alloc
|
10 | from core import error
|
11 | from core.error import e_die
|
12 | from core import pyos
|
13 | from core import pyutil
|
14 | from core import state
|
15 | from display import ui
|
16 | from core import vm
|
17 | from frontend import flag_util
|
18 | from frontend import reader
|
19 | from frontend import typed_args
|
20 | from mycpp import mops
|
21 | from mycpp import mylib
|
22 | from mycpp.mylib import log, STDIN_FILENO
|
23 |
|
24 | import posix_ as posix
|
25 |
|
26 | from typing import Tuple, List, Any, TYPE_CHECKING
|
27 | if TYPE_CHECKING:
|
28 | from _devbuild.gen.runtime_asdl import span_t
|
29 | from frontend.parse_lib import ParseContext
|
30 | from frontend import args
|
31 | from osh.cmd_eval import CommandEvaluator
|
32 | from osh.split import SplitContext
|
33 |
|
34 | _ = log
|
35 |
|
36 | # The read builtin splits using IFS.
|
37 | #
|
38 | # Summary:
|
39 | # - Split with IFS, except \ can escape them! This is different than the
|
40 | # algorithm for splitting words (at least the way I've represented it.)
|
41 |
|
42 | # Bash manual:
|
43 | # - If there are more words than names, the remaining words and their
|
44 | # intervening delimiters are assigned to the last name.
|
45 | # - If there are fewer words read from the input stream than names, the
|
46 | # remaining names are assigned empty values.
|
47 | # - The characters in the value of the IFS variable are used to split the line
|
48 | # into words using the same rules the shell uses for expansion (described
|
49 | # above in Word Splitting).
|
50 | # - The backslash character '\' may be used to remove any special meaning for
|
51 | # the next character read and for line continuation.
|
52 |
|
53 |
|
def _AppendParts(
        s,  # type: str
        spans,  # type: List[Tuple[span_t, int]]
        max_results,  # type: int
        join_next,  # type: bool
        parts,  # type: List[mylib.BufWriter]
):
    # type: (...) -> Tuple[bool, bool]
    """Turn the spans of one input chunk into 'parts', for the 'read' builtin.

    Similar to _SpansToParts in osh/split.py

    'parts' is mutated in place: each Black span either starts a new
    BufWriter or is appended to the last one.

    Args:
      s: The original string
      spans: List of (span, end_index); each span starts where the previous
        one ended
      max_results: the maximum number of parts we want (0 means no limit)
      join_next: Whether to join the next span to the previous part.  This
        happens in two cases:
        - when we have '\\ '
        - and when we have more spans than max_results.
      parts: the parts accumulated so far, possibly from earlier chunks

    Returns:
      (done, join_next).  done is False when the chunk ends with a Backslash
      span; join_next is the state to pass in with the next chunk.
    """
    pos = 0
    # A backslash right after a black span sets join_next, so that the two
    # black spans around it are merged.
    prev_black = False

    for kind, end_pos in spans:
        if kind == span_e.Black:
            if not join_next or len(parts) == 0:
                # Start a new part
                w = mylib.BufWriter()
                w.write(s[pos:end_pos])
                parts.append(w)
            else:
                # Glue onto the previous part
                parts[-1].write(s[pos:end_pos])
                join_next = False
            prev_black = True

        elif kind == span_e.Delim:
            if join_next:
                # The delimiter text itself is kept in the previous part
                parts[-1].write(s[pos:end_pos])
                join_next = False
            prev_black = False

        elif kind == span_e.Backslash:
            if prev_black:
                join_next = True
            prev_black = False

        # Once we have enough parts, everything else goes into the last one
        if max_results != 0 and len(parts) >= max_results:
            join_next = True

        pos = end_pos

    done = True
    if len(spans):
        last_kind, _ = spans[-1]
        done = last_kind != span_e.Backslash

    return done, join_next
|
117 |
|
118 |
|
119 | #
|
120 | # Three read() wrappers for 'read' builtin that RunPendingTraps: _ReadN,
|
121 | # _ReadPortion, and ReadLineSlowly
|
122 | #
|
123 |
|
124 |
|
def _ReadN(num_bytes, cmd_ev):
    # type: (int, CommandEvaluator) -> str
    """Read up to num_bytes from stdin, stopping early only at EOF.

    When a read is interrupted (EINTR), pending traps are run and the read is
    retried.  Any other read error raises pyos.ReadError.

    Returns:
      The chunks collected by pyos.Read(), joined into one string.  It can be
      shorter than num_bytes if EOF was reached.
    """
    pieces = []  # type: List[str]
    remaining = num_bytes
    while remaining > 0:
        n, err_num = pyos.Read(STDIN_FILENO, remaining, pieces)

        if n == 0:  # EOF
            break

        if n > 0:
            remaining -= n
            continue

        # n < 0: the read failed
        if err_num != EINTR:
            raise pyos.ReadError(err_num)

        # Interrupted by a signal: run traps, then retry
        cmd_ev.RunPendingTraps()

    return ''.join(pieces)
|
146 |
|
147 |
|
def _ReadPortion(delim_byte, max_chars, cmd_ev):
    # type: (int, int, CommandEvaluator) -> Tuple[str, bool]
    """Read a portion of stdin, one byte at a time.

    Reads until the delimiter, EOF, or max_chars bytes, whichever comes
    first.  max_chars is ignored if it's negative (-1 means unset).

    The delimiter is not included in the result.  NUL bytes are dropped from
    the result, but they still count toward max_chars.

    Args:
      delim_byte: byte value that ends the portion
      max_chars: maximum number of bytes to consume, or -1 for no limit
      cmd_ev: used to run pending traps when a read is interrupted

    Returns:
      (the string that was read, True if we stopped because of EOF)

    Raises:
      pyos.ReadError: for any read failure other than EINTR
    """
    ch_array = []  # type: List[int]
    eof = False

    bytes_read = 0
    while True:
        if max_chars >= 0 and bytes_read >= max_chars:
            break

        ch, err_num = pyos.ReadByte(0)
        if ch < 0:
            if err_num == EINTR:
                cmd_ev.RunPendingTraps()
                # Retry after running traps.  The interrupted call consumed
                # no input, so it must NOT count toward max_chars (this
                # matches _ReadN, which leaves its counter alone on EINTR).
                continue
            else:
                raise pyos.ReadError(err_num)

        elif ch == pyos.EOF_SENTINEL:
            eof = True
            break

        elif ch == delim_byte:
            break

        elif ch == 0:
            # Quirk of most shells except zsh: they ignore NUL bytes!
            pass

        else:
            ch_array.append(ch)

        bytes_read += 1

    return pyutil.ChArrayToString(ch_array), eof
|
190 |
|
191 |
|
def ReadLineSlowly(cmd_ev, with_eol=True):
    # type: (CommandEvaluator, bool) -> Tuple[str, bool]
    """Read a line from stdin, unbuffered

    Used by mapfile and read --raw-line.

    sys.stdin.readline() in Python has its own buffering which is incompatible
    with shell semantics.  dash, mksh, and zsh all read a single byte at a time
    with read(0, 1).

    Args:
      cmd_ev: used to run pending traps when a read is interrupted
      with_eol: whether to keep the trailing newline in the result

    Returns:
      (line, eof).  eof is True only when EOF was hit before any byte of the
      line was read; a final line without a newline is returned with
      eof=False.

    Raises:
      pyos.ReadError: for any read failure other than EINTR
    """
    ch_array = []  # type: List[int]
    eof = False
    is_first_byte = True
    while True:
        ch, err_num = pyos.ReadByte(0)
        #log('   ch %d', ch)

        if ch < 0:
            if err_num == EINTR:
                cmd_ev.RunPendingTraps()
                # Retry after running traps.  Nothing was read, so we're
                # still waiting for the first byte; otherwise an EOF right
                # after a signal would look like an empty line.
                continue
            else:
                raise pyos.ReadError(err_num)

        elif ch == pyos.EOF_SENTINEL:
            if is_first_byte:
                eof = True
            break

        elif ch == pyos.NEWLINE_CH:
            if with_eol:
                ch_array.append(ch)
            break

        else:
            ch_array.append(ch)

        is_first_byte = False

    return pyutil.ChArrayToString(ch_array), eof
|
232 |
|
233 |
|
def ReadAll():
    # type: () -> str
    """Read stdin until EOF and return everything as one string.

    Similar to command sub in core/executor.py.

    Raises:
      pyos.ReadError: for any read failure other than EINTR
    """
    pieces = []  # type: List[str]
    while True:
        n, err_num = pyos.Read(0, 4096, pieces)

        if n == 0:  # EOF
            break

        # On EINTR we only retry.  Like read --line (and command sub), read
        # --all doesn't run traps.  It would be a bit weird to run every 4096
        # bytes.
        if n < 0 and err_num != EINTR:
            raise pyos.ReadError(err_num)

    return ''.join(pieces)
|
257 |
|
258 |
|
class ctx_TermAttrs(object):
    """Context manager that changes terminal attributes and restores them.

    The attributes are pushed in the constructor, via pyos.PushTermAttrs(),
    and popped in __exit__, via pyos.PopTermAttrs().
    """

    def __init__(self, fd, local_modes):
        # type: (int, int) -> None
        self.fd = fd

        # We change term_attrs[3] in Python, which is lflag "local modes"
        saved_modes, saved_attrs = pyos.PushTermAttrs(fd, local_modes)
        self.orig_local_modes = saved_modes
        self.term_attrs = saved_attrs

    def __enter__(self):
        # type: () -> None
        # Nothing to do: the attributes were already pushed in __init__
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        pyos.PopTermAttrs(self.fd, self.orig_local_modes, self.term_attrs)
|
276 |
|
277 |
|
class Read(vm._Builtin):
    """The 'read' builtin.

    Run() is the entry point.  _Run() parses flags and dispatches to either
    _ReadYsh() (for --all, --raw-line, --num-bytes) or _Read() (the shell
    style read, which splits the input and assigns to variable names).
    """

    def __init__(
            self,
            splitter,  # type: SplitContext
            mem,  # type: state.Mem
            parse_ctx,  # type: ParseContext
            cmd_ev,  # type: CommandEvaluator
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        self.splitter = splitter
        self.mem = mem
        self.parse_ctx = parse_ctx
        self.cmd_ev = cmd_ev
        self.errfmt = errfmt
        self.stdin_ = mylib.Stdin()

    # Was --qsn, might be restored as --j8-word or --j8-line
    if 0:
        #from data_lang import qsn_native
        def _MaybeDecodeLine(self, line):
            # type: (str) -> str
            """Raises error.Parse if line isn't valid."""

            # Lines that don't start with a single quote aren't QSN.  They may
            # contain a single quote internally, like:
            #
            # Fool's Gold
            if not line.startswith("'"):
                return line

            arena = self.parse_ctx.arena
            line_reader = reader.StringLineReader(line, arena)
            lexer = self.parse_ctx.MakeLexer(line_reader)

            # The parser only yields valid tokens:
            #     Char_OneChar, Char_Hex, Char_UBraced
            # So we can use word_compile.EvalCStringToken, which is also used for
            # $''.
            # Important: we don't generate Id.Unknown_Backslash because that is valid
            # in echo -e.  We just make it Id.Unknown_Tok?

            # TODO: read location info should know about stdin, and redirects, and
            # pipelines?
            with alloc.ctx_SourceCode(arena, source.Stdin('')):
                #tokens = qsn_native.Parse(lexer)
                pass
            #tmp = [word_compile.EvalCStringToken(t) for t in tokens]
            #return ''.join(tmp)
            return ''

    def Run(self, cmd_val):
        # type: (cmd_value.Argv) -> int
        """Run the builtin, turning read failures into status 1.

        Both pyos.ReadError and IOError/OSError are reported through
        self.errfmt rather than propagated.
        """
        try:
            status = self._Run(cmd_val)
        except pyos.ReadError as e:  # different paths for read -d, etc.
            # don't quote code since YSH errexit will likely quote
            self.errfmt.PrintMessage("Oils read error: %s" %
                                     posix.strerror(e.err_num))
            status = 1
        except (IOError, OSError) as e:  # different paths for read -d, etc.
            self.errfmt.PrintMessage("Oils read I/O error: %s" %
                                     pyutil.strerror(e))
            status = 1
        return status

    def _ReadYsh(self, arg, arg_r, cmd_val):
        # type: (arg_types.read, args.Reader, cmd_value.Argv) -> int
        """
        Usage:

          read --all               # sets _reply
          read --all (&x)          # sets x

        Invalid for now:

          read (&x)                # YSH doesn't have token splitting
                                   # we probably want read --row too

        The same applies to --num-bytes and --raw-line.  Returns 1 only when
        --raw-line hits EOF before reading anything; otherwise 0.
        """
        place = None  # type: value.Place

        if cmd_val.proc_args:  # read --flag (&x)
            rd = typed_args.ReaderForProc(cmd_val)
            place = rd.PosPlace()
            rd.Done()

            blame_loc = cmd_val.proc_args.typed_args.left  # type: loc_t

        else:  # read --flag
            var_name = '_reply'

            #log('VAR %s', var_name)
            blame_loc = cmd_val.arg_locs[0]
            place = value.Place(LeftName(var_name, blame_loc),
                                self.mem.CurrentFrame())

        # These flags don't take variable names as words
        next_arg, next_loc = arg_r.Peek2()
        if next_arg is not None:
            raise error.Usage('got extra argument', next_loc)

        num_bytes = mops.BigTruncate(arg.num_bytes)
        if num_bytes != -1:  # read --num-bytes
            contents = _ReadN(num_bytes, self.cmd_ev)
            status = 0

        elif arg.raw_line:  # read --raw-line is unbuffered
            contents, eof = ReadLineSlowly(self.cmd_ev, with_eol=arg.with_eol)
            #log('EOF %s', eof)
            status = 1 if eof else 0

        elif arg.all:  # read --all
            contents = ReadAll()
            status = 0

        else:
            # _Run() only calls us when one of the three flags is set
            raise AssertionError()

        self.mem.SetPlace(place, value.Str(contents), blame_loc)
        return status

    def _Run(self, cmd_val):
        # type: (cmd_value.Argv) -> int
        """Parse flags, then dispatch to _ReadYsh() or _Read().

        For the shell-style read on a terminal, this also prints the -p
        prompt to stderr and adjusts terminal modes for -d, -n and -s.

        Raises:
          error.Usage: for -u, or for typed args without a YSH flag
        """
        attrs, arg_r = flag_util.ParseCmdVal('read',
                                             cmd_val,
                                             accept_typed_args=True)
        arg = arg_types.read(attrs.attrs)
        names = arg_r.Rest()

        if arg.u != mops.MINUS_ONE:
            # TODO: could implement this
            raise error.Usage('-u flag not implemented', cmd_val.arg_locs[0])

        if arg.raw_line or arg.all or mops.BigTruncate(arg.num_bytes) != -1:
            return self._ReadYsh(arg, arg_r, cmd_val)

        if cmd_val.proc_args:
            # Name every flag that the condition above accepts
            raise error.Usage(
                "doesn't accept typed args without --all, --raw-line, or --num-bytes",
                cmd_val.proc_args.typed_args.left)

        if arg.t >= 0.0:
            if arg.t != 0.0:
                e_die("read -t isn't implemented (except t=0)")
            else:
                # -t 0 only tests whether input is available
                return 0 if pyos.InputAvailable(STDIN_FILENO) else 1

        # Terminal local-mode bits to turn OFF while reading
        bits = 0
        if self.stdin_.isatty():
            # -d and -n should be unbuffered
            if arg.d is not None or mops.BigTruncate(arg.n) >= 0:
                bits |= pyos.TERM_ICANON
            if arg.s:  # silent
                bits |= pyos.TERM_ECHO

            if arg.p is not None:  # only if tty
                mylib.Stderr().write(arg.p)

        if bits == 0:
            status = self._Read(arg, names)
        else:
            with ctx_TermAttrs(STDIN_FILENO, ~bits):
                status = self._Read(arg, names)
        return status

    def _Read(self, arg, names):
        # type: (arg_types.read, List[str]) -> int
        """Shell-style read: read input, split it, and assign to 'names'.

        With -N, exactly that many bytes are requested and assigned to the
        first name (or REPLY) without splitting.  Otherwise one or more
        chunks are read up to the delimiter, with backslash continuation
        unless raw, and the parts are assigned to the names or to the -a
        array.

        Note that 'names' is mutated: 'REPLY' is appended when it's empty.

        Returns:
          1 if EOF was hit (or -N got fewer bytes than requested), else 0
        """
        # read a certain number of bytes, NOT respecting delimiter (-1 means
        # unset)
        arg_N = mops.BigTruncate(arg.N)
        if arg_N >= 0:
            s = _ReadN(arg_N, self.cmd_ev)

            if len(names):
                name = names[0]  # ignore other names

                # Clear extra names, as bash does
                for i in xrange(1, len(names)):
                    state.BuiltinSetString(self.mem, names[i], '')
            else:
                name = 'REPLY'  # default variable name

            state.BuiltinSetString(self.mem, name, s)

            # Did we read all the bytes we wanted?
            return 0 if len(s) == arg_N else 1

        do_split = False

        if len(names):
            do_split = True  # read myvar does word splitting
        else:
            # read without args does NOT split, and fills in $REPLY
            names.append('REPLY')

        if arg.a is not None:
            max_results = 0  # array can hold all parts
            do_split = True
        else:
            # Assign one part to each variable name; leftovers are assigned to
            # the last name
            max_results = len(names)

        if arg.Z:  # -0 is synonym for IFS= read -r -d ''
            do_split = False
            raw = True
            delim_byte = 0
        else:
            raw = arg.r
            if arg.d is not None:
                if len(arg.d):
                    delim_byte = ord(arg.d[0])
                else:
                    delim_byte = 0  # -d '' delimits by NUL
            else:
                delim_byte = pyos.NEWLINE_CH  # read a line

        # The -n limit doesn't change between chunks, so compute it once
        max_chars = mops.BigTruncate(arg.n)

        # Read MORE THAN ONE line for \ line continuation (and not read -r)
        parts = []  # type: List[mylib.BufWriter]
        join_next = False
        status = 0
        while True:
            chunk, eof = _ReadPortion(delim_byte, max_chars, self.cmd_ev)

            if eof:
                # status 1 to terminate loop.  (This is true even though we set
                # variables).
                status = 1

            #log('LINE %r', chunk)
            if len(chunk) == 0:
                break

            spans = self.splitter.SplitForRead(chunk, not raw, do_split)
            done, join_next = _AppendParts(chunk, spans, max_results,
                                           join_next, parts)

            #log('PARTS %s continued %s', parts, continued)
            if done:
                break

        entries = [buf.getvalue() for buf in parts]
        num_parts = len(entries)
        if arg.a is not None:
            state.BuiltinSetArray(self.mem, arg.a, entries)
        else:
            for i in xrange(max_results):
                if i < num_parts:
                    s = entries[i]
                else:
                    s = ''  # if there are too many variables
                var_name = names[i]
                #log('read: %s = %s', var_name, s)
                state.BuiltinSetString(self.mem, var_name, s)

        return status
|