builtin/read

OILS / builtin / read_osh.py View on Github | oils.pub

536 lines, 311 significant

1	from __future__ import print_function
2
3	from errno import EINTR
4
5	from _devbuild.gen import arg_types
6	from _devbuild.gen.runtime_asdl import (span_e, cmd_value)
7	from _devbuild.gen.syntax_asdl import source, loc_t
8	from _devbuild.gen.value_asdl import value, LeftName
9	from core import alloc
10	from core import error
11	from core.error import e_die
12	from core import pyos
13	from core import pyutil
14	from core import state
15	from display import ui
16	from core import vm
17	from frontend import flag_util
18	from frontend import reader
19	from frontend import typed_args
20	from mycpp import mops
21	from mycpp import mylib
22	from mycpp.mylib import log, STDIN_FILENO
23
24	import posix_ as posix
25
26	from typing import Tuple, List, Any, TYPE_CHECKING
27	if TYPE_CHECKING:
28	from _devbuild.gen.runtime_asdl import span_t
29	from frontend.parse_lib import ParseContext
30	from frontend import args
31	from osh.cmd_eval import CommandEvaluator
32	from osh.split import SplitContext
33
34	_ = log
35
36	# The read builtin splits using IFS.
37	#
38	# Summary:
39	# - Split with IFS, except that a backslash can escape IFS characters! This
40	# differs from the algorithm for splitting words (at least the way I've
40	# represented it.)
41
42	# Bash manual:
43	# - If there are more words than names, the remaining words and their
44	# intervening delimiters are assigned to the last name.
45	# - If there are fewer words read from the input stream than names, the
46	# remaining names are assigned empty values.
47	# - The characters in the value of the IFS variable are used to split the line
48	# into words using the same rules the shell uses for expansion (described
49	# above in Word Splitting).
50	# - The backslash character '\' may be used to remove any special meaning for
51	# the next character read and for line continuation.
52
53
54	def _AppendParts(
55	s, # type: str
56	spans, # type: List[Tuple[span_t, int]]
57	max_results, # type: int
58	join_next, # type: bool
59	parts, # type: List[mylib.BufWriter]
60	):
61	# type: (...) -> Tuple[bool, bool]
62	"""Append to 'parts', for the 'read' builtin.
63
64	Similar to _SpansToParts in osh/split.py
65
66	Args:
67	s: The original string
68	spans: List of (span, end_index)
69	max_results: the maximum number of parts we want
70	join_next: Whether to join the next span to the previous part. This
71	happens in two cases:
72	- when we have '\ '
73	- and when we have more spans # than max_results.
74	"""
75	start_index = 0
76	# If the last span was black, and we get a backslash, set join_next to merge
77	# two black spans.
78	last_span_was_black = False
79
80	for span_type, end_index in spans:
81	if span_type == span_e.Black:
82	if join_next and len(parts):
83	parts[-1].write(s[start_index:end_index])
84	join_next = False
85	else:
86	buf = mylib.BufWriter()
87	buf.write(s[start_index:end_index])
88	parts.append(buf)
89	last_span_was_black = True
90
91	elif span_type == span_e.Delim:
92	if join_next:
93	parts[-1].write(s[start_index:end_index])
94	join_next = False
95	last_span_was_black = False
96
97	elif span_type == span_e.Backslash:
98	if last_span_was_black:
99	join_next = True
100	last_span_was_black = False
101
102	if max_results and len(parts) >= max_results:
103	join_next = True
104
105	start_index = end_index
106
107	done = True
108	if len(spans):
109	#log('%s %s', s, spans)
110	#log('%s', spans[-1])
111	last_span_type, _ = spans[-1]
112	if last_span_type == span_e.Backslash:
113	done = False
114
115	#log('PARTS %s', parts)
116	return done, join_next
117
118
119	#
120	# Three read() wrappers for 'read' builtin that RunPendingTraps: _ReadN,
121	# _ReadPortion, and ReadLineSlowly
122	#
123
124
125	def _ReadN(num_bytes, cmd_ev):
126	# type: (int, CommandEvaluator) -> str
127	chunks = [] # type: List[str]
128	bytes_left = num_bytes
129	while bytes_left > 0:
130	n, err_num = pyos.Read(STDIN_FILENO, bytes_left, chunks)
131
132	if n < 0:
133	if err_num == EINTR:
134	cmd_ev.RunPendingTraps()
135	# retry after running traps
136	else:
137	raise pyos.ReadError(err_num)
138
139	elif n == 0: # EOF
140	break
141
142	else:
143	bytes_left -= n
144
145	return ''.join(chunks)
146
147
def _ReadPortion(delim_byte, max_chars, cmd_ev):
    # type: (int, int, CommandEvaluator) -> Tuple[str, bool]
    """Read a portion of stdin, one byte at a time.

    Reads until the delimiter or max_chars, whichever comes first.  max_chars
    is ignored if it's negative (callers pass -1 for "no limit").

    The delimiter is not included in the result.

    Args:
      delim_byte: byte value that terminates the portion
      max_chars: maximum number of bytes to consume, or negative for no limit
      cmd_ev: used to run pending traps when a read is interrupted

    Returns:
      (portion, eof), where eof is True if we stopped because stdin hit EOF.

    Raises:
      pyos.ReadError: on any read error other than EINTR
    """
    ch_array = []  # type: List[int]
    eof = False

    bytes_read = 0
    while True:
        if max_chars >= 0 and bytes_read >= max_chars:
            break

        ch, err_num = pyos.ReadByte(STDIN_FILENO)
        if ch < 0:
            if err_num != EINTR:
                raise pyos.ReadError(err_num)
            cmd_ev.RunPendingTraps()
            # An interrupted read consumed nothing, so retry WITHOUT counting
            # it toward max_chars.
            continue

        if ch == pyos.EOF_SENTINEL:
            eof = True
            break

        if ch == delim_byte:
            break

        # Quirk of most shells except zsh: they ignore NUL bytes!  The NUL is
        # dropped from the result, but it was still consumed, so it counts
        # toward max_chars below.
        if ch != 0:
            ch_array.append(ch)

        bytes_read += 1

    return pyutil.ChArrayToString(ch_array), eof
190
191
def ReadLineSlowly(cmd_ev, with_eol=True):
    # type: (CommandEvaluator, bool) -> Tuple[str, bool]
    """Read a line from stdin, unbuffered

    Used by mapfile and read --raw-line.

    sys.stdin.readline() in Python has its own buffering which is incompatible
    with shell semantics.  dash, mksh, and zsh all read a single byte at a time
    with read(0, 1).

    Args:
      cmd_ev: used to run pending traps when a read is interrupted
      with_eol: whether to keep the trailing newline in the returned line

    Returns:
      (line, eof), where eof is True only if EOF was hit before any byte was
      read.  A final line without a trailing newline is returned with
      eof=False.

    Raises:
      pyos.ReadError: on any read error other than EINTR
    """
    ch_array = []  # type: List[int]
    eof = False
    is_first_byte = True
    while True:
        ch, err_num = pyos.ReadByte(STDIN_FILENO)

        if ch < 0:
            if err_num != EINTR:
                raise pyos.ReadError(err_num)
            cmd_ev.RunPendingTraps()
            # Nothing was read, so retry and leave is_first_byte alone.
            # Otherwise a signal followed by immediate EOF would not be
            # reported as EOF.
            continue

        if ch == pyos.EOF_SENTINEL:
            if is_first_byte:
                eof = True
            break

        if ch == pyos.NEWLINE_CH:
            if with_eol:
                ch_array.append(ch)
            break

        ch_array.append(ch)
        is_first_byte = False

    return pyutil.ChArrayToString(ch_array), eof
232
233
def ReadAll():
    # type: () -> str
    """Read all of stdin.

    Similar to command sub in core/executor.py.

    Returns:
      Everything read from stdin up to EOF, as one string.

    Raises:
      pyos.ReadError: on any read error other than EINTR
    """
    chunks = []  # type: List[str]
    while True:
        # Use the named constant for the fd, like _ReadN does
        n, err_num = pyos.Read(STDIN_FILENO, 4096, chunks)

        if n < 0:
            if err_num != EINTR:
                raise pyos.ReadError(err_num)
            # Retry only.  Like read --line (and command sub), read --all
            # doesn't run traps.  It would be a bit weird to run every 4096
            # bytes.

        elif n == 0:  # EOF
            break

    return ''.join(chunks)
257
258
class ctx_TermAttrs(object):
    """Context manager that changes terminal attributes for a 'with' block.

    Construction calls pyos.PushTermAttrs(); leaving the block calls
    pyos.PopTermAttrs() with the values that were saved.
    """

    def __init__(self, fd, local_modes):
        # type: (int, int) -> None
        # We change term_attrs[3] in Python, which is lflag "local modes"
        saved_modes, attrs = pyos.PushTermAttrs(fd, local_modes)

        self.fd = fd
        self.orig_local_modes = saved_modes
        self.term_attrs = attrs

    def __enter__(self):
        # type: () -> None
        # Everything was already set up in the constructor
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        # Hand back what PushTermAttrs() gave us
        pyos.PopTermAttrs(self.fd, self.orig_local_modes, self.term_attrs)
276
277
class Read(vm._Builtin):
    """The 'read' builtin.

    Run() is the entry point.  Depending on the flags it either reads a whole
    value into a place (--all, --raw-line, --num-bytes; see _ReadYsh) or reads
    a portion of stdin and assigns the resulting parts to shell variables
    (see _Read).
    """

    def __init__(
            self,
            splitter,  # type: SplitContext
            mem,  # type: state.Mem
            parse_ctx,  # type: ParseContext
            cmd_ev,  # type: CommandEvaluator
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        self.splitter = splitter
        self.mem = mem
        self.parse_ctx = parse_ctx
        self.cmd_ev = cmd_ev
        self.errfmt = errfmt
        self.stdin_ = mylib.Stdin()

    # Was --qsn, might be restored as --j8-word or --j8-line
    if 0:
        #from data_lang import qsn_native
        def _MaybeDecodeLine(self, line):
            # type: (str) -> str
            """Raises error.Parse if line isn't valid."""

            # Lines that don't start with a single quote aren't QSN.  They may
            # contain a single quote internally, like:
            #
            # Fool's Gold
            if not line.startswith("'"):
                return line

            arena = self.parse_ctx.arena
            line_reader = reader.StringLineReader(line, arena)
            lexer = self.parse_ctx.MakeLexer(line_reader)

            # The parser only yields valid tokens:
            #     Char_OneChar, Char_Hex, Char_UBraced
            # So we can use word_compile.EvalCStringToken, which is also used
            # for $''.
            # Important: we don't generate Id.Unknown_Backslash because that is
            # valid in echo -e.  We just make it Id.Unknown_Tok?

            # TODO: read location info should know about stdin, and redirects,
            # and pipelines?
            with alloc.ctx_SourceCode(arena, source.Stdin('')):
                #tokens = qsn_native.Parse(lexer)
                pass
            #tmp = [word_compile.EvalCStringToken(t) for t in tokens]
            #return ''.join(tmp)
            return ''

    def Run(self, cmd_val):
        # type: (cmd_value.Argv) -> int
        """Run 'read', turning read failures into a message and status 1.

        pyos.ReadError, IOError and OSError raised by _Run() are caught here;
        each prints a message through errfmt and yields status 1.
        """
        try:
            status = self._Run(cmd_val)
        except pyos.ReadError as e:  # different paths for read -d, etc.
            # don't quote code since YSH errexit will likely quote
            self.errfmt.PrintMessage("Oils read error: %s" %
                                     posix.strerror(e.err_num))
            status = 1
        except (IOError, OSError) as e:  # different paths for read -d, etc.
            self.errfmt.PrintMessage("Oils read I/O error: %s" %
                                     pyutil.strerror(e))
            status = 1
        return status

    def _ReadYsh(self, arg, arg_r, cmd_val):
        # type: (arg_types.read, args.Reader, cmd_value.Argv) -> int
        """Handle --num-bytes, --raw-line and --all.

        Usage:

          read --all           # sets _reply
          read --all (&x)      # sets x

        Invalid for now:

          read (&x)            # YSH doesn't have token splitting
                               # we probably want read --row too

        Returns:
          0, except that --raw-line returns 1 when ReadLineSlowly() reports
          EOF.

        Raises:
          error.Usage: if there's an extra positional argument
        """
        place = None  # type: value.Place

        if cmd_val.proc_args:  # read --flag (&x)
            rd = typed_args.ReaderForProc(cmd_val)
            place = rd.PosPlace()
            rd.Done()

            blame_loc = cmd_val.proc_args.typed_args.left  # type: loc_t

        else:  # read --flag
            var_name = '_reply'

            blame_loc = cmd_val.arg_locs[0]
            place = value.Place(LeftName(var_name, blame_loc),
                                self.mem.CurrentFrame())

        next_arg, next_loc = arg_r.Peek2()
        if next_arg is not None:
            raise error.Usage('got extra argument', next_loc)

        num_bytes = mops.BigTruncate(arg.num_bytes)
        if num_bytes != -1:  # read --num-bytes
            contents = _ReadN(num_bytes, self.cmd_ev)
            status = 0

        elif arg.raw_line:  # read --raw-line is unbuffered
            contents, eof = ReadLineSlowly(self.cmd_ev, with_eol=arg.with_eol)
            status = 1 if eof else 0

        elif arg.all:  # read --all
            contents = ReadAll()
            status = 0

        else:
            # _Run() only dispatches here when one of the flags above is set
            raise AssertionError()

        self.mem.SetPlace(place, value.Str(contents), blame_loc)
        return status

    def _Run(self, cmd_val):
        # type: (cmd_value.Argv) -> int
        """Parse flags, then dispatch to _ReadYsh() or _Read().

        When stdin is a terminal, the -d / -n / -s flags select terminal
        local-mode bits that are changed for the duration of the read via
        ctx_TermAttrs, and the -p prompt is written to stderr.

        Raises:
          error.Usage: for -u, or for typed args without a YSH-style flag
        """
        attrs, arg_r = flag_util.ParseCmdVal('read',
                                             cmd_val,
                                             accept_typed_args=True)
        arg = arg_types.read(attrs.attrs)
        names = arg_r.Rest()

        if arg.u != mops.MINUS_ONE:
            # TODO: could implement this
            raise error.Usage('-u flag not implemented', cmd_val.arg_locs[0])

        if arg.raw_line or arg.all or mops.BigTruncate(arg.num_bytes) != -1:
            return self._ReadYsh(arg, arg_r, cmd_val)

        if cmd_val.proc_args:
            raise error.Usage(
                "doesn't accept typed args without --all, or --num-bytes",
                cmd_val.proc_args.typed_args.left)

        if arg.t >= 0.0:
            if arg.t != 0.0:
                e_die("read -t isn't implemented (except t=0)")
            else:
                return 0 if pyos.InputAvailable(STDIN_FILENO) else 1

        bits = 0
        if self.stdin_.isatty():
            # -d and -n should be unbuffered
            if arg.d is not None or mops.BigTruncate(arg.n) >= 0:
                bits |= pyos.TERM_ICANON
            if arg.s:  # silent
                bits |= pyos.TERM_ECHO

            if arg.p is not None:  # only if tty
                mylib.Stderr().write(arg.p)

        if bits == 0:
            status = self._Read(arg, names)
        else:
            # The selected bits are passed inverted, as a mask
            with ctx_TermAttrs(STDIN_FILENO, ~bits):
                status = self._Read(arg, names)
        return status

    def _Read(self, arg, names):
        # type: (arg_types.read, List[str]) -> int
        """Read from stdin and assign to variables (or an array with -a).

        Args:
          arg: parsed flags
          names: variable names to assign.  If empty, 'REPLY' is appended to
                 this list (when -N isn't given).

        Returns:
          With -N: 0 if exactly N bytes were read, else 1.
          Otherwise: 1 if EOF was hit while reading, else 0.  Variables are
          set in both cases.
        """
        # read a certain number of bytes, NOT respecting delimiter (-1 means
        # unset)
        arg_N = mops.BigTruncate(arg.N)
        if arg_N >= 0:
            s = _ReadN(arg_N, self.cmd_ev)

            if len(names):
                name = names[0]  # ignore other names

                # Clear extra names, as bash does
                for i in xrange(1, len(names)):
                    state.BuiltinSetString(self.mem, names[i], '')
            else:
                name = 'REPLY'  # default variable name

            state.BuiltinSetString(self.mem, name, s)

            # Did we read all the bytes we wanted?
            return 0 if len(s) == arg_N else 1

        do_split = False

        if len(names):
            do_split = True  # read myvar does word splitting
        else:
            # read without args does NOT split, and fills in $REPLY
            names.append('REPLY')

        if arg.a is not None:
            max_results = 0  # array can hold all parts
            do_split = True
        else:
            # Assign one part to each variable name; leftovers are assigned to
            # the last name
            max_results = len(names)

        if arg.Z:  # -0 is synonym for IFS= read -r -d ''
            do_split = False
            raw = True
            delim_byte = 0
        else:
            raw = arg.r
            if arg.d is not None:
                if len(arg.d):
                    delim_byte = ord(arg.d[0])
                else:
                    delim_byte = 0  # -d '' delimits by NUL
            else:
                delim_byte = pyos.NEWLINE_CH  # read a line

        # -n limit; doesn't change between portions, so compute it once
        max_chars = mops.BigTruncate(arg.n)

        # Read MORE THAN ONE line for \ line continuation (and not read -r)
        parts = []  # type: List[mylib.BufWriter]
        join_next = False
        status = 0
        while True:
            chunk, eof = _ReadPortion(delim_byte, max_chars, self.cmd_ev)

            if eof:
                # status 1 to terminate loop.  (This is true even though we
                # set variables).
                status = 1

            if len(chunk) == 0:
                break

            spans = self.splitter.SplitForRead(chunk, not raw, do_split)
            done, join_next = _AppendParts(chunk, spans, max_results,
                                           join_next, parts)

            if done:
                break

        entries = [buf.getvalue() for buf in parts]
        num_parts = len(entries)
        if arg.a is not None:
            state.BuiltinSetArray(self.mem, arg.a, entries)
        else:
            for i in xrange(max_results):
                if i < num_parts:
                    s = entries[i]
                else:
                    s = ''  # if there are too many variables
                var_name = names[i]
                state.BuiltinSetString(self.mem, var_name, s)

        return status