OILS / core / executor.py View on Github | oils.pub

1053 lines, 538 significant
1"""executor.py."""
2from __future__ import print_function
3
4from errno import EINTR
5
6from _devbuild.gen.id_kind_asdl import Id
7from _devbuild.gen.option_asdl import builtin_i, builtin_t
8from _devbuild.gen.runtime_asdl import RedirValue, trace
9from _devbuild.gen.syntax_asdl import (
10 command,
11 command_e,
12 CommandSub,
13 CompoundWord,
14 loc,
15 loc_t,
16 word_t,
17)
18from builtin import hay_ysh
19from core import dev
20from core import error
21from core.error import e_die, e_die_status
22from core import process
23from core import pyos
24from core import state
25from core import vm
26from display import ui
27from frontend import consts
28from frontend import lexer
29from mycpp import mylib
30from mycpp.mylib import str_switch, log, print_stderr
31from pylib import os_path
32from pylib import path_stat
33
34import posix_ as posix
35from posix_ import X_OK # translated directly to C macro
36
37from typing import cast, Dict, List, Tuple, Optional, TYPE_CHECKING
38if TYPE_CHECKING:
39 from _devbuild.gen.runtime_asdl import (cmd_value, CommandStatus,
40 StatusArray)
41 from _devbuild.gen.syntax_asdl import command_t
42 from builtin import trap_osh
43 from core import optview
44
45_ = log
46
47
def LookupExecutable(name, path_dirs, exec_required=True):
    # type: (str, List[str], bool) -> Optional[str]
    """Search for an executable, resolving a name against a list of dirs.

    Returns:
      - the name unchanged, if it contains a slash and the file exists
      - the first match of name against path_dirs
      - None when nothing is found (or when name is empty)
    """
    # Empty name is a special case for "$(true)"
    if len(name) == 0:
        return None

    # A name containing a slash is used as-is; it's never resolved
    # against path_dirs.
    if '/' in name:
        if path_stat.exists(name):
            return name
        return None

    for d in path_dirs:
        candidate = os_path.join(d, name)
        if exec_required:
            # X_OK: the file must be executable by this process
            ok = posix.access(candidate, X_OK)
        else:
            ok = path_stat.exists(candidate)

        if ok:
            return candidate

    return None
73
74
def _RewriteExternToBuiltin(argv):
    # type: (List[str]) -> builtin_t
    """Decide whether to rewrite an external command as a builtin.

    This function can have false negatives, but NOT false positives.

    False negative:
      We could have used the builtin, but used the extern instead
    False positive like 'cat -v'
      We tried to use the builtin for a feature it doesn't support!  This is
      a BUG

    SOUND optimizations:
      cat
      rm

    TODO sound:
      mkdir - common in Oils
      mv - used in autoconf
        - rename() only
        - if the files live on different devices, then fall back to extern
          (requires different logic)
      ln -s -f -v - no reason to shell out

      # Path operations
      - readlink -f - yeah actually we should do this, it's a transparent
        optimization
        - it just calls realpath
      - dirname
      - basename

    Quoting: shopt --set rewrite_name_regex
      - ls - without args?
        - Is quoting compatible?  May not matter
      - find
        - Also need a better quoting mode
      - wc displays filenames

    YSH: fs can be a new design to take the place of ls and find

    - Starting processes
      - xargs -P

    Regex:
      grep egrep fgrep -v -o '^ *+'
        builtin grep *.py (/d+/)  # eggex pattern
      sed ?  Because --regexp-extended is GNU?  Maybe implement that
        builtin sed s (/<capture d+ as month>/, ^"$month")

      Why hidden in OSH?  Because regex engines can have MINOR syntax
      differences, like []] for char classes.  But it could be ON in YSH,
      specifically so you can AVOID those differences!

      Meh: explicit builtin grep / builtin sed is better.  Make a note about
      performance in doc/ref.

    Field selection:
      awk / cut

      Though be careful of being slower than awk to execute.

    Maybe an alias:
      b grep
      b sed
      b wc
      b ls

    For anything with possible unsoundness.  cat, rm,
    """
    assert len(argv) >= 1, argv  # enforced in the executor

    arg0 = argv[0]
    n = len(argv)
    with str_switch(arg0) as case:
        if case('cat'):
            for i in xrange(1, n):
                arg = argv[i]

                # allowed: cat -
                # allowed: cat -- foo
                if arg == '-' or arg == '--':
                    continue

                # any other flag: don't rewrite
                if arg.startswith('-'):
                    return consts.NO_INDEX

            # Every arg was OK
            return builtin_i.cat

        elif case('rm'):
            for i in xrange(1, n):
                arg = argv[i]

                # allowed: rm -- foo
                # allowed: rm -f foo
                if arg == '--' or arg == '-f':
                    continue

                # any other flag: don't rewrite
                if arg.startswith('-'):
                    return consts.NO_INDEX

            return builtin_i.rm

        else:
            return consts.NO_INDEX
187
188
class SearchPath(object):
    """For looking up executables in $PATH or ENV.PATH, with a cache."""

    def __init__(self, mem, exec_opts):
        # type: (state.Mem, optview.Exec) -> None
        self.mem = mem
        # Maps command name -> resolved path, for CachedLookup()
        self.cache = {}  # type: Dict[str, str]

    def _GetPath(self):
        # type: () -> List[str]
        """Return the list of directories to search."""

        # In YSH, we read from ENV.PATH
        path_str = self.mem.env_config.Get('PATH')
        if path_str is None:
            return []  # treat as empty path

        # TODO: Could cache this to avoid split() allocating all the time.
        return path_str.split(':')

    def LookupOne(self, name, exec_required=True):
        # type: (str, bool) -> Optional[str]
        """
        Returns the path itself (if relative path), the resolved path, or None.
        """
        return LookupExecutable(name,
                                self._GetPath(),
                                exec_required=exec_required)

    def LookupReflect(self, name, do_all):
        # type: (str, bool) -> List[str]
        """
        Like LookupOne(), with an option for 'type -a' to return all paths.
        """
        if len(name) == 0:  # special case for "$(true)"
            return []

        if '/' in name:
            return [name] if path_stat.exists(name) else []

        found = []  # type: List[str]
        for d in self._GetPath():
            candidate = os_path.join(d, name)
            if path_stat.exists(candidate):
                found.append(candidate)
                if not do_all:
                    break  # first match wins

        return found

    def CachedLookup(self, name):
        # type: (str) -> Optional[str]
        """Like LookupOne(), but cache hits skip the file system."""
        if name not in self.cache:
            resolved = self.LookupOne(name)
            if resolved is None:
                return None  # misses are NOT cached
            self.cache[name] = resolved
        return self.cache[name]

    def MaybeRemoveEntry(self, name):
        # type: (str) -> None
        """Invalidate one entry, e.g. when the file system changes."""
        mylib.dict_erase(self.cache, name)

    def ClearCache(self):
        # type: () -> None
        """Drop every entry, for hash -r."""
        self.cache.clear()

    def CachedCommands(self):
        # type: () -> List[str]
        """Return the resolved paths currently cached."""
        return self.cache.values()
265
266
267class _ProcessSubFrame(object):
268 """To keep track of diff <(cat 1) <(cat 2) > >(tac)"""
269
270 def __init__(self):
271 # type: () -> None
272
273 # These objects appear unconditionally in the main loop, and aren't
274 # commonly used, so we manually optimize [] into None.
275
276 self._to_wait = [] # type: List[process.Process]
277 self._to_close = [] # type: List[int] # file descriptors
278 self._locs = [] # type: List[loc_t]
279 self._modified = False
280
281 def WasModified(self):
282 # type: () -> bool
283 return self._modified
284
285 def Append(self, p, fd, status_loc):
286 # type: (process.Process, int, loc_t) -> None
287 self._modified = True
288
289 self._to_wait.append(p)
290 self._to_close.append(fd)
291 self._locs.append(status_loc)
292
293 def MaybeWaitOnProcessSubs(self, waiter, status_array):
294 # type: (process.Waiter, StatusArray) -> None
295
296 # Wait in the same order that they were evaluated. That seems fine.
297 for fd in self._to_close:
298 posix.close(fd)
299
300 codes = [] # type: List[int]
301 locs = [] # type: List[loc_t]
302 for i, p in enumerate(self._to_wait):
303 #log('waiting for %s', p)
304 st = p.Wait(waiter)
305 codes.append(st)
306 locs.append(self._locs[i])
307
308 status_array.codes = codes
309 status_array.locs = locs
310
311
312# Big flags for _RunSimpleCommand
313IS_LAST_CMD = 1 << 1
314NO_CALL_PROCS = 1 << 2 # command ls suppresses function lookup
315USE_DEFAULT_PATH = 1 << 3 # for command -p ls changes the path
316
317# Copied from var.c in dash
318DEFAULT_PATH = [
319 '/usr/local/sbin', '/usr/local/bin', '/usr/sbin', '/usr/bin', '/sbin',
320 '/bin'
321]
322
323_PURITY_STATUS = 5
324
325
class PureExecutor(vm._Executor):
    """Executor for pure mode: runs procs and Hay nodes, but never starts
    processes, runs pipelines, or performs redirects."""

    # mycpp needs this duplicate constructor
    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            procs,  # type: state.Procs
            hay_state,  # type: hay_ysh.HayState
            builtins,  # type: Dict[int, vm._Builtin]
            tracer,  # type: dev.Tracer
            errfmt  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        vm._Executor.__init__(self, mem, exec_opts, mutable_opts, procs,
                              hay_state, builtins, tracer, errfmt)

    def _RunSimpleCommand(self, arg0, arg0_loc, cmd_val, cmd_st, run_flags):
        # type: (str, loc_t, cmd_value.Argv, CommandStatus, int) -> int
        """Resolve arg0 as an invokable or a Hay node; externals don't exist
        in pure mode."""

        if not (run_flags & NO_CALL_PROCS):
            invokable, self_val = self.procs.GetInvokable(arg0)
            if invokable is not None:
                return self._RunInvokable(invokable, self_val, arg0_loc,
                                          cmd_val)

        if self.hay_state.Resolve(arg0):
            return self.RunBuiltin(builtin_i.haynode, cmd_val)

        self.errfmt.Print_(
            'Command %r not found in pure mode (OILS-ERR-102)' % arg0,
            arg0_loc)
        return 127

    def RunBackgroundJob(self, node):
        # type: (command_t) -> int
        """Disallowed in pure mode."""
        raise error.Structured(
            _PURITY_STATUS,
            "Background jobs aren't allowed in pure mode (OILS-ERR-204)",
            loc.Command(node))

    def RunPipeline(self, node, status_out):
        # type: (command.Pipeline, CommandStatus) -> None
        """Disallowed in pure mode."""
        raise error.Structured(
            _PURITY_STATUS,
            "Pipelines aren't allowed in pure mode (OILS-ERR-204)",
            loc.Command(node))

    def RunSubshell(self, node):
        # type: (command_t) -> int
        """Disallowed in pure mode."""
        raise error.Structured(
            _PURITY_STATUS,
            "Subshells aren't allowed in pure mode (OILS-ERR-204)",
            loc.Command(node))

    def CaptureStdout(self, node):
        # type: (command_t) -> Tuple[int, str]
        """Used by io->captureStdout() method, and called by command sub.

        Pure mode always reports success with empty output.
        """
        return 0, ''

    def Capture3(self, node):
        # type: (command_t) -> Tuple[int, str, str]
        """Used by io->captureAll() method, and called by command sub.

        Pure mode always reports success with empty stdout and stderr.
        """
        return 0, '', ''

    def RunCommandSub(self, cs_part):
        # type: (CommandSub) -> str
        """Disallowed in pure mode."""
        raise error.Structured(
            _PURITY_STATUS,
            "Command subs aren't allowed in pure mode (OILS-ERR-204)",
            loc.WordPart(cs_part))

    def RunProcessSub(self, cs_part):
        # type: (CommandSub) -> str
        """Disallowed in pure mode."""
        raise error.Structured(
            _PURITY_STATUS,
            "Process subs aren't allowed in pure mode (OILS-ERR-204)",
            loc.WordPart(cs_part))

    def PushRedirects(self, redirects, err_out):
        # type: (List[RedirValue], List[error.IOError_OSError]) -> None
        pass  # no-op: redirects have no effect in pure mode

    def PopRedirects(self, num_redirects, err_out):
        # type: (int, List[error.IOError_OSError]) -> None
        pass  # no-op, matching PushRedirects

    def PushProcessSub(self):
        # type: () -> None
        pass  # no-op: process subs are rejected by RunProcessSub()

    def PopProcessSub(self, compound_st):
        # type: (StatusArray) -> None
        pass  # no-op, matching PushProcessSub
425
426
class ShellExecutor(vm._Executor):
    """An executor combined with the OSH language evaluators in osh/ to create
    a shell interpreter."""

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            procs,  # type: state.Procs
            hay_state,  # type: hay_ysh.HayState
            builtins,  # type: Dict[int, vm._Builtin]
            tracer,  # type: dev.Tracer
            errfmt,  # type: ui.ErrorFormatter
            search_path,  # type: SearchPath
            ext_prog,  # type: process.ExternalProgram
            waiter,  # type: process.Waiter
            job_control,  # type: process.JobControl
            job_list,  # type: process.JobList
            fd_state,  # type: process.FdState
            trap_state,  # type: trap_osh.TrapState
    ):
        # type: (...) -> None
        vm._Executor.__init__(self, mem, exec_opts, mutable_opts, procs,
                              hay_state, builtins, tracer, errfmt)
        self.search_path = search_path
        self.ext_prog = ext_prog
        self.waiter = waiter
        self.multi_trace = tracer.multi_trace
        self.job_control = job_control
        # sleep 5 & puts a (PID, job#) entry here. And then "jobs" displays it.
        self.job_list = job_list
        self.fd_state = fd_state
        self.trap_state = trap_state
        # Stack of frames for process subs; pushed/popped around each command.
        self.process_sub_stack = []  # type: List[_ProcessSubFrame]
        # Unmodified frames are recycled here to avoid allocations.
        self.clean_frame_pool = []  # type: List[_ProcessSubFrame]

        # When starting a pipeline in the foreground, we need to pass a handle to it
        # through the evaluation of the last node back to ourselves for execution.
        # We use this handle to make sure any processes forked for the last part of
        # the pipeline are placed into the same process group as the rest of the
        # pipeline. Since there is, by design, only ever one foreground pipeline and
        # any pipelines started within subshells run in their parent's process
        # group, we only need one pointer here, not some collection.
        self.fg_pipeline = None  # type: Optional[process.Pipeline]

        # Pre-built ['builtin', 'cat'] word list, used by RunCommandSub() for
        # the $(<file) rewrite.
        tok1 = lexer.DummyToken(Id.Lit_Chars, 'builtin')
        tok2 = lexer.DummyToken(Id.Lit_Chars, 'cat')
        self.builtin_cat_words = [CompoundWord([tok1]),
                                  CompoundWord([tok2])]  # type: List[word_t]

    def _MakeProcess(self, node, inherit_errexit, inherit_errtrace):
        # type: (command_t, bool, bool) -> process.Process
        """Assume we will run the node in another process.

        Return a process.
        """
        UP_node = node
        if node.tag() == command_e.ControlFlow:
            node = cast(command.ControlFlow, UP_node)
            # Pipeline or subshells with control flow are invalid, e.g.:
            # - break | less
            # - continue | less
            # - ( return )
            # NOTE: This could be done at parse time too.
            if node.keyword.id != Id.ControlFlow_Exit:
                e_die(
                    'Invalid control flow %r in pipeline / subshell / background'
                    % lexer.TokenVal(node.keyword), node.keyword)

        # NOTE: If ErrExit(), we could be verbose about subprogram errors? This
        # only really matters when executing 'exit 42', because the child shell
        # inherits errexit and will be verbose. Other notes:
        #
        # - We might want errors to fit on a single line so they don't get
        #   interleaved.
        # - We could turn the `exit` builtin into a error.FatalRuntime exception
        #   and get this check for "free".
        thunk = process.SubProgramThunk(self.cmd_ev, node, self.trap_state,
                                        self.multi_trace, inherit_errexit,
                                        inherit_errtrace)
        p = process.Process(thunk, self.job_control, self.job_list,
                            self.tracer)
        return p

    def _RunSimpleCommand(self, arg0, arg0_loc, cmd_val, cmd_st, run_flags):
        # type: (str, loc_t, cmd_value.Argv, CommandStatus, int) -> int
        """Run builtins, functions, external commands.

        Resolution order: assignment builtins (rejected here), special
        builtins, procs, Hay nodes, normal builtins, then externals.

        Possible variations:
        - YSH might have different, simpler rules. No special builtins, etc.
        - YSH might have OILS_PATH = :| /bin /usr/bin | or something.
        - Interpreters might want to define all their own builtins.
        """

        builtin_id = consts.LookupAssignBuiltin(arg0)
        if builtin_id != consts.NO_INDEX:
            # command readonly is disallowed, for technical reasons. Could relax it
            # later.
            self.errfmt.Print_("Simple command can't run assignment builtin",
                               arg0_loc)
            return 1

        builtin_id = consts.LookupSpecialBuiltin(arg0)
        if builtin_id != consts.NO_INDEX:
            cmd_st.show_code = True  # this is a "leaf" for errors
            status = self.RunBuiltin(builtin_id, cmd_val)
            # TODO: Enable this and fix spec test failures.
            # Also update _SPECIAL_BUILTINS in osh/builtin.py.
            #if status != 0:
            #  e_die_status(status, 'special builtin failed')
            return status

        # Call procs first. Builtins like 'true' can be redefined.
        call_procs = not (run_flags & NO_CALL_PROCS)
        if call_procs:
            proc_val, self_obj = self.procs.GetInvokable(arg0)
            if proc_val is not None:
                return self._RunInvokable(proc_val, self_obj, arg0_loc,
                                          cmd_val)

        # Notes:
        # - procs shadow hay names
        # - hay names shadow normal builtins?  Should we limit to CAPS or no?
        if self.hay_state.Resolve(arg0):
            return self.RunBuiltin(builtin_i.haynode, cmd_val)

        builtin_id = consts.LookupNormalBuiltin(arg0)

        if self.exec_opts._running_hay():
            # Hay: limit the builtins that can be run
            # - declare 'use dialect'
            # - echo and write for debugging
            # - no JSON?
            if builtin_id in (builtin_i.haynode, builtin_i.use, builtin_i.echo,
                              builtin_i.write):
                cmd_st.show_code = True  # this is a "leaf" for errors
                return self.RunBuiltin(builtin_id, cmd_val)

            self.errfmt.Print_('Unknown command %r while running hay' % arg0,
                               arg0_loc)
            return 127

        if builtin_id != consts.NO_INDEX:
            cmd_st.show_code = True  # this is a "leaf" for errors
            return self.RunBuiltin(builtin_id, cmd_val)

        # Maybe rewrite 'cat' as 'builtin cat' !
        # Don't do it interactively, since that can mess up job control.
        if (self.exec_opts.rewrite_extern() and
                not self.exec_opts.interactive()):
            builtin_id = _RewriteExternToBuiltin(cmd_val.argv)
            if builtin_id != consts.NO_INDEX:
                return self.RunBuiltin(builtin_id, cmd_val)

        return self.RunExternal(arg0, arg0_loc, cmd_val, cmd_st, run_flags)

    def RunExternal(self, arg0, arg0_loc, cmd_val, cmd_st, run_flags):
        # type: (str, loc_t, cmd_value.Argv, Optional[CommandStatus], int) -> int
        """Run an external command, forking unless this is the last command.

        Returns 127 when the executable can't be found on the search path.
        """
        environ = self.mem.GetEnv()  # Include temporary variables

        if cmd_val.proc_args:
            e_die(
                '%r appears to be external. External commands don\'t accept typed args (OILS-ERR-200)'
                % arg0, cmd_val.proc_args.typed_args.left)

        # Resolve argv[0] BEFORE forking.
        if run_flags & USE_DEFAULT_PATH:
            argv0_path = LookupExecutable(arg0, DEFAULT_PATH)
        else:
            argv0_path = self.search_path.CachedLookup(arg0)
        if argv0_path is None:
            self.errfmt.Print_('Command %r not found (OILS-ERR-100)' % arg0,
                               arg0_loc)
            return 127

        # With traps registered, we must fork even for the last command, so
        # this process stays alive to run the trap handlers.
        if self.trap_state.ThisProcessHasTraps():
            do_fork = True
        else:
            do_fork = not cmd_val.is_last_cmd

        # Normal case: ls /
        if do_fork:
            thunk = process.ExternalThunk(self.ext_prog, argv0_path, cmd_val,
                                          environ)
            p = process.Process(thunk, self.job_control, self.job_list,
                                self.tracer)

            if self.job_control.Enabled():
                if self.fg_pipeline is not None:
                    pgid = self.fg_pipeline.ProcessGroupId()
                    # If job control is enabled, this should be true
                    assert pgid != process.INVALID_PGID

                    change = process.SetPgid(pgid, self.tracer)
                    self.fg_pipeline = None  # clear to avoid confusion in subshells
                else:
                    change = process.SetPgid(process.OWN_LEADER, self.tracer)
                p.AddStateChange(change)

            status = p.RunProcess(self.waiter, trace.External(cmd_val.argv))

            # this is close to a "leaf" for errors
            # problem: permission denied EACCESS prints duplicate messages
            # TODO: add message command 'ls' failed
            if cmd_st is not None:
                cmd_st.show_code = True

            return status

        self.tracer.OnExec(cmd_val.argv)

        # Already forked for pipeline: ls / | wc -l
        self.ext_prog.Exec(argv0_path, cmd_val, environ)  # NEVER RETURNS

        raise AssertionError('for -Wreturn-type in C++')

    def RunBackgroundJob(self, node):
        # type: (command_t) -> int
        """For & etc."""
        # Special case for pipeline. There is some evidence here:
        # https://www.gnu.org/software/libc/manual/html_node/Launching-Jobs.html#Launching-Jobs
        #
        # "You can either make all the processes in the process group be children
        # of the shell process, or you can make one process in group be the
        # ancestor of all the other processes in that group. The sample shell
        # program presented in this chapter uses the first approach because it
        # makes bookkeeping somewhat simpler."
        UP_node = node

        if UP_node.tag() == command_e.Pipeline:
            node = cast(command.Pipeline, UP_node)
            pi = process.Pipeline(self.exec_opts.sigpipe_status_ok(),
                                  self.job_control, self.job_list, self.tracer)
            for child in node.children:
                p = self._MakeProcess(child, True, self.exec_opts.errtrace())
                p.Init_ParentPipeline(pi)
                pi.Add(p)

            pi.StartPipeline(self.waiter)
            pi.SetBackground()
            self.mem.last_bg_pid = pi.PidForWait()  # for $!
            job_id = self.job_list.RegisterJob(pi)  # show in 'jobs' list

        else:
            # Problem: to get the 'set -b' behavior of immediate notifications, we
            # have to register SIGCHLD. But then that introduces race conditions.
            # If we haven't called Register yet, then we won't know who to notify.

            p = self._MakeProcess(node, True, self.exec_opts.errtrace())
            if self.job_control.Enabled():
                p.AddStateChange(
                    process.SetPgid(process.OWN_LEADER, self.tracer))

            p.SetBackground()
            # Note: the returned pid isn't used directly; $! comes from
            # PidForWait() below.
            pid = p.StartProcess(trace.Fork)
            self.mem.last_bg_pid = p.PidForWait()  # for $!
            job_id = self.job_list.RegisterJob(p)  # show in 'jobs' list

        if self.exec_opts.interactive():
            # Print it like %1 to show it's a job
            print_stderr('[%%%d] PID %d Started' %
                         (job_id, self.mem.last_bg_pid))

        return 0

    def RunPipeline(self, node, status_out):
        # type: (command.Pipeline, CommandStatus) -> None
        """Run a foreground pipeline; results go into status_out.

        The first n-1 parts are forked; the last part runs in THIS process.
        """

        pi = process.Pipeline(self.exec_opts.sigpipe_status_ok(),
                              self.job_control, self.job_list, self.tracer)

        # initialized with CommandStatus.CreateNull()
        pipe_locs = []  # type: List[loc_t]

        # First n-1 processes (which is empty when n == 1)
        n = len(node.children)
        for i in xrange(n - 1):
            child = node.children[i]

            # TODO: determine these locations at parse time?
            pipe_locs.append(loc.Command(child))

            p = self._MakeProcess(child, True, self.exec_opts.errtrace())
            p.Init_ParentPipeline(pi)
            pi.Add(p)

        last_child = node.children[n - 1]
        # Last piece of code is in THIS PROCESS. 'echo foo | read line; echo $line'
        pi.AddLast((self.cmd_ev, last_child))
        pipe_locs.append(loc.Command(last_child))

        with dev.ctx_Tracer(self.tracer, 'pipeline', None):
            pi.StartPipeline(self.waiter)
            # Expose the pipeline so RunExternal() can place the last part's
            # forked processes in the same process group.
            self.fg_pipeline = pi
            status_out.pipe_status = pi.RunLastPart(self.waiter, self.fd_state)
            self.fg_pipeline = None  # clear in case we didn't end up forking

        status_out.pipe_locs = pipe_locs

    def RunSubshell(self, node):
        # type: (command_t) -> int
        """Run ( ... ) in a forked child, and wait for it."""
        p = self._MakeProcess(node, True, self.exec_opts.errtrace())
        if self.job_control.Enabled():
            p.AddStateChange(process.SetPgid(process.OWN_LEADER, self.tracer))

        return p.RunProcess(self.waiter, trace.ForkWait)

    def CaptureStdout(self, node):
        # type: (command_t) -> Tuple[int, str]
        """Fork a child with stdout connected to a pipe, and read it all.

        Returns (exit status, stdout with NULs removed and trailing
        newlines stripped).
        """

        p = self._MakeProcess(node, self.exec_opts.inherit_errexit(),
                              self.exec_opts.errtrace())
        # Shell quirk: Command subs remain part of the shell's process group, so we
        # don't use p.AddStateChange(process.SetPgid(...))

        r, w = posix.pipe()
        p.AddStateChange(process.StdoutToPipe(r, w))

        p.StartProcess(trace.CommandSub)
        #log('Command sub started %d', pid)

        chunks = []  # type: List[str]
        posix.close(w)  # not going to write
        while True:
            n, err_num = pyos.Read(r, 4096, chunks)

            if n == 0:  # EOF
                break

            elif n > 0:
                # common shell behavior: remove NUL from stdout
                chunks[-1] = chunks[-1].replace('\0', '')

            else:  # n < 0
                if err_num == EINTR:
                    pass  # retry
                else:
                    # Like the top level IOError handler
                    e_die_status(
                        2,
                        'Oils I/O error (read): %s' % posix.strerror(err_num))

        posix.close(r)

        status = p.Wait(self.waiter)
        stdout_str = ''.join(chunks).rstrip('\n')

        return status, stdout_str

    def Capture3(self, node):
        # type: (command_t) -> Tuple[int, str, str]
        """Fork a child with stdout AND stderr connected to pipes.

        Returns (exit status, stdout, stderr).  Unlike CaptureStdout(), the
        output is not stripped of NULs or trailing newlines.
        """

        p = self._MakeProcess(node, self.exec_opts.inherit_errexit(),
                              self.exec_opts.errtrace())
        # Shell quirk: Command subs remain part of the shell's process group, so we
        # don't use p.AddStateChange(process.SetPgid(...))

        stdout_fd, w = posix.pipe()
        stderr_fd, w2 = posix.pipe()
        p.AddStateChange(process.StdoutToPipe(stdout_fd, w))
        p.AddStateChange(process.StderrToPipe(stderr_fd, w2))

        p.StartProcess(trace.CommandSub)
        #log('Command sub started %d', pid)

        stdout_chunks = []  # type: List[str]
        stderr_chunks = []  # type: List[str]
        posix.close(w)  # not going to write
        posix.close(w2)  # not going to write
        open_fds = [stdout_fd, stderr_fd]
        while True:
            fds = pyos.WaitForReading(open_fds)

            # zero outputs mean something went wrong
            if len(fds) == 0:
                break

            for fd in fds:
                if fd == stdout_fd:
                    n, err_num = pyos.Read(fd, 4096, stdout_chunks)
                else:
                    n, err_num = pyos.Read(fd, 4096, stderr_chunks)
                if n < 0:
                    if err_num == EINTR:
                        pass  # retry
                    else:
                        # Like the top level IOError handler
                        e_die_status(
                            2, 'Oils I/O error (read): %s' %
                            posix.strerror(err_num))
                elif n == 0:  # EOF
                    open_fds.remove(fd)

            if len(open_fds) == 0:
                break

        posix.close(stdout_fd)
        posix.close(stderr_fd)

        status = p.Wait(self.waiter)
        stdout_str = ''.join(stdout_chunks)
        stderr_str = ''.join(stderr_chunks)

        return status, stdout_str, stderr_str

    def RunCommandSub(self, cs_part):
        # type: (CommandSub) -> str
        """Run $(...), returning its stdout via CaptureStdout()."""

        if not self.exec_opts._allow_command_sub():
            # _allow_command_sub is used in two places. Only one of them turns
            # off _allow_process_sub
            if not self.exec_opts._allow_process_sub():
                why = "status wouldn't be checked (strict_errexit)"
            else:
                why = 'eval_unsafe_arith is off'

            e_die("Command subs not allowed here because %s" % why,
                  loc.WordPart(cs_part))

        node = cs_part.child

        # Hack for weird $(<file) construct.
        # TODO: This should be detected at PARSE time, and turned into
        # word_part.Slurp.
        # - All shells that implement it do it as a special case.
        # - Then document it under chap-word-lang.md
        # - In YSH, it could be $[io.slurp('myfile')]

        if node.tag() == command_e.Redirect:
            redir_node = cast(command.Redirect, node)
            # Detect '< file'
            if (len(redir_node.redirects) == 1 and
                    redir_node.redirects[0].op.id == Id.Redir_Less and
                    redir_node.child.tag() == command_e.NoOp):

                # Change it to builtin cat < file.
                # Blame < because 'builtin cat' has no location
                blame_tok = redir_node.redirects[0].op
                simple = command.Simple(blame_tok, [], self.builtin_cat_words,
                                        None, None, False)

                # MUTATE redir node so it's like $(<file _cat)
                redir_node.child = simple

        status, stdout_str = self.CaptureStdout(node)

        # OSH has the concept of aborting in the middle of a WORD. We're not
        # waiting until the command is over!
        if self.exec_opts.command_sub_errexit():
            if status != 0:
                msg = 'Command Sub exited with status %d' % status
                raise error.ErrExit(status, msg, loc.WordPart(cs_part))

        else:
            # Set a flag so we check errexit at the same time as bash. Example:
            #
            # a=$(false)
            # echo foo  # no matter what comes here, the flag is reset
            #
            # Set ONLY until this command node has finished executing.

            # HACK: move this
            self.cmd_ev.check_command_sub_status = True
            self.mem.SetLastStatus(status)

        # Runtime errors test case: # $("echo foo > $@")
        # Why rstrip()?
        # https://unix.stackexchange.com/questions/17747/why-does-shell-command-substitution-gobble-up-a-trailing-newline-char
        return stdout_str

    def RunProcessSub(self, cs_part):
        # type: (CommandSub) -> str
        """Process sub forks a process connected to a pipe.

        The pipe is typically passed to another process via a /dev/fd/$FD path.

        Life cycle of a process substitution:

        1. Start with this code

          diff <(seq 3) <(seq 4)

        2. To evaluate the command line, we evaluate every word. The
          NormalWordEvaluator calls this method, RunProcessSub(), which does:

          a. Create a pipe(), getting r and w
          b. Starts the seq process, which inherits r and w
             It has a StdoutToPipe() redirect, which means that it dup2(w, 1)
             and close(r)
          c. Close the w FD, because neither the shell or 'diff' will write to it.
             However we must retain 'r', because 'diff' hasn't opened /dev/fd yet!
          d. We evaluate <(seq 3) to /dev/fd/$r, so "diff" can read from it

        3. Now we're done evaluating every word, so we know the command line of
          diff, which looks like

          diff /dev/fd/64 /dev/fd/65

          Those are the FDs for the read ends of the pipes we created.

        4. diff inherits a copy of the read end of both pipes. But it actually
          calls open() on both files passed as argv. (I think this is fine.)

        5. wait() for the diff process.

        6. The shell closes both the read ends of both pipes. Neither us or
          'diff' will read again.

        7. The shell waits for both 'seq' processes.

        Related:
          shopt -s process_sub_fail
          _process_sub_status
        """
        cs_loc = loc.WordPart(cs_part)

        if not self.exec_opts._allow_process_sub():
            e_die(
                "Process subs not allowed here because status wouldn't be checked (strict_errexit)",
                cs_loc)

        p = self._MakeProcess(cs_part.child, True, self.exec_opts.errtrace())

        r, w = posix.pipe()
        #log('pipe = %d, %d', r, w)

        op_id = cs_part.left_token.id
        if op_id == Id.Left_ProcSubIn:
            # Example: cat < <(head foo.txt)
            #
            # The head process should write its stdout to a pipe.
            redir = process.StdoutToPipe(r,
                                         w)  # type: process.ChildStateChange

        elif op_id == Id.Left_ProcSubOut:
            # Example: head foo.txt > >(tac)
            #
            # The tac process should read its stdin from a pipe.

            # Note: this example sometimes requires you to hit "enter" in bash and
            # zsh. Why?
            redir = process.StdinFromPipe(r, w)

        else:
            raise AssertionError()

        p.AddStateChange(redir)

        if self.job_control.Enabled():
            p.AddStateChange(process.SetPgid(process.OWN_LEADER, self.tracer))

        # Fork, letting the child inherit the pipe file descriptors.
        p.StartProcess(trace.ProcessSub)

        ps_frame = self.process_sub_stack[-1]

        # Note: bash never waits() on the process, but zsh does. The calling
        # program needs to read() before we can wait, e.g.
        #   diff <(sort left.txt) <(sort right.txt)

        # After forking, close the end of the pipe we're not using.
        if op_id == Id.Left_ProcSubIn:
            posix.close(w)  # cat < <(head foo.txt)
            ps_frame.Append(p, r, cs_loc)  # close later
        elif op_id == Id.Left_ProcSubOut:
            posix.close(r)
            #log('Left_ProcSubOut closed %d', r)
            ps_frame.Append(p, w, cs_loc)  # close later
        else:
            raise AssertionError()

        # Is /dev Linux-specific?
        if op_id == Id.Left_ProcSubIn:
            return '/dev/fd/%d' % r

        elif op_id == Id.Left_ProcSubOut:
            return '/dev/fd/%d' % w

        else:
            raise AssertionError()

    def PushRedirects(self, redirects, err_out):
        # type: (List[RedirValue], List[error.IOError_OSError]) -> None
        """Apply redirects via fd_state; errors are appended to err_out."""
        if len(redirects) == 0:  # Optimized to avoid allocs
            return
        self.fd_state.Push(redirects, err_out)

    def PopRedirects(self, num_redirects, err_out):
        # type: (int, List[error.IOError_OSError]) -> None
        """Undo the redirects applied by PushRedirects()."""
        if num_redirects == 0:  # Optimized to avoid allocs
            return
        self.fd_state.Pop(err_out)

    def PushProcessSub(self):
        # type: () -> None
        """Begin a new frame to track process subs for one command."""
        if len(self.clean_frame_pool):
            # Optimized to avoid allocs
            new_frame = self.clean_frame_pool.pop()
        else:
            new_frame = _ProcessSubFrame()
        self.process_sub_stack.append(new_frame)

    def PopProcessSub(self, compound_st):
        # type: (StatusArray) -> None
        """This method is called by a context manager, which means we always
        wait() on the way out, which I think is the right thing.

        We don't always set _process_sub_status, e.g. if some fatal
        error occurs first, but we always wait.
        """
        frame = self.process_sub_stack.pop()
        if frame.WasModified():
            frame.MaybeWaitOnProcessSubs(self.waiter, compound_st)
        else:
            # Optimized to avoid allocs
            self.clean_frame_pool.append(frame)

        # Note: the 3 lists in _ProcessSubFrame are hot in our profiles. It would
        # be nice to somehow "destroy" them here, rather than letting them become
        # garbage that needs to be traced.

        # The CommandEvaluator could have a ProcessSubStack, which supports Push(),
        # Pop(), and Top() of VALUES rather than GC objects?
1048
1049 # The CommandEvaluator could have a ProcessSubStack, which supports Push(),
1050 # Pop(), and Top() of VALUES rather than GC objects?
1051
1052
1053# vim: sw=4