test/syscall.sh

OILS / test / syscall.sh View on Github | oilshell.org

508 lines, 210 significant

#!/usr/bin/env bash
#
# Measure the number of syscalls that shells use.
#
# Usage:
#   test/syscall.sh <function name>

# LIB_OSH may be overridden from the environment; it locates the shared
# shell libraries sourced below.
: "${LIB_OSH=stdlib/osh}"
source "$LIB_OSH/bash-strict.sh"
source "$LIB_OSH/task-five.sh"

source build/dev-shell.sh

# Names of the Oils shells to measure.  count-procs maps the special names
# osh, ysh, osh-cpp and ysh-cpp to in-tree builds.
OSH=${OSH:-osh}
YSH=${YSH:-ysh}

#readonly -a SHELLS=(dash bash-4.4 bash $OSH)

# Compare bash 4 vs. bash 5
SHELLS=(dash bash-4.4 bash-5.2.21 mksh zsh ash "$OSH" "$YSH")

SHELLS_MORE=( "${SHELLS[@]}" yash )

# yash does something fundamentally different in by-code.wrapped - it
# understands functions
#SHELLS+=(yash)

readonly BASE_DIR='_tmp/syscall'  # What we'll publish
readonly RAW_DIR='_tmp/syscall-raw'  # Raw data

# Run it against the dev version of OSH.
# '&&' so that a failed cd yields an empty (failing) substitution instead of
# silently reporting the current directory as the repo root.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
33
# Run one shell invocation under strace, one trace file per process.
#
# Arguments:
#   $1   - prefix for the trace files (passed to strace -o together with -ff)
#   $2   - shell to run; osh, ysh, osh-cpp and ysh-cpp are mapped to in-tree
#          builds, anything else is run as given
#   $3.. - arguments passed through to the shell
# Globals:
#   REPO_ROOT (read)
# Returns:
#   0 even if strace or the traced command fails.
count-procs() {
  local out_prefix=$1
  local sh=$2
  shift 2

  local -a cmd
  case $sh in
    # avoid the extra processes that bin/osh starts!
    #(X)  # to compare against osh 0.8.pre3 installed
    osh)
      cmd=(env "PYTHONPATH=$REPO_ROOT:$REPO_ROOT/vendor"
           "$REPO_ROOT/bin/oils_for_unix.py" osh)
      ;;
    ysh)
      cmd=(env "PYTHONPATH=$REPO_ROOT:$REPO_ROOT/vendor"
           "$REPO_ROOT/bin/oils_for_unix.py" ysh)
      ;;
    osh-cpp)
      cmd=(_bin/cxx-dbg/osh)
      ;;
    ysh-cpp)
      cmd=(_bin/cxx-dbg/ysh)
      ;;
    *)
      # Deliberately unquoted: a caller-supplied shell string is still
      # subject to word splitting, as before.
      # shellcheck disable=SC2206
      cmd=( $sh )
      ;;
  esac

  # Ignore failure, because we are just counting
  strace -ff -o "$out_prefix" -- "${cmd[@]}" "$@" || true
}
60
run-case() {
  ### Run a test case with many shells
  #
  # Arguments:
  #   $1 - case number; becomes part of the trace file name
  #   $2 - code string handed to each shell with -c
  #   $3 - optional; when non-empty the code is wrapped in a function and
  #        only SHELLS (not SHELLS_MORE) are run
  # Globals:
  #   SHELLS, SHELLS_MORE, RAW_DIR (read)

  local num=$1
  local code_str=$2
  local func_wrap=${3:-}

  local -a shells
  if test -n "$func_wrap"; then
    code_str="wrapper() { $code_str; }; wrapper"
    shells=( "${SHELLS[@]}" )
  else
    shells=( "${SHELLS_MORE[@]}" )
  fi

  # Keep the loop variables local so they don't leak into the caller.
  local sh out_prefix
  for sh in "${shells[@]}"; do
    out_prefix=$RAW_DIR/${sh}__${num}
    echo "--- $sh"
    count-procs "$out_prefix" "$sh" -c "$code_str"
  done
}
82
run-case-file() {
  ### Like the above, but the shell reads from a file
  #
  # Arguments:
  #   $1 - case number; names both the script (_tmp/$1.sh) and trace files
  #   $2 - code string, written to the script byte-for-byte (no newline is
  #        appended, since trailing newlines are part of what is measured)
  # Globals:
  #   SHELLS_MORE, RAW_DIR (read)

  local num=$1
  local code_str=$2
  local script=_tmp/$num.sh

  # printf rather than 'echo -n': echo would treat option-like data
  # specially.
  printf '%s' "$code_str" > "$script"

  local sh out_prefix
  for sh in "${SHELLS_MORE[@]}"; do
    out_prefix=$RAW_DIR/${sh}__${num}
    echo "--- $sh"
    count-procs "$out_prefix" "$sh" "$script"
  done
}
97
run-case-stdin() {
  ### Like the above, but read from a pipe
  #
  # Arguments:
  #   $1 - case number; becomes part of the trace file name
  #   $2 - code string, piped to each shell's stdin byte-for-byte
  # Globals:
  #   SHELLS_MORE, RAW_DIR (read)

  local num=$1
  local code_str=$2

  local sh out_prefix
  for sh in "${SHELLS_MORE[@]}"; do
    out_prefix=$RAW_DIR/${sh}__${num}
    echo "--- $sh"
    # printf rather than 'echo -n': echo would treat option-like data
    # specially.
    printf '%s' "$code_str" | count-procs "$out_prefix" "$sh"
  done
}
110
print-cases() {
  ### Print the code snippets to measure, one per line
  #
  # Blank lines and '#' comment lines of the here-document are filtered out,
  # so each remaining line is one case.  The delimiter is quoted: the
  # snippets are emitted literally, with no expansion by this script.
  #
  # 'grep -E' replaces the obsolescent 'egrep'.
  grep -E -v '^[[:space:]]*(#|$)' <<'EOF'

# builtin
echo hi

# external command
date

# OSH calls this "sentence"
date ;

# trap - bash has special logic for this
trap 'echo mytrap' EXIT; date

# external then builtin
date; echo hi

# builtin then external
echo hi; date

# two external commands
date; date

# does a brace group make a difference?
{ date; date; }

# singleton brace group
date; { date; }

# does it behave differently if sourced?
. _tmp/sourced.sh

# dash and zsh somehow optimize this to 1
(echo hi)

(date)

( ( date ) )

( ( date ) ); echo hi

echo hi; (date)

echo hi; (date;)

echo hi; (echo hi;)

echo hi; (echo hi; date)

( echo hi ); echo hi

date > /tmp/redir.txt

(date;) > /tmp/sentence.txt

date 2> /tmp/stderr.txt | wc -l

echo hi > /tmp/redir.txt

(echo hi;) > /tmp/sentence.txt

echo hi 2> /tmp/stderr.txt | wc -l

(date; echo hi)

# command sub
echo $(date)

# command sub with builtin
echo $(echo hi)

# command sub with useless subshell (some scripts use this)
echo $( ( date ) )

# command sub with other subshell
echo $( ( date ); echo hi )

# 2 processes for all shells
( echo hi ); echo done

# simple pipeline
date | wc -l

# negated
! date | wc -l

# every shell does 3
echo a | wc -l

# every shell does 3
command echo a | wc -l

# bash does 4 here!
command date | wc -l

# negated
! command date | wc -l

# 3 processes for all?
# osh gives FIVE???  But others give 3.  That's bad.
( date ) | wc -l

# 3 processes for all shells except zsh and osh, which have shopt -s lastpipe!
date | read x

# osh has 3, but should be 2 like zsh?
# hm how can zsh do 2 here?  That seems impossible.
# oh it's lastpipe turns the shell process into wc -l ???  wow.
{ echo a; echo b; } | wc -l

# zsh behaves normally here.  That is a crazy optimization.  I guess it's
# nice when you have SH -c 'mypipeline | wc-l'
{ echo a; echo b; } | wc -l; echo done

# this is all over the map too.  3 4 4 2.
{ echo a; date; } | wc -l

# osh does 4 when others do 3.  So every shell optimizes this extra pipeline.
( echo a; echo b ) | wc -l

( echo a; echo b ) | ( wc -l )

{ echo prefix; ( echo a; echo b ); } | ( wc -l )

echo hi & wait

date & wait

echo hi | wc -l & wait

date | wc -l & wait

trap 'echo mytrap' EXIT; date & wait

trap 'echo mytrap' EXIT; date | wc -l & wait

# trap in SubProgramThunk
{ trap 'echo mytrap' EXIT; date; } & wait
EOF

  # Discarded because they're identical
  # pipeline with redirect last
  #date | wc -l > /tmp/out.txt

  # pipeline with redirect first
  #date 2>&1 | wc -l

}
261
number-cases() {
  # Prefix every case from print-cases with a 2-digit, zero-padded,
  # right-justified sequence number (what printf's %02d would produce).
  print-cases | nl -n rz -w 2
}
267
by-input() {
  ### Run cases that vary by input reader
  #
  # The same snippets are fed to every shell three ways: as a -c string
  # (cases 50-55), as a script file (60-65) and on stdin (70-75).  Results
  # are then counted and summarized under the 'by-input' suite.
  #
  # Globals:
  #   RAW_DIR (emptied and recreated), BASE_DIR (written)
  if ! strace true; then
    echo "Aborting because we couldn't run strace"
    return
  fi

  local suite='by-input'

  rm -r -f -v "$RAW_DIR"
  mkdir -p "$RAW_DIR" "$BASE_DIR"

  # Wow this newline makes a difference in shells!

  # This means that Id.Eof_Real is different than Id.Op_Newline?
  # Should we create a Sentence for it too then?
  # That is possible in _ParseCommandLine

  local zero=$'date; date'
  local one=$'date; date\n'
  local two=$'date; date\n#comment\n'
  local comment=$'# comment\ndate;date'
  local newline=$'date\n\ndate'
  local newline2=$'date\n\ndate\n#comment'

  # zsh is the only shell to optimize all 6 cases!  2 processes instead of 3.
  run-case 50 "$zero"
  run-case 51 "$one"
  run-case 52 "$two"
  run-case 53 "$comment"
  run-case 54 "$newline"
  run-case 55 "$newline2"

  # Case 64 is labelled "newline" in the table below, so it must run
  # $newline (like case 54), not $newline2.
  run-case-file 60 "$zero"
  run-case-file 61 "$one"
  run-case-file 62 "$two"
  run-case-file 63 "$comment"
  run-case-file 64 "$newline"
  run-case-file 65 "$newline2"

  # yash is the only shell to optimize the stdin case at all!
  # it looks for a lack of trailing newline.
  run-case-stdin 70 "$zero"
  run-case-stdin 71 "$one"
  run-case-stdin 72 "$two"
  run-case-stdin 73 "$comment"
  run-case-stdin 74 "$newline"
  run-case-stdin 75 "$newline2"

  # This is identical for all shells
  #run-case 32 $'date; date\n#comment\n'

  cat >"$BASE_DIR/cases.${suite}.txt" <<EOF
50 -c: zero lines
51 -c: one line
52 -c: one line and comment
53 -c: comment first
54 -c: newline
55 -c: newline2
60 file: zero lines
61 file: one line
62 file: one line and comment
63 file: comment first
64 file: newline
65 file: newline2
70 stdin: zero lines
71 stdin: one line
72 stdin: one line and comment
73 stdin: comment first
74 stdin: newline
75 stdin: newline2
EOF

  count-lines "$suite"
  summarize "$suite" 3 0
}
344
# Quick hack: every shell uses 2 processes for this... doesn't illuminate much.
#
# Measures the "$(< file)" command substitution in every shell.
# Globals:
#   RAW_DIR (contents removed), BASE_DIR (written)
weird-command-sub() {
  # NOTE: nullglob stays enabled in the calling shell after this returns.
  shopt -s nullglob
  mkdir -p "$RAW_DIR" "$BASE_DIR"
  rm -r -f -v -- "$RAW_DIR"/*

  local tmp=_tmp/cs
  echo FOO > "$tmp"
  # The '$' is escaped so that the shell under test sees the command
  # substitution; unescaped, this script would expand it first and every
  # shell would merely run a plain 'echo'.
  run-case 60 "echo \$(< $tmp)"
  run-case 61 "echo \$(< $tmp; echo hi)"

  local suite=weird-command-sub

  cat >"$BASE_DIR/cases.${suite}.txt" <<EOF
60 \$(< file)
61 \$(< file; echo hi)
EOF

  count-lines "$suite"
  summarize "$suite" 0 0
}
365
readonly MAX_CASES=100
#readonly MAX_CASES=3

by-code() {
  ### Run cases that vary by code snippet
  #
  # Arguments:
  #   $1 - optional; when non-empty each snippet is wrapped in a function
  #        and results go to the 'by-code-wrapped' suite
  #   $2 - optional; maximum number of cases to run (default MAX_CASES)
  # Globals:
  #   RAW_DIR (emptied and recreated), BASE_DIR (written), MAX_CASES (read)
  local func_wrap=${1:-}
  local max_cases=${2:-}

  if test -z "$max_cases"; then
    # Both settings used to be read from $1, so 'by-code T' ended up running
    # 'head -n T'.  For compatibility a purely numeric $1 is still honoured
    # as the case limit; anything else falls back to MAX_CASES.
    case $func_wrap in
      '' | *[!0-9]*) max_cases=$MAX_CASES ;;
      *) max_cases=$func_wrap ;;
    esac
  fi

  if ! strace true; then
    echo "Aborting because we couldn't run strace"
    return
  fi

  rm -r -f -v "$RAW_DIR"
  mkdir -p "$RAW_DIR" "$BASE_DIR"

  write-sourced

  local suite
  if test -n "$func_wrap"; then
    suite='by-code-wrapped'
  else
    suite='by-code'
  fi

  local cases=$BASE_DIR/cases.${suite}.txt

  number-cases > "$cases"
  # Each line is "<number> <code>"; read splits off the number and leaves
  # the rest of the line as the code string.
  local num code_str
  head -n "$max_cases" "$cases" | while read -r num code_str; do
    echo
    echo '==='
    echo "$num     $code_str"
    echo

    run-case "$num" "$code_str" "$func_wrap"
  done

  # omit total line
  count-lines "$suite"
  summarize "$suite" 3 0
}
408
by-code-cpp() {
  ### Run the by-code suite against the debug C++ builds of osh and ysh
  #
  # Builds both binaries with ninja, then re-invokes this script with
  # OSH/YSH set to the names count-procs maps to _bin/cxx-dbg/{osh,ysh}.
  # All arguments are forwarded to by-code.
  ninja _bin/cxx-dbg/{osh,ysh}
  # "$0" is quoted so a checkout path containing spaces still works.
  OSH=osh-cpp YSH=ysh-cpp "$0" by-code "$@"
}
413
by-input-cpp() {
  ### Run the by-input suite against the debug C++ builds of osh and ysh
  #
  # Builds both binaries with ninja, then re-invokes this script with
  # OSH/YSH set to the names count-procs maps to _bin/cxx-dbg/{osh,ysh}.
  # All arguments are forwarded to by-input.
  ninja _bin/cxx-dbg/{osh,ysh}
  # "$0" is quoted so a checkout path containing spaces still works.
  OSH=osh-cpp YSH=ysh-cpp "$0" by-input "$@"
}
418
syscall-py() {
  # Run test/syscall.py with the current directory on PYTHONPATH,
  # forwarding every argument unchanged.
  env PYTHONPATH=. test/syscall.py "$@"
}
422
write-sourced() {
  # Create the file that the '. _tmp/sourced.sh' case sources.  It holds two
  # external commands and has no trailing newline.
  printf '%s' 'date; date' > _tmp/sourced.sh
}
426
count-lines() {
  ### Write per-trace-file line counts to $BASE_DIR/wc.<suite>.txt
  #
  # Arguments:
  #   $1 - suite name (default: by-code)
  # Globals:
  #   RAW_DIR (read), BASE_DIR (written)
  local suite=${1:-by-code}

  (
    cd "$RAW_DIR" || exit
    set -- *
    if test $# -gt 1; then
      # With several files wc appends a "total" line; drop it.
      wc -l "$@" | sed '$d'
    else
      # With a single file there is no total line, so nothing may be
      # dropped or the only data line would be lost.
      wc -l "$@"
    fi
  ) > "$BASE_DIR/wc.${suite}.txt"
}
431
summarize() {
  ### Feed the line counts of a suite to syscall-py and print OK or FAIL
  #
  # Arguments:
  #   $1 - suite name (default: by-code)
  #   $2 - value for --not-minimum (default: 0)
  #   $3 - value for --more-than-bash (default: 0)
  # Globals:
  #   BASE_DIR (read)
  # Outputs:
  #   Whatever syscall-py prints, then 'OK' or 'FAIL' on stdout.  A failing
  #   syscall-py is only reported, not propagated as this function's status.
  local suite=${1:-by-code}
  local not_minimum=${2:-0}
  local more_than_bash=${3:-0}

  # '|| status=$?' captures the failure without tripping errexit, and does
  # so without switching errexit off and on in the caller's shell.
  local status=0
  syscall-py \
    --not-minimum "$not_minimum" \
    --more-than-bash "$more_than_bash" \
    --suite "$suite" \
    "$BASE_DIR/cases.${suite}.txt" \
    "$BASE_DIR" \
    < "$BASE_DIR/wc.${suite}.txt" || status=$?

  if test "$status" -eq 0; then
    echo 'OK'
  else
    echo 'FAIL'
  fi
}
454
soil-run() {
  # Entry point for the "other" tests: plain by-code, then by-code with
  # function wrapping, then by-input.  (Soil itself runs by-code and
  # by-input separately.)
  #
  # Note: Only $BASE_DIR/*.txt is included in the release/$VERSION/other.wwz

  # unwrapped snippets
  by-code

  # the same snippets, each wrapped in a function
  by-code T

  by-input

  printf '%s\n' 'OK'
}
469
run-for-release() {
  ### Release entry point: identical to soil-run; arguments are ignored

  soil-run
}
475
#
# Real World
#
# $ ls|grep dash|wc -l
# 6098
# $ ls|grep bash|wc -l
# 6102
# $ ls|grep osh|wc -l
# 6098
#
# So Oil is already at dash level for CPython's configure, and bash isn't
# far off.  So autoconf-generated scripts probably already use constructs
# that are already "optimal" in most shells.

readonly PY27_DIR=$PWD/Python-2.7.13

# Trace ./configure in PY27_DIR under bash, dash and osh.
#
# Trace files go to $PWD/$RAW_DIR/real/cpython-<shell>.*; the path is made
# absolute before changing directory.
# Globals:
#   RAW_DIR, PY27_DIR (read)
cpython-configure() {
  local raw_dir=$PWD/$RAW_DIR/real
  mkdir -p "$raw_dir"

  # Don't run ./configure from the wrong directory if pushd fails.
  pushd "$PY27_DIR" || return

  local sh out_prefix
  #for sh in "${SHELLS[@]}"; do
  for sh in bash dash osh; do
    out_prefix=$raw_dir/cpython-$sh
    echo "--- $sh"

    # TODO: Use a different dir
    count-procs "$out_prefix" "$sh" -c './configure'
  done
  popd
}
507
508	task-five "$@"