OILS / test / syscall.sh View on Github | oils.pub

520 lines, 210 significant
1#!/usr/bin/env bash
2#
3# Measure the number of syscalls that shells use.
4#
5# Usage:
6# test/syscall.sh <function name>
7
8: ${LIB_OSH=stdlib/osh}
9source $LIB_OSH/bash-strict.sh
10source $LIB_OSH/task-five.sh
11
12source build/dev-shell.sh
13
# These are labels for count-procs.  They may be overridden from the
# environment.
OSH=${OSH:-osh-py}
YSH=${YSH:-ysh-py}

# Compare bash 4 vs. bash 5
SHELLS=(dash bash-4.4 bash-5.2.21 mksh zsh ash $OSH $YSH)

# yash does something fundamentally different in by-code.wrapped - it
# understands functions
SHELLS_MORE=( "${SHELLS[@]}" yash )

readonly BASE_DIR='_tmp/syscall'  # What we'll publish
readonly RAW_DIR='_tmp/syscall-raw'  # Raw data

# Absolute path of the parent of the directory containing this script.
# "$0" is quoted so that a path with whitespace still resolves, and '&&'
# keeps a failed cd from silently yielding the current directory instead.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
29
count-procs() {
  ### Run a shell under 'strace -ff', writing one trace file per process
  #
  # Args:
  #   $1: out_prefix - prefix handed to 'strace -o'
  #   $2: sh - a shell name, or one of the labels handled below
  #   rest: arguments passed to the shell
  #
  # Always returns 0: the traced command's status is ignored.
  local out_prefix=$1
  local sh=$2
  shift 2

  # The command is built as an array rather than a string that gets word
  # split, so a REPO_ROOT containing whitespace is passed through intact.
  local -a argv
  case $sh in
    # avoid the extra processes that bin/osh starts!
    #(X)  # to compare against osh 0.8.pre3 installed
    osh-py)
      argv=(
        env "PYTHONPATH=$REPO_ROOT:$REPO_ROOT/vendor"
        "$REPO_ROOT/bin/oils_for_unix.py" osh
      )
      ;;
    ysh-py)
      argv=(
        env "PYTHONPATH=$REPO_ROOT:$REPO_ROOT/vendor"
        "$REPO_ROOT/bin/oils_for_unix.py" ysh
      )
      ;;
    osh-cpp) argv=(_bin/cxx-dbg/osh) ;;
    ysh-cpp) argv=(_bin/cxx-dbg/ysh) ;;

    # from soil/cpp-tarball.sh build-static
    osh-st) argv=(_bin/cxx-opt-sh/osh-static) ;;
    ysh-st) argv=(_bin/cxx-opt-sh/ysh-static) ;;

    # Any other value is word split, exactly as the unquoted $sh used to be.
    # shellcheck disable=SC2206
    *) argv=( $sh ) ;;
  esac

  # Ignore failure, because we are just counting
  strace -ff -o "$out_prefix" -- "${argv[@]}" "$@" || true
}
56
run-case() {
  ### Trace one code string, passed with -c, under each shell
  #
  # Args:
  #   $1: num - case number, used in the trace file prefix
  #   $2: code_str - the snippet to run
  #   $3: func_wrap - optional; when non-empty the snippet is wrapped in a
  #       function and only SHELLS (not SHELLS_MORE) is used

  local num=$1
  local code_str=$2
  local func_wrap=${3:-}

  # Start from the longer list and narrow it when wrapping.
  local -a shells=( "${SHELLS_MORE[@]}" )
  if [[ -n $func_wrap ]]; then
    shells=( "${SHELLS[@]}" )
    code_str="wrapper() { $code_str; }; wrapper"
  fi

  local out_prefix
  for sh in "${shells[@]}"; do
    out_prefix=$RAW_DIR/${sh}__${num}
    printf -- '--- %s\n' "$sh"
    count-procs $out_prefix $sh -c "$code_str"
  done
}
78
run-case-file() {
  ### Like run-case, but each shell reads the code from a file
  #
  # Args:
  #   $1: num - case number; the code is written to _tmp/$num.sh
  #   $2: code_str - the snippet, written without a trailing newline added

  local num=$1
  local code_str=$2
  local script=_tmp/$num.sh

  echo -n "$code_str" > $script

  local out_prefix
  for sh in "${SHELLS_MORE[@]}"; do
    out_prefix=$RAW_DIR/${sh}__${num}
    printf -- '--- %s\n' "$sh"
    count-procs $out_prefix $sh $script
  done
}
93
run-case-stdin() {
  ### Like run-case, but each shell reads the code from a pipe on stdin
  #
  # Args:
  #   $1: num - case number, used in the trace file prefix
  #   $2: code_str - the snippet, piped without a trailing newline added

  local num=$1
  local code_str=$2

  local out_prefix
  for sh in "${SHELLS_MORE[@]}"; do
    out_prefix=$RAW_DIR/${sh}__${num}
    printf -- '--- %s\n' "$sh"
    echo -n "$code_str" \
      | count-procs $out_prefix $sh
  done
}
106
print-cases() {
  ### Print the code snippets for the by-code suites, one per line
  #
  # Comment lines and blank lines in the here doc are filtered out.  The
  # here doc delimiter is unquoted, so \$ is needed for a literal dollar sign.
  #
  # grep -E replaces egrep, which GNU grep has declared obsolescent.
  grep -E -v '^[[:space:]]*(#|$)' <<EOF

# builtin
echo hi

# external command
date

# OSH calls this "sentence"
date ;

# trap - bash has special logic for this
trap 'echo mytrap' EXIT; date

# external then builtin
date; echo hi

# builtin then external
echo hi; date

# two external commands
date; date

# does a brace group make a difference?
{ date; date; }

# singleton brace group
date; { date; }

# does it behave differently if sourced?
. _tmp/sourced.sh

# dash and zsh somehow optimize this to 1
(echo hi)

(date)

( ( date ) )

( ( date ) ); echo hi

echo hi; (date)

echo hi; (date;)

echo hi; (echo hi;)

echo hi; (echo hi; date)

( echo hi ); echo hi

date > /tmp/redir.txt

(date;) > /tmp/sentence.txt

date 2> /tmp/stderr.txt | wc -l

echo hi > /tmp/redir.txt

(echo hi;) > /tmp/sentence.txt

echo hi 2> /tmp/stderr.txt | wc -l

(date; echo hi)

# command sub
echo \$(date)

# command sub with builtin
echo \$(echo hi)

# command sub with useless subshell (some scripts use this)
echo \$( ( date ) )

# command sub with other subshell
echo \$( ( date ); echo hi )

# 2 processes for all shells
( echo hi ); echo done

# simple pipeline
date | wc -l

# negated
! date | wc -l

# every shell does 3
echo a | wc -l

# every shell does 3
command echo a | wc -l

# bash does 4 here!
command date | wc -l

# negated
! command date | wc -l

# 3 processes for all?
# osh gives FIVE??? But others give 3. That's bad.
( date ) | wc -l

# 3 processes for all shells except zsh and osh, which have shopt -s lastpipe!
date | read x

# osh has 3, but should be 2 like zsh?
# hm how can zsh do 2 here? That seems impossible.
# oh it's lastpipe turns the shell process into wc -l ??? wow.
{ echo a; echo b; } | wc -l

# zsh behaves normally here. That is a crazy optimization. I guess it's
# nice when you have SH -c 'mypipeline | wc-l'
{ echo a; echo b; } | wc -l; echo done

# this is all over the map too. 3 4 4 2.
{ echo a; date; } | wc -l

# osh does 4 when others do 3. So every shell optimizes this extra pipeline.
( echo a; echo b ) | wc -l

( echo a; echo b ) | ( wc -l )

{ echo prefix; ( echo a; echo b ); } | ( wc -l )

echo hi & wait

date & wait

echo hi | wc -l & wait

date | wc -l & wait

trap 'echo mytrap' EXIT; date & wait

trap 'echo mytrap' EXIT; date | wc -l & wait

# trap in SubProgramThunk
{ trap 'echo mytrap' EXIT; date; } & wait
EOF

# Discarded because they're identical
# pipeline with redirect last
#date | wc -l > /tmp/out.txt

# pipeline with redirect first
#date 2>&1 | wc -l
}
256
number-cases() {
  ### Prefix each line of print-cases with a zero-padded 2-digit number
  # nl has no printf-style format, hence -n rz (right justified, leading
  # zeros) together with -w 2.
  print-cases \
    | nl -n rz -w 2
}
262
by-input() {
  ### Run cases that vary by input reader
  #
  # The same six snippets are fed to each shell three ways: with -c (cases
  # 50-55), from a file (60-65), and on stdin (70-75).  The case numbers must
  # line up with the labels written to cases.by-input.txt below.
  #
  # Prints a message and returns without running anything if strace can't
  # be run.
  if ! strace true; then
    echo "Aborting because we couldn't run strace"
    return
  fi

  local suite='by-input'

  rm -r -f -v "$RAW_DIR"
  mkdir -p "$RAW_DIR" "$BASE_DIR"

  # Wow this newline makes a difference in shells!

  # This means that Id.Eof_Real is different than Id.Op_Newline?
  # Should we create a Sentence for it too then?
  # That is possible in _ParseCommandLine

  local zero=$'date; date'
  local one=$'date; date\n'
  local two=$'date; date\n#comment\n'
  local comment=$'# comment\ndate;date'
  local newline=$'date\n\ndate'
  local newline2=$'date\n\ndate\n#comment'

  # zsh is the only shell to optimize all 6 cases!  2 processes instead of 3.
  run-case 50 "$zero"
  run-case 51 "$one"
  run-case 52 "$two"
  run-case 53 "$comment"
  run-case 54 "$newline"
  run-case 55 "$newline2"

  # Case 64 used to pass $newline2, which duplicated case 65 and did not
  # match its 'newline' label.
  run-case-file 60 "$zero"
  run-case-file 61 "$one"
  run-case-file 62 "$two"
  run-case-file 63 "$comment"
  run-case-file 64 "$newline"
  run-case-file 65 "$newline2"

  # yash is the only shell to optimize the stdin case at all!
  # it looks for a lack of trailing newline.
  # Case 74 had the same $newline2 mix-up as case 64.
  run-case-stdin 70 "$zero"
  run-case-stdin 71 "$one"
  run-case-stdin 72 "$two"
  run-case-stdin 73 "$comment"
  run-case-stdin 74 "$newline"
  run-case-stdin 75 "$newline2"

  # This is identical for all shells
  #run-case 32 $'date; date\n#comment\n'

  cat >"$BASE_DIR/cases.${suite}.txt" <<EOF
50 -c: zero lines
51 -c: one line
52 -c: one line and comment
53 -c: comment first
54 -c: newline
55 -c: newline2
60 file: zero lines
61 file: one line
62 file: one line and comment
63 file: comment first
64 file: newline
65 file: newline2
70 stdin: zero lines
71 stdin: one line
72 stdin: one line and comment
73 stdin: comment first
74 stdin: newline
75 stdin: newline2
EOF

  count-lines $suite
  summarize $suite 3 0
}
339
340# Quick hack: every shell uses 2 processes for this... doesn't illuminate much.
weird-command-sub() {
  ### Trace the \$(< file) form of command substitution in each shell
  shopt -s nullglob
  # Unlike by-code and by-input, this only empties the raw dir.  Create both
  # directories so the suite also works on a fresh tree.
  mkdir -p "$RAW_DIR" "$BASE_DIR"
  rm -r -f -v "${RAW_DIR:?}"/*

  local tmp=_tmp/cs
  echo FOO > "$tmp"

  # The \$ keeps this shell from expanding the command sub while building the
  # string; without it the shells under test would only receive 'echo FOO'
  # and 'echo hi', which contain no command sub at all.
  run-case 60 "echo \$(< $tmp)"
  run-case 61 "echo \$(< $tmp; echo hi)"

  local suite=weird-command-sub

  cat >"$BASE_DIR/cases.${suite}.txt" <<EOF
60 \$(< file)
61 \$(< file; echo hi)
EOF

  count-lines $suite
  summarize $suite 0 0
}
360
# Default for how many numbered cases by-code runs.  Switch to the smaller
# value for a quick run.
declare -r MAX_CASES=100
#declare -r MAX_CASES=3
363
by-code() {
  ### Run cases that vary by code snippet
  #
  # Args:
  #   $1: func_wrap - optional; when non-empty it is passed on to run-case
  #       and the suite is named 'by-code-wrapped' instead of 'by-code'
  #   $2: max_cases - optional; only the first N numbered cases are run
  #       (default: $MAX_CASES)
  #
  # Prints a message and returns without running anything if strace can't
  # be run.
  local func_wrap=${1:-}

  if ! strace true; then
    echo "Aborting because we couldn't run strace"
    return
  fi

  # This used to read $1 too, so 'by-code T' ended up running 'head -n T'.
  local max_cases=${2:-$MAX_CASES}

  rm -r -f -v "$RAW_DIR"
  mkdir -p "$RAW_DIR" "$BASE_DIR"

  write-sourced

  local suite
  if test -n "$func_wrap"; then
    suite='by-code-wrapped'
  else
    suite='by-code'
  fi

  local cases=$BASE_DIR/cases.${suite}.txt

  number-cases > "$cases"

  # Each line is a case number, whitespace, then the code string.
  local num code_str
  head -n "$max_cases" "$cases" | while read -r num code_str; do
    echo
    echo '==='
    echo "$num     $code_str"
    echo

    run-case $num "$code_str" "$func_wrap"
  done

  # omit total line
  count-lines $suite
  summarize $suite 3 0
}
403
syscall-py() {
  ### Run test/syscall.py with PYTHONPATH=. and the given arguments
  env PYTHONPATH=. test/syscall.py "$@"
}
407
write-sourced() {
  ### Write the script that the '. _tmp/sourced.sh' case sources
  # No trailing newline is written.
  printf '%s' 'date; date' > _tmp/sourced.sh
}
411
count-lines() {
  ### Write 'wc -l' of every file in RAW_DIR to BASE_DIR/wc.SUITE.txt
  #
  # Args:
  #   $1: suite - used in the output file name (default: by-code)
  local suite=${1:-by-code}

  # wc only prints a summary line when given more than one file, so
  # 'head -n -1' threw away the only data row when there was a single file.
  # Drop the summary row by name instead.  LC_ALL=C pins its text to 'total';
  # the glob is still expanded by this shell in its own locale.
  ( cd "$RAW_DIR" && LC_ALL=C wc -l * ) \
    | awk 'NF != 2 || $2 != "total"' \
    > "$BASE_DIR/wc.${suite}.txt"
}
416
summarize() {
  ### Pipe a suite's wc file through syscall-py, then print OK or FAIL
  #
  # Args:
  #   $1: suite - selects wc.SUITE.txt and cases.SUITE.txt (default: by-code)
  #   $2: value for --not-minimum (default: 0)
  #   $3: value for --more-than-bash (default: 0)
  local suite=${1:-by-code}
  local not_minimum=${2:-0}
  local more_than_bash=${3:-0}

  local -a flags=(
    --not-minimum $not_minimum
    --more-than-bash $more_than_bash
    --suite $suite
    $BASE_DIR/cases.${suite}.txt
    $BASE_DIR
  )

  # errexit is off only while the pipeline runs, so that a non-zero status
  # is reported below rather than ending the script.
  set +o errexit
  cat $BASE_DIR/wc.${suite}.txt | syscall-py "${flags[@]}"
  local status=$?
  set -o errexit

  case $status in
    0) echo 'OK' ;;
    *) echo 'FAIL' ;;
  esac
}
439
soil-run() {
  ### Run by-code (plain, then wrapped) followed by by-input
  #
  # Invoked as one of the "other" tests.  Soil runs by-code and by-input
  # separately.
  #
  # Note: Only $BASE_DIR/*.txt is included in the release/$VERSION/other.wwz

  # The empty value expands to no argument at all, i.e. a plain 'by-code';
  # 'T' selects the wrapped variant.
  local wrap
  for wrap in '' T; do
    by-code $wrap
  done

  by-input

  echo 'OK'
}
454
run-for-release() {
  ### Release entry point: runs everything soil-run does

  soil-run
}
460
461#
462# Entry points for C++
463#
464# TODO:
465# - Run with C++ in CI!
466# - ovm-tarball image does not have curl! to get tarball
467# - and it does not have Ninja
468# - Assert failures in summarize !
469# - also fix YSH metrics - some code doesn't parse
470
suite-cpp() {
  ### Compare with dynamically linked C++ build
  #
  # Args:
  #   $1: suite - function of this script to run (default: by-code)
  #   rest: passed through to that function
  local suite=${1:-by-code}

  # Drop the suite name so it isn't passed a second time through "$@", where
  # it would become the suite's own first argument.
  if (( $# > 0 )); then
    shift
  fi

  ninja _bin/cxx-dbg/{osh,ysh}

  # Re-invoke this script with the labels pointing at the C++ binaries.
  OSH=osh-cpp YSH=ysh-cpp "$0" "$suite" "$@"
}
478
suite-static() {
  ### Compare with statically linked C++ build
  #
  # Args:
  #   $1: suite - function of this script to run (default: by-code)
  #   rest: passed through to that function
  local suite=${1:-by-code}

  # Drop the suite name so it isn't passed a second time through "$@", where
  # it would become the suite's own first argument.
  if (( $# > 0 )); then
    shift
  fi

  soil/cpp-tarball.sh build-static

  # Re-invoke this script with the labels pointing at the static binaries.
  OSH=osh-st YSH=ysh-st "$0" "$suite" "$@"
}
486
487
488#
489# Real World
490#
491# $ ls|grep dash|wc -l
492# 6098
493# $ ls|grep bash|wc -l
494# 6102
495# $ ls|grep osh|wc -l
496# 6098
497#
498# So Oil is already at dash level for CPython's configure, and bash isn't
499# far off. So autoconf-generated scripts probably already use constructs
500# that are already "optimal" in most shells.
501
# Location of the Python 2.7.13 source tree, relative to the directory this
# script is started from.
readonly PY27_DIR=$PWD/Python-2.7.13

cpython-configure() {
  ### Trace ./configure in PY27_DIR under bash, dash and osh
  #
  # Trace files go under $RAW_DIR/real, resolved against the directory the
  # function is called from.
  #
  # Paths are quoted because they are derived from $PWD, which may contain
  # whitespace.
  local raw_dir=$PWD/$RAW_DIR/real
  mkdir -p "$raw_dir"

  pushd "$PY27_DIR"
  #for sh in "${SHELLS[@]}"; do
  local sh out_prefix
  for sh in bash dash osh; do
    out_prefix=$raw_dir/cpython-$sh
    echo "--- $sh"

    # TODO: Use a different dir
    count-procs "$out_prefix" "$sh" -c './configure'
  done
  popd
}
519
520task-five "$@"