OILS / benchmarks / osh-runtime.sh View on Github | oils.pub

586 lines, 345 significant
#!/usr/bin/env bash
#
# Test scripts found in the wild for both correctness and performance.
#
# Usage:
#   benchmarks/osh-runtime.sh <function name>

set -o nounset
set -o pipefail
set -o errexit

# "$0" is quoted so a checkout path containing spaces still resolves, and
# '&&' makes a failed 'cd' abort instead of silently yielding the caller's
# working directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)

# These paths are relative to the current directory, not to REPO_ROOT.
source benchmarks/common.sh  # tsv-concat
source benchmarks/id.sh  # print-job-id
source build/dev-shell.sh  # python2
source test/common.sh
source test/tsv-lib.sh  # tsv-row

# Default output dir, relative to the current directory
readonly BASE_DIR=_tmp/osh-runtime

# TODO: Move to ../oil_DEPS
readonly TAR_DIR=$PWD/_deps/osh-runtime  # Make it absolute

#
# Dependencies
#

# Source tree used by the configure.cpython workload
readonly PY27_DIR=$PWD/Python-2.7.13
30
31# NOTE: Same list in oilshell.org/blob/run.sh.
tarballs() {
  ### Print the names of the test-data tarballs, one per line

  local -a names=(
    tcc-0.9.26.tar.bz2
    yash-2.46.tar.xz
    ocaml-4.06.0.tar.xz
    util-linux-2.40.tar.xz
  )
  printf '%s\n' "${names[@]}"
}
40
download() {
  ### Fetch every tarball listed by 'tarballs' into $TAR_DIR
  #
  # wget --no-clobber leaves files that already exist alone.

  mkdir -p "$TAR_DIR"

  # -I {} already runs one command per input line.  GNU xargs treats -I and
  # -n as mutually exclusive and warns when both are given, so -n 1 is not
  # passed.
  tarballs | xargs -I {} --verbose -- \
    wget --no-clobber --directory "$TAR_DIR" 'https://www.oilshell.org/blob/testdata/{}'
}
46
extract() {
  ### Unpack every .bz2 and .xz file in $TAR_DIR into $TAR_DIR, then list it

  # Paths are quoted because TAR_DIR is derived from $PWD, which may contain
  # spaces.  The loop variable is local so it doesn't leak into the caller.
  local f

  set -x
  time for f in "$TAR_DIR"/*.{bz2,xz}; do
    tar -x --directory "$TAR_DIR" --file "$f"
  done
  set +x

  ls -l "$TAR_DIR"
}
56
57#
58# Computation
59#
60
run-tasks() {
  ### Run every task read from stdin, recording timing, stdout and GC stats
  #
  # Args:
  #   $1: output dir, relative to $PWD or absolute.  It receives times.tsv,
  #       files-$task_id/STDOUT.txt and, for osh binaries, gc-$task_id.txt.
  # Stdin:
  #   One task per line, 3 whitespace-separated fields:
  #   host_name sh_path workload

  local raw_out_dir=$1

  # Make it absolute because we change dirs below.  A path that is already
  # absolute is kept as is, rather than being prefixed with $PWD.
  case $raw_out_dir in
    /*)
      ;;
    *)
      raw_out_dir="$PWD/$raw_out_dir"
      ;;
  esac

  # Bug fix for dynamic scoping!
  local host_name sh_path workload

  local task_id=0
  # NOTE(review): the timed command inherits this loop's stdin (the task
  # list) -- confirm that no workload reads from stdin.
  while read -r host_name sh_path workload; do

    log "*** $host_name $sh_path $workload $task_id"

    local sh_run_path
    case $sh_path in
      /*)  # Already absolute
        sh_run_path=$sh_path
        ;;
      */*)  # It's relative, so make it absolute
        sh_run_path=$PWD/$sh_path
        ;;
      *)  # 'dash' should remain 'dash'
        sh_run_path=$sh_path
        ;;
    esac

    local working_dir=''
    local files_out_dir="$raw_out_dir/files-$task_id"
    mkdir -v -p "$files_out_dir"

    # Never set to a non-empty value below, so the copy step is disabled
    local save_new_files=''

    local -a argv
    case $workload in
      hello-world)
        argv=( testdata/osh-runtime/hello_world.sh )
        ;;

      bin-true)
        argv=( testdata/osh-runtime/bin_true.sh )
        ;;

      abuild-print-help)
        argv=( testdata/osh-runtime/abuild -h )
        ;;

      configure.cpython)
        argv=( "$PY27_DIR/configure" )
        working_dir=$files_out_dir
        ;;

      configure.util-linux)
        # flag needed to avoid sqlite3 dep error message
        argv=( "$TAR_DIR/util-linux-2.40/configure" --disable-liblastlog2 )
        working_dir=$files_out_dir
        ;;

      configure.*)
        argv=( ./configure )

        local conf_dir
        case $workload in
          *.ocaml)
            conf_dir='ocaml-4.06.0'
            ;;
          *.tcc)
            conf_dir='tcc-0.9.26'
            ;;
          *.yash)
            conf_dir='yash-2.46'
            ;;
          *)
            die "Invalid workload $workload"
            ;;
        esac

        # These are run in-tree?
        working_dir=$TAR_DIR/$conf_dir
        ;;

      *)
        die "Invalid workload $workload"
        ;;
    esac

    # The --field values must line up with the header row written by the
    # caller of this function.
    local -a time_argv=(
      time-tsv
        --output "$raw_out_dir/times.tsv" --append
        --rusage
        --rusage-2
        --field "$task_id"
        --field "$host_name" --field "$sh_path"
        --field "$workload"
        -- "$sh_run_path" "${argv[@]}"
    )

    local stdout_file="$files_out_dir/STDOUT.txt"
    local gc_stats_file="$raw_out_dir/gc-$task_id.txt"

    # Maybe change dirs
    if test -n "$working_dir"; then
      pushd "$working_dir"
    fi

    if test -n "$save_new_files"; then
      touch __TIMESTAMP
    fi

    # Run it, possibly with GC stats
    case $sh_path in
      *_bin/*/osh)
        # File descriptor 99 is opened on the per-task GC stats file
        OILS_GC_STATS_FD=99 "${time_argv[@]}" > "$stdout_file" 99> "$gc_stats_file"
        ;;
      *)
        "${time_argv[@]}" > "$stdout_file"
        ;;
    esac

    if test -n "$save_new_files"; then
      echo "COPYING to $files_out_dir"
      find . -type f -newer __TIMESTAMP \
        | xargs -I {} -- cp --verbose {} "$files_out_dir"
    fi

    # Restore dir
    if test -n "$working_dir"; then
      popd
    fi

    task_id=$((task_id + 1))
  done
}
191
192# Sorted by priority for test-oils.sh osh-runtime --num-shells 3
193
# Every workload name accepted by run-tasks.  Order matters: print-tasks-xshar
# takes a prefix of this list.
readonly -a ALL_WORKLOADS=(
  hello-world bin-true
  configure.cpython configure.util-linux configure.ocaml configure.tcc configure.yash
  abuild-print-help
)
206
print-workloads() {
  ### for help: print each name in ALL_WORKLOADS on its own indented line

  # Local so the loop variable doesn't leak into the caller's scope
  local w
  for w in "${ALL_WORKLOADS[@]}"; do
    echo " $w"
  done
}
214
print-tasks() {
  ### Call tsv-row once per (shell, workload) pair
  #
  # Args:
  #   $1: host name, passed as the first field of every row
  #   $2...: shells to run in addition to bash and dash
  # Env:
  #   QUICKLY: if non-empty, use a short workload list instead of
  #            ALL_WORKLOADS

  local host_name=$1
  shift 1
  local -a osh_native=( "$@" )

  # Local so they neither leak into nor clobber the caller's variables
  local sh_path workload
  local -a workloads

  if test -n "${QUICKLY:-}"; then
    workloads=(
      hello-world
      bin-true
      #configure.util-linux
      #abuild-print-help
    )
  else
    workloads=( "${ALL_WORKLOADS[@]}" )
  fi

  for sh_path in bash dash "${osh_native[@]}"; do
    for workload in "${workloads[@]}"; do
      # Quoted so a shell path containing spaces stays a single field
      tsv-row "$host_name" "$sh_path" "$workload"
    done
  done
}
237
print-tasks-xshar() {
  ### Like print-tasks, but limited to N iterations, shells and workloads
  #
  # Args:
  #   $1: host name, passed as the first field of every row
  #   $2: osh binary; it is tried first, followed by bash and dash
  #   $3: number of times to repeat the whole task list (default 1)
  #   $4: stop after this many shells (default 1)
  #   $5: stop after this many workloads from ALL_WORKLOADS (default 1)

  local host_name=$1
  local osh_native=$2

  local num_iters=${3:-1}
  local num_shells=${4:-1}
  local num_workloads=${5:-1}

  # All loop variables are local so they don't leak into the caller
  local i sh_path workload
  local s w

  for i in $(seq "$num_iters"); do

    s=0
    for sh_path in "$osh_native" bash dash; do

      w=0
      for workload in "${ALL_WORKLOADS[@]}"; do
        tsv-row "$host_name" "$sh_path" "$workload"

        w=$(( w + 1 ))  # cut off at specified workloads
        if test "$w" -eq "$num_workloads"; then
          break
        fi
      done

      s=$(( s + 1 ))  # cut off as specified shells
      if test "$s" -eq "$num_shells"; then
        break
      fi

    done
  done
}
269
test-print-tasks-xshar() {
  ### Print the xshar task list for a few shell and workload limits

  # Parallel lists: shell_limits[k] goes with workload_limits[k]
  local -a shell_limits=( 1 2 2 2 )
  local -a workload_limits=( 1 1 2 3 )

  local k
  for k in "${!shell_limits[@]}"; do
    print-tasks-xshar $(hostname) osh 1 ${shell_limits[k]} ${workload_limits[k]}
    echo
  done
}
280
run-tasks-wrapper() {
  ### reads tasks from stdin
  #
  # Args:
  #   $1: host name, added as a constant column to gc_stats.tsv
  #   $2: output dir; created if missing
  #
  # Writes times.tsv, gc_stats.tsv and a copy of _tmp/provenance.tsv into the
  # output dir, next to whatever run-tasks writes there.

  local host_name=$1  # 'no-host' or 'lenny'
  local raw_out_dir=$2

  # Paths are quoted throughout so an output dir containing spaces works
  mkdir -v -p "$raw_out_dir"

  local tsv_out="$raw_out_dir/times.tsv"

  # Write header of the TSV file that is appended to.
  time-tsv -o "$tsv_out" --print-header \
    --rusage \
    --rusage-2 \
    --field task_id \
    --field host_name --field sh_path \
    --field workload

  # reads tasks from stdin
  # run-tasks outputs 3 things: raw times.tsv, per-task STDOUT and files, and
  # per-task GC stats
  run-tasks "$raw_out_dir"

  # Turn individual files into a TSV, adding host.  Only the directory part
  # is quoted; the gc-*.txt glob must stay unquoted to expand.
  benchmarks/gc_stats_to_tsv.py "$raw_out_dir"/gc-*.txt \
    | tsv-add-const-column host_name "$host_name" \
    > "$raw_out_dir/gc_stats.tsv"

  cp -v _tmp/provenance.tsv "$raw_out_dir"
}
311
measure() {
  ### For release and CI: generate the task list and run it
  #
  # Args:
  #   $1: host name, e.g. 'no-host' or 'lenny'
  #   $2: output dir, e.g. _tmp/osh-runtime/$X or
  #       ../../benchmark-data/osh-runtime/$X
  #   $3...: shells passed on to print-tasks, e.g. $OSH_CPP_NINJA or
  #          $OSH_CPP_TWO

  local host=$1
  local out_dir=$2
  shift 2

  # Everything after the first two args is a shell to benchmark
  local -a shells=( "$@" )

  print-tasks "$host" "${shells[@]}" |
    run-tasks-wrapper "$host" "$out_dir"
}
322
stage1() {
  ### Concatenate the latest raw TSV files into $BASE_DIR/stage1
  #
  # Args:
  #   $1: dir to search for raw.* dirs (default $BASE_DIR), e.g.
  #       _tmp/osh-runtime or ../benchmark-data/osh-runtime
  #   $2: optional machine name.  If given, only raw.$2.* is used; otherwise
  #       the latest dir for each of $MACHINE1 and $MACHINE2 is used.
  #
  # Output always goes to $BASE_DIR/stage1, regardless of $1.

  local base_dir=${1:-$BASE_DIR}
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1
  mkdir -p "$out_dir"

  # Dirs whose TSV files are concatenated, in order.  Only the fixed parts of
  # each path are quoted, so the trailing '*' still globs while a base dir
  # containing spaces stays in one piece.
  local -a raw_dirs=()

  if test -n "$single_machine"; then
    # find dir in _tmp/osh-runtime
    local -a a=( "$base_dir"/raw."$single_machine".* )
    raw_dirs+=( "${a[-1]}" )
  else
    # find last dirs in ../benchmark-data/osh-runtime
    # Globs are in lexicographical order, which works for our dates.
    local -a a=( "$base_dir"/raw."$MACHINE1".* )
    local -a b=( "$base_dir"/raw."$MACHINE2".* )
    raw_dirs+=( "${a[-1]}" "${b[-1]}" )
  fi

  local name dir
  for name in times gc_stats provenance; do
    local -a inputs=()
    for dir in "${raw_dirs[@]}"; do
      inputs+=( "$dir/$name.tsv" )
    done
    tsv-concat "${inputs[@]}" > "$out_dir/$name.tsv"
  done
}
359
print-report() {
  ### Print the HTML report to stdout
  #
  # Args:
  #   $1: dir containing the .tsv files passed to tsv2html
  #
  # Every here doc uses a quoted delimiter: none of the bodies contain
  # expansions, so they are emitted literally.

  local in_dir=$1

  benchmark-html-head 'OSH Runtime Performance'

  cat <<'EOF'
 <body class="width60">
 <p id="home-link">
 <a href="/">oils.pub</a>
 </p>
EOF

  cmark <<'EOF'
## OSH Runtime Performance

Source code: [benchmarks/osh-runtime.sh](https://github.com/oils-for-unix/oils/tree/master/benchmarks/osh-runtime.sh)

- [Elapsed Time](#elapsed-time)
- [Minor Page Faults](#page-faults)
- [Memory Usage](#memory-usage)
- [GC Stats](#gc-stats)
- [rusage Details](#rusage-details)
- [More Details](#more-details)
- [Shell and Host](#shell-and-host)

[Raw files](-wwz-index)

<a name="elapsed-time" />

### Elapsed Time by Shell (milliseconds)

Some benchmarks call many external tools, while some exercise the shell
interpreter itself.
EOF
  tsv2html "$in_dir/elapsed.tsv"

  cmark <<'EOF'
<a name="page-faults" />

### Minor Page Faults
EOF

  tsv2html "$in_dir/page_faults.tsv"

  cmark <<'EOF'
<a name="memory-usage" />

### Memory Usage (Max Resident Set Size in MB)

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).
EOF
  tsv2html "$in_dir/max_rss.tsv"

  cmark <<'EOF'
<a name="gc-stats" />

### GC Stats
EOF
  tsv2html "$in_dir/gc_stats.tsv"

  cmark <<'EOF'
<a name="rusage-details" />

### rusage Details
EOF
  tsv2html "$in_dir/details.tsv"

  cmark <<'EOF'
<a name="more-details" />

### More Details
EOF
  tsv2html "$in_dir/details_io.tsv"

  cmark <<'EOF'
<a name="shell-and-host" />

### Shell and Host
EOF
  tsv2html "$in_dir/shells.tsv"
  tsv2html "$in_dir/hosts.tsv"

  cmark <<'EOF'

 </body>
</html>
EOF
}
448
test-oils-run() {
  ### Smoke-test osh, then run a limited task list and record the results
  #
  # Args:
  #   $1: osh binary to benchmark
  #   $2: job ID, passed to shell-provenance-2
  #   $3: host name
  #   $4: number of iterations (default 1)
  #   $5: number of shells (default 1)
  #   $6: number of workloads (default 1)
  # Env:
  #   XSHAR_DIR: if set and non-empty, used instead of $REPO_ROOT to locate
  #              benchmarks/time_.py

  local osh=$1
  local job_id=$2
  local host_name=$3

  # flags passed by caller
  local num_iters=${4:-1}
  local num_shells=${5:-1}
  local num_workloads=${6:-1}

  # Paths are quoted throughout so locations containing spaces work
  local time_py=${XSHAR_DIR:-$REPO_ROOT}/benchmarks/time_.py
  "$time_py" --tsv --rusage -- \
    "$osh" -c 'echo "smoke test: hi from benchmarks/osh-runtime.sh"'

  # Fresh build
  rm -r -f -v "$BASE_DIR" _tmp/{shell,host}-id

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$host_name" "$job_id" _tmp \
    bash dash "$osh"

  # The dir name is fixed here; no host name or job ID is appended to it
  local raw_out_dir="$BASE_DIR/raw"
  mkdir -p "$raw_out_dir"

  # Similar to 'measure', for soil-run and release
  print-tasks-xshar "$host_name" "$osh" \
    "$num_iters" "$num_shells" "$num_workloads" \
    | tee "$BASE_DIR/tasks.txt"

  run-tasks-wrapper "$host_name" "$raw_out_dir" < "$BASE_DIR/tasks.txt"
  echo

  # Note: 'stage1' in soil-run is a trivial concatenation, so we can create input for
  # benchmarks/report.R.  We don't need that here
}
486
soil-run() {
  ### Run it on just this machine, and make a report
  # DEPS: soil/cpp-tarball.sh benchmark-build
  #
  # Env:
  #   OSH_CPP_SOIL, OSH_SOUFFLE_CPP_SOIL, OSH_STATIC_SOIL: the osh binaries
  #   benchmarked alongside bash and dash

  rm -r -f "$BASE_DIR"
  mkdir -p "$BASE_DIR"

  # TODO: This testdata should be baked into Docker image, or mounted
  download
  extract

  # The three things built.  Each is quoted so a path containing spaces stays
  # one array element.
  local -a osh_native=(
    "$OSH_CPP_SOIL"
    "$OSH_SOUFFLE_CPP_SOIL"
    "$OSH_STATIC_SOIL"
  )

  local single_machine='no-host'

  # Declared separately so a failure of print-job-id isn't masked by 'local'
  local job_id
  job_id=$(print-job-id)

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$single_machine" "$job_id" _tmp \
    bash dash "${osh_native[@]}"

  local host_job_id="$single_machine.$job_id"
  local raw_out_dir="$BASE_DIR/raw.$host_job_id"
  mkdir -p "$raw_out_dir" "$BASE_DIR/stage1"

  measure "$single_machine" "$raw_out_dir" "${osh_native[@]}"

  # Trivial concatenation for 1 machine.  The empty first arg makes stage1
  # fall back to its default base dir.
  stage1 '' "$single_machine"

  benchmarks/report.sh stage2 "$BASE_DIR"

  benchmarks/report.sh stage3 "$BASE_DIR"
}
528
529#
530# Debugging
531#
532
compare-cpython() {
  ### Debugging: diff the files written by configure.cpython under each shell
  #
  # Reads the last matching dir under ../benchmark-data/osh-runtime and writes
  # one diff per shell pair to _tmp/cpython-configure/.
  #
  # Note: errexit is turned off partway through and is not restored.

  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2024* )
  local -a a=( ../benchmark-data/osh-runtime/*.hoover.2024* )

  # More of a diff here?
  #local -a a=( ../benchmark-data/osh-runtime/*.broome.2023* )
  # less diff here
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2023* )

  local dir=${a[-1]}

  echo "$dir"

  head -n 1 "$dir/times.tsv"
  # grep -F rather than fgrep, which GNU grep reports as obsolescent
  grep -F 'configure.cpython' "$dir/times.tsv"

  # The task ID for each shell; files-$id is that task's output dir
  local bash_id=2
  local dash_id=8
  local osh_py_id=14
  local osh_cpp_id=20

  # diff exits non-zero when the inputs differ, which is expected here
  set +o errexit

  local out_dir=_tmp/cpython-configure
  mkdir -p "$out_dir"

  # Each entry is 'name:task_id' for the shell compared against bash
  local spec other_name other_id
  for spec in "dash:$dash_id" "osh-py:$osh_py_id" "osh-cpp:$osh_cpp_id"; do
    other_name=${spec%%:*}
    other_id=${spec##*:}

    echo "bash vs. $other_name"
    diff -u --recursive "$dir/files-$bash_id" "$dir/files-$other_id" \
      > "$out_dir/bash-vs-$other_name.txt"
    diffstat "$out_dir/bash-vs-$other_name.txt"
    echo
  done

  return

  # Unreachable because of the 'return' above; kept for manual debugging.

  diff -u "$dir/files-$bash_id/STDOUT.txt" "$dir/files-$osh_cpp_id/STDOUT.txt"
  echo

  diff -u "$dir/files-$bash_id/pyconfig.h" "$dir/files-$osh_cpp_id/pyconfig.h"
  echo

  cdiff -u "$dir/files-$bash_id/config.log" "$dir/files-$osh_cpp_id/config.log"
  echo
}
585
586"$@"