1 | #!/usr/bin/env bash
|
2 | #
|
3 | # Test scripts found in the wild for both correctness and performance.
|
4 | #
|
5 | # Usage:
|
6 | # benchmarks/osh-runtime.sh <function name>
|
7 |
|
# Strict mode: unset variables are errors, a pipeline fails if any stage
# fails, and an unhandled non-zero status exits the script.
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the parent of the directory containing this script.
# "$0" is quoted so a path with spaces survives, and '&&' makes a failed 'cd'
# fail the assignment instead of silently recording the caller's directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
|
13 |
|
14 | source benchmarks/common.sh # tsv-concat
|
15 | source benchmarks/id.sh # print-job-id
|
16 | source build/dev-shell.sh # python2
|
17 | source test/common.sh
|
18 | source test/tsv-lib.sh # tsv-row
|
19 |
|
# Relative directory that holds this benchmark's output.
readonly BASE_DIR='_tmp/osh-runtime'

# TODO: Move to ../oil_DEPS
# Absolute (anchored at the directory the script is started from).
readonly TAR_DIR="$PWD/_deps/osh-runtime"

#
# Dependencies
#

readonly PY27_DIR="$PWD/Python-2.7.13"
|
30 |
|
31 | # NOTE: Same list in oilshell.org/blob/run.sh.
|
tarballs() {
  ### Print the name of each testdata tarball, one per line
  local -a names=(
    tcc-0.9.26.tar.bz2
    yash-2.46.tar.xz
    ocaml-4.06.0.tar.xz
    util-linux-2.40.tar.xz
  )
  printf '%s\n' "${names[@]}"
}
|
40 |
|
download() {
  ### Fetch every tarball printed by 'tarballs' into $TAR_DIR
  #
  # wget runs once per tarball name; --no-clobber skips files that already
  # exist in $TAR_DIR.

  mkdir -p "$TAR_DIR"

  # -I {} already means "one command per input line".  It is mutually
  # exclusive with -n, so -n 1 is not passed (GNU xargs warns and ignores it).
  tarballs | xargs -I {} --verbose -- \
    wget --no-clobber --directory "$TAR_DIR" 'https://www.oilshell.org/blob/testdata/{}'
}
|
46 |
|
extract() {
  ### Unpack every .bz2 and .xz archive in $TAR_DIR into $TAR_DIR, then list it
  #
  # Commands are traced with 'set -x' while extracting, and the whole loop is
  # timed.

  local archive  # keep the loop variable out of the global scope

  set -x
  # Quoted so that a $TAR_DIR containing whitespace is still one word; the
  # glob and brace expansion stay outside the quotes.
  time for archive in "$TAR_DIR"/*.{bz2,xz}; do
    tar -x --directory "$TAR_DIR" --file "$archive"
  done
  set +x

  ls -l "$TAR_DIR"
}
|
56 |
|
57 | #
|
58 | # Computation
|
59 | #
|
60 |
|
run-tasks() {
  ### Run every task read from stdin under time-tsv, saving per-task output
  #
  # Args:
  #   $1  Output directory, relative to the current directory.
  # Stdin:
  #   One task per line with three whitespace-separated fields:
  #   host_name, sh_path, workload.
  # Writes:
  #   $1/times.tsv         the --output file time-tsv is asked to append to
  #   $1/files-<task_id>/  STDOUT.txt; also the working directory of the
  #                        cpython and util-linux configure workloads
  #   $1/gc-<task_id>.txt  what the shell writes to fd 99, only for shells
  #                        whose path matches *_bin/*/osh
  #
  # NOTE(review): the timed command inherits this loop's stdin, i.e. the task
  # list.  A workload that reads stdin would swallow the remaining tasks.

  local raw_out_dir=$1
  raw_out_dir="$PWD/$raw_out_dir"  # because we change dirs

  # Declared local so 'read' below doesn't clobber same-named variables of a
  # caller (dynamic scoping).
  local host_name sh_path workload

  local sh_run_path working_dir files_out_dir save_new_files conf_dir
  local stdout_file gc_stats_file
  local -a argv time_argv

  local task_id=0
  while read -r host_name sh_path workload; do

    log "*** $host_name $sh_path $workload $task_id"

    case $sh_path in
      /*)  # Already absolute
        sh_run_path=$sh_path
        ;;
      */*)  # It's relative, so make it absolute
        sh_run_path=$PWD/$sh_path
        ;;
      *)  # 'dash' should remain 'dash'
        sh_run_path=$sh_path
        ;;
    esac

    working_dir=''
    files_out_dir="$raw_out_dir/files-$task_id"
    # Quoted: the path starts with $PWD, which may contain whitespace
    mkdir -v -p "$files_out_dir"

    # Never set to a non-empty value here, so the two blocks guarded by it
    # below are currently inactive.
    save_new_files=''

    case $workload in
      hello-world)
        argv=( testdata/osh-runtime/hello_world.sh )
        ;;

      bin-true)
        argv=( testdata/osh-runtime/bin_true.sh )
        ;;

      abuild-print-help)
        argv=( testdata/osh-runtime/abuild -h )
        ;;

      configure.cpython)
        argv=( "$PY27_DIR/configure" )
        working_dir=$files_out_dir
        ;;

      configure.util-linux)
        # flag needed to avoid sqlite3 dep error message
        argv=( "$TAR_DIR/util-linux-2.40/configure" --disable-liblastlog2 )
        working_dir=$files_out_dir
        ;;

      configure.*)
        argv=( ./configure )

        case $workload in
          *.ocaml)
            conf_dir='ocaml-4.06.0'
            ;;
          *.tcc)
            conf_dir='tcc-0.9.26'
            ;;
          *.yash)
            conf_dir='yash-2.46'
            ;;
          *)
            die "Invalid workload $workload"
            ;;
        esac

        # These are run in-tree?
        working_dir=$TAR_DIR/$conf_dir
        ;;

      *)
        die "Invalid workload $workload"
        ;;
    esac

    # The --field values become extra columns of the times.tsv row
    time_argv=(
      time-tsv
        --output "$raw_out_dir/times.tsv" --append
        --rusage
        --rusage-2
        --field "$task_id"
        --field "$host_name" --field "$sh_path"
        --field "$workload"
        -- "$sh_run_path" "${argv[@]}"
    )

    stdout_file="$files_out_dir/STDOUT.txt"
    gc_stats_file="$raw_out_dir/gc-$task_id.txt"

    # Maybe change dirs
    if test -n "$working_dir"; then
      pushd "$working_dir"
    fi

    if test -n "$save_new_files"; then
      touch __TIMESTAMP
    fi

    # Run it, possibly with GC stats.  Redirect targets are quoted; an
    # unquoted target that splits into several words is an "ambiguous
    # redirect" error in bash.
    case $sh_path in
      *_bin/*/osh)
        OILS_GC_STATS_FD=99 "${time_argv[@]}" > "$stdout_file" 99> "$gc_stats_file"
        ;;
      *)
        "${time_argv[@]}" > "$stdout_file"
        ;;
    esac

    if test -n "$save_new_files"; then
      echo "COPYING to $files_out_dir"
      find . -type f -newer __TIMESTAMP \
        | xargs -I {} -- cp --verbose {} "$files_out_dir"
    fi

    # Restore dir
    if test -n "$working_dir"; then
      popd
    fi

    task_id=$((task_id + 1))
  done
}
|
191 |
|
# Sorted by priority for test-oils.sh osh-runtime --num-shells 3
# (print-tasks-xshar takes a prefix of this list, so order matters.)

readonly -a ALL_WORKLOADS=(
  # small scripts
  hello-world bin-true

  # configure scripts
  configure.cpython configure.util-linux
  configure.ocaml configure.tcc configure.yash

  abuild-print-help
)
|
206 |
|
print-workloads() {
  ### for help: print each name in ALL_WORKLOADS on its own line, indented

  local w  # keep the loop variable out of the global scope
  for w in "${ALL_WORKLOADS[@]}"; do
    echo " $w"
  done
}
|
214 |
|
print-tasks() {
  ### Print one tsv-row per (shell, workload) pair
  #
  # Args:
  #   $1    host_name, the first field of every row
  #   $2..  extra shells to run, after 'bash' and 'dash'
  # Env:
  #   QUICKLY  if non-empty, only hello-world and bin-true are emitted
  #            instead of all of ALL_WORKLOADS

  local host_name=$1
  shift 1
  local -a osh_native=( "$@" )

  # Local so they don't leak into (or clobber) the caller's scope
  local -a workloads
  local sh_path workload

  if test -n "${QUICKLY:-}"; then
    workloads=(
      hello-world
      bin-true
      #configure.util-linux
      #abuild-print-help
    )
  else
    workloads=( "${ALL_WORKLOADS[@]}" )
  fi

  for sh_path in bash dash "${osh_native[@]}"; do
    for workload in "${workloads[@]}"; do
      # Quoted so each value stays a single field
      tsv-row "$host_name" "$sh_path" "$workload"
    done
  done
}
|
237 |
|
print-tasks-xshar() {
  ### Print tsv-rows for a truncated (iteration x shell x workload) matrix
  #
  # Args:
  #   $1  host_name, the first field of every row
  #   $2  osh_native: the shell tried first, before 'bash' and 'dash'
  #   $3  num_iters: how many times the matrix is repeated (default 1)
  #   $4  num_shells: stop after this many shells (default 1)
  #   $5  num_workloads: stop after this many entries of ALL_WORKLOADS
  #       (default 1)

  local host_name=$1
  local osh_native=$2

  local num_iters=${3:-1}
  local num_shells=${4:-1}
  local num_workloads=${5:-1}

  # Local so the counters and loop variables don't leak to the caller
  local i s w sh_path workload

  for (( i = 0; i < num_iters; i++ )); do

    s=0
    # ${var:+"$var"} keeps a path with spaces as one word, and still expands
    # to nothing when osh_native is empty.
    for sh_path in ${osh_native:+"$osh_native"} bash dash; do

      w=0
      for workload in "${ALL_WORKLOADS[@]}"; do
        tsv-row "$host_name" "$sh_path" "$workload"

        w=$(( w + 1 ))  # cut off at specified workloads
        if test "$w" -eq "$num_workloads"; then
          break
        fi
      done

      s=$(( s + 1 ))  # cut off at specified shells
      if test "$s" -eq "$num_shells"; then
        break
      fi

    done
  done
}
|
269 |
|
test-print-tasks-xshar() {
  ### Print the task list for a few (iters, shells, workloads) combinations

  local limits
  for limits in '1 1 1' '1 2 1' '1 2 2' '1 2 3'; do
    # $limits is deliberately unquoted: it splits into the three counts
    print-tasks-xshar $(hostname) osh $limits
    echo
  done
}
|
280 |
|
run-tasks-wrapper() {
  ### reads tasks from stdin, runs them, and collects the raw results
  #
  # Args:
  #   $1  host_name: added as a constant column to gc_stats.tsv
  #   $2  raw_out_dir: created if missing; receives times.tsv, the run-tasks
  #       output, gc_stats.tsv and a copy of _tmp/provenance.tsv

  local host_name=$1  # 'no-host' or 'lenny'
  local raw_out_dir=$2

  # Path expansions below are quoted so a directory containing whitespace
  # stays a single word.
  mkdir -v -p "$raw_out_dir"

  local tsv_out="$raw_out_dir/times.tsv"

  # Write header of the TSV file that is appended to.
  time-tsv -o "$tsv_out" --print-header \
    --rusage \
    --rusage-2 \
    --field task_id \
    --field host_name --field sh_path \
    --field workload

  # reads tasks from stdin
  # run-tasks outputs 3 things: raw times.tsv, per-task STDOUT and files, and
  # per-task GC stats
  run-tasks "$raw_out_dir"

  # Turn individual files into a TSV, adding host
  benchmarks/gc_stats_to_tsv.py "$raw_out_dir"/gc-*.txt \
    | tsv-add-const-column host_name "$host_name" \
    > "$raw_out_dir/gc_stats.tsv"

  cp -v _tmp/provenance.tsv "$raw_out_dir"
}
|
311 |
|
measure() {
  ### For release and CI
  #
  # Args:
  #   $1    host name: 'no-host' or 'lenny'
  #   $2    raw output dir: _tmp/osh-runtime/$X or
  #         ../../benchmark-data/osh-runtime/$X
  #   $3..  native shells: $OSH_CPP_NINJA or $OSH_CPP_TWO, etc...

  local host_name=$1 raw_out_dir=$2

  # Everything after the second argument is forwarded as the shell list
  print-tasks "$host_name" "${@:3}" \
    | run-tasks-wrapper "$host_name" "$raw_out_dir"
}
|
322 |
|
stage1() {
  ### Concatenate the newest raw results per machine into $BASE_DIR/stage1
  #
  # Args:
  #   $1  base_dir: where the raw.<machine>.* directories live
  #       (default $BASE_DIR): _tmp/osh-runtime or
  #       ../benchmark-data/osh-runtime
  #   $2  single_machine: if non-empty, use only this machine; otherwise use
  #       $MACHINE1 and $MACHINE2
  # Writes times.tsv, gc_stats.tsv and provenance.tsv under $BASE_DIR/stage1,
  # regardless of base_dir.

  local base_dir=${1:-$BASE_DIR}
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1  # _tmp/osh-runtime
  mkdir -p "$out_dir"

  local -a machines
  if test -n "$single_machine"; then
    machines=( "$single_machine" )
  else
    machines=( "$MACHINE1" "$MACHINE2" )
  fi

  local -a raw_times=()
  local -a raw_gc_stats=()
  local -a raw_provenance=()

  local machine latest
  local -a matches
  for machine in "${machines[@]}"; do
    # Globs are in lexicographical order, which works for our dates, so the
    # last match is the newest run.  Only the variables are quoted, so a
    # base_dir with whitespace stays one word while the glob still expands.
    matches=( "$base_dir"/raw."$machine".* )
    latest=${matches[-1]}

    raw_times+=( "$latest/times.tsv" )
    raw_gc_stats+=( "$latest/gc_stats.tsv" )
    raw_provenance+=( "$latest/provenance.tsv" )
  done

  tsv-concat "${raw_times[@]}" > "$out_dir/times.tsv"

  tsv-concat "${raw_gc_stats[@]}" > "$out_dir/gc_stats.tsv"

  tsv-concat "${raw_provenance[@]}" > "$out_dir/provenance.tsv"
}
|
359 |
|
print-report() {
  ### Emit the 'OSH Runtime Performance' HTML report
  #
  # Args:
  #   $1  in_dir: directory holding the tables rendered below: elapsed.tsv,
  #       page_faults.tsv, max_rss.tsv, gc_stats.tsv, details.tsv,
  #       details_io.tsv, shells.tsv and hosts.tsv
  #
  # The page is a sequence of fragments: markdown passed to 'cmark' on stdin,
  # each followed by a table produced by 'tsv2html'.
  # NOTE(review): presumably every helper writes to stdout, like the 'cat'
  # below; they are defined outside this file -- confirm.

  local in_dir=$1

  benchmark-html-head 'OSH Runtime Performance'

  # Opening <body> and the home link, emitted as-is
  cat <<EOF
<body class="width60">
<p id="home-link">
<a href="/">oils.pub</a>
</p>
EOF

  # Title, table of contents, and the first section heading
  cmark <<'EOF'
## OSH Runtime Performance

Source code: [benchmarks/osh-runtime.sh](https://github.com/oils-for-unix/oils/tree/master/benchmarks/osh-runtime.sh)

- [Elapsed Time](#elapsed-time)
- [Minor Page Faults](#page-faults)
- [Memory Usage](#memory-usage)
- [GC Stats](#gc-stats)
- [rusage Details](#rusage-details)
- [More Details](#more-details)
- [Shell and Host](#shell-and-host)

[Raw files](-wwz-index)

<a name="elapsed-time" />

### Elapsed Time by Shell (milliseconds)

Some benchmarks call many external tools, while some exercise the shell
interpreter itself.
EOF
  tsv2html $in_dir/elapsed.tsv

  # The heredocs below with an unquoted EOF contain no '$' or backquotes, so
  # they are passed through literally too.
  cmark <<EOF
<a name="page-faults" />

### Minor Page Faults
EOF

  tsv2html $in_dir/page_faults.tsv

  cmark <<EOF
<a name="memory-usage" />

### Memory Usage (Max Resident Set Size in MB)

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).
EOF
  tsv2html $in_dir/max_rss.tsv

  cmark <<EOF
<a name="gc-stats" />

### GC Stats
EOF
  tsv2html $in_dir/gc_stats.tsv

  cmark <<EOF
<a name="rusage-details" />

### rusage Details
EOF
  tsv2html $in_dir/details.tsv

  cmark <<EOF
<a name="more-details" />

### More Details
EOF
  tsv2html $in_dir/details_io.tsv

  # The last section has two tables: shells, then hosts
  cmark <<'EOF'
<a name="shell-and-host" />

### Shell and Host
EOF
  tsv2html $in_dir/shells.tsv
  tsv2html $in_dir/hosts.tsv

  # Close the tags: <body> was opened by the 'cat' heredoc above
  cmark <<'EOF'

</body>
</html>
EOF
}
|
448 |
|
test-oils-run() {
  ### Smoke-test one osh binary, then run a truncated task matrix with it
  #
  # Args:
  #   $1  osh: path of the shell under test
  #   $2  job_id: passed to shell-provenance-2
  #   $3  host_name
  #   $4  num_iters (default 1)
  #   $5  num_shells (default 1)
  #   $6  num_workloads (default 1)
  # Env:
  #   XSHAR_DIR  if set, time_.py is taken from there instead of $REPO_ROOT
  #
  # Caller-supplied values are quoted throughout so that paths containing
  # whitespace are passed on as single arguments.

  local osh=$1
  local job_id=$2
  local host_name=$3

  # flags passed by caller
  local num_iters=${4:-1}
  local num_shells=${5:-1}
  local num_workloads=${6:-1}

  local time_py=${XSHAR_DIR:-$REPO_ROOT}/benchmarks/time_.py
  "$time_py" --tsv --rusage -- \
    "$osh" -c 'echo "smoke test: hi from benchmarks/osh-runtime.sh"'

  # Fresh build
  rm -r -f -v "$BASE_DIR" _tmp/{shell,host}-id

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$host_name" "$job_id" _tmp \
    bash dash "$osh"

  # A fixed directory name: unlike soil-run, no host or job ID suffix
  local raw_out_dir="$BASE_DIR/raw"
  mkdir -p "$raw_out_dir"

  # Similar to 'measure', for soil-run and release.  The task list is shown
  # on stdout and also saved, so it can be fed to run-tasks-wrapper.
  print-tasks-xshar "$host_name" "$osh" \
    "$num_iters" "$num_shells" "$num_workloads" \
    | tee "$BASE_DIR/tasks.txt"

  run-tasks-wrapper "$host_name" "$raw_out_dir" < "$BASE_DIR/tasks.txt"
  echo

  # Note: 'stage1' in soil-run is a trivial concatenation, so we can create input for
  # benchmarks/report.R.  We don't need that here
}
|
486 |
|
soil-run() {
  ### Run it on just this machine, and make a report
  # DEPS: soil/cpp-tarball.sh benchmark-build

  # Start from an empty output tree
  rm -r -f "$BASE_DIR"
  mkdir -p "$BASE_DIR"

  # TODO: This testdata should be baked into Docker image, or mounted
  download
  extract

  # The three things built
  local -a shells=( $OSH_CPP_SOIL $OSH_SOUFFLE_CPP_SOIL $OSH_STATIC_SOIL )

  local machine='no-host'

  # Assigned separately from 'local' so a failing print-job-id isn't masked
  local job_id
  job_id=$(print-job-id)

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 $machine $job_id _tmp bash dash "${shells[@]}"

  local raw_out_dir="$BASE_DIR/raw.$machine.$job_id"
  mkdir -p $raw_out_dir "$BASE_DIR/stage1"

  measure $machine $raw_out_dir "${shells[@]}"

  # Trivial concatenation for 1 machine; '' selects stage1's default base dir
  stage1 '' $machine

  local stage
  for stage in stage2 stage3; do
    benchmarks/report.sh $stage "$BASE_DIR"
  done
}
|
528 |
|
529 | #
|
530 | # Debugging
|
531 | #
|
532 |
|
compare-cpython() {
  ### Debugging aid: diff the configure.cpython output of different shells
  #
  # Uses the last directory matching the glob below, prints the header and
  # the configure.cpython rows of its times.tsv, then writes recursive diffs
  # of the per-task files-<id> directories to _tmp/cpython-configure/ and
  # summarizes each with diffstat.
  #
  # Side effect: leaves errexit turned off in the calling shell.

  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2024* )
  local -a a=( ../benchmark-data/osh-runtime/*.hoover.2024* )

  # More of a diff here?
  #local -a a=( ../benchmark-data/osh-runtime/*.broome.2023* )
  # less diff here
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2023* )

  local dir=${a[-1]}

  echo "$dir"

  head -n 1 "$dir/times.tsv"
  # grep -F rather than the obsolescent fgrep
  grep -F 'configure.cpython' "$dir/times.tsv"

  # Task IDs whose files-<id> directories are compared.
  # NOTE(review): these must line up with the task_id column printed just
  # above for the chosen run -- confirm when switching runs.
  local bash_id=2
  local dash_id=8
  local osh_py_id=14
  local osh_cpp_id=20

  # From here on non-zero statuses are tolerated (e.g. diff exits 1 when its
  # inputs differ)
  set +o errexit

  local out_dir=_tmp/cpython-configure
  mkdir -p "$out_dir"

  echo 'bash vs. dash'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$dash_id" \
    > "$out_dir/bash-vs-dash.txt"
  diffstat "$out_dir/bash-vs-dash.txt"
  echo

  echo 'bash vs. osh-py'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_py_id" \
    > "$out_dir/bash-vs-osh-py.txt"
  diffstat "$out_dir/bash-vs-osh-py.txt"
  echo

  echo 'bash vs. osh-cpp'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_cpp_id" \
    > "$out_dir/bash-vs-osh-cpp.txt"
  diffstat "$out_dir/bash-vs-osh-cpp.txt"
  echo

  # Everything below is disabled by this early return; remove it to see the
  # per-file diffs.
  return

  diff -u "$dir/files-$bash_id/STDOUT.txt" "$dir/files-$osh_cpp_id/STDOUT.txt"
  echo

  diff -u "$dir/files-$bash_id/pyconfig.h" "$dir/files-$osh_cpp_id/pyconfig.h"
  echo

  cdiff -u "$dir/files-$bash_id/config.log" "$dir/files-$osh_cpp_id/config.log"
  echo
}
|
585 |
|
586 | "$@"
|