#!/usr/bin/env bash
#
# Compare operations on data structures, with little I/O: strings, arrays,
# associative arrays, integers.
#
# Usage:
#   benchmarks/compute.sh <function name>
#
# List of benchmarks:
#
# - fib: integer, loop, assignment (shells don't have real integers)
# - for_loop: 2025 update, taken from benchmarks/ysh-for.sh
# - word_freq: hash table / assoc array (OSH uses a vector<pair<>> now!),
#   plus an integer counter
# - bubble_sort: indexed array (bash uses a linked list?)
# - palindrome: strings, slicing, unicode
# - parse_help: realistic shell-only string processing, which I didn't write.
#
# TODO:
# - Fix the BUGS
#   - palindrome doesn't work? Unicode? Does it depend on UTF-8 decoding?
#   - bubble_sort depends on locale too - there is an LC_ALL setting here
#
# - This file is annoying to TEST
#   - to add to it, you also have to change benchmarks/report.R
#   - and there is a loop in 'stage1' as well
#   - why can't it behave like other benchmarks?
#     - they are using this "big table" pattern

# - vary the problem size, which is different from iters
#   - bubble_sort: array length, to test complexity of array indexing
#   - palindrome: longer lines, to test complexity of unicode/byte slicing
#   - word_freq: more unique words, to test complexity of assoc arrays
#   - for_loop and fib are kinda similar
#     - maybe for_loop can introduce some conditionals
#
# - other languages
#   - awk, mawk, etc.
#   - we are going for Awk speed!
#
# Questions to answer:
# - Can a fast bytecode runtime be as fast as Python?
#   - measurement issue: Python kills at fib/for_loop - it's mostly process
#     startup time
#   - bubble_sort and word_freq are a bit more work
#     - can add YSH versions of those
#   - I wonder if they are testing data structures or the actual interpreter
#     loop though
#   - it's possible that speeding up the interpreter loop doesn't help much
# - the real motivations behind bytecode:
#   - to fix 'break continue'
#   - add coroutine support too - that suspends and resumes a frame, which we
#     can't do now
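#
# Examples (run by hand):
#
#   benchmarks/compute.sh soil-run
#   benchmarks/compute.sh run-control-flow
#   benchmarks/compute.sh word_freq-one word_freq bash 10 configure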

set -o nounset
set -o pipefail
set -o errexit

REPO_ROOT=$(cd $(dirname $0)/.. && pwd)
readonly REPO_ROOT

source benchmarks/common.sh  # filter-provenance
source build/dev-shell.sh  # python2
source test/tsv-lib.sh  # tsv2html

readonly BASE_DIR=_tmp/compute

# Stabilize 'sort' output across machines (ugh locales!)
export LC_ALL=C

TIMEFORMAT='%U'

readonly OSH_YSH_OPT_REGEX='_bin/cxx-opt(-sh)?/(mycpp-souffle/)?(osh|ysh)'

# task_name,iter,args
hello-tasks() {
  local provenance=$1

  # Append the task fields (task name, arg1, arg2) to each provenance row.
  cat $provenance | filter-provenance python2 awk bash dash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'hello _ _' | xargs -n 3 -- echo "$fields"
  done
}

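# The xargs trick above appends the task fields to every provenance row.  A
# rough sketch of the effect (the provenance values here are made up):
#
#   $ echo 'no-host abc123 bash def456' | while read fields; do
#   >   echo 'fib 200 44' | xargs -n 3 -- echo "$fields"
#   > done
#   no-host abc123 bash def456 fib 200 44
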
# task_name,iter,args
fib-tasks() {
  local provenance=$1

  # Append the task fields (task name, arg1, arg2) to each provenance row.
  cat $provenance | filter-provenance python2 awk bash dash "$OSH_YSH_OPT_REGEX" |
  while read fields; do
    echo 'fib 200 44' | xargs -n 3 -- echo "$fields"
  done
}

# task_name,iter,args
for_loop-tasks() {
  local provenance=$1

  # bumpleak segfaults on for_loop! Probably because it runs out of memory
  cat $provenance | filter-provenance python2 awk bash dash "$OSH_YSH_OPT_REGEX" |
  while read fields; do
    echo 'for_loop 50000 _' | xargs -n 3 -- echo "$fields"
  done
}

# task_name,iter,args
control_flow-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance bash dash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'control_flow do_return 200' | xargs -n 3 -- echo "$fields"
  done
}

word_freq-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    # BUG: oils-for-unix differs on these two. Looks like it's related to
    # backslashes!
    #echo 'word_freq 10 benchmarks/testdata/abuild' | xargs -n 3 -- echo "$fields"
    #echo 'word_freq 2 benchmarks/testdata/ltmain.sh' | xargs -n 3 -- echo "$fields"
    echo 'word_freq 10 configure' | xargs -n 3 -- echo "$fields"
  done
}

assoc_array-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    for n in 1000 2000 3000; do
      echo "word_freq 10 $n" | xargs -n 3 -- echo "$fields"
    done
  done
}

bubble_sort-tasks() {
  # Note: this is quadratic, but bubble sort itself is quadratic!
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'bubble_sort int 200' | xargs -n 3 -- echo "$fields"
    echo 'bubble_sort bytes 200' | xargs -n 3 -- echo "$fields"
  done
}

# Arrays are doubly linked lists in bash! With a LASTREF hack to avoid being
# quadratic.
#
# See array_reference() in array.c in bash. It searches both back and
# forward. Every cell has its index, a value, a forward pointer, and a back
# pointer.
#
# You need pretty high N to see the quadratic behavior though!

# NOTE: osh is also slower with linear access, but not superlinear!

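# A rough way to see this by hand (a sketch; not one of the measured tasks).
# It reuses benchmarks/compute/array_ref.sh, which reads numbers from stdin
# and takes the access mode ('seq' or 'random') as an argument.
demo-array-ref() {
  local n=${1:-20000}
  for mode in seq random; do
    echo "--- bash array_ref $mode n=$n"
    time (seq $n | shuf | bash benchmarks/compute/array_ref.sh $mode)
  done
}
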
array_ref-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance bash |
  while read fields; do
    for mode in seq random; do
      for n in 10000 20000 30000 40000; do
        echo "array_ref $mode $n" | xargs -n 3 -- echo "$fields"
      done
    done
  done

#array_ref $OSH_CC seq 5000
#array_ref $OSH_CC seq 10000
#array_ref $OSH_CC random 5000
#array_ref $OSH_CC random 10000
#EOF
}

palindrome-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'palindrome unicode _' | xargs -n 3 -- echo "$fields"
    echo 'palindrome bytes _' | xargs -n 3 -- echo "$fields"
  done
}

parse_help-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance bash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'parse_help ls-short _' | xargs -n 3 -- echo "$fields"
    echo 'parse_help ls _' | xargs -n 3 -- echo "$fields"
    echo 'parse_help mypy _' | xargs -n 3 -- echo "$fields"
  done
}

ext() {
  ### Map a runtime name to the file extension of its benchmark program
  local runtime=$1

  case $runtime in
    python2)
      echo 'py'
      ;;
    awk)
      echo 'awk'
      ;;
    *ysh*)
      echo 'ysh'
      ;;
    *sh | *osh*)
      echo 'sh'
      ;;
  esac
}

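# Informal check of the mapping above, run by hand (it relies on the "$@"
# dispatch at the bottom of this file):
#
#   $ benchmarks/compute.sh ext _bin/cxx-opt/ysh
#   ysh
#   $ benchmarks/compute.sh ext bash
#   sh
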
word_freq-one() {
  ### Run one word_freq task (hash tables)

  local name=${1:-word_freq}
  local runtime=$2

  local iters=${3:-10}
  local in=${4:-configure}  # input

  $runtime benchmarks/compute/word_freq.$(ext $runtime) $iters < $in | sort -n
}

assoc_array-one() {
  ### Run word_freq with seq

  local name=${1:-word_freq}
  local runtime=$2

  local iters=${3:-10}
  local n=${4:-10}

  # shuf so we don't get the bash optimization
  seq $n | shuf |
    $runtime benchmarks/compute/word_freq.$(ext $runtime) $iters | sort -n
}

bubble_sort-one() {
  ### Run one bubble_sort task (arrays)

  local name=${1:-bubble_sort}
  local runtime=$2
  local mode=${3:-int}
  local n=${4:-100}

  $runtime benchmarks/compute/bubble_sort.$(ext $runtime) $mode \
    < $BASE_DIR/tmp/$name/testdata-$n.txt
}

# OSH is like 10x faster here!
array_ref-one() {
  ### Run one array_ref task (arrays)

  local name=${1:-bubble_sort}
  local runtime=$2
  local mode=${3:-seq}
  local n=${4:-100}

  seq $n | shuf | $runtime benchmarks/compute/array_ref.$(ext $runtime) $mode
}

palindrome-one() {
  ### Run one palindrome task (strings)

  local name=${1:-palindrome}
  local runtime=$2
  local mode=${3:-unicode}

  $runtime benchmarks/compute/palindrome.$(ext $runtime) $mode \
    < $BASE_DIR/tmp/$name/testdata.txt
}

parse_help-one() {
  ### Run one parse_help task (strings, real code)

  local name=${1:-parse_help}
  local runtime=$2
  local workload=${3:-}

  $runtime benchmarks/parse-help/pure-excerpt.sh _parse_help - \
    < benchmarks/parse-help/$workload.txt
}

#
# Helpers
#

hello-all() { task-all hello "$@"; }
fib-all() { task-all fib "$@"; }
for_loop-all() { task-all for_loop "$@"; }
control_flow-all() { task-all control_flow "$@"; }
word_freq-all() { task-all word_freq "$@"; }
assoc_array-all() { task-all assoc_array "$@"; }

# TODO: Fix the OSH comparison operator! It gives the wrong answer and
# completes quickly.
bubble_sort-all() { task-all bubble_sort "$@"; }

# Array that is not quadratic
array_ref-all() { task-all array_ref "$@"; }

# Hm osh is a little slower here
palindrome-all() { task-all palindrome "$@"; }

parse_help-all() { task-all parse_help "$@"; }

task-all() {
  local task_name=$1
  local provenance=$2
  local host_job_id=$3
  local out_dir=$4  # put files to save in benchmarks-data repo here

  local tmp_dir=$BASE_DIR/tmp/$task_name

  local times_tsv=$out_dir/$task_name/$host_job_id.times.tsv
  rm -f $times_tsv

  mkdir -p $tmp_dir $out_dir/$task_name

  banner "*** $task_name ***"

  # header
  tsv-row \
    status elapsed_secs user_secs sys_secs max_rss_KiB \
    stdout_md5sum \
    host_name host_hash \
    runtime_name runtime_hash \
    task_name arg1 arg2 stdout_filename > $times_tsv

  local task_id=0

  ${task_name}-tasks $provenance > $tmp_dir/tasks.txt

  cat $tmp_dir/tasks.txt |
  while read _ host host_hash runtime runtime_hash _ arg1 arg2; do
    local file
    case $runtime in
      python2)
        file='py'
        ;;
      # awk doesn't match either pattern below, so handle it explicitly
      awk)
        file='awk'
        ;;
      *sh | *osh*)
        file=$(basename $runtime)
        ;;
    esac

    local stdout_filename="stdout-$file-$arg1-$(basename $arg2).txt"

    local -a cmd
    case $task_name in
      hello|fib|for_loop|control_flow)
        # Run the benchmark DIRECTLY; don't re-invoke $0, because forking bash
        # first (only to then run dash, etc.) would inflate the memory
        # measurement, since bash uses more memory.
        args=( benchmarks/compute/$task_name.$(ext $runtime) "$arg1" "$arg2" )

        case $runtime in
          # awk needs the -f flag to run a program file
          awk) cmd=($runtime -f "${args[@]}") ;;
          *) cmd=($runtime "${args[@]}") ;;
        esac
        ;;
      *)
        cmd=($0 ${task_name}-one "$task_name" "$runtime" "$arg1" "$arg2")
        ;;
    esac

    # join args into a single field
    time-tsv -o $times_tsv --append \
      --stdout $tmp_dir/$stdout_filename \
      --rusage \
      --field "$host" --field "$host_hash" \
      --field $runtime --field $runtime_hash \
      --field "$task_name" --field "$arg1" --field "$arg2" \
      --field "$stdout_filename" -- \
      "${cmd[@]}"

    task_id=$((task_id + 1))
  done

  #wc -l _tmp/compute/word_freq/*
  maybe-tree $tmp_dir
  cat $times_tsv
}

#
# Testdata
#

bubble_sort-testdata() {
  local out=$BASE_DIR/tmp/bubble_sort
  mkdir -p $out

  # TODO: Make these deterministic for more stable benchmarks?
  for n in 100 200 300 400; do
    seq $n | shuf > $out/testdata-$n.txt
  done

  wc -l $out/testdata-*.txt
}

palindrome-testdata() {
  local out=$BASE_DIR/tmp/palindrome
  mkdir -p $out

  # TODO: Use iters?

  for i in $(seq 500); do
    cat <<EOF
foo
a
tat
cat

noon
amanaplanacanalpanama

μ
-μ-
EOF

  done > $out/testdata.txt

  wc -l $out/testdata.txt
}

measure() {
  local provenance=$1
  local host_job_id=$2
  local out_dir=${3:-$BASE_DIR/raw}  # ../benchmark-data/compute

  mkdir -p $BASE_DIR/{tmp,raw,stage1} $out_dir

  # set -x
  hello-all $provenance $host_job_id $out_dir
  fib-all $provenance $host_job_id $out_dir

  for_loop-all $provenance $host_job_id $out_dir
  control_flow-all $provenance $host_job_id $out_dir

  #return

  # TODO: doesn't work because we would need duplicate logic in stage1
  #if test -n "${QUICKLY:-}"; then
  #  return
  #fi

  word_freq-all $provenance $host_job_id $out_dir
  parse_help-all $provenance $host_job_id $out_dir

  bubble_sort-testdata
  palindrome-testdata

  bubble_sort-all $provenance $host_job_id $out_dir

  # INCORRECT, but still run it
  palindrome-all $provenance $host_job_id $out_dir

  # array_ref takes too long to show quadratic behavior, and that's only
  # necessary on 1 machine. I think I will make a separate blog post,
  # if anything.

  maybe-tree $out_dir
}

soil-run() {
  ### Run it on just this machine, and make a report

  rm -r -f $BASE_DIR
  mkdir -p $BASE_DIR

  # Test the one that's IN TREE, NOT in ../benchmark-data
  local -a oils_bin=(
    _bin/cxx-opt/osh _bin/cxx-opt+bumpleak/osh _bin/cxx-opt/mycpp-souffle/osh
    _bin/cxx-opt/ysh _bin/cxx-opt+bumpleak/ysh _bin/cxx-opt/mycpp-souffle/ysh
  )
  ninja "${oils_bin[@]}"

  local single_machine='no-host'

  local job_id
  job_id=$(benchmarks/id.sh print-job-id)

  # Only measure what's in the Docker image
  # - The Soil 'benchmarks' job uses the 'cpp' Docker image, which doesn't have
  #   layer-cpython, ../oil_DEPS/cpython-full
  # - It also doesn't have mksh or zsh

  benchmarks/id.sh shell-provenance-2 \
    $single_machine $job_id _tmp \
    bash dash python2 awk "${oils_bin[@]}"

  local provenance=_tmp/provenance.txt
  local host_job_id="$single_machine.$job_id"

  measure $provenance $host_job_id

  # Make it run on one machine
  stage1 '' $single_machine

  benchmarks/report.sh stage2 $BASE_DIR
  benchmarks/report.sh stage3 $BASE_DIR
}


test-report() {
  # Make it run on one machine
  stage1 '' no-host

  benchmarks/report.sh stage2 $BASE_DIR
  benchmarks/report.sh stage3 $BASE_DIR
}

stage1() {
  local raw_dir=${1:-$BASE_DIR/raw}

  # This report works even if we only have one machine
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1
  mkdir -p $out_dir

  local times_tsv=$out_dir/times.tsv

  local -a raw=()

  # TODO: We should respect QUICKLY=1
  for metric in hello fib for_loop control_flow word_freq parse_help bubble_sort palindrome; do
    local dir=$raw_dir/$metric

    if test -n "$single_machine"; then
      local -a a=($dir/$single_machine.*.times.tsv)
      raw+=(${a[-1]})
    else
      # Globs are in lexicographical order, which works for our dates.
      local -a a=($dir/$MACHINE1.*.times.tsv)
      local -a b=($dir/$MACHINE2.*.times.tsv)  # HACK for now

      # take the latest file
      raw+=(${a[-1]} ${b[-1]})
    fi

  done
  csv-concat ${raw[@]} > $times_tsv
  wc -l $times_tsv
}

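# Note on the "latest file" trick in stage1: glob results expand in
# lexicographic order, and the job IDs embed dates, so the last array element
# is the newest file.  A tiny illustration (filenames are hypothetical):
#
#   $ a=(no-host.2025-01-02.times.tsv no-host.2025-03-04.times.tsv)
#   $ echo ${a[-1]}
#   no-host.2025-03-04.times.tsv
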
print-report() {
  local in_dir=$1

  benchmark-html-head 'OSH Compute Performance'

  cat <<EOF
  <body class="width60">
    <p id="home-link">
      <a href="/">oils.pub</a>
    </p>
EOF
  cmark <<EOF

## OSH Compute Performance

Running time and memory usage of programs that test data structures (as opposed
to I/O).

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).

Source code: [oils/benchmarks/compute](https://github.com/oils-for-unix/oils/tree/master/benchmarks/compute)

EOF

  cmark <<EOF
### hello (minimal startup)

EOF

  tsv2html $in_dir/hello.tsv

  cmark <<EOF
### fibonacci (integers)

- arg1: number of repetitions
- arg2: the N in fib(N)
EOF

  tsv2html $in_dir/fib.tsv

  cmark <<EOF
### for loop

- arg1: the N to sum
EOF

  tsv2html $in_dir/for_loop.tsv

  cmark <<EOF
### control flow

- arg1: the N to sum
EOF

  tsv2html $in_dir/control_flow.tsv

  cmark <<EOF
### word_freq (associative arrays / hash tables)

- arg1: number of repetitions
- arg2: the file (varies size of hash table)
EOF

  tsv2html $in_dir/word_freq.tsv

  cmark <<EOF
### parse_help (strings, real code)

- arg1: file to parse
EOF

  tsv2html $in_dir/parse_help.tsv

  cmark <<EOF
### bubble_sort (array of integers, arrays of strings)

- arg1: type of array
- arg2: length of array
EOF

  tsv2html $in_dir/bubble_sort.tsv

  # Comment out until checksum is fixed

if false; then
  cmark <<EOF
### palindrome (byte strings, unicode strings)

- arg1: type of string
- arg2: TODO: length of string
EOF

  tsv2html $in_dir/palindrome.tsv

fi

  cmark <<EOF
### Interpreter and Host Details
EOF

  tsv2html $in_dir/shells.tsv
  tsv2html $in_dir/hosts.tsv

  cmark <<EOF
### Details
EOF

  tsv2html $in_dir/details.tsv

  cmark <<EOF
### Stdout Files
EOF

  tsv2html $in_dir/stdout_files.tsv


  cat <<EOF
  </body>
</html>
EOF
}

#
# Run by hand
#

run-control-flow() {
  ### Reproduce OSH perf bug because of C++ exceptions

  # do_neither: 0.288 dash, 0.872 bash, 0.865 OSH
  # do_continue: 0.310 dash, 1.065 bash, 2.313 OSH
  # do_break: 0.222 dash, 0.712 bash, 1.430 OSH

  local osh=_bin/cxx-opt/osh
  #set -x

  ninja $osh

  for func in do_none do_continue do_break do_return; do
  #for func in do_return; do
    echo "=== $func"
    echo
    for sh in dash bash $osh; do
      echo "--- $sh"
      # TIMEFORMAT above
      time $sh benchmarks/compute/control_flow.sh $func 200
      echo
    done
  done
}

# TODO: could make this a proper benchmark
word-split() {
  ### Test word splitting perf
  export OILS_GC_STATS=${1:-}

  local osh=_bin/cxx-opt/osh
  #set -x

  ninja $osh

  #local filename=README.md

  # Hm our word splitting actually isn't that slow?
  # TODO: measure allocs too?

  # Hm allocating over a million objects, but it's faster than bash
  # Most are in the pools

  local filename=benchmarks/testdata/configure-coreutils

  for func in default_ifs other_ifs; do
    echo "=== $func"
    echo
    for sh in dash bash $osh; do
      echo "--- $sh"
      # TIMEFORMAT above
      time $sh benchmarks/compute/word_split.sh $func $filename
      echo
    done
  done
}

"$@"