#!/usr/bin/env bash
#
# Compare operations on data structures, with little I/O: strings, arrays,
# associative arrays, integers.
#
# Usage:
#   benchmarks/compute.sh <function name>
#
# List of benchmarks:
#
# - fib: integer, loop, assignment (shells don't have real integers)
# - for_loop: 2025 update, taken from benchmarks/ysh-for.sh
# - word_freq: hash table / assoc array (OSH uses a vector<pair<>> now!),
#   plus an integer counter
# - bubble_sort: indexed array (bash uses a linked list?)
# - palindrome: strings, slicing, unicode
# - parse_help: realistic shell-only string processing, which I didn't write.
#
# TODO:
# - Fix the BUGS
#   - palindrome doesn't work? Unicode? Does it depend on UTF-8 decoding?
#   - bubble_sort depends on locale too - there is an LC_ALL setting here
#
# - This file is annoying to TEST
#   - to add to it, you also have to change benchmarks/report.R
#   - and there is a loop in 'stage1' as well
#   - why can't it behave like other benchmarks?
#     - they are using this "big table" pattern

# - vary the problem size, which is different from iters
#   - bubble_sort: array length, to test complexity of array indexing
#   - palindrome: longer lines, to test complexity of unicode/byte slicing
#   - word_freq: more unique words, to test complexity of assoc arrays
#   - for_loop and fib are kinda similar
#     - maybe for_loop can introduce some conditionals
#
# - other languages
#   - awk, mawk, etc.
#   - we are going for Awk speed!
#
# Questions to answer:
# - Can a fast bytecode runtime be as fast as Python?
#   - measurement issue: Python kills at fib/for_loop - it's mostly process
#     startup time
#   - bubble_sort and word_freq are a bit more work
#     - can add YSH versions of those
#   - I wonder if they are testing data structures or the actual interpreter
#     loop though
#   - it's possible that speeding up the interpreter loop doesn't help much
# - the real motivations behind bytecode:
#   - to fix 'break continue'
#   - add coroutine support too - that suspends and resumes a frame, which we
#     can't do now
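#
# Examples (run by hand):
#
#   benchmarks/compute.sh soil-run
#   benchmarks/compute.sh run-control-flow
#   benchmarks/compute.sh word_freq-one word_freq bash 10 configure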

set -o nounset
set -o pipefail
set -o errexit

REPO_ROOT=$(cd $(dirname $0)/.. && pwd)
readonly REPO_ROOT

source benchmarks/common.sh  # filter-provenance
source build/dev-shell.sh  # python2
source test/tsv-lib.sh  # tsv2html

readonly BASE_DIR=_tmp/compute

# Stabilize 'sort' output across machines (ugh locales!)
export LC_ALL=C

TIMEFORMAT='%U'

readonly OSH_YSH_OPT_REGEX='_bin/cxx-opt(-sh)?/(mycpp-souffle/)?(osh|ysh)'

# task_name,iter,args
hello-tasks() {
  local provenance=$1

  # Append the task fields (task name, arg1, arg2) to each provenance row.
  cat $provenance | filter-provenance python2 awk bash dash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'hello _ _' | xargs -n 3 -- echo "$fields"
  done
}

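# The xargs trick above appends the task fields to every provenance row.  A
# rough sketch of the effect (the provenance values here are made up):
#
#   $ echo 'no-host abc123 bash def456' | while read fields; do
#   >   echo 'fib 200 44' | xargs -n 3 -- echo "$fields"
#   > done
#   no-host abc123 bash def456 fib 200 44
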
# task_name,iter,args
fib-tasks() {
  local provenance=$1

  # Append the task fields (task name, arg1, arg2) to each provenance row.
  cat $provenance | filter-provenance python2 awk bash dash "$OSH_YSH_OPT_REGEX" |
  while read fields; do
    echo 'fib 200 44' | xargs -n 3 -- echo "$fields"
  done
}

# task_name,iter,args
for_loop-tasks() {
  local provenance=$1

  # bumpleak segfaults on for_loop! Probably because it runs out of memory
  cat $provenance | filter-provenance python2 awk bash dash "$OSH_YSH_OPT_REGEX" |
  while read fields; do
    echo 'for_loop 50000 _' | xargs -n 3 -- echo "$fields"
  done
}

# task_name,iter,args
control_flow-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance bash dash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'control_flow do_return 200' | xargs -n 3 -- echo "$fields"
  done
}

word_freq-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    # BUG: oils-for-unix differs on these two. Looks like it's related to
    # backslashes!
    #echo 'word_freq 10 benchmarks/testdata/abuild' | xargs -n 3 -- echo "$fields"
    #echo 'word_freq 2 benchmarks/testdata/ltmain.sh' | xargs -n 3 -- echo "$fields"
    echo 'word_freq 10 configure' | xargs -n 3 -- echo "$fields"
  done
}

assoc_array-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    for n in 1000 2000 3000; do
      echo "word_freq 10 $n" | xargs -n 3 -- echo "$fields"
    done
  done
}

bubble_sort-tasks() {
  # Note: this is quadratic, but bubble sort itself is quadratic!
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'bubble_sort int 200' | xargs -n 3 -- echo "$fields"
    echo 'bubble_sort bytes 200' | xargs -n 3 -- echo "$fields"
  done
}

# Arrays are doubly linked lists in bash! With a LASTREF hack to avoid being
# quadratic.
#
# See array_reference() in array.c in bash. It searches both back and
# forward. Every cell has its index, a value, a forward pointer, and a back
# pointer.
#
# You need pretty high N to see the quadratic behavior though!

# NOTE: osh is also slower with linear access, but not superlinear!

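# A rough way to see this by hand (a sketch; not one of the measured tasks).
# It reuses benchmarks/compute/array_ref.sh, which reads numbers from stdin
# and takes the access mode ('seq' or 'random') as an argument.
demo-array-ref() {
  local n=${1:-20000}
  for mode in seq random; do
    echo "--- bash array_ref $mode n=$n"
    time (seq $n | shuf | bash benchmarks/compute/array_ref.sh $mode)
  done
}
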
array_ref-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance bash |
  while read fields; do
    for mode in seq random; do
      for n in 10000 20000 30000 40000; do
        echo "array_ref $mode $n" | xargs -n 3 -- echo "$fields"
      done
    done
  done

#array_ref $OSH_CC seq 5000
#array_ref $OSH_CC seq 10000
#array_ref $OSH_CC random 5000
#array_ref $OSH_CC random 10000
#EOF
}

palindrome-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'palindrome unicode _' | xargs -n 3 -- echo "$fields"
    echo 'palindrome bytes _' | xargs -n 3 -- echo "$fields"
  done
}

parse_help-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance bash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'parse_help ls-short _' | xargs -n 3 -- echo "$fields"
    echo 'parse_help ls _' | xargs -n 3 -- echo "$fields"
    echo 'parse_help mypy _' | xargs -n 3 -- echo "$fields"
  done
}

ext() {
  ### Map a runtime name to the file extension of its benchmark program
  local runtime=$1

  case $runtime in
    python2)
      echo 'py'
      ;;
    awk)
      echo 'awk'
      ;;
    *ysh*)
      echo 'ysh'
      ;;
    *sh | *osh*)
      echo 'sh'
      ;;
  esac
}

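# Informal check of the mapping above, run by hand (it relies on the "$@"
# dispatch at the bottom of this file):
#
#   $ benchmarks/compute.sh ext _bin/cxx-opt/ysh
#   ysh
#   $ benchmarks/compute.sh ext bash
#   sh
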
word_freq-one() {
  ### Run one word_freq task (hash tables)

  local name=${1:-word_freq}
  local runtime=$2

  local iters=${3:-10}
  local in=${4:-configure}  # input

  $runtime benchmarks/compute/word_freq.$(ext $runtime) $iters < $in | sort -n
}

assoc_array-one() {
  ### Run word_freq with seq

  local name=${1:-word_freq}
  local runtime=$2

  local iters=${3:-10}
  local n=${4:-10}

  # shuf so we don't get the bash optimization
  seq $n | shuf |
    $runtime benchmarks/compute/word_freq.$(ext $runtime) $iters | sort -n
}

bubble_sort-one() {
  ### Run one bubble_sort task (arrays)

  local name=${1:-bubble_sort}
  local runtime=$2
  local mode=${3:-int}
  local n=${4:-100}

  $runtime benchmarks/compute/bubble_sort.$(ext $runtime) $mode \
    < $BASE_DIR/tmp/$name/testdata-$n.txt
}

# OSH is like 10x faster here!
array_ref-one() {
  ### Run one array_ref task (arrays)

  local name=${1:-bubble_sort}
  local runtime=$2
  local mode=${3:-seq}
  local n=${4:-100}

  seq $n | shuf | $runtime benchmarks/compute/array_ref.$(ext $runtime) $mode
}

palindrome-one() {
  ### Run one palindrome task (strings)

  local name=${1:-palindrome}
  local runtime=$2
  local mode=${3:-unicode}

  $runtime benchmarks/compute/palindrome.$(ext $runtime) $mode \
    < $BASE_DIR/tmp/$name/testdata.txt
}

parse_help-one() {
  ### Run one parse_help task (strings, real code)

  local name=${1:-parse_help}
  local runtime=$2
  local workload=${3:-}

  $runtime benchmarks/parse-help/pure-excerpt.sh _parse_help - \
    < benchmarks/parse-help/$workload.txt
}

#
# Helpers
#

hello-all() { task-all hello "$@"; }
fib-all() { task-all fib "$@"; }
for_loop-all() { task-all for_loop "$@"; }
control_flow-all() { task-all control_flow "$@"; }
word_freq-all() { task-all word_freq "$@"; }
assoc_array-all() { task-all assoc_array "$@"; }

# TODO: Fix the OSH comparison operator! It gives the wrong answer and
# completes quickly.
bubble_sort-all() { task-all bubble_sort "$@"; }

# Array that is not quadratic
array_ref-all() { task-all array_ref "$@"; }

# Hm osh is a little slower here
palindrome-all() { task-all palindrome "$@"; }

parse_help-all() { task-all parse_help "$@"; }

task-all() {
  local task_name=$1
  local provenance=$2
  local host_job_id=$3
  local out_dir=$4  # put files to save in benchmarks-data repo here

  local tmp_dir=$BASE_DIR/tmp/$task_name

  local times_tsv=$out_dir/$task_name/$host_job_id.times.tsv
  rm -f $times_tsv

  mkdir -p $tmp_dir $out_dir/$task_name

  banner "*** $task_name ***"

  # header
  tsv-row \
    status elapsed_secs user_secs sys_secs max_rss_KiB \
    stdout_md5sum \
    host_name host_hash \
    runtime_name runtime_hash \
    task_name arg1 arg2 stdout_filename > $times_tsv

  local task_id=0

  ${task_name}-tasks $provenance > $tmp_dir/tasks.txt

  cat $tmp_dir/tasks.txt |
  while read _ host host_hash runtime runtime_hash _ arg1 arg2; do
    local file
    case $runtime in
      python2)
        file='py'
        ;;
      # awk doesn't match either pattern below, so handle it explicitly
      awk)
        file='awk'
        ;;
      *sh | *osh*)
        file=$(basename $runtime)
        ;;
    esac

    local stdout_filename="stdout-$file-$arg1-$(basename $arg2).txt"

    local -a cmd
    case $task_name in
      hello|fib|for_loop|control_flow)
        # Run the benchmark DIRECTLY; don't re-invoke $0, because forking bash
        # first (only to then run dash, etc.) would inflate the memory
        # measurement, since bash uses more memory.
        args=( benchmarks/compute/$task_name.$(ext $runtime) "$arg1" "$arg2" )

        case $runtime in
          # awk needs the -f flag to run a program file
          awk) cmd=($runtime -f "${args[@]}") ;;
          *) cmd=($runtime "${args[@]}") ;;
        esac
        ;;
      *)
        cmd=($0 ${task_name}-one "$task_name" "$runtime" "$arg1" "$arg2")
        ;;
    esac

    # join args into a single field
    time-tsv -o $times_tsv --append \
      --stdout $tmp_dir/$stdout_filename \
      --rusage \
      --field "$host" --field "$host_hash" \
      --field $runtime --field $runtime_hash \
      --field "$task_name" --field "$arg1" --field "$arg2" \
      --field "$stdout_filename" -- \
      "${cmd[@]}"

    task_id=$((task_id + 1))
  done

  #wc -l _tmp/compute/word_freq/*
  maybe-tree $tmp_dir
  cat $times_tsv
}

#
# Testdata
#

bubble_sort-testdata() {
  local out=$BASE_DIR/tmp/bubble_sort
  mkdir -p $out

  # TODO: Make these deterministic for more stable benchmarks?
  for n in 100 200 300 400; do
    seq $n | shuf > $out/testdata-$n.txt
  done

  wc -l $out/testdata-*.txt
}

palindrome-testdata() {
  local out=$BASE_DIR/tmp/palindrome
  mkdir -p $out

  # TODO: Use iters?

  for i in $(seq 500); do
    cat <<EOF
foo
a
tat
cat

noon
amanaplanacanalpanama

μ
-μ-
EOF

  done > $out/testdata.txt

  wc -l $out/testdata.txt
}

measure() {
  local provenance=$1
  local host_job_id=$2
  local out_dir=${3:-$BASE_DIR/raw}  # ../benchmark-data/compute

  mkdir -p $BASE_DIR/{tmp,raw,stage1} $out_dir

  # set -x
  hello-all $provenance $host_job_id $out_dir
  fib-all $provenance $host_job_id $out_dir

  for_loop-all $provenance $host_job_id $out_dir
  control_flow-all $provenance $host_job_id $out_dir

  #return

  # TODO: doesn't work because we would need duplicate logic in stage1
  #if test -n "${QUICKLY:-}"; then
  #  return
  #fi

  word_freq-all $provenance $host_job_id $out_dir
  parse_help-all $provenance $host_job_id $out_dir

  bubble_sort-testdata
  palindrome-testdata

  bubble_sort-all $provenance $host_job_id $out_dir

  # INCORRECT, but still run it
  palindrome-all $provenance $host_job_id $out_dir

  # array_ref takes too long to show quadratic behavior, and that's only
  # necessary on 1 machine. I think I will make a separate blog post,
  # if anything.

  maybe-tree $out_dir
}

soil-run() {
  ### Run it on just this machine, and make a report

  rm -r -f $BASE_DIR
  mkdir -p $BASE_DIR

  # Test the one that's IN TREE, NOT in ../benchmark-data
  local -a oils_bin=(
    _bin/cxx-opt/osh _bin/cxx-opt+bumpleak/osh _bin/cxx-opt/mycpp-souffle/osh
    _bin/cxx-opt/ysh _bin/cxx-opt+bumpleak/ysh _bin/cxx-opt/mycpp-souffle/ysh
  )
  ninja "${oils_bin[@]}"

  local single_machine='no-host'

  local job_id
  job_id=$(benchmarks/id.sh print-job-id)

  # Only measure what's in the Docker image
  # - The Soil 'benchmarks' job uses the 'cpp' Docker image, which doesn't have
  #   layer-cpython, ../oil_DEPS/cpython-full
  # - It also doesn't have mksh or zsh

  benchmarks/id.sh shell-provenance-2 \
    $single_machine $job_id _tmp \
    bash dash python2 awk "${oils_bin[@]}"

  local provenance=_tmp/provenance.txt
  local host_job_id="$single_machine.$job_id"

  measure $provenance $host_job_id

  # Make it run on one machine
  stage1 '' $single_machine

  benchmarks/report.sh stage2 $BASE_DIR
  benchmarks/report.sh stage3 $BASE_DIR
}


test-report() {
  # Make it run on one machine
  stage1 '' no-host

  benchmarks/report.sh stage2 $BASE_DIR
  benchmarks/report.sh stage3 $BASE_DIR
}

stage1() {
  local raw_dir=${1:-$BASE_DIR/raw}

  # This report works even if we only have one machine
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1
  mkdir -p $out_dir

  local times_tsv=$out_dir/times.tsv

  local -a raw=()

  # TODO: We should respect QUICKLY=1
  for metric in hello fib for_loop control_flow word_freq parse_help bubble_sort palindrome; do
    local dir=$raw_dir/$metric

    if test -n "$single_machine"; then
      local -a a=($dir/$single_machine.*.times.tsv)
      raw+=(${a[-1]})
    else
      # Globs are in lexicographical order, which works for our dates.
      local -a a=($dir/$MACHINE1.*.times.tsv)
      local -a b=($dir/$MACHINE2.*.times.tsv)  # HACK for now

      # take the latest file
      raw+=(${a[-1]} ${b[-1]})
    fi

  done
  csv-concat ${raw[@]} > $times_tsv
  wc -l $times_tsv
}

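# Note on the "latest file" trick in stage1: glob results expand in
# lexicographic order, and the job IDs embed dates, so the last array element
# is the newest file.  A tiny illustration (filenames are hypothetical):
#
#   $ a=(no-host.2025-01-02.times.tsv no-host.2025-03-04.times.tsv)
#   $ echo ${a[-1]}
#   no-host.2025-03-04.times.tsv
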
print-report() {
  local in_dir=$1

  benchmark-html-head 'OSH Compute Performance'

  cat <<EOF
  <body class="width60">
    <p id="home-link">
      <a href="/">oils.pub</a>
    </p>
EOF
  cmark <<EOF

## OSH Compute Performance

Running time and memory usage of programs that test data structures (as opposed
to I/O).

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).

Source code: [oils/benchmarks/compute](https://github.com/oils-for-unix/oils/tree/master/benchmarks/compute)

EOF

  cmark <<EOF
### hello (minimal startup)

EOF

  tsv2html $in_dir/hello.tsv

  cmark <<EOF
### fibonacci (integers)

- arg1: number of repetitions
- arg2: the N in fib(N)
EOF

  tsv2html $in_dir/fib.tsv

  cmark <<EOF
### for loop

- arg1: the N to sum
EOF

  tsv2html $in_dir/for_loop.tsv

  cmark <<EOF
### control flow

- arg1: the N to sum
EOF

  tsv2html $in_dir/control_flow.tsv

  cmark <<EOF
### word_freq (associative arrays / hash tables)

- arg1: number of repetitions
- arg2: the file (varies size of hash table)
EOF

  tsv2html $in_dir/word_freq.tsv

  cmark <<EOF
### parse_help (strings, real code)

- arg1: file to parse
EOF

  tsv2html $in_dir/parse_help.tsv

  cmark <<EOF
### bubble_sort (array of integers, arrays of strings)

- arg1: type of array
- arg2: length of array
EOF

  tsv2html $in_dir/bubble_sort.tsv

  # Comment out until checksum is fixed

if false; then
  cmark <<EOF
### palindrome (byte strings, unicode strings)

- arg1: type of string
- arg2: TODO: length of string
EOF

  tsv2html $in_dir/palindrome.tsv

fi

  cmark <<EOF
### Interpreter and Host Details
EOF

  tsv2html $in_dir/shells.tsv
  tsv2html $in_dir/hosts.tsv

  cmark <<EOF
### Details
EOF

  tsv2html $in_dir/details.tsv

  cmark <<EOF
### Stdout Files
EOF

  tsv2html $in_dir/stdout_files.tsv


  cat <<EOF
  </body>
</html>
EOF
}

#
# Run by hand
#

run-control-flow() {
  ### Reproduce OSH perf bug because of C++ exceptions

  # do_neither: 0.288 dash, 0.872 bash, 0.865 OSH
  # do_continue: 0.310 dash, 1.065 bash, 2.313 OSH
  # do_break: 0.222 dash, 0.712 bash, 1.430 OSH

  local osh=_bin/cxx-opt/osh
  #set -x

  ninja $osh

  for func in do_none do_continue do_break do_return; do
  #for func in do_return; do
    echo "=== $func"
    echo
    for sh in dash bash $osh; do
      echo "--- $sh"
      # TIMEFORMAT above
      time $sh benchmarks/compute/control_flow.sh $func 200
      echo
    done
  done
}

# TODO: could make this a proper benchmark
word-split() {
  ### Test word splitting perf
  export OILS_GC_STATS=${1:-}

  local osh=_bin/cxx-opt/osh
  #set -x

  ninja $osh

  #local filename=README.md

  # Hm our word splitting actually isn't that slow?
  # TODO: measure allocs too?

  # Hm allocating over a million objects, but it's faster than bash
  # Most are in the pools

  local filename=benchmarks/testdata/configure-coreutils

  for func in default_ifs other_ifs; do
    echo "=== $func"
    echo
    for sh in dash bash $osh; do
      echo "--- $sh"
      # TIMEFORMAT above
      time $sh benchmarks/compute/word_split.sh $func $filename
      echo
    done
  done
}

"$@"