benchmarks/osh-runtime.sh

OILS / benchmarks / osh-runtime.sh View on Github | oils.pub

586 lines, 345 significant

#!/usr/bin/env bash
#
# Test scripts found in the wild for both correctness and performance.
#
# Usage:
#   benchmarks/osh-runtime.sh <function name>
#
# The 'source' paths and BASE_DIR below are relative, so run this from the
# directory that contains benchmarks/, build/ and test/.

set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the parent of this script's directory.
# "$0" is quoted so a path containing spaces isn't word-split, and '&&' makes
# the assignment fail (instead of reporting the current dir) if 'cd' fails.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)

source benchmarks/common.sh  # tsv-concat
source benchmarks/id.sh  # print-job-id
source build/dev-shell.sh  # python2
source test/common.sh
source test/tsv-lib.sh  # tsv-row

readonly BASE_DIR=_tmp/osh-runtime

# TODO: Move to ../oil_DEPS
readonly TAR_DIR=$PWD/_deps/osh-runtime  # Make it absolute

#
# Dependencies
#

readonly PY27_DIR=$PWD/Python-2.7.13
# NOTE: Same list in oilshell.org/blob/run.sh.
tarballs() {
  ### Print the name of each testdata tarball, one per line
  printf '%s\n' \
    tcc-0.9.26.tar.bz2 \
    yash-2.46.tar.xz \
    ocaml-4.06.0.tar.xz \
    util-linux-2.40.tar.xz
}
40
download() {
  ### Fetch every tarball named by 'tarballs' into $TAR_DIR
  #
  # wget --no-clobber leaves files that already exist alone.
  mkdir -p "$TAR_DIR"

  # -I {} already runs one wget per input line; GNU xargs warns when it is
  # combined with -n, so -n is not passed.
  tarballs | xargs -I {} --verbose -- \
    wget --no-clobber --directory-prefix "$TAR_DIR" \
    'https://www.oilshell.org/blob/testdata/{}'
}
46
extract() {
  ### Unpack every .bz2 and .xz archive found in $TAR_DIR into $TAR_DIR
  local f

  set -x
  time for f in "$TAR_DIR"/*.{bz2,xz}; do
    # An unmatched glob stays a literal pattern; don't hand that to tar.
    test -e "$f" || continue
    tar -x --directory "$TAR_DIR" --file "$f"
  done
  set +x

  ls -l "$TAR_DIR"
}
56
57	#
58	# Computation
59	#
60
run-tasks() {
  ### Run every task read from stdin under time-tsv
  #
  # Args:
  #   $1: output dir, relative to the current directory
  # Stdin:
  #   One task per line: host_name sh_path workload (whitespace-separated)
  # Files written under the output dir, where N is the 0-based task number:
  #   times.tsv           passed to time-tsv as --output, with --append
  #   files-N/STDOUT.txt  stdout of task N
  #   gc-N.txt            whatever the shell writes to fd 99; only for sh_path
  #                       matching *_bin/*/osh
  #
  # NOTE(review): the commands run in this loop inherit the loop's stdin (the
  # task list); presumably no workload reads stdin -- confirm.

  local raw_out_dir=$1
  raw_out_dir="$PWD/$raw_out_dir"  # because we change dirs

  # Bug fix for dynamic scoping!
  local host_name sh_path workload

  local task_id=0
  while read -r host_name sh_path workload; do

    log "*** $host_name $sh_path $workload $task_id"

    local sh_run_path
    case $sh_path in
      /*)  # Already absolute
        sh_run_path=$sh_path
        ;;
      */*)  # It's relative, so make it absolute (we may pushd below)
        sh_run_path=$PWD/$sh_path
        ;;
      *)  # 'dash' should remain 'dash'
        sh_run_path=$sh_path
        ;;
    esac

    local working_dir=''
    local files_out_dir="$raw_out_dir/files-$task_id"
    mkdir -v -p "$files_out_dir"

    # Never set to non-empty in this function, so the "copy new files" steps
    # below are currently disabled.
    local save_new_files=''

    local -a argv
    case $workload in
      hello-world)
        argv=( testdata/osh-runtime/hello_world.sh )
        ;;

      bin-true)
        argv=( testdata/osh-runtime/bin_true.sh )
        ;;

      abuild-print-help)
        argv=( testdata/osh-runtime/abuild -h )
        ;;

      configure.cpython)
        argv=( "$PY27_DIR/configure" )
        working_dir=$files_out_dir
        ;;

      configure.util-linux)
        # flag needed to avoid sqlite3 dep error message
        argv=( "$TAR_DIR/util-linux-2.40/configure" --disable-liblastlog2 )
        working_dir=$files_out_dir
        ;;

      configure.*)
        argv=( ./configure )

        local conf_dir
        case $workload in
          *.ocaml)
            conf_dir='ocaml-4.06.0'
            ;;
          *.tcc)
            conf_dir='tcc-0.9.26'
            ;;
          *.yash)
            conf_dir='yash-2.46'
            ;;
          *)
            die "Invalid workload $workload"
            ;;
        esac

        # These are run in-tree?
        working_dir=$TAR_DIR/$conf_dir
        ;;

      *)
        die "Invalid workload $workload"
        ;;
    esac

    local -a time_argv=(
      time-tsv
        --output "$raw_out_dir/times.tsv" --append
        --rusage
        --rusage-2
        --field "$task_id"
        --field "$host_name" --field "$sh_path"
        --field "$workload"
        -- "$sh_run_path" "${argv[@]}"
    )

    local stdout_file="$files_out_dir/STDOUT.txt"
    local gc_stats_file="$raw_out_dir/gc-$task_id.txt"

    # Maybe change dirs
    if test -n "$working_dir"; then
      pushd "$working_dir"
    fi

    if test -n "$save_new_files"; then
      touch __TIMESTAMP
    fi

    # Run it, possibly with GC stats
    case $sh_path in
      *_bin/*/osh)  # relative or absolute path to an osh under _bin/
        OILS_GC_STATS_FD=99 "${time_argv[@]}" > "$stdout_file" 99> "$gc_stats_file"
        ;;
      *)
        "${time_argv[@]}" > "$stdout_file"
        ;;
    esac

    if test -n "$save_new_files"; then
      echo "COPYING to $files_out_dir"
      find . -type f -newer __TIMESTAMP \
        | xargs -I {} -- cp --verbose {} "$files_out_dir"
    fi

    # Restore dir
    if test -n "$working_dir"; then
      popd
    fi

    task_id=$((task_id + 1))
  done
}
191
# Sorted by priority for test-oils.sh osh-runtime --num-shells 3

readonly -a ALL_WORKLOADS=(
  # small scripts
  hello-world bin-true

  # configure scripts
  configure.cpython configure.util-linux
  configure.ocaml configure.tcc configure.yash

  # abuild -h
  abuild-print-help
)
206
print-workloads() {
  ### for help: print each name in ALL_WORKLOADS on its own line

  local w  # keep the loop variable out of the caller's scope
  for w in "${ALL_WORKLOADS[@]}"; do
    echo " $w"
  done
}
214
print-tasks() {
  ### Print one row per task via tsv-row: host_name, sh_path, workload
  #
  # Args:
  #   $1: host_name
  #   rest: osh binaries to run, in addition to bash and dash
  # Env:
  #   QUICKLY: if non-empty, use a short workload list instead of ALL_WORKLOADS
  local host_name=$1
  shift 1
  local -a osh_native=( "$@" )

  # Declared local so these don't leak into (or clobber) the caller's scope.
  local -a workloads
  local sh_path workload

  if test -n "${QUICKLY:-}"; then
    workloads=(
      hello-world
      bin-true
      #configure.util-linux
      #abuild-print-help
    )
  else
    workloads=( "${ALL_WORKLOADS[@]}" )
  fi

  for sh_path in bash dash "${osh_native[@]}"; do
    for workload in "${workloads[@]}"; do
      tsv-row "$host_name" "$sh_path" "$workload"
    done
  done
}
237
print-tasks-xshar() {
  ### Like print-tasks, but capped by iteration, shell and workload counts
  #
  # Args:
  #   $1: host_name
  #   $2: osh binary; listed before bash and dash
  #   $3: num_iters (default 1): how many times to repeat the whole list
  #   $4: num_shells (default 1): stop after this many shells
  #   $5: num_workloads (default 1): stop after this many workloads per shell
  #
  # A count of 0 never equals the running counter, so it means "no limit".
  local host_name=$1
  local osh_native=$2

  local num_iters=${3:-1}
  local num_shells=${4:-1}
  local num_workloads=${5:-1}

  # Locals, so the loops don't clobber same-named variables in callers.
  local i s w sh_path workload

  for (( i = 0; i < num_iters; i++ )); do

    s=0
    for sh_path in "$osh_native" bash dash; do

      w=0
      for workload in "${ALL_WORKLOADS[@]}"; do
        tsv-row "$host_name" "$sh_path" "$workload"

        w=$(( w + 1 ))  # cut off at specified workloads
        if test "$w" -eq "$num_workloads"; then
          break
        fi
      done

      s=$(( s + 1 ))  # cut off as specified shells
      if test "$s" -eq "$num_shells"; then
        break
      fi

    done
  done
}
269
test-print-tasks-xshar() {
  ### Print task lists for a few (num_shells, num_workloads) combinations
  local limits
  for limits in '1 1' '2 1' '2 2' '2 3'; do
    # $limits is deliberately unquoted: it splits into the last two arguments.
    print-tasks-xshar $(hostname) osh 1 $limits
    echo
  done
}
280
run-tasks-wrapper() {
  ### reads tasks from stdin
  #
  # Args:
  #   $1: host_name, added as a constant column to gc_stats.tsv
  #   $2: raw output dir; created if missing
  # Files written under the output dir: times.tsv (header here, rows from
  # run-tasks), gc_stats.tsv, and a copy of _tmp/provenance.tsv.

  local host_name=$1  # 'no-host' or 'lenny'
  local raw_out_dir=$2

  mkdir -v -p "$raw_out_dir"

  local tsv_out="$raw_out_dir/times.tsv"

  # Write header of the TSV file that is appended to.
  time-tsv -o "$tsv_out" --print-header \
    --rusage \
    --rusage-2 \
    --field task_id \
    --field host_name --field sh_path \
    --field workload

  # reads tasks from stdin
  # run-tasks outputs 3 things: raw times.tsv, per-task STDOUT and files, and
  # per-task GC stats
  run-tasks "$raw_out_dir"

  # Turn individual files into a TSV, adding host
  benchmarks/gc_stats_to_tsv.py "$raw_out_dir"/gc-*.txt \
    | tsv-add-const-column host_name "$host_name" \
    > "$raw_out_dir/gc_stats.tsv"

  cp -v _tmp/provenance.tsv "$raw_out_dir"
}
311
measure() {
  ### For release and CI
  #
  # Pipes the task list from print-tasks into run-tasks-wrapper.
  local host_name=$1  # 'no-host' or 'lenny'
  local raw_out_dir=$2  # _tmp/osh-runtime/$X or ../../benchmark-data/osh-runtime/$X
  shift 2
  local -a osh_native=( "$@" )  # $OSH_CPP_NINJA or $OSH_CPP_TWO, etc...

  print-tasks "$host_name" "${osh_native[@]}" \
    | run-tasks-wrapper "$host_name" "$raw_out_dir"
}
322
stage1() {
  ### Concatenate the latest raw TSVs per machine into $BASE_DIR/stage1
  #
  # Args:
  #   $1: dir holding raw.<machine>.<id> dirs (default: $BASE_DIR),
  #       e.g. _tmp/osh-runtime or ../benchmark-data/osh-runtime
  #   $2: machine name; if empty or omitted, $MACHINE1 and $MACHINE2 are used
  #
  # Output always goes to $BASE_DIR/stage1, regardless of $1.
  local base_dir=${1:-$BASE_DIR}
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1  # _tmp/osh-runtime
  mkdir -p "$out_dir"

  local -a machines
  if test -n "$single_machine"; then
    machines=( "$single_machine" )
  else
    machines=( "$MACHINE1" "$MACHINE2" )
  fi

  local -a raw_times=()
  local -a raw_gc_stats=()
  local -a raw_provenance=()

  local machine latest
  local -a candidates
  for machine in "${machines[@]}"; do
    # Globs are in lexicographical order, which works for our dates, so the
    # last match is the most recent run.
    candidates=( "$base_dir/raw.$machine."* )
    latest=${candidates[-1]}

    # An unmatched glob is left as the literal pattern; fail clearly here
    # rather than with a confusing error on a nonexistent TSV path.
    if ! test -d "$latest"; then
      die "No directory matches $base_dir/raw.$machine.*"
    fi

    raw_times+=( "$latest/times.tsv" )
    raw_gc_stats+=( "$latest/gc_stats.tsv" )
    raw_provenance+=( "$latest/provenance.tsv" )
  done

  tsv-concat "${raw_times[@]}" > "$out_dir/times.tsv"

  tsv-concat "${raw_gc_stats[@]}" > "$out_dir/gc_stats.tsv"

  tsv-concat "${raw_provenance[@]}" > "$out_dir/provenance.tsv"
}
359
print-report() {
  ### Write the 'OSH Runtime Performance' HTML page to stdout
  #
  # Args:
  #   $1: directory containing the .tsv files named below
  #
  # Markdown goes through cmark and each table through tsv2html. All heredocs
  # use a quoted delimiter because their text is literal (no expansions).
  local in_dir=$1

  benchmark-html-head 'OSH Runtime Performance'

  cat <<'EOF'
<body class="width60">
<p id="home-link">
<a href="/">oils.pub</a>
</p>
EOF

  cmark <<'EOF'
## OSH Runtime Performance

Source code: [benchmarks/osh-runtime.sh](https://github.com/oils-for-unix/oils/tree/master/benchmarks/osh-runtime.sh)

- [Elapsed Time](#elapsed-time)
- [Minor Page Faults](#page-faults)
- [Memory Usage](#memory-usage)
- [GC Stats](#gc-stats)
- [rusage Details](#rusage-details)
- [More Details](#more-details)
- [Shell and Host](#shell-and-host)

[Raw files](-wwz-index)

<a name="elapsed-time" />

### Elapsed Time by Shell (milliseconds)

Some benchmarks call many external tools, while some exercise the shell
interpreter itself.
EOF
  tsv2html "$in_dir/elapsed.tsv"

  cmark <<'EOF'
<a name="page-faults" />

### Minor Page Faults
EOF

  tsv2html "$in_dir/page_faults.tsv"

  cmark <<'EOF'
<a name="memory-usage" />

### Memory Usage (Max Resident Set Size in MB)

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).
EOF
  tsv2html "$in_dir/max_rss.tsv"

  cmark <<'EOF'
<a name="gc-stats" />

### GC Stats
EOF
  tsv2html "$in_dir/gc_stats.tsv"

  cmark <<'EOF'
<a name="rusage-details" />

### rusage Details
EOF
  tsv2html "$in_dir/details.tsv"

  cmark <<'EOF'
<a name="more-details" />

### More Details
EOF
  tsv2html "$in_dir/details_io.tsv"

  cmark <<'EOF'
<a name="shell-and-host" />

### Shell and Host
EOF
  tsv2html "$in_dir/shells.tsv"
  tsv2html "$in_dir/hosts.tsv"

  cmark <<'EOF'

</body>
</html>
EOF
}
448
test-oils-run() {
  ### Smoke-test osh, then run a capped task list and save the raw results
  #
  # Args:
  #   $1: osh binary
  #   $2: job_id
  #   $3: host_name
  #   $4, $5, $6: num_iters, num_shells, num_workloads (each defaults to 1),
  #               passed through to print-tasks-xshar
  local osh=$1
  local job_id=$2
  local host_name=$3

  # flags passed by caller
  local num_iters=${4:-1}
  local num_shells=${5:-1}
  local num_workloads=${6:-1}

  local time_py=${XSHAR_DIR:-$REPO_ROOT}/benchmarks/time_.py
  "$time_py" --tsv --rusage -- \
    "$osh" -c 'echo "smoke test: hi from benchmarks/osh-runtime.sh"'

  # Fresh build
  rm -r -f -v "$BASE_DIR" _tmp/{shell,host}-id

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$host_name" "$job_id" _tmp \
    bash dash "$osh"

  # e.g. 2024-05-01__10-11-12.ci-vm-name
  local raw_out_dir="$BASE_DIR/raw"
  mkdir -p "$raw_out_dir"

  # Similar to 'measure', for soil-run and release
  # The task list is shown on stdout and also saved, then replayed from the
  # saved file as stdin of run-tasks-wrapper.
  print-tasks-xshar "$host_name" "$osh" \
    "$num_iters" "$num_shells" "$num_workloads" \
    | tee "$BASE_DIR/tasks.txt"

  run-tasks-wrapper "$host_name" "$raw_out_dir" < "$BASE_DIR/tasks.txt"
  echo

  # Note: 'stage1' in soil-run is a trivial concatenation, so we can create input for
  # benchmarks/report.R. We don't need that here
}
486
soil-run() {
  ### Run it on just this machine, and make a report
  # DEPS: soil/cpp-tarball.sh benchmark-build

  rm -r -f "$BASE_DIR"
  mkdir -p "$BASE_DIR"

  # TODO: This testdata should be baked into Docker image, or mounted
  download
  extract

  # The three things built
  local -a osh_native=(
    "$OSH_CPP_SOIL"
    "$OSH_SOUFFLE_CPP_SOIL"
    "$OSH_STATIC_SOIL"
  )

  local single_machine='no-host'

  # Declared separately so a failure of print-job-id isn't masked by 'local'.
  local job_id
  job_id=$(print-job-id)

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$single_machine" "$job_id" _tmp \
    bash dash "${osh_native[@]}"

  local host_job_id="$single_machine.$job_id"
  local raw_out_dir="$BASE_DIR/raw.$host_job_id"
  mkdir -p "$raw_out_dir" "$BASE_DIR/stage1"

  measure "$single_machine" "$raw_out_dir" "${osh_native[@]}"

  # Trivial concatenation for 1 machine
  stage1 '' "$single_machine"

  benchmarks/report.sh stage2 "$BASE_DIR"

  benchmarks/report.sh stage3 "$BASE_DIR"
}
528
529	#
530	# Debugging
531	#
532
compare-cpython() {
  ### Debugging aid: diff the per-task files-N dirs of one raw result dir
  #
  # Picks the last directory (in glob order) matching the pattern below and
  # writes recursive diffs to _tmp/cpython-configure/.
  #
  # NOTE: turns errexit off for the rest of the shell, since diff exits
  # non-zero when its inputs differ.

  # The array + ${a[-1]} idiom needs a glob; the patterns carry wildcards so
  # they can match dirs like raw.<machine>.<date>.
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2024* )
  local -a a=( ../benchmark-data/osh-runtime/*.hoover.2024* )

  # More of a diff here?
  #local -a a=( ../benchmark-data/osh-runtime/*.broome.2023* )
  # less diff here
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2023* )

  local dir=${a[-1]}

  echo "$dir"

  head -n 1 "$dir/times.tsv"
  grep -F 'configure.cpython' "$dir/times.tsv"

  # NOTE(review): presumably the task IDs of the configure.cpython run for
  # each shell in that data set -- confirm against the times.tsv rows above.
  local bash_id=2
  local dash_id=8
  local osh_py_id=14
  local osh_cpp_id=20

  set +o errexit

  local out_dir=_tmp/cpython-configure
  mkdir -p "$out_dir"

  echo 'bash vs. dash'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$dash_id" \
    > "$out_dir/bash-vs-dash.txt"
  diffstat "$out_dir/bash-vs-dash.txt"
  echo

  echo 'bash vs. osh-py'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_py_id" \
    > "$out_dir/bash-vs-osh-py.txt"
  diffstat "$out_dir/bash-vs-osh-py.txt"
  echo

  echo 'bash vs. osh-cpp'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_cpp_id" \
    > "$out_dir/bash-vs-osh-cpp.txt"
  diffstat "$out_dir/bash-vs-osh-cpp.txt"
  echo

  # Everything below is deliberately unreachable; delete this 'return' to get
  # the file-by-file diffs while debugging.
  return

  diff -u "$dir/files-$bash_id/STDOUT.txt" "$dir/files-$osh_cpp_id/STDOUT.txt"
  echo

  diff -u "$dir/files-$bash_id/pyconfig.h" "$dir/files-$osh_cpp_id/pyconfig.h"
  echo

  cdiff -u "$dir/files-$bash_id/config.log" "$dir/files-$osh_cpp_id/config.log"
  echo
}
585
586	"$@"