OILS / benchmarks / autoconf.sh View on Github | oils.pub

652 lines, 278 significant
1#!/usr/bin/env bash
2#
3# Why is CPython configure slower under OSH?
4#
5# Usage:
6# benchmarks/autoconf.sh <function name>
7#
8# Examples:
9# $0 patch-pyconf # times builtin
10# $0 measure-times # time-tsv with gc stats
11# $0 report-times # times builtin
12#
13# $0 measure-syscalls
14# $0 report-syscalls
15# $0 report-processes
16# $0 report-external
17#
18# Simpler:
19# $0 measure-rusage # time-tsv
20
21set -o nounset
22set -o pipefail
23set -o errexit
24
25REPO_ROOT=$(cd "$(dirname $0)/.."; pwd)
26
27source benchmarks/cachegrind.sh # with-cachegrind
28source benchmarks/callgrind.sh # with-cachegrind
29source test/tsv-lib.sh # $TAB
30
31readonly BASE_DIR_RELATIVE=_tmp/autoconf
32readonly BASE_DIR=$REPO_ROOT/$BASE_DIR_RELATIVE
33readonly PY_CONF=$REPO_ROOT/Python-2.7.13/configure
34
35download() {
36 wget --no-clobber --directory $BASE_DIR \
37 'https://www.oilshell.org/blob/testdata/util-linux-2.40.tar.xz'
38 cd $BASE_DIR
39 tar --verbose -x --xz < util-linux-*.xz
40}
41
42deps() {
43 # gah, for util-linux
44 sudo apt-get install libsqlite3-dev
45}
46
47#
48# Trying to measure allocation/GC overhead
49#
50# This DOES NOT HELP because bumpleak/bumproot are **slower** on bigger heaps.
51# There's less cache locality!
52#
53
54cpython-configure-tasks() {
55 local do_souffle=${1:-}
56
57 local -a variants=( opt+bumpleak opt+bumproot opt )
58 for v in ${variants[@]}; do
59 echo "${v}${TAB}_bin/cxx-$v/osh"
60 done
61
62 if test -n "$do_souffle"; then
63 echo "opt${TAB}_bin/cxx-opt/mycpp-souffle/osh"
64 fi
65}
66
67cpython-setup() {
68 cpython-configure-tasks "$@" | while read -r _ osh; do
69 ninja $osh
70 done
71}
72
73measure-alloc-overhead() {
74 local base_dir=$BASE_DIR/alloc-overhead
75 rm -r -f -v $base_dir
76
77 cpython-configure-tasks | while read -r variant osh; do
78 osh=$REPO_ROOT/$osh
79
80 local task_dir=$base_dir/$variant
81
82 mkdir -p $task_dir
83 pushd $task_dir > /dev/null
84
85 local -a flags=(
86 --output "$base_dir/$variant.tsv"
87 --rusage
88 )
89
90 local -a time_argv
91
92 time_argv=(
93 time-tsv --print-header
94 "${flags[@]}"
95 --field variant
96 )
97 "${time_argv[@]}"
98
99 time_argv=(
100 time-tsv --append
101 "${flags[@]}"
102 --field "$variant"
103 -- $osh $PY_CONF
104 )
105
106 #echo "${time_argv[@]}"
107 "${time_argv[@]}"
108
109 popd > /dev/null
110
111 done
112}
113
114#
115# Compare bash/dash/osh locally
116#
117
118measure-rusage() {
119 local base_dir=$BASE_DIR/rusage
120 rm -r -f -v $base_dir
121
122 measure-times-tasks | while read -r sh_label sh_path config_script; do
123
124 local task_dir=$base_dir/$sh_label
125
126 mkdir -p $task_dir
127 pushd $task_dir > /dev/null
128
129 local -a flags=(
130 --output "$base_dir/$sh_label.tsv"
131 --rusage
132 )
133
134 local -a time_argv
135
136 time_argv=(
137 time-tsv --print-header
138 "${flags[@]}"
139 --field sh_label
140 )
141 "${time_argv[@]}"
142
143 time_argv=(
144 time-tsv --append
145 "${flags[@]}"
146 --field "$sh_label"
147 -- $sh_path $config_script
148 )
149
150 #echo "${time_argv[@]}"
151 "${time_argv[@]}"
152
153 popd > /dev/null
154
155 done
156}
157
158#
159# Now try strace
160#
161
162measure-times-tasks() {
163 local do_souffle=${1:-}
164
165 for script in $PY_CONF $BASE_DIR/util-linux-2.40/configure; do
166 #for script in $BASE_DIR/util-linux-2.40/configure; do
167 echo "bash${TAB}bash${TAB}$script"
168 echo "dash${TAB}dash${TAB}$script"
169 echo "osh${TAB}$REPO_ROOT/_bin/cxx-opt/osh${TAB}$script"
170
171 if test -n "$do_souffle"; then
172 echo "osh-souffle${TAB}$REPO_ROOT/_bin/cxx-opt/mycpp-souffle/osh${TAB}$script"
173 fi
174 done
175}
176
177measure-syscalls() {
178 local osh=_bin/cxx-opt/osh
179 #local osh=_bin/cxx-dbg/osh
180
181 ninja $osh
182
183 local base_dir=$BASE_DIR/syscalls
184
185 rm -r -f -v $base_dir
186
187 measure-times-tasks | while read -r sh_label sh_path config_script; do
188 local dir=$base_dir/$sh_label
189 mkdir -p $dir
190
191 local counts_dir=$base_dir/$sh_label
192 mkdir -p $counts_dir
193
194 pushd $dir
195 #strace -o $counts -c $sh_path $PY_CONF
196 # See how many external processes are started?
197 #strace -o $counts -ff -e execve $sh_path $PY_CONF
198 strace -o $counts_dir/syscalls -ff $sh_path $config_script
199 popd
200 done
201}
202
203# --- _tmp/autoconf/bash
204# 6047
205# 4621
206# --- _tmp/autoconf/dash
207# 6088
208# 4627
209# --- _tmp/autoconf/osh
210# 5691
211# 4631
212#
213# Woah we start fewer processes! But are not faster?
214
215grep-exec() {
216 # --max-count 1 - only report one execve per process
217 egrep --no-filename --max-count 1 -o 'execve\("[^"]+' "$@"
218}
219
220# andy@hoover:~/git/oils-for-unix/oils$ benchmarks/autoconf.sh report-syscalls
221# --- _tmp/autoconf/syscalls/bash
222# 2592 _tmp/autoconf/syscalls/bash/syscalls.903220
223# 2608 _tmp/autoconf/syscalls/bash/syscalls.898727
224# 2632 _tmp/autoconf/syscalls/bash/syscalls.898387
225# 2679 _tmp/autoconf/syscalls/bash/syscalls.898292
226# 2853 _tmp/autoconf/syscalls/bash/syscalls.898927
227# 2873 _tmp/autoconf/syscalls/bash/syscalls.898334
228# 2920 _tmp/autoconf/syscalls/bash/syscalls.898895
229# 3204 _tmp/autoconf/syscalls/bash/syscalls.898664
230# 112549 _tmp/autoconf/syscalls/bash/syscalls.897471
231# 1360223 total
232#
233# --- _tmp/autoconf/syscalls/dash
234# 2592 _tmp/autoconf/syscalls/dash/syscalls.909344
235# 2607 _tmp/autoconf/syscalls/dash/syscalls.904921
236# 2630 _tmp/autoconf/syscalls/dash/syscalls.904581
237# 2683 _tmp/autoconf/syscalls/dash/syscalls.904486
238# 2851 _tmp/autoconf/syscalls/dash/syscalls.905109
239# 2873 _tmp/autoconf/syscalls/dash/syscalls.904528
240# 2920 _tmp/autoconf/syscalls/dash/syscalls.905088
241# 3204 _tmp/autoconf/syscalls/dash/syscalls.904858
242# 112922 _tmp/autoconf/syscalls/dash/syscalls.903626
243# 1372118 total
244#
245# --- _tmp/autoconf/syscalls/osh
246# 2592 _tmp/autoconf/syscalls/osh/syscalls.915226
247# 2607 _tmp/autoconf/syscalls/osh/syscalls.910993
248# 2630 _tmp/autoconf/syscalls/osh/syscalls.910647
249# 2679 _tmp/autoconf/syscalls/osh/syscalls.910561
250# 2851 _tmp/autoconf/syscalls/osh/syscalls.911162
251# 2873 _tmp/autoconf/syscalls/osh/syscalls.910599
252# 2920 _tmp/autoconf/syscalls/osh/syscalls.911143
253# 3204 _tmp/autoconf/syscalls/osh/syscalls.910936
254# 72921 _tmp/autoconf/syscalls/osh/syscalls.909769
255# 1211074 total
256
257report-processes() {
258 for sh_dir in $BASE_DIR_RELATIVE/syscalls/*; do
259 echo "--- $sh_dir"
260 ls $sh_dir/* | wc -l
261 grep-exec $sh_dir/syscalls.* | wc -l
262 echo
263
264 done
265}
266
267report-external() {
268 local n=${1:-5}
269
270 for sh_dir in $BASE_DIR_RELATIVE/syscalls/*; do
271 echo "--- $sh_dir"
272
273 grep-exec $sh_dir/syscalls.* | sort | uniq -c | sort -n | tail -n $n
274 echo
275 done
276}
277
278report-syscalls() {
279 # Hm this is instructive, the shell itself makes the most syscalls
280 # And fewer than other shells?
281
282 for sh_dir in $BASE_DIR_RELATIVE/syscalls/*; do
283 echo "--- $sh_dir"
284 wc -l $sh_dir/syscalls.* | sort -n | tail
285 echo
286 done
287}
288
289#
290# Cachegrind
291#
292
293measure-valgrind() {
294 local tool=$1
295
296 # opt seems to give OK results, but I thought dbg was more accurate
297 #local osh=_bin/cxx-opt/osh
298 local osh=_bin/cxx-dbg/osh
299
300 ninja $osh
301
302 local osh=$REPO_ROOT/$osh
303
304 local base_dir=$REPO_ROOT/_tmp/$tool
305
306 local dir=$base_dir/cpython-configure
307 rm -r -f -v $dir
308
309 local out_file=$base_dir/cpython-configure.txt
310
311 mkdir -v -p $dir
312
313 pushd $dir
314 $tool $out_file $osh $PY_CONF
315 popd
316}
317
318measure-cachegrind() {
319 measure-valgrind with-cachegrind
320}
321
322measure-callgrind() {
323 # This takes ~5 minutes with opt binary, ~6:43 with dbg
324 # vs ~15 seconds uninstrumented
325 time measure-valgrind with-callgrind
326}
327
328# Note:
329# benchmarks/osh-runtime.sh compares our release, which does not have #ifdef
330# GC_TIMING, so we don't know total GC time.
331
332# TODO:
333#
334# - Run locally, reproduce GC_TIMING - this is not in the release build
335# - it seems to say only 143 ms total GC time, but we're seeing 1.5+ seconds
336# slowdown on Cpython configure vs. bash
337# - I want a local run that automates it, and returns PERCENTAGES for elapsed
338# time, sys time, user time
339# - We also might not want to amortize free() inside Allocate()
340# - #ifdef LAZY_FREE I think! That might show a big slowdown with free
341
342patch-pyconf() {
343 #sed -i $'s/ac_compile=\'$CC/ac_compile=\'times; $CC/g' $PY_CONF
344
345 # temporary
346 echo 'times > $SH_BENCHMARK_TIMES' >> $PY_CONF
347}
348
349measure-times() {
350 local do_souffle=${1:-}
351 local gc_stats=${2:-}
352
353 local osh=_bin/cxx-opt/osh
354 ninja $osh
355
356 local base_dir=$BASE_DIR/times
357 rm -r -f -v $base_dir
358
359 local trace_dir=$base_dir/oils-trace
360 mkdir -p $trace_dir
361
362 local task_id=0
363 measure-times-tasks "$do_souffle" | while read -r sh_label sh_path config_script; do
364 #case $sh_label in bash|dash) continue ;; esac
365
366 local dir=$base_dir/$task_id
367 mkdir -p $dir
368
369 pushd $dir
370
371 local -a flags=(
372 --output "$dir/times.tsv"
373 --rusage
374 )
375
376 local -a time_argv
377
378 time_argv=(
379 time-tsv --print-header
380 "${flags[@]}"
381 --field sh_label
382 --field config_script
383 )
384 "${time_argv[@]}"
385
386 # Try to make it more stable by running on CPU 1
387 local -a argv=( taskset -c 1 $sh_path $config_script )
388
389 time_argv=(
390 time-tsv --append
391 "${flags[@]}"
392 --field "$sh_label"
393 --field "$config_script"
394 -- "${argv[@]}"
395 )
396
397 #echo "${time_argv[@]}"
398
399 # 1269 argv0.json files created
400 # we can miss some via NOLASTFORK optimization
401 #OILS_TRACE_DIR=$trace_dir \
402
403 if test -n "$gc_stats"; then
404 _OILS_GC_VERBOSE=1 OILS_GC_STATS_FD=99 \
405 SH_BENCHMARK_TIMES=$base_dir/$sh_label.times.txt \
406 "${time_argv[@]}" \
407 99>$base_dir/$sh_label.gc-stats.txt
408 else
409 # Not patched
410 "${time_argv[@]}"
411 fi
412
413 popd
414
415 task_id=$(( task_id + 1 ))
416 done
417}
418
419inner-long-tsv() {
420 python2 -c '
421import os, re, sys
422
423def PrintRow(row):
424 print("\t".join(row))
425
426PrintRow(["shell", "who", "what", "seconds"])
427
428for path in sys.argv[1:]:
429 filename = os.path.basename(path)
430 shell = filename.split(".")[0]
431
432 f = open(path)
433 s = f.read()
434
435 secs = re.findall("0m([0-9.]+)s", s)
436 assert len(secs) == 4, secs
437
438 PrintRow([shell, "self", "user", secs[0]])
439 PrintRow([shell, "self", "sys", secs[1]])
440 PrintRow([shell, "child", "user", secs[2]])
441 PrintRow([shell, "child", "sys", secs[3]])
442
443 # Non-normalized, but OK
444 total_secs = sum(float(s) for s in secs)
445 PrintRow([shell, "both", "both", str(total_secs)])
446
447 ' $BASE_DIR/times/*.times.txt
448}
449
450compare-dim() {
451 # 8% more child system time
452 local who=${1:-child}
453 local what=${2:-user}
454
455 echo "=== $who $what ==="
456
457 # Annoying
458 # https://www.math.utah.edu/docs/info/gawk_8.html
459 # "If, for some reason, you need to force a number to be converted to a
460 # string, concatenate the empty string, "", with that number. To force a
461 # string to be converted to a number, add zero to that string."
462
463 cat $BASE_DIR/times-long.tsv | awk -v "who=$who" -v "what=$what" '
464 BEGIN {
465 TAB = "\t"
466
467 i = 0
468
469 printf "%s\t%s\t%s\t%s\n", "shell", "secs", "ratio", "diff secs"
470 }
471 $2 == who && $3 == what {
472 if (i == 0) {
473 first_secs = $4 + 0
474 }
475 i++
476
477 secs = $4 + 0
478 ratio = secs / first_secs
479 diff = secs - first_secs
480
481 # Need commas for OFMT to work correctly?
482 printf "%s\t%5.3f\t%5.3f\t%5.3f\n", $1, secs, ratio, diff
483 }
484 '
485
486 echo
487}
488
489compare-times() {
490 log "INNER"
491 log ''
492
493 compare-dim self user
494
495 compare-dim self sys
496
497 compare-dim child user
498
499 compare-dim child sys
500
501 compare-dim both both
502
503 # outer
504 log "OUTER"
505 log ''
506
507 compare-dim both elapsed
508
509 # These kinda match
510 return
511 compare-dim both user
512 compare-dim both sys
513}
514
515outer-long-tsv() {
516 log "=== outer times ==="
517 awk '
518 BEGIN {
519 i = 0
520
521 printf "%s\t%s\t%s\t%s\n", "shell", "who", "what", "seconds"
522 }
523 i == 0 {
524 #print "Skipping header"
525 i++
526 next
527 }
528 i >= 1 {
529 elapsed = $2 + 0
530 user = $3 + 0
531 sys = $4 + 0
532 sh_label = $6
533
534 printf "%s\t%s\t%s\t%5.3f\n", sh_label, "both", "elapsed", elapsed
535 printf "%s\t%s\t%s\t%5.3f\n", sh_label, "both", "user", user
536 printf "%s\t%s\t%s\t%5.3f\n", sh_label, "both", "sys", sys
537
538 i++
539 }
540 ' $BASE_DIR/outer-wide.tsv
541}
542
543report-times() {
544 head $BASE_DIR/times/*/times.tsv
545 echo
546}
547
548report-times-patched() {
549 head $BASE_DIR/times/*.tsv
550 echo
551
552 # 2025-06: this requires patch-pyconf
553 head $BASE_DIR/times/*.times.txt
554 echo
555
556 inner-long-tsv | tee $BASE_DIR/inner-long.tsv
557 echo
558
559 if false; then
560 tsv-concat $BASE_DIR/times/*.tsv | tee $BASE_DIR/outer-wide.tsv
561 outer-long-tsv | tee $BASE_DIR/outer-long.tsv
562 echo
563 tsv-concat $BASE_DIR/{inner,outer}-long.tsv | tee $BASE_DIR/times-long.tsv
564 fi
565
566 compare-times
567}
568
569### Why is clone() taking longer according to strace?
570
571fork-tasks() {
572 echo "bash${TAB}bash"
573 echo "dash${TAB}dash"
574
575 # Hm this is noisy, but cxx-opt-sh does seem slower
576 echo "osh${TAB}$REPO_ROOT/_bin/cxx-opt/osh"
577 echo "osh${TAB}$REPO_ROOT/_bin/cxx-opt-sh/osh"
578 echo "osh${TAB}$REPO_ROOT/_bin/cxx-opt/mycpp-souffle/osh"
579 echo "osh${TAB}$REPO_ROOT/_bin/cxx-opt-sh/mycpp-souffle/osh"
580}
581
582measure-fork() {
583 fork-tasks | while read -r sh_label sh_path; do
584 #case $sh_label in bash|dash) continue ;; esac
585
586 echo "=== $sh_path ==="
587
588 # Builtin is very fast
589 #time $sh_path -c 'for i in $(seq 100); do true; done'
590
591 # Hm this is very noisy
592 # TODO use hyperfine?
593 time $sh_path -c 'for i in $(seq 100); do /bin/true; done'
594
595 case $sh_label in
596 osh)
597 # Oops, we are not symlinking to the .stripped binary!
598 # This is explicitly done for symbols and benchmarking.
599 # Hm does that make it slower then?
600
601 ls -l -L $sh_path
602 ldd $sh_path
603 ;;
604 esac
605 done
606}
607
608# $ head _tmp/elapsed/*.times.txt
609# ==> _tmp/elapsed/bash.times.txt <==
610# 0m0.213s 0m0.477s
611# 0m8.233s 0m2.931s
612#
613# ==> _tmp/elapsed/dash.times.txt <==
614# 0m0.217s 0m0.463s
615# 0m8.281s 0m2.922s
616#
617# ==> _tmp/elapsed/osh.times.txt <==
618# 0m0.360s 0m0.720s
619# 0m8.790s 0m2.960s
620
621# shell user time - GC and allocs
622# shell system time - ???
623# child user time - ???
624# TODO: count how many processes this is.
625# It's more than 500 ms
626# Is that 500 processes, and 1 ms per process?
627
628fork-time() {
629 local osh=_bin/cxx-opt/osh
630
631 $osh -c 'time for i in {1..1000}; do true; done'
632 echo
633
634 $osh -c 'time for i in {1..1000}; do ( true ); done'
635 echo
636
637 # Does this increase fork time or no?
638 # Hm I can only get the forks up to 306ms for 1000, or 300 us
639 # But the HereDocWriter does dup() and so forth
640 $osh -c '
641echo ysh-parse
642time for i in {1..40}; do
643 . test/ysh-parse-errors.sh
644done
645times
646time for i in {1..1000}; do
647 ( true )
648done'
649 echo
650}
651
652"$@"