| 1 | #!/usr/bin/env bash
|
| 2 | #
|
| 3 | # Test scripts found in the wild for both correctness and performance.
|
| 4 | #
|
| 5 | # Usage:
|
| 6 | # benchmarks/osh-runtime.sh <function name>
|
| 7 |
|
# Strict mode: fail on unset variables, on a failure in any pipeline stage,
# and on any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the parent of this script's directory.  "$0" is quoted so
# that a path containing spaces works, and '&&' keeps 'pwd' from reporting the
# current directory when the 'cd' fails.
REPO_ROOT=$(cd -- "$(dirname -- "$0")/.." && pwd)
|
| 13 |
|
| 14 | source benchmarks/common.sh # tsv-concat
|
| 15 | source benchmarks/id.sh # print-job-id
|
| 16 | source test/common.sh
|
| 17 | source test/tsv-lib.sh # tsv-row
|
| 18 |
|
# Default directory for this benchmark's output, relative to the directory the
# script is run from.
readonly BASE_DIR='_tmp/osh-runtime'

# TODO: Move to ../oil_DEPS
# Where the workload tarballs are downloaded and extracted.  Made absolute
# because run-tasks changes directories.
readonly TAR_DIR="$PWD/_deps/osh-runtime"

#
# Dependencies
#

# Python source tree whose ./configure is used as a workload.
readonly PY27_DIR="$PWD/Python-2.7.13"
|
| 29 |
|
| 30 | # NOTE: Same list in oilshell.org/blob/run.sh.
|
tarballs() {
  ### Print the file name of each workload tarball, one per line.
  printf '%s\n' \
    tcc-0.9.26.tar.bz2 \
    yash-2.46.tar.xz \
    ocaml-4.06.0.tar.xz \
    util-linux-2.40.tar.xz
}
|
| 39 |
|
download() {
  ### Fetch every tarball printed by 'tarballs' into $TAR_DIR.
  #
  # Files that already exist are not downloaded again (wget --no-clobber).
  mkdir -p "$TAR_DIR"

  # -I {} already runs one command per input line.  It is mutually exclusive
  # with -n, so '-n 1' was dropped to avoid the GNU xargs warning.
  tarballs | xargs -I {} --verbose -- \
    wget --no-clobber --directory-prefix "$TAR_DIR" \
      'https://www.oilshell.org/blob/testdata/{}'
}
|
| 45 |
|
extract() {
  ### Unpack every .bz2 and .xz archive found in $TAR_DIR into $TAR_DIR.
  #
  # The loop is traced (set -x) and timed; afterwards the directory is listed.
  local archive

  set -x
  time for archive in "$TAR_DIR"/*.{bz2,xz}; do
    # A glob without matches stays literal; don't hand that to tar.
    [[ -e $archive ]] || continue
    tar -x --directory "$TAR_DIR" --file "$archive"
  done
  set +x

  ls -l "$TAR_DIR"
}
|
| 55 |
|
| 56 | #
|
| 57 | # Computation
|
| 58 | #
|
| 59 |
|
run-tasks() {
  ### Run every task read from stdin under time-tsv and save its output.
  #
  # Args:
  #   $1: output directory.  A relative path is resolved against $PWD; an
  #       absolute path is used as is.
  # Stdin:
  #   One task per line: host_name, sh_path and workload, whitespace-separated.
  # Writes under $1:
  #   times.tsv           one row appended per task by time-tsv
  #   files-$task_id/     STDOUT.txt of the task (and its working directory
  #                       for some configure workloads)
  #   gc-$task_id.txt     only for shells matching *_bin/*/osh
  local raw_out_dir=$1
  case $raw_out_dir in
    /*)  # Already absolute
      ;;
    *)   # Make it absolute, because we change dirs
      raw_out_dir="$PWD/$raw_out_dir"
      ;;
  esac

  # Bug fix for dynamic scoping!
  local host_name sh_path workload

  local task_id=0
  while read -r host_name sh_path workload; do

    log "*** $host_name $sh_path $workload $task_id"

    local sh_run_path
    case $sh_path in
      /*)  # Already absolute
        sh_run_path=$sh_path
        ;;
      */*)  # It's relative, so make it absolute
        sh_run_path=$PWD/$sh_path
        ;;
      *)  # 'dash' should remain 'dash'
        sh_run_path=$sh_path
        ;;
    esac

    local working_dir=''
    local files_out_dir="$raw_out_dir/files-$task_id"
    mkdir -v -p "$files_out_dir"

    # Never set to a non-empty value in this function, so the "copy new files"
    # steps below are currently inactive.
    local save_new_files=''

    local -a argv
    case $workload in
      hello-world)
        argv=( testdata/osh-runtime/hello_world.sh )
        ;;

      bin-true)
        argv=( testdata/osh-runtime/bin_true.sh )
        ;;

      abuild-print-help)
        argv=( testdata/osh-runtime/abuild -h )
        ;;

      configure.cpython)
        argv=( "$PY27_DIR/configure" )
        working_dir=$files_out_dir
        ;;

      configure.util-linux)
        # flag needed to avoid sqlite3 dep error message
        argv=( "$TAR_DIR/util-linux-2.40/configure" --disable-liblastlog2 )
        working_dir=$files_out_dir
        ;;

      configure.*)
        argv=( ./configure )

        local conf_dir
        case $workload in
          *.ocaml)
            conf_dir='ocaml-4.06.0'
            ;;
          *.tcc)
            conf_dir='tcc-0.9.26'
            ;;
          *.yash)
            conf_dir='yash-2.46'
            ;;
          *)
            die "Invalid workload $workload"
            ;;
        esac

        # These are run in-tree?
        working_dir=$TAR_DIR/$conf_dir
        ;;

      *)
        die "Invalid workload $workload"
        ;;
    esac

    # The --field values must line up with the header that the caller wrote
    # to times.tsv.
    local -a time_argv=(
      time-tsv
        --output "$raw_out_dir/times.tsv" --append
        --rusage
        --rusage-2
        --field "$task_id"
        --field "$host_name" --field "$sh_path"
        --field "$workload"
        -- "$sh_run_path" "${argv[@]}"
    )

    local stdout_file="$files_out_dir/STDOUT.txt"
    local gc_stats_file="$raw_out_dir/gc-$task_id.txt"

    # Maybe change dirs
    if test -n "$working_dir"; then
      pushd "$working_dir"
    fi

    if test -n "$save_new_files"; then
      touch __TIMESTAMP
    fi

    # Run it, possibly with GC stats
    case $sh_path in
      *_bin/*/osh)
        # The shell is told to write GC stats to descriptor 99, which is
        # redirected to the per-task stats file.
        OILS_GC_STATS_FD=99 "${time_argv[@]}" > "$stdout_file" 99> "$gc_stats_file"
        ;;
      *)
        "${time_argv[@]}" > "$stdout_file"
        ;;
    esac

    if test -n "$save_new_files"; then
      echo "COPYING to $files_out_dir"
      find . -type f -newer __TIMESTAMP \
        | xargs -I {} -- cp --verbose {} "$files_out_dir"
    fi

    # Restore dir
    if test -n "$working_dir"; then
      popd
    fi

    task_id=$((task_id + 1))
  done
}
|
| 190 |
|
# Every workload, sorted by priority for
# 'test-oils.sh osh-runtime --num-shells 3': print-tasks-xshar keeps only the
# first N entries of this list.
readonly -a ALL_WORKLOADS=(
  # small scripts from testdata/osh-runtime
  hello-world bin-true

  # ./configure scripts
  configure.cpython configure.util-linux
  configure.ocaml configure.tcc configure.yash

  abuild-print-help
)
|
| 205 |
|
print-workloads() {
  ### for help: print each entry of ALL_WORKLOADS on its own indented line

  # Declared local so the loop variable does not leak into the caller's scope.
  local workload
  for workload in "${ALL_WORKLOADS[@]}"; do
    echo " $workload"
  done
}
|
| 213 |
|
print-tasks() {
  ### Print one TSV row (host, shell, workload) for every task to run.
  #
  # Args:
  #   $1: host name, copied into each row
  #   $2: path of the native osh binary, listed after bash, dash and bin/osh
  # Env:
  #   QUICKLY: when non-empty, only a short list of workloads is used
  local host_name=$1
  local osh_native=$2

  # Locals, so nothing leaks into the caller's scope.
  local -a workloads
  local sh_path workload

  if test -n "${QUICKLY:-}"; then
    workloads=(
      hello-world
      bin-true
      #configure.util-linux
      #abuild-print-help
    )
  else
    workloads=( "${ALL_WORKLOADS[@]}" )
  fi

  # $osh_native stays unquoted, as before, so an empty value adds no shell.
  for sh_path in bash dash bin/osh $osh_native; do
    for workload in "${workloads[@]}"; do
      tsv-row "$host_name" "$sh_path" "$workload"
    done
  done
}
|
| 235 |
|
print-tasks-xshar() {
  ### Print TSV task rows, limited to the first N shells and workloads.
  #
  # Args:
  #   $1: host name, copied into each row
  #   $2: path of the native osh binary; it is the first shell in the list
  #   $3: number of times the whole task list is repeated (default 1)
  #   $4: number of shells to use, from: $2, bash, dash (default 1)
  #   $5: number of workloads to use, from the front of ALL_WORKLOADS
  #       (default 1)
  local host_name=$1
  local osh_native=$2

  local num_iters=${3:-1}
  local num_shells=${4:-1}
  local num_workloads=${5:-1}

  # Counters and loop variables are local so they do not leak to the caller.
  local i s w sh_path workload

  for (( i = 0; i < num_iters; i++ )); do

    s=0
    for sh_path in $osh_native bash dash; do

      w=0
      for workload in "${ALL_WORKLOADS[@]}"; do
        tsv-row "$host_name" "$sh_path" "$workload"

        w=$(( w + 1 ))  # cut off at specified workloads
        if test "$w" -eq "$num_workloads"; then
          break
        fi
      done

      s=$(( s + 1 ))  # cut off at specified shells
      if test "$s" -eq "$num_shells"; then
        break
      fi

    done
  done
}
|
| 267 |
|
test-print-tasks-xshar() {
  ### Print the task list for a few shell/workload limits, for eyeballing.
  local limits

  # Each entry is "<num_shells> <num_workloads>"; it is split into two
  # arguments on purpose.  The iteration count is always 1.
  for limits in '1 1' '2 1' '2 2' '2 3'; do
    print-tasks-xshar $(hostname) osh 1 $limits
    echo
  done
}
|
| 278 |
|
run-tasks-wrapper() {
  ### reads tasks from stdin
  #
  # Args:
  #   $1: host name, added as a column to gc_stats.tsv
  #   $2: output directory; created if missing
  # Writes under $2: times.tsv, gc_stats.tsv, a copy of _tmp/provenance.tsv,
  # and whatever run-tasks writes.

  local host_name=$1  # 'no-host' or 'lenny'
  local raw_out_dir=$2

  mkdir -v -p "$raw_out_dir"

  local tsv_out="$raw_out_dir/times.tsv"

  # Write header of the TSV file that is appended to.
  time-tsv -o "$tsv_out" --print-header \
    --rusage \
    --rusage-2 \
    --field task_id \
    --field host_name --field sh_path \
    --field workload

  # reads tasks from stdin
  # run-tasks outputs 3 things: raw times.tsv, per-task STDOUT and files, and
  # per-task GC stats
  run-tasks "$raw_out_dir"

  # Turn individual files into a TSV, adding host
  benchmarks/gc_stats_to_tsv.py "$raw_out_dir"/gc-*.txt \
    | tsv-add-const-column host_name "$host_name" \
    > "$raw_out_dir/gc_stats.tsv"

  cp -v _tmp/provenance.tsv "$raw_out_dir"
}
|
| 309 |
|
measure() {
  ### For release and CI
  #
  # Generates the task list with print-tasks and pipes it into
  # run-tasks-wrapper.
  #
  # Args:
  #   $1: host name, e.g. 'no-host' or 'lenny'
  #   $2: raw output dir: _tmp/osh-runtime/$X or
  #       ../../benchmark-data/osh-runtime/$X
  #   $3: native osh binary: $OSH_CPP_NINJA_BUILD or $OSH_CPP_BENCHMARK_DATA
  local host_name=$1 raw_out_dir=$2 osh_native=$3

  print-tasks "$host_name" "$osh_native" |
    run-tasks-wrapper "$host_name" "$raw_out_dir"
}
|
| 319 |
|
stage1() {
  ### Concatenate the latest raw TSV files into $BASE_DIR/stage1.
  #
  # Args:
  #   $1: directory holding the raw.* dirs (default: $BASE_DIR), e.g.
  #       _tmp/osh-runtime or ../benchmark-data/osh-runtime
  #   $2: optional machine name.  When given, only the last raw.$2.* dir is
  #       used; otherwise the last dirs for $MACHINE1 and $MACHINE2 are used.
  # Writes: times.tsv, gc_stats.tsv and provenance.tsv in $BASE_DIR/stage1.
  local base_dir=${1:-$BASE_DIR}
  local single_machine=${2:-}

  # Output always goes under $BASE_DIR, whatever the input dir is.
  local out_dir=$BASE_DIR/stage1  # _tmp/osh-runtime
  mkdir -p "$out_dir"

  local -a raw_times=()
  local -a raw_gc_stats=()
  local -a raw_provenance=()

  # Globs are in lexicographical order, which works for our dates, so the
  # last match is the most recent run.  Only the '*' is left unquoted, so the
  # directory names may contain spaces.
  local -a a b

  if test -n "$single_machine"; then
    # find dir in _tmp/osh-runtime
    a=( "$base_dir/raw.$single_machine."* )

    raw_times+=( "${a[-1]}/times.tsv" )
    raw_gc_stats+=( "${a[-1]}/gc_stats.tsv" )
    raw_provenance+=( "${a[-1]}/provenance.tsv" )

  else
    # find last dirs in ../benchmark-data/osh-runtime
    a=( "$base_dir/raw.$MACHINE1."* )
    b=( "$base_dir/raw.$MACHINE2."* )

    raw_times+=( "${a[-1]}/times.tsv" "${b[-1]}/times.tsv" )
    raw_gc_stats+=( "${a[-1]}/gc_stats.tsv" "${b[-1]}/gc_stats.tsv" )
    raw_provenance+=( "${a[-1]}/provenance.tsv" "${b[-1]}/provenance.tsv" )
  fi

  tsv-concat "${raw_times[@]}" > "$out_dir/times.tsv"

  tsv-concat "${raw_gc_stats[@]}" > "$out_dir/gc_stats.tsv"

  tsv-concat "${raw_provenance[@]}" > "$out_dir/provenance.tsv"
}
|
| 356 |
|
print-report() {
  ### Print the HTML report for the OSH runtime benchmark to stdout.
  #
  # Args:
  #   $1: directory containing the TSV files that are rendered as tables
  #       (elapsed, page_faults, max_rss, gc_stats, details, details_io,
  #       shells, hosts)
  #
  # Every heredoc uses a quoted delimiter: none of them contains expansions,
  # so the text is emitted literally either way.
  local in_dir=$1

  benchmark-html-head 'OSH Runtime Performance'

  cat <<'EOF'
<body class="width60">
<p id="home-link">
<a href="/">oilshell.org</a>
</p>
EOF

  cmark <<'EOF'
## OSH Runtime Performance

Source code: [benchmarks/osh-runtime.sh](https://github.com/oilshell/oil/tree/master/benchmarks/osh-runtime.sh)

- [Elapsed Time](#elapsed-time)
- [Minor Page Faults](#page-faults)
- [Memory Usage](#memory-usage)
- [GC Stats](#gc-stats)
- [rusage Details](#rusage-details)
- [More Details](#more-details)
- [Shell and Host](#shell-and-host)

[Raw files](-wwz-index)

<a name="elapsed-time" />

### Elapsed Time by Shell (milliseconds)

Some benchmarks call many external tools, while some exercise the shell
interpreter itself.
EOF
  tsv2html "$in_dir/elapsed.tsv"

  cmark <<'EOF'
<a name="page-faults" />

### Minor Page Faults
EOF

  tsv2html "$in_dir/page_faults.tsv"

  cmark <<'EOF'
<a name="memory-usage" />

### Memory Usage (Max Resident Set Size in MB)

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).
EOF
  tsv2html "$in_dir/max_rss.tsv"

  cmark <<'EOF'
<a name="gc-stats" />

### GC Stats
EOF
  tsv2html "$in_dir/gc_stats.tsv"

  cmark <<'EOF'
<a name="rusage-details" />

### rusage Details
EOF
  tsv2html "$in_dir/details.tsv"

  cmark <<'EOF'
<a name="more-details" />

### More Details
EOF
  tsv2html "$in_dir/details_io.tsv"

  cmark <<'EOF'
<a name="shell-and-host" />

### Shell and Host
EOF
  tsv2html "$in_dir/shells.tsv"
  tsv2html "$in_dir/hosts.tsv"

  cmark <<'EOF'

</body>
</html>
EOF
}
|
| 445 |
|
test-oils-run() {
  ### Smoke-test the given osh, then run a limited task list with it.
  #
  # Args:
  #   $1: osh binary to benchmark
  #   $2: job ID, passed to shell-provenance-2
  #   $3: host name recorded in the results
  #   $4: number of iterations (default 1)
  #   $5: number of shells (default 1)
  #   $6: number of workloads (default 1)
  # Env:
  #   XSHAR_DIR: if set, time_.py is taken from there instead of $REPO_ROOT
  local osh=$1
  local job_id=$2
  local host_name=$3

  # flags passed by caller
  local num_iters=${4:-1}
  local num_shells=${5:-1}
  local num_workloads=${6:-1}

  local time_py=${XSHAR_DIR:-$REPO_ROOT}/benchmarks/time_.py
  "$time_py" --tsv --rusage -- \
    "$osh" -c 'echo "smoke test: hi from benchmarks/osh-runtime.sh"'

  # Fresh build.  ':?' aborts instead of expanding to nothing if BASE_DIR is
  # ever empty.
  rm -r -f -v -- "${BASE_DIR:?}" _tmp/{shell,host}-id

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$host_name" "$job_id" _tmp \
    bash dash "$osh"

  # Unlike soil-run, the raw directory name is fixed: it does not include the
  # host name or job ID.
  local raw_out_dir="$BASE_DIR/raw"
  mkdir -p "$raw_out_dir"

  # Similar to 'measure', for soil-run and release
  print-tasks-xshar "$host_name" "$osh" \
    "$num_iters" "$num_shells" "$num_workloads" \
    | tee "$BASE_DIR/tasks.txt"

  run-tasks-wrapper "$host_name" "$raw_out_dir" < "$BASE_DIR/tasks.txt"
  echo

  # Note: 'stage1' in soil-run is a trivial concatenation, so we can create input for
  # benchmarks/report.R. We don't need that here
}
|
| 483 |
|
soil-run() {
  ### Run it on just this machine, and make a report
  #
  # Steps: wipe $BASE_DIR, download and extract the test data, build the
  # native osh with ninja, record provenance, run all tasks (measure), then
  # run stage1 and the stage2/stage3 steps of benchmarks/report.sh.

  # ':?' aborts instead of expanding to nothing if BASE_DIR is ever empty.
  rm -r -f -- "${BASE_DIR:?}"
  mkdir -p "$BASE_DIR"

  # TODO: This testdata should be baked into Docker image, or mounted
  download
  extract

  # could add _bin/cxx-bumpleak/oils-for-unix, although sometimes it's slower
  local -a osh_bin=( "$OSH_CPP_NINJA_BUILD" )
  ninja "${osh_bin[@]}"

  local single_machine='no-host'

  # Declared separately so a failure of print-job-id is not masked by 'local'.
  local job_id
  job_id=$(print-job-id)

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$single_machine" "$job_id" _tmp \
    bash dash bin/osh "${osh_bin[@]}"

  local host_job_id="$single_machine.$job_id"
  local raw_out_dir="$BASE_DIR/raw.$host_job_id"
  mkdir -p "$raw_out_dir" "$BASE_DIR/stage1"

  measure "$single_machine" "$raw_out_dir" "$OSH_CPP_NINJA_BUILD"

  # Trivial concatenation for 1 machine.  The empty first argument makes
  # stage1 fall back to its default input directory.
  stage1 '' "$single_machine"

  benchmarks/report.sh stage2 "$BASE_DIR"

  benchmarks/report.sh stage3 "$BASE_DIR"
}
|
| 521 |
|
| 522 | #
|
| 523 | # Debugging
|
| 524 | #
|
| 525 |
|
compare-cpython() {
  ### Debugging aid: diff bash's configure.cpython output against other shells.
  #
  # Uses the last directory matching the glob below, prints its
  # configure.cpython timing rows, and writes recursive diffs to
  # _tmp/cpython-configure/.
  #
  # NOTE: turns errexit off and does not restore it.

  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2024* )
  local -a a=( ../benchmark-data/osh-runtime/*.hoover.2024* )

  # More of a diff here?
  #local -a a=( ../benchmark-data/osh-runtime/*.broome.2023* )
  # less diff here
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2023* )

  local dir=${a[-1]}

  echo "$dir"

  head -n 1 "$dir/times.tsv"
  grep -F 'configure.cpython' "$dir/times.tsv"

  # Task IDs of the configure.cpython runs, i.e. the N in files-N.  They are
  # hard-coded for the data set selected above.
  local bash_id=2
  local dash_id=8
  local osh_py_id=14
  local osh_cpp_id=20

  # diff exits non-zero when it finds differences, which is expected here.
  set +o errexit

  local out_dir=_tmp/cpython-configure
  mkdir -p "$out_dir"

  echo 'bash vs. dash'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$dash_id" \
    > "$out_dir/bash-vs-dash.txt"
  diffstat "$out_dir/bash-vs-dash.txt"
  echo

  echo 'bash vs. osh-py'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_py_id" \
    > "$out_dir/bash-vs-osh-py.txt"
  diffstat "$out_dir/bash-vs-osh-py.txt"
  echo

  echo 'bash vs. osh-cpp'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_cpp_id" \
    > "$out_dir/bash-vs-osh-cpp.txt"
  diffstat "$out_dir/bash-vs-osh-cpp.txt"
  echo

  return

  # Unreachable; kept for manual debugging (delete the 'return' above).
  diff -u "$dir/files-$bash_id/STDOUT.txt" "$dir/files-$osh_cpp_id/STDOUT.txt"
  echo

  diff -u "$dir/files-$bash_id/pyconfig.h" "$dir/files-$osh_cpp_id/pyconfig.h"
  echo

  cdiff -u "$dir/files-$bash_id/config.log" "$dir/files-$osh_cpp_id/config.log"
  echo
}
|
| 578 |
|
# Dispatch: run the command line as a command, so the first argument names
# the function to call and the rest are passed to it (see Usage at the top).
"$@"
|