| 1 | #!/usr/bin/env bash
|
| 2 | #
|
| 3 | # Run the osh parser on shell scripts found in the wild.
|
| 4 | #
|
| 5 | # Usage:
|
| 6 | # test/wild.sh <function name>
|
| 7 | #
|
| 8 | # Examples:
|
| 9 | # test/wild.sh all
|
| 10 | # test/wild.sh all '^oil' # subset
|
| 11 |
|
| 12 | # TODO:
|
| 13 | # - Add more scripts, like gentoo package defs
|
| 14 |
|
# Abort on unset variables, on a failing pipeline stage, and on any unhandled
# non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit
# The error output is discarded and a failure is ignored, presumably because
# only OSH understands this option -- TODO confirm.
shopt -s strict:all 2>/dev/null || true # dogfood for OSH

source build/dev-shell.sh # python2
source test/common.sh # export-osh-cpp

# This persists across build/clean.sh
# manifest-from-archive lists the files under $DEPS_WILD_DIR/src.
readonly DEPS_WILD_DIR=../oil_DEPS/wild
|
| 25 |
|
| 26 |
|
| 27 | #
|
| 28 | # Helpers
|
| 29 | #
|
| 30 |
|
# Print one manifest line per file: "<project> <base_dir>/<rel path> <rel path>".
#
# Arguments:
#   $1   - project name (first column)
#   $2   - base directory the relative paths are joined to
#   $3.. - paths relative to the base directory; none means no output
# Outputs:
#   Manifest lines on stdout.
_manifest() {
  local proj=$1
  local base_dir=$2
  shift 2

  # Keep the loop variable out of the caller's (global) scope.
  local path
  for path in "$@"; do
    # printf with quoted arguments emits each field verbatim; an unquoted
    # echo would re-glob and whitespace-collapse them.
    printf '%s %s %s\n' "$proj" "$base_dir/$path" "$path"
  done
}
|
| 40 |
|
# Generic helper: emit manifest lines for every *.sh file under a directory.
#
# Arguments:
#   $1 - base directory to search
#   $2 - optional category; when given, the project name becomes
#        "<category>/<basename of $1>", otherwise just the basename
_sh-manifest() {
  local base_dir=$1
  local category=${2:-}

  # "${category:+...}" contributes the "category/" prefix only when a
  # non-empty category was passed.
  local proj
  proj="${category:+$category/}$(basename $base_dir)"

  # The find output is deliberately word-split into one argument per file.
  _manifest $proj $base_dir $(find $base_dir -name '*.sh' -a -printf '%P\n')
}
|
| 56 |
|
# Emit manifest lines for every file named 'configure' under a directory.
# The project name is "<basename of $1>__configure".
#
# Arguments:
#   $1 - base directory to search
_configure-manifest() {
  local base_dir=$1
  shift

  local name
  name=$(basename "$base_dir")

  # Search the directory that was passed in.  This used to search "$src",
  # which is not a variable of this function.
  # The find output is deliberately word-split into one argument per file.
  _manifest "${name}__configure" "$base_dir" \
    $(find "$base_dir" -name 'configure' -a -printf '%P\n')
}
|
| 65 |
|
| 66 | #
|
| 67 | # Special Case Corpora Using Explicit Globs
|
| 68 | #
|
| 69 |
|
| 70 | # TODO: Where do we write the base dir?
|
# Emit manifest lines for the *.sh files at the top level of ~/git/oil-sketch
# and in its awk, demo, make, misc, regex and tools subdirectories.
oil-sketch-manifest() {
  local base_dir=~/git/oil-sketch
  local name  # keep the loop variable out of the global scope

  pushd "$base_dir" >/dev/null
  for name in *.sh {awk,demo,make,misc,regex,tools}/*.sh; do
    # A glob that matches nothing is left as the literal pattern; skip it
    # rather than emitting an entry for a file that does not exist.
    test -e "$name" || continue
    printf '%s %s %s\n' oil-sketch "$base_dir/$name" "$name"
  done
  popd >/dev/null
}
|
| 79 |
|
# Emit manifest lines for this repository's own shell code, relative to the
# current directory: configure, install, top-level *.sh, and *.sh in the
# benchmarks, build, test, scripts and opy directories.
oil-manifest() {
  local base_dir=$PWD
  local name  # keep the loop variable out of the global scope

  for name in \
    configure install *.sh {benchmarks,build,test,scripts,opy}/*.sh; do
    # Skip fixed names that are absent and globs that matched nothing (the
    # shell leaves those as the literal pattern).
    test -e "$name" || continue
    printf '%s %s %s\n' oil "$base_dir/$name" "$name"
  done
}
|
| 87 |
|
| 88 | readonly ABORIGINAL_DIR=~/src/aboriginal-1.4.5
|
| 89 |
|
| 90 | #
|
| 91 | # All
|
| 92 | #
|
| 93 |
|
# Print the manifest lines for every corpus to stdout.
#
# Each corpus is a hard-coded directory (mostly under ~/git, ~/src and ~/hg,
# plus /etc/init.d and /usr/bin).  Files are selected with 'find', and the
# lines are produced by _manifest, either directly or via _sh-manifest, which
# selects *.sh.  'src' is reused as the base directory of the corpus currently
# being listed.
#
# The find output is word-split on purpose, so corpora containing filenames
# with spaces are commented out.
#
# Everything after the bare 'return' near the end (the "Big" section) is
# unreachable.
all-manifests() {
  # Don't expose this repo for now
  #oil-sketch-manifest

  #
  # My Code
  #

  oil-manifest

  local src

  #
  # Books
  #
  src=~/git/wild/books/wicked_cool_shell_scripts_2e
  _manifest "books/$(basename $src)" $src \
    $(find $src \
      \( -name .git -a -prune \) -o \
      \( -type f -a -executable -a -printf '%P\n' \) )

  #
  # Shell/Bash Frameworks/Collections
  #

  # Only the completions/ subdirectory is listed; the printf format restores
  # the 'completions/' prefix so paths stay relative to $src.
  src=~/git/other/bash-completion
  _manifest "shell/$(basename $src)" $src \
    $(find $src/completions -type f -a -printf 'completions/%P\n')

  # Bats bash test framework. It appears to be fairly popular.
  src=~/git/other/bats
  _manifest "shell/$(basename $src)" $src \
    $(find $src \
      \( -wholename '*/libexec/*' -a -type f -a \
         -executable -a -printf '%P\n' \) )

  # Bash debugger?
  src=~/src/bashdb-4.4-0.92
  _manifest shell/bashdb $src \
    $(find $src -name '*.sh' -a -printf '%P\n')

  src=~/git/other/Bash-Snippets
  _manifest "shell/$(basename $src)" $src \
    $(find $src \
      \( -name .git -a -prune \) -o \
      \( -type f -a -executable -a -printf '%P\n' \) )

  # ASDF meta package/version manager.
  # Note that the language-specific plugins are specified (as remote repos)
  # here: https://github.com/asdf-vm/asdf-plugins/tree/master/plugins
  # They could be used for more tests.

  src=~/git/other/asdf
  _manifest "shell/$(basename $src)" $src \
    $(find $src \( -name '*.sh' -o -name '*.bash' \) -a -printf '%P\n' )

  src=~/git/other/scripts-to-rule-them-all
  _manifest "shell/$(basename $src)" $src \
    $(find $src \
      \( -name .git -a -prune \) -o \
      \( -type f -a -executable -a -printf '%P\n' \) )

  src=~/git/wild/shell/shflags
  _manifest "shell/$(basename $src)" $src \
    $(find $src \
       '(' -name .git -a -prune ')' -o \
       '(' -name '*.sh' -o \
           -name shflags -o -name shlib -o -name shunit2 -o -name versions \
       ')' -a -printf '%P\n')

  _sh-manifest ~/hg/wild/shell-script-library shell
  _sh-manifest ~/git/other/modernish shell
  _sh-manifest ~/git/other/posixcube shell

  # Shells themselves
  _sh-manifest ~/git/languages/ast shell # korn shell stuff
  _sh-manifest ~/git/languages/mwc-sh shell
  _sh-manifest ~/src/mksh shell

  #
  # Linux Distros
  #

  _sh-manifest ~/git/other/minimal distro
  _sh-manifest ~/git/other/linuxkit distro
  _sh-manifest ~/git/other/portage distro
  _sh-manifest ~/git/wild/distro/woof-CE distro
  _sh-manifest ~/git/wild/distro/crankshaft distro

  src=~/git/alpine/aports
  _manifest distro/alpine-aports $src \
    $(find $src -name APKBUILD -a -printf '%P\n')

  src=$ABORIGINAL_DIR
  _manifest distro/aboriginal $src \
    $(find $src -name '*.sh' -printf '%P\n')

  src=/etc/init.d
  _manifest distro/initd $src \
    $(find $src -type f -a -executable -a -printf '%P\n')

  src=/usr/bin
  _manifest distro/usr-bin $src \
    $(find $src -name '*.sh' -a -printf '%P\n')

  # Version 1.0.89 extracts to a version-less dir.
  src=~/git/basis-build/_tmp/debootstrap
  _manifest distro/debootstrap $src \
    $(find $src '(' -name debootstrap -o -name functions ')' -a -printf '%P\n') \
    $(find $src/scripts -type f -a -printf 'scripts/%P\n')

  # There are a lot of dietpi-* bash scripts that aren't executable, for some
  # reason. Big hairy shell scripts here.
  src=~/git/wild/distro/DietPi
  _manifest distro/DietPi $src \
    $(find $src '(' -name '*.sh' -o -name 'dietpi-*' ')' -a -printf '%P\n') \

  src=~/src/grep-2.24
  _sh-manifest $src gnu

  src=~/src/coreutils-8.22
  _sh-manifest $src gnu

  src=~/src/glibc-2.23
  _sh-manifest $src gnu

  src=~/src/binutils-2.26
  _sh-manifest $src gnu

  #
  # Operating Systems
  #

  # Skip the autoconf stuff here. Could skip it elsewhere too.
  src=~/src/freebsd-11.1/usr/src
  _manifest freebsd-11.1 $src \
    $(find $src -name '*.sh' -a ! -name 'ltmain.sh' -a -printf '%P\n')

  _sh-manifest ~/git/other/minix
  _sh-manifest ~/git/other/illumos-gate
  _sh-manifest ~/git/other/daemontools-encore

  #
  # Cloud Stuff
  #
  _sh-manifest ~/git/other/mesos cloud
  _sh-manifest ~/git/other/rocket cloud
  _sh-manifest ~/git/other/docker cloud
  _sh-manifest ~/git/other/chef-bcpc cloud
  _sh-manifest ~/git/other/sandstorm cloud
  _sh-manifest ~/git/other/kubernetes cloud
  _sh-manifest ~/git/other/manta-marlin cloud

  src=~/git/other/dokku
  _manifest cloud/dokku $src \
    $(find $src '(' -name '*.sh' -o -name dokku ')' -a -printf '%P\n')

  #
  # Google
  #
  _sh-manifest ~/git/other/bazel google
  _sh-manifest ~/git/other/protobuf google
  _sh-manifest ~/git/other/kythe google
  _sh-manifest ~/git/other/tensorflow google
  # Filenames with spaces!
  #_sh-manifest ~/git/other/grpc google

  #
  # Esoteric
  #

  _sh-manifest ~/git/scratch/shasm esoteric
  _sh-manifest ~/git/wild/esoteric/wwwoosh esoteric
  _sh-manifest ~/git/wild/esoteric/lishp esoteric

  # OpenGL for bash is interesting because there is a lot of arithmetic.
  # Not surprisingly, there are 6 parse errors, almost all of which are due to
  # "dynamic arithmetic".
  src=~/git/other/CmdlineGL
  _manifest esoteric/CmdlineGL $src \
    $(find $src \
      -type f -a \
      '(' -name '*.sh' -o -name '*.lib' ')' -a \
      -printf '%P\n')

  src=~/git/wild/esoteric/setup.shl
  _manifest esoteric/setup.shl $src \
    $(find $src \
      -type f -a \
      '(' -name '*.shl' -o -name setup -o -name Setup ')' -a \
      -printf '%P\n')

  src=~/git/wild/esoteric/mal/bash
  _manifest esoteric/make-a-lisp-bash $src \
    $(find $src '(' -name '*.sh' ')' -a -printf '%P\n')

  src=~/git/wild/esoteric/gherkin
  _manifest esoteric/gherkin $src \
    $(find $src '(' -name '*.sh' -o -name 'gherkin' ')' -a -printf '%P\n')

  src=~/git/wild/esoteric/balls
  _manifest esoteric/balls $src \
    $(find $src '(' -name '*.sh' -o -name balls -o -name esh ')' -a \
                -printf '%P\n')

  src=~/git/wild/esoteric/bashcached
  _manifest esoteric/bashcached $src \
    $(find $src '(' -name '*.sh' -o -name 'bashcached' ')' -a -printf '%P\n')

  src=~/git/wild/esoteric/quinedb
  _manifest esoteric/quinedb $src \
    $(find $src '(' -name '*.sh' -o -name 'quinedb' ')' -a -printf '%P\n')

  src=~/git/wild/esoteric/bashttpd
  _manifest esoteric/bashttpd $src \
    $(find $src -name 'bashttpd' -a -printf '%P\n')

  # JSON Parsers
  src=~/git/other/j
  _manifest esoteric/j $src \
    $(find $src -type f -a -name j -a -printf '%P\n')

  _sh-manifest ~/git/other/JSON.sh esoteric

  #
  # Other Languages
  #

  _sh-manifest ~/git/languages/julia
  _sh-manifest ~/git/languages/reason
  _sh-manifest ~/git/languages/sdk # Dart SDK?

  _sh-manifest ~/git/languages/micropython
  _sh-manifest ~/git/other/staticpython # statically linked build

  _sh-manifest ~/git/other/exp # Go experimental repo

  #
  # Grab Bag
  #

  # This overlaps with git too much
  #src=~/git/other
  #local depth=3
  #_manifest git-other $src \
  # $(find $src -maxdepth $depth -name '*.sh' -a -printf '%P\n')

  src=~/hg/other
  _manifest hg-other $src \
    $(find $src -name '*.sh' -a -printf '%P\n')

  #
  # Top-Level
  #

  # Kernel
  _sh-manifest ~/src/linux-4.8.7

  # Git
  # git-gui.sh and po2msg.sh are actually Tcl! We could stop parsing at 'exec'
  # but there's no point right now.
  src=~/git/other/git
  _manifest $(basename $src) $src \
    $(find $src -name '*.sh' -a \
      ! -name 'git-gui.sh' \
      ! -name 'po2msg.sh' \
      -a -printf '%P\n')

  _sh-manifest ~/git/other/liballocs
  _sh-manifest ~/git/other/boringssl
  _sh-manifest ~/git/other/arrow

  #
  # Uncategorized
  #

  # Has filenames with spaces!
  #_sh-manifest ~/git/other/linguist

  # Brendan Gregg's performance scripts.
  # Find executable scripts, since they don't end in sh.
  # net/tcpretrans is written in Perl.
  src=~/git/other/perf-tools
  _manifest $(basename $src) $src \
    $(find $src \
      \( -name .git -a -prune \) -o \
      \( -name tcpretrans -a -prune \) -o \
      \( -type f -a -executable -a -printf '%P\n' \) )

  # Most of these scripts have no extension. So look at executable ones and
  # then see if the shebang ends with sh!

  # NOTE: In YSH it would be nice if shebang-is-shell could be a function call.
  # Don't need to fork every time.
  src=~/git/other/pixelb-scripts
  _manifest pixelb-scripts $src \
    $(find $src \( -name .git -a -prune \) -o \
      \( -type f -a \
         -executable -a \
         ! -name '*.py' -a \
         -exec test/shebang.sh is-shell {} ';' -a \
         -printf '%P\n' \) )

  # Something related to WebDriver
  # Doesn't parse because of extended glob.
  src=~/git/other/wd
  _manifest $(basename $src) $src \
    $(find $src -type f -a -name wd -a -printf '%P\n')

  #
  # Big
  #

  # Everything below this 'return' is unreachable.
  return
  # NOTE(review): 'log' is not defined in this file; presumably it comes from
  # one of the sourced files -- confirm before re-enabling this section.
  log "Finding Files in Big Projects"
  readonly BIG_BUILD_ROOT=/media/andy/hdd-8T/big-build/ssd-backup/sdb/build

  # 2m 18s the first time.
  # 2 seconds the second time. This is a big slow drive.
  time {
    _sh-manifest $BIG_BUILD_ROOT/hg/other/mozilla-central/

    _sh-manifest $BIG_BUILD_ROOT/chrome
    _configure-manifest $BIG_BUILD_ROOT/chrome

    _sh-manifest $BIG_BUILD_ROOT/android
    _configure-manifest $BIG_BUILD_ROOT/android

    _sh-manifest $BIG_BUILD_ROOT/openwrt
    _sh-manifest $BIG_BUILD_ROOT/OpenWireless
  }
}
|
| 426 |
|
| 427 | # TODO: Note: duplicated in wild-runner.sh
|
| 428 | readonly MANIFEST=_tmp/wild/MANIFEST.txt
|
| 429 |
|
# Regenerate $MANIFEST from all-manifests, then print its line count.
write-manifest() {
  mkdir -p _tmp/wild

  # The redirect truncates any previous manifest.
  all-manifests > $MANIFEST
  wc -l $MANIFEST
}
|
| 436 |
|
| 437 | # TODO: Publish this script
|
# Thin wrapper: forward all arguments, unchanged, to the 'multi' tool from a
# tree-tools checkout under the home directory.
multi() {
  ~/git/tree-tools/bin/multi "$@"
}
|
| 439 |
|
# Build _tmp/wild/wild-source.tar.gz from the files listed in $MANIFEST.
#
# Manifest columns:
#   $1 is project
#   $2 is abspath of source
#   $3 is rel path within project
#
# 'multi tar' is fed "<abspath> <project>/<rel path>" lines on stdin.
make-archive() {
  local archive=_tmp/wild/wild-source.tar.gz

  # Start from scratch instead of reusing an older archive.
  rm -f $archive

  awk '{ printf "%s %s/%s\n", $2, $1, $3 }' $MANIFEST |
    multi tar $archive

  ls -l $archive
}
|
| 451 |
|
# Build $MANIFEST by listing the files under $DEPS_WILD_DIR/src, as opposed to
# crawling the original corpus locations with 'find'.
#
# Each output line has two columns: the path relative to the src directory,
# then the same path with the $DEPS_WILD_DIR/src prefix.
manifest-from-archive() {
  local manifest_dir
  manifest_dir=$(dirname $MANIFEST)
  mkdir -p $manifest_dir

  find $DEPS_WILD_DIR/src -type f -printf '%P %p\n' > $MANIFEST
}
|
| 458 |
|
| 459 | # 442K lines without "big" and without ltmain.sh
|
| 460 | # TODO: Include a few ltmain.sh. Have to de-dupe them.
|
| 461 | #
|
| 462 | # 767K lines with aports (It's 250K lines by itself.)
|
| 463 |
|
| 464 | # 1.30 M lines with "big".
|
| 465 | # 760K lines without ltmain.sh. Hm need to get up to 1M.
|
| 466 |
|
# Print the second column of $MANIFEST, one value per line.
#
# Arguments:
#   $1 - optional project name; when given, only lines whose first column
#        equals it exactly are printed
abspaths() {
  local proj=${1:-}
  if test -n "$proj"; then
    # Quoted so that a filter containing whitespace stays a single -v
    # assignment instead of being split into extra awk arguments.
    awk -v proj="$proj" '$1 == proj {print $2}' "$MANIFEST"
  else
    awk '{print $2}' "$MANIFEST"
  fi
}
|
| 475 |
|
# Print a per-file line count for every manifest path, smallest first.
# Excluding ltmain.sh, goes from 910K lines to 830K.
count-lines() {
  # There are too many files for a single command line, and xargs would
  # split them across several wc invocations.  So the names are converted to
  # a NUL-separated list and handed to wc on stdin via --files0-from.
  #
  # It would be nicer if wc just had an option not to sum?
  time abspaths |
    grep --invert-match ltmain.sh |
    tr '\n' '\0' |
    wc --lines --files0-from=- |
    sort --numeric-sort
}
|
| 486 |
|
# Search the corpus (minus ltmain.sh) for lines that start with an indexed
# assignment such as a[x]=.
grep-features1() {
  # Hm only 608 files out of 10,000 use a[x]=
  # But it is used in
  # /home/andy/src/linux-4.8.7/scripts/decode_stacktrace.sh
  # portage, bash-completion, uses it
  #
  # 'grep -E' replaces the deprecated 'egrep' alias, which newer GNU grep
  # releases warn about on stderr.
  time abspaths | grep -v ltmain.sh |
    xargs grep -E '^[[:space:]]*[a-zA-Z0-9]+\[.*\]='
}
|
| 495 |
|
# Search the corpus (minus ltmain.sh) for the literal text '=([', i.e. the
# start of an associative array literal.
grep-features2() {
  # Outside of illumos/ast/esoteric, there's only one real usage of associative
  # array literals!
  # /home/andy/git/other/tensorflow/tensorflow/tools/ci_build/builds/pip.sh: WHL_TAGS=(["2.7"]="cp27-none" ["3.4"]="cp34-cp34m" ["3.5"]="cp35-cp35m")
  time abspaths |
    grep --invert-match ltmain.sh |
    xargs grep --fixed-strings '=(['
}
|
| 503 |
|
# Search the corpus (minus ltmain.sh) for the literal text ';&'.
grep-features3() {
  # Wow this is ONLY used in a handful of files in bash-completions! And tests.
  # That might be enough to justify it.
  time abspaths |
    grep --invert-match ltmain.sh |
    xargs grep --fixed-strings ';&'
}
|
| 510 |
|
# Search the whole corpus for '[[ ' followed later on the same line by one of
# the numeric comparison operators -eq -ne -le -ge -lt -gt.
#
# NOTE(review): the previous comment here was word-for-word the same as the
# one in grep-features3, so it probably did not describe this pattern.
grep-features4() {
  # Unlike the other grep-features functions, the ltmain.sh filter is
  # switched off here:
  #   grep -v ltmain.sh |
  time abspaths |
    xargs grep --extended-regexp '\[\[ .*-(eq|ne|le|ge|lt|gt)'

  # Alternative searches kept for reference:
  #xargs grep -E '\${[a-zA-Z0-9_]+\[[^@*]' # looks like ${a[i]}
  #xargs grep -F '$(('
}
|
| 519 |
|
| 520 | # Takes ~15 seconds for 8,000+ files.
|
| 521 | #
|
| 522 | # NOTE: APKBUILD don't have shebang lines! So there are a bunch of false
|
| 523 | # detections, e.g. APKBUILD as Makefile, C, etc.
|
# Run 'file' on every manifest path and save its output to
# _tmp/wild/file-types.txt.  The output passes through 'pv' on the way.
detect-all-types() {
  local out=_tmp/wild/file-types.txt

  time abspaths |
    xargs file |
    pv > $out
}
|
| 527 |
|
# Feed the saved 'file' output (_tmp/wild/file-types.txt) to
# test/wild_types.py on stdin.
wild-types() {
  # Redirect instead of 'cat |': if the input file is missing, the redirect
  # fails and the script is not run on empty input.
  test/wild_types.py < _tmp/wild/file-types.txt
}
|
| 531 |
|
all() {
  ### Run by devtools/release.sh

  # Make a report for all, but only run some

  # Every argument given to this function is passed on after the
  # 'parse-and-report' subcommand.
  local -a runner_args=(parse-and-report "$@")
  test/wild-runner.sh "${runner_args[@]}"
}
|
| 539 |
|
smoke-test() {
  ### Smoke test on Oils source; takes a few seconds

  # Restrict the run to the 'oil/' pattern.
  local pattern='oil/'
  all "$pattern"
}
|
| 544 |
|
# List the parse stderr files under _tmp/wild/raw that contain 'Traceback'.
find-tracebacks() {
  find _tmp/wild/raw -name '*__parse.stderr.txt*' |
    xargs grep --files-with-matches 'Traceback'
}
|
| 549 |
|
# Print the executable regular files under a directory whose name has no
# extension and which test/shebang.sh classifies as shell.  Anything named
# .git is pruned.
#
# Arguments:
#   $1 - directory to search
find-with-shebang() {
  local dir=$1

  # Look for files without an extension that have shell shebang lines.

  # Bad things about find:
  # * -regextype is part of the expression that always returns true, and has a
  #   side effect that only affects later clauses!
  # * there are TEN different kinds
  # * emacs is the default regex type!
  # * -regex matches the whole path, whereas -name only matches the name
  #   - should be name ~ /regex/ and path ~ /regex/
  #   - there is no way to search just the name for a regex
  # * no character classes in the default type
  #
  # https://www.gnu.org/software/findutils/manual/html_node/find_html/Regular-Expressions.html#Regular-Expressions

  # The regex matches the whole path, e.g. so freebsd-11.1 must be matched.

  # What might be faster here is to find all the executables first, then put
  # them in a text file. test/shebang.sh can be invoked with stdin as a path
  # list and filter them. It's not horribly slow though.

  # Looking for *.sh misses 590 files in FreeBSD. There are 1088 .sh files.

  # NOTE: Should get rid of most 'configure' scripts?

  # The regex comes first and the shebang check last, so the forked check
  # only runs for files that passed the other tests.
  time find $dir \
    '(' -name .git -a -prune ')' -o \
    '(' -regex '.+/[a-zA-Z0-9_\-]+' -a \
        -type f -a \
        -executable -a \
        -exec test/shebang.sh is-shell {} ';' -a \
        -printf '%p\n' ')'
}
|
| 585 |
|
# Print per-file line counts, smallest first, for the *.ebuild files in a
# Gentoo checkout under the home directory.
gentoo() {
  # 38,000 ebuild files
  # 2M lines, because there are a lot of duplicate versions.
  local src=~/git/gentoo/gentoo

  # NUL-separated names go straight from find into wc.
  time find $src -name '*.ebuild' -a -print0 |
    wc --lines --files0-from=- |
    sort --numeric-sort

  # Everything below this 'return' is unreachable.
  return
  _manifest distro/gentoo $src \
    $(find $src . -name '*.ebuild')
}
|
| 600 |
|
| 601 | #
|
| 602 | # ANALYSIS: Find Biggest Shell Scripts in Aboriginal Source Tarballs
|
| 603 | #
|
| 604 |
|
| 605 | readonly AB_PACKAGES=~/hg/scratch/aboriginal/aboriginal-1.2.2/packages
|
| 606 |
|
# List the tarballs in $AB_PACKAGES, one per line, as
# "<tarball path> <tar decompression flag> <package name>".
# All *.tar.gz entries (flag -z) come first, then all *.tar.bz2 (flag -j).
# The package name is the file name without its .tar.* suffix.
aboriginal-packages() {
  local z name  # keep both loop variables out of the global scope

  for z in "$AB_PACKAGES"/*.tar.gz; do
    # A glob that matches nothing is left as the literal pattern; skip it
    # rather than emitting an entry for a file that does not exist.
    test -e "$z" || continue
    name=$(basename "$z" .tar.gz)
    printf '%s %s %s\n' "$z" -z "$name"
  done

  for z in "$AB_PACKAGES"/*.tar.bz2; do
    test -e "$z" || continue
    name=$(basename "$z" .tar.bz2)
    printf '%s %s %s\n' "$z" -j "$name"
  done
}
|
| 617 |
|
| 618 | readonly AB_OUT=_tmp/aboriginal
|
| 619 |
|
# For every tarball reported by aboriginal-packages, write the verbose tar
# listing of its *.sh members to $AB_OUT/<package name>.txt, and print
# "<tarball> <package name>" as each one is processed.
aboriginal-manifest() {
  mkdir -p $AB_OUT

  local archive flag pkg
  aboriginal-packages | while read archive flag pkg; do
    echo $archive $pkg

    # grep exits non-zero when a tarball has no .sh members; '|| true' keeps
    # that from counting as a failure.
    tar --list --verbose $flag < $archive |
      grep '\.sh$' > $AB_OUT/${pkg}.txt || true
  done
}
|
| 629 |
|
# From the listings in $AB_OUT, print "<size> <filename>" for every entry,
# sorted by size with the biggest last.
aboriginal-biggest() {
  # In the verbose tar listing, field 3 is the size and field 6 the name.
  awk '{ printf "%s %s\n", $3, $6 }' $AB_OUT/*.txt |
    sort -n
}
|
| 634 |
|
| 635 | readonly AB_TIMES=_tmp/parse-aboriginal.csv
|
| 636 |
|
# Run 'bin/osh -n --ast-format none' on every *.sh file under
# $ABORIGINAL_DIR, one file per invocation.  Each invocation is wrapped in
# benchmarks/time_.py, which is told to append to $AB_TIMES.
parse-aboriginal() {
  # To just see the sizes instead:
  #find $ABORIGINAL_DIR -name '*.sh' | xargs wc -l | sort -n
  #return

  find $ABORIGINAL_DIR -name '*.sh' |
    xargs --max-args=1 -- \
      benchmarks/time_.py --append --output $AB_TIMES -- \
      bin/osh -n --ast-format none
}
|
| 645 |
|
| 646 | # 80 ms max. That is good enough for sure.
|
# Print the largest value found in the second comma-separated column of
# $AB_TIMES.
ab-times() {
  awk -F ',' '
    $2 > max_elapsed { max_elapsed = $2 }
    END { print(max_elapsed) }
  ' $AB_TIMES
}
|
| 650 |
|
| 651 | # biggest scripts besides ltmain:
|
| 652 | #
|
| 653 | # 8406 binutils-397a64b3/binutils/embedspu.sh
|
| 654 | # 8597 binutils-397a64b3/ld/emulparams/msp430all.sh
|
| 655 | # 9951 bash-2.05b/examples/scripts/dd-ex.sh
|
| 656 | # 12558 binutils-397a64b3/ld/genscripts.sh
|
| 657 | # 14148 bash-2.05b/examples/scripts/adventure.sh
|
| 658 | # 21811 binutils-397a64b3/gas/testsuite/gas/xstormy16/allinsn.sh
|
| 659 | # 28004 bash-2.05b/examples/scripts/bcsh.sh
|
| 660 | # 29666 gcc-4.2.1/ltcf-gcj.sh
|
| 661 | # 33972 gcc-4.2.1/ltcf-c.sh
|
| 662 | # 39048 gcc-4.2.1/ltcf-cxx.sh
|
| 663 |
|
| 664 | #
|
| 665 | # ANALYSIS: Number of comment lines
|
| 666 | #
|
| 667 | # TODO: Determine if we should try to save comment lines? I think we should
|
| 668 | # save more than that.
|
| 669 |
|
| 670 |
|
| 671 | #
|
| 672 | # ANALYSIS: Which scripts use set -C / set -o noclobber?
|
| 673 | #
|
| 674 |
|
| 675 | # VERY rare, only 13 instances, in ast, freebsd, and illumos-gate.
|
# Collect every corpus line that mentions 'noclobber' or starts with
# 'set -C' / 'set +C' into _tmp/noclobber.txt, then print its line count.
analyze-noclobber() {
  local out=_tmp/noclobber.txt
  mkdir -p _tmp

  # Ignore this script
  #
  # '+' is a repetition operator in extended regexes, so it sits in a bracket
  # expression here to match a literal plus sign.  The earlier '^set +C'
  # matched "set", spaces, "C" and missed a real 'set +C'.
  #
  # grep exits non-zero when nothing matches; '|| true' keeps that from
  # counting as a failure.
  time abspaths | grep -v 'test/wild.sh' |
    xargs grep -E 'noclobber|^set [-+]C' > "$out" || true
  wc -l "$out"
}
|
| 683 |
|
| 684 | # Quick golden test. Test that pretty-printing doesn't regress.
|
# Re-run this script as "<script> all esoteric".
golden-subset() {
  # Quoted so that a script path containing whitespace is still executed as
  # a single command word.
  "$0" all esoteric
}
|
| 688 |
|
| 689 | # Make a copy of the output for comparison.
|
# Copy the *__ast.html files under _tmp/wild-www/esoteric/ for later
# comparison.
#
# Arguments:
#   $1 - destination directory (default: _tmp/wild-gold)
#
# 'multi cp' is fed "<path as found> <path relative to the esoteric dir>"
# lines on stdin.
copy-golden-ast() {
  local target=${1:-_tmp/wild-gold}

  find _tmp/wild-www/esoteric/ -name '*__ast.html' -printf '%p %P\n' |
    multi cp $target
}
|
| 695 |
|
| 696 | # Find shell scripts on the root file system.
|
| 697 | # 1302 files on my system.
|
# Print the executable regular files on the root file system that
# test/shebang.sh classifies as shell, and save the list to _tmp/rootfs.txt.
# 1302 files on my system.
rootfs-manifest() {
  local -a roots=(/bin /lib /sbin /etc/ /opt /root /run /usr /var)

  # The shebang check forks once per candidate, so it comes after the
  # type and permission tests.
  find "${roots[@]}" \
    -type f -a \
    -executable -a \
    -exec test/shebang.sh is-shell {} ';' -a \
    -print |
    tee _tmp/rootfs.txt
}
|
| 705 |
|
# Run the wild tests for Soil.  The default subset is '^distro'; a non-empty
# $QUICKLY switches to the smaller '^oil' subset.
soil-run() {
  export-osh-cpp _tmp/native-tar-test opt

  # This takes longer than 15 minutes with build/dev.sh minimal !
  # That's with xargs -P $MAX_PROCS in test/wild-runner.sh
  #
  # The whole thing takes 7:25, which means that the 'wild' Soil job takes 10
  # minutes. It waits for the tarball, then tests it.
  #
  # For now, just do 'distro', since that's about half the files.
  #
  # Other subsets that could be used instead:
  #   all '^cloud'
  #   all '^cloud|^gnu|^freebsd'
  #   all
  local pattern='^distro'

  if test -n "${QUICKLY:-}"; then
    # Do a quick version
    pattern='^oil'
  fi

  all "$pattern"
}
|
| 727 |
|
# Dispatch: when this file is executed as wild.sh, run the function named by
# the first argument with the remaining arguments.  Under any other name
# nothing is run.
#
# The name is taken with "${0##*/}" because 'basename $0' word-splits a path
# with whitespace and treats a $0 that begins with '-' (e.g. '-bash') as an
# option.
if test "${0##*/}" = 'wild.sh'; then
  "$@"
fi
|