1 | #!/usr/bin/env bash
|
2 | #
|
3 | # Run the osh parser on shell scripts found in the wild.
|
4 | #
|
5 | # Usage:
|
6 | # test/wild.sh <function name>
|
7 | #
|
8 | # Examples:
|
9 | # test/wild.sh all
|
10 | # test/wild.sh all '^oil' # subset
|
11 |
|
12 | # TODO:
|
13 | # - Add more scripts, like gentoo package defs
|
14 |
|
# Strict mode: unset variables, failing pipeline stages and failing commands
# all abort the script.
set -o nounset -o pipefail -o errexit
shopt -s strict:all 2>/dev/null || true  # dogfood for OSH

source build/dev-shell.sh  # python2
source test/common.sh  # export-osh-cpp

# This persists across build/clean.sh
readonly DEPS_WILD_DIR=../oil_DEPS/wild
|
25 |
|
26 |
|
27 | #
|
28 | # Helpers
|
29 | #
|
30 |
|
# Print one manifest line per path, in the form:
#
#   <proj> <base_dir>/<path> <path>
#
# Arguments:
#   $1    - project name (first manifest column)
#   $2    - directory that the relative paths live under
#   $3... - paths relative to $2
# Outputs:
#   Manifest lines on stdout; nothing when no paths are given.
_manifest() {
  local proj=$1
  local base_dir=$2
  shift 2

  # Keep the loop variable out of the caller's scope.
  local path
  for path in "$@"; do
    # Quoted on purpose: a path containing glob characters must be emitted
    # verbatim, not re-expanded against the current directory.
    printf '%s %s %s\n' "$proj" "$base_dir/$path" "$path"
  done
}
|
40 |
|
41 | # generic helper
|
# Emit manifest lines (via _manifest) for every '*.sh' file under a directory.
#
# Arguments:
#   $1 - directory to search
#   $2 - optional category.  When given, the project name is
#        "<category>/<basename of $1>"; otherwise it is just the basename.
_sh-manifest() {
  local base_dir=$1
  local category=${2:-}

  local proj
  proj=$(basename "$base_dir")
  if test -n "$category"; then
    proj="$category/$proj"
  fi

  # The find output is deliberately left unquoted so that each relative path
  # becomes its own argument to _manifest.
  _manifest "$proj" "$base_dir" \
    $(find "$base_dir" -name '*.sh' -a -printf '%P\n')
}
|
56 |
|
# Emit manifest lines (via _manifest) for every file named 'configure' under a
# directory.  The project name is "<basename of $1>__configure".
#
# Arguments:
#   $1 - directory to search
_configure-manifest() {
  local base_dir=$1

  # Declared separately so a failing basename is not masked by 'local'.
  local name
  name=$(basename "$base_dir")

  # Search the directory that was passed in.  This used to search $src, a
  # variable this function never sets.
  _manifest "${name}__configure" "$base_dir" \
    $(find "$base_dir" -name 'configure' -a -printf '%P\n')
}
|
65 |
|
66 | #
|
67 | # Special Case Corpora Using Explicit Globs
|
68 | #
|
69 |
|
70 | # TODO: Where do we write the base dir?
|
# Print manifest lines for the shell scripts in the oil-sketch checkout: the
# top-level *.sh files plus those in a fixed set of subdirectories.
oil-sketch-manifest() {
  local base_dir=~/git/oil-sketch
  local name

  pushd "$base_dir" >/dev/null || return
  for name in *.sh {awk,demo,make,misc,regex,tools}/*.sh; do
    # A glob that matches nothing is left as the literal pattern; skip it
    # rather than listing a file that doesn't exist.
    test -e "$name" || continue
    printf '%s %s %s\n' oil-sketch "$base_dir/$name" "$name"
  done
  popd >/dev/null
}
|
79 |
|
# Print manifest lines for this repo's own scripts, relative to the current
# directory: 'configure', 'install', the top-level *.sh files and the *.sh
# files in a fixed set of subdirectories.
oil-manifest() {
  local base_dir=$PWD
  local name

  for name in \
    configure install *.sh {benchmarks,build,test,scripts,opy}/*.sh; do
    # A glob that matches nothing is left as the literal pattern, and a listed
    # file may be absent; skip anything that doesn't exist.
    test -e "$name" || continue
    printf '%s %s %s\n' oil "$base_dir/$name" "$name"
  done
}
|
87 |
|
88 | readonly ABORIGINAL_DIR=~/src/aboriginal-1.4.5
|
89 |
|
90 | #
|
91 | # All
|
92 | #
|
93 |
|
# Print the manifest for every corpus by calling oil-manifest, _manifest and
# _sh-manifest on a fixed list of checkout locations.
#
# Notes:
#   - Each $(find ...) is left unquoted on purpose: every relative path it
#     prints becomes one argument to _manifest.
#   - The function returns before the "Big" section, so that part is disabled.
all-manifests() {
  # Don't expose this repo for now
  #oil-sketch-manifest

  #
  # My Code
  #

  oil-manifest

  local src

  #
  # Books
  #
  src=~/git/wild/books/wicked_cool_shell_scripts_2e
  _manifest "books/$(basename "$src")" "$src" \
    $(find "$src" \
      \( -name .git -a -prune \) -o \
      \( -type f -a -executable -a -printf '%P\n' \) )

  #
  # Shell/Bash Frameworks/Collections
  #

  src=~/git/other/bash-completion
  _manifest "shell/$(basename "$src")" "$src" \
    $(find "$src/completions" -type f -a -printf 'completions/%P\n')

  # Bats bash test framework.  It appears to be fairly popular.
  src=~/git/other/bats
  _manifest "shell/$(basename "$src")" "$src" \
    $(find "$src" \
      \( -wholename '*/libexec/*' -a -type f -a \
         -executable -a -printf '%P\n' \) )

  # Bash debugger?
  src=~/src/bashdb-4.4-0.92
  _manifest shell/bashdb "$src" \
    $(find "$src" -name '*.sh' -a -printf '%P\n')

  src=~/git/other/Bash-Snippets
  _manifest "shell/$(basename "$src")" "$src" \
    $(find "$src" \
      \( -name .git -a -prune \) -o \
      \( -type f -a -executable -a -printf '%P\n' \) )

  # ASDF meta package/version manager.
  # Note that the language-specific plugins are specified (as remote repos)
  # here: https://github.com/asdf-vm/asdf-plugins/tree/master/plugins
  # They could be used for more tests.

  src=~/git/other/asdf
  _manifest "shell/$(basename "$src")" "$src" \
    $(find "$src" \( -name '*.sh' -o -name '*.bash' \) -a -printf '%P\n' )

  src=~/git/other/scripts-to-rule-them-all
  _manifest "shell/$(basename "$src")" "$src" \
    $(find "$src" \
      \( -name .git -a -prune \) -o \
      \( -type f -a -executable -a -printf '%P\n' \) )

  src=~/git/wild/shell/shflags
  _manifest "shell/$(basename "$src")" "$src" \
    $(find "$src" \
      '(' -name .git -a -prune ')' -o \
      '(' -name '*.sh' -o \
          -name shflags -o -name shlib -o -name shunit2 -o -name versions \
      ')' -a -printf '%P\n')

  _sh-manifest ~/hg/wild/shell-script-library shell
  _sh-manifest ~/git/other/modernish shell
  _sh-manifest ~/git/other/posixcube shell

  # Shells themselves
  _sh-manifest ~/git/languages/ast shell  # korn shell stuff
  _sh-manifest ~/git/languages/mwc-sh shell
  _sh-manifest ~/src/mksh shell

  #
  # Linux Distros
  #

  _sh-manifest ~/git/other/minimal distro
  _sh-manifest ~/git/other/linuxkit distro
  _sh-manifest ~/git/other/portage distro
  _sh-manifest ~/git/wild/distro/woof-CE distro
  _sh-manifest ~/git/wild/distro/crankshaft distro

  src=~/git/alpine/aports
  _manifest distro/alpine-aports "$src" \
    $(find "$src" -name APKBUILD -a -printf '%P\n')

  src=$ABORIGINAL_DIR
  _manifest distro/aboriginal "$src" \
    $(find "$src" -name '*.sh' -printf '%P\n')

  src=/etc/init.d
  _manifest distro/initd "$src" \
    $(find "$src" -type f -a -executable -a -printf '%P\n')

  src=/usr/bin
  _manifest distro/usr-bin "$src" \
    $(find "$src" -name '*.sh' -a -printf '%P\n')

  # Version 1.0.89 extracts to a version-less dir.
  src=~/git/basis-build/_tmp/debootstrap
  _manifest distro/debootstrap "$src" \
    $(find "$src" '(' -name debootstrap -o -name functions ')' -a -printf '%P\n') \
    $(find "$src/scripts" -type f -a -printf 'scripts/%P\n')

  # There are lot of dietpi-* bash scripts that aren't executable, for some
  # reason.  Big hairy shell scripts here.
  src=~/git/wild/distro/DietPi
  _manifest distro/DietPi "$src" \
    $(find "$src" '(' -name '*.sh' -o -name 'dietpi-*' ')' -a -printf '%P\n')

  src=~/src/grep-2.24
  _sh-manifest "$src" gnu

  src=~/src/coreutils-8.22
  _sh-manifest "$src" gnu

  src=~/src/glibc-2.23
  _sh-manifest "$src" gnu

  src=~/src/binutils-2.26
  _sh-manifest "$src" gnu

  #
  # Operating Systems
  #

  # Skip the autoconf stuff here.  Could skip it elsewhere too.
  src=~/src/freebsd-11.1/usr/src
  _manifest freebsd-11.1 "$src" \
    $(find "$src" -name '*.sh' -a ! -name 'ltmain.sh' -a -printf '%P\n')

  _sh-manifest ~/git/other/minix
  _sh-manifest ~/git/other/illumos-gate
  _sh-manifest ~/git/other/daemontools-encore

  #
  # Cloud Stuff
  #
  _sh-manifest ~/git/other/mesos cloud
  _sh-manifest ~/git/other/rocket cloud
  _sh-manifest ~/git/other/docker cloud
  _sh-manifest ~/git/other/chef-bcpc cloud
  _sh-manifest ~/git/other/sandstorm cloud
  _sh-manifest ~/git/other/kubernetes cloud
  _sh-manifest ~/git/other/manta-marlin cloud

  src=~/git/other/dokku
  _manifest cloud/dokku "$src" \
    $(find "$src" '(' -name '*.sh' -o -name dokku ')' -a -printf '%P\n')

  #
  # Google
  #
  _sh-manifest ~/git/other/bazel google
  _sh-manifest ~/git/other/protobuf google
  _sh-manifest ~/git/other/kythe google
  _sh-manifest ~/git/other/tensorflow google
  # Filenames with spaces!
  #_sh-manifest ~/git/other/grpc google

  #
  # Esoteric
  #

  _sh-manifest ~/git/scratch/shasm esoteric
  _sh-manifest ~/git/wild/esoteric/wwwoosh esoteric
  _sh-manifest ~/git/wild/esoteric/lishp esoteric

  # OpenGL for bash is interesting because there is a lot of arithmetic.
  # Not surprisingly, there are 6 parse errors, almost all of which are due to
  # "dynamic arithmetic".
  src=~/git/other/CmdlineGL
  _manifest esoteric/CmdlineGL "$src" \
    $(find "$src" \
      -type f -a \
      '(' -name '*.sh' -o -name '*.lib' ')' -a \
      -printf '%P\n')

  src=~/git/wild/esoteric/setup.shl
  _manifest esoteric/setup.shl "$src" \
    $(find "$src" \
      -type f -a \
      '(' -name '*.shl' -o -name setup -o -name Setup ')' -a \
      -printf '%P\n')

  src=~/git/wild/esoteric/mal/bash
  _manifest esoteric/make-a-lisp-bash "$src" \
    $(find "$src" '(' -name '*.sh' ')' -a -printf '%P\n')

  src=~/git/wild/esoteric/gherkin
  _manifest esoteric/gherkin "$src" \
    $(find "$src" '(' -name '*.sh' -o -name 'gherkin' ')' -a -printf '%P\n')

  src=~/git/wild/esoteric/balls
  _manifest esoteric/balls "$src" \
    $(find "$src" '(' -name '*.sh' -o -name balls -o -name esh ')' -a \
      -printf '%P\n')

  src=~/git/wild/esoteric/bashcached
  _manifest esoteric/bashcached "$src" \
    $(find "$src" '(' -name '*.sh' -o -name 'bashcached' ')' -a -printf '%P\n')

  src=~/git/wild/esoteric/quinedb
  _manifest esoteric/quinedb "$src" \
    $(find "$src" '(' -name '*.sh' -o -name 'quinedb' ')' -a -printf '%P\n')

  src=~/git/wild/esoteric/bashttpd
  _manifest esoteric/bashttpd "$src" \
    $(find "$src" -name 'bashttpd' -a -printf '%P\n')

  # JSON Parsers
  src=~/git/other/j
  _manifest esoteric/j "$src" \
    $(find "$src" -type f -a -name j -a -printf '%P\n')

  _sh-manifest ~/git/other/JSON.sh esoteric

  #
  # Other Languages
  #

  _sh-manifest ~/git/languages/julia
  _sh-manifest ~/git/languages/reason
  _sh-manifest ~/git/languages/sdk  # Dart SDK?

  _sh-manifest ~/git/languages/micropython
  _sh-manifest ~/git/other/staticpython  # statically linked build

  _sh-manifest ~/git/other/exp  # Go experimental repo

  #
  # Grab Bag
  #

  # This overlaps with git too much
  #src=~/git/other
  #local depth=3
  #_manifest git-other $src \
  #  $(find $src -maxdepth $depth -name '*.sh' -a -printf '%P\n')

  src=~/hg/other
  _manifest hg-other "$src" \
    $(find "$src" -name '*.sh' -a -printf '%P\n')

  #
  # Top-Level
  #

  # Kernel
  _sh-manifest ~/src/linux-4.8.7

  # Git
  # git-gui.sh and po2msg.sh are actually Tcl!  We could stop parsing at 'exec'
  # but there's no point right now.
  src=~/git/other/git
  _manifest "$(basename "$src")" "$src" \
    $(find "$src" -name '*.sh' -a \
      ! -name 'git-gui.sh' \
      ! -name 'po2msg.sh' \
      -a -printf '%P\n')

  _sh-manifest ~/git/other/liballocs
  _sh-manifest ~/git/other/boringssl
  _sh-manifest ~/git/other/arrow

  #
  # Uncategorized
  #

  # Has filenames with spaces!
  #_sh-manifest ~/git/other/linguist

  # Brendan Gregg's performance scripts.
  # Find executable scripts, since they don't end in sh.
  # net/tcpretrans is written in Perl.
  src=~/git/other/perf-tools
  _manifest "$(basename "$src")" "$src" \
    $(find "$src" \
      \( -name .git -a -prune \) -o \
      \( -name tcpretrans -a -prune \) -o \
      \( -type f -a -executable -a -printf '%P\n' \) )

  # Most of these scripts have no extension.  So look at executable ones and
  # then see if the shebang ends with sh!

  # NOTE: In YSH it would be nice if shebang-is-shell could be a function call.
  # Don't need to fork every time.
  src=~/git/other/pixelb-scripts
  _manifest pixelb-scripts "$src" \
    $(find "$src" \( -name .git -a -prune \) -o \
      \( -type f -a \
         -executable -a \
         ! -name '*.py' -a \
         -exec test/shebang.sh is-shell {} ';' -a \
         -printf '%P\n' \) )

  # Something related to WebDriver
  # Doesn't parse because of extended glob.
  src=~/git/other/wd
  _manifest "$(basename "$src")" "$src" \
    $(find "$src" -type f -a -name wd -a -printf '%P\n')

  #
  # Big
  #

  # Everything below this 'return' is unreachable, i.e. currently disabled.
  return
  log "Finding Files in Big Projects"
  readonly BIG_BUILD_ROOT=/media/andy/hdd-8T/big-build/ssd-backup/sdb/build

  # 2m 18s the first time.
  # 2 seconds the second time.  This is a big slow drive.
  time {
    _sh-manifest "$BIG_BUILD_ROOT/hg/other/mozilla-central/"

    _sh-manifest "$BIG_BUILD_ROOT/chrome"
    _configure-manifest "$BIG_BUILD_ROOT/chrome"

    _sh-manifest "$BIG_BUILD_ROOT/android"
    _configure-manifest "$BIG_BUILD_ROOT/android"

    _sh-manifest "$BIG_BUILD_ROOT/openwrt"
    _sh-manifest "$BIG_BUILD_ROOT/OpenWireless"
  }
}
|
426 |
|
427 | # TODO: Note: duplicated in wild-runner.sh
|
428 | readonly MANIFEST=_tmp/wild/MANIFEST.txt
|
429 |
|
# Regenerate the manifest file from all-manifests, then print its line count.
write-manifest() {
  local manifest_path=$MANIFEST

  mkdir -p _tmp/wild
  all-manifests >"$manifest_path"
  wc -l "$manifest_path"
}
|
436 |
|
437 | # TODO: Publish this script
|
# Forward all arguments to the 'multi' executable in the tree-tools checkout.
multi() {
  ~/git/tree-tools/bin/multi "$@"
}
|
439 |
|
# Build the source tarball from the manifest via 'multi tar', then list it.
make-archive() {
  # Manifest columns:
  #   $1 project
  #   $2 abspath of source
  #   $3 rel path within project
  local tarball=_tmp/wild/wild-source.tar.gz

  rm -f "$tarball"

  # 'multi tar' is fed "<abspath> <project>/<relpath>" lines on stdin.
  awk '{print $2 " " $1 "/" $3 }' "$MANIFEST" | multi tar "$tarball"

  ls -l "$tarball"
}
|
451 |
|
452 | # This is opposed to crawling the file system with 'find'.
|
# Write the manifest by listing the regular files under $DEPS_WILD_DIR/src.
manifest-from-archive() {
  mkdir -p "$(dirname "$MANIFEST")"

  # Each line is the path relative to the src dir, then the path as found.
  find "$DEPS_WILD_DIR/src" -type f -a -printf '%P %p\n' >"$MANIFEST"
}
|
458 |
|
459 | # 442K lines without "big" and without ltmain.sh
|
460 | # TODO: Include a few ltmain.sh. Have to de-dupe them.
|
461 | #
|
462 | # 767K lines with aports (It's 250K lines by itself.)
|
463 |
|
464 | # 1.30 M lines with "big".
|
465 | # 760K lines without ltmain.sh. Hm need to get up to 1M.
|
466 |
|
# Print the second manifest column (the absolute path) of each entry.
#
# Arguments:
#   $1 - optional project name; when given, only entries whose first column
#        equals it are printed.
abspaths() {
  local proj=${1:-}

  # The filter is quoted so that it reaches awk as a single value even if it
  # contains whitespace or glob characters.  An empty filter selects all rows.
  awk -v proj="$proj" 'proj == "" || $1 == proj {print $2}' "$MANIFEST"
}
|
475 |
|
476 | # Excluding ltmain.sh, goes from 910K lines to 830K.
|
# Print per-file line counts for the manifest, sorted ascending.  Paths that
# match 'ltmain.sh' are left out.
count-lines() {
  # We need this weird --files0-from because there are too many files.  xargs
  # would split it into multiple invocations.
  #
  # It would be nicer if wc just had an option not to sum?
  time abspaths \
    | grep -v ltmain.sh \
    | tr '\n' '\0' \
    | wc -l --files0-from - \
    | sort -n
}
|
486 |
|
# Search the manifest's files (minus ltmain.sh) for indexed assignments of the
# form a[x]=
grep-features1() {
  # Hm only 608 files out of 10,000 use a[x]=
  # But it is used in
  # /home/andy/src/linux-4.8.7/scripts/decode_stacktrace.sh
  # portage, bash-completion, uses it

  # 'grep -E' rather than 'egrep': the latter is obsolescent and newer GNU grep
  # prints a warning each time it is run.
  time abspaths | grep -v ltmain.sh |
    xargs grep -E '^[[:space:]]*[a-zA-Z0-9]+\[.*\]='
}
|
495 |
|
# Search the manifest's files (minus ltmain.sh) for the literal text '=(['.
grep-features2() {
  # Outside of illumos/ast/esoteric, there's only one real usage of associative
  # array literals!
  # /home/andy/git/other/tensorflow/tensorflow/tools/ci_build/builds/pip.sh: WHL_TAGS=(["2.7"]="cp27-none" ["3.4"]="cp34-cp34m" ["3.5"]="cp35-cp35m")
  time abspaths \
    | grep -v ltmain.sh \
    | xargs grep -F '=(['
}
|
503 |
|
# Search the manifest's files (minus ltmain.sh) for the literal text ';&'.
grep-features3() {
  # Wow this is ONLY used in a handful of files in bash-completions!  And tests.
  # That might be enough to justify it.
  time abspaths \
    | grep -v ltmain.sh \
    | xargs grep -F ';&'
}
|
510 |
|
# Search the manifest's files for '[[ ' followed later on the same line by one
# of -eq -ne -le -ge -lt -gt.
grep-features4() {
  # The ltmain.sh filter used by the sibling functions is switched off here:
  #   | grep -v ltmain.sh
  time abspaths \
    | xargs grep -E '\[\[ .*-(eq|ne|le|ge|lt|gt)'

  # Other patterns that were tried:
  #xargs grep -E '\${[a-zA-Z0-9_]+\[[^@*]'  # looks like ${a[i]}
  #xargs grep -F '$(('
}
|
519 |
|
520 | # Takes ~15 seconds for 8,000+ files.
|
521 | #
|
522 | # NOTE: APKBUILD don't have shebang lines! So there are a bunch of false
|
523 | # detections, e.g. APKBUILD as Makefile, C, etc.
|
# Run 'file' over every path in the manifest and save the output, piping it
# through 'pv' on the way.
detect-all-types() {
  local report=_tmp/wild/file-types.txt

  time abspaths \
    | xargs file \
    | pv >"$report"
}
|
527 |
|
# Feed the saved 'file' output (see detect-all-types) to test/wild_types.py.
wild-types() {
  # Redirect instead of 'cat |': no extra process, and if the report is
  # missing the analyzer is not started on empty input.
  test/wild_types.py < _tmp/wild/file-types.txt
}
|
531 |
|
all() {
  ### Run by devtools/release.sh

  # Make a report for all, but only run some

  # All arguments are passed straight through to the runner.
  local runner=test/wild-runner.sh
  "$runner" parse-and-report "$@"
}
|
539 |
|
smoke-test() {
  ### Smoke test on Oils source; takes a few seconds

  # Delegates to 'all' with the single argument 'oil/'.
  all oil/
}
|
544 |
|
# List the parse stderr files under _tmp/wild/raw that contain 'Traceback'.
find-tracebacks() {
  find _tmp/wild/raw -name '*__parse.stderr.txt*' \
    | xargs grep -l 'Traceback'
}
|
549 |
|
# Print executable regular files under a directory whose path passes the regex
# below and which 'test/shebang.sh is-shell' accepts.  .git directories are
# pruned.
#
# Arguments:
#   $1 - directory to search
find-with-shebang() {
  local search_root=$1

  # Look for files without an extension that have shell shebang lines.

  # Bad things about find:
  # * -regextype is part of the expression that always returns true, and has a
  #   side effect that only affects later clauses!
  # * there are TEN different kinds
  # * emacs is the default regex type!
  # * -regex matches the whole path, whereas -name only matches the name
  #   - should be name ~ /regex/ and path ~ /regex/
  #   - there is no way to search just the name for a regex
  # * no character classes in the default type
  #
  # https://www.gnu.org/software/findutils/manual/html_node/find_html/Regular-Expressions.html#Regular-Expressions

  # The regex matches the whole path, e.g. so freebsd-11.1 must be matched.

  # What might be faster here is to find all the executables first, then put
  # them in a text file.  test/shebang.sh can be invoked with stdin as a path
  # list and filter them.  It's not horribly slow though.

  # Looking for *.sh misses 590 files in FreeBSD.  There are 1088 .sh files.

  # NOTE: Should get rid of most 'configure' scripts?

  time find $search_root \
    \( -name .git -a -prune \) \
    -o \
    \( -regex '.+/[a-zA-Z0-9_\-]+' \
       -a -type f \
       -a -executable \
       -a -exec test/shebang.sh is-shell {} ';' \
       -a -printf '%p\n' \)
}
|
585 |
|
# Print per-file line counts of the *.ebuild files in the gentoo checkout,
# sorted ascending.  The manifest generation after 'return' is disabled.
gentoo() {
  # 38,000 ebuild files
  local src
  src=~/git/gentoo/gentoo

  # 2M lines, because there are a lot of duplicate versions.

  time find "$src" -name '*.ebuild' -a -print0 |
    wc -l --files0-from - | sort -n

  return
  # Unreachable for now.  Fixed so it is right if re-enabled: search only
  # $src (not '.' as well) and print paths relative to it, which is what
  # _manifest expects.
  _manifest distro/gentoo "$src" \
    $(find "$src" -name '*.ebuild' -a -printf '%P\n')
}
|
600 |
|
601 | #
|
602 | # ANALYSIS: Find Biggest Shell Scripts in Aboriginal Source Tarballs
|
603 | #
|
604 |
|
605 | readonly AB_PACKAGES=~/hg/scratch/aboriginal/aboriginal-1.2.2/packages
|
606 |
|
# Print one line per source tarball in $AB_PACKAGES:
#
#   <tarball path> <tar decompression flag> <name without extension>
#
# The flag is -z for *.tar.gz and -j for *.tar.bz2.
aboriginal-packages() {
  local z name

  for z in "$AB_PACKAGES"/*.tar.gz; do
    # An unmatched glob stays as the literal pattern; don't report it.
    test -e "$z" || continue
    name=$(basename "$z" .tar.gz)
    printf '%s %s %s\n' "$z" -z "$name"
  done

  for z in "$AB_PACKAGES"/*.tar.bz2; do
    test -e "$z" || continue
    name=$(basename "$z" .tar.bz2)
    printf '%s %s %s\n' "$z" -j "$name"
  done
}
|
617 |
|
618 | readonly AB_OUT=_tmp/aboriginal
|
619 |
|
# For each tarball reported by aboriginal-packages, echo "<tarball> <name>"
# and save the verbose tar listing of its *.sh members to
# $AB_OUT/<name>.txt.
aboriginal-manifest() {
  mkdir -p "$AB_OUT"

  local z tar_flag name listing
  # -r: keep backslashes in the fields as they are.
  aboriginal-packages | while read -r z tar_flag name; do
    echo "$z $name"
    listing=$AB_OUT/${name}.txt
    # grep exits non-zero when a tarball has no *.sh members; that is fine.
    tar --list --verbose "$tar_flag" < "$z" | grep '\.sh$' > "$listing" || true
  done
}
|
629 |
|
# Print columns 3 and 6 (size and filename) of every saved listing in
# $AB_OUT, sorted numerically.
aboriginal-biggest() {
  cat $AB_OUT/*.txt \
    | awk '{print $3 " " $6}' \
    | sort -n
}
|
634 |
|
635 | readonly AB_TIMES=_tmp/parse-aboriginal.csv
|
636 |
|
# Run 'bin/osh -n' under benchmarks/time_.py once per *.sh file in
# $ABORIGINAL_DIR, appending the timings to $AB_TIMES.
parse-aboriginal() {
  #find $ABORIGINAL_DIR -name '*.sh' | xargs wc -l | sort -n
  #return

  # xargs appends one file path to this command for each invocation.
  local -a timed_cmd=(
    benchmarks/time_.py --append --output $AB_TIMES --
    bin/osh -n --ast-format none
  )

  find $ABORIGINAL_DIR -name '*.sh' \
    | xargs -n 1 -- "${timed_cmd[@]}"
}
|
645 |
|
646 | # 80 ms max. That is good enough for sure.
|
# Print the largest value found in the second comma-separated column of
# $AB_TIMES.
ab-times() {
  local times_csv=$AB_TIMES

  awk -F ',' '{ if ($2 > max_elapsed) max_elapsed = $2 } END { print(max_elapsed) }' "$times_csv"
}
|
650 |
|
651 | # biggest scripts besides ltmain:
|
652 | #
|
653 | # 8406 binutils-397a64b3/binutils/embedspu.sh
|
654 | # 8597 binutils-397a64b3/ld/emulparams/msp430all.sh
|
655 | # 9951 bash-2.05b/examples/scripts/dd-ex.sh
|
656 | # 12558 binutils-397a64b3/ld/genscripts.sh
|
657 | # 14148 bash-2.05b/examples/scripts/adventure.sh
|
658 | # 21811 binutils-397a64b3/gas/testsuite/gas/xstormy16/allinsn.sh
|
659 | # 28004 bash-2.05b/examples/scripts/bcsh.sh
|
660 | # 29666 gcc-4.2.1/ltcf-gcj.sh
|
661 | # 33972 gcc-4.2.1/ltcf-c.sh
|
662 | # 39048 gcc-4.2.1/ltcf-cxx.sh
|
663 |
|
664 | #
|
665 | # ANALYSIS: Number of comment lines
|
666 | #
|
667 | # TODO: Determine if we should try to save comment lines? I think we should
|
668 | # save more than that.
|
669 |
|
670 |
|
671 | #
|
672 | # ANALYSIS: Which scripts use set -C / set -o noclobber?
|
673 | #
|
674 |
|
675 | # VERY rare, only 13 instances, in ast, freebsd, and illumos-gate.
|
# Collect every line in the manifest's files that mentions 'noclobber' or
# starts with 'set -C' / 'set +C' into _tmp/noclobber.txt, then print how many
# lines were found.
analyze-noclobber() {
  local out=_tmp/noclobber.txt
  # Ignore this script
  #
  # The '+' is escaped because in an extended regex a bare ' +' means "one or
  # more spaces", which never matches a literal 'set +C'.
  #
  # '|| true': finding no matches at all is not an error.
  time abspaths | grep -v 'test/wild.sh' |
    xargs grep -E 'noclobber|^set -C|^set \+C' > "$out" || true
  wc -l "$out"
}
|
683 |
|
684 | # Quick golden test. Test that pretty-printing doesn't regress.
|
# Re-invoke this script to run 'all' with the argument 'esoteric'.
golden-subset() {
  "$0" all esoteric
}
|
688 |
|
689 | # Make a copy of the output for comparison.
|
# Copy the '*__ast.html' files under _tmp/wild-www/esoteric/ with 'multi cp'.
#
# Arguments:
#   $1 - destination passed to 'multi cp' (default: _tmp/wild-gold)
copy-golden-ast() {
  local target_dir=${1:-_tmp/wild-gold}

  # Each line fed to 'multi cp' is "<path as found> <path relative to the
  # search root>".
  find _tmp/wild-www/esoteric/ -name '*__ast.html' -a -printf '%p %P\n' |
    multi cp $target_dir
}
|
695 |
|
696 | # Find shell scripts on the root file system.
|
697 | # 1302 files on my system.
|
# Print the executable regular files in a fixed list of root file system
# directories that 'test/shebang.sh is-shell' accepts, and save the same list
# to _tmp/rootfs.txt.
rootfs-manifest() {
  local listing=_tmp/rootfs.txt

  find /bin /lib /sbin /etc/ /opt /root /run /usr /var \
    -type f -a -executable -a \
    -exec test/shebang.sh is-shell {} ';' -a \
    -print \
    | tee "$listing"
}
|
705 |
|
# Call export-osh-cpp, then run 'all' on a subset of the corpus: '^oil' when
# $QUICKLY is non-empty, '^distro' otherwise.
soil-run() {
  export-osh-cpp _tmp/native-tar-test opt

  # This takes longer than 15 minutes with build/dev.sh minimal !
  # That's with xargs -P $MAX_PROCS in test/wild-runner.sh

  # The whole thing takes 7:25, which means that the 'wild' Soil job takes 10
  # minutes.  It waits for the tarball, then tests it.
  #
  # For now, just do 'distro', since that's about half the files.
  #
  # Other filters that have been used:
  #   all '^cloud'
  #   all '^cloud|^gnu|^freebsd'
  #   all
  local filter='^distro'

  if test -n "${QUICKLY:-}"; then
    # Do a quick version
    filter='^oil'
  fi

  all "$filter"
}
|
727 |
|
# Run the function named by the arguments, but only when this file is executed
# under the name wild.sh.
case "$(basename $0)" in
  wild.sh)
    "$@"
    ;;
esac
|