pea/TEST.sh

OILS / pea / TEST.sh View on Github | oils.pub

411 lines, 176 significant

#!/usr/bin/env bash
#
# Experiments for Pea, a potential rewrite of mycpp.
#
# Usage:
#   pea/TEST.sh <function name>

# Location of the OSH shell library; kept if the caller already set it.
LIB_OSH=${LIB_OSH-stdlib/osh}
. $LIB_OSH/bash-strict.sh
. $LIB_OSH/task-five.sh
. $LIB_OSH/no-quotes.sh

. devtools/common.sh

# Needed so that python3 is found in the /wedge PATH component
. build/dev-shell.sh

# Virtualenv directory shared by install-latest-mypy and check-with-latest-mypy
readonly MYPY_VENV=_tmp/mypy-venv
18
show-python-config() {
  ### Print the python3 path, its version, and PYTHONPATH, each followed by a blank line

  which python3
  printf '\n'

  python3 -V
  printf '\n'

  # Left unquoted on purpose, so the printed form is unchanged
  echo PYTHONPATH=$PYTHONPATH
  printf '\n'
}
29
install-latest-mypy() {
  ### Recreate the venv at $MYPY_VENV and pip-install MyPy into it

  local venv_dir=$MYPY_VENV

  export PYTHONPATH=.

  # Start from scratch every time
  rm -r -f -v $venv_dir

  # Config before the venv exists ...
  show-python-config

  echo "Creating venv in $venv_dir"
  python3 -m venv $venv_dir

  source $venv_dir/bin/activate

  # ... and again once it is active, for comparison
  echo "venv $venv_dir is activated"
  show-python-config

  python3 -m pip install mypy

  # Versions observed so far:
  #   2022: 1.5.1 (compiled: yes)
  #   2024-12 Debian desktop: 1.13.0 (compiled: yes)
  #   2024-12 Soil CI image: 1.10.0
  python3 -m mypy --version
}
55
pea-files() {
  ### Print every pea/*.py path, one per line, except *NINJA_subgraph.py

  # Keep the loop variable out of the caller's scope
  local f
  for f in pea/*.py; do
    case $f in
      *NINJA_subgraph.py)
        continue
        ;;
    esac

    echo $f
  done
}
67
count-lines() {
  ### Print the line count of each file listed by pea-files

  # A real pipeline: an escaped '\|' would hand '|' to pea-files as an argument
  pea-files | xargs wc -l
}
71
_check-types() {
  ### Print PYTHONPATH and the MyPy version, then run mypy --strict on pea-files

  echo PYTHONPATH=$PYTHONPATH
  echo

  python3 -m mypy --version
  echo

  # 'time' covers the whole pipeline; the file list reaches mypy through xargs
  time pea-files | xargs python3 -m mypy --strict
}
81
check-with-our-mypy() {
  ### Type-check pea with whichever MyPy the current environment provides

  _check-types
}
85
check-with-latest-mypy() {
  ### soil/worker.sh calls this

  # Resetting PYTHONPATH disables the MyPy wedge, so the latest MyPy from the
  # venv is used instead
  export PYTHONPATH=.

  # Created by install-latest-mypy. May not be present on a CI machine, in
  # which case nothing is activated.
  local venv_activate=$MYPY_VENV/bin/activate
  if [[ -f $venv_activate ]]; then
    source $venv_activate
  fi

  _check-types
}
101
102	#
103	# Run Pea
104	#
105
pea-main() {
  ### Run pea/pea_main.py, passing all arguments through unchanged

  pea/pea_main.py "$@"
}
109
parse-one() {
  ### Invoke the 'parse' action of pea-main on the given arguments

  pea-main parse "$@"
}
113
translate-cpp() {
  ### Used by mycpp/NINJA-steps.sh

  # Invokes the 'cpp' action of pea-main on the given arguments
  pea-main cpp "$@"
}
119
all-files() {
  ### Print the files to parse: pea/oils-typecheck.txt, then every */*.pyi stub

  # Can't run this on Soil because we only have build/py.sh py-source, not
  # 'minimal'

  # Update this file with build/dynamic-deps.sh pea-hack

  cat pea/oils-typecheck.txt

  # The glob is */*.pyi (stubs one directory deep).  A bare '/.pyi' would
  # print a path that doesn't exist.
  local path
  for path in */*.pyi; do
    # Without nullglob an unmatched pattern stays literal; don't print it
    if ! test -e "$path"; then
      continue
    fi
    echo $path
  done
}
132
parse-all() {
  ### soil/worker.sh calls this

  # Feed the all-files list through xargs into this script's own 'pea-main
  # parse' ($0 re-invokes the script).  Must be a real pipe, not an escaped '\|'.
  time all-files | xargs --verbose -- $0 pea-main parse
}
138
139	# Good illustration of "distributing your overhead"
140	#
141	# Total work goes up, while latency goes down. To a point. Then it goes back
142	# up.
143
144	# batch size 30
145	#
146	# real 0m0.342s
147	# user 0m0.735s
148	# sys 0m0.059s
149	#
150	# batch size 20
151	#
152	# real 0m0.305s
153	# user 0m0.993s
154	# sys 0m0.081s
155	#
156	# batch size 15
157	#
158	# real 0m0.299s
159	# user 0m1.110s
160	# sys 0m0.123s
161	#
162	# batch size 10
163	#
164	# real 0m0.272s
165	# user 0m1.362s
166	# sys 0m0.145s
167
batch-size() {
  ### Print "<num_procs> <files_per_process>" for a given number of files
  #
  # Args:
  #   $1: number of files to distribute
  # Output:
  #   One line on stdout: the processor count from nproc, then the batch size
  #   (always >= 1)

  local num_files=$1

  local num_procs
  num_procs=$(nproc)

  # Use (p-1) as a fudge so we don't end up with more batches than processors.
  # Clamp it to 1: on a single-CPU machine (p-1) is 0, and dividing by it
  # would abort with "division by 0".
  local divisor=$(( num_procs - 1 ))
  if (( divisor < 1 )); then
    divisor=1
  fi

  local files_per_process=$(( num_files / divisor ))

  # Callers pass this to 'xargs -n', which rejects 0
  if (( files_per_process < 1 )); then
    files_per_process=1
  fi

  echo "$num_procs $files_per_process"
}
179
demo-par() {
  ### Demo parallelism of Python processes

  # Everything this function sets is local, so nothing leaks into the caller
  local num_files num_procs optimal n

  num_files=$(all-files | wc -l)

  # 103 files

  # lastpipe runs the final pipeline stage in this shell, so 'read' can set
  # our locals
  shopt -s lastpipe
  batch-size $num_files | read -r num_procs optimal

  echo "Parsing $num_files files with $num_procs parallel processes"
  echo "Optimal batch size is $optimal"

  echo

  echo 'All at once:'
  time parse-all > /dev/null 2>&1
  echo

  # 5 is meant to be suboptimal
  for n in 50 30 20 10 5 $optimal; do
    echo "batch size $n"
    time all-files | xargs --verbose -P $num_procs -n $n -- \
      $0 parse-one > /dev/null 2>&1
    echo
  done
}
208
209	# - 0.40 secs to parse
210	# - 0.56 secs pickle, so that's 160 ms
211	# Then
212	#
213	# - 0.39 secs load pickle
214	#
215	# That's definitely slower than I want. It's 6.6 MB of data.
216	#
217	# So
218	# - parallel parsing can be done in <300 ms
219	# - parallel pickling
220	# - serial unpickling (reduce) in 390 ms
221	#
222	# So now we're at ~700 ms or so. Can we type check in 300 ms in pure Python?
223	#
224	# What if we compress the generated ASDL? Those are very repetitive.
225
226	# Problem statement:
227
_serial-pickle() {
  ### Dump pickles for all-files into _tmp/serial, then time loading them back

  mkdir -p _tmp
  local tmp=_tmp/serial

  # Real pipe: all-files -> xargs -> this script's 'pea-main dump-pickles'
  time all-files | xargs --verbose -- $0 pea-main dump-pickles > $tmp

  ls -l -h $tmp

  echo 'loading'
  time pea-main load-pickles < $tmp
}
239
240	# 1.07 seconds
serial-pickle() {
  ### Time a complete _serial-pickle run, re-invoking this script via $0

  time $0 _serial-pickle
}
242
pickle-one() {
  ### Dump pickles for the given files into _tmp/p/<PID of this shell>

  local out=_tmp/p/$$
  pea-main dump-pickles "$@" > $out
}
246
_par-pickle() {
  ### Pickle all-files in parallel batches under _tmp/p, then time loading them

  # Everything this function sets is local, so nothing leaks into the caller
  local num_files num_procs optimal

  num_files=$(all-files | wc -l)

  # lastpipe runs the final pipeline stage in this shell, so 'read' can set
  # our locals
  shopt -s lastpipe
  batch-size $num_files | read -r num_procs optimal

  # Start from an empty output directory
  local dir=_tmp/p
  rm -r -f -v $dir
  mkdir -p $dir

  time all-files | xargs --verbose -P $num_procs -n $optimal -- $0 pickle-one

  ls -l -h $dir

  # This takes 410-430 ms? Wow that's slow.
  time cat $dir/* | pea-main load-pickles
}
265
266	# Can get this down to ~700 ms
267	#
268	# Note parsing serially in a single process is 410 ms !!! So this is NOT a win
269	# unless we have more work besides parsing to parallelize.
270	#
271	# We can extract constants and forward declarations in parallel I suppose.
272	#
273	# BUT immutable string constants have to be de-duplicated! Though I guess that
274	# is a natural 'reduce' step.
275	#
276	# And we can even do implementation and prototypes in parallel too?
277	#
278	# I think the entire algorithm can be OPTIMISTIC without serialized type
279	# checking?
280	#
281	# I think
282	#
283	# a = 5
284	# b = a # do not know the type without a global algorithm
285	#
286	# Or I guess you can do type checking within a function. Functions require
287	# signatures. So yes let's do that in parallel.
288	#
289	# --
290	#
291	# The ideal way to do this would be to split Oils up into MODULES, like
292	#
293	# _debuild/
294	# builtin/
295	# core/
296	# data_lang/
297	# frontend/
298	# osh/
299	# ysh/
300	# Smaller: pgen2/ pylib/ tools/
301	#
302	# And modules are acyclic, and can compile on their own with dependencies. If
303	# you pick random .py files and spit out header files, I think they won't compile.
304	# The forward declarations and constants will work, but the prototype won't.
305
par-pickle() {
  ### Time a complete _par-pickle run, re-invoking this script via $0

  time $0 _par-pickle
}
307
sum1() {
  ### Add up the first field of every stdin line and print the total

  awk '
  { total += $1 }
  END { print total }
  '
}
311
sum-sizes() {
  ### Read paths from stdin, one per line, and print their total size in bytes

  # find prints "<size> <path>" for each input line; sum1 adds up the sizes.
  # Must be a real pipe, not an escaped '\|'.
  xargs -I {} -- find {} -printf '%s %p\n' | sum1
}
315
size-ratio() {
  ### Print the total size of the source files, then of the pickles in _tmp/p

  # all-files
  # echo _tmp/p/*

  # 1.96 MB of source code
  all-files | sum-sizes

  # 7.13 MB of pickle files
  # Weirdly echo _tmp/p/* doesn't work here
  # (the loop prints one path per line; it runs in a pipeline subshell, so $f
  # doesn't leak)
  for f in _tmp/p/*; do echo $f; done | sum-sizes
}
327
328	# Only 47 ms!
329	# I want the overhead to be less than 1 second:
330	# 1. parallel parsing + pickle
331	# 2. serial unpickle + type check
332	# 3. starting the process
333	#
334	# So unpickling is slow.
335
osh-overhead() {
  ### Time bin/osh starting up and running a trivial command

  time bin/osh -c 'echo hi'
}
339
340
341	# MyPy dev version takes 10.2 seconds the first time (without their mypyc
342	# speedups)
343	#
344	# 0.150 seconds the second time, WITHOUT code changes
345	# 0.136 seconds
346
347	# 4.1 seconds: whitespace change
348	# 3.9 seconds: again, and this is on my fast hoover machine
349
350	# 5.0 seconds - Invalid type!
351	# 4.9 seconds - again invalid
352
353
mypy-compare() {
  ### Run the check-oils task of devtools/types.sh, for comparison

  devtools/types.sh check-oils
}
357
test-translate() {
  ### Run translate-cpp on bin/oils_for_unix.py

  local entry=bin/oils_for_unix.py
  translate-cpp $entry
}
361
test-syntax-error() {
  ### parse-one must exit with status 1 on each of the bad test files

  local status stdout

  # In order: error in Python syntax, error in signature, error in assignment
  local bad_py
  for bad_py in py_err sig_err assign_err; do
    nq-capture status stdout \
      parse-one pea/testdata/$bad_py.py
    nq-assert 1 = $status
  done
}
380
test-mycpp-integration() {
  ### Run 'pea-main mycpp' with no file, then on a small mycpp example

  # In Soil CI, we are importing a compiled MyPy?
  # We don't have the WEDGE
  # OK I can just add that
  #return

  local separator=---

  # Works
  echo $separator
  pea-main mycpp

  echo $separator
  pea-main mycpp mycpp/examples/test_small_str.py
}
394
test-example-hello() {
  ### Build the pea_hello example with ninja and assert that it exits with 42

  local status stdout
  local bin=_bin/cxx-asan/mycpp/examples/pea_hello.pea

  # Build first ...
  ninja $bin

  # ... then run, capturing exit status and stdout
  nq-capture status stdout $bin
  nq-assert 42 = $status
}
404
run-tests() {
  ### soil/worker.sh calls this

  # Hand this script ($0) to the 'test' action of devtools/byo.sh
  devtools/byo.sh test $0
}
410
411	task-five "$@"