OILS / spec / regex.test.sh View on Github | oils.pub

635 lines, 287 significant
1## oils_failures_allowed: 0
2## compare_shells: bash-4.4 zsh
3
4#
5# Only bash and zsh seem to implement [[ foo =~ '' ]]
6#
7# ^(a b)$ is a regex that should match 'a b' in a group.
8#
9# Not sure what bash is doing here... I think I have to just be empirical.
10# Might need "compat" switch for parsing the regex. It should be an opaque
11# string like zsh, not sure why it isn't.
12#
13# I think this is just papering over bugs...
14# https://www.gnu.org/software/bash/manual/bash.html#Conditional-Constructs
15#
16# Storing the regular expression in a shell variable is often a useful way to
17# avoid problems with quoting characters that are special to the shell. It is
18# sometimes difficult to specify a regular expression literally without using
19# quotes, or to keep track of the quoting used by regular expressions while
20# paying attention to the shell’s quote removal. Using a shell variable to
21# store the pattern decreases these problems. For example, the following is
22# equivalent to the above:
23#
24# pattern='[[:space:]]*(a)?b'
25# [[ $line =~ $pattern ]]
26#
27# If you want to match a character that’s special to the regular expression
28# grammar, it has to be quoted to remove its special meaning. This means that in
29# the pattern ‘xxx.txt’, the ‘.’ matches any character in the string (its usual
30# regular expression meaning), but in the pattern ‘"xxx.txt"’, it can only match a
31# literal ‘.’. Shell programmers should take special care with backslashes, since
32# backslashes are used both by the shell and regular expressions to remove the
33# special meaning from the following character. The following two sets of
34# commands are not equivalent:
35#
36# From bash code: ( | ) are treated special. Normally they must be quoted, but
37# they can be UNQUOTED in BASH_REGEX state. In fact they can't be quoted!
38
#### BASH_REMATCH
# A successful match fills BASH_REMATCH with the whole match followed by one
# element per capture group.
[[ foo123 =~ ([a-z]+)([0-9]+) ]]
echo status=$?
argv.py "${BASH_REMATCH[@]}"

# A failed match leaves BASH_REMATCH empty (second [] in the expected output
# below); the groups from the previous successful match are not retained.
[[ failed =~ ([a-z]+)([0-9]+) ]]
echo status=$?
argv.py "${BASH_REMATCH[@]}"  # emptied by the failed match

## STDOUT:
status=0
['foo123', 'foo', '123']
status=1
[]
## END
## N-I zsh STDOUT:
status=0
['']
status=1
['']
## END
60
61#### Match is unanchored at both ends
62[[ 'bar' =~ a ]] && echo true
63## stdout: true
64
65#### Failed match
66[[ 'bar' =~ X ]] && echo true
67## status: 1
68## stdout-json: ""
69
70#### Regex quoted with \ -- preferred in bash
71[[ 'a b' =~ ^(a\ b)$ ]] && echo true
72## stdout: true
73
74#### Regex quoted with single quotes
75# bash doesn't like the quotes
76[[ 'a b' =~ '^(a b)$' ]] && echo true
77## stdout-json: ""
78## status: 1
79## OK zsh stdout: true
80## OK zsh status: 0
81
82#### Regex quoted with double quotes
83# bash doesn't like the quotes
84[[ 'a b' =~ "^(a b)$" ]] && echo true
85## stdout-json: ""
86## status: 1
87## OK zsh stdout: true
88## OK zsh status: 0
89
90#### Fix single quotes by storing in variable
91pat='^(a b)$'
92[[ 'a b' =~ $pat ]] && echo true
93## stdout: true
94
#### Fix double quotes by storing in variable
# Counterpart of the single-quoted variant: the pattern is assigned with
# double quotes, and the unquoted $pat on the right of =~ is used as a regex.
pat="^(a b)$"
[[ 'a b' =~ $pat ]] && echo true
## stdout: true
99
100#### Double quoting pat variable -- again bash doesn't like it.
101pat="^(a b)$"
102[[ 'a b' =~ "$pat" ]] && echo true
103## stdout-json: ""
104## status: 1
105## OK zsh stdout: true
106## OK zsh status: 0
107
108#### Mixing quoted and unquoted parts
109[[ 'a b' =~ 'a 'b ]] && echo true
110[[ "a b" =~ "a "'b' ]] && echo true
111## STDOUT:
112true
113true
114## END
115
116#### Regex with == and not =~ is parse error, different lexer mode required
117# They both give a syntax error. This is lame.
118[[ '^(a b)$' == ^(a\ b)$ ]] && echo true
119## status: 2
120## OK zsh status: 1
121
122#### Omitting ( )
123[[ '^a b$' == ^a\ b$ ]] && echo true
124## stdout: true
125
126#### Malformed regex
127# Are they trying to PARSE the regex? Do they feed the buffer directly to
128# regcomp()?
129[[ 'a b' =~ ^)a\ b($ ]] && echo true
130## stdout-json: ""
131## status: 2
132## OK zsh status: 1
133
134#### Regex with |
135[[ 'bar' =~ foo|bar ]] && echo true
136## stdout: true
137## N-I zsh stdout-json: ""
138## N-I zsh status: 1
139
140#### Regex to match literal brackets []
141
142# bash-completion relies on this, so we're making it match bash.
143# zsh understandably differs.
144[[ '[]' =~ \[\] ]] && echo true
145
146# Another way to write this.
147pat='\[\]'
148[[ '[]' =~ $pat ]] && echo true
149## STDOUT:
150true
151true
152## END
153## OK zsh STDOUT:
154true
155## END
156
157#### Regex to match literals . ^ $ etc.
158[[ 'x' =~ \. ]] || echo false
159[[ '.' =~ \. ]] && echo true
160
161[[ 'xx' =~ \^\$ ]] || echo false
162[[ '^$' =~ \^\$ ]] && echo true
163
164[[ 'xxx' =~ \+\*\? ]] || echo false
165[[ '*+?' =~ \*\+\? ]] && echo true
166
167[[ 'xx' =~ \{\} ]] || echo false
168[[ '{}' =~ \{\} ]] && echo true
169## STDOUT:
170false
171true
172false
173true
174false
175true
176false
177true
178## END
179## BUG zsh STDOUT:
180true
181false
182false
183false
184## END
185## BUG zsh status: 1
186
187#### Unquoted { is a regex parse error
188[[ { =~ { ]] && echo true
189echo status=$?
190## stdout-json: ""
191## status: 2
192## BUG bash STDOUT:
193status=2
194## END
195## BUG bash status: 0
196## BUG zsh STDOUT:
197status=1
198## END
199## BUG zsh status: 0
200
201#### Fatal error inside [[ =~ ]]
202
203# zsh and osh are stricter than bash. bash treats [[ like a command.
204
205[[ a =~ $(( 1 / 0 )) ]]
206echo status=$?
207## stdout-json: ""
208## status: 1
209## BUG bash stdout: status=1
210## BUG bash status: 0
211
212#### Quoted { and +
213[[ { =~ "{" ]] && echo 'yes {'
214[[ + =~ "+" ]] && echo 'yes +'
215[[ * =~ "*" ]] && echo 'yes *'
216[[ ? =~ "?" ]] && echo 'yes ?'
217[[ ^ =~ "^" ]] && echo 'yes ^'
218[[ $ =~ "$" ]] && echo 'yes $'
219[[ '(' =~ '(' ]] && echo 'yes ('
220[[ ')' =~ ')' ]] && echo 'yes )'
221[[ '|' =~ '|' ]] && echo 'yes |'
222[[ '\' =~ '\' ]] && echo 'yes \'
223echo ---
224
225[[ . =~ "." ]] && echo 'yes .'
226[[ z =~ "." ]] || echo 'no .'
227echo ---
228
229# This rule is weird but all shells agree. I would expect that the - gets
230# escaped? It's an operator? but it behaves like a-z.
231[[ a =~ ["a-z"] ]]; echo "a $?"
232[[ - =~ ["a-z"] ]]; echo "- $?"
233[[ b =~ ['a-z'] ]]; echo "b $?"
234[[ z =~ ['a-z'] ]]; echo "z $?"
235
236echo status=$?
237## STDOUT:
238yes {
239yes +
240yes *
241yes ?
242yes ^
243yes $
244yes (
245yes )
246yes |
247yes \
248---
249yes .
250no .
251---
252a 0
253- 1
254b 0
255z 0
256status=0
257## END
258## N-I zsh STDOUT:
259yes ^
260yes $
261yes )
262yes |
263---
264yes .
265---
266a 0
267- 1
268b 0
269z 0
270status=0
271## END
272
273#### Escaped {
274# from bash-completion
275[[ '$PA' =~ ^(\$\{?)([A-Za-z0-9_]*)$ ]] && argv.py "${BASH_REMATCH[@]}"
276## STDOUT:
277['$PA', '$', 'PA']
278## END
279## BUG zsh stdout-json: ""
280## BUG zsh status: 1
281
282#### Escaped { stored in variable first
283# from bash-completion
284pat='^(\$\{?)([A-Za-z0-9_]*)$'
285[[ '$PA' =~ $pat ]] && argv.py "${BASH_REMATCH[@]}"
286## STDOUT:
287['$PA', '$', 'PA']
288## END
289## BUG zsh STDOUT:
290['']
291## END
292
#### regex with ?
# c? means "zero or one c", so both 'c' and the empty string match.
[[ 'c' =~ c? ]] && echo true
[[ '' =~ c? ]] && echo true
## STDOUT:
true
true
## END
300
#### regex with unprintable characters
# can't have nul byte

# This pattern has literal characters: $'...' decodes \x01 and \x02 into raw
# bytes before the value is stored, so the bracket expression contains them
# directly.
pat=$'^[\x01\x02]+$'

# Only bytes 0x01/0x02 -> match; a leading 'a' -> no match.
[[ $'\x01\x02\x01' =~ $pat ]]; echo status=$?
[[ $'a\x01' =~ $pat ]]; echo status=$?

# NOTE: There doesn't appear to be any way to escape these!
# pat2 keeps the backslash sequences as plain text; it is assigned here but
# not matched against in this case.
pat2='^[\x01\x02]+$'

## STDOUT:
status=0
status=1
## END
317
#### pattern $f(x) -- regression
# $f expands to fff and is directly followed by an unquoted group, so the
# regex is fff(x): 'fffx' matches, 'ffx' does not.
f=fff
[[ fffx =~ $f(x) ]]
echo status=$?
[[ ffx =~ $f(x) ]]
echo status=$?
## STDOUT:
status=0
status=1
## END
328
329#### pattern a=(1)
330[[ a=x =~ a=(x) ]]
331echo status=$?
332[[ =x =~ a=(x) ]]
333echo status=$?
334## STDOUT:
335status=0
336status=1
337## END
338## BUG zsh status: 1
339## BUG zsh STDOUT:
340status=0
341## END
342
343#### pattern @f(x)
344shopt -s parse_at
345[[ @fx =~ @f(x) ]]
346echo status=$?
347[[ fx =~ @f(x) ]]
348echo status=$?
349## STDOUT:
350status=0
351status=1
352## END
353
354
#### Bug: Nix idiom with closing ) next to pattern

# The closing ) of the grouped condition directly follows a quoted regex
# operand.  params is not assigned anywhere in this case, so neither =~
# matches and the negated group is true.
if [[ ! (" ${params[*]} " =~ " -shared " || " ${params[*]} " =~ " -static ") ]]; then
  echo one
fi

# Reduced idiom
if [[ (foo =~ foo) ]]; then
  echo two
fi

## STDOUT:
one
two
## END
370
#### unquoted (a b) as pattern, (a b|c)

# Whitespace inside an unquoted ( ) group is part of the regex.
if [[ 'a b' =~ (a b) ]]; then
  echo one
fi

# Negative check: the pattern has TWO spaces between a and b, the string only
# one, so this must not match (BAD is absent from the expected output).
if [[ 'a b' =~ (a  b) ]]; then
  echo BAD
fi

if [[ 'a b' =~ (a b|c) ]]; then
  echo two
fi

# I think spaces are only allowed within ()

if [[ ' c' =~ (a| c) ]]; then
  echo three
fi

## STDOUT:
one
two
three
## END
396
397#### Multiple adjacent () groups
398
399if [[ 'a-b-c-d' =~ a-(b| >>)-c-( ;|[de])|ff|gg ]]; then
400 argv.py "${BASH_REMATCH[@]}"
401fi
402
403if [[ ff =~ a-(b| >>)-c-( ;|[de])|ff|gg ]]; then
404 argv.py "${BASH_REMATCH[@]}"
405fi
406
407# empty group ()
408
409if [[ zz =~ ([a-z]+)() ]]; then
410 argv.py "${BASH_REMATCH[@]}"
411fi
412
413# nested empty group
414if [[ zz =~ ([a-z]+)(()z) ]]; then
415 argv.py "${BASH_REMATCH[@]}"
416fi
417
418## STDOUT:
419['a-b-c-d', 'b', 'd']
420['ff', '', '']
421['zz', 'zz', '']
422['zz', 'z', 'z', '']
423## END
424
425## BUG zsh status: 1
426## BUG zsh STDOUT:
427['']
428['']
429['']
430['']
431## END
432
433#### unquoted [a b] as pattern, [a b|c]
434
435$SH <<'EOF'
436[[ a =~ [ab] ]] && echo yes
437EOF
438echo "[ab]=$?"
439
440$SH <<'EOF'
441[[ a =~ [a b] ]] && echo yes
442EOF
443echo "[a b]=$?"
444
445$SH <<'EOF'
446[[ a =~ ([a b]) ]] && echo yes
447EOF
448echo "[a b]=$?"
449
450## STDOUT:
451yes
452[ab]=0
453[a b]=2
454yes
455[a b]=0
456## END
457
458## OK zsh STDOUT:
459yes
460[ab]=0
461[a b]=1
462yes
463[a b]=0
464## END
465
466#### c|a unquoted
467
468if [[ a =~ c|a ]]; then
469 echo one
470fi
471
472## STDOUT:
473one
474## END
475## N-I zsh status: 1
476
477#### Operator chars ; & but not |
478
479# Hm semicolon is still an operator in bash
480$SH <<'EOF'
481[[ ';' =~ ; ]] && echo semi
482EOF
483echo semi=$?
484
485$SH <<'EOF'
486[[ ';' =~ (;) ]] && echo semi paren
487EOF
488echo semi paren=$?
489
490echo
491
492$SH <<'EOF'
493[[ '&' =~ & ]] && echo amp
494EOF
495echo amp=$?
496
497# Oh I guess this is not a bug? regcomp doesn't reject this trivial regex?
498$SH <<'EOF'
499[[ '|' =~ | ]] && echo pipe1
500[[ 'a' =~ | ]] && echo pipe2
501EOF
502echo pipe=$?
503
504$SH <<'EOF'
505[[ '|' =~ a| ]] && echo four
506EOF
507echo pipe=$?
508
509# This is probably special because > operator is inside foo [[ a > b ]]
510$SH <<'EOF'
511[[ '<>' =~ <> ]] && echo angle
512EOF
513echo angle=$?
514
515# Bug: OSH allowed this!
516$SH <<'EOF'
517[[ $'a\nb' =~ a
518b ]] && echo newline
519EOF
520echo newline=$?
521
522## STDOUT:
523semi=2
524semi paren
525semi paren=0
526
527amp=2
528pipe1
529pipe2
530pipe=0
531four
532pipe=0
533angle=2
534newline=2
535## END
536
537## BUG zsh STDOUT:
538semi=1
539semi paren=1
540
541amp=1
542pipe=1
543pipe=1
544angle=1
545newline=1
546## END
547
548
549
550#### Quotes '' "" $'' $"" in pattern
551
552$SH <<'EOF'
553[[ '|' =~ '|' ]] && echo sq
554EOF
555echo sq=$?
556
557$SH <<'EOF'
558[[ '|' =~ "|" ]] && echo dq
559EOF
560echo dq=$?
561
562$SH <<'EOF'
563[[ '|' =~ $'|' ]] && echo dollar-sq
564EOF
565echo dollar-sq=$?
566
567$SH <<'EOF'
568[[ '|' =~ $"|" ]] && echo dollar-dq
569EOF
570echo dollar-dq=$?
571
572## STDOUT:
573sq
574sq=0
575dq
576dq=0
577dollar-sq
578dollar-sq=0
579dollar-dq
580dollar-dq=0
581## END
582
583
584#### Unicode in pattern
585
586$SH <<'EOF'
587[[ μ =~ μ ]] && echo mu
588EOF
589echo mu=$?
590
591## STDOUT:
592mu
593mu=0
594## END
595
596#### Parse error with 2 words
597
598if [[ a =~ c a ]]; then
599 echo one
600fi
601
602## status: 2
603## STDOUT:
604## END
605
606## BUG zsh status: 1
607## BUG zsh STDOUT:
608one
609## END
610
#### make a lisp example

# Tokenizer-style regex (per the case title, taken from a make-a-lisp
# implementation).  The expected output shows that for '(hi)' the overall
# match, BASH_REMATCH[0], is just the leading '('.
str='(hi)'
[[ "${str}" =~ ^^([][{}\(\)^@])|^(~@)|(\"(\\.|[^\\\"])*\")|^(;[^$'\n']*)|^([~\'\`])|^([^][ ~\`\'\";{}\(\)^@\,]+)|^[,]|^[[:space:]]+ ]]
echo status=$?

m=${BASH_REMATCH[0]}
echo m=$m

## STDOUT:
status=0
m=(
## END

## BUG zsh STDOUT:
status=1
m=
## END
629
#### Operators and space lose meaning inside ()
# Inside an unquoted ( ) group, < > and the space are ordinary regex
# characters rather than shell operators or word separators.
[[ '< >' =~ (< >) ]] && echo true
## stdout: true
## N-I zsh stdout-json: ""
## N-I zsh status: 1
635