OILS / spec / word-split.test.sh View on Github | oils.pub

863 lines, 469 significant
1## compare_shells: bash dash mksh ash yash
2## oils_failures_allowed: 7
3
4# NOTE on bash bug: After setting IFS to an array, it never splits anymore? Even
5# if you assign IFS again.
6
7#### IFS is scoped
8IFS=b
9word=abcd
10f() { local IFS=c; argv.py $word; } # IFS=c is local to f: $word splits on 'c' here, on 'b' after f returns
11f
12argv.py $word
13## STDOUT:
14['ab', 'd']
15['a', 'cd']
16## END
17
18#### Tilde sub is not split, but var sub is
19HOME="foo bar"
20argv.py ~
21argv.py $HOME
22## STDOUT:
23['foo bar']
24['foo', 'bar']
25## END
26
27#### Word splitting
28a="1 2"
29b="3 4"
30argv.py $a"$b"
31## STDOUT:
32['1', '23 4']
33## END
34
35#### Word splitting 2
36a="1 2"
37b="3 4"
38c="5 6"
39d="7 8"
40argv.py $a"$b"$c"$d"
41## STDOUT:
42['1', '23 45', '67 8']
43## END
44
45# Has tests on differences between $* "$*" $@ "$@"
46# http://stackoverflow.com/questions/448407/bash-script-to-receive-and-repass-quoted-parameters
47
48#### $*
49fun() { argv.py -$*-; } # unquoted $*: every argument is word-split; the dashes attach to the first and last words
50fun "a 1" "b 2" "c 3"
51## stdout: ['-a', '1', 'b', '2', 'c', '3-']
52
53#### "$*"
54fun() { argv.py "-$*-"; } # quoted $*: all arguments are joined with spaces into a single word
55fun "a 1" "b 2" "c 3"
56## stdout: ['-a 1 b 2 c 3-']
57
58#### $@
59# Unquoted $@ is expected to behave the same as unquoted $* here (identical expected stdout).
60fun() { argv.py -$@-; } # unquoted $@: every argument is word-split; the dashes attach to the first and last words
61fun "a 1" "b 2" "c 3"
62## stdout: ['-a', '1', 'b', '2', 'c', '3-']
63
64#### "$@"
65fun() { argv.py "-$@-"; } # quoted $@: one word per argument, unsplit; the dashes attach to the first and last words
66fun "a 1" "b 2" "c 3"
67## stdout: ['-a 1', 'b 2', 'c 3-']
68
69#### empty argv
70argv.py 1 "$@" 2 $@ 3 "$*" 4 $* 5
71## stdout: ['1', '2', '3', '', '4', '5']
72
73#### $* with empty IFS
74set -- "1 2" "3 4"
75
76IFS=
77argv.py $*
78argv.py "$*"
79
80## STDOUT:
81['1 2', '3 4']
82['1 23 4']
83## END
84
85#### Word elision with space
86s1=' '
87argv.py $s1
88## stdout: []
89
90#### Word elision with non-whitespace IFS
91# Treated differently than the default IFS. What is the rule here?
92IFS='_'
93char='_'
94space=' '
95empty=''
96argv.py $char
97argv.py $space
98argv.py $empty
99## STDOUT:
100['']
101[' ']
102[]
103## END
104## BUG yash STDOUT:
105[]
106[' ']
107[]
108## END
109
110#### Leading/trailing word elision with non-whitespace IFS
111# This behavior is weird: the leading '_' yields an empty word, but the trailing '_' does not.
112IFS=_
113s1='_a_b_'
114argv.py $s1
115## stdout: ['', 'a', 'b']
116
117#### Leading ' ' vs leading ' _ '
118# This behavior is weird, but all shells agree.
119IFS='_ '
120s1='_ a b _ '
121s2=' a b _ '
122argv.py $s1
123argv.py $s2
124## STDOUT:
125['', 'a', 'b']
126['a', 'b']
127## END
128
129#### Multiple non-whitespace IFS chars.
130IFS=_-
131s1='a__b---c_d'
132argv.py $s1
133## stdout: ['a', '', 'b', '', '', 'c', 'd']
134
135#### IFS with whitespace and non-whitespace.
136# NOTE: The three '_' delimiters between b and c mean two empty words in the middle. No elision.
137IFS='_ '
138s1='a_b _ _ _ c _d e'
139argv.py $s1
140## stdout: ['a', 'b', '', '', 'c', 'd', 'e']
141
142#### empty $@ and $* is elided
143fun() { argv.py 1 $@ $* 2; } # called with no arguments: unquoted $@ and $* contribute zero words
144fun
145## stdout: ['1', '2']
146
147#### unquoted empty arg is elided
148empty=""
149argv.py 1 $empty 2
150## stdout: ['1', '2']
151
152#### unquoted whitespace arg is elided
153space=" "
154argv.py 1 $space 2
155## stdout: ['1', '2']
156
157#### empty literals are not elided
158space=" "
159argv.py 1 $space"" 2
160## stdout: ['1', '', '2']
161
162#### no splitting when IFS is empty
163IFS=""
164foo="a b"
165argv.py $foo
166## stdout: ['a b']
167
168#### default value can yield multiple words
169argv.py 1 ${undefined:-"2 3" "4 5"} 6
170## stdout: ['1', '2 3', '4 5', '6']
171
172#### default value can yield multiple words with part joining
173argv.py 1${undefined:-"2 3" "4 5"}6
174## stdout: ['12 3', '4 56']
175
176#### default value with unquoted IFS char
177IFS=_
178argv.py 1${undefined:-"2_3"x_x"4_5"}6
179## stdout: ['12_3x', 'x4_56']
180
181#### IFS empty doesn't do splitting
182IFS=''
183x=$(python2 -c 'print(" a b\tc\n")')
184argv.py $x
185## STDOUT:
186[' a b\tc']
187## END
188
189#### IFS unset behaves like $' \t\n'
190unset IFS
191x=$(python2 -c 'print(" a b\tc\n")')
192argv.py $x
193## STDOUT:
194['a', 'b', 'c']
195## END
196
197#### IFS='\'
198# NOTE: OSH fails this because of a double-backslash escaping issue!
199IFS='\'
200s='a\b'
201argv.py $s
202## STDOUT:
203['a', 'b']
204## END
205
206#### IFS='\ '
207# NOTE: OSH fails this because of a double-backslash escaping issue!
208# When IFS is \, then you're no longer using backslash escaping.
209IFS='\ '
210s='a\b \\ c d\'
211argv.py $s
212## STDOUT:
213['a', 'b', '', 'c', 'd']
214## END
215
216#### IFS characters are glob metacharacters
217IFS='* '
218s='a*b c'
219argv.py $s
220
221IFS='?'
222s='?x?y?z?'
223argv.py $s
224
225IFS='['
226s='[x[y[z['
227argv.py $s
228## STDOUT:
229['a', 'b', 'c']
230['', 'x', 'y', 'z']
231['', 'x', 'y', 'z']
232## END
233
234#### Trailing space
235argv.py 'Xec ho '
236argv.py X'ec ho '
237argv.py X"ec ho "
238## STDOUT:
239['Xec ho ']
240['Xec ho ']
241['Xec ho ']
242## END
243
244#### Empty IFS (regression for bug)
245IFS=
246echo ["$*"]
247set a b c
248echo ["$*"]
249## STDOUT:
250[]
251[abc]
252## END
253
254#### Unset IFS (regression for bug)
255set a b c
256unset IFS
257echo ["$*"]
258## STDOUT:
259[a b c]
260## END
261
262#### IFS=o (regression for bug)
263IFS=o
264echo hi
265## STDOUT:
266hi
267## END
268
269#### IFS and joining arrays
270IFS=:
271set -- x 'y z'
272argv.py "$@"
273argv.py $@
274argv.py "$*"
275argv.py $*
276## STDOUT:
277['x', 'y z']
278['x', 'y z']
279['x:y z']
280['x', 'y z']
281## END
282
283#### IFS and joining arrays by assignments
284IFS=:
285set -- x 'y z'
286
287s="$@"
288argv.py "$s"
289
290s=$@
291argv.py "$s"
292
293s="$*"
294argv.py "$s"
295
296s=$*
297argv.py "$s"
298
299# bash and mksh agree, but this doesn't really make sense to me.
300# In OSH, "$@" is the only real array, so that's why it behaves differently.
301
302## STDOUT:
303['x y z']
304['x y z']
305['x:y z']
306['x:y z']
307## END
308## BUG dash/ash/yash STDOUT:
309['x:y z']
310['x:y z']
311['x:y z']
312['x:y z']
313## END
314
315
316# TODO:
317# - unquoted args of whitespace are not elided (when IFS = null)
318# - empty quoted args are kept
319#
320# - $* $@ with empty IFS
321# - $* $@ with custom IFS
322#
323# - no splitting when IFS is empty
324# - word splitting removes leading and trailing whitespace
325
326# TODO: test framework needs common setup
327
328# Test IFS and $@ $* on all these
329#### TODO
330empty=""
331space=" "
332AB="A B"
333X="X"
334Yspaces=" Y "
335
336
337#### IFS='' with $@ and $* (bug #627)
338set -- a 'b c'
339IFS=''
340argv.py at $@
341argv.py star $*
342
343# zsh agrees
344## STDOUT:
345['at', 'a', 'b c']
346['star', 'a', 'b c']
347## END
348
349#### IFS='' with $@ and $* and printf (bug #627)
350set -- a 'b c'
351IFS=''
352printf '[%s]\n' $@
353printf '[%s]\n' $*
354## STDOUT:
355[a]
356[b c]
357[a]
358[b c]
359## END
360
361#### IFS='' with ${a[@]} and ${a[*]} (bug #627)
362case $SH in dash | ash) exit 0 ;; esac
363
364myarray=(a 'b c')
365IFS=''
366argv.py at ${myarray[@]}
367argv.py star ${myarray[*]}
368
369## STDOUT:
370['at', 'a', 'b c']
371['star', 'a', 'b c']
372## END
373## N-I dash/ash stdout-json: ""
374
375#### IFS='' with ${!prefix@} and ${!prefix*} (bug #627)
376case $SH in dash | mksh | ash | yash) exit 0 ;; esac
377
378gLwbmGzS_var1=1
379gLwbmGzS_var2=2
380IFS=''
381argv.py at ${!gLwbmGzS_@}
382argv.py star ${!gLwbmGzS_*}
383
384## STDOUT:
385['at', 'gLwbmGzS_var1', 'gLwbmGzS_var2']
386['star', 'gLwbmGzS_var1', 'gLwbmGzS_var2']
387## END
388## BUG bash STDOUT:
389['at', 'gLwbmGzS_var1', 'gLwbmGzS_var2']
390['star', 'gLwbmGzS_var1gLwbmGzS_var2']
391## END
392## N-I dash/mksh/ash/yash stdout-json: ""
393
394#### IFS='' with ${!a[@]} and ${!a[*]} (bug #627)
395case $SH in dash | mksh | ash | yash) exit 0 ;; esac
396
397IFS=''
398a=(v1 v2 v3)
399argv.py at ${!a[@]}
400argv.py star ${!a[*]}
401
402## STDOUT:
403['at', '0', '1', '2']
404['star', '0', '1', '2']
405## END
406## BUG bash STDOUT:
407['at', '0', '1', '2']
408['star', '0 1 2']
409## END
410## N-I dash/mksh/ash/yash stdout-json: ""
411
412#### Bug #628 split on : with : in literal word
413
414# 2025-03: What's the cause of this bug?
415#
416# OSH is very wrong here
417# ['a', '\\', 'b']
418# Is this a fundamental problem with the IFS state machine?
419# It definitely relates to the use of backslashes.
420# So we have at least 4 backslash bugs
421
422IFS=':'
423word='a:'
424argv.py ${word}:b
425argv.py ${word}:
426
427echo ---
428
429# Same thing happens for 'z'
430IFS='z'
431word='az'
432argv.py ${word}zb
433argv.py ${word}z
434## STDOUT:
435['a', ':b']
436['a', ':']
437---
438['a', 'zb']
439['a', 'z']
440## END
441
442#### Bug #698, similar crash
443var='\'
444set -f
445echo $var
446## STDOUT:
447\
448## END
449
450#### Bug #1664, \\ with noglob
451
452# Note that we're not changing IFS
453
454argv.py [\\]_
455argv.py "[\\]_"
456
457# TODO: no difference observed here, go back to original bug
458
459#argv.py [\\_
460#argv.py "[\\_"
461
462echo noglob
463
464# repeat cases with -f, noglob
465set -f
466
467argv.py [\\]_
468argv.py "[\\]_"
469
470#argv.py [\\_
471#argv.py "[\\_"
472
473## STDOUT:
474['[\\]_']
475['[\\]_']
476noglob
477['[\\]_']
478['[\\]_']
479## END
480
481
482#### Empty IFS bug #2141 (from pnut)
483
484res=0
485sum() {
486 # Implement a callee-save calling convention using `set`:
487 # the current value of $res is appended after the function's own parameters.
488 set $@ $res # $@ is left unquoted; expected result: $1 $2 are the arguments, $3 is the saved $res
489 res=$(($1 + $2))
490 echo "$1 + $2 = $res"
491 res=$3 # restore the saved value of $res
492}
493
494unset IFS
495sum 12 30 # outputs "12 + 30 = 42"
496
497IFS=' '
498sum 12 30 # outputs "12 + 30 = 42"
499
500IFS=
501sum 12 30 # outputs "1230 + 0 = 1230"
502
503# I added this: same as 'IFS=' above, but written with explicit empty quotes
504IFS=''
505sum 12 30
506
507set -u
508IFS=
509sum 12 30 # fails with "fatal: Undefined variable '2'" on res=$(($1 + $2))
510
511## STDOUT:
51212 + 30 = 42
51312 + 30 = 42
51412 + 30 = 42
51512 + 30 = 42
51612 + 30 = 42
517## END
518
519#### Unicode in IFS
520
521# bash, zsh, and yash support unicode in IFS, but dash/mksh/ash don't.
522
523# for zsh, though we're not testing it here
524setopt SH_WORD_SPLIT
525
526x=çx IFS=ç
527printf "<%s>\n" $x
528
529## STDOUT:
530<>
531<x>
532## END
533
534## BUG dash/mksh/ash STDOUT:
535<>
536<>
537<x>
538## END
539
540#### 4 x 3 table: (default IFS, IFS='', IFS=zx) x ( $* "$*" $@ "$@" )
541
542setopt SH_WORD_SPLIT # for zsh
543
544set -- 'a b' c ''
545
546# default IFS
547argv.py ' $* ' $*
548argv.py ' "$*" ' "$*"
549argv.py ' $@ ' $@
550argv.py ' "$@" ' "$@"
551echo
552
553IFS=''
554argv.py ' $* ' $*
555argv.py ' "$*" ' "$*"
556argv.py ' $@ ' $@
557argv.py ' "$@" ' "$@"
558echo
559
560IFS=zx
561argv.py ' $* ' $*
562argv.py ' "$*" ' "$*"
563argv.py ' $@ ' $@
564argv.py ' "$@" ' "$@"
565
566## STDOUT:
567[' $* ', 'a', 'b', 'c']
568[' "$*" ', 'a b c ']
569[' $@ ', 'a', 'b', 'c']
570[' "$@" ', 'a b', 'c', '']
571
572[' $* ', 'a b', 'c']
573[' "$*" ', 'a bc']
574[' $@ ', 'a b', 'c']
575[' "$@" ', 'a b', 'c', '']
576
577[' $* ', 'a b', 'c']
578[' "$*" ', 'a bzcz']
579[' $@ ', 'a b', 'c']
580[' "$@" ', 'a b', 'c', '']
581## END
582
583# zsh disagrees on
584# - $@ with default IFS and
585# - $@ with IFS=zx
586
587## BUG zsh STDOUT:
588[' $* ', 'a', 'b', 'c']
589[' "$*" ', 'a b c ']
590[' $@ ', 'a b', 'c']
591[' "$@" ', 'a b', 'c', '']
592
593[' $* ', 'a b', 'c']
594[' "$*" ', 'a bc']
595[' $@ ', 'a b', 'c']
596[' "$@" ', 'a b', 'c', '']
597
598[' $* ', 'a b', 'c', '']
599[' "$*" ', 'a bzcz']
600[' $@ ', 'a b', 'c']
601[' "$@" ', 'a b', 'c', '']
602## END
603
604## BUG yash STDOUT:
605[' $* ', 'a', 'b', 'c', '']
606[' "$*" ', 'a b c ']
607[' $@ ', 'a', 'b', 'c', '']
608[' "$@" ', 'a b', 'c', '']
609
610[' $* ', 'a b', 'c', '']
611[' "$*" ', 'a bc']
612[' $@ ', 'a b', 'c', '']
613[' "$@" ', 'a b', 'c', '']
614
615[' $* ', 'a b', 'c', '']
616[' "$*" ', 'a bzcz']
617[' $@ ', 'a b', 'c', '']
618[' "$@" ', 'a b', 'c', '']
619## END
620
621#### 4 x 3 table - with for loop
622case $SH in yash) exit ;; esac # no echo -n
623
624setopt SH_WORD_SPLIT # for zsh
625
626set -- 'a b' c ''
627
628# default IFS
629echo -n ' $* '; for i in $*; do echo -n ' '; echo -n -$i-; done; echo
630echo -n ' "$*" '; for i in "$*"; do echo -n ' '; echo -n -$i-; done; echo
631echo -n ' $@ '; for i in $@; do echo -n ' '; echo -n -$i-; done; echo
632echo -n ' "$@" '; for i in "$@"; do echo -n ' '; echo -n -$i-; done; echo
633echo
634
635IFS=''
636echo -n ' $* '; for i in $*; do echo -n ' '; echo -n -$i-; done; echo
637echo -n ' "$*" '; for i in "$*"; do echo -n ' '; echo -n -$i-; done; echo
638echo -n ' $@ '; for i in $@; do echo -n ' '; echo -n -$i-; done; echo
639echo -n ' "$@" '; for i in "$@"; do echo -n ' '; echo -n -$i-; done; echo
640echo
641
642IFS=zx
643echo -n ' $* '; for i in $*; do echo -n ' '; echo -n -$i-; done; echo
644echo -n ' "$*" '; for i in "$*"; do echo -n ' '; echo -n -$i-; done; echo
645echo -n ' $@ '; for i in $@; do echo -n ' '; echo -n -$i-; done; echo
646echo -n ' "$@" '; for i in "$@"; do echo -n ' '; echo -n -$i-; done; echo
647
648## STDOUT:
649 $* -a- -b- -c-
650 "$*" -a b c -
651 $@ -a- -b- -c-
652 "$@" -a b- -c- --
653
654 $* -a b- -c-
655 "$*" -a bc-
656 $@ -a b- -c-
657 "$@" -a b- -c- --
658
659 $* -a b- -c-
660 "$*" -a b c -
661 $@ -a b- -c-
662 "$@" -a b- -c- --
663## END
664
665## N-I yash STDOUT:
666## END
667
668#### IFS=x and '' and $@ - same bug as spec/toysh-posix case #12
669case $SH in yash) exit ;; esac # no echo -n
670
671setopt SH_WORD_SPLIT # for zsh
672
673set -- one '' two
674
675IFS=zx
676echo -n ' $* '; for i in $*; do echo -n ' '; echo -n -$i-; done; echo
677echo -n ' "$*" '; for i in "$*"; do echo -n ' '; echo -n -$i-; done; echo
678echo -n ' $@ '; for i in $@; do echo -n ' '; echo -n -$i-; done; echo
679echo -n ' "$@" '; for i in "$@"; do echo -n ' '; echo -n -$i-; done; echo
680
681argv.py ' $* ' $*
682argv.py ' "$*" ' "$*"
683argv.py ' $@ ' $@
684argv.py ' "$@" ' "$@"
685
686
687## OK bash/mksh STDOUT:
688 $* -one- -- -two-
689 "$*" -one two-
690 $@ -one- -- -two-
691 "$@" -one- -- -two-
692[' $* ', 'one', '', 'two']
693[' "$*" ', 'onezztwo']
694[' $@ ', 'one', '', 'two']
695[' "$@" ', 'one', '', 'two']
696## END
697
698## STDOUT:
699 $* -one- -two-
700 "$*" -one two-
701 $@ -one- -two-
702 "$@" -one- -- -two-
703[' $* ', 'one', 'two']
704[' "$*" ', 'onezztwo']
705[' $@ ', 'one', 'two']
706[' "$@" ', 'one', '', 'two']
707## END
708
709## N-I yash STDOUT:
710## END
711
712#### IFS=x and '' and $@ (#2)
713setopt SH_WORD_SPLIT # for zsh
714
715set -- "" "" "" "" ""
716argv.py =$@=
717argv.py =$*=
718echo
719
720IFS=
721argv.py =$@=
722argv.py =$*=
723echo
724
725IFS=x
726argv.py =$@=
727argv.py =$*=
728
729## STDOUT:
730['=', '=']
731['=', '=']
732
733['=', '=']
734['=', '=']
735
736['=', '=']
737['=', '=']
738## END
739
740## OK bash/mksh STDOUT:
741['=', '=']
742['=', '=']
743
744['=', '=']
745['=', '=']
746
747['=', '', '', '', '=']
748['=', '', '', '', '=']
749## END
750
751# yash-2.49 seems to behave in a strange way, but this behavior seems to have
752# been fixed at least in yash-2.57.
753
754## BUG yash STDOUT:
755['=', '', '', '', '=']
756['=', '', '', '', '=']
757
758['=', '', '', '', '=']
759['=', '', '', '', '=']
760
761['=', '', '', '', '=']
762['=', '', '', '', '=']
763## END
764
765#### IFS=x and '' and $@ (#3)
766setopt SH_WORD_SPLIT # for zsh
767
768IFS=x
769set -- "" "" "" "" ""
770
771argv.py $*
772set -- $*
773argv.py $*
774set -- $*
775argv.py $*
776set -- $*
777argv.py $*
778set -- $*
779argv.py $*
780
781## STDOUT:
782[]
783[]
784[]
785[]
786[]
787## END
788
789## OK bash STDOUT:
790['', '', '', '']
791['', '', '']
792['', '']
793['']
794[]
795## END
796
797## OK mksh STDOUT:
798['', '', '']
799['']
800[]
801[]
802[]
803## END
804
805## BUG zsh/yash STDOUT:
806['', '', '', '', '']
807['', '', '', '', '']
808['', '', '', '', '']
809['', '', '', '', '']
810['', '', '', '', '']
811## END
812
813#### ""$A"" - empty string on both sides - derived from spec/toysh-posix #15
814
815A=" abc def "
816
817argv.py $A
818argv.py ""$A""
819
820unset IFS
821
822argv.py $A
823argv.py ""$A""
824
825echo
826
827# Do the same thing in a for loop - this is IDENTICAL behavior
828
829for i in $A; do echo =$i=; done
830echo
831
832for i in ""$A""; do echo =$i=; done
833echo
834
835unset IFS
836
837for i in $A; do echo =$i=; done
838echo
839
840for i in ""$A""; do echo =$i=; done
841
842## STDOUT:
843['abc', 'def']
844['', 'abc', 'def', '']
845['abc', 'def']
846['', 'abc', 'def', '']
847
848=abc=
849=def=
850
851==
852=abc=
853=def=
854==
855
856=abc=
857=def=
858
859==
860=abc=
861=def=
862==
863## END