osh/sh_expr_eval.py

OILS / osh / sh_expr_eval.py View on Github | oilshell.org

1218 lines, 791 significant

1	#!/usr/bin/env python2
2	# Copyright 2016 Andy Chu. All rights reserved.
3	# Licensed under the Apache License, Version 2.0 (the "License");
4	# you may not use this file except in compliance with the License.
5	# You may obtain a copy of the License at
6	#
7	# http://www.apache.org/licenses/LICENSE-2.0
8	"""
9	sh_expr_eval.py -- Shell boolean and arithmetic expressions.
10	"""
11	from __future__ import print_function
12
13	from _devbuild.gen.id_kind_asdl import Id
14	from _devbuild.gen.runtime_asdl import scope_t
15	from _devbuild.gen.syntax_asdl import (
16	word_t,
17	CompoundWord,
18	Token,
19	loc,
20	loc_t,
21	source,
22	arith_expr,
23	arith_expr_e,
24	arith_expr_t,
25	bool_expr,
26	bool_expr_e,
27	bool_expr_t,
28	sh_lhs,
29	sh_lhs_e,
30	sh_lhs_t,
31	BracedVarSub,
32	)
33	from _devbuild.gen.option_asdl import option_i
34	from _devbuild.gen.types_asdl import bool_arg_type_e
35	from _devbuild.gen.value_asdl import (
36	value,
37	value_e,
38	value_t,
39	sh_lvalue,
40	sh_lvalue_e,
41	sh_lvalue_t,
42	LeftName,
43	eggex_ops,
44	regex_match,
45	RegexMatch,
46	)
47	from core import alloc
48	from core import error
49	from core.error import e_die, e_die_status, e_strict, e_usage
50	from core import num
51	from core import state
52	from display import ui
53	from core import util
54	from frontend import consts
55	from frontend import lexer
56	from frontend import location
57	from frontend import match
58	from frontend import reader
59	from mycpp import mops
60	from mycpp import mylib
61	from mycpp.mylib import log, tagswitch, switch, str_cmp
62	from osh import bool_stat
63	from osh import word_eval
64
65	import libc # for fnmatch
66	# Import these names directly because the C++ translation uses macros literally.
67	from libc import FNM_CASEFOLD, REG_ICASE
68
69	from typing import Tuple, Optional, cast, TYPE_CHECKING
70	if TYPE_CHECKING:
71	from core import optview
72	from frontend import parse_lib
73
74	_ = log
75
76	#
77	# Arith and Command/Word variants of assignment
78	#
79	# Calls EvalShellLhs()
80	# a[$key]=$val # osh/cmd_eval.py:814 (command_e.ShAssignment)
81	# Calls EvalArithLhs()
82	# (( a[key] = val )) # osh/sh_expr_eval.py:326 (_EvalLhsArith)
83	#
84	# Calls OldValue()
85	# a[$key]+=$val # osh/cmd_eval.py:795 (assign_op_e.PlusEqual)
86	# (( a[key] += val )) # osh/sh_expr_eval.py:308 (_EvalLhsAndLookupArith)
87	#
88	# RHS Indexing
89	# val=${a[$key]} # osh/word_eval.py:639 (bracket_op_e.ArrayIndex)
90	# (( val = a[key] )) # osh/sh_expr_eval.py:509 (Id.Arith_LBracket)
91	#
92
93
def OldValue(lval, mem, exec_opts):
    # type: (sh_lvalue_t, state.Mem, Optional[optview.Exec]) -> value_t
    """Read the current value of an lvalue, for augmented assignment.

    Used for s+=val and (( i += 1 )).

    Args:
      lval: the place being read -- a plain name, an indexed array element,
            or an associative array element
      mem: variable storage; only mem.GetValue() is called on it
      exec_opts: if not None and nounset is on, an undefined variable is a
                 fatal error.  Pass None to skip the check (s+=val does).

    Returns:
      For a plain name, whatever mem.GetValue() gave back.  For an element,
      a value.Str holding the element, or '' when the element is missing.

    TODO: A stricter and less ambiguous version for YSH.
    """
    assert isinstance(lval, sh_lvalue_t), lval

    # Pass 1: all three variants carry a variable name.
    # TODO: refactor sh_lvalue_t to make this simpler
    UP_lval = lval
    with tagswitch(lval) as case:
        if case(sh_lvalue_e.Var):  # (( i++ ))
            lval = cast(LeftName, UP_lval)
            var_name = lval.name
        elif case(sh_lvalue_e.Indexed):  # (( a[i]++ ))
            lval = cast(sh_lvalue.Indexed, UP_lval)
            var_name = lval.name
        elif case(sh_lvalue_e.Keyed):  # (( A['K']++ )) ? I think this works
            lval = cast(sh_lvalue.Keyed, UP_lval)
            var_name = lval.name
        else:
            raise AssertionError()

    val = mem.GetValue(var_name)
    if exec_opts and exec_opts.nounset():
        if val.tag() == value_e.Undef:
            e_die('Undefined variable %r' % var_name)  # TODO: location info

    # Pass 2: select the element, if the lvalue names one.
    UP_val = val
    with tagswitch(lval) as case:
        if case(sh_lvalue_e.Var):
            return val

        elif case(sh_lvalue_e.Indexed):
            lval = cast(sh_lvalue.Indexed, UP_lval)

            array_val = None  # type: value.BashArray
            with tagswitch(val) as case2:
                if case2(value_e.Undef):
                    # An undefined name acts like an empty array
                    array_val = value.BashArray([])
                elif case2(value_e.BashArray):
                    # mycpp rewrite: cast() creates a new var in inner scope,
                    # so go through a temporary
                    arr = cast(value.BashArray, UP_val)
                    array_val = arr
                else:
                    e_die("Can't use [] on value of type %s" %
                          ui.ValType(val))

            item = word_eval.GetArrayItem(array_val.strs, lval.index)

            if item is not None:
                assert isinstance(item, str), item
                val = value.Str(item)
            else:
                # NOTE: Other logic is value.Undef? 0?
                val = value.Str('')

        elif case(sh_lvalue_e.Keyed):
            lval = cast(sh_lvalue.Keyed, UP_lval)

            assoc_val = None  # type: value.BashAssoc
            with tagswitch(val) as case2:
                if case2(value_e.Undef):
                    # Treated as impossible for a Keyed lvalue
                    raise AssertionError()
                elif case2(value_e.BashAssoc):
                    # mycpp rewrite: cast() creates a new var in inner scope,
                    # so go through a temporary
                    assoc = cast(value.BashAssoc, UP_val)
                    assoc_val = assoc
                else:
                    e_die("Can't use [] on value of type %s" %
                          ui.ValType(val))

            entry = assoc_val.d.get(lval.key)
            if entry is not None:
                val = value.Str(entry)
            else:
                val = value.Str('')

        else:
            raise AssertionError()

    return val
185
186
187	# TODO: Should refactor for int/char-based processing
188	if mylib.PYTHON:
189
def IsLower(ch):
    # type: (str) -> bool
    """Return True if ch sorts within the range 'a' through 'z'."""
    return 'a' <= ch <= 'z'
193
def IsUpper(ch):
    # type: (str) -> bool
    """Return True if ch sorts within the range 'A' through 'Z'."""
    return 'A' <= ch <= 'Z'
197
198
199	class UnsafeArith(object):
200	"""For parsing a[i] at RUNTIME."""
201
def __init__(
        self,
        mem,  # type: state.Mem
        exec_opts,  # type: optview.Exec
        mutable_opts,  # type: state.MutableOpts
        parse_ctx,  # type: parse_lib.ParseContext
        arith_ev,  # type: ArithEvaluator
        errfmt,  # type: ui.ErrorFormatter
):
    # type: (...) -> None
    """Store the collaborators; nothing is parsed or evaluated here."""
    # Parsing side
    self.parse_ctx = parse_ctx
    self.arena = parse_ctx.arena  # cached, used to build line readers

    # Evaluation side
    self.mem = mem
    self.arith_ev = arith_ev

    # Options and error reporting
    self.exec_opts = exec_opts
    self.mutable_opts = mutable_opts
    self.errfmt = errfmt
220
def ParseLValue(self, s, location):
    # type: (str, loc_t) -> sh_lvalue_t
    """Turn a string into an sh_lvalue, for 'unset' and 'printf -v'.

    The arith parser does the work, so the string behaves like the LHS of
    (( a[i] = x )).

    Args:
      s: text naming the place, e.g. 'a' or 'a[i]'
      location: blamed in error messages

    Unless eval_unsafe_arith is on, the LHS is evaluated with
    _allow_command_sub turned off.
    """
    if not self.parse_ctx.parse_opts.parse_sh_arith():
        # Do something simpler for YSH: only a bare name is accepted
        if not match.IsValidVarName(s):
            e_die('Invalid variable name %r (parse_sh_arith is off)' % s,
                  location)
        return LeftName(s, location)

    arith_parser = self.parse_ctx.MakeArithParser(s)
    src = source.ArgvWord('dynamic LHS', location)

    with alloc.ctx_SourceCode(self.arena, src):
        try:
            lhs_node = arith_parser.Parse()
        except error.Parse as e:
            # Show the inner parse error first
            self.errfmt.PrettyPrintError(e)
            # Exception for builtins 'unset' and 'printf'
            e_usage('got invalid LHS expression', location)

    # Note: '1+2' parses fine here; it only fails at runtime because it isn't
    # a valid LHS.  Could be a parse error.

    if self.exec_opts.eval_unsafe_arith():
        lval = self.arith_ev.EvalArithLhs(lhs_node)
    else:
        # Prevent attacks like these by default:
        #
        # unset -v 'A["$(echo K; rm *)"]'
        with state.ctx_Option(self.mutable_opts,
                              [option_i._allow_command_sub], False):
            lval = self.arith_ev.EvalArithLhs(lhs_node)

    return lval
259
def ParseVarRef(self, ref_str, blame_tok):
    # type: (str, Token) -> BracedVarSub
    """Parse the string named by ${!ref} into a BracedVarSub.

    This supports:
    - 0 to 9 for $0 to $9
    - @ for "$@" etc.

    The grammar is related to _ReadBracedVarSub in osh/word_parse.py.

    Note: declare -n allows 'varname' and 'varname[i]' and 'varname[@]', but
    it does NOT allow 0 to 9, @, *

    NamerefExpr = NAME Subscript? # this allows @ and * too

    Args:
      ref_str: the text to parse
      blame_tok: token blamed when the text doesn't parse
    """
    line_reader = reader.StringLineReader(ref_str, self.arena)
    # Named 'lx' so the imported 'lexer' module isn't shadowed
    lx = self.parse_ctx.MakeLexer(line_reader)
    w_parser = self.parse_ctx.MakeWordParser(lx, line_reader)

    with alloc.ctx_SourceCode(self.arena, source.VarRef(blame_tok)):
        try:
            bvs_part = w_parser.ParseVarRef()
        except error.Parse as e:
            # This prints the inner location
            self.errfmt.PrettyPrintError(e)

            # this affects builtins 'unset' and 'printf'
            e_die("Invalid var ref expression", blame_tok)

    return bvs_part
295
296
297	class ArithEvaluator(object):
298	"""Shared between arith and bool evaluators.
299
300	They both:
301
302	1. Convert strings to integers, respecting shopt -s strict_arith.
303	2. Look up variables and evaluate words.
304	"""
305
def __init__(
        self,
        mem,  # type: state.Mem
        exec_opts,  # type: optview.Exec
        mutable_opts,  # type: state.MutableOpts
        parse_ctx,  # type: Optional[parse_lib.ParseContext]
        errfmt,  # type: ui.ErrorFormatter
):
    # type: (...) -> None
    """Store the collaborators.

    word_ev starts out as None and has to be assigned by the caller
    afterwards; CheckCircularDeps() asserts that this happened.
    """
    self.mem = mem
    self.exec_opts = exec_opts
    self.mutable_opts = mutable_opts
    self.parse_ctx = parse_ctx
    self.errfmt = errfmt

    # Filled in after construction
    self.word_ev = None  # type: word_eval.StringWordEvaluator
321
def CheckCircularDeps(self):
    # type: () -> None
    """Assert that word_ev, which __init__ leaves as None, has been set."""
    assert self.word_ev is not None
325
def _StringToBigInt(self, s, blame_loc):
    # type: (str, loc_t) -> mops.BigInt
    """Use bash-like rules to coerce a string to an integer.

    Runtime parsing enables silly stuff like $(( $(echo 1)$(echo 2) + 1 )) => 13

    0xAB -- hex constant
    042  -- octal constant
    42   -- decimal constant
    64#z -- arbitrary base constant

    bare word: variable
    quoted word: string (not done?)

    Anything else is re-parsed as an arithmetic expression and evaluated,
    but only when self.parse_ctx is set ('test' and '[' never recurse).

    Args:
      s: the text to convert
      blame_loc: location attached to any error

    Raises:
      Whatever e_strict() raises (callers in this file catch error.Strict
      and may turn it into 0), or a fatal error via e_die().
    """
    m = util.RegexSearch('^0x([0-9A-Fa-f]+)$', s)
    if m is not None:
        try:
            integer = mops.FromStr(m[1], 16)
        except ValueError:
            e_strict('Invalid hex constant %r' % s, blame_loc)
        # TODO: don't truncate
        return integer

    m = util.RegexSearch('^0([0-7]+)$', s)
    if m is not None:
        try:
            integer = mops.FromStr(s, 8)
        except ValueError:
            e_strict('Invalid octal constant %r' % s, blame_loc)
        return integer

    # Base specifier cannot start with a zero
    m = util.RegexSearch('^([1-9][0-9]*)#([0-9a-zA-Z@_]+)$', s)
    if m is not None:
        b = m[1]
        try:
            base = int(b)  # machine integer, not BigInt
        except ValueError:
            # Unreachable per the regex validation above
            raise AssertionError()

        integer = mops.ZERO
        digits = m[2]
        for ch in digits:
            # Digit values: 0-9, then a-z = 10..35, A-Z = 36..61, @ = 62,
            # _ = 63
            if IsLower(ch):
                digit = ord(ch) - ord('a') + 10
            elif IsUpper(ch):
                digit = ord(ch) - ord('A') + 36
            elif ch == '@':  # horrible syntax
                digit = 62
            elif ch == '_':
                digit = 63
            elif ch.isdigit():
                digit = int(ch)
            else:
                # Unreachable per the regex validation above
                raise AssertionError()

            if digit >= base:
                e_die(
                    'Digits %r out of range for base %d' % (digits, base),
                    blame_loc)

            #integer = integer * base + digit
            integer = mops.Add(mops.Mul(integer, mops.BigInt(base)),
                               mops.BigInt(digit))
        return integer

    # Note: decimal integers cannot have a leading zero
    #
    # The '|' must NOT be backslash-escaped: in an extended regex '\|' is a
    # literal pipe character, so neither '42' nor '0' would match, and every
    # decimal constant would fall through to the recursive parse below.
    m = util.RegexSearch('^([1-9][0-9]*|0)$', s)
    if m is not None:
        # Normal base 10 integer.
        return mops.FromStr(s)

    # Doesn't look like an integer

    # note: 'test' and '[' never evaluate recursively
    if self.parse_ctx:
        arena = self.parse_ctx.arena

        # Special case so we don't get EOF error
        if len(s.strip()) == 0:
            return mops.ZERO

        # For compatibility: Try to parse it as an expression and evaluate it.
        a_parser = self.parse_ctx.MakeArithParser(s)

        # TODO: Fill in the variable name
        with alloc.ctx_SourceCode(arena, source.Variable(None,
                                                         blame_loc)):
            try:
                node2 = a_parser.Parse()  # may raise error.Parse
            except error.Parse as e:
                self.errfmt.PrettyPrintError(e)
                e_die('Parse error in recursive arithmetic', e.location)

        # Prevent infinite recursion of $(( 1x )) -- it's a word that evaluates
        # to itself, and you don't want to reparse it as a word.
        if node2.tag() == arith_expr_e.Word:
            e_die("Invalid integer constant %r" % s, blame_loc)

        if self.exec_opts.eval_unsafe_arith():
            integer = self.EvalToBigInt(node2)
        else:
            # BoolEvaluator doesn't have parse_ctx or mutable_opts
            assert self.mutable_opts is not None

            # We don't need to flip _allow_process_sub, because they can't be
            # parsed. See spec/bugs.test.sh.
            with state.ctx_Option(self.mutable_opts,
                                  [option_i._allow_command_sub], False):
                integer = self.EvalToBigInt(node2)

    else:
        if len(s.strip()) == 0 or match.IsValidVarName(s):
            # x42 could evaluate to 0
            e_strict("Invalid integer constant %r" % s, blame_loc)
        else:
            # 42x is always fatal!
            e_die("Invalid integer constant %r" % s, blame_loc)

    return integer
450
def _ValToIntOrError(self, val, blame):
    # type: (value_t, arith_expr_t) -> mops.BigInt
    """Convert a value to a BigInt, or fail.

    Undef and unparseable strings raise error.Strict internally; that is
    re-raised only under strict_arith, otherwise the result is 0.  Any
    other kind of value (e.g. an array) is always a fatal error.

    Args:
      val: the value to convert
      blame: arith node used as the error location
    """
    UP_val = val
    try:
        with tagswitch(val) as case:
            if case(value_e.Undef):
                # 'nounset' already handled before got here
                # Happens upon a[undefined]=42, which unfortunately turns
                # into a[0]=42.
                e_strict('Undefined value in arithmetic context',
                         loc.Arith(blame))

            elif case(value_e.Int):
                int_val = cast(value.Int, UP_val)
                return int_val.i

            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                # calls e_strict
                return self._StringToBigInt(str_val.s, loc.Arith(blame))

    except error.Strict as e:
        if not self.exec_opts.strict_arith():
            return mops.ZERO
        raise

    # Arrays and associative arrays always fail -- not controlled by
    # strict_arith.
    # In bash, (( a )) is like (( a[0] )), but I don't want that.
    # And returning '0' gives different results.
    e_die(
        "Expected a value convertible to integer, got %s" %
        ui.ValType(val), loc.Arith(blame))
484
def _EvalLhsAndLookupArith(self, node):
    # type: (arith_expr_t) -> Tuple[mops.BigInt, sh_lvalue_t]
    """Evaluate an LHS and fetch its current integer value.

    For x = y and x += y and ++x

    Returns:
      (old integer value, lvalue to store the new value into)
    """
    lval = self.EvalArithLhs(node)
    val = OldValue(lval, self.mem, self.exec_opts)

    # BASH_LINENO, arr (array name without strict_array), etc.
    is_container = val.tag() in (value_e.BashArray, value_e.BashAssoc)
    if is_container and lval.tag() == sh_lvalue_e.Var:
        named_lval = cast(LeftName, lval)
        if word_eval.ShouldArrayDecay(named_lval.name, self.exec_opts):
            # A bare array name refers to element 0 / key '0'
            if val.tag() == value_e.BashArray:
                lval = sh_lvalue.Indexed(named_lval.name, 0, loc.Missing)
            elif val.tag() == value_e.BashAssoc:
                lval = sh_lvalue.Keyed(named_lval.name, '0', loc.Missing)
            val = word_eval.DecayArray(val)

    # This error message could be better, but we already have one
    #if val.tag() == value_e.BashArray:
    #  e_die("Can't use assignment like ++ or += on arrays")

    old_int = self._ValToIntOrError(val, node)
    return old_int, lval
509
def _Store(self, lval, new_int):
    # type: (sh_lvalue_t, mops.BigInt) -> None
    """Write new_int into lval, stored as its string form."""
    as_str = mops.ToStr(new_int)
    state.OshLanguageSetValue(self.mem, lval, value.Str(as_str))
514
def EvalToBigInt(self, node):
    # type: (arith_expr_t) -> mops.BigInt
    """Evaluate an arithmetic node down to a BigInt.

    Used externally by ${a[i+1]} and ${a:start:len}.  Also used internally.
    """
    val = self.Eval(node)

    # BASH_LINENO, arr (array name without strict_array), etc.
    is_container = val.tag() in (value_e.BashArray, value_e.BashAssoc)
    if is_container and node.tag() == arith_expr_e.VarSub:
        name_tok = cast(Token, node)
        if word_eval.ShouldArrayDecay(lexer.LazyStr(name_tok),
                                      self.exec_opts):
            val = word_eval.DecayArray(val)

    return self._ValToIntOrError(val, node)
532
def EvalToInt(self, node):
    # type: (arith_expr_t) -> int
    """Like EvalToBigInt(), with the result passed through mops.BigTruncate()."""
    big = self.EvalToBigInt(node)
    return mops.BigTruncate(big)
536
def Eval(self, node):
    # type: (arith_expr_t) -> value_t
    """Evaluate an arithmetic expression node to a value.

    Returns:
      value.Int for anything computed: constants, unary and binary
        operators, and assignments
      the stored value, unchanged, for a bare variable -- it may be Undef,
        a string, or an array
      whatever word_ev.EvalWordToString() returns for a word
      value.Str or value.Undef for indexing, a[i]

    Assignments also store their result through self._Store().

    NOTE: (( A['x'] = 'x' )) and (( x = A['x'] )) are syntactically valid in
    bash, but don't do what you'd think.  'x' sometimes a variable name and
    sometimes a key.
    """
    # OSH semantics: Variable NAMES cannot be formed dynamically; but INTEGERS
    # can.  ${foo:-3}4 is OK.  $? will be a compound word too, so we don't have
    # to handle that as a special case.

    UP_node = node
    with tagswitch(node) as case:
        if case(arith_expr_e.EmptyZero):  # $(( ))
            return value.Int(mops.ZERO)  # Weird axiom

        elif case(arith_expr_e.EmptyOne):  # for (( ; ; ))
            return value.Int(mops.ONE)

        elif case(arith_expr_e.VarSub):  # $(( x ))  (can be array)
            vsub = cast(Token, UP_node)
            var_name = lexer.LazyStr(vsub)
            val = self.mem.GetValue(var_name)
            if val.tag() == value_e.Undef and self.exec_opts.nounset():
                e_die('Undefined variable %r' % var_name, vsub)
            return val

        elif case(arith_expr_e.Word):  # $(( $x )) $(( ${x}${y} )), etc.
            w = cast(CompoundWord, UP_node)
            return self.word_ev.EvalWordToString(w)

        elif case(arith_expr_e.UnaryAssign):  # a++
            node = cast(arith_expr.UnaryAssign, UP_node)

            op_id = node.op_id
            old_big, lval = self._EvalLhsAndLookupArith(node.child)

            # Post forms yield the old value, pre forms the new one
            if op_id == Id.Node_PostDPlus:  # post-increment
                new_big = mops.Add(old_big, mops.ONE)
                result = old_big

            elif op_id == Id.Node_PostDMinus:  # post-decrement
                new_big = mops.Sub(old_big, mops.ONE)
                result = old_big

            elif op_id == Id.Arith_DPlus:  # pre-increment
                new_big = mops.Add(old_big, mops.ONE)
                result = new_big

            elif op_id == Id.Arith_DMinus:  # pre-decrement
                new_big = mops.Sub(old_big, mops.ONE)
                result = new_big

            else:
                raise AssertionError(op_id)

            self._Store(lval, new_big)
            return value.Int(result)

        elif case(arith_expr_e.BinaryAssign):  # a=1, a+=5, a[1]+=5
            node = cast(arith_expr.BinaryAssign, UP_node)
            op_id = node.op_id

            if op_id == Id.Arith_Equal:
                # Don't really need a span ID here, because tdop.CheckLhsExpr should
                # have done all the validation.
                lval = self.EvalArithLhs(node.left)
                rhs_big = self.EvalToBigInt(node.right)

                self._Store(lval, rhs_big)
                return value.Int(rhs_big)

            old_big, lval = self._EvalLhsAndLookupArith(node.left)
            rhs_big = self.EvalToBigInt(node.right)

            if op_id == Id.Arith_PlusEqual:
                new_big = mops.Add(old_big, rhs_big)
            elif op_id == Id.Arith_MinusEqual:
                new_big = mops.Sub(old_big, rhs_big)
            elif op_id == Id.Arith_StarEqual:
                new_big = mops.Mul(old_big, rhs_big)

            elif op_id == Id.Arith_SlashEqual:
                if mops.Equal(rhs_big, mops.ZERO):
                    # Blame the divisor expression
                    e_die('Divide by zero', loc.Arith(node.right))
                new_big = mops.Div(old_big, rhs_big)

            elif op_id == Id.Arith_PercentEqual:
                if mops.Equal(rhs_big, mops.ZERO):
                    # Blame the divisor expression
                    e_die('Divide by zero', loc.Arith(node.right))
                new_big = mops.Rem(old_big, rhs_big)

            # The shift assignments apply the same negative-count guard as
            # the plain << and >> operators further down.
            elif op_id == Id.Arith_DGreatEqual:
                if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                    raise error.Expr(
                        "Can't right shift by negative number",
                        loc.Arith(node.right))
                new_big = mops.RShift(old_big, rhs_big)
            elif op_id == Id.Arith_DLessEqual:
                if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                    raise error.Expr("Can't left shift by negative number",
                                     loc.Arith(node.right))
                new_big = mops.LShift(old_big, rhs_big)
            elif op_id == Id.Arith_AmpEqual:
                new_big = mops.BitAnd(old_big, rhs_big)
            elif op_id == Id.Arith_PipeEqual:
                new_big = mops.BitOr(old_big, rhs_big)
            elif op_id == Id.Arith_CaretEqual:
                new_big = mops.BitXor(old_big, rhs_big)
            else:
                raise AssertionError(op_id)  # shouldn't get here

            self._Store(lval, new_big)
            return value.Int(new_big)

        elif case(arith_expr_e.Unary):
            node = cast(arith_expr.Unary, UP_node)
            op_id = node.op_id

            i = self.EvalToBigInt(node.child)

            if op_id == Id.Node_UnaryPlus:  # +i
                result = i
            elif op_id == Id.Node_UnaryMinus:  # -i
                result = mops.Sub(mops.ZERO, i)

            elif op_id == Id.Arith_Bang:  # logical negation
                if mops.Equal(i, mops.ZERO):
                    result = mops.ONE
                else:
                    result = mops.ZERO
            elif op_id == Id.Arith_Tilde:  # bitwise complement
                result = mops.BitNot(i)
            else:
                raise AssertionError(op_id)  # shouldn't get here

            return value.Int(result)

        elif case(arith_expr_e.Binary):
            node = cast(arith_expr.Binary, UP_node)
            op_id = node.op.id

            # Short-circuit evaluation for || and &&.
            if op_id == Id.Arith_DPipe:
                lhs_big = self.EvalToBigInt(node.left)
                if mops.Equal(lhs_big, mops.ZERO):
                    rhs_big = self.EvalToBigInt(node.right)
                    if mops.Equal(rhs_big, mops.ZERO):
                        result = mops.ZERO  # false
                    else:
                        result = mops.ONE  # true
                else:
                    result = mops.ONE  # true
                return value.Int(result)

            if op_id == Id.Arith_DAmp:
                lhs_big = self.EvalToBigInt(node.left)
                if mops.Equal(lhs_big, mops.ZERO):
                    result = mops.ZERO  # false
                else:
                    rhs_big = self.EvalToBigInt(node.right)
                    if mops.Equal(rhs_big, mops.ZERO):
                        result = mops.ZERO  # false
                    else:
                        result = mops.ONE  # true
                return value.Int(result)

            if op_id == Id.Arith_LBracket:
                # NOTE: Similar to bracket_op_e.ArrayIndex in osh/word_eval.py

                left = self.Eval(node.left)
                UP_left = left
                with tagswitch(left) as case:
                    if case(value_e.BashArray):
                        array_val = cast(value.BashArray, UP_left)
                        small_i = mops.BigTruncate(
                            self.EvalToBigInt(node.right))
                        s = word_eval.GetArrayItem(array_val.strs, small_i)

                    elif case(value_e.BashAssoc):
                        left = cast(value.BashAssoc, UP_left)
                        key = self.EvalWordToString(node.right)
                        s = left.d.get(key)

                    elif case(value_e.Str):
                        left = cast(value.Str, UP_left)
                        if self.exec_opts.strict_arith():
                            e_die(
                                "Value of type Str can't be indexed (strict_arith)",
                                node.op)
                        index = self.EvalToBigInt(node.right)
                        # s[0] evaluates to s
                        # s[1] evaluates to Undef
                        s = left.s if mops.Equal(index,
                                                 mops.ZERO) else None

                    elif case(value_e.Undef):
                        if self.exec_opts.strict_arith():
                            e_die(
                                "Value of type Undef can't be indexed (strict_arith)",
                                node.op)
                        s = None  # value.Undef

                        # There isn't a way to distinguish Undef vs. empty
                        # string, even with set -o nounset?
                        # s = ''

                    else:
                        # TODO: Add error context
                        e_die(
                            "Value of type %s can't be indexed" %
                            ui.ValType(left), node.op)

                if s is None:
                    val = value.Undef
                else:
                    val = value.Str(s)

                return val

            if op_id == Id.Arith_Comma:
                self.EvalToBigInt(node.left)  # throw away result
                result = self.EvalToBigInt(node.right)
                return value.Int(result)

            # Rest are integers
            lhs_big = self.EvalToBigInt(node.left)
            rhs_big = self.EvalToBigInt(node.right)

            if op_id == Id.Arith_Plus:
                result = mops.Add(lhs_big, rhs_big)
            elif op_id == Id.Arith_Minus:
                result = mops.Sub(lhs_big, rhs_big)
            elif op_id == Id.Arith_Star:
                result = mops.Mul(lhs_big, rhs_big)
            elif op_id == Id.Arith_Slash:
                if mops.Equal(rhs_big, mops.ZERO):
                    e_die('Divide by zero', node.op)
                result = mops.Div(lhs_big, rhs_big)

            elif op_id == Id.Arith_Percent:
                if mops.Equal(rhs_big, mops.ZERO):
                    e_die('Divide by zero', node.op)
                result = mops.Rem(lhs_big, rhs_big)

            elif op_id == Id.Arith_DStar:
                if mops.Greater(mops.ZERO, rhs_big):
                    e_die("Exponent can't be a negative number",
                          loc.Arith(node.right))
                result = num.Exponent(lhs_big, rhs_big)

            elif op_id == Id.Arith_DEqual:
                result = mops.FromBool(mops.Equal(lhs_big, rhs_big))
            elif op_id == Id.Arith_NEqual:
                result = mops.FromBool(not mops.Equal(lhs_big, rhs_big))
            elif op_id == Id.Arith_Great:
                result = mops.FromBool(mops.Greater(lhs_big, rhs_big))
            elif op_id == Id.Arith_GreatEqual:
                result = mops.FromBool(
                    mops.Greater(lhs_big, rhs_big) or
                    mops.Equal(lhs_big, rhs_big))
            elif op_id == Id.Arith_Less:
                result = mops.FromBool(mops.Greater(rhs_big, lhs_big))
            elif op_id == Id.Arith_LessEqual:
                result = mops.FromBool(
                    mops.Greater(rhs_big, lhs_big) or
                    mops.Equal(lhs_big, rhs_big))

            elif op_id == Id.Arith_Pipe:
                result = mops.BitOr(lhs_big, rhs_big)
            elif op_id == Id.Arith_Amp:
                result = mops.BitAnd(lhs_big, rhs_big)
            elif op_id == Id.Arith_Caret:
                result = mops.BitXor(lhs_big, rhs_big)

            # Note: how to define shift of negative numbers?
            elif op_id == Id.Arith_DLess:
                if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                    raise error.Expr("Can't left shift by negative number",
                                     node.op)
                result = mops.LShift(lhs_big, rhs_big)
            elif op_id == Id.Arith_DGreat:
                if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                    raise error.Expr(
                        "Can't right shift by negative number", node.op)
                result = mops.RShift(lhs_big, rhs_big)
            else:
                raise AssertionError(op_id)

            return value.Int(result)

        elif case(arith_expr_e.TernaryOp):
            node = cast(arith_expr.TernaryOp, UP_node)

            # Only the selected branch is evaluated
            cond = self.EvalToBigInt(node.cond)
            if mops.Equal(cond, mops.ZERO):
                return self.Eval(node.false_expr)
            else:
                return self.Eval(node.true_expr)

        else:
            raise AssertionError(node.tag())

    raise AssertionError('for -Wreturn-type in C++')
840
def EvalWordToString(self, node, blame_loc=loc.Missing):
    # type: (arith_expr_t, loc_t) -> str
    """Evaluate an arith node that must be a word, giving its string.

    Used for associative array keys.  These are allowed because they're
    unambiguous, unlike a[x]:

      a[$x] a["$x"] a["x"] a['x']

    Raises:
      A fatal error via e_die() if the node isn't a word, e.g. the bare
      variable in a[x]
    """
    UP_node = node
    if node.tag() != arith_expr_e.Word:
        # A[x] is the "Parsing Bash is Undecidable" problem
        # It is a string or var name?
        # (It's parsed as arith_expr.VarSub)
        e_die(
            "Assoc array keys must be strings: $x 'x' \"$x\" etc. (OILS-ERR-101)",
            blame_loc)

    # $(( $x )) $(( ${x}${y} )), etc.
    w = cast(CompoundWord, UP_node)
    str_val = self.word_ev.EvalWordToString(w)
    return str_val.s
864
def EvalShellLhs(self, node, which_scopes):
    # type: (sh_lhs_t, scope_t) -> sh_lvalue_t
    """Evaluate the left-hand side of a shell assignment.

    For a=b and a[x]=b etc.

    The subscript is evaluated as a string key when the name is currently
    an associative array, and as an integer index otherwise.
    """
    assert isinstance(node, sh_lhs_t), node

    UP_node = node
    lval = None  # type: sh_lvalue_t
    with tagswitch(node) as case:
        if case(sh_lhs_e.Name):  # a=x
            node = cast(sh_lhs.Name, UP_node)
            assert node.name is not None

            plain = LeftName(node.name, node.left)
            lval = plain

        elif case(sh_lhs_e.IndexedName):  # a[1+2]=x
            node = cast(sh_lhs.IndexedName, UP_node)
            assert node.name is not None

            if not self.mem.IsBashAssoc(node.name):
                index = mops.BigTruncate(self.EvalToBigInt(node.index))
                indexed = sh_lvalue.Indexed(node.name, index, node.left)
                lval = indexed
            else:
                key = self.EvalWordToString(node.index,
                                            blame_loc=node.left)
                # node.left points to A[ in A[x]=1
                keyed = sh_lvalue.Keyed(node.name, key, node.left)
                lval = keyed

        else:
            raise AssertionError(node.tag())

    return lval
902
def _VarNameOrWord(self, anode):
    # type: (arith_expr_t) -> Tuple[Optional[str], loc_t]
    """Interpret an arith node as a variable name, if possible.

    Returns:
      (name, location) for a bare variable token or a word; the word is
      evaluated to get the name.  (None, loc.Missing) for any other node.
    """
    UP_anode = anode
    tag = anode.tag()

    if tag == arith_expr_e.VarSub:
        tok = cast(Token, UP_anode)
        return (lexer.LazyStr(tok), tok)

    if tag == arith_expr_e.Word:
        w = cast(CompoundWord, UP_anode)
        return (self.EvalWordToString(w), w)

    no_str = None  # type: str
    return (no_str, loc.Missing)
921
def EvalArithLhs(self, anode):
    # type: (arith_expr_t) -> sh_lvalue_t
    """Evaluate the left-hand side of an arithmetic assignment.

    For (( a[x] = 1 )) etc.

    Returns:
      sh_lvalue.Keyed or sh_lvalue.Indexed for name[subscript], depending on
      whether the name is currently an associative array; LeftName for a
      plain name.

    Raises:
      A fatal error via e_die() for an invalid variable name, and via
      e_die_status(2, ...) when the node can't be an LHS at all.
    """
    UP_anode = anode
    if anode.tag() == arith_expr_e.Binary:
        anode = cast(arith_expr.Binary, UP_anode)
        if anode.op.id == Id.Arith_LBracket:
            var_name, blame_loc = self._VarNameOrWord(anode.left)

            # _VarNameOrWord() gives None when the thing being indexed is
            # neither a name nor a word.  Test for that BEFORE validating
            # the name, so None is never handed to IsValidVarName(); that
            # case falls through to 'Invalid LHS to modify' below.
            if var_name is not None:
                # (( 1[2] = 3 )) isn't valid
                if not match.IsValidVarName(var_name):
                    e_die('Invalid variable name %r' % var_name, blame_loc)

                if self.mem.IsBashAssoc(var_name):
                    arith_loc = location.TokenForArith(anode)
                    key = self.EvalWordToString(anode.right,
                                                blame_loc=arith_loc)
                    return sh_lvalue.Keyed(var_name, key, blame_loc)
                else:
                    index = mops.BigTruncate(self.EvalToBigInt(
                        anode.right))
                    return sh_lvalue.Indexed(var_name, index, blame_loc)

    var_name, blame_loc = self._VarNameOrWord(anode)
    if var_name is not None:
        return LeftName(var_name, blame_loc)

    # e.g. unset 'x-y'.  status 2 for runtime parse error
    e_die_status(2, 'Invalid LHS to modify', blame_loc)
954
955
956	class BoolEvaluator(ArithEvaluator):
957	"""This is also an ArithEvaluator because it has to understand.
958
959	[[ x -eq 3 ]]
960
961	where x='1+2'
962	"""
963
def __init__(
        self,
        mem,  # type: state.Mem
        exec_opts,  # type: optview.Exec
        mutable_opts,  # type: Optional[state.MutableOpts]
        parse_ctx,  # type: Optional[parse_lib.ParseContext]
        errfmt,  # type: ui.ErrorFormatter
        always_strict=False  # type: bool
):
    # type: (...) -> None
    """Set up the ArithEvaluator base, then record always_strict.

    When always_strict is True, _StringToBigIntOrError() re-raises
    error.Strict regardless of the strict_arith option.
    """
    self.always_strict = always_strict
    ArithEvaluator.__init__(self, mem, exec_opts, mutable_opts, parse_ctx,
                            errfmt)
977
def _IsDefined(self, s, blame_loc):
    # type: (str, loc_t) -> bool
    """Decide whether the name or name[index] in s is set (the -v test).

    Malformed input and non-array values give False, or an error when
    strict_word_eval is on.

    Args:
      s: text matched against consts.TEST_V_RE
      blame_loc: location attached to any error
    """
    m = util.RegexSearch(consts.TEST_V_RE, s)
    if m is None:
        if self.exec_opts.strict_word_eval():
            e_die('-v expected name or name[index]', blame_loc)
        return False

    var_name = m[1]
    index_str = m[3]

    val = self.mem.GetValue(var_name)
    if len(index_str) == 0:  # it's just a variable name
        return val.tag() != value_e.Undef

    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.BashArray):
            array_val = cast(value.BashArray, UP_val)

            # TODO: use mops.BigStr
            try:
                index = int(index_str)
            except ValueError as e:
                if self.exec_opts.strict_word_eval():
                    e_die(
                        '-v got BashArray and invalid index %r' %
                        index_str, blame_loc)
                return False

            if index < 0:
                if self.exec_opts.strict_word_eval():
                    e_die('-v got invalid negative index %s' % index_str,
                          blame_loc)
                return False

            if index >= len(array_val.strs):
                # out of range
                return False

            # An in-range slot can still be unset
            return array_val.strs[index] is not None

        elif case(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            return index_str in assoc_val.d

        else:
            # work around mycpp bug!  parses as 'elif'
            pass

    # Subscript on something that is neither kind of array
    if self.exec_opts.strict_word_eval():
        raise error.TypeErr(val, 'Expected BashArray or BashAssoc',
                            blame_loc)
    return False
    raise AssertionError()
1034
def _StringToBigIntOrError(self, s, blame_word=None):
    # type: (str, Optional[word_t]) -> mops.BigInt
    """Convert s to a BigInt; used by both [[ $x -gt 3 ]] and (( $x )).

    error.Strict is re-raised when always_strict or strict_arith is on;
    otherwise the result is 0.
    """
    # Named 'blame_loc' so the imported 'location' module isn't shadowed
    blame_loc = loc.Missing  # type: loc_t
    if blame_word:
        blame_loc = loc.Word(blame_word)

    try:
        return self._StringToBigInt(s, blame_loc)
    except error.Strict as e:
        if self.always_strict or self.exec_opts.strict_arith():
            raise
    return mops.ZERO
1051
def _EvalCompoundWord(self, word, eval_flags=0):
    # type: (word_t, int) -> str
    """Evaluate a word with the given flags and return its string."""
    return self.word_ev.EvalWordToString(word, eval_flags).s
1056
def EvalB(self, node):
    # type: (bool_expr_t) -> bool
    """Evaluate a boolean expression node, e.g. from [[ ... ]], to a bool.

    Dispatches on the node's tag:
      - WordTest: true if the evaluated word is a non-empty string.
      - LogicalNot / LogicalAnd / LogicalOr: recursive evaluation; And and
        Or short-circuit, so the right side may not be evaluated.
      - Unary / Binary: delegated to _EvalBoolUnary() / _EvalBoolBinary().

    Args:
      node: The boolean expression to evaluate.

    Returns:
      The truth value of the expression.

    Raises:
      AssertionError: If the node's tag is not handled, or an operator
        doesn't belong to its argument type (should never happen).
      Whatever the helpers raise, e.g. fatal errors via e_die() and
      e_die_status().
    """
    UP_node = node
    with tagswitch(node) as case:
        if case(bool_expr_e.WordTest):
            node = cast(bool_expr.WordTest, UP_node)
            s = self._EvalCompoundWord(node.w)
            return bool(s)

        elif case(bool_expr_e.LogicalNot):
            node = cast(bool_expr.LogicalNot, UP_node)
            b = self.EvalB(node.child)
            return not b

        elif case(bool_expr_e.LogicalAnd):
            node = cast(bool_expr.LogicalAnd, UP_node)
            # Short-circuit evaluation
            if self.EvalB(node.left):
                return self.EvalB(node.right)
            else:
                return False

        elif case(bool_expr_e.LogicalOr):
            node = cast(bool_expr.LogicalOr, UP_node)
            # Short-circuit evaluation
            if self.EvalB(node.left):
                return True
            else:
                return self.EvalB(node.right)

        elif case(bool_expr_e.Unary):
            node = cast(bool_expr.Unary, UP_node)
            return self._EvalBoolUnary(node)

        elif case(bool_expr_e.Binary):
            node = cast(bool_expr.Binary, UP_node)
            return self._EvalBoolBinary(node)

    raise AssertionError(node.tag())

def _EvalBoolUnary(self, node):
    # type: (bool_expr.Unary) -> bool
    """Evaluate a unary boolean operator applied to one word.

    The operand is evaluated to a string, and then the operator is
    dispatched on its argument type (Path, Str, or Other).

    Raises:
      AssertionError: If the operator or its argument type is unexpected.
    """
    op_id = node.op_id
    s = self._EvalCompoundWord(node.child)

    # Now dispatch on arg type.  (arg_type could be static in the LST?)
    arg_type = consts.BoolArgType(op_id)

    if arg_type == bool_arg_type_e.Path:
        return bool_stat.DoUnaryOp(op_id, s)

    if arg_type == bool_arg_type_e.Str:
        if op_id == Id.BoolUnary_z:
            return not bool(s)
        if op_id == Id.BoolUnary_n:
            return bool(s)

        raise AssertionError(op_id)  # should never happen

    if arg_type == bool_arg_type_e.Other:
        if op_id == Id.BoolUnary_t:
            return bool_stat.isatty(s, node.child)

        # See whether 'set -o' options have been set
        if op_id == Id.BoolUnary_o:
            index = consts.OptionNum(s)
            # NOTE(review): 0 presumably means "no such option" -- confirm
            # against consts.OptionNum().
            if index == 0:
                return False
            else:
                return self.exec_opts.opt0_array[index]

        if op_id == Id.BoolUnary_v:
            return self._IsDefined(s, loc.Word(node.child))

        e_die("%s isn't implemented" %
              ui.PrettyId(op_id))  # implicit location

    raise AssertionError(arg_type)

def _EvalBoolBinary(self, node):
    # type: (bool_expr.Binary) -> bool
    """Evaluate a binary boolean operator applied to two words.

    Both operands are evaluated to strings.  The right operand is
    evaluated with QUOTE_FNMATCH for the glob operators and QUOTE_ERE for
    =~.  The operator is then dispatched on its argument type (Path, Int,
    or Str).

    Side effects:
      For =~, the match state is recorded with self.mem.SetRegexMatch(),
      both on success and on failure.

    Raises:
      AssertionError: If the operator or its argument type is unexpected.
      A fatal error with status 2, via e_die_status(), when the regex on
      the right of =~ is rejected by libc.regex_search().
    """
    op_id = node.op_id

    # Whether to glob escape
    eval_flags = 0
    with switch(op_id) as case2:
        if case2(Id.BoolBinary_GlobEqual, Id.BoolBinary_GlobDEqual,
                 Id.BoolBinary_GlobNEqual):
            eval_flags |= word_eval.QUOTE_FNMATCH
        elif case2(Id.BoolBinary_EqualTilde):
            eval_flags |= word_eval.QUOTE_ERE

    s1 = self._EvalCompoundWord(node.left)
    s2 = self._EvalCompoundWord(node.right, eval_flags)

    # Now dispatch on arg type
    arg_type = consts.BoolArgType(op_id)

    if arg_type == bool_arg_type_e.Path:
        return bool_stat.DoBinaryOp(op_id, s1, s2)

    if arg_type == bool_arg_type_e.Int:
        # NOTE: We assume they are constants like [[ 3 -eq 3 ]].
        # Bash also allows [[ 1+2 -eq 3 ]].
        i1 = self._StringToBigIntOrError(s1, blame_word=node.left)
        i2 = self._StringToBigIntOrError(s2, blame_word=node.right)

        # Every comparison is expressed with mops.Equal and mops.Greater,
        # swapping the operands for "less than".
        if op_id == Id.BoolBinary_eq:
            return mops.Equal(i1, i2)
        if op_id == Id.BoolBinary_ne:
            return not mops.Equal(i1, i2)
        if op_id == Id.BoolBinary_gt:
            return mops.Greater(i1, i2)
        if op_id == Id.BoolBinary_ge:
            return mops.Greater(i1, i2) or mops.Equal(i1, i2)
        if op_id == Id.BoolBinary_lt:
            return mops.Greater(i2, i1)
        if op_id == Id.BoolBinary_le:
            return mops.Greater(i2, i1) or mops.Equal(i1, i2)

        raise AssertionError(op_id)  # should never happen

    if arg_type == bool_arg_type_e.Str:
        fnmatch_flags = (FNM_CASEFOLD
                         if self.exec_opts.nocasematch() else 0)

        # For the glob operators, the right operand (s2) is the pattern
        # and the left operand (s1) is the string being matched.
        if op_id in (Id.BoolBinary_GlobEqual,
                     Id.BoolBinary_GlobDEqual):
            #log('Matching %s against pattern %s', s1, s2)
            return libc.fnmatch(s2, s1, fnmatch_flags)

        if op_id == Id.BoolBinary_GlobNEqual:
            return not libc.fnmatch(s2, s1, fnmatch_flags)

        if op_id in (Id.BoolBinary_Equal, Id.BoolBinary_DEqual):
            return s1 == s2

        if op_id == Id.BoolBinary_NEqual:
            return s1 != s2

        if op_id == Id.BoolBinary_EqualTilde:
            # TODO: This should go to --debug-file
            #log('Matching %r against regex %r', s1, s2)
            regex_flags = (REG_ICASE
                           if self.exec_opts.nocasematch() else 0)

            try:
                indices = libc.regex_search(s2, regex_flags, s1, 0)
            except ValueError as e:
                # Status 2 indicates a regex parse error.  This is
                # fatal in OSH but not in bash, which treats [[
                # like a command with an exit code.
                e_die_status(2, e.message, loc.Word(node.right))

            if indices is not None:
                self.mem.SetRegexMatch(
                    RegexMatch(s1, indices, eggex_ops.No))
                return True
            else:
                self.mem.SetRegexMatch(regex_match.No)
                return False

        if op_id == Id.Op_Less:
            return str_cmp(s1, s2) < 0

        if op_id == Id.Op_Great:
            return str_cmp(s1, s2) > 0

        raise AssertionError(op_id)  # should never happen

    # An argument type other than Path, Int, or Str is not handled.
    raise AssertionError(node.tag())