OILS / mycpp / gc_builtins.cc View on Github | oils.pub

468 lines, 251 significant
1#include <errno.h> // errno
2#include <float.h> // DBL_MIN, DBL_MAX
3#include <math.h> // INFINITY
4#include <stdio.h> // required for readline/readline.h (man readline)
5
6#include "_build/detected-cpp-config.h"
7#include "mycpp/gc_list.h"
8#include "mycpp/gc_str.h"
9
10// Translation of Python's print().
11void print(BigStr* s) {
12 fputs(s->data_, stdout); // print until first NUL
13 fputc('\n', stdout);
14}
15
16BigStr* str(int i) {
17 BigStr* s = OverAllocatedStr(kIntBufSize);
18 int length = snprintf(s->data(), kIntBufSize, "%d", i);
19 s->MaybeShrink(length);
20 return s;
21}
22
23BigStr* str(double d) {
24 char buf[64]; // overestimate, but we use snprintf() to be safe
25
26 int n = sizeof(buf) - 2; // in case we add '.0'
27
28 // The round tripping test in mycpp/float_test.cc tells us:
29 // %.9g - FLOAT round trip
30 // %.17g - DOUBLE round trip
31 // But this causes problems in practice, e.g. for 3.14, or 1/3
32 // int length = snprintf(buf, n, "%.17g", d);
33
34 // So use 1 less digit, which happens to match Python 3 and node.js (but not
35 // Python 2)
36 int length = snprintf(buf, n, "%.16g", d);
37
38 // TODO: This may depend on LC_NUMERIC locale!
39
40 // We may return the strings:
41 // inf -inf nan
42 // But this shouldn't come up much, because Python code changes it to:
43 // INFINITY -INFINITY NAN
44 if (strchr(buf, 'i') || strchr(buf, 'n')) {
45 return StrFromC(buf); // don't add .0
46 }
47
48 // Problem:
49 // %f prints 3.0000000 and 3.500000
50 // %g prints 3 and 3.5
51 //
52 // We want 3.0 and 3.5, so add '.0' in some cases
53 if (!strchr(buf, '.')) { // 12345 -> 12345.0
54 buf[length] = '.';
55 buf[length + 1] = '0';
56 buf[length + 2] = '\0';
57 }
58
59 return StrFromC(buf);
60}
61// %a is a hexfloat form, probably don't need that
62// int length = snprintf(buf, n, "%a", d);
63
64// Do we need this API? Or is mylib.InternedStr(BigStr* s, int start, int end)
65// better for getting values out of Token.line without allocating?
66//
67// e.g. mylib.InternedStr(tok.line, tok.start, tok.start+1)
68//
69// Also for SmallStr, we don't care about interning. Only for HeapStr.
70
71BigStr* intern(BigStr* s) {
72 // TODO: put in table gHeap.interned_
73 return s;
74}
75
76// Print quoted string. Called by StrFormat('%r').
77// TODO: consider using J8 notation instead, since error messages show that
78// string.
79BigStr* repr(BigStr* s) {
80 // Worst case: \0 becomes 4 bytes as '\\x00', and then two quote bytes.
81 int n = len(s);
82 int upper_bound = n * 4 + 2;
83
84 BigStr* result = OverAllocatedStr(upper_bound);
85
86 // Single quote by default.
87 char quote = '\'';
88 if (memchr(s->data_, '\'', n) && !memchr(s->data_, '"', n)) {
89 quote = '"';
90 }
91 char* p = result->data_;
92
93 // From PyString_Repr()
94 *p++ = quote;
95 for (int i = 0; i < n; ++i) {
96 unsigned char c = static_cast<unsigned char>(s->data_[i]);
97 if (c == quote || c == '\\') {
98 *p++ = '\\';
99 *p++ = c;
100 } else if (c == '\t') {
101 *p++ = '\\';
102 *p++ = 't';
103 } else if (c == '\n') {
104 *p++ = '\\';
105 *p++ = 'n';
106 } else if (c == '\r') {
107 *p++ = '\\';
108 *p++ = 'r';
109 } else if (0x20 <= c && c < 0x80) {
110 *p++ = c;
111 } else {
112 // Unprintable becomes \xff.
113 // TODO: Consider \yff. This is similar to J8 strings, but we don't
114 // decode UTF-8.
115 sprintf(p, "\\x%02x", c & 0xff);
116 p += 4;
117 }
118 }
119 *p++ = quote;
120 *p = '\0';
121
122 int length = p - result->data_;
123 result->MaybeShrink(length);
124 return result;
125}
126
127// Helper functions that don't use exceptions.
128
129bool StringToInt(const char* s, int length, int base, int* result) {
130 if (length == 0) {
131 return false; // empty string isn't a valid integer
132 }
133
134 // Note: sizeof(int) is often 4 bytes on both 32-bit and 64-bit
135 // sizeof(long) is often 4 bytes on both 32-bit but 8 bytes on 64-bit
136 // static_assert(sizeof(long) == 8);
137
138 char* pos; // mutated by strtol
139
140 errno = 0;
141 long v = strtol(s, &pos, base);
142
143 if (errno == ERANGE) {
144 switch (v) {
145 case LONG_MIN:
146 return false; // underflow of long, which may be 64 bits
147 case LONG_MAX:
148 return false; // overflow of long
149 }
150 }
151
152 // It should ALSO fit in an int, not just a long
153 if (v > INT_MAX) {
154 return false;
155 }
156 if (v < INT_MIN) {
157 return false;
158 }
159
160 const char* end = s + length;
161 if (pos == end) {
162 *result = v;
163 return true; // strtol() consumed ALL characters.
164 }
165
166 while (pos < end) {
167 if (!IsAsciiWhitespace(*pos)) {
168 return false; // Trailing non-space
169 }
170 pos++;
171 }
172
173 *result = v;
174 return true; // Trailing space is OK
175}
176
177bool StringToInt64(const char* s, int length, int base, int64_t* result) {
178 if (length == 0) {
179 return false; // empty string isn't a valid integer
180 }
181
182 // These should be the same type
183 static_assert(sizeof(long long) == sizeof(int64_t), "");
184
185 char* pos; // mutated by strtol
186
187 errno = 0;
188 long long v = strtoll(s, &pos, base);
189
190 if (errno == ERANGE) {
191 switch (v) {
192 case LLONG_MIN:
193 return false; // underflow
194 case LLONG_MAX:
195 return false; // overflow
196 }
197 }
198
199 const char* end = s + length;
200 if (pos == end) {
201 *result = v;
202 return true; // strtol() consumed ALL characters.
203 }
204
205 while (pos < end) {
206 if (!IsAsciiWhitespace(*pos)) {
207 return false; // Trailing non-space
208 }
209 pos++;
210 }
211
212 *result = v;
213 return true; // Trailing space is OK
214}
215
216int to_int(BigStr* s, int base) {
217 int i;
218 if (StringToInt(s->data_, len(s), base, &i)) {
219 return i; // truncated to int
220 } else {
221 throw Alloc<ValueError>();
222 }
223}
224
225BigStr* chr(int i) {
226 // NOTE: i should be less than 256, in which we could return an object from
227 // GLOBAL_STR() pool, like StrIter
228 auto result = NewStr(1);
229 result->data_[0] = i;
230 return result;
231}
232
233int ord(BigStr* s) {
234 assert(len(s) == 1);
235 // signed to unsigned conversion, so we don't get values like -127
236 uint8_t c = static_cast<uint8_t>(s->data_[0]);
237 return c;
238}
239
240bool to_bool(BigStr* s) {
241 return len(s) != 0;
242}
243
// Convert an int to the double with the same value.
double to_float(int i) {
  const double widened = i;
  return widened;
}
247
248double to_float(BigStr* s) {
249 char* begin = s->data_;
250 char* end_pos = begin + len(s);
251 char* orig_end = end_pos;
252
253 errno = 0;
254 double result = strtod(begin, &end_pos);
255
256 if (errno == ERANGE) { // error: overflow or underflow
257 if (result >= HUGE_VAL) {
258 return INFINITY;
259 } else if (result <= -HUGE_VAL) {
260 return -INFINITY;
261 } else if (-DBL_MIN <= result && result <= DBL_MIN) {
262 return 0.0;
263 } else {
264 FAIL("Invalid value after ERANGE");
265 }
266 }
267 if (end_pos == begin) { // error: not a floating point number
268 throw Alloc<ValueError>();
269 }
270 if (end_pos != orig_end) { // trailing data like '5s' not alowed
271 while (end_pos < orig_end) {
272 if (!IsAsciiWhitespace(*end_pos)) {
273 throw Alloc<ValueError>(); // Trailing non-space
274 }
275 end_pos++;
276 }
277 }
278
279 return result;
280}
281
282// e.g. ('a' in 'abc')
283bool str_contains(BigStr* haystack, BigStr* needle) {
284 // Common case
285 if (len(needle) == 1) {
286 return memchr(haystack->data_, needle->data_[0], len(haystack));
287 }
288
289 if (len(needle) > len(haystack)) {
290 return false;
291 }
292
293 // General case. TODO: We could use a smarter substring algorithm.
294
295 const char* end = haystack->data_ + len(haystack);
296 const char* last_possible = end - len(needle);
297 const char* p = haystack->data_;
298
299 while (p <= last_possible) {
300 if (memcmp(p, needle->data_, len(needle)) == 0) {
301 return true;
302 }
303 p++;
304 }
305 return false;
306}
307
308BigStr* str_repeat(BigStr* s, int times) {
309 // Python allows -1 too, and Oil used that
310 if (times <= 0) {
311 return kEmptyString;
312 }
313 int len_ = len(s);
314 int new_len = len_ * times;
315 BigStr* result = NewStr(new_len);
316
317 char* dest = result->data_;
318 for (int i = 0; i < times; i++) {
319 memcpy(dest, s->data_, len_);
320 dest += len_;
321 }
322 return result;
323}
324
325// for os_path.join()
326// NOTE(Jesse): Perfect candidate for BoundedBuffer
327BigStr* str_concat3(BigStr* a, BigStr* b, BigStr* c) {
328 int a_len = len(a);
329 int b_len = len(b);
330 int c_len = len(c);
331
332 int new_len = a_len + b_len + c_len;
333 BigStr* result = NewStr(new_len);
334 char* pos = result->data_;
335
336 memcpy(pos, a->data_, a_len);
337 pos += a_len;
338
339 memcpy(pos, b->data_, b_len);
340 pos += b_len;
341
342 memcpy(pos, c->data_, c_len);
343
344 assert(pos + c_len == result->data_ + new_len);
345
346 return result;
347}
348
349BigStr* str_concat(BigStr* a, BigStr* b) {
350 int a_len = len(a);
351 int b_len = len(b);
352 int new_len = a_len + b_len;
353 BigStr* result = NewStr(new_len);
354 char* buf = result->data_;
355
356 memcpy(buf, a->data_, a_len);
357 memcpy(buf + a_len, b->data_, b_len);
358
359 return result;
360}
361
362//
363// Comparators
364//
365
366bool str_equals(BigStr* left, BigStr* right) {
367 // Fast path for identical strings. String deduplication during GC could
368 // make this more likely. String interning could guarantee it, allowing us
369 // to remove memcmp().
370 if (left == right) {
371 return true;
372 }
373
374 // TODO: It would be nice to remove this condition, but I think we need MyPy
375 // strict None checking for it
376 if (left == nullptr || right == nullptr) {
377 return false;
378 }
379
380 if (left->len_ != right->len_) {
381 return false;
382 }
383
384 return memcmp(left->data_, right->data_, left->len_) == 0;
385}
386
387bool maybe_str_equals(BigStr* left, BigStr* right) {
388 if (left && right) {
389 return str_equals(left, right);
390 }
391
392 if (!left && !right) {
393 return true; // None == None
394 }
395
396 return false; // one is None and one is a BigStr*
397}
398
399bool items_equal(BigStr* left, BigStr* right) {
400 return str_equals(left, right);
401}
402
403bool keys_equal(BigStr* left, BigStr* right) {
404 return items_equal(left, right);
405}
406
407bool items_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
408 return (t1->at0() == t2->at0()) && (t1->at1() == t2->at1());
409}
410
411bool keys_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
412 return items_equal(t1, t2);
413}
414
415bool items_equal(Tuple2<BigStr*, int>* t1, Tuple2<BigStr*, int>* t2) {
416 return items_equal(t1->at0(), t2->at0()) && (t1->at1() == t2->at1());
417}
418
419bool keys_equal(Tuple2<BigStr*, int>* t1, Tuple2<BigStr*, int>* t2) {
420 return items_equal(t1, t2);
421}
422
423bool str_equals_c(BigStr* s, const char* c_string, int c_len) {
424 // Needs SmallStr change
425 if (len(s) == c_len) {
426 return memcmp(s->data_, c_string, c_len) == 0;
427 } else {
428 return false;
429 }
430}
431
432bool str_equals0(const char* c_string, BigStr* s) {
433 int n = strlen(c_string);
434 if (len(s) == n) {
435 return memcmp(s->data_, c_string, n) == 0;
436 } else {
437 return false;
438 }
439}
440
441int hash(BigStr* s) {
442 return s->hash(fnv1);
443}
444
// Larger of two ints, like Python's max(a, b).
int max(int a, int b) {
  return (a < b) ? b : a;
}
448
// Smaller of two ints, like Python's min(a, b).
int min(int a, int b) {
  return (b < a) ? b : a;
}
452
453int max(List<int>* elems) {
454 int n = len(elems);
455 if (n < 1) {
456 throw Alloc<ValueError>();
457 }
458
459 int ret = elems->at(0);
460 for (int i = 0; i < n; ++i) {
461 int cand = elems->at(i);
462 if (cand > ret) {
463 ret = cand;
464 }
465 }
466
467 return ret;
468}