OILS / mycpp / gc_list.h View on Github | oils.pub

546 lines, 299 significant
1#ifndef MYCPP_GC_LIST_H
2#define MYCPP_GC_LIST_H
3
4#include <string.h> // memcpy
5
6#include <algorithm> // sort() is templated
7
8#include "mycpp/common.h" // DCHECK
9#include "mycpp/comparators.h"
10#include "mycpp/gc_alloc.h" // Alloc
11#include "mycpp/gc_builtins.h" // ValueError
12#include "mycpp/gc_mops.h" // BigInt
13#include "mycpp/gc_slab.h"
14
15// GlobalList is layout-compatible with List (unit tests assert this), and it
16// can be a true C global (incurs zero startup time)
17
18template <typename T, int N>
19class GlobalList {
20 public:
21 int len_;
22 int capacity_;
23 GlobalSlab<T, N>* slab_;
24};
25
26#define GLOBAL_LIST(name, T, N, array) \
27 GcGlobal<GlobalSlab<T, N>> _slab_##name = {ObjHeader::Global(TypeTag::Slab), \
28 {.items_ = array}}; \
29 GcGlobal<GlobalList<T, N>> _list_##name = { \
30 ObjHeader::Global(TypeTag::List), \
31 {.len_ = N, .capacity_ = N, .slab_ = &_slab_##name.obj}}; \
32 List<T>* name = reinterpret_cast<List<T>*>(&_list_##name.obj);
33
34template <typename T>
35class List {
36 public:
37 List() : len_(0), capacity_(0), slab_(nullptr) {
38 }
39
40 protected:
41 // Used for ASDL subtypes with <. NOT even a shallow copy - it ALIASES thes
42 // slab.
43 explicit List(List* other)
44 : len_(other->len_), capacity_(other->capacity_), slab_(other->slab_) {
45 }
46
47 public:
48 // Implements L[i]
49 T at(int i);
50
51 // returns index of the element
52 int index(T element);
53
54 // Implements L[i] = item
55 void set(int i, T item);
56
57 // L[begin:]
58 List* slice(int begin);
59
60 // L[begin:end]
61 List* slice(int begin, int end);
62
63 // Should we have a separate API that doesn't return it?
64 // https://stackoverflow.com/questions/12600330/pop-back-return-value
65 T pop();
66
67 // Used in osh/word_parse.py to remove from front
68 T pop(int i);
69
70 // Remove the first occurence of x from the list.
71 void remove(T x);
72
73 void clear();
74
75 // Used in osh/string_ops.py
76 void reverse();
77
78 // Templated function
79 void sort();
80
81 // Ensure that there's space for at LEAST this many items
82 void reserve(int num_desired);
83
84 // Append a single element to this list.
85 void append(T item);
86
87 // Extend this list with multiple elements.
88 void extend(List<T>* other);
89
90 static constexpr ObjHeader obj_header() {
91 return ObjHeader::ClassFixed(field_mask(), sizeof(List<T>));
92 }
93
94 // Used by ASDL
95 void SetTaken();
96
97 int len_; // number of entries
98 int capacity_; // max entries before resizing
99
100 // The container may be resized, so this field isn't in-line.
101 Slab<T>* slab_;
102
103 // A list has one Slab pointer which we need to follow.
104 static constexpr uint32_t field_mask() {
105 return maskbit(offsetof(List, slab_));
106 }
107
108 DISALLOW_COPY_AND_ASSIGN(List)
109
110 static_assert(sizeof(ObjHeader) % sizeof(T) == 0,
111 "ObjHeader size should be multiple of item size");
112 static constexpr int kHeaderFudge = sizeof(ObjHeader) / sizeof(T);
113
114#if 0
115 // 24-byte pool comes from very common List header, and Token
116 static constexpr int kPoolBytes1 = 24 - sizeof(ObjHeader);
117 static_assert(kPoolBytes1 % sizeof(T) == 0,
118 "An integral number of items should fit in first pool");
119 static constexpr int kNumItems1 = kPoolBytes1 / sizeof(T);
120#endif
121
122 // Matches mark_sweep_heap.h
123 static constexpr int kPoolBytes2 = 48 - sizeof(ObjHeader);
124 static_assert(kPoolBytes2 % sizeof(T) == 0,
125 "An integral number of items should fit in second pool");
126 static constexpr int kNumItems2 = kPoolBytes2 / sizeof(T);
127
128#if 0
129 static constexpr int kMinBytes2 = 128 - sizeof(ObjHeader);
130 static_assert(kMinBytes2 % sizeof(T) == 0,
131 "An integral number of items should fit");
132 static constexpr int kMinItems2 = kMinBytes2 / sizeof(T);
133#endif
134
135 // Given the number of items desired, return the number items we should
136 // reserve room for, according to our growth policy.
137 int HowManyItems(int num_desired) {
138 // Using the 24-byte pool leads to too much GC of tiny slab objects! So
139 // just use the larger 48 byte pool.
140#if 0
141 if (num_desired <= kNumItems1) { // use full cell in pool 1
142 return kNumItems1;
143 }
144#endif
145 if (num_desired <= kNumItems2) { // use full cell in pool 2
146 return kNumItems2;
147 }
148#if 0
149 if (num_desired <= kMinItems2) { // 48 -> 128, not 48 -> 64
150 return kMinItems2;
151 }
152#endif
153
154 // Make sure the total allocation is a power of 2. TODO: consider using
155 // slightly less than power of 2, to account for malloc() headers, and
156 // reduce fragmentation.
157 // Example:
158 // - ask for 11 integers
159 // - round up 11+2 == 13 up to 16 items
160 // - return 14 items
161 // - 14 integers is 56 bytes, plus 8 byte GC header => 64 byte alloc.
162 return RoundUp(num_desired + kHeaderFudge) - kHeaderFudge;
163 }
164};
165
166// "Constructors" as free functions since we can't allocate within a
167// constructor. Allocation may cause garbage collection, which interferes with
168// placement new.
169
170// This is not really necessary, only syntactic sugar.
171template <typename T>
172List<T>* NewList() {
173 return Alloc<List<T>>();
174}
175
176// Literal ['foo', 'bar']
177// This seems to allow better template argument type deduction than a
178// constructor.
179template <typename T>
180List<T>* NewList(std::initializer_list<T> init) {
181 auto self = Alloc<List<T>>();
182
183 int n = init.size();
184 self->reserve(n);
185
186 int i = 0;
187 for (auto item : init) {
188 self->slab_->items_[i] = item;
189 ++i;
190 }
191 self->len_ = n;
192 return self;
193}
194
195// ['foo'] * 3
196template <typename T>
197List<T>* NewList(T item, int times) {
198 auto self = Alloc<List<T>>();
199
200 self->reserve(times);
201 self->len_ = times;
202 for (int i = 0; i < times; ++i) {
203 self->set(i, item);
204 }
205 return self;
206}
207
208template <typename T>
209void List<T>::append(T item) {
210 reserve(len_ + 1);
211 slab_->items_[len_] = item;
212 ++len_;
213}
214
215template <typename T>
216int len(const List<T>* L) {
217 return L->len_;
218}
219
220template <typename T>
221List<T>* list_repeat(T item, int times);
222
223template <typename T>
224inline bool list_contains(List<T>* haystack, T needle);
225
226template <typename K, typename V>
227class Dict; // forward decl
228
229template <typename V>
230List<BigStr*>* sorted(Dict<BigStr*, V>* d);
231
232template <typename T>
233List<T>* sorted(List<T>* l);
234
235// L[begin:]
236template <typename T>
237List<T>* List<T>::slice(int begin) {
238 return slice(begin, len_);
239}
240
241// L[begin:end]
242template <typename T>
243List<T>* List<T>::slice(int begin, int end) {
244 SLICE_ADJUST(begin, end, len_);
245
246 DCHECK(0 <= begin && begin <= len_);
247 DCHECK(0 <= end && end <= len_);
248
249 int new_len = end - begin;
250 DCHECK(0 <= new_len && new_len <= len_);
251
252 List<T>* result = NewList<T>();
253 if (new_len == 0) { // empty slice
254 return result;
255 }
256
257 result->reserve(new_len);
258 DCHECK(result->slab_);
259 // Faster than append() in a loop
260 memcpy(result->slab_->items_, slab_->items_ + begin, new_len * sizeof(T));
261 result->len_ = new_len;
262
263 return result;
264}
265
266// Ensure that there's space for a number of items
267template <typename T>
268void List<T>::reserve(int num_desired) {
269 // log("reserve capacity = %d, n = %d", capacity_, n);
270
271 // Don't do anything if there's already enough space.
272 if (capacity_ >= num_desired) {
273 return;
274 }
275
276 // Slabs should be a total of 2^N bytes. kCapacityAdjust is the number of
277 // items that the 8 byte header takes up: 1 for List<T*>, and 2 for
278 // List<int>.
279 //
280 // Example: the user reserves space for 3 integers. The minimum number of
281 // items would be 5, which is rounded up to 8. Subtract 2 again, giving 6,
282 // which leads to 8 + 6*4 = 32 byte Slab.
283
284 capacity_ = HowManyItems(num_desired);
285 auto new_slab = NewSlab<T>(capacity_);
286
287 if (len_ > 0) {
288 // log("Copying %d bytes", len_ * sizeof(T));
289 memcpy(new_slab->items_, slab_->items_, len_ * sizeof(T));
290 }
291 slab_ = new_slab;
292}
293
294// Implements L[i] = item
295template <typename T>
296void List<T>::set(int i, T item) {
297 if (i < 0) {
298 i = len_ + i;
299 }
300
301 if (0 > i || i >= len_) {
302 throw Alloc<IndexError>();
303 }
304
305 slab_->items_[i] = item;
306}
307
308// Implements L[i]
309template <typename T>
310T List<T>::at(int i) {
311 if (i < 0) {
312 i = len_ + i;
313 }
314
315 if (0 > i || i >= len_) {
316 throw Alloc<IndexError>();
317 }
318 return slab_->items_[i];
319}
320
321// L.index(i) -- Python method
322template <typename T>
323int List<T>::index(T value) {
324 int element_count = len(this);
325 for (int i = 0; i < element_count; i++) {
326 if (items_equal(slab_->items_[i], value)) {
327 return i;
328 }
329 }
330 throw Alloc<ValueError>();
331}
332
333// Should we have a separate API that doesn't return it?
334// https://stackoverflow.com/questions/12600330/pop-back-return-value
335template <typename T>
336T List<T>::pop() {
337 if (len_ == 0) {
338 throw Alloc<IndexError>();
339 }
340 len_--;
341 T result = slab_->items_[len_];
342 slab_->items_[len_] = 0; // zero for GC scan
343 return result;
344}
345
346// Used in osh/word_parse.py to remove from front
347template <typename T>
348T List<T>::pop(int i) {
349 if (len_ < i) {
350 throw Alloc<IndexError>();
351 }
352
353 T result = at(i);
354 len_--;
355
356 // Shift everything by one
357 memmove(slab_->items_ + i, slab_->items_ + (i + 1), (len_ - i) * sizeof(T));
358
359 /*
360 for (int j = 0; j < len_; j++) {
361 slab_->items_[j] = slab_->items_[j+1];
362 }
363 */
364
365 slab_->items_[len_] = 0; // zero for GC scan
366 return result;
367}
368
369template <typename T>
370void List<T>::remove(T x) {
371 int idx = this->index(x);
372 this->pop(idx); // unused
373}
374
375template <typename T>
376void List<T>::clear() {
377 if (slab_) {
378 memset(slab_->items_, 0, len_ * sizeof(T)); // zero for GC scan
379 }
380 len_ = 0;
381}
382
383// used by ASDL
384template <typename T>
385void List<T>::SetTaken() {
386 slab_ = nullptr;
387 len_ = 0;
388 capacity_ = 0;
389}
390
391// Used in osh/string_ops.py
392template <typename T>
393void List<T>::reverse() {
394 for (int i = 0; i < len_ / 2; ++i) {
395 // log("swapping %d and %d", i, n-i);
396 T tmp = slab_->items_[i];
397 int j = len_ - 1 - i;
398 slab_->items_[i] = slab_->items_[j];
399 slab_->items_[j] = tmp;
400 }
401}
402
403// Extend this list with multiple elements.
404template <typename T>
405void List<T>::extend(List<T>* other) {
406 int n = other->len_;
407 int new_len = len_ + n;
408 reserve(new_len);
409
410 for (int i = 0; i < n; ++i) {
411 slab_->items_[len_ + i] = other->slab_->items_[i];
412 }
413 len_ = new_len;
414}
415
416inline bool CompareBigStr(BigStr* a, BigStr* b) {
417 return mylib::str_cmp(a, b) < 0;
418}
419
420template <>
421inline void List<BigStr*>::sort() {
422 if (slab_) {
423 std::sort(slab_->items_, slab_->items_ + len_, CompareBigStr);
424 }
425}
426
427inline bool CompareBigInt(mops::BigInt a, mops::BigInt b) {
428 return a < b;
429}
430
431template <>
432inline void List<mops::BigInt>::sort() {
433 std::sort(slab_->items_, slab_->items_ + len_, CompareBigInt);
434}
435
436// TODO: mycpp can just generate the constructor instead?
437// e.g. [None] * 3
438template <typename T>
439List<T>* list_repeat(T item, int times) {
440 return NewList<T>(item, times);
441}
442
443// e.g. 'a' in ['a', 'b', 'c']
444template <typename T>
445inline bool list_contains(List<T>* haystack, T needle) {
446 int n = len(haystack);
447 for (int i = 0; i < n; ++i) {
448 if (items_equal(haystack->at(i), needle)) {
449 return true;
450 }
451 }
452 return false;
453}
454
455template <typename V>
456List<BigStr*>* sorted(Dict<BigStr*, V>* d) {
457 auto keys = d->keys();
458 keys->sort();
459 return keys;
460}
461
462template <typename T>
463List<T>* sorted(List<T>* l) {
464 auto ret = list(l);
465 ret->sort();
466 return ret;
467}
468
469// list(L) copies the list
470template <typename T>
471List<T>* list(List<T>* other) {
472 auto result = NewList<T>();
473 result->extend(other);
474 return result;
475}
476
477template <class T>
478class ListIter {
479 public:
480 explicit ListIter(List<T>* L) : L_(L), i_(0) {
481 // Cheney only: L_ could be moved during iteration.
482 // gHeap.PushRoot(reinterpret_cast<RawObject**>(&L_));
483 }
484
485 ~ListIter() {
486 // gHeap.PopRoot();
487 }
488 void Next() {
489 i_++;
490 }
491 bool Done() {
492 // "unsigned size_t was a mistake"
493 return i_ >= static_cast<int>(L_->len_);
494 }
495 T Value() {
496 return L_->slab_->items_[i_];
497 }
498 T iterNext() {
499 if (Done()) {
500 throw Alloc<StopIteration>();
501 }
502 T ret = L_->slab_->items_[i_];
503 Next();
504 return ret;
505 }
506
507 // only for use with generators
508 List<T>* GetList() {
509 return L_;
510 }
511
512 private:
513 List<T>* L_;
514 int i_;
515};
516
517// list(it) returns the iterator's backing list
518template <typename T>
519List<T>* list(ListIter<T> it) {
520 return list(it.GetList());
521}
522
523// TODO: Does using pointers rather than indices make this more efficient?
524template <class T>
525class ReverseListIter {
526 public:
527 explicit ReverseListIter(List<T>* L) : L_(L), i_(L_->len_ - 1) {
528 }
529 void Next() {
530 i_--;
531 }
532 bool Done() {
533 return i_ < 0;
534 }
535 T Value() {
536 return L_->slab_->items_[i_];
537 }
538
539 private:
540 List<T>* L_;
541 int i_;
542};
543
544int max(List<int>* elems);
545
546#endif // MYCPP_GC_LIST_H