// mycpp/mark_sweep_heap.h
#ifndef MARKSWEEP_HEAP_H
#define MARKSWEEP_HEAP_H

#include <stdlib.h>

#include <algorithm>  // std::fill
#include <vector>

#include "mycpp/common.h"
#include "mycpp/gc_obj.h"

#if GC_ALWAYS
  #define VALIDATE_ROOTS 1
#else
  #define VALIDATE_ROOTS 0  // flip this manually to diagnose bugs
#endif
#if VALIDATE_ROOTS
static void ValidateRoot(const RawObject* obj) {
  if (obj == nullptr) {
    return;
  }

  // Check pointer alignment: objects are at least 4-byte aligned (8 on
  // 64-bit), so any set low bits mean the pointer is garbage.
  if (reinterpret_cast<uintptr_t>(obj) & 0x3) {
    log("Misaligned object %p", obj);
    FAIL(kShouldNotGetHere);
    return;
  }

  ObjHeader* header = ObjHeader::FromObject(obj);
  // log("obj %p header %p", obj, header);

  if (reinterpret_cast<uintptr_t>(header) & 0x3) {
    log("Misaligned header %p", header);
    FAIL(kShouldNotGetHere);
    return;
  }

  switch (header->heap_tag) {
    case HeapTag::Global:
    case HeapTag::Opaque:
    case HeapTag::Scanned:
    case HeapTag::FixedSize:
      break;

    default:
      log("root %p heap %d type %d mask/npointers %d", obj, header->heap_tag,
          header->type_tag, header->u_mask_npointers);
      FAIL(kShouldNotGetHere);
      break;
  }
}
#endif

class MarkSet {
 public:
  MarkSet() : bits_() {
  }

  // ReInit() must be called at the start of MarkObjects().  Allocate() should
  // keep track of the maximum object ID.
  void ReInit(int max_obj_id) {
    // https://stackoverflow.com/questions/8848575/fastest-way-to-reset-every-value-of-stdvectorint-to-0
    std::fill(bits_.begin(), bits_.end(), 0);
    int max_byte_index = (max_obj_id >> 3) + 1;  // one byte per 8 IDs, rounded up
    // log("ReInit max_byte_index %d", max_byte_index);
    bits_.resize(max_byte_index);
  }

  // Called by MarkObjects()
  void Mark(int obj_id) {
    DCHECK(obj_id >= 0);
    // log("obj id %d", obj_id);
    DCHECK(!IsMarked(obj_id));
    int byte_index = obj_id >> 3;  // 8 bits per byte
    int bit_index = obj_id & 0b111;
    // log("byte_index %d %d", byte_index, bit_index);
    bits_[byte_index] |= (1 << bit_index);
  }

  // Called by Sweep()
  bool IsMarked(int obj_id) {
    DCHECK(obj_id >= 0);
    int byte_index = obj_id >> 3;
    int bit_index = obj_id & 0b111;
    return bits_[byte_index] & (1 << bit_index);
  }

  void Debug() {
    // TODO: should use feature detection of dprintf
#ifndef OILS_WIN32
    int n = bits_.size();
    dprintf(2, "[ ");
    for (int i = 0; i < n; ++i) {
      dprintf(2, "%02x ", bits_[i]);
    }
    dprintf(2, "] (%d bytes)\n", n);
    dprintf(2, "[ ");
    int num_bits = 0;
    for (int i = 0; i < n; ++i) {
      for (int j = 0; j < 8; ++j) {
        int bit = (bits_[i] & (1 << j)) != 0;
        dprintf(2, "%d", bit);
        num_bits += bit;
      }
    }
    dprintf(2, " ] (%d bits set)\n", num_bits);
#endif
  }

  std::vector<uint8_t> bits_;  // bit vector indexed by obj_id
};
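
// Illustrative sketch (not part of the header): the byte/bit indexing above
// for obj_id 11, which lands in byte 11 >> 3 == 1, bit 11 & 0b111 == 3.
//
//   MarkSet set;
//   set.ReInit(100);            // covers obj IDs 0 through 100
//   set.Mark(11);               // bits_[1] |= (1 << 3)
//   bool b = set.IsMarked(11);  // true; IsMarked(12) would be false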

// A simple Pool allocator for allocating small objects.  It maintains an
// ever-growing number of Blocks, each consisting of a number of fixed-size
// Cells.  Memory is handed out one Cell at a time.
//
// Note: within the context of the Pool allocator, we refer to object IDs as
// cell IDs, because in addition to identifying an object, they're also used
// to index into the Cell storage.
template <int CellsPerBlock, size_t CellSize>
class Pool {
 public:
  static constexpr size_t kMaxObjSize = CellSize;
  static constexpr int kBlockSize = CellSize * CellsPerBlock;

  Pool() = default;

  void* Allocate(int* obj_id) {
    num_allocated_++;

    if (!free_list_) {
      // Allocate a new Block and add every new Cell to the free list.
      Block* block = static_cast<Block*>(malloc(sizeof(Block)));
      blocks_.push_back(block);
      bytes_allocated_ += kBlockSize;
      num_free_ += CellsPerBlock;

      // The starting cell_id for Cells in this block.
      int cell_id = (blocks_.size() - 1) * CellsPerBlock;
      for (Cell& cell : block->cells) {
        FreeCell* free_cell = reinterpret_cast<FreeCell*>(cell);
        free_cell->id = cell_id++;
        free_cell->next = free_list_;
        free_list_ = free_cell;
      }
    }

    FreeCell* cell = free_list_;
    free_list_ = free_list_->next;
    num_free_--;
    *obj_id = cell->id;
    return cell;
  }

  void PrepareForGc() {
    DCHECK(!gc_underway_);
    gc_underway_ = true;
    mark_set_.ReInit(blocks_.size() * CellsPerBlock);
  }

  bool IsMarked(int cell_id) {
    DCHECK(gc_underway_);
    return mark_set_.IsMarked(cell_id);
  }

  void Mark(int cell_id) {
    DCHECK(gc_underway_);
    mark_set_.Mark(cell_id);
  }

  void Sweep() {
    DCHECK(gc_underway_);
    // Iterate over every Cell, linking the unmarked ones into a new free list.
    num_free_ = 0;
    free_list_ = nullptr;
    int cell_id = 0;
    for (Block* block : blocks_) {
      for (Cell& cell : block->cells) {
        if (!mark_set_.IsMarked(cell_id)) {
          num_free_++;
          FreeCell* free_cell = reinterpret_cast<FreeCell*>(cell);
          free_cell->id = cell_id;
          free_cell->next = free_list_;
          free_list_ = free_cell;
        }
        cell_id++;
      }
    }
    gc_underway_ = false;
  }

  void Free() {
    for (Block* block : blocks_) {
      free(block);
    }
    blocks_.clear();
    num_free_ = 0;
  }

  int num_allocated() {
    return num_allocated_;
  }

  int64_t bytes_allocated() {
    return bytes_allocated_;
  }

  int num_live() {
#ifndef OPTIMIZED
    int capacity = blocks_.size() * CellsPerBlock;
    // log("Pool capacity = %d", capacity);
    // log("Pool num_free_ = %d", num_free_);
    DCHECK(num_free_ <= capacity);
#endif
    return blocks_.size() * CellsPerBlock - num_free_;
  }

 private:
  using Cell = uint8_t[CellSize];

  struct Block {
    Cell cells[CellsPerBlock];
  };

  // Unused/free cells are tracked via a linked list of FreeCells.  The
  // FreeCells are stored in the unused Cells themselves, so it takes no extra
  // memory to track them.
  struct FreeCell {
    int id;
    FreeCell* next;
  };
  static_assert(CellSize >= sizeof(FreeCell), "CellSize is too small");
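  // Note: on a typical 64-bit build, sizeof(FreeCell) is 16 bytes (4-byte
  // id, 4 bytes of padding, then an 8-byte pointer), so the 24- and 48-byte
  // Cells instantiated below both satisfy this assertion.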

  // Whether a GC is underway, for asserting that calls are in order.
  bool gc_underway_ = false;

  FreeCell* free_list_ = nullptr;
  int num_free_ = 0;
  int num_allocated_ = 0;
  int64_t bytes_allocated_ = 0;
  std::vector<Block*> blocks_;
  MarkSet mark_set_;

  DISALLOW_COPY_AND_ASSIGN(Pool);
};
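
// A sketch of one allocation/collection cycle (illustrative only; the real
// driver is MarkSweepHeap below):
//
//   Pool<682, 24> pool;
//   int obj_id;
//   void* p = pool.Allocate(&obj_id);  // pops a Cell off the free list
//
//   pool.PrepareForGc();  // resets the MarkSet
//   pool.Mark(obj_id);    // called for every reachable object
//   pool.Sweep();         // unmarked Cells go back on the free list
//
//   pool.Free();          // on shutdown, release the Blocks themselves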

class MarkSweepHeap {
 public:
  MarkSweepHeap() {
  }

  void Init();  // use default threshold
  void Init(int gc_threshold);

  void PushRoot(RawObject** p) {
#if VALIDATE_ROOTS
    ValidateRoot(*p);
#endif
    roots_.push_back(p);
  }

  void PopRoot() {
    roots_.pop_back();
  }
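
  // Illustrative sketch (names are hypothetical, and it assumes the global
  // gHeap instance declared elsewhere in mycpp): a caller pins a local so
  // the collector can see it across a call that may allocate.
  //
  //   Str* s = AllocStr();  // hypothetical allocating call
  //   gHeap.PushRoot(reinterpret_cast<RawObject**>(&s));
  //   FuncThatMayAllocate();  // s is traced if a collection happens here
  //   gHeap.PopRoot();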

  void RootGlobalVar(void* root) {
    global_roots_.push_back(reinterpret_cast<RawObject*>(root));
  }

  void* Allocate(size_t num_bytes, int* obj_id, int* pool_id);

#if 0
  void* Reallocate(void* p, size_t num_bytes);
#endif
  int MaybeCollect();
  int Collect();

  void MaybeMarkAndPush(RawObject* obj);
  void TraceChildren();

  void Sweep();
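
  // Rough shape of a collection, inferred from the declarations above (a
  // sketch of the expected call order, not a spec):
  //
  //   Collect():
  //     mark_set_.ReInit(...); the pools call PrepareForGc()
  //     MaybeMarkAndPush(root) for every root  // seeds gray_stack_
  //     TraceChildren()                        // drains gray_stack_
  //     Sweep()                                // frees unmarked objects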

  void PrintStats(int fd);  // public for testing
  void PrintShortStats();

  void CleanProcessExit();  // do one last GC, used in unit tests
  void ProcessExit();       // main() lets the OS clean up, except in the
                            // ASAN variant

  int num_live() {
    return num_live_
#ifndef NO_POOL_ALLOC
           + pool1_.num_live() + pool2_.num_live()
#endif
        ;
  }

  bool is_initialized_ = true;  // mark/sweep doesn't need to be initialized

  // Runtime params

  // The threshold is a number of live objects, since we aren't keeping track
  // of total bytes.
  int gc_threshold_;

  // Show debug logging
  bool gc_verbose_ = false;

  // Current stats
  int num_live_ = 0;
  // Should we keep track of sizes?
  // int64_t bytes_live_ = 0;

  // Cumulative stats
  int max_survived_ = 0;  // max # live after a collection
  int num_allocated_ = 0;
  int64_t bytes_allocated_ = 0;  // int64_t to avoid overflow
  int num_gc_points_ = 0;  // manual collection points
  int num_collections_ = 0;
  int num_growths_ = 0;
  double max_gc_millis_ = 0.0;
  double total_gc_millis_ = 0.0;

#ifndef NO_POOL_ALLOC
  // 16,384 / 24 bytes = 682 cells (rounded down), 16,368 bytes
  // 16,384 / 48 bytes = 341 cells (rounded down), 16,368 bytes
  // Conveniently, the glibc malloc header is 16 bytes, so each Block plus
  // its header occupies exactly 16 KiB.
  Pool<682, 24> pool1_;
  Pool<341, 48> pool2_;
#endif
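
  // Sanity checks one could add here (a sketch, not in the original header):
  //
  //   static_assert(Pool<682, 24>::kBlockSize == 16368, "");
  //   static_assert(Pool<341, 48>::kBlockSize == 16368, "");
  //
  // Both Blocks are the same size, and glibc's 16-byte malloc header brings
  // each allocation to exactly 16,384 bytes.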

  std::vector<RawObject**> roots_;
  std::vector<RawObject*> global_roots_;

  // Allocate() appends live objects, and Sweep() compacts it
  std::vector<ObjHeader*> live_objs_;
  // Allocate() lazily frees these, and Sweep() replenishes it
  std::vector<ObjHeader*> to_free_;

  std::vector<ObjHeader*> gray_stack_;
  MarkSet mark_set_;

  int greatest_obj_id_ = 0;

 private:
  void FreeEverything();
  void MaybePrintStats();

  DISALLOW_COPY_AND_ASSIGN(MarkSweepHeap);
};

#endif  // MARKSWEEP_HEAP_H