#ifndef SHARED_WORKLIST_H
#define SHARED_WORKLIST_H
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#include "assert.h"
#include "debug.h"
#include "gc-align.h"
#include "gc-inline.h"
#include "gc-ref.h"
#include "spin.h"
// The Chase-Lev work-stealing deque, as initially described in "Dynamic
// Circular Work-Stealing Deque" (Chase and Lev, SPAA'05)
// (https://www.dre.vanderbilt.edu/~schmidt/PDF/work-stealing-dequeue.pdf)
// and improved with C11 atomics in "Correct and Efficient Work-Stealing
// for Weak Memory Models" (Lê et al, PPoPP'13)
// (http://www.di.ens.fr/%7Ezappa/readings/ppopp13.pdf).
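//
// Live elements occupy the logical indices [top, bottom).  Each backing
// buffer is a power-of-two-sized circular array, so logical index i maps
// to slot i & (size - 1).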
struct shared_worklist_buf {
  unsigned log_size;
  size_t size;
  uintptr_t *data;
};
// Min size: 8 kB on 64-bit systems, 4 kB on 32-bit.
#define shared_worklist_buf_min_log_size ((unsigned) 10)
// Max size: 2 GB on 64-bit systems, 1 GB on 32-bit.
#define shared_worklist_buf_max_log_size ((unsigned) 28)
static const size_t shared_worklist_release_byte_threshold = 256 * 1024;
static int
shared_worklist_buf_init(struct shared_worklist_buf *buf, unsigned log_size) {
  ASSERT(log_size >= shared_worklist_buf_min_log_size);
  ASSERT(log_size <= shared_worklist_buf_max_log_size);
  size_t size = (1 << log_size) * sizeof(uintptr_t);
  void *mem = mmap(NULL, size, PROT_READ|PROT_WRITE,
                   MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
  if (mem == MAP_FAILED) {
    perror("Failed to allocate work-stealing deque");
    DEBUG("Failed to allocate %zu bytes", size);
    return 0;
  }
  buf->log_size = log_size;
  buf->size = 1 << log_size;
  buf->data = mem;
  return 1;
}
static inline size_t
shared_worklist_buf_size(struct shared_worklist_buf *buf) {
  return buf->size;
}

static inline size_t
shared_worklist_buf_byte_size(struct shared_worklist_buf *buf) {
  return shared_worklist_buf_size(buf) * sizeof(uintptr_t);
}

static void
shared_worklist_buf_release(struct shared_worklist_buf *buf) {
  size_t byte_size = shared_worklist_buf_byte_size(buf);
  if (buf->data && byte_size >= shared_worklist_release_byte_threshold)
    madvise(buf->data, byte_size, MADV_DONTNEED);
}

static void
shared_worklist_buf_destroy(struct shared_worklist_buf *buf) {
  if (buf->data) {
    munmap(buf->data, shared_worklist_buf_byte_size(buf));
    buf->data = NULL;
    buf->log_size = 0;
    buf->size = 0;
  }
}

static inline struct gc_ref
shared_worklist_buf_get(struct shared_worklist_buf *buf, size_t i) {
  return gc_ref(atomic_load_explicit(&buf->data[i & (buf->size - 1)],
                                     memory_order_relaxed));
}

static inline void
shared_worklist_buf_put(struct shared_worklist_buf *buf, size_t i,
                        struct gc_ref ref) {
  return atomic_store_explicit(&buf->data[i & (buf->size - 1)],
                               gc_ref_value(ref),
                               memory_order_relaxed);
}
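
// Copy the live elements [t, b) from one buffer into the next-larger one.
// Each element keeps its logical index; only the masked slot within the
// buffer changes, so the deque's top and bottom indices remain valid in
// the new buffer.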
static inline int
shared_worklist_buf_grow(struct shared_worklist_buf *from,
                         struct shared_worklist_buf *to, size_t b, size_t t) {
  if (from->log_size == shared_worklist_buf_max_log_size)
    return 0;
  if (!shared_worklist_buf_init(to, from->log_size + 1))
    return 0;
  for (size_t i = t; i < b; i++)
    shared_worklist_buf_put(to, i, shared_worklist_buf_get(from, i));
  return 1;
}
// Chase-Lev work-stealing deque. One thread pushes data into the deque
// at the bottom, and many threads compete to steal data from the top.
struct shared_worklist {
  // Ensure bottom and top are on different cache lines.
  union {
    atomic_size_t bottom;
    char bottom_padding[AVOID_FALSE_SHARING];
  };
  union {
    atomic_size_t top;
    char top_padding[AVOID_FALSE_SHARING];
  };
  atomic_int active; // Which shared_worklist_buf is active.
  struct shared_worklist_buf bufs[(shared_worklist_buf_max_log_size -
                                   shared_worklist_buf_min_log_size) + 1];
};
#define LOAD_RELAXED(loc) atomic_load_explicit(loc, memory_order_relaxed)
#define STORE_RELAXED(loc, o) atomic_store_explicit(loc, o, memory_order_relaxed)
#define LOAD_ACQUIRE(loc) atomic_load_explicit(loc, memory_order_acquire)
#define STORE_RELEASE(loc, o) atomic_store_explicit(loc, o, memory_order_release)
#define LOAD_CONSUME(loc) atomic_load_explicit(loc, memory_order_consume)
static int
shared_worklist_init(struct shared_worklist *q) {
  memset(q, 0, sizeof(*q));
  int ret = shared_worklist_buf_init(&q->bufs[0],
                                     shared_worklist_buf_min_log_size);
  // Note: this fence isn't in the paper; I added it out of caution.
  atomic_thread_fence(memory_order_release);
  return ret;
}
static void
shared_worklist_release(struct shared_worklist *q) {
  for (int i = LOAD_RELAXED(&q->active); i >= 0; i--)
    shared_worklist_buf_release(&q->bufs[i]);
}

static void
shared_worklist_destroy(struct shared_worklist *q) {
  for (int i = LOAD_RELAXED(&q->active); i >= 0; i--)
    shared_worklist_buf_destroy(&q->bufs[i]);
}

static int
shared_worklist_grow(struct shared_worklist *q, int cur, size_t b, size_t t) {
  if (!shared_worklist_buf_grow(&q->bufs[cur], &q->bufs[cur + 1], b, t)) {
    fprintf(stderr, "failed to grow deque!!\n");
    GC_CRASH();
  }
  cur++;
  STORE_RELAXED(&q->active, cur);
  return cur;
}

static void
shared_worklist_push(struct shared_worklist *q, struct gc_ref x) {
  size_t b = LOAD_RELAXED(&q->bottom);
  size_t t = LOAD_ACQUIRE(&q->top);
  int active = LOAD_RELAXED(&q->active);
  ssize_t size = b - t;
  if (size > shared_worklist_buf_size(&q->bufs[active]) - 1)
    active = shared_worklist_grow(q, active, b, t); /* Full queue; grow. */
  shared_worklist_buf_put(&q->bufs[active], b, x);
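  // Make sure the element store above is ordered before the bottom update
  // below, so a stealer that observes the new bottom also sees the element.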
  atomic_thread_fence(memory_order_release);
  STORE_RELAXED(&q->bottom, b + 1);
}
static void
shared_worklist_push_many(struct shared_worklist *q, struct gc_ref *objv,
                          size_t count) {
  size_t b = LOAD_RELAXED(&q->bottom);
  size_t t = LOAD_ACQUIRE(&q->top);
  int active = LOAD_RELAXED(&q->active);
  ssize_t size = b - t;
  while (size > shared_worklist_buf_size(&q->bufs[active]) - count)
    active = shared_worklist_grow(q, active, b, t); /* Full queue; grow. */
  for (size_t i = 0; i < count; i++)
    shared_worklist_buf_put(&q->bufs[active], b + i, objv[i]);
  atomic_thread_fence(memory_order_release);
  STORE_RELAXED(&q->bottom, b + count);
}
static struct gc_ref
shared_worklist_try_pop(struct shared_worklist *q) {
  size_t b = LOAD_RELAXED(&q->bottom);
  int active = LOAD_RELAXED(&q->active);
  STORE_RELAXED(&q->bottom, b - 1);
  atomic_thread_fence(memory_order_seq_cst);
  size_t t = LOAD_RELAXED(&q->top);
  struct gc_ref x;
  ssize_t size = b - t;
  if (size > 0) { // Non-empty queue.
    x = shared_worklist_buf_get(&q->bufs[active], b - 1);
    if (size == 1) { // Single last element in queue.
      if (!atomic_compare_exchange_strong_explicit(&q->top, &t, t + 1,
                                                   memory_order_seq_cst,
                                                   memory_order_relaxed))
        // Failed race.
        x = gc_ref_null();
      STORE_RELAXED(&q->bottom, b);
    }
  } else { // Empty queue.
    x = gc_ref_null();
    STORE_RELAXED(&q->bottom, b);
  }
  return x;
}
static struct gc_ref
shared_worklist_steal(struct shared_worklist *q) {
  while (1) {
    size_t t = LOAD_ACQUIRE(&q->top);
    atomic_thread_fence(memory_order_seq_cst);
    size_t b = LOAD_ACQUIRE(&q->bottom);
    ssize_t size = b - t;
    if (size <= 0)
      return gc_ref_null();
    int active = LOAD_CONSUME(&q->active);
    struct gc_ref ref = shared_worklist_buf_get(&q->bufs[active], t);
    if (!atomic_compare_exchange_strong_explicit(&q->top, &t, t + 1,
                                                 memory_order_seq_cst,
                                                 memory_order_relaxed))
      // Failed race.
      continue;
    return ref;
  }
}
static ssize_t
shared_worklist_size(struct shared_worklist *q) {
  size_t t = LOAD_ACQUIRE(&q->top);
  atomic_thread_fence(memory_order_seq_cst);
  size_t b = LOAD_ACQUIRE(&q->bottom);
  ssize_t size = b - t;
  return size;
}

static int
shared_worklist_can_steal(struct shared_worklist *q) {
  return shared_worklist_size(q) > 0;
}
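
// Illustrative usage sketch (not part of this header's API): the owning
// thread pushes and pops at the bottom, while any other thread may steal
// from the top.  Assumes `ref` is some live struct gc_ref, and that a
// null ref has a zero gc_ref_value().
//
//   struct shared_worklist q;
//   if (!shared_worklist_init(&q)) GC_CRASH();
//
//   // Owner thread only:
//   shared_worklist_push(&q, ref);
//   struct gc_ref mine = shared_worklist_try_pop(&q);
//
//   // Any thread:
//   if (shared_worklist_can_steal(&q)) {
//     struct gc_ref stolen = shared_worklist_steal(&q);
//     if (gc_ref_value(stolen))  // Assumption: null ref == zero value.
//       /* process the stolen object */;
//   }
//
//   shared_worklist_release(&q);  // Return idle buffer memory to the OS.
//   shared_worklist_destroy(&q);  // Unmap the buffers at shutdown.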
#undef LOAD_RELAXED
#undef STORE_RELAXED
#undef LOAD_ACQUIRE
#undef STORE_RELEASE
#undef LOAD_CONSUME
#endif // SHARED_WORKLIST_H