-
Notifications
You must be signed in to change notification settings - Fork 242
Expand file tree
/
Copy pathsequence.c
More file actions
353 lines (312 loc) · 11.8 KB
/
sequence.c
File metadata and controls
353 lines (312 loc) · 11.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
/*
* Sequence-aware fuzzing — chain executor and chain corpus.
*
* Phase 1 dispatches a short chain of random syscalls per fuzzer iteration
* and threads each call's return value into the next call's args with a
* tunable probability. Phase 2 (this file) mines productive chains into a
* global ring of saved chains, and replays them on a fraction of future
* iterations with the per-arg mutator chain that the per-call mini-corpus
* already runs (cross-arg splice + weighted-stack mutate + fd safety).
* Phase 3 (deferred) will add resource-type dependency tracking so chains
* are generated with structural awareness of which calls produce and
* consume which kinds of resource.
*
* Chain length is drawn from a geometric distribution biased toward 2:
* P(2)=50%, P(3)=30%, P(4)=20%. The bias toward 2 is deliberate —
* most setup-then-use kernel paths fit in two calls (open then ioctl,
* socket then sendmsg), and shorter chains preserve fuzzer throughput
* while still exercising the longer-tail patterns at lower frequency.
*
* Substitution-vs-failure: if a step's retval is negative (errno-style
* failure) the next step is dispatched without a substitute, since
* passing -EBADF as an fd to the following call wastes the slot. The
* chain itself continues — a single mid-chain failure does not abort
* the remaining steps.
*/
#include <stdlib.h>
#include <string.h>
#include "child.h"
#include "kcov.h"
#include "minicorpus.h"
#include "random.h"
#include "sequence.h"
#include "shm.h"
#include "syscall.h"
#include "tables.h"
#include "trinity.h"
#include "utils.h"
/*
* Probability (as 1/N) that an iteration replays a saved chain instead
* of generating a fresh one.
*
* 4 == 25%. Same starting point as minicorpus_replay's per-call replay
* rate, picked so the two replay paths sit at the same baseline and
* any divergence in coverage productivity between per-call and per-
* chain replay is attributable to the structural difference rather
* than to a sampling rate gap. Lower than 50/50 because fresh
* generation is still where new chain shapes are discovered — a
* replay-dominated mix would saturate on the seed distribution that
* Phase 1's random chain length and arg generation produce. The
* gating is in run_sequence_chain so the replay rate can be tuned
* here without touching the dispatch code.
*/
#define CHAIN_REPLAY_RATIO 4
struct chain_corpus_ring *chain_corpus_shm = NULL;
void chain_corpus_init(void)
{
/* No coverage signal means no save trigger; skip the allocation
* and let chain_corpus_save / dump_stats fall through their NULL
* guards. Mirrors the same kcov_shm gate that minicorpus_init
* uses for the same reason. */
if (kcov_shm == NULL)
return;
/*
* Stays alloc_shared() rather than alloc_shared_global().
* Children are the producers for chain_corpus_shm: chain_corpus_save
* runs in child context after run_sequence_chain notices a
* coverage-positive chain, takes ring->lock, memcpy's the saved
* chain_step entries into ring->slots[], and bumps ring->head /
* ring->count / ring->save_count. chain_corpus_pick (also from
* child context) takes the same lock to read. An mprotect PROT_READ
* on this region would EFAULT the lock acquire and the per-save
* memcpy, killing both phases of the chain corpus.
*
* The save path is exactly the slot-inlined design from f43e89c779f1
* — chain steps used to live in obj-heap-allocated chain_entry slots
* and the inline conversion in that commit deliberately moved them
* out of the obj heap so the obj-heap freeze (fbce60744dfb) could
* land. Conversely that means this region is intentionally outside
* the freeze pool; the trade is "child-writable corpus" for "freeze-
* safe obj heap".
*
* Wild-write risk this leaves open: a child syscall buffer pointer
* aliasing into a slot could corrupt a stored chain (next replay
* dispatches garbage syscalls — bounded by replay_syscall_step's
* deactivation / sanitise checks, which drop the chain on first
* unsafe step) or stick the ring lock (chain saves and replays
* stall fleet-wide until a kernel-side timeout reaps the holder).
* No parent crash surface.
*/
chain_corpus_shm = alloc_shared(sizeof(struct chain_corpus_ring));
memset(chain_corpus_shm, 0, sizeof(struct chain_corpus_ring));
output(0, "Sequence chain corpus allocated (%u slots, %lu B per entry)\n",
CHAIN_CORPUS_RING_SIZE,
(unsigned long) sizeof(struct chain_entry));
}
/* True if a stored arg of this type would be poison on replay: stale
 * heap pointers (iovec/pathname/sockaddr/mmap) or stale pids.  Same
 * exclusion set that minicorpus_save applies per call. */
static bool argtype_is_poison(unsigned int type)
{
	switch (type) {
	case ARG_IOVEC:
	case ARG_PATHNAME:
	case ARG_SOCKADDR:
	case ARG_MMAP:
	case ARG_PID:
		return true;
	default:
		return false;
	}
}

/*
 * Replay-safety filter for chain corpus entries.
 *
 * Returns true only if every step in @steps can be replayed without
 * handing the kernel stale pointers, stale pids, or args a sanitise
 * hook would have rewritten — the same gate random-syscall.c applies
 * before it calls minicorpus_save, plus the per-arg poison check.
 *
 * Runs at save time, not replay time, so the ring only ever contains
 * replay-safe chains: filtering on replay instead would let unsafe
 * entries displace safe ones as the ring wraps and would shrink the
 * effective corpus whenever the unsafe fraction was non-trivial.
 */
static bool chain_is_replay_safe(const struct chain_step *steps,
				 unsigned int len)
{
	unsigned int i;

	for (i = 0; i < len; i++) {
		struct syscallentry *entry;
		unsigned int arg;

		entry = get_syscall_entry(steps[i].nr, steps[i].do32bit);
		if (entry == NULL)
			return false;
		if (entry->sanitise != NULL)
			return false;

		for (arg = 0; arg < entry->num_args && arg < 6; arg++) {
			if (argtype_is_poison(entry->argtype[arg]))
				return false;
		}
	}
	return true;
}
/*
* Push a fresh chain into the ring, overwriting the oldest slot in place
* when the ring is full. Slots are inline structs in shm (no separate
* allocation), so the write is a memcpy under the ring lock and there is
* no eviction free path to defer.
*/
void chain_corpus_save(const struct chain_step *steps, unsigned int len)
{
struct chain_corpus_ring *ring = chain_corpus_shm;
struct chain_entry *ent;
unsigned int slot;
if (ring == NULL || len == 0 || len > MAX_SEQ_LEN)
return;
if (!chain_is_replay_safe(steps, len))
return;
lock(&ring->lock);
slot = ring->head % CHAIN_CORPUS_RING_SIZE;
ent = &ring->slots[slot];
ent->len = len;
memcpy(ent->steps, steps, len * sizeof(struct chain_step));
ring->head++;
if (ring->count < CHAIN_CORPUS_RING_SIZE)
ring->count++;
unlock(&ring->lock);
__atomic_fetch_add(&ring->save_count, 1UL, __ATOMIC_RELAXED);
}
/*
 * Copy a uniformly-random live entry out of the ring into @out.
 * Returns false when the corpus is unallocated (no kcov) or still
 * empty.  The copy happens under the ring lock so a concurrent
 * in-place save cannot overwrite the slot mid-read — callers always
 * get a consistent snapshot.
 */
bool chain_corpus_pick(struct chain_entry *out)
{
	struct chain_corpus_ring *ring = chain_corpus_shm;
	unsigned int slot;

	if (ring == NULL || out == NULL)
		return false;

	lock(&ring->lock);
	if (ring->count == 0) {
		unlock(&ring->lock);
		return false;
	}
	/* Live entries span (head - count) .. (head - 1), wrapping mod
	 * CHAIN_CORPUS_RING_SIZE.  Draw from rnd() — the fuzzer's seeded
	 * per-child PRNG, the same source ONE_IN uses — rather than libc
	 * rand(), which the fuzzer never seeds: rand() here would make
	 * chain selection unreproducible under a fixed seed and leave
	 * forked children with correlated streams. */
	slot = (ring->head - ring->count + (rnd() % ring->count)) %
		CHAIN_CORPUS_RING_SIZE;
	*out = ring->slots[slot];
	unlock(&ring->lock);
	return true;
}
#if ENABLE_SEQUENCE_CHAIN
/*
 * Draw a chain length from the distribution described in the file
 * header: P(2)=50%, P(3)=30%, P(4)=20%, biased toward the two-call
 * setup-then-use shape.  Uses rnd() — the fuzzer's seeded PRNG, the
 * same source behind ONE_IN — instead of libc rand(), which is never
 * seeded by the fuzzer's seed machinery and would make chain shapes
 * unreproducible under a fixed seed.
 */
static unsigned int pick_chain_length(void)
{
	unsigned int r = rnd() % 10;

	if (r < 5)
		return 2;	/* 50% */
	if (r < 8)
		return 3;	/* 30% */
	return 4;		/* 20% */
}
/*
 * Execute one fuzzer iteration as a chain of syscalls.
 *
 * Either replays a mutated saved chain from the corpus (1 in
 * CHAIN_REPLAY_RATIO iterations, when the corpus has entries) or
 * generates a fresh chain of pick_chain_length() random calls.  Each
 * step's non-negative retval is threaded into the next step as a
 * candidate substitute arg; each dispatched step is snapshotted into
 * steps[], and the whole chain is saved to the corpus if any step found
 * new coverage.  Returns FAIL only when a step itself returns FAIL
 * (NOTE(review): step_ret is declared bool but compared against FAIL and
 * FAIL is returned from a bool function — presumably FAIL is the
 * project's false-equivalent; confirm against trinity.h).
 */
bool run_sequence_chain(struct childdata *child)
{
/* rec aliases child->syscall: the dispatch helpers write the record
 * there, and the snapshot below reads it back after each step. */
struct syscallrecord *rec = &child->syscall;
struct chain_step steps[MAX_SEQ_LEN];
struct chain_entry replay;
const struct chain_step *replay_template = NULL;
unsigned int steps_recorded = 0;
unsigned int len, i;
bool have_substitute = false;
unsigned long substitute_retval = 0;
bool chain_found_new = false;
bool replaying = false;
/* With CHAIN_REPLAY_RATIO probability, try to replay a saved chain
 * rather than generate a fresh one. Falls back to fresh if the
 * corpus is empty (warm-start) or if the picked chain is rejected
 * mid-replay by replay_syscall_step's safety checks (deactivated
 * syscall, sanitise that wasn't there at save time, etc.). */
if (chain_corpus_shm != NULL && ONE_IN(CHAIN_REPLAY_RATIO) &&
chain_corpus_pick(&replay)) {
replaying = true;
replay_template = replay.steps;
len = replay.len;
__atomic_fetch_add(&chain_corpus_shm->replay_count, 1UL,
__ATOMIC_RELAXED);
} else {
len = pick_chain_length();
}
for (i = 0; i < len; i++) {
bool step_ret;
bool step_found_new = false;
unsigned long rv;
if (replaying) {
step_ret = replay_syscall_step(child,
&replay_template[i],
have_substitute,
substitute_retval,
&step_found_new);
if (step_ret == FAIL) {
/* Replay safety check failed (saved syscall
 * has been deactivated or otherwise become
 * unreplayable since save). Drop into fresh
 * generation for the rest of the chain so the
 * iteration still does useful work. */
replaying = false;
step_ret = random_syscall_step(child,
have_substitute,
substitute_retval,
&step_found_new);
}
} else {
step_ret = random_syscall_step(child,
have_substitute,
substitute_retval,
&step_found_new);
}
/* A FAIL here (after the replay fallback above already retried
 * with a fresh random step) aborts the whole iteration. */
if (step_ret == FAIL)
return FAIL;
/* Snapshot the dispatched call into the chain buffer. Done
 * after dispatch returns so the args reflect any Phase 1
 * retval substitution and the retval is the kernel's actual
 * return. cmp-mode steps have step_found_new == false
 * (kcov_collect was skipped) — they still get recorded in
 * the chain so saves preserve chain shape, but they don't
 * by themselves trigger a chain save. */
if (steps_recorded < MAX_SEQ_LEN) {
struct chain_step *cs = &steps[steps_recorded++];
cs->nr = rec->nr;
cs->do32bit = rec->do32bit;
cs->args[0] = rec->a1;
cs->args[1] = rec->a2;
cs->args[2] = rec->a3;
cs->args[3] = rec->a4;
cs->args[4] = rec->a5;
cs->args[5] = rec->a6;
cs->retval = rec->retval;
}
if (step_found_new)
chain_found_new = true;
/* Decide whether the next step may receive a substitute.
 * Errno-style returns (-1..-4095 region, all negative when
 * read as long) are dropped because they are unlikely to
 * be useful as downstream arg values. Zero is allowed
 * through — RET_ZERO_SUCCESS calls return 0 on success
 * and a NULL substituted into a pointer slot is a useful
 * boundary case to exercise. */
rv = child->syscall.retval;
if ((long)rv < 0) {
have_substitute = false;
substitute_retval = 0;
} else {
have_substitute = true;
substitute_retval = rv;
}
}
/* Save chains that produced new coverage in any step. The same
 * found_new signal that drives the per-call minicorpus save and the
 * weighted_pick_case attribution gates this — chains the rest of
 * the fuzzer already considers interesting are the chains worth
 * keeping for replay. Saves apply equally to fresh chains and to
 * mutated replays of saved chains: a replay that finds new edges
 * proves the mutated form was structurally distinct from its
 * parent and is worth retaining as a corpus entry in its own
 * right. Length-1 chains aren't saved (trivially subsumed by
 * the per-syscall minicorpus); the chain length floor of 2 from
 * pick_chain_length() makes that condition redundant in practice
 * but the explicit check keeps the contract obvious. */
if (chain_found_new && steps_recorded >= 2)
chain_corpus_save(steps, steps_recorded);
/* Per-chain iteration counter for dump_stats; lives in the minicorpus
 * shm region, hence the separate NULL gate. */
if (minicorpus_shm != NULL)
__atomic_fetch_add(&minicorpus_shm->chain_iter_count, 1,
__ATOMIC_RELAXED);
return true;
}
#else /* !ENABLE_SEQUENCE_CHAIN */
/* Chain mode compiled out: an "iteration" degenerates to dispatching a
 * single random syscall, preserving run_sequence_chain's contract for
 * callers without any chain/corpus overhead. */
bool run_sequence_chain(struct childdata *child)
{
return random_syscall(child);
}
#endif