diff --git a/CHANGELOG.md b/CHANGELOG.md index 0493ab9..319026d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed + +- **`FirstFitBStackAllocator::dealloc` / `realloc` — coalesced tail blocks never reclaimed** (`alloc` + `set` features): `cascade_discard_free_tail` was only called from the explicit tail-discard path in `dealloc`. When `add_to_free_list` coalesced a freed block with its neighbours and the result ended at the stack tail, that merged free block was never discarded — leaving the arena larger than necessary after all allocations were freed. No data corruption occurs; the free list remains structurally valid throughout. Fixed by calling `cascade_discard_free_tail` after every `add_to_free_list` call in both `dealloc` and `realloc`. The cascade is a no-op when the tail is still allocated. + ### Changed - **`SlabBStackAllocator` version bumped to 0.1.1** (`alloc` + `set` features): Magic number updated from `ALSL\x00\x01\x00\x00` to `ALSL\x00\x01\x01\x00`. Reflects the addition of `atomic` / `Sync` support. Existing 0.1.x files remain fully compatible (only the first 6 bytes are checked on open). diff --git a/c/bstack_alloc.c b/c/bstack_alloc.c index 9e5c7ab..1c04d00 100644 --- a/c/bstack_alloc.c +++ b/c/bstack_alloc.c @@ -1478,6 +1478,7 @@ static int ff_vt_dealloc(bstack_allocator_t *self, bstack_slice_t slice) if (alff_set_recovery_needed(a->bs) != 0) { FF_UNLOCK(a); return -1; } if (alff_add_to_free_list(a->bs, slice.offset) != 0) { FF_UNLOCK(a); return -1; } + if (alff_cascade_discard_free_tail(a->bs) != 0) { FF_UNLOCK(a); return -1; } r = alff_clear_recovery_needed(a->bs); FF_UNLOCK(a); return r; @@ -1765,6 +1766,7 @@ static int ff_vt_realloc(bstack_allocator_t *self, bstack_slice_t slice, : found_start; if (alff_add_to_free_list(a->bs, slice.offset) != 0) { FF_UNLOCK(a); return -1; } + if (alff_cascade_discard_free_tail(a->bs) != 0) { FF_UNLOCK(a); return -1; } if (alff_clear_recovery_needed(a->bs) != 0) { FF_UNLOCK(a); return -1; } out->allocator = self; @@ -1825,6 +1827,7 @@ static int ff_vt_realloc(bstack_allocator_t *self, bstack_slice_t slice, new_ptr = push_offset + ALFF_BLOCK_HDR_SIZE; if (alff_add_to_free_list(a->bs, slice.offset) != 0) { FF_UNLOCK(a); return -1; } + if (alff_cascade_discard_free_tail(a->bs) != 0) { FF_UNLOCK(a); return -1; } if (alff_clear_recovery_needed(a->bs) != 0) { FF_UNLOCK(a); return -1; } out->allocator = self; diff --git a/c/test_first_fit.c b/c/test_first_fit.c index f0bc1a4..d648d8c 100644 --- a/c/test_first_fit.c +++ b/c/test_first_fit.c @@ -753,6 +753,7 @@ static int test_fuzz_reopen(void) /* ALFF header layout — mirrored from bstack_alloc.c for the flag poke. */ #define ALFF_FLAGS_OFFSET 24 /* OFFSET_SIZE(16) + magic(8) */ +#define ALFF_HDR_OFFSET 48 /* OFFSET_SIZE(16) + HEADER_SIZE(32) — arena start */ /* --- concurrent alloc / dealloc data integrity ------------------------ */ @@ -895,17 +896,13 @@ static int test_concurrent_realloc_grow_shrink_data_integrity(void) for (int i = 0; i < FF_REALLOC_THREADS; i++) pthread_join(threads[i], NULL); for (int i = 0; i < FF_REALLOC_THREADS; i++) CHECK(args[i].ok); - /* Final length is not asserted: non-adjacent mid-arena dealloc calls - * can leave coalesced free blocks in the list rather than discarding - * to the tail. Reopen to confirm the free list survived intact. */ - bstack_close(first_fit_bstack_allocator_into_stack(a)); - bstack_t *bs2 = bstack_open(tmp); CHECK(bs2); - first_fit_bstack_allocator_t *a2 = first_fit_bstack_allocator_new(bs2); - CHECK(a2); - bstack_slice_t probe; - CHECK(bstack_allocator_alloc((bstack_allocator_t *)a2, 16, &probe) == 0); - bstack_close(first_fit_bstack_allocator_into_stack(a2)); + { + uint64_t len; + CHECK(bstack_allocator_len((bstack_allocator_t *)a, &len) == 0); + CHECK(len == ALFF_HDR_OFFSET); + } + bstack_close(first_fit_bstack_allocator_into_stack(a)); ff_unlink(tmp); return 0; } @@ -953,19 +950,16 @@ static int test_concurrent_tail_thrash(void) CHECK(ret == NULL); } - /* Final stack length is not asserted: a non-tail-resident realloc - * 64→16 stays in-bucket (block stays 64 B) and its subsequent dealloc - * misses the tail-discard fast path, so the cascade only catches the - * blocks that happen to be tail-resident at join time. Reopen instead - * to confirm the free list survived intact. */ - bstack_close(first_fit_bstack_allocator_into_stack(a)); - bstack_t *bs2 = bstack_open(tmp); CHECK(bs2); - first_fit_bstack_allocator_t *a2 = first_fit_bstack_allocator_new(bs2); - CHECK(a2); - bstack_slice_t probe; - CHECK(bstack_allocator_alloc((bstack_allocator_t *)a2, 16, &probe) == 0); - bstack_close(first_fit_bstack_allocator_into_stack(a2)); + /* With cascade_discard_free_tail called after every alff_add_to_free_list, + * a shrunken block whose dealloc goes through the non-tail path is still + * reclaimed as soon as it coalesces to the tail. */ + { + uint64_t len; + CHECK(bstack_allocator_len((bstack_allocator_t *)a, &len) == 0); + CHECK(len == ALFF_HDR_OFFSET); + } + bstack_close(first_fit_bstack_allocator_into_stack(a)); ff_unlink(tmp); return 0; } @@ -1012,6 +1006,106 @@ static int test_recovery_needed_already_set_rejects_mutation(void) ff_unlink(tmp); return 0; } +/* --- regression: cascade after non-tail dealloc reclaims arena --------- */ + +#define FF_CASCADE_THREADS 4 +#define FF_CASCADE_ITERS 100 + +static void *ff_cascade_dealloc_worker(void *raw) +{ + bstack_allocator_t *a = raw; + static const uint64_t sizes[] = {16, 32, 64, 128}; + for (int i = 0; i < FF_CASCADE_ITERS; i++) { + uint64_t len = sizes[i % (int)(sizeof sizes / sizeof sizes[0])]; + bstack_slice_t s; + if (bstack_allocator_alloc(a, len, &s) != 0) return (void *)(intptr_t)-1; + if (bstack_allocator_dealloc(a, s) != 0) return (void *)(intptr_t)-1; + } + return NULL; +} + +static int test_dealloc_non_tail_cascade_reclaims_arena(void) +{ + /* Regression for missing alff_cascade_discard_free_tail after + * alff_add_to_free_list in ff_vt_dealloc's non-tail path. With + * concurrent threads the last dealloc going through the non-tail path + * can coalesce free neighbours to the physical tail, leaving a free + * block in the free list permanently unless cascade is called. */ + char tmp[64]; make_tmp(tmp, sizeof tmp); + bstack_t *bs = bstack_open(tmp); CHECK(bs); + first_fit_bstack_allocator_t *a = first_fit_bstack_allocator_new(bs); + CHECK(a); + + pthread_t threads[FF_CASCADE_THREADS]; + for (int i = 0; i < FF_CASCADE_THREADS; i++) + pthread_create(&threads[i], NULL, ff_cascade_dealloc_worker, + (bstack_allocator_t *)a); + for (int i = 0; i < FF_CASCADE_THREADS; i++) { + void *ret = NULL; + pthread_join(threads[i], &ret); + CHECK(ret == NULL); + } + + { + uint64_t len; + CHECK(bstack_allocator_len((bstack_allocator_t *)a, &len) == 0); + CHECK(len == ALFF_HDR_OFFSET); + } + + bstack_close(first_fit_bstack_allocator_into_stack(a)); + ff_unlink(tmp); return 0; +} + +/* --- regression: cascade after realloc copy-and-move reclaims arena ---- */ + +#define FF_REALLOC_CASCADE_THREADS 4 +#define FF_REALLOC_CASCADE_ITERS 60 + +static void *ff_realloc_cascade_worker(void *raw) +{ + bstack_allocator_t *a = raw; + for (int i = 0; i < FF_REALLOC_CASCADE_ITERS; i++) { + bstack_slice_t s, s2; + /* Grow far beyond the original block to force the copy-and-move + * path: the old 16-byte block is freed via alff_add_to_free_list. */ + if (bstack_allocator_alloc(a, 16, &s) != 0) return (void *)(intptr_t)-1; + if (bstack_allocator_realloc(a, s, 128, &s2) != 0) return (void *)(intptr_t)-1; + if (bstack_allocator_dealloc(a, s2) != 0) return (void *)(intptr_t)-1; + } + return NULL; +} + +static int test_realloc_copy_move_cascade_reclaims_arena(void) +{ + /* Regression for missing alff_cascade_discard_free_tail after + * alff_add_to_free_list in ff_vt_realloc's copy-and-move paths. + * After realloc copies data to a new block and frees the old one, + * the freed old block can coalesce to the tail if cascade is not called. */ + char tmp[64]; make_tmp(tmp, sizeof tmp); + bstack_t *bs = bstack_open(tmp); CHECK(bs); + first_fit_bstack_allocator_t *a = first_fit_bstack_allocator_new(bs); + CHECK(a); + + pthread_t threads[FF_REALLOC_CASCADE_THREADS]; + for (int i = 0; i < FF_REALLOC_CASCADE_THREADS; i++) + pthread_create(&threads[i], NULL, ff_realloc_cascade_worker, + (bstack_allocator_t *)a); + for (int i = 0; i < FF_REALLOC_CASCADE_THREADS; i++) { + void *ret = NULL; + pthread_join(threads[i], &ret); + CHECK(ret == NULL); + } + + { + uint64_t len; + CHECK(bstack_allocator_len((bstack_allocator_t *)a, &len) == 0); + CHECK(len == ALFF_HDR_OFFSET); + } + + bstack_close(first_fit_bstack_allocator_into_stack(a)); + ff_unlink(tmp); return 0; +} + #endif /* BSTACK_FEATURE_ATOMIC */ /* ========================================================================= @@ -1041,6 +1135,8 @@ int main(void) T(test_concurrent_alloc_dealloc_data_integrity); T(test_concurrent_realloc_grow_shrink_data_integrity); T(test_concurrent_tail_thrash); + T(test_dealloc_non_tail_cascade_reclaims_arena); + T(test_realloc_copy_move_cascade_reclaims_arena); T(test_recovery_needed_already_set_rejects_mutation); #endif diff --git a/src/alloc/first_fit.rs b/src/alloc/first_fit.rs index a1d0d36..e5d3913 100644 --- a/src/alloc/first_fit.rs +++ b/src/alloc/first_fit.rs @@ -384,8 +384,8 @@ impl FirstFitBStackAllocator { // 1. Marking the block as free (crash before coalescing: recovery finds it as free). // 2. Absorbing the right neighbour if it is free (right coalesce). // 3. Merging into the left neighbour if it is free (left coalesce). - // 4. Tail reclamation: if the merged block ends at the stack tail, discard it entirely. - // 5. Otherwise, prepend the merged block to the free list. + // 4. Prepend the merged block to the free list. + // (Tail reclamation is the caller's responsibility via cascade_discard_free_tail.) // Current free list: // free_head --------------> next -> ... @@ -655,16 +655,18 @@ impl FirstFitBStackAllocator { } } - /// After discarding the tail block, cascade-discard any free blocks that are now the new tail. + /// Cascade-discard any free blocks sitting at the stack tail. /// - /// This maintains the invariant that no free block ever sits at the stack tail, which in turn - /// makes tail reclamation inside `add_to_free_list` impossible (and therefore omitted). + /// Called after every operation that can leave a free block at the tail: the explicit + /// tail-discard path in `dealloc` and after every `add_to_free_list` call (which may + /// coalesce the freed block up to the tail). This maintains the invariant that no free + /// block ever sits at the stack tail. /// - /// Recovery management: the caller (`dealloc` tail path) is the sole manager of - /// `recovery_needed` and must set it before invoking this function and clear it after. - /// This avoids a double-set under the CAS-based atomic helpers and matches the C port. + /// Recovery management: the caller is the sole manager of `recovery_needed` and must set + /// it before invoking this function and clear it after. This avoids a double-set under + /// the CAS-based atomic helpers. /// - /// With the `atomic` feature the caller (`dealloc`) holds `self.lock` for the duration, + /// With the `atomic` feature the caller holds `self.lock` for the duration, /// so the free-list unlinks and tail discards here are serialised against other threads. fn cascade_discard_free_tail(&self) -> io::Result<()> { let arena_start = Self::OFFSET_SIZE + Self::HEADER_SIZE; @@ -945,6 +947,7 @@ impl BStackAllocator for FirstFitBStackAllocator { } self.set_recovery_needed()?; self.add_to_free_list(slice.start())?; + self.cascade_discard_free_tail()?; self.clear_recovery_needed() } @@ -1239,6 +1242,7 @@ impl BStackAllocator for FirstFitBStackAllocator { block_found.0 }; self.add_to_free_list(slice.start())?; + self.cascade_discard_free_tail()?; self.clear_recovery_needed()?; // SAFETY: new_payload from allocated block via unlink_block Ok(unsafe { BStackSlice::from_raw_parts(self, new_payload, new_len) }) @@ -1257,6 +1261,7 @@ impl BStackAllocator for FirstFitBStackAllocator { self.set_recovery_needed()?; let ptr = self.stack.push(&block_buf)? + Self::BLOCK_HEADER_SIZE; self.add_to_free_list(slice.start())?; + self.cascade_discard_free_tail()?; self.clear_recovery_needed()?; // SAFETY: ptr from fresh allocation via self.stack.push Ok(unsafe { BStackSlice::from_raw_parts(self, ptr, new_len) }) diff --git a/src/test.rs b/src/test.rs index 130108f..bc3f65f 100644 --- a/src/test.rs +++ b/src/test.rs @@ -3126,6 +3126,12 @@ mod first_fit_tests { // free-list mutation correctly, two threads could be handed the same // block (or overlapping blocks), and the read-back would observe a // different thread's pattern. + // + // Regression: the final assert_eq checks that cascade_discard_free_tail + // is called after every add_to_free_list in the non-tail dealloc path. + // Without the fix, the last dealloc going through the non-tail path + // could coalesce free neighbours all the way to the physical tail and + // leave a free block in the free list permanently. use std::sync::Arc; use std::thread; @@ -3223,13 +3229,7 @@ mod first_fit_tests { h.join().unwrap(); } - // Final length is not asserted: non-adjacent mid-arena dealloc calls - // can leave coalesced free blocks in the list rather than discarding - // to the tail. What matters is that no operation errored mid-run - // and the allocator survives a reopen with the free list intact. - drop(alloc); - let reopened = FirstFitBStackAllocator::new(BStack::open(&path).unwrap()).unwrap(); - let _ = reopened.alloc(16).unwrap(); + assert_eq!(alloc.len().unwrap(), ALFF_HDR_OFFSET); } #[cfg(feature = "atomic")] @@ -3238,10 +3238,11 @@ mod first_fit_tests { // Hammer the tail paths — alloc → tail-grow → tail-shrink → dealloc — // from many threads on one allocator. Whether each individual op // actually lands on the tail depends on the interleaving (another - // thread's allocation can sit above this one), so this is about - // exercising the `recovery_needed` CAS, the tail-grow extend, the - // in-bucket realloc shrink, and the dealloc/cascade paths under - // contention — not about asserting a specific final stack size. + // thread's allocation can sit above this one), so this exercises the + // `recovery_needed` CAS, the tail-grow extend, the in-bucket realloc + // shrink, and the dealloc/cascade paths under contention. With + // cascade_discard_free_tail called after every add_to_free_list, the + // arena is fully reclaimed even when dealloc takes the non-tail path. use std::sync::Arc; use std::thread; @@ -3268,19 +3269,93 @@ mod first_fit_tests { }) .collect(); + // With cascade_discard_free_tail called after every add_to_free_list, + // a shrunken block whose dealloc goes through the non-tail path + // (because the user-visible len is smaller than the physical block + // size) is still reclaimed as soon as it coalesces to the tail. for h in handles { h.join().unwrap(); } + assert_eq!(alloc.len().unwrap(), ALFF_HDR_OFFSET); + } - // Final stack length is not asserted: a non-tail-resident realloc - // 64→16 stays in-bucket (block stays 64 B) and its subsequent - // dealloc misses the tail-discard fast path, so the cascade only - // catches the blocks that happen to be tail-resident at join time. - // What we *can* assert is that no operation errored mid-run and the - // allocator is openable from disk with a usable free list. - drop(alloc); - let reopened = FirstFitBStackAllocator::new(BStack::open(&path).unwrap()).unwrap(); - let _ = reopened.alloc(16).unwrap(); + #[cfg(feature = "atomic")] + #[test] + fn dealloc_non_tail_cascade_reclaims_arena() { + // Targeted regression for missing cascade_discard_free_tail after + // add_to_free_list in dealloc's non-tail path. With concurrent + // threads the last dealloc going through the non-tail path can + // coalesce free neighbours to the physical tail, leaving a free block + // in the free list permanently unless cascade is called. + use std::sync::Arc; + use std::thread; + + let (alloc, path) = mk_ff("nontail_cascade"); + let _g = Guard(path); + let alloc = Arc::new(alloc); + + const THREADS: u64 = 4; + const ITERS: u64 = 100; + + let handles: Vec<_> = (0..THREADS) + .map(|_| { + let alloc = Arc::clone(&alloc); + thread::spawn(move || { + let sizes = [16u64, 32, 64, 128]; + for i in 0..ITERS { + let len = sizes[(i as usize) % sizes.len()]; + let s = alloc.alloc(len).unwrap(); + alloc.dealloc(s).unwrap(); + } + }) + }) + .collect(); + + for h in handles { + h.join().unwrap(); + } + assert_eq!(alloc.len().unwrap(), ALFF_HDR_OFFSET); + } + + #[cfg(feature = "atomic")] + #[test] + fn realloc_copy_move_cascade_reclaims_arena() { + // Targeted regression for missing cascade_discard_free_tail after + // add_to_free_list in realloc's copy-and-move paths. realloc copies + // data to a new block then frees the old one via add_to_free_list; + // the freed old block can coalesce to the tail if not immediately + // cascade-discarded. + use std::sync::Arc; + use std::thread; + + let (alloc, path) = mk_ff("realloc_cascade"); + let _g = Guard(path); + let alloc = Arc::new(alloc); + + const THREADS: u64 = 4; + const ITERS: u64 = 60; + + let handles: Vec<_> = (0..THREADS) + .map(|_| { + let alloc = Arc::clone(&alloc); + thread::spawn(move || { + for _ in 0..ITERS { + // Grow far beyond the original block to force the + // copy-and-move path: the 16-byte block can't be + // extended in place to 128 bytes when other threads + // hold adjacent blocks. + let s = alloc.alloc(16).unwrap(); + let s = alloc.realloc(s, 128).unwrap(); + alloc.dealloc(s).unwrap(); + } + }) + }) + .collect(); + + for h in handles { + h.join().unwrap(); + } + assert_eq!(alloc.len().unwrap(), ALFF_HDR_OFFSET); } #[cfg(feature = "atomic")]