diff --git a/array.c b/array.c index fbb712c7262624..71286d2d8d0669 100644 --- a/array.c +++ b/array.c @@ -6875,7 +6875,7 @@ static const rb_data_type_t ary_sample_memo_type = { .function = { .dfree = (RUBY_DATA_FUNC)st_free_table, }, - .flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY + .flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE diff --git a/ast.c b/ast.c index 5357aa38a5ae09..3321ae069f77a3 100644 --- a/ast.c +++ b/ast.c @@ -45,7 +45,7 @@ static const rb_data_type_t rb_node_type = { "AST/node", {node_gc_mark, RUBY_TYPED_DEFAULT_FREE, node_memsize,}, 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY, + RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; struct ASTLocationData { @@ -70,7 +70,7 @@ static const rb_data_type_t rb_location_type = { "AST/location", {location_gc_mark, RUBY_TYPED_DEFAULT_FREE, location_memsize,}, 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY, + RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; diff --git a/box.c b/box.c index fba494f7ad2e1c..88be74a0a9bfb1 100644 --- a/box.c +++ b/box.c @@ -300,7 +300,7 @@ static const rb_data_type_t rb_box_data_type = { box_entry_memsize, rb_box_gc_update_references, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY // TODO: enable RUBY_TYPED_WB_PROTECTED when inserting write barriers + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE // TODO: enable RUBY_TYPED_WB_PROTECTED when inserting write barriers }; static const rb_data_type_t rb_root_box_data_type = { @@ -311,7 +311,7 @@ static const rb_data_type_t rb_root_box_data_type = { box_entry_memsize, rb_box_gc_update_references, }, - &rb_box_data_type, 0, RUBY_TYPED_FREE_IMMEDIATELY // TODO: enable RUBY_TYPED_WB_PROTECTED when inserting write barriers + &rb_box_data_type, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE // TODO: enable RUBY_TYPED_WB_PROTECTED when inserting write barriers }; VALUE @@ -755,7 +755,7 @@ box_ext_cleanup_free(void 
*p) static const rb_data_type_t box_ext_cleanup_type = { "box_ext_cleanup", {box_ext_cleanup_mark, box_ext_cleanup_free}, - .flags = RUBY_TYPED_FREE_IMMEDIATELY, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; void diff --git a/compile.c b/compile.c index 100ab126ed152e..bad52f6620310a 100644 --- a/compile.c +++ b/compile.c @@ -12312,7 +12312,7 @@ static const rb_data_type_t labels_wrapper_type = { .dmark = (RUBY_DATA_FUNC)rb_mark_set, .dfree = (RUBY_DATA_FUNC)st_free_table, }, - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; void @@ -12573,7 +12573,7 @@ static const rb_data_type_t pinned_list_type = { RUBY_DEFAULT_FREE, NULL, // No external memory to report, }, - 0, 0, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE @@ -14724,7 +14724,7 @@ ibf_dump_memsize(const void *ptr) static const rb_data_type_t ibf_dump_type = { "ibf_dump", {ibf_dump_mark, ibf_dump_free, ibf_dump_memsize,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static void @@ -14961,7 +14961,7 @@ ibf_loader_memsize(const void *ptr) static const rb_data_type_t ibf_load_type = { "ibf_loader", {ibf_loader_mark, ibf_loader_free, ibf_loader_memsize,}, - 0, 0, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; const rb_iseq_t * diff --git a/concurrent_set.c b/concurrent_set.c index c8b0c73881a85d..227f1b53f9f02b 100644 --- a/concurrent_set.c +++ b/concurrent_set.c @@ -4,14 +4,24 @@ #include "ruby/atomic.h" #include "vm_sync.h" -#define CONCURRENT_SET_CONTINUATION_BIT ((VALUE)1 << 
(sizeof(VALUE) * CHAR_BIT - 1)) -#define CONCURRENT_SET_HASH_MASK (~CONCURRENT_SET_CONTINUATION_BIT) +// insertion probes have gone past this slot +#define CONCURRENT_SET_CONTINUATION_BIT ((VALUE)0x2) +#define CONCURRENT_SET_KEY_MASK (~CONCURRENT_SET_CONTINUATION_BIT) +// This slot's hash can be reclaimed if and only if the key is EMPTY and it doesn't have a continuation bit. If the key is something +// else, this bit on the hash has no meaning and is ignored. +#define CONCURRENT_SET_HASH_RECLAIMABLE_BIT ((VALUE)1 << (sizeof(VALUE) * CHAR_BIT - 1)) +#define CONCURRENT_SET_HASH_MASK (~CONCURRENT_SET_HASH_RECLAIMABLE_BIT) + +#define CONCURRENT_SET_DEBUG 0 +#define CONCURRENT_SET_DEBUG_STATS 0 +#define CONCURRENT_SET_DEBUG_DUPLICATES 0 +#define CONCURRENT_SET_DEBUG_BAD_HASH_FN 0 enum concurrent_set_special_values { - CONCURRENT_SET_EMPTY, - CONCURRENT_SET_DELETED, - CONCURRENT_SET_MOVED, - CONCURRENT_SET_SPECIAL_VALUE_COUNT + CONCURRENT_SET_EMPTY = 0, + CONCURRENT_SET_TOMBSTONE = 1, + CONCURRENT_SET_MOVED = 5, // continuation bit is 0x02, so 0x05 doesn't have bits in conflict with it + CONCURRENT_SET_SPECIAL_VALUE_COUNT = 6 }; struct concurrent_set_entry { @@ -22,38 +32,53 @@ struct concurrent_set_entry { struct concurrent_set { rb_atomic_t size; unsigned int capacity; - unsigned int deleted_entries; + rb_atomic_t deleted_entries; const struct rb_concurrent_set_funcs *funcs; struct concurrent_set_entry *entries; + int key_type; +#if CONCURRENT_SET_DEBUG_STATS + rb_atomic_t find_count; + rb_atomic_t find_probe_total; + rb_atomic_t find_probe_max; + rb_atomic_t insert_count; + rb_atomic_t insert_probe_total; + rb_atomic_t insert_probe_max; +#endif }; -static void -concurrent_set_mark_continuation(struct concurrent_set_entry *entry, VALUE curr_hash_and_flags) +static bool +concurrent_set_mark_continuation(struct concurrent_set_entry *entry, VALUE raw_key) { - if (curr_hash_and_flags & CONCURRENT_SET_CONTINUATION_BIT) return; - - RUBY_ASSERT((curr_hash_and_flags & 
CONCURRENT_SET_HASH_MASK) != 0); + if (raw_key & CONCURRENT_SET_CONTINUATION_BIT) return true; - VALUE new_hash = curr_hash_and_flags | CONCURRENT_SET_CONTINUATION_BIT; - VALUE prev_hash = rbimpl_atomic_value_cas(&entry->hash, curr_hash_and_flags, new_hash, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_RELAXED); + VALUE new_key = raw_key | CONCURRENT_SET_CONTINUATION_BIT; // NOTE: raw_key can be CONCURRENT_SET_EMPTY + VALUE prev_key = rbimpl_atomic_value_cas(&entry->key, raw_key, new_key, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_ACQUIRE); - // At the moment we only expect to be racing concurrently against another - // thread also setting the continuation bit. - // In the future if deletion is concurrent this will need adjusting - RUBY_ASSERT(prev_hash == curr_hash_and_flags || prev_hash == new_hash); - (void)prev_hash; + if (prev_key == raw_key || prev_key == new_key) { + return true; + } + else if ((prev_key & CONCURRENT_SET_KEY_MASK) == CONCURRENT_SET_TOMBSTONE) { + return true; + } + else { + // * key could have been made EMPTY, and anything could have happened to this slot since then. Need to retry. 
+ // * key could have been moved during resize + return false; + } } static VALUE concurrent_set_hash(const struct concurrent_set *set, VALUE key) { VALUE hash = set->funcs->hash(key); +#if CONCURRENT_SET_DEBUG_BAD_HASH_FN + hash = hash % 1024; + if (hash == 0) hash = 1; +#endif hash &= CONCURRENT_SET_HASH_MASK; - if (hash == 0) { - hash ^= CONCURRENT_SET_HASH_MASK; - } + if (hash == 0) hash = ~(VALUE)0 & CONCURRENT_SET_HASH_MASK; RUBY_ASSERT(hash != 0); - RUBY_ASSERT(!(hash & CONCURRENT_SET_CONTINUATION_BIT)); + RUBY_ASSERT(!(hash & CONCURRENT_SET_HASH_RECLAIMABLE_BIT)); return hash; } @@ -91,20 +116,31 @@ static const rb_data_type_t concurrent_set_type = { .dsize = concurrent_set_size, }, /* Hack: NOT WB_PROTECTED on purpose (see above) */ - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE + /* NOTE: don't make embedded due to compaction */ + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; VALUE -rb_concurrent_set_new(const struct rb_concurrent_set_funcs *funcs, int capacity) +rb_concurrent_set_new(const struct rb_concurrent_set_funcs *funcs, int capacity, int key_type) { struct concurrent_set *set; VALUE obj = TypedData_Make_Struct(0, struct concurrent_set, &concurrent_set_type, set); set->funcs = funcs; set->entries = ZALLOC_N(struct concurrent_set_entry, capacity); set->capacity = capacity; + (void)key_type; +#if CONCURRENT_SET_DEBUG + set->key_type = key_type; +#endif return obj; } +void * +rb_concurrent_set_get_data(VALUE set_obj) +{ + return RTYPEDDATA_GET_DATA(set_obj); +} + rb_atomic_t rb_concurrent_set_size(VALUE set_obj) { @@ -113,6 +149,50 @@ rb_concurrent_set_size(VALUE set_obj) return RUBY_ATOMIC_LOAD(set->size); } +unsigned int +rb_concurrent_set_capacity(VALUE set_obj) +{ + struct concurrent_set *set = RTYPEDDATA_GET_DATA(set_obj); + + return set->capacity; +} + +void +rb_concurrent_set_probe_stats(VALUE set_obj, + rb_atomic_t *find_count, rb_atomic_t *find_probe_total, rb_atomic_t *find_probe_max, + 
rb_atomic_t *insert_count, rb_atomic_t *insert_probe_total, rb_atomic_t *insert_probe_max) +{ +#if CONCURRENT_SET_DEBUG_STATS + struct concurrent_set *set = RTYPEDDATA_GET_DATA(set_obj); + *find_count = RUBY_ATOMIC_LOAD(set->find_count); + *find_probe_total = RUBY_ATOMIC_LOAD(set->find_probe_total); + *find_probe_max = RUBY_ATOMIC_LOAD(set->find_probe_max); + *insert_count = RUBY_ATOMIC_LOAD(set->insert_count); + *insert_probe_total = RUBY_ATOMIC_LOAD(set->insert_probe_total); + *insert_probe_max = RUBY_ATOMIC_LOAD(set->insert_probe_max); +#else + *find_count = 0; + *find_probe_total = 0; + *find_probe_max = 0; + *insert_count = 0; + *insert_probe_total = 0; + *insert_probe_max = 0; +#endif +} + +#if CONCURRENT_SET_DEBUG_STATS +static void +concurrent_set_atomic_max(rb_atomic_t *target, rb_atomic_t val) +{ + rb_atomic_t cur = RUBY_ATOMIC_LOAD(*target); + while (val > cur) { + rb_atomic_t prev = rbimpl_atomic_cas(target, cur, val, RBIMPL_ATOMIC_RELAXED, RBIMPL_ATOMIC_RELAXED); + if (prev == cur) break; + cur = prev; + } +} +#endif + struct concurrent_set_probe { int idx; int d; @@ -138,67 +218,59 @@ concurrent_set_probe_next(struct concurrent_set_probe *probe) } static void -concurrent_set_try_resize_without_locking(VALUE old_set_obj, VALUE *set_obj_ptr) +concurrent_set_try_resize_locked(VALUE old_set_obj, VALUE *set_obj_ptr, VALUE new_set_obj, int old_capacity) { - // Check if another thread has already resized. - if (rbimpl_atomic_value_load(set_obj_ptr, RBIMPL_ATOMIC_ACQUIRE) != old_set_obj) { - return; - } - struct concurrent_set *old_set = RTYPEDDATA_GET_DATA(old_set_obj); - - // This may overcount by up to the number of threads concurrently attempting to insert - // GC may also happen between now and the set being rebuilt - int expected_size = rbimpl_atomic_load(&old_set->size, RBIMPL_ATOMIC_RELAXED) - old_set->deleted_entries; - - // NOTE: new capacity must make sense with load factor, don't change one without checking the other. 
struct concurrent_set_entry *old_entries = old_set->entries; - int old_capacity = old_set->capacity; - int new_capacity = old_capacity * 2; - if (new_capacity > expected_size * 8) { - new_capacity = old_capacity / 2; - } - else if (new_capacity > expected_size * 4) { - new_capacity = old_capacity; - } - - // May cause GC and therefore deletes, so must happen first. - VALUE new_set_obj = rb_concurrent_set_new(old_set->funcs, new_capacity); struct concurrent_set *new_set = RTYPEDDATA_GET_DATA(new_set_obj); for (int i = 0; i < old_capacity; i++) { struct concurrent_set_entry *old_entry = &old_entries[i]; - VALUE key = rbimpl_atomic_value_exchange(&old_entry->key, CONCURRENT_SET_MOVED, RBIMPL_ATOMIC_ACQUIRE); - RUBY_ASSERT(key != CONCURRENT_SET_MOVED); + VALUE prev_key_raw = rbimpl_atomic_value_exchange(&old_entry->key, CONCURRENT_SET_MOVED, RBIMPL_ATOMIC_ACQUIRE); + VALUE prev_key = prev_key_raw & CONCURRENT_SET_KEY_MASK; + RUBY_ASSERT(prev_key != CONCURRENT_SET_MOVED); + + if (prev_key < CONCURRENT_SET_SPECIAL_VALUE_COUNT) continue; - if (key < CONCURRENT_SET_SPECIAL_VALUE_COUNT) continue; - if (!RB_SPECIAL_CONST_P(key) && rb_objspace_garbage_object_p(key)) continue; + if (!RB_SPECIAL_CONST_P(prev_key) && rb_objspace_garbage_object_p(prev_key)) continue; - VALUE hash = rbimpl_atomic_value_load(&old_entry->hash, RBIMPL_ATOMIC_RELAXED) & CONCURRENT_SET_HASH_MASK; - RUBY_ASSERT(hash != 0); - RUBY_ASSERT(hash == concurrent_set_hash(old_set, key)); +#if CONCURRENT_SET_DEBUG + if (new_set->key_type == T_STRING) { + RUBY_ASSERT(BUILTIN_TYPE(prev_key) == T_STRING); + RUBY_ASSERT(FL_TEST(prev_key, RSTRING_FSTR)); + } + else { + RUBY_ASSERT(STATIC_SYM_P(prev_key)); + } +#endif + + VALUE hash = rbimpl_atomic_value_load(&old_entry->hash, RBIMPL_ATOMIC_ACQUIRE) & CONCURRENT_SET_HASH_MASK; + if (hash == 0) continue; + RUBY_ASSERT(concurrent_set_hash(old_set, prev_key) == hash); // Insert key into new_set. 
struct concurrent_set_probe probe; int idx = concurrent_set_probe_start(&probe, new_set, hash); + int start_idx = idx; while (true) { struct concurrent_set_entry *entry = &new_set->entries[idx]; - if (entry->hash == CONCURRENT_SET_EMPTY) { + if (entry->hash == 0) { RUBY_ASSERT(entry->key == CONCURRENT_SET_EMPTY); new_set->size++; RUBY_ASSERT(new_set->size <= new_set->capacity / 2); - entry->key = key; + entry->key = prev_key; // no continuation bit entry->hash = hash; break; } RUBY_ASSERT(entry->key >= CONCURRENT_SET_SPECIAL_VALUE_COUNT); - entry->hash |= CONCURRENT_SET_CONTINUATION_BIT; + entry->key |= CONCURRENT_SET_CONTINUATION_BIT; idx = concurrent_set_probe_next(&probe); + RUBY_ASSERT(idx != start_idx); } } @@ -207,12 +279,101 @@ concurrent_set_try_resize_without_locking(VALUE old_set_obj, VALUE *set_obj_ptr) RB_GC_GUARD(old_set_obj); } +// FIXME: cross-platform initializer. Also, we don't need rwlock anymore, just normal mutex will do +static pthread_rwlock_t resize_lock = PTHREAD_RWLOCK_INITIALIZER; +static pthread_t resize_lock_owner; +static unsigned int resize_lock_lvl; + +static inline void +resize_lock_wrlock(bool allow_reentry) +{ + if (allow_reentry && pthread_self() == resize_lock_owner) { + // Already held by this thread. 
+ } + else { + int r; + if ((r = pthread_rwlock_wrlock(&resize_lock))) { + rb_bug_errno("pthread_rwlock_wrlock", r); + } + resize_lock_owner = pthread_self(); + } + resize_lock_lvl++; +} + +static inline void +resize_lock_wrunlock(void) +{ + RUBY_ASSERT(resize_lock_lvl > 0); + resize_lock_lvl--; + if (resize_lock_lvl == 0) { + resize_lock_owner = 0; + int r; + if ((r = pthread_rwlock_unlock(&resize_lock))) { + rb_bug_errno("pthread_rwlock_unlock", r); + } + } +} + +static inline bool +resize_lock_rdlock(void) +{ + if (resize_lock_owner == pthread_self()) { // we have the write lock, don't take it + return false; + } + int r; + if ((r = pthread_rwlock_rdlock(&resize_lock))) { + rb_bug_errno("pthread_rwlock_rdlock", r); + } + return true; +} + +static inline void +resize_lock_rdunlock(void) +{ + int r; + if ((r = pthread_rwlock_unlock(&resize_lock))) { + rb_bug_errno("pthread_rwlock_unlock", r); + } +} + static void concurrent_set_try_resize(VALUE old_set_obj, VALUE *set_obj_ptr) { - RB_VM_LOCKING() { - concurrent_set_try_resize_without_locking(old_set_obj, set_obj_ptr); + unsigned int lev; + RB_VM_LOCK_ENTER_LEV(&lev); + { + // Check if another thread has already resized. + if (rbimpl_atomic_value_load(set_obj_ptr, RBIMPL_ATOMIC_ACQUIRE) != old_set_obj) { + RB_VM_LOCK_LEAVE_LEV(&lev); + return; + } + struct concurrent_set *old_set = RTYPEDDATA_GET_DATA(old_set_obj); + + // This may overcount by up to the number of threads concurrently attempting to insert + // GC may also happen between now and the set being rebuilt + int expected_size = rbimpl_atomic_load(&old_set->size, RBIMPL_ATOMIC_RELAXED) - old_set->deleted_entries; + + // NOTE: new capacity must make sense with load factor, don't change one without checking the other. 
+ int old_capacity = old_set->capacity; + int new_capacity = old_capacity * 2; + if (new_capacity > expected_size * 8) { + new_capacity = old_capacity / 2; + } + else if (new_capacity > expected_size * 4) { + new_capacity = old_capacity; + } + + // May cause GC and therefore deletes, so must happen first. + VALUE new_set_obj = rb_concurrent_set_new(old_set->funcs, new_capacity, old_set->key_type); + /*fprintf(stderr, "concurrent set resize from %d to %d\n", old_capacity, new_capacity);*/ + // deletes from sweep thread must not happen during resize and sweep thread can't take VM lock so it takes the resize lock + resize_lock_wrlock(true); + { + concurrent_set_try_resize_locked(old_set_obj, set_obj_ptr, new_set_obj, old_capacity); + } + resize_lock_wrunlock(); } + RB_VM_LOCK_LEAVE_LEV(&lev); } VALUE @@ -242,29 +403,39 @@ rb_concurrent_set_find(VALUE *set_obj_ptr, VALUE key) while (true) { struct concurrent_set_entry *entry = &set->entries[idx]; - VALUE curr_hash_and_flags = rbimpl_atomic_value_load(&entry->hash, RBIMPL_ATOMIC_ACQUIRE); - VALUE curr_hash = curr_hash_and_flags & CONCURRENT_SET_HASH_MASK; - bool continuation = curr_hash_and_flags & CONCURRENT_SET_CONTINUATION_BIT; - - if (curr_hash_and_flags == CONCURRENT_SET_EMPTY) { + VALUE curr_hash = rbimpl_atomic_value_load(&entry->hash, RBIMPL_ATOMIC_ACQUIRE) & CONCURRENT_SET_HASH_MASK; + + if (curr_hash == 0) { +#if CONCURRENT_SET_DEBUG_STATS + rbimpl_atomic_fetch_add(&set->find_count, 1, RBIMPL_ATOMIC_RELAXED); + rbimpl_atomic_fetch_add(&set->find_probe_total, probe.d, RBIMPL_ATOMIC_RELAXED); + concurrent_set_atomic_max(&set->find_probe_max, probe.d); +#endif return 0; } + VALUE raw_key = rbimpl_atomic_value_load(&entry->key, RBIMPL_ATOMIC_ACQUIRE); + VALUE curr_key = raw_key & CONCURRENT_SET_KEY_MASK; + bool continuation = raw_key & CONCURRENT_SET_CONTINUATION_BIT; + if (curr_hash != hash) { if (!continuation) { +#if CONCURRENT_SET_DEBUG_STATS + rbimpl_atomic_fetch_add(&set->find_count, 1, 
RBIMPL_ATOMIC_RELAXED); + rbimpl_atomic_fetch_add(&set->find_probe_total, probe.d, RBIMPL_ATOMIC_RELAXED); + concurrent_set_atomic_max(&set->find_probe_max, probe.d); +#endif return 0; } idx = concurrent_set_probe_next(&probe); continue; } - VALUE curr_key = rbimpl_atomic_value_load(&entry->key, RBIMPL_ATOMIC_ACQUIRE); - switch (curr_key) { case CONCURRENT_SET_EMPTY: - // In-progress insert: hash written but key not yet + // In-progress insert: hash written but key not yet. break; - case CONCURRENT_SET_DELETED: + case CONCURRENT_SET_TOMBSTONE: break; case CONCURRENT_SET_MOVED: // Wait @@ -280,11 +451,21 @@ rb_concurrent_set_find(VALUE *set_obj_ptr, VALUE key) if (set->funcs->cmp(key, curr_key)) { // We've found a match. +#if CONCURRENT_SET_DEBUG_STATS + rbimpl_atomic_fetch_add(&set->find_count, 1, RBIMPL_ATOMIC_RELAXED); + rbimpl_atomic_fetch_add(&set->find_probe_total, probe.d, RBIMPL_ATOMIC_RELAXED); + concurrent_set_atomic_max(&set->find_probe_max, probe.d); +#endif RB_GC_GUARD(set_obj); return curr_key; } if (!continuation) { +#if CONCURRENT_SET_DEBUG_STATS + rbimpl_atomic_fetch_add(&set->find_count, 1, RBIMPL_ATOMIC_RELAXED); + rbimpl_atomic_fetch_add(&set->find_probe_total, probe.d, RBIMPL_ATOMIC_RELAXED); + concurrent_set_atomic_max(&set->find_probe_max, probe.d); +#endif return 0; } @@ -312,7 +493,7 @@ rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data) RUBY_ASSERT(set_obj); struct concurrent_set *set = RTYPEDDATA_GET_DATA(set_obj); - key = set->funcs->create(key, data); + key = set->funcs->create(key, data); // this can join GC (takes VM Lock) VALUE hash = concurrent_set_hash(set, key); struct concurrent_set_probe probe; @@ -333,33 +514,40 @@ rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data) while (true) { struct concurrent_set_entry *entry = &set->entries[idx]; - VALUE curr_hash_and_flags = rbimpl_atomic_value_load(&entry->hash, RBIMPL_ATOMIC_ACQUIRE); - VALUE curr_hash = curr_hash_and_flags & 
CONCURRENT_SET_HASH_MASK; - bool continuation = curr_hash_and_flags & CONCURRENT_SET_CONTINUATION_BIT; - - if (curr_hash_and_flags == CONCURRENT_SET_EMPTY) { + bool can_continue_probing; + VALUE raw_hash = rbimpl_atomic_value_load(&entry->hash, RBIMPL_ATOMIC_ACQUIRE); + VALUE curr_hash = raw_hash & CONCURRENT_SET_HASH_MASK; + if (raw_hash == 0) { // Reserve this slot for our hash value - curr_hash_and_flags = rbimpl_atomic_value_cas(&entry->hash, CONCURRENT_SET_EMPTY, hash, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_RELAXED); - if (curr_hash_and_flags != CONCURRENT_SET_EMPTY) { + raw_hash = rbimpl_atomic_value_cas(&entry->hash, 0, hash, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_RELAXED); + if (raw_hash != 0) { // Lost race, retry same slot to check winner's hash continue; } - - // CAS succeeded, so these are the values stored - curr_hash_and_flags = hash; + raw_hash = hash; curr_hash = hash; - // Fall through to try to claim key } - if (curr_hash != hash) { - goto probe_next; - } - - VALUE curr_key = rbimpl_atomic_value_load(&entry->key, RBIMPL_ATOMIC_ACQUIRE); + VALUE raw_key = rbimpl_atomic_value_load(&entry->key, RBIMPL_ATOMIC_ACQUIRE); + VALUE curr_key = raw_key & CONCURRENT_SET_KEY_MASK; + bool continuation = raw_key & CONCURRENT_SET_CONTINUATION_BIT; switch (curr_key) { case CONCURRENT_SET_EMPTY: { + if ((raw_hash & CONCURRENT_SET_HASH_RECLAIMABLE_BIT) && !continuation) { + // Reclaim this reclaimable slot by clearing the reclaimable bit + VALUE prev_hash = rbimpl_atomic_value_cas(&entry->hash, raw_hash, hash, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_RELAXED); + if (prev_hash != raw_hash) { + // Lost race, retry same slot + continue; + } + curr_hash = hash; + raw_hash = hash; + } + if (curr_hash != hash) { + goto probe_next; + } rb_atomic_t prev_size = rbimpl_atomic_fetch_add(&set->size, 1, RBIMPL_ATOMIC_RELAXED); // Load_factor reached at 75% full. ex: prev_size: 32, capacity: 64, load_factor: 50%. 
@@ -370,9 +558,38 @@ rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data) goto retry; } - VALUE prev_key = rbimpl_atomic_value_cas(&entry->key, CONCURRENT_SET_EMPTY, key, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_RELAXED); - if (prev_key == CONCURRENT_SET_EMPTY) { - RUBY_ASSERT(rb_concurrent_set_find(set_obj_ptr, key) == key); + VALUE prev_raw_key = rbimpl_atomic_value_cas(&entry->key, raw_key, key | (continuation ? CONCURRENT_SET_CONTINUATION_BIT : 0), RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_RELAXED); + if (prev_raw_key == raw_key) { +#if CONCURRENT_SET_DEBUG_STATS + rbimpl_atomic_fetch_add(&set->insert_count, 1, RBIMPL_ATOMIC_RELAXED); + rbimpl_atomic_fetch_add(&set->insert_probe_total, probe.d, RBIMPL_ATOMIC_RELAXED); + concurrent_set_atomic_max(&set->insert_probe_max, probe.d); +#endif +#if CONCURRENT_SET_DEBUG_DUPLICATES + { + // Probe further to verify no duplicate of our key exists + struct concurrent_set_probe dup_probe = probe; + int dup_idx = concurrent_set_probe_next(&dup_probe); + int dup_idx_start = dup_idx; + while (true) { + struct concurrent_set_entry *dup_entry = &set->entries[dup_idx]; + VALUE dup_raw_key = rbimpl_atomic_value_load(&dup_entry->key, RBIMPL_ATOMIC_ACQUIRE); + VALUE dup_key = dup_raw_key & CONCURRENT_SET_KEY_MASK; + + if (dup_key == CONCURRENT_SET_EMPTY) break; + if (dup_key == CONCURRENT_SET_MOVED) break; + + if (dup_key >= CONCURRENT_SET_SPECIAL_VALUE_COUNT && dup_key == key) { + rb_bug("concurrent_set_find_or_insert: duplicate key %p found at index %d after inserting at index %d", + (void *)key, dup_idx, idx); + } + int next_dup_idx = concurrent_set_probe_next(&dup_probe); + if (dup_idx < dup_idx_start && next_dup_idx >= dup_idx_start) break; + if (next_dup_idx == dup_idx_start) break; + dup_idx = next_dup_idx; + } + } +#endif RB_GC_GUARD(set_obj); return key; } @@ -380,31 +597,45 @@ rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data) // Entry was not inserted. 
rbimpl_atomic_sub(&set->size, 1, RBIMPL_ATOMIC_RELAXED); - // Another thread won the race, try again at the same location. + // * Another thread with the same hash could have won the race, try again at the same location, we might find it. + // * A resize could also be underway, and `prev_raw_key` could be CONCURRENT_SET_MOVED. + // * The continuation bit could also have been set on the key just now, in which case we'll retry continue; } } - case CONCURRENT_SET_DELETED: + case CONCURRENT_SET_TOMBSTONE: break; case CONCURRENT_SET_MOVED: // Wait RB_VM_LOCKING(); goto retry; default: - // We're never GC during our search + if (curr_hash != hash) { + goto probe_next; + } // If the continuation bit wasn't set at the start of our search, - // any concurrent find with the same hash value would also look at + // any concurrent find_or_insert with the same hash value would also look at // this location and try to swap curr_key if (UNLIKELY(!RB_SPECIAL_CONST_P(curr_key) && rb_objspace_garbage_object_p(curr_key))) { if (continuation) { goto probe_next; } - rbimpl_atomic_value_cas(&entry->key, curr_key, CONCURRENT_SET_EMPTY, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_RELAXED); - continue; + { + VALUE prev = rbimpl_atomic_value_cas(&entry->key, raw_key, CONCURRENT_SET_EMPTY, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_RELAXED); + if (prev == raw_key) { + rbimpl_atomic_sub(&set->size, 1, RBIMPL_ATOMIC_RELAXED); + } + } + continue; // try to reclaim same slot, because the hash is the same and it's now EMPTY } if (set->funcs->cmp(key, curr_key)) { // We've found a live match. 
+#if CONCURRENT_SET_DEBUG_STATS + rbimpl_atomic_fetch_add(&set->insert_count, 1, RBIMPL_ATOMIC_RELAXED); + rbimpl_atomic_fetch_add(&set->insert_probe_total, probe.d, RBIMPL_ATOMIC_RELAXED); + concurrent_set_atomic_max(&set->insert_probe_max, probe.d); +#endif RB_GC_GUARD(set_obj); // We created key using set->funcs->create, but we didn't end @@ -418,8 +649,10 @@ rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data) } probe_next: - RUBY_ASSERT(curr_hash_and_flags != CONCURRENT_SET_EMPTY); - concurrent_set_mark_continuation(entry, curr_hash_and_flags); + can_continue_probing = concurrent_set_mark_continuation(entry, raw_key); + if (!can_continue_probing) { + continue; + } idx = concurrent_set_probe_next(&probe); } } @@ -429,22 +662,21 @@ concurrent_set_delete_entry_locked(struct concurrent_set *set, struct concurrent { ASSERT_vm_locking_with_barrier(); - if (entry->hash & CONCURRENT_SET_CONTINUATION_BIT) { - entry->hash = CONCURRENT_SET_CONTINUATION_BIT; - entry->key = CONCURRENT_SET_DELETED; + if (entry->key & CONCURRENT_SET_CONTINUATION_BIT) { + entry->key = CONCURRENT_SET_TOMBSTONE | CONCURRENT_SET_CONTINUATION_BIT; set->deleted_entries++; } else { - entry->hash = CONCURRENT_SET_EMPTY; + entry->hash = 0; entry->key = CONCURRENT_SET_EMPTY; set->size--; } } -VALUE -rb_concurrent_set_delete_by_identity(VALUE set_obj, VALUE key) + +static VALUE +rb_concurrent_set_delete_by_identity_locked(VALUE set_obj, VALUE key) { - ASSERT_vm_locking_with_barrier(); struct concurrent_set *set = RTYPEDDATA_GET_DATA(set_obj); @@ -452,25 +684,70 @@ rb_concurrent_set_delete_by_identity(VALUE set_obj, VALUE key) struct concurrent_set_probe probe; int idx = concurrent_set_probe_start(&probe, set, hash); + bool hash_cleared = false; + VALUE prev_hash = 0; while (true) { struct concurrent_set_entry *entry = &set->entries[idx]; - VALUE curr_key = entry->key; + VALUE raw_key = rbimpl_atomic_value_load(&entry->key, RBIMPL_ATOMIC_ACQUIRE); + VALUE loaded_hash_raw = 
rbimpl_atomic_value_load(&entry->hash, RBIMPL_ATOMIC_ACQUIRE); + VALUE loaded_hash = loaded_hash_raw & CONCURRENT_SET_HASH_MASK; + bool continuation = raw_key & CONCURRENT_SET_CONTINUATION_BIT; + VALUE curr_key = raw_key & CONCURRENT_SET_KEY_MASK; switch (curr_key) { case CONCURRENT_SET_EMPTY: - // We didn't find our entry to delete. - return 0; - case CONCURRENT_SET_DELETED: + if (!continuation) { + return 0; + } + break; + case CONCURRENT_SET_TOMBSTONE: break; case CONCURRENT_SET_MOVED: rb_bug("rb_concurrent_set_delete_by_identity: moved entry"); break; default: if (key == curr_key) { - RUBY_ASSERT((entry->hash & CONCURRENT_SET_HASH_MASK) == hash); - concurrent_set_delete_entry_locked(set, entry); - return curr_key; + VALUE new_key; + RUBY_ASSERT(hash_cleared || loaded_hash == hash); + if (continuation) { + new_key = CONCURRENT_SET_TOMBSTONE | CONCURRENT_SET_CONTINUATION_BIT; + } + else { + new_key = CONCURRENT_SET_EMPTY; + } + + if (!hash_cleared) { + // Hashes only change here and they get reclaimed in find_or_insert + prev_hash = rbimpl_atomic_value_cas(&entry->hash, loaded_hash_raw, hash | CONCURRENT_SET_HASH_RECLAIMABLE_BIT, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_RELAXED); + RUBY_ASSERT(prev_hash == hash || prev_hash == (hash | CONCURRENT_SET_HASH_RECLAIMABLE_BIT)); + hash_cleared = true; + } + VALUE prev_key = rbimpl_atomic_value_cas(&entry->key, raw_key, new_key, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_ACQUIRE); + if (prev_key == raw_key) { + if (continuation) { + rbimpl_atomic_add(&set->deleted_entries, 1, RBIMPL_ATOMIC_RELAXED); + } + else { + rbimpl_atomic_sub(&set->size, 1, RBIMPL_ATOMIC_RELAXED); + } + return curr_key; + } + else if (!continuation && prev_key == (raw_key | CONCURRENT_SET_CONTINUATION_BIT)) { + continue; // try again, the continuation bit was just set on this key so we can tombstone it + } + else if ((prev_key & CONCURRENT_SET_KEY_MASK) == CONCURRENT_SET_EMPTY || (prev_key & CONCURRENT_SET_KEY_MASK) == CONCURRENT_SET_TOMBSTONE) { + return 
curr_key; // the key was deleted by another thread + } + else { + // the key was changed to EMPTY by being garbage during find_or_insert and then a new key was put at the same slot. It's okay + // that the hash was marked reclaimable above. + RUBY_ASSERT(prev_hash != 0); + return curr_key; + } + } + else if (!continuation) { + return 0; } break; } @@ -479,8 +756,41 @@ rb_concurrent_set_delete_by_identity(VALUE set_obj, VALUE key) } } -void -rb_concurrent_set_foreach_with_replace(VALUE set_obj, int (*callback)(VALUE *key, void *data), void *data) +// This can be called concurrently by a ruby GC thread and the sweep thread. +VALUE +rb_concurrent_set_delete_by_identity(VALUE *set_obj_ptr, VALUE key) +{ + VALUE result; + bool is_sweep_thread_p(void); + + VALUE set_obj = rbimpl_atomic_value_load(set_obj_ptr, RBIMPL_ATOMIC_ACQUIRE); + + if (is_sweep_thread_p()) { + while (1) { + bool lock_taken = resize_lock_rdlock(); + { + VALUE current_set_obj = rbimpl_atomic_value_load(set_obj_ptr, RBIMPL_ATOMIC_ACQUIRE); + if (current_set_obj != set_obj) { + set_obj = current_set_obj; + // retry - resize happened + } + else { + result = rb_concurrent_set_delete_by_identity_locked(set_obj, key); + if (lock_taken) resize_lock_rdunlock(); + break; + } + } + if (lock_taken) resize_lock_rdunlock(); + } + } + else { + result = rb_concurrent_set_delete_by_identity_locked(set_obj, key); + } + return result; +} + +static void +rb_concurrent_set_foreach_with_replace_locked(VALUE set_obj, int (*callback)(VALUE *key, void *data), void *data) { ASSERT_vm_locking_with_barrier(); @@ -488,26 +798,50 @@ rb_concurrent_set_foreach_with_replace(VALUE set_obj, int (*callback)(VALUE *key for (unsigned int i = 0; i < set->capacity; i++) { struct concurrent_set_entry *entry = &set->entries[i]; - VALUE key = entry->key; + VALUE raw_key = entry->key; + bool continuation = raw_key & CONCURRENT_SET_CONTINUATION_BIT; + VALUE key = raw_key & CONCURRENT_SET_KEY_MASK; switch (key) { case CONCURRENT_SET_EMPTY: - 
case CONCURRENT_SET_DELETED: + case CONCURRENT_SET_TOMBSTONE: continue; case CONCURRENT_SET_MOVED: rb_bug("rb_concurrent_set_foreach_with_replace: moved entry"); break; default: { - int ret = callback(&entry->key, data); + VALUE cb_key = key; + int ret = callback(&cb_key, data); switch (ret) { case ST_STOP: return; case ST_DELETE: concurrent_set_delete_entry_locked(set, entry); break; + case ST_CONTINUE: + if (cb_key != key) { + // Key was replaced by callback + entry->key = cb_key | (continuation ? CONCURRENT_SET_CONTINUATION_BIT : 0); + } + break; + case ST_REPLACE: + rb_bug("unexpected concurrent_set callback return value: ST_REPLACE"); } break; } } } } + +void +rb_concurrent_set_foreach_with_replace(VALUE set_obj, int (*callback)(VALUE *key, void *data), void *data) +{ + RB_VM_LOCKING() { + // Don't allow concurrent deletes from sweep thread during this time. Maybe we can loosen this restriction. + resize_lock_wrlock(true); + { + rb_concurrent_set_foreach_with_replace_locked(set_obj, callback, data); + } + resize_lock_wrunlock(); + } +} diff --git a/cont.c b/cont.c index e5239635081629..4ada1ba00cef36 100644 --- a/cont.c +++ b/cont.c @@ -298,6 +298,63 @@ rb_free_shared_fiber_pool(void) static ID fiber_initialize_keywords[3] = {0}; +// We don't use the VM lock to protect the shared fiber pool because the sweep +// thread needs to be able to free fibers and it can't take the VM lock. 
+rb_nativethread_lock_t fiber_lock; +#ifdef RUBY_THREAD_PTHREAD_H +pthread_t fiber_pool_lock_owner; +#endif + +MAYBE_UNUSED(static inline bool +fiber_pool_locked_p(bool fallback)) +{ +#ifdef RUBY_THREAD_PTHREAD_H + return pthread_self() == fiber_pool_lock_owner; +#else + return fallback; +#endif +} + +static inline void +ASSERT_fiber_pool_locked(void) +{ +#ifdef RUBY_THREAD_PTHREAD_H + VM_ASSERT(fiber_pool_locked_p(true)); +#endif +} + +static inline void +ASSERT_fiber_pool_unlocked(void) +{ +#ifdef RUBY_THREAD_PTHREAD_H + VM_ASSERT(!fiber_pool_locked_p(false)); +#endif +} + +static inline void +fiber_pool_lock(void) { + ASSERT_fiber_pool_unlocked(); + rb_native_mutex_lock(&fiber_lock); +#ifdef RUBY_THREAD_PTHREAD_H + fiber_pool_lock_owner = pthread_self(); +#endif +} + +static inline void +fiber_pool_unlock(void) { + ASSERT_fiber_pool_locked(); +#ifdef RUBY_THREAD_PTHREAD_H + fiber_pool_lock_owner = 0; +#endif + rb_native_mutex_unlock(&fiber_lock); +} + +void +fiber_pool_lock_reset(void) +{ + rb_native_mutex_initialize(&fiber_lock); +} + /* * FreeBSD require a first (i.e. addr) argument of mmap(2) is not NULL * if MAP_STACK is passed. 
@@ -394,6 +451,7 @@ fiber_pool_vacancy_reset(struct fiber_pool_vacancy * vacancy) inline static struct fiber_pool_vacancy * fiber_pool_vacancy_push(struct fiber_pool_vacancy * vacancy, struct fiber_pool_vacancy * head) { + ASSERT_fiber_pool_locked(); vacancy->next = head; #ifdef FIBER_POOL_ALLOCATION_FREE @@ -426,6 +484,7 @@ fiber_pool_vacancy_remove(struct fiber_pool_vacancy * vacancy) inline static struct fiber_pool_vacancy * fiber_pool_vacancy_pop(struct fiber_pool * pool) { + ASSERT_fiber_pool_locked(); struct fiber_pool_vacancy * vacancy = pool->vacancies; if (vacancy) { @@ -438,6 +497,7 @@ fiber_pool_vacancy_pop(struct fiber_pool * pool) inline static struct fiber_pool_vacancy * fiber_pool_vacancy_pop(struct fiber_pool * pool) { + ASSERT_fiber_pool_locked(); struct fiber_pool_vacancy * vacancy = pool->vacancies; if (vacancy) { @@ -525,117 +585,149 @@ fiber_pool_allocate_memory(size_t * count, size_t stride) // fiber_pool_initialize before the pool is shared across threads. // @sa fiber_pool_allocation_free static struct fiber_pool_allocation * -fiber_pool_expand(struct fiber_pool * fiber_pool, size_t count) +fiber_pool_expand(struct fiber_pool * fiber_pool, size_t count, bool needs_lock, bool unlock_before_raise, struct fiber_pool_vacancy **vacancy_out) { if (count == 0) { errno = EAGAIN; return NULL; } - STACK_GROW_DIR_DETECTION; + // Allocate metadata before mmap: ruby_xmalloc (RB_ALLOC) raises on failure and + // must not run after base is mapped, or the region would leak. + struct fiber_pool_allocation * allocation = RB_ALLOC(struct fiber_pool_allocation); + + if (needs_lock) fiber_pool_lock(); // no xmalloc allocations can occur with this lock held + { + STACK_GROW_DIR_DETECTION; - size_t size = fiber_pool->size; - size_t stride = size + RB_PAGE_SIZE; + size_t size = fiber_pool->size; + size_t stride = size + RB_PAGE_SIZE; - // If the maximum number of stacks is set, and we have reached it, return NULL. 
- if (fiber_pool->maximum_count > 0) { - if (fiber_pool->count >= fiber_pool->maximum_count) { - errno = EAGAIN; - return NULL; - } - size_t remaining = fiber_pool->maximum_count - fiber_pool->count; - if (count > remaining) { - count = remaining; + // If the maximum number of stacks is set, and we have reached it, return NULL. + if (fiber_pool->maximum_count > 0) { + if (fiber_pool->count >= fiber_pool->maximum_count) { + if (unlock_before_raise) fiber_pool_unlock(); + errno = EAGAIN; + return NULL; + } + size_t remaining = fiber_pool->maximum_count - fiber_pool->count; + if (count > remaining) { + count = remaining; + } } - } - // Allocate metadata before mmap: ruby_xmalloc (RB_ALLOC) raises on failure and - // must not run after base is mapped, or the region would leak. - struct fiber_pool_allocation * allocation = RB_ALLOC(struct fiber_pool_allocation); - // Allocate the memory required for the stacks: - void * base = fiber_pool_allocate_memory(&count, stride); + // Allocate the memory required for the stacks: + void * base = fiber_pool_allocate_memory(&count, stride); - if (base == NULL) { - if (!errno) errno = ENOMEM; - ruby_xfree(allocation); - return NULL; - } + if (base == NULL) { + int saved_errno = errno; + if (!saved_errno) saved_errno = ENOMEM; + if (unlock_before_raise) fiber_pool_unlock(); + ruby_xfree(allocation); + errno = saved_errno; + return NULL; + } - struct fiber_pool_vacancy * vacancies = fiber_pool->vacancies; + struct fiber_pool_vacancy * vacancies = fiber_pool->vacancies; - // Initialize fiber pool allocation: - allocation->base = base; - allocation->size = size; - allocation->stride = stride; - allocation->count = count; + // Initialize fiber pool allocation: + allocation->base = base; + allocation->size = size; + allocation->stride = stride; + allocation->count = count; #ifdef FIBER_POOL_ALLOCATION_FREE - allocation->used = 0; + allocation->used = 0; #endif - allocation->pool = fiber_pool; + allocation->pool = fiber_pool; - if 
(DEBUG_EXPAND) { - fprintf(stderr, "fiber_pool_expand(%"PRIuSIZE"): %p, %"PRIuSIZE"/%"PRIuSIZE" x [%"PRIuSIZE":%"PRIuSIZE"]\n", - count, (void*)fiber_pool, fiber_pool->used, fiber_pool->count, size, fiber_pool->vm_stack_size); - } + if (DEBUG_EXPAND) { + fprintf(stderr, "fiber_pool_expand(%"PRIuSIZE"): %p, %"PRIuSIZE"/%"PRIuSIZE" x [%"PRIuSIZE":%"PRIuSIZE"]\n", + count, (void*)fiber_pool, fiber_pool->used, fiber_pool->count, size, fiber_pool->vm_stack_size); + } - // Iterate over all stacks, initializing the vacancy list: - for (size_t i = 0; i < count; i += 1) { - void * base = (char*)allocation->base + (stride * i); - void * page = (char*)base + STACK_DIR_UPPER(size, 0); + // Iterate over all stacks, initializing the vacancy list: + for (size_t i = 0; i < count; i += 1) { + void * base = (char*)allocation->base + (stride * i); + void * page = (char*)base + STACK_DIR_UPPER(size, 0); #if defined(_WIN32) - DWORD old_protect; - - if (!VirtualProtect(page, RB_PAGE_SIZE, PAGE_READWRITE | PAGE_GUARD, &old_protect)) { - int error = rb_w32_map_errno(GetLastError()); - VirtualFree(allocation->base, 0, MEM_RELEASE); - ruby_xfree(allocation); - errno = error; - return NULL; - } + DWORD old_protect; + + if (!VirtualProtect(page, RB_PAGE_SIZE, PAGE_READWRITE | PAGE_GUARD, &old_protect)) { + int error = rb_w32_map_errno(GetLastError()); + if (unlock_before_raise) fiber_pool_unlock(); + VirtualFree(allocation->base, 0, MEM_RELEASE); + ruby_xfree(allocation); + errno = error; + return NULL; + } #elif defined(__wasi__) - // wasi-libc's mprotect emulation doesn't support PROT_NONE. - (void)page; + // wasi-libc's mprotect emulation doesn't support PROT_NONE. 
+ (void)page; #else - if (mprotect(page, RB_PAGE_SIZE, PROT_NONE) < 0) { - int error = errno; - if (!error) error = ENOMEM; - munmap(allocation->base, count*stride); - ruby_xfree(allocation); - errno = error; - return NULL; - } + if (mprotect(page, RB_PAGE_SIZE, PROT_NONE) < 0) { + int error = errno; + if (!error) error = ENOMEM; + if (unlock_before_raise) fiber_pool_unlock(); + munmap(allocation->base, count*stride); + ruby_xfree(allocation); + errno = error; + return NULL; + } #endif - vacancies = fiber_pool_vacancy_initialize( - fiber_pool, vacancies, - (char*)base + STACK_DIR_UPPER(0, RB_PAGE_SIZE), - size - ); + vacancies = fiber_pool_vacancy_initialize( + fiber_pool, vacancies, + (char*)base + STACK_DIR_UPPER(0, RB_PAGE_SIZE), + size + ); #ifdef FIBER_POOL_ALLOCATION_FREE - vacancies->stack.allocation = allocation; + vacancies->stack.allocation = allocation; #endif - } + } - // Insert the allocation into the head of the pool: - allocation->next = fiber_pool->allocations; + // Insert the allocation into the head of the pool: + allocation->next = fiber_pool->allocations; #ifdef FIBER_POOL_ALLOCATION_FREE - if (allocation->next) { - allocation->next->previous = allocation; - } + if (allocation->next) { + allocation->next->previous = allocation; + } - allocation->previous = NULL; + allocation->previous = NULL; #endif - fiber_pool->allocations = allocation; - fiber_pool->vacancies = vacancies; - fiber_pool->count += count; + fiber_pool->allocations = allocation; + fiber_pool->vacancies = vacancies; + fiber_pool->count += count; + + if (vacancy_out) { + *vacancy_out = fiber_pool_vacancy_pop(fiber_pool); + } + + if (needs_lock) fiber_pool_unlock(); + } return allocation; } +static struct fiber_pool_vacancy * +fiber_pool_expand_and_pop(struct fiber_pool * fiber_pool, size_t count, bool needs_lock, bool unlock_before_raise) +{ + RUBY_ASSERT(needs_lock || (!needs_lock && fiber_pool_locked_p(true))); + struct fiber_pool_vacancy *vacancy_out = NULL; + struct 
fiber_pool_allocation *allocation = fiber_pool_expand(fiber_pool, count, needs_lock, unlock_before_raise, &vacancy_out); + if (allocation) { + RUBY_ASSERT(vacancy_out); + return vacancy_out; + } + else { + return NULL; + } + +} + // Initialize the specified fiber pool with the given number of stacks. // @param vm_stack_size The size of the vm stack to allocate. static void @@ -654,7 +746,7 @@ fiber_pool_initialize(struct fiber_pool * fiber_pool, size_t size, size_t minimu fiber_pool->vm_stack_size = vm_stack_size; if (fiber_pool->minimum_count > 0) { - if (RB_UNLIKELY(!fiber_pool_expand(fiber_pool, fiber_pool->minimum_count))) { + if (RB_UNLIKELY(!fiber_pool_expand(fiber_pool, fiber_pool->minimum_count, true, true, NULL))) { rb_raise(rb_eFiberError, "can't allocate initial fiber stacks (%"PRIuSIZE" x %"PRIuSIZE" bytes): %s", fiber_pool->minimum_count, fiber_pool->size, strerror(errno)); } } @@ -709,6 +801,7 @@ fiber_pool_allocation_free(struct fiber_pool_allocation * allocation) static size_t fiber_pool_stack_expand_count(const struct fiber_pool *pool) { + ASSERT_fiber_pool_locked(); const size_t maximum_allocations = FIBER_POOL_MAXIMUM_ALLOCATIONS; const size_t minimum_count = FIBER_POOL_MINIMUM_COUNT; @@ -739,19 +832,24 @@ fiber_pool_stack_expand_count(const struct fiber_pool *pool) static struct fiber_pool_vacancy * fiber_pool_stack_acquire_expand(struct fiber_pool *fiber_pool) { + // fiber_pool_lock acquired size_t count = fiber_pool_stack_expand_count(fiber_pool); if (DEBUG_ACQUIRE) fprintf(stderr, "fiber_pool_stack_acquire: expanding fiber pool by %"PRIuSIZE" stacks\n", count); struct fiber_pool_vacancy *vacancy = NULL; - if (RB_LIKELY(fiber_pool_expand(fiber_pool, count))) { - return fiber_pool_vacancy_pop(fiber_pool); + if (RB_LIKELY((vacancy = fiber_pool_expand_and_pop(fiber_pool, count, false, true)))) { + return vacancy; } else { if (DEBUG_ACQUIRE) fprintf(stderr, "fiber_pool_stack_acquire: expand failed (%s), collecting garbage\n", strerror(errno)); - 
rb_gc(); + fiber_pool_unlock(); + { + rb_gc(); + } + fiber_pool_lock(); // After running GC, the vacancy list may have some stacks: vacancy = fiber_pool_vacancy_pop(fiber_pool); @@ -763,8 +861,8 @@ fiber_pool_stack_acquire_expand(struct fiber_pool *fiber_pool) count = fiber_pool_stack_expand_count(fiber_pool); // Try to expand the fiber pool again: - if (RB_LIKELY(fiber_pool_expand(fiber_pool, count))) { - return fiber_pool_vacancy_pop(fiber_pool); + if (RB_LIKELY((vacancy = fiber_pool_expand_and_pop(fiber_pool, false, true, count)))) { + return vacancy; } else { // Okay, we really failed to acquire a stack. Give up and return NULL with errno set: @@ -779,8 +877,7 @@ fiber_pool_stack_acquire(struct fiber_pool * fiber_pool) { struct fiber_pool_vacancy * vacancy; - unsigned int lev; - RB_VM_LOCK_ENTER_LEV(&lev); + fiber_pool_lock(); { // Fast path: try to acquire a stack from the vacancy list: vacancy = fiber_pool_vacancy_pop(fiber_pool); @@ -793,7 +890,7 @@ fiber_pool_stack_acquire(struct fiber_pool * fiber_pool) // If expansion failed, raise an error: if (RB_UNLIKELY(!vacancy)) { - RB_VM_LOCK_LEAVE_LEV(&lev); + fiber_pool_unlock(); rb_raise(rb_eFiberError, "can't allocate fiber stack: %s", strerror(errno)); } } @@ -811,10 +908,9 @@ fiber_pool_stack_acquire(struct fiber_pool * fiber_pool) #ifdef FIBER_POOL_ALLOCATION_FREE vacancy->stack.allocation->used += 1; #endif - fiber_pool_stack_reset(&vacancy->stack); } - RB_VM_LOCK_LEAVE_LEV(&lev); + fiber_pool_unlock(); return vacancy->stack; } @@ -880,10 +976,11 @@ fiber_pool_stack_free(struct fiber_pool_stack * stack) #endif } -// Release and return a stack to the vacancy list. +// Release and return a stack to the vacancy list. fiber_lock is acquired upon entry. 
static void fiber_pool_stack_release(struct fiber_pool_stack * stack) { + ASSERT_fiber_pool_locked(); struct fiber_pool * pool = stack->pool; struct fiber_pool_vacancy * vacancy = fiber_pool_vacancy_pointer(stack->base, stack->size); @@ -1031,17 +1128,6 @@ fiber_stack_release(rb_fiber_t * fiber) rb_ec_clear_vm_stack(ec); } -static void -fiber_stack_release_locked(rb_fiber_t *fiber) -{ - if (!ruby_vm_during_cleanup) { - // We can't try to acquire the VM lock here because MMTK calls free in its own native thread which has no ec. - // This assertion will fail on MMTK but we currently don't have CI for debug releases of MMTK, so we can assert for now. - ASSERT_vm_locking_with_barrier(); - } - fiber_stack_release(fiber); -} - static const char * fiber_status_name(enum fiber_status s) { @@ -1204,7 +1290,11 @@ cont_free(void *ptr) else { rb_fiber_t *fiber = (rb_fiber_t*)cont; coroutine_destroy(&fiber->context); - fiber_stack_release_locked(fiber); + fiber_pool_lock(); + { + fiber_stack_release(fiber); + } + fiber_pool_unlock(); } SIZED_FREE_N(cont->saved_vm_stack.ptr, cont->saved_vm_stack.size); @@ -1373,7 +1463,7 @@ cont_handle_weak_references(void *ptr) static const rb_data_type_t rb_cont_data_type = { "continuation", {cont_mark, cont_free, cont_memsize, cont_compact, cont_handle_weak_references}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static inline void @@ -2134,7 +2224,7 @@ fiber_handle_weak_references(void *ptr) static const rb_data_type_t rb_fiber_data_type = { "fiber", {fiber_mark, fiber_free, fiber_memsize, fiber_compact, fiber_handle_weak_references}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE @@ -2892,9 +2982,11 @@ fiber_switch(rb_fiber_t *fiber, int argc, const VALUE *argv, int kw_splat, rb_fi // We cannot free the stack until the pthread is joined: #ifndef COROUTINE_PTHREAD_CONTEXT if (FIBER_TERMINATED_P(fiber)) { 
- RB_VM_LOCKING() { + fiber_pool_lock(); + { fiber_stack_release(fiber); } + fiber_pool_unlock(); } #endif @@ -3540,7 +3632,7 @@ fiber_pool_memsize(const void *ptr) static const rb_data_type_t FiberPoolDataType = { "fiber_pool", {NULL, fiber_pool_free, fiber_pool_memsize,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE @@ -3651,6 +3743,7 @@ Init_Cont(void) #endif SET_MACHINE_STACK_END(&th->ec->machine.stack_end); + rb_native_mutex_initialize(&fiber_lock); size_t minimum_count = shared_fiber_pool_minimum_count(); size_t maximum_count = shared_fiber_pool_maximum_count(); fiber_pool_initialize(&shared_fiber_pool, stack_size, minimum_count, maximum_count, vm_stack_size); diff --git a/darray.h b/darray.h index 31ab7d412aa441..08d79a45c27bd7 100644 --- a/darray.h +++ b/darray.h @@ -138,6 +138,21 @@ rb_darray_size(const void *ary) * Useful for TypedData objects. */ #define rb_darray_memsize(ary) (sizeof(*(ary)) + (rb_darray_size(ary) * sizeof((ary)->data[0]))) +/* Remove n items from the beginning of the array */ +#define rb_darray_shift_n(ary, n) rb_darray_shift_n_impl(ary, ary->data, n, sizeof((ary)->data[0])) + +static inline void +rb_darray_shift_n_impl(void *ary, void *data, size_t n, size_t type_sz) +{ + rb_darray_meta_t *meta = ary; + RUBY_ASSERT(meta->size >= n); + char *dst = (char*)data; + if (n > 0) { + memmove(dst, dst + n * type_sz, (meta->size - n) * type_sz); + meta->size -= n; + } +} + static inline void rb_darray_pop(void *ary, size_t count) { @@ -225,7 +240,9 @@ rb_darray_realloc_mul_add_without_gc(void *orig_ptr, size_t x, size_t y, size_t size_t size = rbimpl_size_add_or_raise(rbimpl_size_mul_or_raise(x, y), z); void *ptr = realloc(orig_ptr, size); - if (ptr == NULL) rb_bug("rb_darray_realloc_mul_add_without_gc: failed"); + if (ptr == NULL) { + rb_bug("rb_darray_realloc_mul_add_without_gc: failed"); + } return ptr; } diff --git a/dir.c b/dir.c index 
d67de8cf06c830..72496d0906dbc0 100644 --- a/dir.c +++ b/dir.c @@ -545,7 +545,7 @@ static const rb_data_type_t dir_data_type = { dir_free, NULL, // Nothing allocated externally, so don't need a memsize function }, - 0, NULL, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_DECL_MARKING | RUBY_TYPED_EMBEDDABLE + 0, NULL, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_DECL_MARKING | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE dir_close(VALUE); diff --git a/encoding.c b/encoding.c index 8bb393b471ed54..04f5269d63f5ea 100644 --- a/encoding.c +++ b/encoding.c @@ -122,7 +122,7 @@ static int filesystem_encindex = ENCINDEX_ASCII_8BIT; static const rb_data_type_t encoding_data_type = { "encoding", {0, 0, 0,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE }; #define is_encoding_type(obj) (RTYPEDDATA_TYPE(obj) == &encoding_data_type) diff --git a/enumerator.c b/enumerator.c index 81b71bd8b43b29..2f181918f08cb2 100644 --- a/enumerator.c +++ b/enumerator.c @@ -280,7 +280,7 @@ static const rb_data_type_t enumerator_data_type = { NULL, // Nothing allocated externally, so don't need a memsize function NULL, }, - 0, NULL, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_DECL_MARKING | RUBY_TYPED_EMBEDDABLE + 0, NULL, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_DECL_MARKING | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static struct enumerator * @@ -311,7 +311,7 @@ static const rb_data_type_t proc_entry_data_type = { NULL, // Nothing allocated externally, so don't need a memsize function proc_entry_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static struct proc_entry * @@ 
-1323,7 +1323,7 @@ static const rb_data_type_t yielder_data_type = { NULL, yielder_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static struct yielder * @@ -1447,7 +1447,7 @@ static const rb_data_type_t generator_data_type = { NULL, generator_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static struct generator * @@ -2978,7 +2978,7 @@ static const rb_data_type_t producer_data_type = { producer_memsize, producer_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static struct producer * @@ -3196,7 +3196,7 @@ static const rb_data_type_t enum_chain_data_type = { enum_chain_memsize, enum_chain_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static struct enum_chain * @@ -3511,7 +3511,7 @@ static const rb_data_type_t enum_product_data_type = { enum_product_memsize, enum_product_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static struct enum_product * @@ -3849,7 +3849,7 @@ static const rb_data_type_t arith_seq_data_type = { NULL, }, .parent = &enumerator_data_type, - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_DECL_MARKING | RUBY_TYPED_EMBEDDABLE + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_DECL_MARKING | 
RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE diff --git a/error.c b/error.c index 52bd3629bf2d13..10162bd0e1b46b 100644 --- a/error.c +++ b/error.c @@ -1117,11 +1117,16 @@ rb_bug_without_die(const char *fmt, ...) va_end(args); } +bool is_sweep_thread_p(void); + void rb_bug(const char *fmt, ...) { va_list args; va_start(args, fmt); + if (is_sweep_thread_p()) { + fprintf(stderr, "rb_bug() called from sweep_thread!\n"); + } rb_bug_without_die_internal(fmt, args); va_end(args); die(); @@ -2530,7 +2535,7 @@ static const rb_data_type_t name_err_mesg_data_type = { NULL, // No external memory to report, name_err_mesg_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; /* :nodoc: */ diff --git a/ext/date/date_core.c b/ext/date/date_core.c index f37c1a54e5f53e..f85dc3083a61be 100644 --- a/ext/date/date_core.c +++ b/ext/date/date_core.c @@ -3222,7 +3222,7 @@ static const rb_data_type_t d_lite_type = { "Date", {d_lite_gc_mark, RUBY_TYPED_DEFAULT_FREE, d_lite_memsize,}, 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED|RUBY_TYPED_FROZEN_SHAREABLE, + RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_CONCURRENT_FREE_SAFE|RUBY_TYPED_WB_PROTECTED|RUBY_TYPED_FROZEN_SHAREABLE, }; inline static VALUE diff --git a/ext/digest/digest.c b/ext/digest/digest.c index bd8d3e815ffe6a..e54f0d7bda8e7c 100644 --- a/ext/digest/digest.c +++ b/ext/digest/digest.c @@ -619,7 +619,7 @@ static const rb_data_type_t digest_type = { "digest", {0, RUBY_TYPED_DEFAULT_FREE, 0,}, 0, 0, - (RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED), + (RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_CONCURRENT_FREE_SAFE|RUBY_TYPED_WB_PROTECTED), }; static inline void diff --git a/ext/socket/raddrinfo.c b/ext/socket/raddrinfo.c index 6cdf5c6abc40e7..53a4e7f4564c11 100644 --- a/ext/socket/raddrinfo.c +++ b/ext/socket/raddrinfo.c @@ 
-1295,7 +1295,7 @@ addrinfo_memsize(const void *ptr) static const rb_data_type_t addrinfo_type = { "socket/addrinfo", {addrinfo_mark, addrinfo_free, addrinfo_memsize,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_WB_PROTECTED, + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_WB_PROTECTED, }; static VALUE diff --git a/ext/stringio/stringio.c b/ext/stringio/stringio.c index 09757a283eaf7c..fdb7f0e6550e14 100644 --- a/ext/stringio/stringio.c +++ b/ext/stringio/stringio.c @@ -119,7 +119,7 @@ static const rb_data_type_t strio_data_type = { strio_free, strio_memsize, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED // uses reference count, not concurrent free safe }; #define check_strio(self) ((struct StringIO*)rb_check_typeddata((self), &strio_data_type)) diff --git a/file.c b/file.c index e40f67ec73817a..c90a499e6603ff 100644 --- a/file.c +++ b/file.c @@ -535,7 +535,7 @@ static const rb_data_type_t stat_data_type = { RUBY_TYPED_DEFAULT_FREE, NULL, // No external memory to report }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; struct rb_stat { diff --git a/gc.c b/gc.c index d6d517d6a44c9e..817037553b67f9 100644 --- a/gc.c +++ b/gc.c @@ -151,9 +151,18 @@ rb_gc_vm_unlock(unsigned int lev, const char *file, int line) rb_vm_lock_leave(&lev, file, line); } +bool +is_sweep_thread_p(void) +{ + rb_vm_t *vm = GET_VM(); + if (!vm) return false; + return vm->gc.sweep_thread == pthread_self(); +} + unsigned int rb_gc_cr_lock(const char *file, int line) { + GC_ASSERT(!is_sweep_thread_p()); unsigned int lev; rb_vm_lock_enter_cr(GET_RACTOR(), &lev, file, line); return lev; @@ -162,6 +171,7 @@ rb_gc_cr_lock(const char *file, int line) void 
rb_gc_cr_unlock(unsigned int lev, const char *file, int line) { + GC_ASSERT(!is_sweep_thread_p()); rb_vm_lock_leave_cr(GET_RACTOR(), &lev, file, line); } @@ -1347,7 +1357,7 @@ rb_gc_obj_needs_cleanup_p(VALUE obj) } shape_id_t shape_id = RBASIC_SHAPE_ID(obj); - if (id2ref_tbl && rb_shape_has_object_id(shape_id)) return true; + if (RUBY_ATOMIC_PTR_LOAD(id2ref_tbl) && rb_shape_has_object_id(shape_id)) return true; switch (flags & RUBY_T_MASK) { case T_OBJECT: @@ -1392,8 +1402,12 @@ rb_gc_obj_needs_cleanup_p(VALUE obj) case T_COMPLEX: return rb_shape_has_fields(shape_id); + case T_ZOMBIE: + RUBY_ASSERT(flags & FL_FREEZE); + return true; + default: - UNREACHABLE_RETURN(true); + rb_bug("bad object type in needs_cleanup_p: %lu", flags & RUBY_T_MASK); } } @@ -1410,6 +1424,7 @@ make_io_zombie(void *objspace, VALUE obj) rb_gc_impl_make_zombie(objspace, obj, io_fptr_finalize, fptr); } +// Returns whether or not we can add `obj` back to the page's freelist. static bool rb_data_free(void *objspace, VALUE obj) { @@ -1476,6 +1491,7 @@ classext_iclass_free(rb_classext_t *ext, bool is_prime, VALUE box_value, void *a rb_iclass_classext_free(args->klass, ext, is_prime); } +// Returns whether or not we can add `obj` back to the page's freelist. 
bool rb_gc_obj_free(void *objspace, VALUE obj) { @@ -1580,7 +1596,7 @@ rb_gc_obj_free(void *objspace, VALUE obj) } break; case T_DATA: - if (!rb_data_free(objspace, obj)) return false; + if (!RB_LIKELY(rb_data_free(objspace, obj))) return FALSE; break; case T_MATCH: { @@ -1665,12 +1681,19 @@ rb_gc_obj_free(void *objspace, VALUE obj) rb_imemo_free((VALUE)obj); break; + case T_ZOMBIE: + GC_ASSERT(FL_TEST(obj, FL_FREEZE)); + GC_ASSERT(!FL_TEST(obj, FL_FINALIZE)); + void rb_gc_impl_free_zombie(rb_objspace_t *, VALUE); + rb_gc_impl_free_zombie(objspace, obj); + return TRUE; default: rb_bug("gc_sweep(): unknown data type 0x%x(%p) 0x%"PRIxVALUE, BUILTIN_TYPE(obj), (void*)obj, RBASIC(obj)->flags); } if (FL_TEST_RAW(obj, FL_FINALIZE)) { + GC_ASSERT(BUILTIN_TYPE(obj) != T_ZOMBIE); rb_gc_impl_make_zombie(objspace, obj, 0, 0); return FALSE; } @@ -2057,12 +2080,78 @@ id2ref_tbl_memsize(const void *data) return rb_st_memsize(data); } +// TODO: platforms other than pthread +static rb_nativethread_lock_t id2ref_tbl_lock_ = PTHREAD_MUTEX_INITIALIZER; +#ifdef RUBY_THREAD_PTHREAD_H +static pthread_t id2ref_tbl_lock_owner; +#endif +static unsigned int id2ref_tbl_lock_lvl; + +static inline void +ASSERT_id2ref_tbl_locked(void) +{ +#ifdef RUBY_THREAD_PTHREAD_H + VM_ASSERT(pthread_self() == id2ref_tbl_lock_owner); +#endif +} + +static inline void +ASSERT_id2ref_tbl_unlocked(void) +{ +#ifdef RUBY_THREAD_PTHREAD_H + VM_ASSERT(pthread_self() != id2ref_tbl_lock_owner); +#endif +} + +static inline void +id2ref_tbl_lock(bool allow_reentry) +{ + if (allow_reentry && pthread_self() == id2ref_tbl_lock_owner) { + } else { + ASSERT_id2ref_tbl_unlocked(); + rb_native_mutex_lock(&id2ref_tbl_lock_); + id2ref_tbl_lock_owner = pthread_self(); + } + id2ref_tbl_lock_lvl++; +} + +static inline bool +id2ref_tbl_trylock(bool allow_reentry) +{ + if (allow_reentry && pthread_self() == id2ref_tbl_lock_owner) { + } else { + ASSERT_id2ref_tbl_unlocked(); + if (rb_native_mutex_trylock(&id2ref_tbl_lock_) == EBUSY) { 
+ return false; + } + id2ref_tbl_lock_owner = pthread_self(); + } + id2ref_tbl_lock_lvl++; + return true; +} + +static inline void +id2ref_tbl_unlock(void) +{ + ASSERT_id2ref_tbl_locked(); + GC_ASSERT(id2ref_tbl_lock_lvl > 0); + id2ref_tbl_lock_lvl--; + if (id2ref_tbl_lock_lvl == 0) { + id2ref_tbl_lock_owner = 0; + rb_native_mutex_unlock(&id2ref_tbl_lock_); + } +} + static void id2ref_tbl_free(void *data) { - id2ref_tbl = NULL; // clear global ref - st_table *table = (st_table *)data; - st_free_table(table); + id2ref_tbl_lock(true); + { + st_table *table = (st_table *)data; + st_free_table(table); + RUBY_ATOMIC_PTR_SET(id2ref_tbl, NULL); // clear global ref + } + id2ref_tbl_unlock(); } static const rb_data_type_t id2ref_tbl_type = { @@ -2074,6 +2163,8 @@ static const rb_data_type_t id2ref_tbl_type = { // dcompact function not required because the table is reference updated // in rb_gc_vm_weak_table_foreach }, + // Not marked concurrent free safe so that we can know that when we take the VM lock and check for + // the id2ref_tbl, it won't be deleted out from under us while the VM lock is held. 
.flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY }; @@ -2088,8 +2179,14 @@ class_object_id(VALUE klass) if (existing_id) { id = existing_id; } - else if (RB_UNLIKELY(id2ref_tbl)) { - st_insert(id2ref_tbl, id, klass); + else { + if (RB_UNLIKELY(id2ref_tbl)) { + id2ref_tbl_lock(false); + { + st_insert(id2ref_tbl, id, klass); // needs VM lock for allocation + } + id2ref_tbl_unlock(); + } } RB_GC_VM_UNLOCK(lock_lev); } @@ -2135,9 +2232,13 @@ object_id0(VALUE obj) RUBY_ASSERT(RBASIC_SHAPE_ID(obj) == object_id_shape_id); RUBY_ASSERT(rb_shape_obj_has_id(obj)); - if (RB_UNLIKELY(id2ref_tbl)) { + if (RB_UNLIKELY(RUBY_ATOMIC_PTR_LOAD(id2ref_tbl))) { RB_VM_LOCKING() { - st_insert(id2ref_tbl, (st_data_t)id, (st_data_t)obj); + id2ref_tbl_lock(false); + { + st_insert(id2ref_tbl, (st_data_t)id, (st_data_t)obj); // needs VM lock for allocation + } + id2ref_tbl_unlock(); } } return id; @@ -2175,6 +2276,8 @@ build_id2ref_i(VALUE obj, void *data) { st_table *id2ref_tbl = (st_table *)data; + if (rb_objspace_garbage_object_p(obj)) return; + switch (BUILTIN_TYPE(obj)) { case T_CLASS: case T_MODULE: @@ -2208,8 +2311,8 @@ object_id_to_ref(void *objspace_ptr, VALUE object_id) unsigned int lev = RB_GC_VM_LOCK(); - if (!id2ref_tbl) { - rb_gc_vm_barrier(); // stop other ractors + if (!RUBY_ATOMIC_PTR_LOAD(id2ref_tbl)) { + rb_gc_vm_barrier(); // stop other ractors but sweep thread could still be running // GC Must not trigger while we build the table, otherwise if we end // up freeing an object that had an ID, we might try to delete it from @@ -2218,20 +2321,25 @@ object_id_to_ref(void *objspace_ptr, VALUE object_id) VALUE tmp_id2ref_value = TypedData_Wrap_Struct(0, &id2ref_tbl_type, tmp_id2ref_tbl); // build_id2ref_i will most certainly malloc, which could trigger GC and sweep - // objects we just added to the table. - // By calling rb_gc_disable() we also save having to handle potentially garbage objects. + // objects we just added to the table. 
The sweep thread could still be running so + // we need to handle garbage objects. bool gc_disabled = RTEST(rb_gc_disable()); { - id2ref_tbl = tmp_id2ref_tbl; id2ref_value = tmp_id2ref_value; - rb_gc_impl_each_object(objspace, build_id2ref_i, (void *)id2ref_tbl); + rb_gc_impl_each_object(objspace, build_id2ref_i, (void *)tmp_id2ref_tbl); + RUBY_ATOMIC_PTR_SET(id2ref_tbl, tmp_id2ref_tbl); } if (!gc_disabled) rb_gc_enable(); } VALUE obj; - bool found = st_lookup(id2ref_tbl, object_id, &obj) && !rb_gc_impl_garbage_object_p(objspace, obj); + bool found; + id2ref_tbl_lock(false); + { + found = st_lookup(id2ref_tbl, object_id, &obj) && !rb_gc_impl_garbage_object_p(objspace, obj); + } + id2ref_tbl_unlock(); RB_GC_VM_UNLOCK(lev); @@ -2247,11 +2355,11 @@ object_id_to_ref(void *objspace_ptr, VALUE object_id) } } -static inline void -obj_free_object_id(VALUE obj) +static VALUE +obj_get_object_id(VALUE obj) { VALUE obj_id = 0; - if (RB_UNLIKELY(id2ref_tbl)) { + if (RB_UNLIKELY(RUBY_ATOMIC_PTR_LOAD(id2ref_tbl))) { switch (BUILTIN_TYPE(obj)) { case T_CLASS: case T_MODULE: @@ -2259,11 +2367,11 @@ obj_free_object_id(VALUE obj) break; case T_IMEMO: if (!IMEMO_TYPE_P(obj, imemo_fields)) { - return; + break; } // fallthrough case T_OBJECT: - { + { shape_id_t shape_id = RBASIC_SHAPE_ID(obj); if (rb_shape_has_object_id(shape_id)) { obj_id = object_id_get(obj, shape_id); @@ -2271,31 +2379,77 @@ obj_free_object_id(VALUE obj) break; } default: + break; // For generic_fields, the T_IMEMO/fields is responsible for freeing the id. 
- return; } + } + return obj_id; +} + +static inline bool +obj_free_object_id(VALUE obj, bool in_user_gc_thread) +{ + if (RB_UNLIKELY(RUBY_ATOMIC_PTR_LOAD(id2ref_tbl))) { + VALUE obj_id = obj_get_object_id(obj); if (RB_UNLIKELY(obj_id)) { RUBY_ASSERT(FIXNUM_P(obj_id) || RB_TYPE_P(obj_id, T_BIGNUM)); + // If we're in the sweep thread, we must use trylock because GC could have been + // triggered by inserting into the id2ref_tbl, which means the GC thread holds the + // lock and we can't wait on it. + bool needs_id2ref_tbl_trylock = !in_user_gc_thread; + if (needs_id2ref_tbl_trylock) { + bool did_lock = id2ref_tbl_trylock(false); + if (!did_lock) return false; + } else { + id2ref_tbl_lock(true); + } if (!st_delete(id2ref_tbl, (st_data_t *)&obj_id, NULL)) { // The the object is a T_IMEMO/fields, then it's possible the actual object // has been garbage collected already. if (!RB_TYPE_P(obj, T_IMEMO)) { + id2ref_tbl_unlock(); rb_bug("Object ID seen, but not in _id2ref table: object_id=%llu object=%s", NUM2ULL(obj_id), rb_obj_info(obj)); } } + id2ref_tbl_unlock(); } } + return true; } -void +bool +rb_gc_obj_free_concurrency_safe_vm_weak_references(VALUE obj) +{ + bool result = obj_free_object_id(obj, false); + if (RB_UNLIKELY(rb_obj_gen_fields_p(obj))) { + bool freed_generic = rb_free_generic_ivar(obj); + if (!freed_generic) result = false; + } + switch (BUILTIN_TYPE(obj)) { + case T_STRING: + if (FL_TEST_RAW(obj, RSTRING_FSTR)) { + rb_gc_free_fstring(obj); + } + break; + case T_SYMBOL: + rb_gc_free_dsymbol(obj); + break; + default: + break; + } + return result; +} + +bool rb_gc_obj_free_vm_weak_references(VALUE obj) { ASSUME(!RB_SPECIAL_CONST_P(obj)); - obj_free_object_id(obj); - if (rb_obj_gen_fields_p(obj)) { + obj_free_object_id(obj, true); + + if (RB_UNLIKELY(rb_obj_gen_fields_p(obj))) { rb_free_generic_ivar(obj); } @@ -2323,6 +2477,7 @@ rb_gc_obj_free_vm_weak_references(VALUE obj) default: break; } + return true; } /* @@ -2649,7 +2804,14 @@ count_objects_i(VALUE 
obj, void *d) struct count_objects_data *data = (struct count_objects_data *)d; if (RBASIC(obj)->flags) { - data->counts[BUILTIN_TYPE(obj)]++; + // This will make sure the count is like the old behavior when we used to turn a zombie into + // T_NONE right after the finalizer and/or free function ran. + if (BUILTIN_TYPE(obj) == T_ZOMBIE && FL_TEST(obj, FL_FREEZE)) { + data->freed++; + } + else { + data->counts[BUILTIN_TYPE(obj)]++; + } } else { data->freed++; @@ -4185,6 +4347,7 @@ vm_weak_table_gen_fields_foreach(st_data_t key, st_data_t value, st_data_t data) if (key != new_key || value != new_value) { DURING_GC_COULD_MALLOC_REGION_START(); { + // We're STW, no need for gen_fields_tbl_lock st_insert(rb_generic_fields_tbl_get(), (st_data_t)new_key, new_value); } DURING_GC_COULD_MALLOC_REGION_END(); @@ -4255,7 +4418,7 @@ rb_gc_vm_weak_table_foreach(vm_table_foreach_callback_func callback, break; } case RB_GC_VM_ID2REF_TABLE: { - if (id2ref_tbl) { + if (id2ref_tbl) { // we're STW, no need for lock st_foreach_with_replace( id2ref_tbl, vm_weak_table_id2ref_foreach, @@ -4267,7 +4430,7 @@ rb_gc_vm_weak_table_foreach(vm_table_foreach_callback_func callback, } case RB_GC_VM_GENERIC_FIELDS_TABLE: { st_table *generic_fields_tbl = rb_generic_fields_tbl_get(); - if (generic_fields_tbl) { + if (generic_fields_tbl) { // we're STW, no need for lock st_foreach( generic_fields_tbl, vm_weak_table_gen_fields_foreach, @@ -4842,7 +5005,7 @@ rb_method_type_name(rb_method_type_t type) static void rb_raw_iseq_info(char *const buff, const size_t buff_size, const rb_iseq_t *iseq) { - if (buff_size > 0 && ISEQ_BODY(iseq) && ISEQ_BODY(iseq)->location.label && !RB_TYPE_P(ISEQ_BODY(iseq)->location.pathobj, T_MOVED)) { + if (buff_size > 0 && ISEQ_BODY(iseq) && ISEQ_BODY(iseq)->location.label && !rb_objspace_garbage_object_p(ISEQ_BODY(iseq)->location.pathobj)) { VALUE path = rb_iseq_path(iseq); int n = ISEQ_BODY(iseq)->location.first_lineno; snprintf(buff, buff_size, " %s@%s:%d", @@ -4873,7 
+5036,7 @@ str_len_no_raise(VALUE str) #define C(c, s) ((c) != 0 ? (s) : " ") static size_t -rb_raw_obj_info_common(char *const buff, const size_t buff_size, const VALUE obj) +rb_raw_obj_info_common(char *const buff, const size_t buff_size, const VALUE obj, bool *is_garbage_out) { size_t pos = 0; @@ -4916,6 +5079,10 @@ rb_raw_obj_info_common(char *const buff, const size_t buff_size, const VALUE obj else if (RBASIC(obj)->klass == 0) { APPEND_S("(temporary internal)"); } + else if (rb_objspace_garbage_object_p(RBASIC(obj)->klass)) { + APPEND_S("(garbage class)"); + *is_garbage_out = true; + } else if (RTEST(RBASIC(obj)->klass)) { VALUE class_path = rb_class_path_cached(RBASIC(obj)->klass); if (!NIL_P(class_path)) { @@ -5014,9 +5181,14 @@ rb_raw_obj_info_buitin_type(char *const buff, const size_t buff_size, const VALU } case T_ICLASS: { - VALUE class_path = rb_class_path_cached(RBASIC_CLASS(obj)); - if (!NIL_P(class_path)) { - APPEND_F("src:%s", RSTRING_PTR(class_path)); + if (rb_objspace_garbage_object_p(RBASIC_CLASS(obj))) { + APPEND_S("src: garbage"); + } + else { + VALUE class_path = rb_class_path_cached(RBASIC_CLASS(obj)); + if (!NIL_P(class_path)) { + APPEND_F("src:%s", RSTRING_PTR(class_path)); + } } break; } @@ -5157,8 +5329,11 @@ rb_asan_poisoned_object_p(VALUE obj) static void raw_obj_info(char *const buff, const size_t buff_size, VALUE obj) { - size_t pos = rb_raw_obj_info_common(buff, buff_size, obj); - pos = rb_raw_obj_info_buitin_type(buff, buff_size, obj, pos); + bool is_garbage = false; + size_t pos = rb_raw_obj_info_common(buff, buff_size, obj, &is_garbage); + if (!is_garbage) { + pos = rb_raw_obj_info_buitin_type(buff, buff_size, obj, pos); + } if (pos >= buff_size) {} // truncated } @@ -5173,11 +5348,9 @@ rb_raw_obj_info(char *const buff, const size_t buff_size, VALUE obj) else if (!rb_gc_impl_pointer_to_heap_p(objspace, (const void *)obj)) { snprintf(buff, buff_size, "out-of-heap:%p", (void *)obj); } -#if 0 // maybe no need to check it? 
- else if (0 && rb_gc_impl_garbage_object_p(objspace, obj)) { + else if (rb_gc_impl_garbage_object_p(objspace, obj)) { snprintf(buff, buff_size, "garbage:%p", (void *)obj); } -#endif else { asan_unpoisoning_object(obj) { raw_obj_info(buff, buff_size, obj); diff --git a/gc.rb b/gc.rb index 895a82b7343c01..01d798addb1596 100644 --- a/gc.rb +++ b/gc.rb @@ -147,7 +147,7 @@ def self.count # sweeping_time: 0, # heap_allocated_pages: 521, # heap_empty_pages: 0, - # heap_allocatable_bytes: 0, + # heap_allocatable_bytes: 0, # heap_available_slots: 539590, # heap_live_slots: 422243, # heap_free_slots: 117347, diff --git a/gc/default/default.c b/gc/default/default.c index 1b7d109ce69a99..40f8d4501d2068 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -25,8 +25,11 @@ #include "ruby/atomic.h" #include "ruby/debug.h" #include "ruby/thread.h" +#include "ruby/thread_native.h" #include "ruby/util.h" #include "ruby/vm.h" + +#include #include "ruby/internal/encoding/string.h" #include "ccan/list/list.h" #include "darray.h" @@ -112,6 +115,16 @@ #ifndef GC_HEAP_INIT_BYTES #define GC_HEAP_INIT_BYTES (2560 * 1024) #endif + +#define PSWEEP_DEBUG 0 +#if PSWEEP_DEBUG +#define psweep_debug(lvl, ...) if (lvl <= PSWEEP_DEBUG) fprintf(stderr, __VA_ARGS__) +#else +#define psweep_debug(...) 
(void)0 +#endif +#define PSWEEP_LOCK_STATS 0 +#define PSWEEP_COLLECT_TIMINGS 0 + #ifndef GC_HEAP_FREE_SLOTS #define GC_HEAP_FREE_SLOTS 4096 #endif @@ -165,8 +178,10 @@ #ifdef RB_THREAD_LOCAL_SPECIFIER #define USE_MALLOC_INCREASE_LOCAL 1 static RB_THREAD_LOCAL_SPECIFIER int malloc_increase_local; +static RB_THREAD_LOCAL_SPECIFIER struct heap_page *current_sweep_thread_page; #else #define USE_MALLOC_INCREASE_LOCAL 0 +static struct heap_page *current_sweep_thread_page; #endif #ifndef GC_CAN_COMPILE_COMPACTION @@ -464,16 +479,33 @@ typedef struct rb_heap_struct { /* Sweeping statistics */ size_t freed_slots; size_t empty_slots; +#if RUBY_DEBUG + size_t zombie_slots; // pre-existing zombies not ready yet to free +#endif struct heap_page *free_pages; struct ccan_list_head pages; - struct heap_page *sweeping_page; /* iterator for .pages */ + struct heap_page *sweeping_page; /* iterator for .pages. It always points to the next page to sweep. */ + struct heap_page *pre_sweeping_page; /* Background thread is currently sweeping this page */ + struct heap_page *swept_pages; /* pages claimed and swept by background thread */ + struct heap_page *latest_swept_page; // tail of `swept_pages` struct heap_page *compact_cursor; uintptr_t compact_cursor_index; struct heap_page *pooled_pages; size_t total_pages; /* total page count in a heap */ size_t total_slots; /* total slot count */ +#if RUBY_DEBUG + rb_atomic_t made_zombies; +#endif + rb_atomic_t foreground_sweep_steps; // incremented by ruby thread, checked by sweep thread + rb_atomic_t background_sweep_steps; // only incremented/checked by sweep thread + rb_nativethread_cond_t sweep_page_cond; // associated with global sweep lock + rb_nativethread_lock_t swept_pages_lock; + size_t pre_swept_slots_deferred; + bool is_finished_sweeping; + bool done_background_sweep; + bool skip_sweep_continue; // skip current sweep continue } rb_heap_t; enum { @@ -513,16 +545,27 @@ typedef struct rb_objspace { struct { unsigned int mode : 2; 
unsigned int immediate_sweep : 1; - unsigned int dont_gc : 1; unsigned int dont_incremental : 1; - unsigned int during_gc : 1; unsigned int during_compacting : 1; +#if RUBY_DEBUG + unsigned int was_compacting: 1; +#endif unsigned int during_reference_updating : 1; - unsigned int gc_stressful: 1; - unsigned int during_minor_gc : 1; unsigned int during_incremental_marking : 1; unsigned int measure_gc : 1; } flags; + // This can't be a bitfield because it's accessed in garbage_object_p() from the sweep thread + // while the ruby GC thread could be running and changing other bitfields. + bool during_lazy_sweeping; + // This one too, it's accessed in debug_free_check + bool during_minor_gc; + bool during_gc; + bool dont_gc; + bool gc_stressful; +#if RUBY_DEBUG + size_t will_be_swept_slots; + size_t have_swept_slots; +#endif rb_event_flag_t hook_events; @@ -530,6 +573,21 @@ typedef struct rb_objspace { size_t empty_pages_count; struct heap_page *empty_pages; + rb_nativethread_lock_t sweep_lock; + rb_nativethread_cond_t sweep_cond; + pthread_t sweep_thread; + bool sweep_thread_running; + bool sweep_thread_sweep_requested; + bool sweep_thread_sweep_exited; + bool sweep_thread_waiting_request; + bool sweep_thread_sweeping; + rb_atomic_t use_background_sweep_thread; + bool background_sweep_mode; + bool background_sweep_abort; + bool background_sweep_restart_heaps; + bool sweep_rest; + unsigned int heaps_done_background_sweep; + struct { rb_atomic_t finalizing; } atomic_flags; @@ -568,6 +626,11 @@ typedef struct rb_objspace { size_t minor_gc_count; size_t major_gc_count; + size_t major_gc_count_by_nofree; + size_t major_gc_count_by_oldgen; + size_t major_gc_count_by_shady; + size_t major_gc_count_by_force; + size_t major_gc_count_by_oldmalloc; size_t compact_count; size_t read_barrier_faults; #if RGENGC_PROFILE > 0 @@ -601,6 +664,16 @@ typedef struct rb_objspace { unsigned long long sweeping_time_ns; struct timespec sweeping_start_time; +#if PSWEEP_COLLECT_TIMINGS > 0 + /* 
Ruby thread sweep time tracking (always collected) */ + unsigned long long ruby_thread_sweep_cpu_time_ns; + unsigned long long ruby_thread_sweep_wall_time_ns; + struct timespec ruby_thread_sweep_cpu_start_time; + struct timespec ruby_thread_sweep_wall_start_time; +#endif + size_t pages_swept_by_sweep_thread; + size_t pages_swept_by_sweep_thread_had_deferred_free_objects; + /* Weak references */ size_t weak_references_count; } profile; @@ -779,11 +852,17 @@ struct heap_page { unsigned short free_slots; unsigned short final_slots; unsigned short pinned_slots; + unsigned short pre_freed_slots; + unsigned short pre_empty_slots; + unsigned short pre_deferred_free_slots; + unsigned short pre_final_slots; + unsigned short pre_zombie_slots; + size_t pre_freed_malloc_bytes; struct { - unsigned int before_sweep : 1; unsigned int has_remembered_objects : 1; unsigned int has_uncollectible_wb_unprotected_objects : 1; } flags; + rb_atomic_t before_sweep; // bool rb_heap_t *heap; @@ -804,6 +883,7 @@ struct heap_page { /* If set, the object is not movable */ bits_t pinned_bits[HEAP_PAGE_BITMAP_LIMIT]; bits_t age_bits[HEAP_PAGE_BITMAP_LIMIT * RVALUE_AGE_BIT_COUNT]; + bits_t deferred_free_bits[HEAP_PAGE_BITMAP_LIMIT]; }; /* @@ -859,6 +939,12 @@ slot_index_for_offset(size_t offset, uint32_t div_magic) return (size_t)(((uint64_t)offset * div_magic) >> 32); } +static inline unsigned +popcount_bits(bits_t x) +{ + return rb_popcount_intptr((uintptr_t)x); +} + #define SLOT_INDEX(page, p) slot_index_for_offset((uintptr_t)(p) - (page)->start, (page)->slot_div_magic) #define SLOT_BITMAP_INDEX(page, p) (SLOT_INDEX(page, p) / BITS_BITLENGTH) #define SLOT_BITMAP_OFFSET(page, p) (SLOT_INDEX(page, p) & (BITS_BITLENGTH - 1)) @@ -926,10 +1012,10 @@ RVALUE_AGE_SET(VALUE obj, int age) #define heap_pages_freeable_pages objspace->heap_pages.freeable_pages #define heap_pages_deferred_final objspace->heap_pages.deferred_final #define heaps objspace->heaps -#define during_gc objspace->flags.during_gc 
+#define during_gc objspace->during_gc #define finalizing objspace->atomic_flags.finalizing #define finalizer_table objspace->finalizer_table -#define ruby_gc_stressful objspace->flags.gc_stressful +#define ruby_gc_stressful objspace->gc_stressful #define ruby_gc_stress_mode objspace->gc_stress_mode #if GC_DEBUG_STRESS_TO_CLASS #define stress_to_class objspace->stress_to_class @@ -940,15 +1026,15 @@ RVALUE_AGE_SET(VALUE obj, int age) #endif #if 0 -#define dont_gc_on() (fprintf(stderr, "dont_gc_on@%s:%d\n", __FILE__, __LINE__), objspace->flags.dont_gc = 1) -#define dont_gc_off() (fprintf(stderr, "dont_gc_off@%s:%d\n", __FILE__, __LINE__), objspace->flags.dont_gc = 0) -#define dont_gc_set(b) (fprintf(stderr, "dont_gc_set(%d)@%s:%d\n", __FILE__, __LINE__), objspace->flags.dont_gc = (int)(b)) -#define dont_gc_val() (objspace->flags.dont_gc) +#define dont_gc_on() (fprintf(stderr, "dont_gc_on@%s:%d\n", __FILE__, __LINE__), objspace->dont_gc = 1) +#define dont_gc_off() (fprintf(stderr, "dont_gc_off@%s:%d\n", __FILE__, __LINE__), objspace->dont_gc = 0) +#define dont_gc_set(b) (fprintf(stderr, "dont_gc_set(%d)@%s:%d\n", __FILE__, __LINE__), objspace->dont_gc = (int)(b)) +#define dont_gc_val() (objspace->dont_gc) #else -#define dont_gc_on() (objspace->flags.dont_gc = 1) -#define dont_gc_off() (objspace->flags.dont_gc = 0) -#define dont_gc_set(b) (objspace->flags.dont_gc = (int)(b)) -#define dont_gc_val() (objspace->flags.dont_gc) +#define dont_gc_on() (objspace->dont_gc = 1) +#define dont_gc_off() (objspace->dont_gc = 0) +#define dont_gc_set(b) (objspace->dont_gc = (bool)(b)) +#define dont_gc_val() (objspace->dont_gc) #endif #define gc_config_full_mark_set(b) (objspace->gc_config.full_mark = (int)(b)) @@ -983,15 +1069,224 @@ gc_mode_verify(enum gc_mode mode) return mode; } -static inline bool +#if PSWEEP_LOCK_STATS > 0 +/* Lock contention statistics per callsite */ +#define MAX_LOCK_CALLSITES 100 + +typedef struct lock_callsite_stats { + const char *function; + int line; + 
size_t acquired_without_contention; + size_t contended; +} lock_callsite_stats_t; + +typedef struct lock_stats { + const char *name; + lock_callsite_stats_t callsites[MAX_LOCK_CALLSITES]; + int num_callsites; +} lock_stats_t; + +static lock_stats_t sweep_lock_stats = {"objspace->sweep_lock", {{0}}, 0}; +static lock_stats_t swept_pages_lock_stats = {"heap->swept_pages_lock", {{0}}, 0}; + + +static lock_callsite_stats_t* +find_or_create_callsite(lock_stats_t *stats, const char *function, int line) +{ + /* Find existing callsite */ + for (int i = 0; i < stats->num_callsites; i++) { + if (stats->callsites[i].function == function && stats->callsites[i].line == line) { + return &stats->callsites[i]; + } + } + + /* Create new callsite if space available */ + if (stats->num_callsites < MAX_LOCK_CALLSITES) { + lock_callsite_stats_t *callsite = &stats->callsites[stats->num_callsites++]; + callsite->function = function; + callsite->line = line; + callsite->acquired_without_contention = 0; + callsite->contended = 0; + return callsite; + } + + /* No space - return last callsite as overflow */ + return &stats->callsites[MAX_LOCK_CALLSITES - 1]; +} + +static void +instrumented_lock_acquire_impl(rb_nativethread_lock_t *lock, lock_stats_t *stats, const char *function, int line) +{ + lock_callsite_stats_t *callsite = find_or_create_callsite(stats, function, line); + + if (rb_native_mutex_trylock(lock) == 0) { + callsite->acquired_without_contention++; + } + else { + callsite->contended++; + rb_native_mutex_lock(lock); + } +} + +/* Macro to automatically pass function and line */ +#define instrumented_lock_acquire(lock, stats) \ + instrumented_lock_acquire_impl(lock, stats, __FUNCTION__, __LINE__) + +static void +print_lock_stats(void) +{ + fprintf(stderr, "\n=== Lock Contention Statistics by Callsite ===\n"); + fprintf(stderr, "%-40s %-30s %12s %12s %10s\n", "Lock Name", "Callsite", "Uncontended", "Contended", "Ratio"); + fprintf(stderr, "%-40s %-30s %12s %12s %10s\n", "---------", 
"--------", "-----------", "---------", "-----"); + + lock_stats_t *all_stats[] = {&sweep_lock_stats, &swept_pages_lock_stats}; + + for (int i = 0; i < 2; i++) { + lock_stats_t *stats = all_stats[i]; + + /* Sort callsites by total contentions (descending) */ + for (int j = 0; j < stats->num_callsites - 1; j++) { + for (int k = j + 1; k < stats->num_callsites; k++) { + if (stats->callsites[k].contended > stats->callsites[j].contended) { + lock_callsite_stats_t temp = stats->callsites[j]; + stats->callsites[j] = stats->callsites[k]; + stats->callsites[k] = temp; + } + } + } + + /* Print callsites for this lock */ + for (int j = 0; j < stats->num_callsites; j++) { + lock_callsite_stats_t *cs = &stats->callsites[j]; + size_t total = cs->acquired_without_contention + cs->contended; + if (total > 0) { + char callsite_buf[32]; + snprintf(callsite_buf, sizeof(callsite_buf), "%s:%d", cs->function, cs->line); + + double ratio = (double)cs->contended / total * 100.0; + fprintf(stderr, "%-40s %-30s %12zu %12zu %9.2f%%\n", + j == 0 ? 
stats->name : "", + callsite_buf, + cs->acquired_without_contention, + cs->contended, + ratio); + } + } + } + fprintf(stderr, "================================================\n\n"); +} +#endif /* PSWEEP_LOCK_STATS > 0 */ + +static pthread_t sweep_lock_owner = 0; + +static inline void +sweep_lock_lock_impl(rb_nativethread_lock_t *sweep_lock, const char *function, int line) +{ + GC_ASSERT(sweep_lock_owner != pthread_self()); +#if PSWEEP_LOCK_STATS > 0 + instrumented_lock_acquire_impl(sweep_lock, &sweep_lock_stats, function, line); +#else + rb_native_mutex_lock(sweep_lock); +#endif + GC_ASSERT(sweep_lock_owner == 0); +#if VM_CHECK_MODE > 0 + sweep_lock_owner = pthread_self(); +#endif +} + +#define sweep_lock_lock(sweep_lock) \ + sweep_lock_lock_impl(sweep_lock, __FUNCTION__, __LINE__) + +static inline void +sweep_lock_unlock(rb_nativethread_lock_t *sweep_lock) +{ +#if VM_CHECK_MODE > 0 + GC_ASSERT(sweep_lock_owner == pthread_self()); + sweep_lock_owner = 0; +#endif + rb_native_mutex_unlock(sweep_lock); +} + +static inline void +sweep_lock_set_locked(void) +{ + GC_ASSERT(sweep_lock_owner == 0); +#if VM_CHECK_MODE > 0 + sweep_lock_owner = pthread_self(); +#endif +} + +static inline void +sweep_lock_set_unlocked(void) +{ +#if VM_CHECK_MODE > 0 + GC_ASSERT(sweep_lock_owner == pthread_self()); + sweep_lock_owner = 0; +#endif +} + +// Returns true when the background sweep thread and Ruby thread have finished processing +// (background sweeping + ruby thread post-processing or deferred freeing) all pages for that heap. 
+static bool +heap_is_sweep_done(rb_objspace_t *objspace, rb_heap_t *heap) +{ + if (heap->is_finished_sweeping) { + psweep_debug(2, "[gc] heap_is_sweep_done: %d, heap:%p (%ld), heap->is_finished_sweeping\n", true, heap, heap - heaps); + return true; + } + if (!objspace->use_background_sweep_thread) { + bool done = heap->sweeping_page == NULL; + psweep_debug(2, "[gc] heap_is_sweep_done: %d, heap:%p (%ld), !use_background_thread\n", done, heap, heap - heaps); + return done; + } + + // We always dequeue the last page, never the sweep thread. This avoids locking in the common case. + // It should be synchronized, but it's a "benign race". + if (heap->sweeping_page) { + return false; + } + + bool done; + sweep_lock_lock(&objspace->sweep_lock); + if (heap->sweeping_page || heap->swept_pages) { + psweep_debug(2, "heap_is_sweep_done: %d, heap:%p (%ld), swept_pages:%d, sweeping_page:%p\n", false, heap, heap - heaps, heap->swept_pages != 0, heap->sweeping_page); + done = false; + } + else if (heap->pre_sweeping_page) { + sweep_lock_set_unlocked(); + // We need to wait because this is the final page for this heap, and the caller calls us + // like `while (!heap_is_sweep_done(heap)) { gc_sweep_step(heap) }` (we don't want to spin). + rb_native_cond_wait(&heap->sweep_page_cond, &objspace->sweep_lock); + sweep_lock_set_locked(); + GC_ASSERT(heap->swept_pages); + done = false; + } + else { + done = true; + } + sweep_lock_unlock(&objspace->sweep_lock); + return done; +} + +// Does the GC still have pages to sweep? If returns false, then the Ruby thread has fully +// processed all the pages in every heap. 
+static bool has_sweeping_pages(rb_objspace_t *objspace) { + rb_heap_t *heap_not_finished = NULL; for (int i = 0; i < HEAP_COUNT; i++) { - if ((&heaps[i])->sweeping_page) { - return TRUE; + rb_heap_t *heap = &heaps[i]; + if (!heap->is_finished_sweeping) { + if (heap_not_finished) { + return true; + } + else { + heap_not_finished = heap; + } } } - return FALSE; + if (!heap_not_finished) return false; // all done + return !heap_is_sweep_done(objspace, heap_not_finished); } static inline size_t @@ -1032,7 +1327,7 @@ total_final_slots_count(rb_objspace_t *objspace) size_t count = 0; for (int i = 0; i < HEAP_COUNT; i++) { rb_heap_t *heap = &heaps[i]; - count += heap->final_slots_count; + count += (size_t)RUBY_ATOMIC_VALUE_LOAD(heap->final_slots_count); } return count; } @@ -1043,12 +1338,12 @@ total_final_slots_count(rb_objspace_t *objspace) #define is_marking(objspace) (gc_mode(objspace) == gc_mode_marking) #define is_sweeping(objspace) (gc_mode(objspace) == gc_mode_sweeping) -#define is_full_marking(objspace) ((objspace)->flags.during_minor_gc == FALSE) +#define is_full_marking(objspace) ((objspace)->during_minor_gc == FALSE) #define is_incremental_marking(objspace) ((objspace)->flags.during_incremental_marking != FALSE) #define will_be_incremental_marking(objspace) ((objspace)->rgengc.need_major_gc != GPR_FLAG_NONE) #define GC_INCREMENTAL_SWEEP_SLOT_COUNT 2048 #define GC_INCREMENTAL_SWEEP_POOL_SLOT_COUNT 1024 -#define is_lazy_sweeping(objspace) (GC_ENABLE_LAZY_SWEEP && has_sweeping_pages(objspace)) +#define is_lazy_sweeping(objspace) ((objspace)->during_lazy_sweeping != FALSE) /* In lazy sweeping or the previous incremental marking finished and did not yield a free page. 
*/ #define needs_continue_sweeping(objspace, heap) \ ((heap)->free_pages == NULL && is_lazy_sweeping(objspace)) @@ -1081,6 +1376,8 @@ static int garbage_collect(rb_objspace_t *, unsigned int reason); static int gc_start(rb_objspace_t *objspace, unsigned int reason); static void gc_rest(rb_objspace_t *objspace); +static inline void atomic_sub_nounderflow(size_t *var, size_t sub); +static size_t malloc_increase_local_flush(rb_objspace_t *objspace); enum gc_enter_event { gc_enter_event_start, @@ -1093,7 +1390,7 @@ static inline void gc_enter(rb_objspace_t *objspace, enum gc_enter_event event, static inline void gc_exit(rb_objspace_t *objspace, enum gc_enter_event event, unsigned int *lock_lev); static void gc_marking_enter(rb_objspace_t *objspace); static void gc_marking_exit(rb_objspace_t *objspace); -static void gc_sweeping_enter(rb_objspace_t *objspace); +static void gc_sweeping_enter(rb_objspace_t *objspace, const char *from_fn); static void gc_sweeping_exit(rb_objspace_t *objspace); static bool gc_marks_continue(rb_objspace_t *objspace, rb_heap_t *heap); @@ -1251,6 +1548,15 @@ RVALUE_MARKED(rb_objspace_t *objspace, VALUE obj) return RVALUE_MARKED_BITMAP(obj) != 0; } +static inline int +RVALUE_MARKED_ATOMIC(rb_objspace_t *objspace, VALUE obj) +{ + bits_t *bits = GET_HEAP_MARK_BITS(obj); + struct heap_page *page = GET_HEAP_PAGE(obj); + bits_t word = rbimpl_atomic_value_load((VALUE*)&bits[SLOT_BITMAP_INDEX(page, obj)], RBIMPL_ATOMIC_ACQUIRE); + return (word & SLOT_BITMAP_BIT(page, obj)) != 0; +} + static inline int RVALUE_PINNED(rb_objspace_t *objspace, VALUE obj) { @@ -1299,6 +1605,10 @@ check_rvalue_consistency_force(rb_objspace_t *objspace, const VALUE obj, int ter { int err = 0; + + rb_execution_context_t *ec = rb_current_execution_context(false); + if (!ec) return 0; // sweep thread + int lev = RB_GC_VM_LOCK_NO_BARRIER(); { if (SPECIAL_CONST_P(obj)) { @@ -1338,7 +1648,7 @@ check_rvalue_consistency_force(rb_objspace_t *objspace, const VALUE obj, int ter 
fprintf(stderr, "check_rvalue_consistency: %s is T_NONE.\n", rb_obj_info(obj)); err++; } - if (BUILTIN_TYPE(obj) == T_ZOMBIE) { + if (BUILTIN_TYPE(obj) == T_ZOMBIE && !FL_TEST(obj, FL_FREEZE)) { fprintf(stderr, "check_rvalue_consistency: %s is T_ZOMBIE.\n", rb_obj_info(obj)); err++; } @@ -1590,6 +1900,12 @@ rb_gc_impl_get_measure_total_time(void *objspace_ptr) return objspace->flags.measure_gc; } +#define ZOMBIE_OBJ_KEPT_FLAGS (FL_FINALIZE) +// Zombie needs to be put back on the freelist later (during GC) and finalizer has ran +#define ZOMBIE_NEEDS_FREE_FLAG (FL_FREEZE) +#define ZOMBIE_NEEDS_FREE_P(zombie) (FL_TEST(zombie, ZOMBIE_NEEDS_FREE_FLAG)) +#define ZOMBIE_SET_NEEDS_FREE_FLAG(zombie) (FL_SET(zombie, ZOMBIE_NEEDS_FREE_FLAG)) + /* garbage objects will be collected soon. */ bool rb_gc_impl_garbage_object_p(void *objspace_ptr, VALUE ptr) @@ -1598,29 +1914,57 @@ rb_gc_impl_garbage_object_p(void *objspace_ptr, VALUE ptr) bool dead = false; - asan_unpoisoning_object(ptr) { - switch (BUILTIN_TYPE(ptr)) { - case T_NONE: - case T_MOVED: - case T_ZOMBIE: - dead = true; - break; - default: - break; + // Set to false/true by the ruby GC thread when entering/exiting GC, so shouldn't change throughout this call. 
+ rb_atomic_t use_sweep_thread = rbimpl_atomic_load(&objspace->use_background_sweep_thread, RBIMPL_ATOMIC_RELAXED); + + if (!use_sweep_thread) { + // It's not safe to read flags on an object if the sweep thread is running + asan_unpoisoning_object(ptr) { + switch (BUILTIN_TYPE(ptr)) { + case T_NONE: + case T_MOVED: + dead = true; + break; + case T_ZOMBIE: + dead = ZOMBIE_NEEDS_FREE_P(ptr); + break; + default: + break; + } } } if (dead) return true; - return is_lazy_sweeping(objspace) && GET_HEAP_PAGE(ptr)->flags.before_sweep && - !RVALUE_MARKED(objspace, ptr); + + struct heap_page *page = GET_HEAP_PAGE(ptr); + bool during_lazy_sweep = is_lazy_sweeping(objspace); + + if (!use_sweep_thread) { + // The ruby GC thread or a user thread called us + bool marked = RVALUE_MARKED(objspace, ptr); + return during_lazy_sweep && !marked && rbimpl_atomic_load(&page->before_sweep, RBIMPL_ATOMIC_RELAXED); + } + else if (during_lazy_sweep) { + // we're currently lazy sweeping with the sweep thread + bool marked = RVALUE_MARKED_ATOMIC(objspace, ptr); // load it atomically so it can't be re-ordered past the next atomic load + rb_atomic_t before_sweep = rbimpl_atomic_load(&page->before_sweep, RBIMPL_ATOMIC_ACQUIRE); + bool is_garbage = !marked && before_sweep; + if (is_garbage) return true; + if (marked && before_sweep) return false; + // already swept page, just check flags + return BUILTIN_TYPE(ptr) == T_NONE || BUILTIN_TYPE(ptr) == T_MOVED || (BUILTIN_TYPE(ptr) == T_ZOMBIE && ZOMBIE_NEEDS_FREE_P(ptr)); + } + else { + return BUILTIN_TYPE(ptr) == T_NONE || BUILTIN_TYPE(ptr) == T_MOVED || (BUILTIN_TYPE(ptr) == T_ZOMBIE && ZOMBIE_NEEDS_FREE_P(ptr)); + } } static void free_stack_chunks(mark_stack_t *); static void mark_stack_free_cache(mark_stack_t *); -static void heap_page_free(rb_objspace_t *objspace, struct heap_page *page); +static void heap_page_free(rb_objspace_t *objspace, struct heap_page *page, bool log); static inline void -heap_page_add_freeobj(rb_objspace_t *objspace, struct 
heap_page *page, VALUE obj) +heap_page_add_freeobj(rb_objspace_t *objspace, struct heap_page *page, VALUE obj, bool from_sweep_thread) { rb_asan_unpoison_object(obj, false); @@ -1632,8 +1976,10 @@ heap_page_add_freeobj(rb_objspace_t *objspace, struct heap_page *page, VALUE obj page->freelist = slot; asan_lock_freelist(page); - // Should have already been reset - GC_ASSERT(RVALUE_AGE_GET(obj) == 0); + if (!from_sweep_thread) { + // Should have already been reset + GC_ASSERT(RVALUE_AGE_GET(obj) == 0); + } if (RGENGC_CHECK_MODE && /* obj should belong to page */ @@ -1682,7 +2028,7 @@ heap_allocatable_bytes_expand(rb_objspace_t *objspace, } if (gc_params.growth_max_bytes > 0) { - size_t max_total_slots = total_slots + gc_params.growth_max_bytes / slot_size; + size_t max_total_slots = total_slots + (gc_params.growth_max_bytes / slot_size); if (target_total_slots > max_total_slots) target_total_slots = max_total_slots; } @@ -1693,13 +2039,16 @@ heap_allocatable_bytes_expand(rb_objspace_t *objspace, objspace->heap_pages.allocatable_bytes += extend_slot_count * slot_size; } +/* Add a `page` with some free slots to the beginning of `heap->free_pages` */ static inline void -heap_add_freepage(rb_heap_t *heap, struct heap_page *page) +heap_add_freepage(rb_heap_t *heap, struct heap_page *page, const char *from_func) { asan_unlock_freelist(page); GC_ASSERT(page->free_slots != 0); GC_ASSERT(page->freelist != NULL); + psweep_debug(1, "[gc] heap_add_freepage(heap:%p, page:%p) from %s\n", heap, page, from_func); + page->free_next = heap->free_pages; heap->free_pages = page; @@ -1726,7 +2075,10 @@ static void heap_unlink_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page) { ccan_list_del(&page->page_node); + GC_ASSERT(heap->total_pages > 0); heap->total_pages--; + GC_ASSERT(heap->total_slots >= page->total_slots); + GC_ASSERT(page->total_slots > 0); heap->total_slots -= page->total_slots; } @@ -1763,11 +2115,12 @@ heap_page_body_free(struct heap_page_body 
*page_body) } static void -heap_page_free(rb_objspace_t *objspace, struct heap_page *page) +heap_page_free(rb_objspace_t *objspace, struct heap_page *page, bool log) { objspace->heap_pages.freed_pages++; heap_page_body_free(page->body); free(page); + psweep_debug(1, "[gc] heap_page_free heap:%p page:%p\n", page->heap, page); } static void @@ -1783,7 +2136,7 @@ heap_pages_free_unused_pages(rb_objspace_t *objspace) struct heap_page *page = rb_darray_get(objspace->heap_pages.sorted, i); if (heap_page_in_global_empty_pages_pool(objspace, page) && heap_pages_freeable_pages > 0) { - heap_page_free(objspace, page); + heap_page_free(objspace, page, true); heap_pages_freeable_pages--; } else { @@ -1908,6 +2261,8 @@ heap_page_body_allocate(void) return page_body; } +/* Try to "resurrect" an empty page by removing it from the `objspace->empty_pages` list */ +/* NOTE: empty pages can go to any heap */ static struct heap_page * heap_page_resurrect(rb_objspace_t *objspace) { @@ -1920,6 +2275,7 @@ heap_page_resurrect(rb_objspace_t *objspace) objspace->empty_pages_count--; page = objspace->empty_pages; objspace->empty_pages = page->free_next; + page->freelist = NULL; } return page; @@ -1973,8 +2329,9 @@ heap_page_allocate(rb_objspace_t *objspace) return page; } +/* Add either an empty page (objspace->empty_pages) or a newly allocated page to a heap. 
Thread the freelist and set `heap->free_slots` */ static void -heap_add_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page) +heap_add_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page, bool sweep_lock_taken) { /* Adding to eden heap during incremental sweeping is forbidden */ GC_ASSERT(!heap->sweeping_page); @@ -1994,6 +2351,7 @@ heap_add_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page) page->slot_size = heap->slot_size; page->slot_div_magic = slot_div_magics[heap - heaps]; page->heap = heap; + page->free_next = NULL; memset(&page->wb_unprotected_bits[0], 0, HEAP_PAGE_BITMAP_SIZE); memset(&page->age_bits[0], 0, sizeof(page->age_bits)); @@ -2001,22 +2359,31 @@ heap_add_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page) asan_unlock_freelist(page); page->freelist = NULL; asan_unpoison_memory_region(page->body, HEAP_PAGE_SIZE, false); + int i = 0; for (VALUE p = (VALUE)start; p < start + (slot_count * heap->slot_size); p += heap->slot_size) { - heap_page_add_freeobj(objspace, page, p); + i++; + heap_page_add_freeobj(objspace, page, p, false); } + GC_ASSERT(i == slot_count); asan_lock_freelist(page); page->free_slots = slot_count; heap->total_allocated_pages++; - ccan_list_add_tail(&heap->pages, &page->page_node); + if (!sweep_lock_taken) sweep_lock_lock(&objspace->sweep_lock); + { + ccan_list_add_tail(&heap->pages, &page->page_node); + } + if (!sweep_lock_taken) sweep_lock_unlock(&objspace->sweep_lock); + heap->total_pages++; + GC_ASSERT(page->total_slots == page->free_slots); heap->total_slots += page->total_slots; } static int -heap_page_allocate_and_initialize(rb_objspace_t *objspace, rb_heap_t *heap) +heap_page_allocate_and_initialize(rb_objspace_t *objspace, rb_heap_t *heap, bool sweep_lock_taken) { gc_report(1, objspace, "heap_page_allocate_and_initialize: rb_darray_size(objspace->heap_pages.sorted): %"PRIdSIZE", " "allocatable_bytes: %"PRIdSIZE", heap->total_pages: %"PRIdSIZE"\n", 
@@ -2026,6 +2393,7 @@ heap_page_allocate_and_initialize(rb_objspace_t *objspace, rb_heap_t *heap) struct heap_page *page = heap_page_resurrect(objspace); if (page == NULL && objspace->heap_pages.allocatable_bytes > 0) { + psweep_debug(1, "[gc] heap_page_allocate_and_initialize: no empty pages, allocating page\n"); page = heap_page_allocate(objspace); allocated = true; @@ -2033,8 +2401,8 @@ heap_page_allocate_and_initialize(rb_objspace_t *objspace, rb_heap_t *heap) } if (page != NULL) { - heap_add_page(objspace, heap, page); - heap_add_freepage(heap, page); + heap_add_page(objspace, heap, page, sweep_lock_taken); + heap_add_freepage(heap, page, "allocate_and_initialize"); if (allocated) { size_t page_bytes = (size_t)page->total_slots * page->slot_size; @@ -2051,21 +2419,25 @@ heap_page_allocate_and_initialize(rb_objspace_t *objspace, rb_heap_t *heap) } static void -heap_page_allocate_and_initialize_force(rb_objspace_t *objspace, rb_heap_t *heap) +heap_page_allocate_and_initialize_force(rb_objspace_t *objspace, rb_heap_t *heap, bool sweep_lock_taken) { size_t prev_allocatable_bytes = objspace->heap_pages.allocatable_bytes; objspace->heap_pages.allocatable_bytes = HEAP_PAGE_SIZE; - heap_page_allocate_and_initialize(objspace, heap); + heap_page_allocate_and_initialize(objspace, heap, sweep_lock_taken); GC_ASSERT(heap->free_pages != NULL); objspace->heap_pages.allocatable_bytes = prev_allocatable_bytes; } +// Run incremental marking and/or sweeping, if in incremental marking or sweeping mode static void gc_continue(rb_objspace_t *objspace, rb_heap_t *heap) { unsigned int lock_lev; bool needs_gc = is_incremental_marking(objspace) || needs_continue_sweeping(objspace, heap); - if (!needs_gc) return; + if (!needs_gc) { + psweep_debug(1, "[gc] gc_continue: !needs_gc\n"); + return; + } gc_enter(objspace, gc_enter_event_continue, &lock_lev); // takes vm barrier, try to avoid @@ -2079,32 +2451,51 @@ gc_continue(rb_objspace_t *objspace, rb_heap_t *heap) if 
(needs_continue_sweeping(objspace, heap)) { gc_sweep_continue(objspace, heap); } + else { + psweep_debug(-1, "[gc] gc_continue: !needs_continue_sweeping (lazy_sweeping:%d)\n", is_lazy_sweeping(objspace)); + } gc_exit(objspace, gc_enter_event_continue, &lock_lev); } +void wait_for_background_sweeping_to_finish(rb_objspace_t *objspace, bool abort_current_background_sweep, bool exit_sweep_thread, const char *from_fn); + static void heap_prepare(rb_objspace_t *objspace, rb_heap_t *heap) { GC_ASSERT(heap->free_pages == NULL); - if (heap->total_slots < gc_params.heap_init_bytes / heap->slot_size && - heap->sweeping_page == NULL) { - heap_page_allocate_and_initialize_force(objspace, heap); + if (heap->is_finished_sweeping && heap->total_slots < (gc_params.heap_init_bytes / heap->slot_size)) { + heap_page_allocate_and_initialize_force(objspace, heap, false); GC_ASSERT(heap->free_pages != NULL); return; } + else { + sweep_lock_lock(&objspace->sweep_lock); + { + if (heap->total_slots < (gc_params.heap_init_bytes / heap->slot_size) && + heap->sweeping_page == NULL && heap->swept_pages == NULL && !heap->pre_sweeping_page) { + heap_page_allocate_and_initialize_force(objspace, heap, true); + GC_ASSERT(heap->free_pages != NULL); + sweep_lock_unlock(&objspace->sweep_lock); + return; + } + } + sweep_lock_unlock(&objspace->sweep_lock); + } /* Continue incremental marking or lazy sweeping, if in any of those steps. */ gc_continue(objspace, heap); if (heap->free_pages == NULL) { - heap_page_allocate_and_initialize(objspace, heap); + psweep_debug(1, "[gc] heap_prepare: heap->free_pages is NULL after gc_continue\n"); + heap_page_allocate_and_initialize(objspace, heap, false); } /* If we still don't have a free page and not allowed to create a new page, * we should start a new GC cycle. 
*/ if (heap->free_pages == NULL) { + psweep_debug(1, "[gc] heap_prepare: still no heap->free_pages even after try allocate!\n"); GC_ASSERT(objspace->empty_pages_count == 0); GC_ASSERT(objspace->heap_pages.allocatable_bytes == 0); @@ -2124,7 +2515,7 @@ heap_prepare(rb_objspace_t *objspace, rb_heap_t *heap) /* If we're not incremental marking (e.g. a minor GC) or finished * sweeping and still don't have a free page, then * gc_sweep_finish_heap should allow us to create a new page. */ - if (heap->free_pages == NULL && !heap_page_allocate_and_initialize(objspace, heap)) { + if (heap->free_pages == NULL && !heap_page_allocate_and_initialize(objspace, heap, false)) { if (gc_needs_major_flags == GPR_FLAG_NONE) { rb_bug("cannot create a new page after GC"); } @@ -2137,7 +2528,7 @@ heap_prepare(rb_objspace_t *objspace, rb_heap_t *heap) gc_continue(objspace, heap); if (heap->free_pages == NULL && - !heap_page_allocate_and_initialize(objspace, heap)) { + !heap_page_allocate_and_initialize(objspace, heap, false)) { rb_bug("cannot create a new page after major GC"); } } @@ -2171,6 +2562,7 @@ static inline VALUE newobj_init(VALUE klass, VALUE flags, int wb_protected, rb_objspace_t *objspace, VALUE obj) { GC_ASSERT(BUILTIN_TYPE(obj) == T_NONE); + GC_ASSERT(RVALUE_AGE_GET(obj) == 0); GC_ASSERT((flags & FL_WB_PROTECTED) == 0); RBASIC(obj)->flags = flags; *((VALUE *)&RBASIC(obj)->klass) = klass; @@ -2179,7 +2571,7 @@ newobj_init(VALUE klass, VALUE flags, int wb_protected, rb_objspace_t *objspace, #endif -#if RACTOR_CHECK_MODE +#if RACTOR_CHECK_MODE > 10 /* XXX(review): effectively disables rb_ractor_setup_belonging — confirm intentional before merge */ void rb_ractor_setup_belonging(VALUE obj); rb_ractor_setup_belonging(obj); #endif @@ -2281,6 +2673,7 @@ ractor_cache_allocate_slot(rb_objspace_t *objspace, rb_ractor_newobj_cache_t *ca } if (RB_LIKELY(p)) { + psweep_debug(2, "[gc] allocate slot: %p from heap:%p page:%p\n", p, &heaps[heap_idx], heap_cache->using_page); VALUE obj = (VALUE)p; rb_asan_unpoison_object(obj, true); heap_cache->freelist = p->next; @@ -2315,8 +2708,8 @@ 
heap_next_free_page(rb_objspace_t *objspace, rb_heap_t *heap) page = heap->free_pages; heap->free_pages = page->free_next; - - GC_ASSERT(page->free_slots != 0); + psweep_debug(1, "[gc] heap_next_free_page heap:%p free_pages:%p -> %p (free_slots:%d)\n", heap, page, heap->free_pages, page->free_slots); + GC_ASSERT(page->free_slots > 0); asan_unlock_freelist(page); @@ -2451,6 +2844,8 @@ newobj_alloc(rb_objspace_t *objspace, rb_ractor_newobj_cache_t *cache, size_t he ALWAYS_INLINE(static VALUE newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_newobj_cache_t *cache, int wb_protected, size_t heap_idx)); +static const char *type_name(int type, VALUE obj); + static inline VALUE newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_newobj_cache_t *cache, int wb_protected, size_t heap_idx) { @@ -2466,7 +2861,7 @@ newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_new if (rb_memerror_reentered()) { rb_memerror(); } - rb_bug("object allocation during garbage collection phase"); + rb_bug("object allocation during garbage collection phase for klass %s\n", type_name(flags & T_MASK, 0)); } if (ruby_gc_stressful) { @@ -2611,26 +3006,28 @@ rb_gc_impl_pointer_to_heap_p(void *objspace_ptr, const void *ptr) return is_pointer_to_heap(objspace_ptr, ptr); } -#define ZOMBIE_OBJ_KEPT_FLAGS (FL_FINALIZE) void rb_gc_impl_make_zombie(void *objspace_ptr, VALUE obj, void (*dfree)(void *), void *data) { rb_objspace_t *objspace = objspace_ptr; + struct heap_page *page = GET_HEAP_PAGE(obj); struct RZombie *zombie = RZOMBIE(obj); zombie->flags = T_ZOMBIE | (zombie->flags & ZOMBIE_OBJ_KEPT_FLAGS); zombie->dfree = dfree; zombie->data = data; - VALUE prev, next = heap_pages_deferred_final; + VALUE prev, next = (VALUE)RUBY_ATOMIC_PTR_LOAD(heap_pages_deferred_final); + GC_ASSERT(page == GET_HEAP_PAGE(zombie)); do { zombie->next = prev = next; next = RUBY_ATOMIC_VALUE_CAS(heap_pages_deferred_final, prev, obj); } while (next != 
prev); - - struct heap_page *page = GET_HEAP_PAGE(obj); - page->final_slots++; - page->heap->final_slots_count++; + page->final_slots++; // NOTE: not synchronized, but either background thread or user thread owns page during free +#if RUBY_DEBUG + RUBY_ATOMIC_INC(page->heap->made_zombies); +#endif + RUBY_ATOMIC_SIZE_INC(page->heap->final_slots_count); } typedef int each_obj_callback(void *, void *, size_t, void *); @@ -2754,6 +3151,7 @@ objspace_each_exec(bool protected, struct each_obj_data *each_obj_data) static void objspace_each_objects(rb_objspace_t *objspace, each_obj_callback *callback, void *data, bool protected) { + wait_for_background_sweeping_to_finish(objspace, true, false, "objspace_each_objects"); struct each_obj_data each_obj_data = { .objspace = objspace, .each_obj_callback = callback, @@ -2901,30 +3299,33 @@ run_final(rb_objspace_t *objspace, VALUE zombie, unsigned int lev) return lev; } +void +rb_gc_impl_free_zombie(rb_objspace_t *objspace, VALUE obj) +{ + GC_ASSERT(!is_sweep_thread_p()); + struct heap_page *page = GET_HEAP_PAGE(obj); + GC_ASSERT(RUBY_ATOMIC_VALUE_LOAD(page->heap->final_slots_count) > 0); + RUBY_ATOMIC_SIZE_DEC(page->heap->final_slots_count); + GC_ASSERT(page->final_slots > 0); + page->final_slots--; + RVALUE_AGE_SET_BITMAP(obj, 0); +} + static void finalize_list(rb_objspace_t *objspace, VALUE zombie) { while (zombie) { VALUE next_zombie; - struct heap_page *page; rb_asan_unpoison_object(zombie, false); next_zombie = RZOMBIE(zombie)->next; - page = GET_HEAP_PAGE(zombie); unsigned int lev = RB_GC_VM_LOCK(); lev = run_final(objspace, zombie, lev); { GC_ASSERT(BUILTIN_TYPE(zombie) == T_ZOMBIE); - GC_ASSERT(page->heap->final_slots_count > 0); - GC_ASSERT(page->final_slots > 0); - - page->heap->final_slots_count--; - page->final_slots--; - page->free_slots++; - RVALUE_AGE_SET_BITMAP(zombie, 0); - heap_page_add_freeobj(objspace, page, zombie); - page->heap->total_freed_objects++; + GC_ASSERT(!FL_TEST(zombie, FL_FINALIZE)); + 
ZOMBIE_SET_NEEDS_FREE_FLAG(zombie); } RB_GC_VM_UNLOCK(lev); @@ -2981,15 +3382,27 @@ gc_abort(void *objspace_ptr) objspace->flags.during_incremental_marking = FALSE; } +#if RUBY_DEBUG + sweep_lock_lock(&objspace->sweep_lock); + GC_ASSERT(!objspace->sweep_rest); + sweep_lock_unlock(&objspace->sweep_lock); +#endif + + wait_for_background_sweeping_to_finish(objspace, true, false, "gc_abort"); + if (is_lazy_sweeping(objspace)) { for (int i = 0; i < HEAP_COUNT; i++) { rb_heap_t *heap = &heaps[i]; - heap->sweeping_page = NULL; + heap->swept_pages = NULL; + + heap->pre_sweeping_page = NULL; + heap->is_finished_sweeping = false; + heap->background_sweep_steps = heap->foreground_sweep_steps; struct heap_page *page = NULL; ccan_list_for_each(&heap->pages, page, page_node) { - page->flags.before_sweep = false; + page->before_sweep = 0; } } } @@ -3043,6 +3456,8 @@ rb_gc_impl_shutdown_call_finalizer_i(st_data_t key, st_data_t val, st_data_t _da return ST_DELETE; } +void rb_gc_stop_background_threads(rb_objspace_t *objspace, const char *from_fn); + void rb_gc_impl_shutdown_call_finalizer(void *objspace_ptr) { @@ -3052,6 +3467,8 @@ rb_gc_impl_shutdown_call_finalizer(void *objspace_ptr) gc_verify_internal_consistency(objspace); #endif + wait_for_background_sweeping_to_finish(objspace, true, false, "shutdown_call_finalizer"); + /* prohibit incremental GC */ objspace->flags.dont_incremental = 1; @@ -3066,7 +3483,6 @@ rb_gc_impl_shutdown_call_finalizer(void *objspace_ptr) st_foreach(finalizer_table, rb_gc_impl_shutdown_call_finalizer_i, 0); } - /* run finalizers */ finalize_deferred(objspace); GC_ASSERT(heap_pages_deferred_final == 0); @@ -3481,8 +3897,11 @@ struct gc_sweep_context { int final_slots; int freed_slots; int empty_slots; + int zombie_slots; /* pre-existing zombies not yet ready to free */ }; +bool rb_gc_obj_needs_cleanup_p(VALUE obj); + static inline void gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bitset, struct gc_sweep_context *ctx) { 
@@ -3497,7 +3916,7 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit if (bitset & 1) { switch (BUILTIN_TYPE(vp)) { case T_MOVED: - if (objspace->flags.during_compacting) { + if (RB_UNLIKELY(objspace->flags.during_compacting)) { /* The sweep cursor shouldn't have made it to any * T_MOVED slots while the compact flag is enabled. * The sweep cursor and compact cursor move in @@ -3507,23 +3926,31 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit } gc_report(3, objspace, "page_sweep: %s is added to freelist\n", rb_obj_info(vp)); ctx->empty_slots++; - heap_page_add_freeobj(objspace, sweep_page, vp); + heap_page_add_freeobj(objspace, sweep_page, vp, false); break; case T_ZOMBIE: - /* already counted */ + if (ZOMBIE_NEEDS_FREE_P(vp)) { + goto free_object; + } + /* already counted as final slot */ + ctx->zombie_slots++; break; case T_NONE: ctx->empty_slots++; /* already freed */ break; default: + free_object: + psweep_debug(0, "[gc] gc_sweep_plane: heap:%p (%ld) freeing obj:%p (%s)\n", heap, heap - heaps, (void*)vp, rb_obj_info(vp)); #if RGENGC_CHECK_MODE if (!is_full_marking(objspace)) { - if (RVALUE_OLD_P(objspace, vp)) rb_bug("page_sweep: %p - old while minor GC.", (void *)p); - if (RVALUE_REMEMBERED(objspace, vp)) rb_bug("page_sweep: %p - remembered.", (void *)p); + if (RVALUE_OLD_P(objspace, vp)) rb_bug("page_sweep: %p - old while minor GC.", (void *)vp); + if (RVALUE_REMEMBERED(objspace, vp)) rb_bug("page_sweep: %p - remembered.", (void *)vp); } #endif + if (RVALUE_WB_UNPROTECTED(objspace, vp)) CLEAR_IN_BITMAP(GET_HEAP_WB_UNPROTECTED_BITS(vp), vp); + #if RGENGC_CHECK_MODE #define CHECK(x) if (x(objspace, vp) != FALSE) rb_bug("obj_free: " #x "(%s) != FALSE", rb_obj_info(vp)) CHECK(RVALUE_WB_UNPROTECTED); @@ -3538,21 +3965,23 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit rb_gc_event_hook(vp, RUBY_INTERNAL_EVENT_FREEOBJ); } - 
(void)VALGRIND_MAKE_MEM_UNDEFINED((void*)p, slot_size); - heap_page_add_freeobj(objspace, sweep_page, vp); gc_report(3, objspace, "page_sweep: %s (fast path) added to freelist\n", rb_obj_info(vp)); + RVALUE_AGE_SET_BITMAP(vp, 0); + heap_page_add_freeobj(objspace, sweep_page, vp, false); + (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)vp, slot_size); ctx->freed_slots++; } else { - gc_report(2, objspace, "page_sweep: free %p\n", (void *)p); + gc_report(2, objspace, "page_sweep: free %p\n", (void *)vp); rb_gc_event_hook(vp, RUBY_INTERNAL_EVENT_FREEOBJ); rb_gc_obj_free_vm_weak_references(vp); if (rb_gc_obj_free(objspace, vp)) { - (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)p, slot_size); - heap_page_add_freeobj(objspace, sweep_page, vp); gc_report(3, objspace, "page_sweep: %s is added to freelist\n", rb_obj_info(vp)); + RVALUE_AGE_SET_BITMAP(vp, 0); + heap_page_add_freeobj(objspace, sweep_page, vp, false); + (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)vp, slot_size); ctx->freed_slots++; } else { @@ -3562,11 +3991,139 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit break; } } + else { + GC_ASSERT(RVALUE_MARKED(objspace, vp)); + } p += slot_size; bitset >>= 1; } while (bitset); } +void +wait_for_background_sweeping_to_finish(rb_objspace_t *objspace, bool abort_current_background_sweep, bool exit_sweep_thread, const char *from_fn) +{ + if (!objspace->sweep_thread) { + return; + } + sweep_lock_lock(&objspace->sweep_lock); + if (abort_current_background_sweep) { + objspace->background_sweep_abort = true; + objspace->background_sweep_restart_heaps = false; + objspace->sweep_thread_sweep_requested = false; + } + while (objspace->sweep_thread_running && objspace->sweep_thread_sweeping) { + psweep_debug(1, "[gc] Waiting for sweep thread to finish (abort_sweep:%d, from_fn:%s)\n", abort_current_background_sweep, from_fn); + rb_native_cond_signal(&objspace->sweep_cond); + sweep_lock_set_unlocked(); + rb_native_cond_wait(&objspace->sweep_cond, 
&objspace->sweep_lock); + sweep_lock_set_locked(); + } + if (exit_sweep_thread) { + GC_ASSERT(abort_current_background_sweep); + objspace->sweep_thread_running = false; + while (!objspace->sweep_thread_sweep_exited) { + rb_native_cond_signal(&objspace->sweep_cond); + sweep_lock_set_unlocked(); + rb_native_cond_wait(&objspace->sweep_cond, &objspace->sweep_lock); + sweep_lock_set_locked(); + } + pthread_join(objspace->sweep_thread, NULL); + psweep_debug(0, "Sweep thread joined from %s\n", from_fn); + GET_VM()->gc.sweep_thread = 0; + objspace->sweep_thread = 0; + } + else { + psweep_debug(0, "Waited for sweep thread to finish sweep from %s\n", from_fn); + } + objspace->background_sweep_abort = false; + objspace->background_sweep_mode = false; + sweep_lock_unlock(&objspace->sweep_lock); +} + +// Free the object in a Ruby thread. Return whether or not we put the slot back on the page's freelist. +static bool +deferred_free(rb_objspace_t *objspace, VALUE obj) +{ + ASSERT_vm_locking_with_barrier(); + bool result; +#ifdef PSWEEP_DEBUG + MAYBE_UNUSED(const char *obj_info) = rb_obj_info(obj); +#endif + bool freed_weakrefs = rb_gc_obj_free_vm_weak_references(obj); + (void)freed_weakrefs; + GC_ASSERT(freed_weakrefs); + if (rb_gc_obj_free(objspace, obj)) { + struct heap_page *page = GET_HEAP_PAGE(obj); + psweep_debug(1, "[gc] deferred free: page(%p) obj(%p) %s (success)\n", page, (void*)obj, obj_info); + RVALUE_AGE_SET_BITMAP(obj, 0); + heap_page_add_freeobj(objspace, page, obj, false); + (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)obj, page->slot_size); + result = true; + } + else { +#if RUBY_DEBUG + if (!(BUILTIN_TYPE(obj) == T_ZOMBIE && !FL_TEST(obj, FL_FREEZE))) { + rb_bug("should be unfreeable zombie"); + } +#endif + result = false; + MAYBE_UNUSED(struct heap_page *page) = GET_HEAP_PAGE(obj); + psweep_debug(1, "[gc] deferred sweep: page(%p) obj(%p) %s (zombie)\n", page, (void*)obj, obj_info); + } + return result; +} + +// Clear bits for the page that was swept by the 
background thread. +static inline void +gc_post_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *sweep_page) +{ + GC_ASSERT(sweep_page->heap == heap); + + bits_t *bits; + + gc_report(2, objspace, "post_page_sweep: start.\n"); + +#if RGENGC_CHECK_MODE + if (!objspace->flags.immediate_sweep) { + GC_ASSERT(RUBY_ATOMIC_LOAD(sweep_page->before_sweep)); + } +#endif + rbimpl_atomic_store(&sweep_page->before_sweep, 0, RBIMPL_ATOMIC_RELEASE); + + bits = sweep_page->mark_bits; + + int total_slots = sweep_page->total_slots; + int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH); + + int out_of_range_bits = total_slots % BITS_BITLENGTH; + if (out_of_range_bits != 0) { + bits[bitmap_plane_count - 1] |= ~(((bits_t)1 << out_of_range_bits) - 1); + } + + // Clear wb_unprotected and age bits for all unmarked slots + { + bits_t *wb_unprotected_bits = sweep_page->wb_unprotected_bits; + bits_t *age_bits = sweep_page->age_bits; + for (int i = 0; i < bitmap_plane_count; i++) { + bits_t unmarked = ~bits[i]; + wb_unprotected_bits[i] &= ~unmarked; + age_bits[i * 2] &= ~unmarked; + age_bits[i * 2 + 1] &= ~unmarked; + } + } + + if (!heap->compact_cursor) { + gc_setup_mark_bits(sweep_page); + } + + if (RUBY_ATOMIC_PTR_LOAD(heap_pages_deferred_final) && !finalizing) { + gc_finalize_deferred_register(objspace); + } + + gc_report(2, objspace, "post_page_sweep: end.\n"); +} + +// Sweep a page by the Ruby thread (synchronous freeing). 
static inline void gc_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct gc_sweep_context *ctx) { @@ -3578,12 +4135,14 @@ gc_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct gc_sweep_context gc_report(2, objspace, "page_sweep: start.\n"); + psweep_debug(1, "[gc] gc_sweep_page: heap:%p (%ld) page:%p\n", heap, heap - heaps, sweep_page); + #if RGENGC_CHECK_MODE if (!objspace->flags.immediate_sweep) { - GC_ASSERT(sweep_page->flags.before_sweep == TRUE); + GC_ASSERT(RUBY_ATOMIC_LOAD(sweep_page->before_sweep)); } #endif - sweep_page->flags.before_sweep = FALSE; + rbimpl_atomic_store(&sweep_page->before_sweep, 0, RBIMPL_ATOMIC_RELEASE); sweep_page->free_slots = 0; p = (uintptr_t)sweep_page->start; @@ -3597,6 +4156,14 @@ gc_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct gc_sweep_context bits[bitmap_plane_count - 1] |= ~(((bits_t)1 << out_of_range_bits) - 1); } + for (int i = 0; i < bitmap_plane_count; i++) { + bitset = ~bits[i]; + if (bitset) { + gc_sweep_plane(objspace, heap, p, bitset, ctx); + } + p += BITS_BITLENGTH * slot_size; + } + // Clear wb_unprotected and age bits for all unmarked slots { bits_t *wb_unprotected_bits = sweep_page->wb_unprotected_bits; @@ -3609,13 +4176,26 @@ gc_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct gc_sweep_context } } - for (int i = 0; i < bitmap_plane_count; i++) { - bitset = ~bits[i]; - if (bitset) { - gc_sweep_plane(objspace, heap, p, bitset, ctx); +#if RGENGC_CHECK_MODE + { + /* Assert that all unmarked slots with live objects were either freed or made into zombies. 
*/ + int unmarked_slots = 0; + for (int i = 0; i < bitmap_plane_count; i++) { + bits_t unmarked = ~bits[i]; + unmarked_slots += (int)popcount_bits(unmarked); + } + + int freed_or_zombie = ctx->freed_slots + ctx->final_slots; + int unmarked_live = unmarked_slots - ctx->empty_slots - ctx->zombie_slots; + if (freed_or_zombie != unmarked_live) { + rb_bug("gc_sweep_page: unmarked live slot count mismatch: " + "unmarked_slots=%d - empty_slots=%d - zombie_slots=%d = %d unmarked live, " + "but freed_slots=%d + final_slots=%d = %d", + unmarked_slots, ctx->empty_slots, ctx->zombie_slots, unmarked_live, + ctx->freed_slots, ctx->final_slots, freed_or_zombie); } - p += BITS_BITLENGTH * slot_size; } +#endif if (!heap->compact_cursor) { gc_setup_mark_bits(sweep_page); @@ -3633,10 +4213,10 @@ gc_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct gc_sweep_context sweep_page->total_slots, ctx->freed_slots, ctx->empty_slots, ctx->final_slots); - sweep_page->free_slots += ctx->freed_slots + ctx->empty_slots; sweep_page->heap->total_freed_objects += ctx->freed_slots; + sweep_page->free_slots = ctx->freed_slots + ctx->empty_slots; - if (heap_pages_deferred_final && !finalizing) { + if (RUBY_ATOMIC_PTR_LOAD(heap_pages_deferred_final) && !finalizing) { gc_finalize_deferred_register(objspace); } @@ -3712,17 +4292,503 @@ heap_page_freelist_append(struct heap_page *page, struct free_slot *freelist) } } +static inline void +sweep_in_ruby_thread(rb_objspace_t *objspace, struct heap_page *page, VALUE obj) +{ + page->pre_deferred_free_slots += 1; + psweep_debug(1, "[sweep] register sweep later: page(%p), obj(%p) %s\n", (void*)page, (void*)obj, rb_obj_info(obj)); + GC_ASSERT(BUILTIN_TYPE(obj) != T_NONE); + MARK_IN_BITMAP(page->deferred_free_bits, obj); +} + +static inline bool +zombie_needs_deferred_free(VALUE zombie) +{ + return ZOMBIE_NEEDS_FREE_P(zombie); +} + +#if RGENGC_CHECK_MODE +static void +debug_free_check(rb_objspace_t *objspace, VALUE vp) +{ + if (!is_full_marking(objspace)) 
{ + if (RVALUE_OLD_P(objspace, vp)) rb_bug("page_sweep: %p - old while minor GC.", (void *)vp); + if (RVALUE_REMEMBERED(objspace, vp)) rb_bug("page_sweep: %p - remembered.", (void *)vp); + } +#define CHECK(x) if (x(objspace, vp) != FALSE) rb_bug("obj_free: " #x "(%s) != FALSE", rb_obj_info(vp)) + CHECK(RVALUE_MARKED); + CHECK(RVALUE_MARKING); + CHECK(RVALUE_UNCOLLECTIBLE); +#undef CHECK +} +#else +#define debug_free_check(...) (void)0 +#endif + +static inline void +gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page, uintptr_t p, bits_t bitset, short slot_size) +{ + unsigned short freed = 0; + unsigned short empties = 0; + unsigned short finals = 0; + unsigned short zombies = 0; + do { + VALUE vp = (VALUE)p; + GC_ASSERT(GET_HEAP_PAGE(vp) == page); + + rb_asan_unpoison_object(vp, false); + if (bitset & 1) { + GC_ASSERT(!RVALUE_MARKED(objspace, vp)); + switch (BUILTIN_TYPE(vp)) { + case T_MOVED: { + empties++; + heap_page_add_freeobj(objspace, page, vp, true); + (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)vp, page->slot_size); + break; + } + case T_NONE: + empties++; // already in freelist + break; + case T_ZOMBIE: + if (zombie_needs_deferred_free(vp)) { + sweep_in_ruby_thread(objspace, page, vp); + } + else { + // already counted as final_slot when made into a zombie + zombies++; + } + break; + case T_DATA: { + debug_free_check(objspace, vp); + void *data = RTYPEDDATA_P(vp) ? RTYPEDDATA_GET_DATA(vp) : DATA_PTR(vp); + if (!data) { + goto free; + } + // NOTE: this repeats code found in `rb_data_free`. This is just for testing purposes. 
+ bool free_immediately = false; + void (*dfree)(void *); + if (RTYPEDDATA_P(vp)) { + free_immediately = (RTYPEDDATA_TYPE(vp)->flags & RUBY_TYPED_FREE_IMMEDIATELY) != 0 && (RTYPEDDATA_TYPE(vp)->flags & RUBY_TYPED_CONCURRENT_FREE_SAFE) != 0; + dfree = RTYPEDDATA_TYPE(vp)->function.dfree; + } + else { + dfree = RDATA(vp)->dfree; + } + if (!dfree || dfree == RUBY_DEFAULT_FREE || free_immediately) { + goto free; + } + else { + sweep_in_ruby_thread(objspace, page, vp); + break; + } + break; + } + case T_IMEMO: { + debug_free_check(objspace, vp); + switch (imemo_type(vp)) { + case imemo_callcache: + case imemo_constcache: + case imemo_cref: + case imemo_env: + case imemo_ifunc: + case imemo_memo: + case imemo_svar: + case imemo_throw_data: + case imemo_tmpbuf: + case imemo_fields: + goto free; + case imemo_callinfo: + case imemo_iseq: // calls rb_yjit_iseq_free which is not concurrency safe + case imemo_ment: + // blacklisted due to vm weak references + sweep_in_ruby_thread(objspace, page, vp); + break; + default: + rb_bug("Unknown imemo type: %d\n", imemo_type(vp)); + } + break; + } + case T_COMPLEX: + case T_RATIONAL: + case T_FLOAT: + case T_BIGNUM: + case T_OBJECT: + case T_STRING: + case T_SYMBOL: + case T_ARRAY: + case T_HASH: + case T_STRUCT: + case T_MATCH: + case T_REGEXP: + case T_FILE: { + debug_free_check(objspace, vp); + goto free; + } + case T_CLASS: + case T_MODULE: + case T_ICLASS: + debug_free_check(objspace, vp); + if (!rb_gc_obj_needs_cleanup_p(vp)) { + heap_page_add_freeobj(objspace, page, vp, true); + psweep_debug(2, "[sweep] freed: page(%p), obj(%p)\n", (void*)page, (void*)vp); + (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)vp, page->slot_size); + freed++; + } + else { + sweep_in_ruby_thread(objspace, page, vp); + } + break; + free: { + debug_free_check(objspace, vp); + if (RB_LIKELY(rb_gc_obj_free_concurrency_safe_vm_weak_references(vp))) { + bool can_put_back_on_freelist = rb_gc_obj_free(objspace, vp); + if (can_put_back_on_freelist) { + 
heap_page_add_freeobj(objspace, page, vp, true); + freed++; + psweep_debug(2, "[sweep] freed: page(%p), obj(%p)\n", (void*)page, (void*)vp); + (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)vp, page->slot_size); + } + else { + RUBY_ASSERT(BUILTIN_TYPE(vp) == T_ZOMBIE); + psweep_debug(2, "[sweep] zombie: page(%p), obj(%p)\n", (void*)page, (void*)vp); + finals++; + } + } + else { + GC_ASSERT(BUILTIN_TYPE(vp) != T_NONE); + sweep_in_ruby_thread(objspace, page, vp); + } + break; + } + default: + rb_bug("unexpected type: %d\n", BUILTIN_TYPE(vp)); + } + } + else { + GC_ASSERT(RVALUE_MARKED(objspace, vp)); + } + + p += slot_size; + bitset >>= 1; + } while (bitset); + + page->pre_freed_slots += freed; + page->pre_empty_slots += empties; + page->pre_final_slots += finals; + page->pre_zombie_slots += zombies; +} + +static void +gc_pre_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page) +{ + uintptr_t p = (uintptr_t)page->start; + bits_t *bits = page->mark_bits; + bits_t bitset; + short slot_size = page->slot_size; + int total_slots = page->total_slots; + psweep_debug(1, "[sweep] gc_pre_sweep_page(heap:%p page:%p) start\n", heap, page); + GC_ASSERT(page->heap == heap); + page->pre_deferred_free_slots = 0; + memset(page->deferred_free_bits, 0, sizeof(page->deferred_free_bits)); + page->pre_zombie_slots = 0; + page->pre_freed_malloc_bytes = 0; + current_sweep_thread_page = page; + + int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH); + int out_of_range_bits = total_slots % BITS_BITLENGTH; + + if (out_of_range_bits != 0) { + bits[bitmap_plane_count - 1] |= ~(((bits_t)1 << out_of_range_bits) - 1); + } + + for (int i = 0; i < bitmap_plane_count; i++) { + bitset = ~bits[i]; + if (bitset) { + gc_pre_sweep_plane(objspace, heap, page, p, bitset, slot_size); + } + p += BITS_BITLENGTH * slot_size; + } + objspace->profile.pages_swept_by_sweep_thread++; + if (page->pre_deferred_free_slots > 0) { + 
objspace->profile.pages_swept_by_sweep_thread_had_deferred_free_objects++; + } + +#if RGENGC_CHECK_MODE + { + /* Assert that all unmarked slots with live objects were either freed, made into + * zombies, or deferred to the Ruby thread. */ + int unmarked_slots = 0; + for (int i = 0; i < bitmap_plane_count; i++) { + bits_t unmarked = ~bits[i]; + unmarked_slots += (int)popcount_bits(unmarked); + } + + int freed_or_zombie = page->pre_freed_slots + page->pre_final_slots + page->pre_deferred_free_slots; + int unmarked_live = unmarked_slots - page->pre_empty_slots - page->pre_zombie_slots; + if (freed_or_zombie != unmarked_live) { + rb_bug("gc_pre_sweep_page: unmarked live slot count mismatch: " + "unmarked_slots=%d - empty_slots=%d - zombie_slots=%d = %d unmarked live, " + "but freed_slots=%d + final_slots=%d + deferred_free_slots=%d = %d", + unmarked_slots, page->pre_empty_slots, page->pre_zombie_slots, unmarked_live, + page->pre_freed_slots, page->pre_final_slots, page->pre_deferred_free_slots, freed_or_zombie); + } + } +#endif + +#if USE_MALLOC_INCREASE_LOCAL + malloc_increase_local_flush(objspace); +#endif + current_sweep_thread_page = NULL; + + psweep_debug(1, "[sweep] gc_pre_sweep_page(heap:%p page:%p) done, deferred free:%d\n", heap, page, page->pre_deferred_free_slots); +} + +static inline bool +done_worker_incremental_sweep_steps_p(rb_objspace_t *objspace, rb_heap_t *heap) +{ + if (rbimpl_atomic_load(&heap->foreground_sweep_steps, RBIMPL_ATOMIC_ACQUIRE) != heap->background_sweep_steps) { + GC_ASSERT(ATOMIC_LOAD_RELAXED(heap->foreground_sweep_steps) > heap->background_sweep_steps); + return true; + } + return false; +} + +static bool +bitmap_is_all_zero(bits_t *bits, size_t count) +{ + for (size_t i = 0; i < count; i++) { + if (bits[i] != 0) return false; + } + return true; +} + +static void +move_to_empty_pages(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page) +{ + GC_ASSERT(bitmap_is_all_zero(page->mark_bits, HEAP_PAGE_BITMAP_LIMIT)); + 
GC_ASSERT(bitmap_is_all_zero(page->uncollectible_bits, HEAP_PAGE_BITMAP_LIMIT)); + GC_ASSERT(bitmap_is_all_zero(page->wb_unprotected_bits, HEAP_PAGE_BITMAP_LIMIT)); + GC_ASSERT(bitmap_is_all_zero(page->marking_bits, HEAP_PAGE_BITMAP_LIMIT)); + GC_ASSERT(bitmap_is_all_zero(page->remembered_bits, HEAP_PAGE_BITMAP_LIMIT)); + GC_ASSERT(bitmap_is_all_zero(page->deferred_free_bits, HEAP_PAGE_BITMAP_LIMIT)); + GC_ASSERT(bitmap_is_all_zero(page->age_bits, HEAP_PAGE_BITMAP_LIMIT * RVALUE_AGE_BIT_COUNT)); + // NOTE: pinned bits can still be set, but it's okay because they are cleared when compaction starts + + heap_unlink_page(objspace, heap, page); + + page->start = 0; + page->total_slots = 0; + page->slot_size = 0; + page->heap = NULL; + page->free_slots = 0; + + asan_unlock_freelist(page); + page->freelist = NULL; + asan_lock_freelist(page); + + asan_poison_memory_region(page->body, HEAP_PAGE_SIZE); + + objspace->empty_pages_count++; + page->free_next = objspace->empty_pages; + objspace->empty_pages = page; +} + +static void +clear_pre_sweep_fields(struct heap_page *page) +{ + page->pre_freed_slots = 0; + page->pre_deferred_free_slots = 0; + memset(page->deferred_free_bits, 0, sizeof(page->deferred_free_bits)); + page->pre_empty_slots = 0; + page->pre_final_slots = 0; + page->pre_zombie_slots = 0; + page->pre_freed_malloc_bytes = 0; +} + +// Perform incremental (lazy) sweep on a heap by the background sweep thread. 
+static void +gc_sweep_step_worker(rb_objspace_t *objspace, rb_heap_t *heap) +{ + // sweep_lock is acquired + // + // We're finished either when they are no pages left to pre-sweep, OR: + // 1) When we're not in `sweep_rest` or `background_mode`, if we've encountered a change in `heap->foreground_sweep_steps` + GC_ASSERT(heap->background_sweep_steps <= ATOMIC_LOAD_RELAXED(heap->foreground_sweep_steps)); + if (heap->done_background_sweep) { + psweep_debug(-2, "[sweep] gc_sweep_step_worker: heap:%p (%ld) - done (early return)\n", heap, heap - heaps); + return; + } + else if (heap->skip_sweep_continue) { + psweep_debug(-2, "[sweep] gc_sweep_step_worker: heap:%p (%ld) - skip_continue (early return)\n", heap, heap - heaps); + heap->skip_sweep_continue = false; + return; + } + while (1) { + struct heap_page *sweep_page = heap->sweeping_page; + if (!sweep_page) { + GC_ASSERT(!heap->done_background_sweep); + GC_ASSERT(objspace->heaps_done_background_sweep < HEAP_COUNT); + heap->done_background_sweep = true; + objspace->heaps_done_background_sweep++; + psweep_debug(-2, "[sweep] gc_sweep_step_worker: heap:%p (%ld) - !sweeping_page\n", heap, heap - heaps); + break; + } + sweep_page->free_next = NULL; + struct heap_page *next = ccan_list_next(&heap->pages, sweep_page, page_node); + + if (!next) { + GC_ASSERT(!heap->done_background_sweep); + GC_ASSERT(objspace->heaps_done_background_sweep < HEAP_COUNT); + heap->done_background_sweep = true; + objspace->heaps_done_background_sweep++; + psweep_debug(-2, "[sweep] gc_sweep_step_worker: heap:%p (%ld) - !next\n", heap, heap - heaps); + // Let Ruby thread deal with last page of the heap. 
+ break; + } + + heap->sweeping_page = next; + heap->pre_sweeping_page = sweep_page; + + sweep_lock_unlock(&objspace->sweep_lock); + + gc_pre_sweep_page(objspace, heap, sweep_page); + + sweep_lock_lock(&objspace->sweep_lock); + heap->pre_sweeping_page = NULL; + sweep_page->free_next = NULL; + + int pre_freed_slots = sweep_page->pre_freed_slots; + int pre_empty_slots = sweep_page->pre_empty_slots; + int free_slots = pre_freed_slots + pre_empty_slots; + +#if PSWEEP_LOCK_STATS > 0 + instrumented_lock_acquire(&heap->swept_pages_lock, &swept_pages_lock_stats); +#else + rb_native_mutex_lock(&heap->swept_pages_lock); +#endif + { + if (heap->swept_pages) { + // NOTE: heap->swept_pages needs to be in swept order for gc_sweep_step to work properly. + // TODO: Change to LIFO to get better shared memory cache benefits across threads (L2/L3) + struct heap_page *latest = heap->latest_swept_page; + GC_ASSERT(latest); + latest->free_next = sweep_page; + } + else { + heap->swept_pages = sweep_page; + } + heap->latest_swept_page = sweep_page; + } + rb_native_mutex_unlock(&heap->swept_pages_lock); + + psweep_debug(-2, "[sweep] gc_sweep_step_worker: heap:%p (%ld) - swept page:%p\n", heap, heap - heaps, sweep_page); + + if (!objspace->background_sweep_mode) { + if (!objspace->sweep_rest && done_worker_incremental_sweep_steps_p(objspace, heap)) { + rb_native_cond_broadcast(&heap->sweep_page_cond); + psweep_debug(-2, "[sweep] (fg) gc_sweep_step_worker: done incremental step heap:%p (%ld)\n", heap, heap - heaps); + heap->background_sweep_steps = ATOMIC_LOAD_RELAXED(heap->foreground_sweep_steps); + break; + } + } + else { + heap->pre_swept_slots_deferred += free_slots; + if (RB_UNLIKELY(objspace->background_sweep_abort)) { + psweep_debug(-2, "[sweep] (bg) gc_sweep_step_worker: break early heap:%p (%ld) (abort)\n", heap, heap - heaps); + break; + } + else if (objspace->background_sweep_restart_heaps) { + psweep_debug(-2, "[sweep] (bg) gc_sweep_step_worker: break early heap:%p (%ld) 
(restart)\n", heap, heap - heaps); + break; + } + } + // notify of newly swept page in case Ruby thread is waiting on us + rb_native_cond_broadcast(&heap->sweep_page_cond); + } + // sweep_lock is acquired +} + +static void * +gc_sweep_thread_func(void *ptr) +{ + rb_objspace_t *objspace = ptr; + + psweep_debug(1, "[sweep] sweep_thread start\n"); + sweep_lock_lock(&objspace->sweep_lock); + objspace->sweep_thread_sweep_exited = false; + + while (objspace->sweep_thread_running) { + while (!objspace->sweep_thread_sweep_requested && objspace->sweep_thread_running) { + psweep_debug(1, "[sweep] sweep_thread wait\n"); + objspace->sweep_thread_waiting_request = true; + sweep_lock_set_unlocked(); + rb_native_cond_wait(&objspace->sweep_cond, &objspace->sweep_lock); + sweep_lock_set_locked(); + objspace->sweep_thread_waiting_request = false; + psweep_debug(1, "[sweep] sweep_thread wake\n"); // requested or signalled to exit + } + if (!objspace->sweep_thread_running) { + break; + } + + objspace->sweep_thread_sweep_requested = false; + objspace->sweep_thread_sweeping = true; + + restart_heaps: + for (int i = 0; i < HEAP_COUNT; i++) { + rb_heap_t *heap = &heaps[i]; + if (RB_UNLIKELY(objspace->background_sweep_mode && objspace->background_sweep_abort)) { + psweep_debug(-2, "[sweep] abort: break before sweeping heap:%p (%d)\n", heap, i); + break; + } + if (objspace->background_sweep_mode && objspace->background_sweep_restart_heaps) { + objspace->background_sweep_restart_heaps = false; + psweep_debug(-2, "[sweep] restart heaps from 0 (at %d)\n", i); + goto restart_heaps; + } + psweep_debug(-2, "[sweep] sweep heap:%p (%d)\n", heap, i); + gc_sweep_step_worker(objspace, heap); + } + psweep_debug(1, "[sweep] /sweep_heaps\n"); + + objspace->sweep_thread_sweeping = false; + rb_native_cond_broadcast(&objspace->sweep_cond); + } + psweep_debug(-5, "[sweep] sweep_thread exit\n"); + objspace->sweep_thread_sweep_requested = false; + objspace->sweep_thread_sweep_exited = true; + 
rb_native_cond_broadcast(&objspace->sweep_cond); + sweep_lock_unlock(&objspace->sweep_lock); + + return NULL; +} + static void gc_sweep_start_heap(rb_objspace_t *objspace, rb_heap_t *heap) { + // Background thread is not sweeping right now heap->sweeping_page = ccan_list_top(&heap->pages, struct heap_page, page_node); heap->free_pages = NULL; + heap->swept_pages = NULL; heap->pooled_pages = NULL; - if (!objspace->flags.immediate_sweep) { - struct heap_page *page = NULL; + heap->latest_swept_page = NULL; + heap->pre_swept_slots_deferred = 0; +#if RUBY_DEBUG + heap->made_zombies = 0; +#endif + heap->pre_sweeping_page = NULL; + heap->background_sweep_steps = heap->foreground_sweep_steps; + heap->is_finished_sweeping = false; + heap->done_background_sweep = false; + heap->skip_sweep_continue = false; + + struct heap_page *page = NULL; + + if (!objspace->flags.immediate_sweep) { ccan_list_for_each(&heap->pages, page, page_node) { - page->flags.before_sweep = TRUE; + page->before_sweep = 1; + GC_ASSERT(page->pre_deferred_free_slots == 0); } } } @@ -3767,8 +4833,16 @@ static void gc_sweep_start(rb_objspace_t *objspace) { gc_mode_transition(objspace, gc_mode_sweeping); + objspace->during_lazy_sweeping = TRUE; objspace->rincgc.pooled_slots = 0; +// Background sweeping cannot be happening +#if VM_CHECK_MODE > 0 + sweep_lock_lock(&objspace->sweep_lock); + GC_ASSERT(!objspace->sweep_thread_sweeping && !objspace->sweep_thread_sweep_requested); + sweep_lock_unlock(&objspace->sweep_lock); +#endif + #if GC_CAN_COMPILE_COMPACTION if (objspace->flags.during_compacting) { gc_sort_heap_by_compare_func( @@ -3778,6 +4852,8 @@ gc_sweep_start(rb_objspace_t *objspace) } #endif + objspace->heaps_done_background_sweep = 0; + for (int i = 0; i < HEAP_COUNT; i++) { rb_heap_t *heap = &heaps[i]; gc_sweep_start_heap(objspace, heap); @@ -3791,6 +4867,22 @@ gc_sweep_start(rb_objspace_t *objspace) } rb_gc_ractor_newobj_cache_foreach(gc_ractor_newobj_cache_clear, NULL); + + psweep_debug(1, "[gc] 
gc_sweep_start\n"); + if (!objspace->flags.during_compacting && !(objspace->hook_events & RUBY_INTERNAL_EVENT_FREEOBJ)) { + rbimpl_atomic_store(&objspace->use_background_sweep_thread, true, RBIMPL_ATOMIC_RELEASE); + psweep_debug(-1, "[gc] gc_sweep_start: requesting sweep thread\n"); + sweep_lock_lock(&objspace->sweep_lock); + { + objspace->sweep_thread_sweep_requested = true; + rb_native_cond_broadcast(&objspace->sweep_cond); + } + sweep_lock_unlock(&objspace->sweep_lock); + } + else { + rbimpl_atomic_store(&objspace->use_background_sweep_thread, false, RBIMPL_ATOMIC_RELEASE); + psweep_debug(-1, "[gc] gc_sweep_start: not using background sweep thread\n"); + } } static void @@ -3802,6 +4894,20 @@ gc_sweep_finish_heap(rb_objspace_t *objspace, rb_heap_t *heap) size_t init_slots = gc_params.heap_init_bytes / heap->slot_size; size_t min_free_slots = (size_t)(MAX(total_slots, init_slots) * gc_params.heap_free_slots_min_ratio); + psweep_debug(-1, "[gc] gc_sweep_finish heap:%p (%ld)\n", heap, heap - heaps); + +#if RUBY_DEBUG + if (!objspace->flags.during_compacting) { + objspace->have_swept_slots += swept_slots; + objspace->have_swept_slots += heap->made_zombies; + objspace->will_be_swept_slots -= heap->zombie_slots; + } +#endif + + GC_ASSERT(heap->background_sweep_steps <= ATOMIC_LOAD_RELAXED(heap->foreground_sweep_steps)); + GC_ASSERT(!heap->is_finished_sweeping); + heap->is_finished_sweeping = true; + if (swept_slots < min_free_slots && /* The heap is a growth heap if it freed more slots than had empty slots. 
*/ ((heap->empty_slots == 0 && total_slots > 0) || heap->freed_slots > heap->empty_slots)) { @@ -3812,8 +4918,8 @@ gc_sweep_finish_heap(rb_objspace_t *objspace, rb_heap_t *heap) struct heap_page *resurrected_page; while (swept_slots < min_free_slots && (resurrected_page = heap_page_resurrect(objspace))) { - heap_add_page(objspace, heap, resurrected_page); - heap_add_freepage(heap, resurrected_page); + heap_add_page(objspace, heap, resurrected_page, false); + heap_add_freepage(heap, resurrected_page, "gc_sweep_finish_heap"); swept_slots += resurrected_page->free_slots; } @@ -3839,6 +4945,27 @@ static void gc_sweep_finish(rb_objspace_t *objspace) { gc_report(1, objspace, "gc_sweep_finish\n"); + psweep_debug(-1, "[gc] gc_sweep_finish\n"); + + rbimpl_atomic_store(&objspace->use_background_sweep_thread, false, RBIMPL_ATOMIC_RELEASE); + +#if RUBY_DEBUG + // When calling GC.start, if in the middle of a non-full mark it will be set as full mark in gc_rest() so the numbers + // will be off. + if (!objspace->flags.was_compacting && !objspace->sweep_rest && gc_config_full_mark_val) { + if (objspace->will_be_swept_slots != objspace->have_swept_slots) { + fprintf(stderr, "Expecting to free %lu slots, freed %lu slots (major:%d)\n", objspace->will_be_swept_slots, objspace->have_swept_slots, is_full_marking(objspace)); + for (int i = 0; i < HEAP_COUNT; i++) { + rb_heap_t *heap = &heaps[i]; + fprintf(stderr, "heap %ld zombies_created:%u freed_slots:%lu empty_slots:%lu zombie_slots:%lu, total_slots:%lu\n", + heap - heaps, heap->made_zombies, heap->freed_slots, heap->empty_slots, heap->zombie_slots, heap->total_slots); + } + + rb_bug("MISMATCH: marked_slots:%lu, pooled_slots:%lu, empty_pages:%lu", objspace->marked_slots, objspace->rincgc.pooled_slots, objspace->empty_pages_count); + } + } + objspace->flags.was_compacting = FALSE; +#endif gc_prof_set_heap_info(objspace); heap_pages_free_unused_pages(objspace); @@ -3846,8 +4973,23 @@ gc_sweep_finish(rb_objspace_t *objspace) for (int i 
= 0; i < HEAP_COUNT; i++) { rb_heap_t *heap = &heaps[i]; +#if RUBY_DEBUG + { + struct heap_page *page; + ccan_list_for_each(&heap->pages, page, page_node) { + if (RUBY_ATOMIC_LOAD(page->before_sweep)) { + rb_bug("gc_sweep_finish: page %p in heap %d still has before_sweep set", (void *)page, i); + } + } + } + heap->zombie_slots = 0; +#endif + heap->freed_slots = 0; heap->empty_slots = 0; + if (heap->background_sweep_steps < heap->foreground_sweep_steps) { + heap->background_sweep_steps = heap->foreground_sweep_steps; + } if (!will_be_incremental_marking(objspace)) { struct heap_page *end_page = heap->free_pages; @@ -3865,71 +5007,305 @@ gc_sweep_finish(rb_objspace_t *objspace) rb_gc_event_hook(0, RUBY_INTERNAL_EVENT_GC_END_SWEEP); gc_mode_transition(objspace, gc_mode_none); + objspace->during_lazy_sweeping = FALSE; #if RGENGC_CHECK_MODE >= 2 gc_verify_internal_consistency(objspace); #endif } +// Dequeue a page swept by the sweep thread. If `free_in_user_thread` is true, then +// dequeue an unswept page to be swept by the Ruby thread. It can also dequeue an unswept +// page if otherwise it would have to wait for the sweep thread. In that case, `dequeued_unswept_page` +// is set to true. +// +// It returns NULL when there are no more pages to sweep for the heap. 
+static struct heap_page * +gc_sweep_dequeue_page(rb_objspace_t *objspace, rb_heap_t *heap, bool free_in_user_thread, bool *dequeued_unswept_page) +{ + if (free_in_user_thread) { + GC_ASSERT(!objspace->use_background_sweep_thread); + if (heap->sweeping_page == NULL) { + psweep_debug(0, "[gc] gc_sweep_dequeue_page: NULL page (synchronous) from heap(%p) (%ld)\n", heap, heap - heaps); + return NULL; + } + else { + struct heap_page *cur = heap->sweeping_page; + psweep_debug(0, "[gc] gc_sweep_dequeue_page:%p (synchronous) from heap(%p %ld)\n", cur, heap, heap - heaps); + struct heap_page *next = ccan_list_next(&heap->pages, cur, page_node); + heap->sweeping_page = next; + return cur; + } + } + + struct heap_page *page = NULL; + + // Avoid taking the global sweep_lock if we can +#if PSWEEP_LOCK_STATS > 0 + instrumented_lock_acquire(&heap->swept_pages_lock, &swept_pages_lock_stats); +#else + rb_native_mutex_lock(&heap->swept_pages_lock); +#endif + { + if (heap->swept_pages) { + page = heap->swept_pages; + psweep_debug(0, "[gc] gc_sweep_dequeue_page: got page:%p from heap(%p)->swept_pages (swept_pages lock) (heap %ld)\n", page, heap, heap - heaps); + heap->swept_pages = page->free_next; + } + } + rb_native_mutex_unlock(&heap->swept_pages_lock); + if (page) return page; + + sweep_lock_lock(&objspace->sweep_lock); + { + GC_ASSERT(!objspace->background_sweep_mode); + retry_swept_pages: + if (heap->swept_pages) { // grab the earliest page that the sweep thread swept (ie: it dequeues in swept order) + page = heap->swept_pages; + psweep_debug(0, "[gc] gc_sweep_dequeue_page: got page:%p from heap(%p)->swept_pages (sweep_lock) (heap %ld)\n", page, heap, heap - heaps); + heap->swept_pages = page->free_next; + } + else if (!heap->sweeping_page) { // This heap is finished + while (heap->pre_sweeping_page) { + sweep_lock_set_unlocked(); + rb_native_cond_wait(&heap->sweep_page_cond, &objspace->sweep_lock); + sweep_lock_set_locked(); + goto retry_swept_pages; + } + psweep_debug(0, "[gc] 
gc_sweep_dequeue_page: got nil page from heap(%p) (heap %ld) end\n", heap, heap - heaps); + } + else { + *dequeued_unswept_page = true; + page = heap->sweeping_page; // this could be the last page + heap->sweeping_page = ccan_list_next(&heap->pages, page, page_node); + psweep_debug(0, "[gc] gc_sweep_dequeue_page: dequeued unswept page from heap(%p) (heap %ld)\n", heap, heap - heaps); + } + GC_ASSERT(!objspace->background_sweep_mode); + } + sweep_lock_unlock(&objspace->sweep_lock); + + return page; +} + +MAYBE_UNUSED(static int +freelist_size(struct free_slot *slot)) +{ + if (!slot) return 0; + int size = 0; + while (slot) { + size++; + slot = slot->next; + } + return size; +} + +static inline bool +is_last_heap(rb_objspace_t *objspace, rb_heap_t *heap) +{ + return heap - heaps == (HEAP_COUNT - 1); +} + +static void +gc_sweep_step_deferred_free(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *sweep_page, unsigned short *freed_out, unsigned short *finals_out) +{ + unsigned short freed = 0; + unsigned short finals = 0; + uintptr_t p = (uintptr_t)sweep_page->start; + bits_t *deferred_bits = sweep_page->deferred_free_bits; + int total_slots = sweep_page->total_slots; + short slot_size = sweep_page->slot_size; + + int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH); + int out_of_range_bits = total_slots % BITS_BITLENGTH; + bits_t bitset; + + if (out_of_range_bits != 0) { + deferred_bits[bitmap_plane_count - 1] &= (((bits_t)1 << out_of_range_bits) - 1); + } + + for (int i = 0; i < bitmap_plane_count; i++) { + bitset = deferred_bits[i]; + p = (uintptr_t)sweep_page->start + (i * BITS_BITLENGTH * slot_size); + while (bitset) { + if (bitset & 1) { + VALUE obj = (VALUE)p; + GC_ASSERT(GET_HEAP_PAGE(obj) == sweep_page); + GC_ASSERT(!RVALUE_MARKED(objspace, obj)); + if (deferred_free(objspace, obj)) { + freed++; + } + else { + finals++; + } + } + p += slot_size; + bitset >>= 1; + } + } + *freed_out = freed; + *finals_out = finals; +} + +// Perform incremental 
(lazy) sweep on a heap. static int gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) { - struct heap_page *sweep_page = heap->sweeping_page; - int swept_slots = 0; - int pooled_slots = 0; + size_t swept_slots = 0; + size_t pooled_slots = 0; + +#if RUBY_DEBUG + sweep_lock_lock(&objspace->sweep_lock); + GC_ASSERT(!objspace->background_sweep_mode); + sweep_lock_unlock(&objspace->sweep_lock); +#endif - if (sweep_page == NULL) return FALSE; + if (heap_is_sweep_done(objspace, heap)) { + psweep_debug(0, "[gc] gc_sweep_step: heap %p (%ld) is heap_is_sweep_done() early!\n", heap, heap - heaps); + GC_ASSERT(heap->sweeping_page == NULL); + GC_ASSERT(heap->is_finished_sweeping); + return heap->free_pages != NULL; + } #if GC_ENABLE_LAZY_SWEEP gc_prof_sweep_timer_start(objspace); #endif + psweep_debug(-2, "[gc] gc_sweep_step heap:%p (%ld) use_sweep_thread:%d\n", heap, heap - heaps, objspace->use_background_sweep_thread); + bool sweep_rest = objspace->sweep_rest; + bool use_sweep_thread = objspace->use_background_sweep_thread; + + while (1) { + bool free_in_user_thread_p = !use_sweep_thread; + bool dequeued_unswept_page = false; + // NOTE: pages we dequeue from the sweep thread need to be AFTER the list of heap->free_pages so we don't free from pages + // we've allocated from since sweep started. 
+ struct heap_page *sweep_page = gc_sweep_dequeue_page(objspace, heap, free_in_user_thread_p, &dequeued_unswept_page); + if (RB_UNLIKELY(!sweep_page)) { + psweep_debug(-2, "[gc] gc_sweep_step heap:%p (%ld) deq() = nil, break\n", heap, heap - heaps); + break; + } + if (dequeued_unswept_page) { + free_in_user_thread_p = true; + psweep_debug(-2, "[gc] gc_sweep_step heap:%p (%ld) deq unswept page\n", heap, heap - heaps); + } + else { + psweep_debug(-2, "[gc] gc_sweep_step heap:%p (%ld) deq preswept page\n", heap, heap - heaps); + } + GC_ASSERT(sweep_page->heap == heap); - do { RUBY_DEBUG_LOG("sweep_page:%p", (void *)sweep_page); struct gc_sweep_context ctx = { - .page = sweep_page, - .final_slots = 0, - .freed_slots = 0, - .empty_slots = 0, + .page = sweep_page }; - gc_sweep_page(objspace, heap, &ctx); - int free_slots = ctx.freed_slots + ctx.empty_slots; - heap->sweeping_page = ccan_list_next(&heap->pages, sweep_page, page_node); + if (free_in_user_thread_p) { + gc_sweep_page(objspace, heap, &ctx); + GC_ASSERT(sweep_page->pre_deferred_free_slots == 0); + } + else { + unsigned short deferred_free_freed = 0; + unsigned short deferred_free_final_slots = 0; + unsigned short deferred_to_free = sweep_page->pre_deferred_free_slots; + + psweep_debug(-2, "[gc] gc_sweep_step: (heap:%p %ld, page:%p) free_ruby_th: %d, deferred_to_free:%d, pre_freed:%d, pre_empty:%d\n", + heap, heap - heaps, sweep_page, free_in_user_thread_p, deferred_to_free, sweep_page->pre_freed_slots, sweep_page->pre_empty_slots); - if (free_slots == sweep_page->total_slots) { - /* There are no living objects, so move this page to the global empty pages. 
*/ - heap_unlink_page(objspace, heap, sweep_page); + if (deferred_to_free > 0) { + gc_sweep_step_deferred_free(objspace, heap, sweep_page, &deferred_free_freed, &deferred_free_final_slots); + } + GC_ASSERT(deferred_to_free == (deferred_free_freed + deferred_free_final_slots)); + + ctx.final_slots = sweep_page->pre_final_slots + deferred_free_final_slots; + ctx.freed_slots = sweep_page->pre_freed_slots + deferred_free_freed; + ctx.empty_slots = sweep_page->pre_empty_slots; + ctx.zombie_slots = sweep_page->pre_zombie_slots; + + gc_post_sweep_page(objspace, heap, sweep_page); // clear bits + } + + if (0) fprintf(stderr, "gc_sweep_page(%"PRIdSIZE"): total_slots: %d, freed_slots: %d, empty_slots: %d, final_slots: %d\n", + rb_gc_count(), + sweep_page->total_slots, + ctx.freed_slots, ctx.empty_slots, ctx.final_slots); +#if GC_PROFILE_MORE_DETAIL + if (gc_prof_enabled(objspace)) { + gc_profile_record *record = gc_prof_record(objspace); + record->removing_objects += ctx.final_slots + ctx.freed_slots; + record->empty_objects += ctx.empty_slots; + } +#endif + + int free_slots = ctx.freed_slots + ctx.empty_slots; + GC_ASSERT(sweep_page->total_slots > 0); + GC_ASSERT(sweep_page->total_slots >= free_slots); + + if (free_in_user_thread_p) { + GC_ASSERT(sweep_page->free_slots == free_slots); // gc_sweep_page() sets sweep_page->free slots + GC_ASSERT(sweep_page->heap->total_freed_objects >= (unsigned long)ctx.freed_slots); + } else { + sweep_page->free_slots = free_slots; + // NOTE: sweep_page->final slots have already been updated by make_zombie + GC_ASSERT(sweep_page->free_slots <= sweep_page->total_slots); + GC_ASSERT(sweep_page->final_slots <= sweep_page->total_slots); + sweep_page->heap->total_freed_objects += ctx.freed_slots; + + if (sweep_page->pre_freed_malloc_bytes > 0) { + atomic_sub_nounderflow(&malloc_increase, sweep_page->pre_freed_malloc_bytes); +#if RGENGC_ESTIMATE_OLDMALLOC + atomic_sub_nounderflow(&objspace->malloc_counters.oldmalloc_increase, 
sweep_page->pre_freed_malloc_bytes); +#endif + } + clear_pre_sweep_fields(sweep_page); + } - sweep_page->start = 0; - sweep_page->total_slots = 0; - sweep_page->slot_size = 0; - sweep_page->heap = NULL; - sweep_page->free_slots = 0; +#if RGENGC_CHECK_MODE + short freelist_len = 0; + asan_unlock_freelist(sweep_page); + struct free_slot *ptr = sweep_page->freelist; + while (ptr) { + freelist_len++; + rb_asan_unpoison_object((VALUE)ptr, false); + struct free_slot *next = ptr->next; + rb_asan_poison_object((VALUE)ptr); + ptr = next; + } + asan_lock_freelist(sweep_page); + if (freelist_len != sweep_page->free_slots) { + rb_bug("inconsistent freelist length: expected %d but was %d", sweep_page->free_slots, freelist_len); + } +#endif - asan_unlock_freelist(sweep_page); - sweep_page->freelist = NULL; - asan_lock_freelist(sweep_page); + psweep_debug(0, "[gc] gc_sweep_step: dequeued page(heap:%p %ld, page:%p) free_slots:%u,total_slots:%u\n", heap, heap - heaps, sweep_page, free_slots, sweep_page->total_slots); - asan_poison_memory_region(sweep_page->body, HEAP_PAGE_SIZE); +#if RUBY_DEBUG + heap->zombie_slots += ctx.zombie_slots; +#endif - objspace->empty_pages_count++; - sweep_page->free_next = objspace->empty_pages; - objspace->empty_pages = sweep_page; + if (free_slots == sweep_page->total_slots) { +#if RUBY_DEBUG + objspace->have_swept_slots += free_slots; +#endif + psweep_debug(0, "[gc] gc_sweep_step: adding to empty_pages:%p\n", sweep_page); + move_to_empty_pages(objspace, heap, sweep_page); } else if (free_slots > 0) { heap->freed_slots += ctx.freed_slots; heap->empty_slots += ctx.empty_slots; if (pooled_slots < GC_INCREMENTAL_SWEEP_POOL_SLOT_COUNT) { + psweep_debug(0, "[gc] gc_sweep_step: adding pooled_page:%p, pooled_slots:%d\n", sweep_page, pooled_slots); heap_add_poolpage(objspace, heap, sweep_page); pooled_slots += free_slots; } else { - heap_add_freepage(heap, sweep_page); + psweep_debug(0, "[gc] gc_sweep_step: adding freepage:%p, swept_slots:%d\n", sweep_page, 
swept_slots); + heap_add_freepage(heap, sweep_page, "gc_sweep_step"); swept_slots += free_slots; if (swept_slots > GC_INCREMENTAL_SWEEP_SLOT_COUNT) { + if (!sweep_rest && use_sweep_thread) { + rbimpl_atomic_inc(&heap->foreground_sweep_steps, RBIMPL_ATOMIC_RELEASE); // signal sweep thread to move on + } + psweep_debug(0, "[gc] gc_sweep_step got to SWEEP_SLOT_COUNT, break\n"); break; } } @@ -3937,13 +5313,14 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) else { sweep_page->free_next = NULL; } - } while ((sweep_page = heap->sweeping_page)); + } - if (!heap->sweeping_page) { + if (heap_is_sweep_done(objspace, heap)) { + psweep_debug(0, "[gc] gc_sweep_step heap:%p (%ld) sweep done\n", heap, heap - heaps); gc_sweep_finish_heap(objspace, heap); if (!has_sweeping_pages(objspace)) { - gc_sweep_finish(objspace); + gc_sweep_finish(objspace); // done, no more pages in any heap } } @@ -3951,36 +5328,121 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) gc_prof_sweep_timer_stop(objspace); #endif + psweep_debug(1, "[gc] gc_sweep_step: finished for heap:%p (%ld), got free page:%d\n", heap, heap - heaps, heap->free_pages != NULL); return heap->free_pages != NULL; } +static bool +background_sweep_done_p(rb_objspace_t *objspace) +{ + // must have sweep_lock acquired (TODO: add assertion) + return objspace->heaps_done_background_sweep == HEAP_COUNT; +} + +unsigned long long sweep_rest_count = 0; + static void gc_sweep_rest(rb_objspace_t *objspace) { + sweep_rest_count++; + sweep_lock_lock(&objspace->sweep_lock); + { + objspace->sweep_rest = true; // reset to false in `gc_sweeping_exit` + if (background_sweep_done_p(objspace)) { + psweep_debug(-2, "[gc] gc_sweep_rest: bg done, not requesting\n"); + } + else { + if (objspace->use_background_sweep_thread && !objspace->sweep_thread_sweeping && !objspace->sweep_thread_sweep_requested) { + psweep_debug(-2, "[gc] gc_sweep_rest: request sweep thread\n"); + objspace->sweep_thread_sweep_requested = true; + 
rb_native_cond_broadcast(&objspace->sweep_cond); + } + else if (objspace->use_background_sweep_thread) { + psweep_debug(-2, "[gc] gc_sweep_rest: restart sweep thread\n"); + objspace->background_sweep_restart_heaps = true; // restart sweeping heaps from heap 0 + } + } + } + sweep_lock_unlock(&objspace->sweep_lock); + for (int i = 0; i < HEAP_COUNT; i++) { rb_heap_t *heap = &heaps[i]; - while (heap->sweeping_page) { + while (!heap_is_sweep_done(objspace, heap)) { + psweep_debug(0, "[gc] gc_sweep_rest: gc_sweep_step heap:%p (heap %ld)\n", heap, heap - heaps); gc_sweep_step(objspace, heap); } + GC_ASSERT(heap->is_finished_sweeping); + heap->background_sweep_steps = heap->foreground_sweep_steps; } + + GC_ASSERT(!has_sweeping_pages(objspace)); + GC_ASSERT(gc_mode(objspace) == gc_mode_none); } +unsigned long long sweep_continue_count = 0; + static void gc_sweep_continue(rb_objspace_t *objspace, rb_heap_t *sweep_heap) { GC_ASSERT(dont_gc_val() == FALSE || objspace->profile.latest_gc_info & GPR_FLAG_METHOD); if (!GC_ENABLE_LAZY_SWEEP) return; - gc_sweeping_enter(objspace); + psweep_debug(-2, "[gc] gc_sweep_continue\n"); + + sweep_continue_count++; + + gc_sweeping_enter(objspace, "gc_sweep_continue"); + sweep_lock_lock(&objspace->sweep_lock); + { + if (objspace->use_background_sweep_thread) { + if (background_sweep_done_p(objspace)) { + psweep_debug(-2, "[gc] gc_sweep_continue: bg done, not requesting\n"); + } + else { + int num_heaps_need_continue = 0; + for (int i = 0; i < HEAP_COUNT; i++) { + rb_heap_t *heap = &heaps[i]; + heap->background_sweep_steps = heap->foreground_sweep_steps; + if (heap->pre_swept_slots_deferred >= (GC_INCREMENTAL_SWEEP_SLOT_COUNT + GC_INCREMENTAL_SWEEP_POOL_SLOT_COUNT)) { + heap->skip_sweep_continue = true; + } + else { + if (!heap->is_finished_sweeping && !heap->done_background_sweep) { + num_heaps_need_continue++; + } + heap->skip_sweep_continue = false; + } + heap->pre_swept_slots_deferred = 0; + } + if (num_heaps_need_continue > 0) { + if 
(!objspace->sweep_thread_sweeping && !objspace->sweep_thread_sweep_requested) { + psweep_debug(-2, "[gc] gc_sweep_continue: requesting sweep thread\n"); + objspace->sweep_thread_sweep_requested = true; + rb_native_cond_broadcast(&objspace->sweep_cond); + } + else { + psweep_debug(-2, "[gc] gc_sweep_continue: sweep thread restart heaps\n"); + objspace->background_sweep_restart_heaps = true; + } + } + } + } + else { + psweep_debug(-2, "[gc] gc_sweep_continue: !use_background_sweep_thread\n"); + } + } + sweep_lock_unlock(&objspace->sweep_lock); for (int i = 0; i < HEAP_COUNT; i++) { rb_heap_t *heap = &heaps[i]; + if (gc_sweep_step(objspace, heap)) { GC_ASSERT(heap->free_pages != NULL); } else if (heap == sweep_heap) { if (objspace->empty_pages_count > 0 || objspace->heap_pages.allocatable_bytes > 0) { + GC_ASSERT(!sweep_heap->sweeping_page); // went through whole heap, couldn't find free page /* [Bug #21548] * * If this heap is the heap we want to sweep, but we weren't able @@ -3991,7 +5453,7 @@ gc_sweep_continue(rb_objspace_t *objspace, rb_heap_t *sweep_heap) * empty/allocatable pages. If other heaps are not finished sweeping * then we do not finish this GC and we will end up triggering a new * GC cycle during this GC phase. 
*/ - heap_page_allocate_and_initialize(objspace, heap); + heap_page_allocate_and_initialize(objspace, heap, false); GC_ASSERT(heap->free_pages != NULL); } @@ -4059,7 +5521,7 @@ invalidate_moved_plane(rb_objspace_t *objspace, struct heap_page *page, uintptr_ struct heap_page *orig_page = GET_HEAP_PAGE(object); orig_page->free_slots++; RVALUE_AGE_SET_BITMAP(object, 0); - heap_page_add_freeobj(objspace, orig_page, object); + heap_page_add_freeobj(objspace, orig_page, object, false); GC_ASSERT(RVALUE_MARKED(objspace, forwarding_object)); GC_ASSERT(BUILTIN_TYPE(forwarding_object) != T_MOVED); @@ -4103,10 +5565,16 @@ gc_compact_start(rb_objspace_t *objspace) struct heap_page *page = NULL; gc_mode_transition(objspace, gc_mode_compacting); +#if RUBY_DEBUG + sweep_lock_lock(&objspace->sweep_lock); + GC_ASSERT(!objspace->sweep_thread_sweeping && !objspace->sweep_thread_sweep_requested); + sweep_lock_unlock(&objspace->sweep_lock); +#endif + for (int i = 0; i < HEAP_COUNT; i++) { rb_heap_t *heap = &heaps[i]; ccan_list_for_each(&heap->pages, page, page_node) { - page->flags.before_sweep = TRUE; + page->before_sweep = 1; } heap->compact_cursor = ccan_list_tail(&heap->pages, struct heap_page, page_node); @@ -4132,10 +5600,10 @@ static void gc_sweep_compact(rb_objspace_t *objspace); static void gc_sweep(rb_objspace_t *objspace) { - gc_sweeping_enter(objspace); - const unsigned int immediate_sweep = objspace->flags.immediate_sweep; + gc_sweeping_enter(objspace, "gc_sweep"); + gc_report(1, objspace, "gc_sweep: immediate: %d\n", immediate_sweep); gc_sweep_start(objspace); @@ -4148,12 +5616,12 @@ gc_sweep(rb_objspace_t *objspace) gc_prof_sweep_timer_start(objspace); #endif gc_sweep_rest(objspace); + #if !GC_ENABLE_LAZY_SWEEP gc_prof_sweep_timer_stop(objspace); #endif } else { - /* Sweep every size pool. 
*/ for (int i = 0; i < HEAP_COUNT; i++) { rb_heap_t *heap = &heaps[i]; @@ -4931,6 +6399,7 @@ struct verify_internal_consistency_struct { int err_count; size_t live_object_count; size_t zombie_object_count; + size_t zombie_ran_finalizer_object_count; VALUE parent; size_t old_object_count; @@ -5027,7 +6496,11 @@ verify_internal_consistency_i(void *page_start, void *page_end, size_t stride, if (BUILTIN_TYPE(obj) == T_ZOMBIE) { data->zombie_object_count++; - if ((RBASIC(obj)->flags & ~ZOMBIE_OBJ_KEPT_FLAGS) != T_ZOMBIE) { + if (FL_TEST(obj, ZOMBIE_NEEDS_FREE_FLAG)) { + data->zombie_ran_finalizer_object_count++; + } + + if ((RBASIC(obj)->flags & ~(ZOMBIE_OBJ_KEPT_FLAGS|ZOMBIE_NEEDS_FREE_FLAG)) != T_ZOMBIE) { fprintf(stderr, "verify_internal_consistency_i: T_ZOMBIE has extra flags set: %s\n", rb_obj_info(obj)); data->err_count++; @@ -5164,6 +6637,7 @@ gc_verify_internal_consistency_(rb_objspace_t *objspace) uintptr_t end = start + page->total_slots * slot_size; verify_internal_consistency_i((void *)start, (void *)end, slot_size, &data); + data.live_object_count += (page->pre_freed_slots + page->pre_final_slots + page->pre_zombie_slots); } if (data.err_count != 0) { @@ -5216,7 +6690,7 @@ gc_verify_internal_consistency_(rb_objspace_t *objspace) } if (total_final_slots_count(objspace) != data.zombie_object_count || - total_final_slots_count(objspace) != list_count) { + (data.zombie_object_count - data.zombie_ran_finalizer_object_count) != list_count) { rb_bug("inconsistent finalizing object count:\n" " expect %"PRIuSIZE"\n" @@ -5241,6 +6715,7 @@ gc_verify_internal_consistency(void *objspace_ptr) rb_gc_vm_barrier(); // stop other ractors unsigned int prev_during_gc = during_gc; + wait_for_background_sweeping_to_finish(objspace, true, false, "verify_internal_consistency"); during_gc = FALSE; // stop gc here { gc_verify_internal_consistency_(objspace); @@ -5427,7 +6902,14 @@ gc_marks_finish(rb_objspace_t *objspace) min_free_slots = gc_params.heap_free_slots * r_mul; } + int 
full_marking = is_full_marking(objspace); +#if RUBY_DEBUG + if (!objspace->flags.during_compacting) { + objspace->have_swept_slots = 0; + objspace->will_be_swept_slots = sweep_slots; + } +#endif GC_ASSERT(objspace_available_slots(objspace) >= objspace->marked_slots); @@ -5465,7 +6947,13 @@ gc_marks_finish(rb_objspace_t *objspace) } if (full_marking) { - heap_allocatable_bytes_expand(objspace, NULL, sweep_slots, total_slots, heaps[0].slot_size); + /* Use weighted average slot size since total_slots spans all heaps */ + size_t total_heap_bytes = 0; + for (int i = 0; i < HEAP_COUNT; i++) { + total_heap_bytes += heaps[i].total_slots * heaps[i].slot_size; + } + size_t avg_slot_size = total_slots > 0 ? total_heap_bytes / total_slots : heaps[0].slot_size; + heap_allocatable_bytes_expand(objspace, NULL, sweep_slots, total_slots, avg_slot_size); } } @@ -5563,7 +7051,7 @@ gc_compact_move(rb_objspace_t *objspace, rb_heap_t *heap, VALUE src) unlock_page_body(objspace, GET_PAGE_BODY(src)); if (dest_pool->sweeping_page->free_slots > 0) { - heap_add_freepage(dest_pool, dest_pool->sweeping_page); + heap_add_freepage(dest_pool, dest_pool->sweeping_page, "gc_compact_move"); } dest_pool->sweeping_page = ccan_list_next(&dest_pool->pages, dest_pool->sweeping_page, page_node); @@ -5753,6 +7241,7 @@ gc_marks_continue(rb_objspace_t *objspace, rb_heap_t *heap) static void gc_marks_start(rb_objspace_t *objspace, int full_mark) { + // NOTE: background sweeping cannot be running during marking. /* start marking */ gc_report(1, objspace, "gc_marks_start: (%s)\n", full_mark ? 
"full" : "minor"); gc_mode_transition(objspace, gc_mode_marking); @@ -5765,7 +7254,7 @@ gc_marks_start(rb_objspace_t *objspace, int full_mark) "objspace->rincgc.pooled_page_num: %"PRIdSIZE", " "objspace->rincgc.step_slots: %"PRIdSIZE", \n", objspace->marked_slots, objspace->rincgc.pooled_slots, objspace->rincgc.step_slots); - objspace->flags.during_minor_gc = FALSE; + objspace->during_minor_gc = FALSE; if (ruby_enable_autocompact) { objspace->flags.during_compacting |= TRUE; } @@ -5790,7 +7279,7 @@ gc_marks_start(rb_objspace_t *objspace, int full_mark) } } else { - objspace->flags.during_minor_gc = TRUE; + objspace->during_minor_gc = TRUE; objspace->marked_slots = objspace->rgengc.old_objects + objspace->rgengc.uncollectible_wb_unprotected_objects; /* uncollectible objects are marked already */ objspace->profile.minor_gc_count++; @@ -6278,9 +7767,9 @@ static void heap_ready_to_gc(rb_objspace_t *objspace, rb_heap_t *heap) { if (!heap->free_pages) { - if (!heap_page_allocate_and_initialize(objspace, heap)) { + if (!heap_page_allocate_and_initialize(objspace, heap, false)) { objspace->heap_pages.allocatable_bytes = HEAP_PAGE_SIZE; - heap_page_allocate_and_initialize(objspace, heap); + heap_page_allocate_and_initialize(objspace, heap, false); } } } @@ -6400,6 +7889,8 @@ gc_start(rb_objspace_t *objspace, unsigned int reason) if (!rb_darray_size(objspace->heap_pages.sorted)) return TRUE; /* heap is not ready */ if (!(reason & GPR_FLAG_METHOD) && !ready_to_gc(objspace)) return TRUE; /* GC is not allowed */ + wait_for_background_sweeping_to_finish(objspace, true, false, "gc_start"); // in case user called `GC.start` explicitly + GC_ASSERT(gc_mode(objspace) == gc_mode_none, "gc_mode is %s\n", gc_mode_name(gc_mode(objspace))); GC_ASSERT(!is_lazy_sweeping(objspace)); GC_ASSERT(!is_incremental_marking(objspace)); @@ -6451,12 +7942,18 @@ gc_start(rb_objspace_t *objspace, unsigned int reason) /* Explicitly enable compaction (GC.compact) */ if (do_full_mark && 
ruby_enable_autocompact) { objspace->flags.during_compacting = TRUE; +#if RUBY_DEBUG + objspace->flags.was_compacting = TRUE; +#endif #if RGENGC_CHECK_MODE objspace->rcompactor.compare_func = ruby_autocompact_compare_func; #endif } else { objspace->flags.during_compacting = !!(reason & GPR_FLAG_COMPACT); +#if RUBY_DEBUG + objspace->flags.was_compacting = objspace->flags.during_compacting; +#endif } if (!GC_ENABLE_LAZY_SWEEP || objspace->flags.dont_incremental) { @@ -6479,6 +7976,11 @@ gc_start(rb_objspace_t *objspace, unsigned int reason) #if RGENGC_ESTIMATE_OLDMALLOC (void)RB_DEBUG_COUNTER_INC_IF(gc_major_oldmalloc, reason & GPR_FLAG_MAJOR_BY_OLDMALLOC); #endif + if (reason & GPR_FLAG_MAJOR_BY_NOFREE) objspace->profile.major_gc_count_by_nofree++; + if (reason & GPR_FLAG_MAJOR_BY_OLDGEN) objspace->profile.major_gc_count_by_oldgen++; + if (reason & GPR_FLAG_MAJOR_BY_SHADY) objspace->profile.major_gc_count_by_shady++; + if (reason & GPR_FLAG_MAJOR_BY_FORCE) objspace->profile.major_gc_count_by_force++; + if (reason & GPR_FLAG_MAJOR_BY_OLDMALLOC) objspace->profile.major_gc_count_by_oldmalloc++; } else { (void)RB_DEBUG_COUNTER_INC_IF(gc_minor_newobj, reason & GPR_FLAG_NEWOBJ); @@ -6531,7 +8033,7 @@ gc_rest(rb_objspace_t *objspace) } if (is_lazy_sweeping(objspace)) { - gc_sweeping_enter(objspace); + gc_sweeping_enter(objspace, "gc_rest"); gc_sweep_rest(objspace); gc_sweeping_exit(objspace); } @@ -6668,6 +8170,30 @@ gc_clock_end(struct timespec *ts) return 0; } +#if PSWEEP_COLLECT_TIMINGS > 0 +/* Wall time clock functions using CLOCK_MONOTONIC */ +static void +gc_wall_clock_start(struct timespec *ts) +{ + if (clock_gettime(CLOCK_MONOTONIC, ts) != 0) { + ts->tv_sec = 0; + ts->tv_nsec = 0; + } +} + +static unsigned long long +gc_wall_clock_end(struct timespec *ts) +{ + struct timespec end_time; + + if ((ts->tv_sec > 0 || ts->tv_nsec > 0) && + clock_gettime(CLOCK_MONOTONIC, &end_time) == 0) { + return (unsigned long long)(end_time.tv_sec - ts->tv_sec) * (1000 * 1000 * 1000) 
+ (end_time.tv_nsec - ts->tv_nsec); + } + return 0; +} +#endif + static inline void gc_enter(rb_objspace_t *objspace, enum gc_enter_event event, unsigned int *lock_lev) { @@ -6688,6 +8214,7 @@ gc_enter(rb_objspace_t *objspace, enum gc_enter_event event, unsigned int *lock_ if (RB_UNLIKELY(during_gc != 0)) rb_bug("during_gc != 0"); if (RGENGC_CHECK_MODE >= 3) gc_verify_internal_consistency(objspace); + GC_ASSERT(!is_sweep_thread_p()); during_gc = TRUE; RUBY_DEBUG_LOG("%s (%s)",gc_enter_event_cstr(event), gc_current_status(objspace)); gc_report(1, objspace, "gc_enter: %s [%s]\n", gc_enter_event_cstr(event), gc_current_status(objspace)); @@ -6706,7 +8233,8 @@ gc_exit(rb_objspace_t *objspace, enum gc_enter_event event, unsigned int *lock_l gc_record(objspace, 1, gc_enter_event_cstr(event)); RUBY_DEBUG_LOG("%s (%s)", gc_enter_event_cstr(event), gc_current_status(objspace)); gc_report(1, objspace, "gc_exit: %s [%s]\n", gc_enter_event_cstr(event), gc_current_status(objspace)); - during_gc = FALSE; + GC_ASSERT(!is_sweep_thread_p()); + during_gc = FALSE; // NOTE: background thread could still be sweeping even if !during_gc RB_GC_VM_UNLOCK(*lock_lev); } @@ -6735,24 +8263,80 @@ gc_marking_exit(rb_objspace_t *objspace) } } +unsigned long long sweeping_enter_count = 0; + static void -gc_sweeping_enter(rb_objspace_t *objspace) +gc_sweeping_enter(rb_objspace_t *objspace, const char *from_fn) { + MAYBE_UNUSED(const unsigned int immediate_sweep) = objspace->flags.immediate_sweep; + psweep_debug(1, "[gc] gc_sweeping_enter from %s (immediate:%u)\n", from_fn, immediate_sweep); GC_ASSERT(during_gc != 0); + sweep_lock_lock(&objspace->sweep_lock); + { + objspace->background_sweep_mode = false; + } + sweep_lock_unlock(&objspace->sweep_lock); + if (MEASURE_GC) { gc_clock_start(&objspace->profile.sweeping_start_time); } + + sweeping_enter_count++; + /* Always track Ruby thread sweep time */ +#if PSWEEP_COLLECT_TIMINGS > 0 + 
gc_clock_start(&objspace->profile.ruby_thread_sweep_cpu_start_time); + gc_wall_clock_start(&objspace->profile.ruby_thread_sweep_wall_start_time); +#endif } static void gc_sweeping_exit(rb_objspace_t *objspace) { GC_ASSERT(during_gc != 0); + psweep_debug(1, "[gc] gc_sweeping_exit\n"); + MAYBE_UNUSED(bool was_rest) = objspace->sweep_rest; + + bool continue_sweep_in_background = objspace->use_background_sweep_thread && + !objspace->sweep_rest && !dont_gc_val() && is_lazy_sweeping(objspace); + + if (continue_sweep_in_background) { + if (background_sweep_done_p(objspace)) { + psweep_debug(-2, "[gc] gc_sweeping_exit: bg done, not requesting\n"); + } + else { + psweep_debug(-2, "[gc] gc_sweeping_exit: continue in background\n"); + sweep_lock_lock(&objspace->sweep_lock); + objspace->background_sweep_mode = true; + if (!objspace->sweep_thread_sweeping && !objspace->sweep_thread_sweep_requested) { + psweep_debug(-2, "[gc] gc_sweeping_exit: requested\n"); + objspace->sweep_thread_sweep_requested = true; + rb_native_cond_broadcast(&objspace->sweep_cond); + } + else { + psweep_debug(-2, "[gc] gc_sweeping_exit: restart heaps\n"); + objspace->background_sweep_restart_heaps = true; // restart sweeping heaps from heap 0 + } + sweep_lock_unlock(&objspace->sweep_lock); + } + } + else { + GC_ASSERT(!objspace->background_sweep_mode); + psweep_debug(-2, "[gc] gc_sweeping_exit: don't continue (rest:%d, use:%d)\n", was_rest, objspace->use_background_sweep_thread); + sweep_lock_lock(&objspace->sweep_lock); + objspace->sweep_rest = false; + sweep_lock_unlock(&objspace->sweep_lock); + } if (MEASURE_GC) { objspace->profile.sweeping_time_ns += gc_clock_end(&objspace->profile.sweeping_start_time); } + + /* Always track Ruby thread sweep time */ +#if PSWEEP_COLLECT_TIMINGS > 0 + objspace->profile.ruby_thread_sweep_cpu_time_ns += gc_clock_end(&objspace->profile.ruby_thread_sweep_cpu_start_time); + objspace->profile.ruby_thread_sweep_wall_time_ns += 
gc_wall_clock_end(&objspace->profile.ruby_thread_sweep_wall_start_time); +#endif } static void * @@ -6836,11 +8420,32 @@ rb_gc_impl_start(void *objspace_ptr, bool full_mark, bool immediate_mark, bool i } garbage_collect(objspace, reason); +#if RUBY_DEBUG + if (immediate_sweep) { + sweep_lock_lock(&objspace->sweep_lock); + { + GC_ASSERT(!objspace->sweep_thread_sweeping); + for (int j = 0; j < HEAP_COUNT; j++) { + rb_heap_t *heap = &heaps[j]; + GC_ASSERT(!heap->swept_pages); + GC_ASSERT(!heap->sweeping_page); + } + } + sweep_lock_unlock(&objspace->sweep_lock); + } +#endif + // NOTE: background sweeping can still be active here. We also may enter a new GC cycle from finalizers below. gc_finalize_deferred(objspace); gc_config_full_mark_set(full_marking_p); } +void +rb_gc_stop_background_threads(rb_objspace_t *objspace, const char *from_fn) +{ + wait_for_background_sweeping_to_finish(objspace, true, true, from_fn); +} + void rb_gc_impl_prepare_heap(void *objspace_ptr) { @@ -7064,8 +8669,8 @@ gc_sort_heap_by_compare_func(rb_objspace_t *objspace, gc_compact_compare_func co for (i = 0; i < total_pages; i++) { ccan_list_add(&heap->pages, &page_list[i]->page_node); - if (page_list[i]->free_slots != 0) { - heap_add_freepage(heap, page_list[i]); + if (page_list[i]->free_slots != 0 && page_list[i]->start) { + heap_add_freepage(heap, page_list[i], "sort_by_compare_func"); } } @@ -7109,7 +8714,7 @@ gc_ref_update(void *vstart, void *vend, size_t stride, rb_objspace_t *objspace, if (RVALUE_REMEMBERED(objspace, v)) { page->flags.has_remembered_objects = TRUE; } - if (page->flags.before_sweep) { + if (page->before_sweep) { if (RVALUE_MARKED(objspace, v)) { rb_gc_update_object_references(objspace, v); } @@ -7448,6 +9053,11 @@ enum gc_stat_sym { gc_stat_sym_malloc_increase_bytes_limit, gc_stat_sym_minor_gc_count, gc_stat_sym_major_gc_count, + gc_stat_sym_major_gc_count_by_nofree, + gc_stat_sym_major_gc_count_by_oldgen, + gc_stat_sym_major_gc_count_by_shady, + 
gc_stat_sym_major_gc_count_by_force, + gc_stat_sym_major_gc_count_by_oldmalloc, gc_stat_sym_compact_count, gc_stat_sym_read_barrier_faults, gc_stat_sym_total_moved_objects, @@ -7455,6 +9065,8 @@ enum gc_stat_sym { gc_stat_sym_remembered_wb_unprotected_objects_limit, gc_stat_sym_old_objects, gc_stat_sym_old_objects_limit, + gc_stat_sym_pages_swept_by_sweep_thread, + gc_stat_sym_pages_swept_by_sweep_thread_had_deferred_free_objects, #if RGENGC_ESTIMATE_OLDMALLOC gc_stat_sym_oldmalloc_increase_bytes, gc_stat_sym_oldmalloc_increase_bytes_limit, @@ -7498,6 +9110,11 @@ setup_gc_stat_symbols(void) S(malloc_increase_bytes_limit); S(minor_gc_count); S(major_gc_count); + S(major_gc_count_by_nofree); + S(major_gc_count_by_oldgen); + S(major_gc_count_by_shady); + S(major_gc_count_by_force); + S(major_gc_count_by_oldmalloc); S(compact_count); S(read_barrier_faults); S(total_moved_objects); @@ -7505,6 +9122,8 @@ setup_gc_stat_symbols(void) S(remembered_wb_unprotected_objects_limit); S(old_objects); S(old_objects_limit); + S(pages_swept_by_sweep_thread); + S(pages_swept_by_sweep_thread_had_deferred_free_objects); #if RGENGC_ESTIMATE_OLDMALLOC S(oldmalloc_increase_bytes); S(oldmalloc_increase_bytes_limit); @@ -7527,7 +9146,7 @@ ns_to_ms(uint64_t ns) return ns / (1000 * 1000); } -static void malloc_increase_local_flush(rb_objspace_t *objspace); +static size_t malloc_increase_local_flush(rb_objspace_t *objspace); VALUE rb_gc_impl_stat(void *objspace_ptr, VALUE hash_or_sym) @@ -7579,6 +9198,11 @@ rb_gc_impl_stat(void *objspace_ptr, VALUE hash_or_sym) SET(malloc_increase_bytes_limit, malloc_limit); SET(minor_gc_count, objspace->profile.minor_gc_count); SET(major_gc_count, objspace->profile.major_gc_count); + SET(major_gc_count_by_nofree, objspace->profile.major_gc_count_by_nofree); + SET(major_gc_count_by_oldgen, objspace->profile.major_gc_count_by_oldgen); + SET(major_gc_count_by_shady, objspace->profile.major_gc_count_by_shady); + SET(major_gc_count_by_force, 
objspace->profile.major_gc_count_by_force); + SET(major_gc_count_by_oldmalloc, objspace->profile.major_gc_count_by_oldmalloc); SET(compact_count, objspace->profile.compact_count); SET(read_barrier_faults, objspace->profile.read_barrier_faults); SET(total_moved_objects, objspace->rcompactor.total_moved); @@ -7586,6 +9210,8 @@ rb_gc_impl_stat(void *objspace_ptr, VALUE hash_or_sym) SET(remembered_wb_unprotected_objects_limit, objspace->rgengc.uncollectible_wb_unprotected_objects_limit); SET(old_objects, objspace->rgengc.old_objects); SET(old_objects_limit, objspace->rgengc.old_objects_limit); + SET(pages_swept_by_sweep_thread, objspace->profile.pages_swept_by_sweep_thread); + SET(pages_swept_by_sweep_thread_had_deferred_free_objects, objspace->profile.pages_swept_by_sweep_thread_had_deferred_free_objects); #if RGENGC_ESTIMATE_OLDMALLOC SET(oldmalloc_increase_bytes, objspace->malloc_counters.oldmalloc_increase); SET(oldmalloc_increase_bytes_limit, objspace->rgengc.oldmalloc_increase_limit); @@ -7794,7 +9420,7 @@ rb_gc_impl_stress_set(void *objspace_ptr, VALUE flag) { rb_objspace_t *objspace = objspace_ptr; - objspace->flags.gc_stressful = RTEST(flag); + objspace->gc_stressful = RTEST(flag); objspace->gc_stress_mode = flag; } @@ -8001,9 +9627,9 @@ atomic_sub_nounderflow(size_t *var, size_t sub) if (sub == 0) return; while (1) { - size_t val = *var; + size_t val = rbimpl_atomic_size_load(var, RBIMPL_ATOMIC_RELAXED); if (val < sub) sub = val; - if (RUBY_ATOMIC_SIZE_CAS(*var, val, val-sub) == val) break; + if (rbimpl_atomic_size_cas(var, val, val-sub, RBIMPL_ATOMIC_RELAXED, RBIMPL_ATOMIC_RELAXED) == val) break; } } @@ -8024,42 +9650,53 @@ objspace_malloc_gc_stress(rb_objspace_t *objspace) } } -static void -malloc_increase_commit(rb_objspace_t *objspace, size_t new_size, size_t old_size) +static size_t +malloc_increase_commit(rb_objspace_t *objspace, size_t new_size, size_t old_size, struct heap_page *sweep_thread_page) { if (new_size > old_size) { - 
RUBY_ATOMIC_SIZE_ADD(malloc_increase, new_size - old_size); + GC_ASSERT(!is_sweep_thread_p()); + size_t delta = new_size - old_size; + size_t old_val = rbimpl_atomic_size_fetch_add(&malloc_increase, delta, RBIMPL_ATOMIC_RELAXED); #if RGENGC_ESTIMATE_OLDMALLOC - RUBY_ATOMIC_SIZE_ADD(objspace->malloc_counters.oldmalloc_increase, new_size - old_size); + rbimpl_atomic_size_add(&objspace->malloc_counters.oldmalloc_increase, delta, RBIMPL_ATOMIC_RELAXED); #endif + return old_val + delta; } else { - atomic_sub_nounderflow(&malloc_increase, old_size - new_size); + size_t delta = old_size - new_size; + if (sweep_thread_page) { + sweep_thread_page->pre_freed_malloc_bytes += delta; + } + else { + atomic_sub_nounderflow(&malloc_increase, delta); #if RGENGC_ESTIMATE_OLDMALLOC - atomic_sub_nounderflow(&objspace->malloc_counters.oldmalloc_increase, old_size - new_size); + atomic_sub_nounderflow(&objspace->malloc_counters.oldmalloc_increase, delta); #endif + } + return 0; } } #if USE_MALLOC_INCREASE_LOCAL -static void +static size_t malloc_increase_local_flush(rb_objspace_t *objspace) { int delta = malloc_increase_local; - if (delta == 0) return; + if (delta == 0) return 0; malloc_increase_local = 0; if (delta > 0) { - malloc_increase_commit(objspace, (size_t)delta, 0); + return malloc_increase_commit(objspace, (size_t)delta, 0, NULL); } else { - malloc_increase_commit(objspace, 0, (size_t)(-delta)); + return malloc_increase_commit(objspace, 0, (size_t)(-delta), current_sweep_thread_page); } } #else -static void +static size_t malloc_increase_local_flush(rb_objspace_t *objspace) { + return 0; } #endif @@ -8078,6 +9715,8 @@ objspace_malloc_increase_report(rb_objspace_t *objspace, void *mem, size_t new_s static bool objspace_malloc_increase_body(rb_objspace_t *objspace, void *mem, size_t new_size, size_t old_size, enum memop_type type, bool gc_allowed) { + size_t current_malloc_increase = 0; + #if USE_MALLOC_INCREASE_LOCAL if (new_size < GC_MALLOC_INCREASE_LOCAL_THRESHOLD && 
old_size < GC_MALLOC_INCREASE_LOCAL_THRESHOLD) { @@ -8085,22 +9724,23 @@ objspace_malloc_increase_body(rb_objspace_t *objspace, void *mem, size_t new_siz if (malloc_increase_local >= GC_MALLOC_INCREASE_LOCAL_THRESHOLD || malloc_increase_local <= -GC_MALLOC_INCREASE_LOCAL_THRESHOLD) { - malloc_increase_local_flush(objspace); + current_malloc_increase = malloc_increase_local_flush(objspace); } } else { malloc_increase_local_flush(objspace); - malloc_increase_commit(objspace, new_size, old_size); + current_malloc_increase = malloc_increase_commit(objspace, new_size, old_size, current_sweep_thread_page); } #else - malloc_increase_commit(objspace, new_size, old_size); + current_malloc_increase = malloc_increase_commit(objspace, new_size, old_size, is_sweep_thread_p() ? current_sweep_thread_page : NULL); #endif if (type == MEMOP_TYPE_MALLOC && gc_allowed) { retry: - if (malloc_increase > malloc_limit && ruby_native_thread_p() && !dont_gc_val()) { + if (current_malloc_increase > malloc_limit && ruby_native_thread_p() && !dont_gc_val()) { if (ruby_thread_has_gvl_p() && is_lazy_sweeping(objspace)) { gc_rest(objspace); /* gc_rest can reduce malloc_increase */ + current_malloc_increase = rbimpl_atomic_size_load(&malloc_increase, RBIMPL_ATOMIC_RELAXED); goto retry; } garbage_collect_with_gvl(objspace, GPR_FLAG_MALLOC); @@ -8168,13 +9808,19 @@ objspace_malloc_prepare(rb_objspace_t *objspace, size_t size) } static bool -malloc_during_gc_p(rb_objspace_t *objspace) +bad_malloc_during_gc_p(rb_objspace_t *objspace) { /* malloc is not allowed during GC when we're not using multiple ractors * (since ractors can run while another thread is sweeping) and when we * have the GVL (since if we don't have the GVL, we'll try to acquire the * GVL which will block and ensure the other thread finishes GC). 
*/ - return during_gc && !dont_gc_val() && !rb_gc_multi_ractor_p() && ruby_thread_has_gvl_p(); + if (is_sweep_thread_p()) { + fprintf(stderr, "ERROR: bad malloc/calloc call family during GC in sweep thread!\n"); + return true; + } + else { + return during_gc && !dont_gc_val() && !rb_gc_multi_ractor_p() && ruby_thread_has_gvl_p(); + } } static inline void * @@ -8233,10 +9879,16 @@ objspace_malloc_fixup(rb_objspace_t *objspace, void *mem, size_t size, bool gc_a static void check_malloc_not_in_gc(rb_objspace_t *objspace, const char *msg) { - if (RB_UNLIKELY(malloc_during_gc_p(objspace))) { - dont_gc_on(); - during_gc = false; - rb_bug("Cannot %s during GC", msg); + if (RB_UNLIKELY(bad_malloc_during_gc_p(objspace))) { + if (is_sweep_thread_p()) { + fprintf(stderr, "Bad %s in sweep thread, exiting\n", msg); + exit(EXIT_FAILURE); + } + else { + dont_gc_on(); + during_gc = false; + rb_bug("Cannot %s during GC", msg); + } } } @@ -8295,11 +9947,16 @@ rb_gc_impl_calloc(void *objspace_ptr, size_t size, bool gc_allowed) { rb_objspace_t *objspace = objspace_ptr; - if (RB_UNLIKELY(malloc_during_gc_p(objspace))) { - rb_warn("calloc during GC detected, this could cause crashes if it triggers another GC"); + if (RB_UNLIKELY(bad_malloc_during_gc_p(objspace))) { + if (is_sweep_thread_p()) { + fprintf(stderr, "calloc in sweep thread detected! 
This could cause crashes!\n"); + } + else { + rb_warn("calloc during GC detected, this could cause crashes if it triggers another GC"); #if RGENGC_CHECK_MODE || RUBY_DEBUG - rb_bug("Cannot calloc during GC"); + rb_bug("Cannot calloc during GC"); #endif + } } void *mem; @@ -9312,7 +10969,7 @@ gc_verify_compaction_references(int argc, VALUE* argv, VALUE self) */ objspace->heap_pages.allocatable_bytes = desired_compaction.required_slots[i] * heap->slot_size; while (objspace->heap_pages.allocatable_bytes > 0) { - heap_page_allocate_and_initialize(objspace, heap); + heap_page_allocate_and_initialize(objspace, heap, false); } /* * Step 3: Add two more pages so that the compact & sweep cursors will meet _after_ all objects @@ -9321,7 +10978,7 @@ gc_verify_compaction_references(int argc, VALUE* argv, VALUE self) pages_to_add += 2; for (; pages_to_add > 0; pages_to_add--) { - heap_page_allocate_and_initialize_force(objspace, heap); + heap_page_allocate_and_initialize_force(objspace, heap, false); } } } @@ -9350,29 +11007,51 @@ rb_gc_impl_objspace_free(void *objspace_ptr) { rb_objspace_t *objspace = objspace_ptr; - if (is_lazy_sweeping(objspace)) - rb_bug("lazy sweeping underway when freeing object space"); +// if (is_lazy_sweeping(objspace)) +// rb_bug("lazy sweeping underway when freeing object space"); + + rb_gc_stop_background_threads(objspace, "objspace_free"); + +#if PSWEEP_LOCK_STATS > 0 + /* Print lock contention statistics before freeing */ + print_lock_stats(); +#endif + +#if PSWEEP_COLLECT_TIMINGS > 0 + /* Print Ruby thread sweep time to stdout */ + double ruby_thread_sweep_cpu_time_ms = (double)(objspace->profile.ruby_thread_sweep_cpu_time_ns) / 1000000.0; + double ruby_thread_sweep_wall_time_ms = ((double)objspace->profile.ruby_thread_sweep_wall_time_ns) / 1000000.0; + fprintf(stderr, "\nSweep Time (CPU): %.3f ms (%.6f seconds)\n", ruby_thread_sweep_cpu_time_ms, ruby_thread_sweep_cpu_time_ms / 1000.0); + fprintf(stderr, "\nSweep Time (Wall): %.3f ms (%.6f 
seconds)\n", ruby_thread_sweep_wall_time_ms, ruby_thread_sweep_wall_time_ms / 1000.0); + fprintf(stderr, "\nSweeping enter count: %llu\n", sweeping_enter_count); + fprintf(stderr, "\nSweep continue count: %llu\n", sweep_continue_count); + fprintf(stderr, "\nSweep rest count: %llu\n", sweep_rest_count); +#endif free(objspace->profile.records); objspace->profile.records = NULL; for (size_t i = 0; i < rb_darray_size(objspace->heap_pages.sorted); i++) { - heap_page_free(objspace, rb_darray_get(objspace->heap_pages.sorted, i)); + heap_page_free(objspace, rb_darray_get(objspace->heap_pages.sorted, i), false); } rb_darray_free_without_gc(objspace->heap_pages.sorted); heap_pages_lomem = 0; heap_pages_himem = 0; + free_stack_chunks(&objspace->mark_stack); + mark_stack_free_cache(&objspace->mark_stack); + for (int i = 0; i < HEAP_COUNT; i++) { rb_heap_t *heap = &heaps[i]; + rb_native_mutex_destroy(&heap->swept_pages_lock); + rb_native_cond_destroy(&heap->sweep_page_cond); heap->total_pages = 0; heap->total_slots = 0; } - free_stack_chunks(&objspace->mark_stack); - mark_stack_free_cache(&objspace->mark_stack); - rb_darray_free_without_gc(objspace->weak_references); + rb_native_cond_destroy(&objspace->sweep_cond); + rb_native_mutex_destroy(&objspace->sweep_lock); free(objspace); } @@ -9416,8 +11095,11 @@ rb_gc_impl_before_fork(void *objspace_ptr) { rb_objspace_t *objspace = objspace_ptr; + wait_for_background_sweeping_to_finish(objspace, true, false, "impl_before_fork"); + objspace->fork_vm_lock_lev = RB_GC_VM_LOCK(); rb_gc_vm_barrier(); + GC_ASSERT(!during_gc); } void @@ -9428,8 +11110,44 @@ rb_gc_impl_after_fork(void *objspace_ptr, rb_pid_t pid) RB_GC_VM_UNLOCK(objspace->fork_vm_lock_lev); objspace->fork_vm_lock_lev = 0; + void fiber_pool_lock_reset(void); + fiber_pool_lock_reset(); + // TODO: reset the id_table lock in case of Ractors. 
+ + GC_ASSERT(!during_gc); if (pid == 0) { /* child process */ + objspace->sweep_thread = 0; + rb_native_mutex_initialize(&objspace->sweep_lock); + rb_native_cond_initialize(&objspace->sweep_cond); + for (int i = 0; i < HEAP_COUNT; i++) { + rb_heap_t *heap = &heaps[i]; + + rb_native_mutex_initialize(&heap->swept_pages_lock); + rb_native_cond_initialize(&heap->sweep_page_cond); + heap->pre_sweeping_page = NULL; + heap->background_sweep_steps = heap->foreground_sweep_steps; + } rb_gc_ractor_newobj_cache_foreach(gc_ractor_newobj_cache_clear, NULL); + + sweep_lock_owner = 0; + /* Start the sweep thread after fork */ + objspace->sweep_thread_running = true; + objspace->sweep_thread_sweep_requested = false; + objspace->sweep_thread_sweeping = false; + objspace->sweep_thread_waiting_request = false; + GC_ASSERT(!objspace->background_sweep_mode); + GC_ASSERT(!objspace->background_sweep_abort); + GC_ASSERT(!objspace->background_sweep_restart_heaps); + pthread_create(&objspace->sweep_thread, NULL, gc_sweep_thread_func, objspace); + GET_VM()->gc.sweep_thread = objspace->sweep_thread; + sweep_lock_lock(&objspace->sweep_lock); + // The thread needs to be ready to accept sweep requests. 
+ while (!objspace->sweep_thread_waiting_request) { + sweep_lock_unlock(&objspace->sweep_lock); + usleep(50); + sweep_lock_lock(&objspace->sweep_lock); + } + sweep_lock_unlock(&objspace->sweep_lock); } } @@ -9516,6 +11234,8 @@ rb_gc_impl_objspace_init(void *objspace_ptr) slot_div_magics[i] = (uint32_t)((uint64_t)UINT32_MAX / heap->slot_size + 1); ccan_list_head_init(&heap->pages); + rb_native_mutex_initialize(&heap->swept_pages_lock); + rb_native_cond_initialize(&heap->sweep_page_cond); } init_size_to_heap_idx(); @@ -9536,6 +11256,12 @@ rb_gc_impl_objspace_init(void *objspace_ptr) objspace->profile.invoke_time = getrusage_time(); finalizer_table = st_init_numtable(); + + rb_native_mutex_initialize(&objspace->sweep_lock); + rb_native_cond_initialize(&objspace->sweep_cond); + objspace->sweep_thread_running = true; + pthread_create(&objspace->sweep_thread, NULL, gc_sweep_thread_func, objspace); + GET_VM()->gc.sweep_thread = objspace->sweep_thread; } void diff --git a/gc/gc.h b/gc/gc.h index 469a4902f03365..44ff018aa123e7 100644 --- a/gc/gc.h +++ b/gc/gc.h @@ -81,7 +81,8 @@ MODULAR_GC_FN void *rb_gc_get_objspace(void); MODULAR_GC_FN void rb_gc_run_obj_finalizer(VALUE objid, long count, VALUE (*callback)(long i, void *data), void *data); MODULAR_GC_FN void rb_gc_set_pending_interrupt(void); MODULAR_GC_FN void rb_gc_unset_pending_interrupt(void); -MODULAR_GC_FN void rb_gc_obj_free_vm_weak_references(VALUE obj); +MODULAR_GC_FN bool rb_gc_obj_free_vm_weak_references(VALUE obj); +MODULAR_GC_FN bool rb_gc_obj_free_concurrency_safe_vm_weak_references(VALUE obj); MODULAR_GC_FN bool rb_gc_obj_free(void *objspace, VALUE obj); MODULAR_GC_FN void rb_gc_save_machine_context(void); MODULAR_GC_FN void rb_gc_mark_roots(void *objspace, const char **categoryp); diff --git a/hash.c b/hash.c index 773df7e78d8c7f..0df553db67853b 100644 --- a/hash.c +++ b/hash.c @@ -6905,7 +6905,7 @@ static const rb_data_type_t env_data_type = { NULL, NULL, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | 
RUBY_TYPED_WB_PROTECTED, + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; /* diff --git a/id_table.c b/id_table.c index 76841d0cff8d07..c15867cc8715e1 100644 --- a/id_table.c +++ b/id_table.c @@ -349,7 +349,7 @@ const rb_data_type_t rb_managed_id_table_type = { .dfree = managed_id_table_free, .dsize = managed_id_table_memsize, }, - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; static inline struct rb_id_table * diff --git a/include/ruby/atomic.h b/include/ruby/atomic.h index fcc48f532c89ba..32371953f4037c 100644 --- a/include/ruby/atomic.h +++ b/include/ruby/atomic.h @@ -36,6 +36,7 @@ #if RBIMPL_COMPILER_IS(MSVC) # pragma intrinsic(_InterlockedOr) +# pragma intrinsic(_InterlockedAnd) #elif defined(__sun) && defined(HAVE_ATOMIC_H) # include #endif @@ -140,6 +141,48 @@ typedef unsigned int rb_atomic_t; */ #define RUBY_ATOMIC_OR(var, val) rbimpl_atomic_or(&(var), (val), RBIMPL_ATOMIC_SEQ_CST) +/** + * Atomically replaces the value pointed by `var` with the result of + * bitwise AND between `val` and the old value of `var`. + * + * @param var A variable of ::rb_atomic_t. + * @param val Value to mask. + * @return void + * @post `var` holds `var & val`. + */ +#define RUBY_ATOMIC_AND(var, val) rbimpl_atomic_and(&(var), (val), RBIMPL_ATOMIC_SEQ_CST) + +/** + * Atomically replaces the value pointed by `var` with the result of + * bitwise AND between `val` and the old value of `var`. + * + * @param var A variable of ::rb_atomic_t. + * @param val Value to mask. + * @return What was stored in `var` before the operation. + * @post `var` holds `var & val`. + */ +#define RUBY_ATOMIC_FETCH_AND(var, val) rbimpl_atomic_fetch_and(&(var), (val), RBIMPL_ATOMIC_SEQ_CST) + +/** + * Identical to #RUBY_ATOMIC_OR, except it expects its arguments are ::VALUE. 
+ * + * @param var A variable of ::VALUE. + * @param val Value to mix. + * @return void + * @post `var` holds `var | val`. + */ +#define RUBY_ATOMIC_VALUE_OR(var, val) rbimpl_atomic_size_or((volatile size_t *)&(var), (size_t)(val), RBIMPL_ATOMIC_SEQ_CST) + +/** + * Identical to #RUBY_ATOMIC_AND, except it expects its arguments are ::VALUE. + * + * @param var A variable of ::VALUE. + * @param val Value to mask. + * @return void + * @post `var` holds `var & val`. + */ +#define RUBY_ATOMIC_VALUE_AND(var, val) rbimpl_atomic_size_and((volatile size_t *)&(var), (size_t)(val), RBIMPL_ATOMIC_SEQ_CST) + /** * Atomically replaces the value pointed by `var` with `val`. This is just an * assignment, but you can additionally know the previous value. @@ -559,6 +602,76 @@ rbimpl_atomic_size_add(volatile size_t *ptr, size_t val, int memory_order) #endif } +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline void +rbimpl_atomic_size_or(volatile size_t *ptr, size_t val, int memory_order) +{ + (void)memory_order; +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + __atomic_or_fetch(ptr, val, memory_order); + +#elif defined(HAVE_GCC_SYNC_BUILTINS) + __sync_or_and_fetch(ptr, val); + +#elif defined(_WIN64) + InterlockedOr64(ptr, val); + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) && (defined(_LP64) || defined(_I32LPx)) + atomic_or_ulong(ptr, val); + +#elif defined(_WIN32) || (defined(__sun) && defined(HAVE_ATOMIC_H)) + RBIMPL_STATIC_ASSERT(size_of_size_t, sizeof *ptr == sizeof(rb_atomic_t)); + + volatile rb_atomic_t *const tmp = RBIMPL_CAST((volatile rb_atomic_t *)ptr); + rbimpl_atomic_or(tmp, val, memory_order); + +#elif defined(HAVE_STDATOMIC_H) + atomic_fetch_or_explicit((_Atomic volatile size_t *)ptr, val, memory_order); + +#else +# error Unsupported platform. 
+#endif +} + +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline void +rbimpl_atomic_size_and(volatile size_t *ptr, size_t val, int memory_order) +{ + (void)memory_order; +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + __atomic_and_fetch(ptr, val, memory_order); + +#elif defined(HAVE_GCC_SYNC_BUILTINS) + __sync_and_and_fetch(ptr, val); + +#elif defined(_WIN64) + InterlockedAnd64(ptr, val); + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) && (defined(_LP64) || defined(_I32LPx)) + atomic_and_ulong(ptr, val); + +#elif defined(_WIN32) || (defined(__sun) && defined(HAVE_ATOMIC_H)) + RBIMPL_STATIC_ASSERT(size_of_size_t, sizeof *ptr == sizeof(rb_atomic_t)); + + volatile rb_atomic_t *const tmp = RBIMPL_CAST((volatile rb_atomic_t *)ptr); + rbimpl_atomic_and(tmp, val, memory_order); + +#elif defined(HAVE_STDATOMIC_H) + atomic_fetch_and_explicit((_Atomic volatile size_t *)ptr, val, memory_order); + +#else +# error Unsupported platform. +#endif +} + RBIMPL_ATTR_ARTIFICIAL() RBIMPL_ATTR_NOALIAS() RBIMPL_ATTR_NONNULL((1)) @@ -804,6 +917,70 @@ rbimpl_atomic_or(volatile rb_atomic_t *ptr, rb_atomic_t val, int memory_order) #endif } +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline rb_atomic_t +rbimpl_atomic_fetch_and(volatile rb_atomic_t *ptr, rb_atomic_t val, int memory_order) +{ + (void)memory_order; +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + return __atomic_fetch_and(ptr, val, memory_order); + +#elif defined(HAVE_GCC_SYNC_BUILTINS) + return __sync_fetch_and_and(ptr, val); + +#elif RBIMPL_COMPILER_IS(MSVC) + return _InterlockedAnd(ptr, val); + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) + /* TODO: Solaris atomic_and_uint does not return the old value. + * Using CAS loop as fallback. 
*/ + rb_atomic_t old = *ptr; + while (atomic_cas_uint(ptr, old, old & val) != old) { + old = *ptr; + } + return old; + +#elif !defined(_WIN32) && defined(HAVE_STDATOMIC_H) + return atomic_fetch_and_explicit((_Atomic volatile rb_atomic_t *)ptr, val, memory_order); + +#else +# error Unsupported platform. +#endif +} + +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline void +rbimpl_atomic_and(volatile rb_atomic_t *ptr, rb_atomic_t val, int memory_order) +{ + (void)memory_order; +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + __atomic_and_fetch(ptr, val, memory_order); + +#elif defined(HAVE_GCC_SYNC_BUILTINS) + __sync_and_and_fetch(ptr, val); + +#elif RBIMPL_COMPILER_IS(MSVC) + _InterlockedAnd(ptr, val); + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) + atomic_and_uint(ptr, val); + +#elif !defined(_WIN32) && defined(HAVE_STDATOMIC_H) + atomic_fetch_and_explicit((_Atomic volatile rb_atomic_t *)ptr, val, memory_order); + +#else +# error Unsupported platform. +#endif +} + RBIMPL_ATTR_ARTIFICIAL() RBIMPL_ATTR_NOALIAS() RBIMPL_ATTR_NONNULL((1)) @@ -869,6 +1046,15 @@ rbimpl_atomic_size_exchange(volatile size_t *ptr, size_t val, int memory_order) #endif } +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline size_t +rbimpl_atomic_size_load(volatile size_t *ptr, int memory_order) +{ + return rbimpl_atomic_size_fetch_add(ptr, 0, memory_order); +} + RBIMPL_ATTR_ARTIFICIAL() RBIMPL_ATTR_NOALIAS() RBIMPL_ATTR_NONNULL((1)) diff --git a/include/ruby/internal/core/rtypeddata.h b/include/ruby/internal/core/rtypeddata.h index 22bf46eb031bba..204cf0b539c689 100644 --- a/include/ruby/internal/core/rtypeddata.h +++ b/include/ruby/internal/core/rtypeddata.h @@ -120,6 +120,7 @@ static inline VALUE rbimpl_check_external_typeddata(VALUE obj); * Macros to see if each corresponding flag is defined. 
*/ #define RUBY_TYPED_FREE_IMMEDIATELY RUBY_TYPED_FREE_IMMEDIATELY +#define RUBY_TYPED_CONCURRENT_FREE_SAFE RUBY_TYPED_CONCURRENT_FREE_SAFE #define RUBY_TYPED_FROZEN_SHAREABLE RUBY_TYPED_FROZEN_SHAREABLE #define RUBY_TYPED_WB_PROTECTED RUBY_TYPED_WB_PROTECTED #define RUBY_TYPED_EMBEDDABLE RUBY_TYPED_EMBEDDABLE @@ -164,6 +165,14 @@ rbimpl_typeddata_flags { */ RUBY_TYPED_EMBEDDABLE = 2, + /** + * This flag indicates that the dfree function for this type is safe to + * call concurrently from a background sweep thread. When set, the GC + * may free objects of this type without holding the GVL. Only set this + * flag if the dfree function does not access shared mutable state. + */ + RUBY_TYPED_CONCURRENT_FREE_SAFE = 4, + /** * This flag has something to do with Ractor. Multiple Ractors run without * protecting each other. Sharing an object among Ractors is basically diff --git a/include/ruby/internal/intern/variable.h b/include/ruby/internal/intern/variable.h index 479c3950c1e373..d983a0b0ebc30e 100644 --- a/include/ruby/internal/intern/variable.h +++ b/include/ruby/internal/intern/variable.h @@ -214,7 +214,7 @@ void rb_alias_variable(ID dst, ID src); * This just destroys the given object. @shyouhei has no idea why extension * libraries should use this API. 
*/ -void rb_free_generic_ivar(VALUE obj); +bool rb_free_generic_ivar(VALUE obj); /** * Identical to rb_iv_get(), except it accepts the name as an ::ID instead of a diff --git a/include/ruby/internal/value_type.h b/include/ruby/internal/value_type.h index b47d8afb97b2a7..88c9027f7ee537 100644 --- a/include/ruby/internal/value_type.h +++ b/include/ruby/internal/value_type.h @@ -81,6 +81,7 @@ #define T_TRUE RUBY_T_TRUE /**< @old{RUBY_T_TRUE} */ #define T_UNDEF RUBY_T_UNDEF /**< @old{RUBY_T_UNDEF} */ #define T_ZOMBIE RUBY_T_ZOMBIE /**< @old{RUBY_T_ZOMBIE} */ +#define T_LAST RUBY_T_MOVED #define BUILTIN_TYPE RB_BUILTIN_TYPE /**< @old{RB_BUILTIN_TYPE} */ #define DYNAMIC_SYM_P RB_DYNAMIC_SYM_P /**< @old{RB_DYNAMIC_SYM_P} */ diff --git a/internal/concurrent_set.h b/internal/concurrent_set.h index 76cbefab0413ec..ce0b366a3cdc66 100644 --- a/internal/concurrent_set.h +++ b/internal/concurrent_set.h @@ -11,11 +11,11 @@ struct rb_concurrent_set_funcs { void (*free)(VALUE key); }; -VALUE rb_concurrent_set_new(const struct rb_concurrent_set_funcs *funcs, int capacity); +VALUE rb_concurrent_set_new(const struct rb_concurrent_set_funcs *funcs, int capacity, int key_type); rb_atomic_t rb_concurrent_set_size(VALUE set_obj); VALUE rb_concurrent_set_find(VALUE *set_obj_ptr, VALUE key); VALUE rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data); -VALUE rb_concurrent_set_delete_by_identity(VALUE set_obj, VALUE key); +VALUE rb_concurrent_set_delete_by_identity(VALUE *set_obj_ptr, VALUE key); void rb_concurrent_set_foreach_with_replace(VALUE set_obj, int (*callback)(VALUE *key, void *data), void *data); #endif diff --git a/io.c b/io.c index ab04d8df22864c..596f7db352bee4 100644 --- a/io.c +++ b/io.c @@ -10017,7 +10017,7 @@ argf_memsize(const void *ptr) static const rb_data_type_t argf_type = { "ARGF", {argf_mark_and_move, RUBY_TYPED_DEFAULT_FREE, argf_memsize, argf_mark_and_move}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | 
RUBY_TYPED_CONCURRENT_FREE_SAFE }; static inline void diff --git a/io_buffer.c b/io_buffer.c index 3c7b3eb16a756b..684bb8e1c53717 100644 --- a/io_buffer.c +++ b/io_buffer.c @@ -332,7 +332,7 @@ static const rb_data_type_t rb_io_buffer_type = { .dcompact = rb_io_buffer_type_compact, }, .data = NULL, - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; static inline enum rb_io_buffer_flags diff --git a/iseq.c b/iseq.c index 6f87b2df3e085b..2c4ecb1caedf4a 100644 --- a/iseq.c +++ b/iseq.c @@ -1606,7 +1606,7 @@ static const rb_data_type_t iseqw_data_type = { iseqw_memsize, iseqw_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED|RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE @@ -2846,7 +2846,7 @@ iseq_inspect(const rb_iseq_t *iseq) static const rb_data_type_t tmp_set = { "tmpset", {(void (*)(void *))rb_mark_set, (void (*)(void *))st_free_table, 0, 0,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE @@ -3324,7 +3324,7 @@ cdhash_each(VALUE key, VALUE value, VALUE ary) static const rb_data_type_t label_wrapper = { "label_wrapper", {(void (*)(void *))rb_mark_tbl, (void (*)(void *))st_free_table, 0, 0,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; #define DECL_ID(name) \ diff --git a/marshal.c b/marshal.c index 967855529e6d76..c592f7fe387b4e 100644 --- a/marshal.c +++ b/marshal.c @@ -237,7 +237,7 @@ memsize_dump_arg(const void *ptr) static const rb_data_type_t dump_arg_data = { "dump_arg", {mark_dump_arg, free_dump_arg, memsize_dump_arg,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE 
}; static VALUE @@ -1317,7 +1317,7 @@ memsize_load_arg(const void *ptr) static const rb_data_type_t load_arg_data = { "load_arg", {mark_load_arg, free_load_arg, memsize_load_arg,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; #define r_entry(v, arg) r_entry0((v), (arg)->data->num_entries, (arg)) @@ -2626,7 +2626,7 @@ static const rb_data_type_t marshal_compat_type = { .dsize = marshal_compat_table_memsize, .dcompact = marshal_compat_table_mark_and_move, }, - .flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY, + .flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; static st_table * diff --git a/memory_view.c b/memory_view.c index 9f5d6715804b22..f360c6c88091de 100644 --- a/memory_view.c +++ b/memory_view.c @@ -65,7 +65,7 @@ const rb_data_type_t rb_memory_view_exported_object_registry_data_type = { exported_object_registry_free, 0, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static int @@ -124,7 +124,7 @@ static const rb_data_type_t memory_view_entry_data_type = { 0, 0, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; /* Register memory view functions for the given class */ diff --git a/parse.y b/parse.y index bcff7918bfa4c3..170ed08a5e87a7 100644 --- a/parse.y +++ b/parse.y @@ -2773,7 +2773,7 @@ rb_parser_ary_free(rb_parser_t *p, rb_parser_ary_t *ary) %type if_tail opt_else case_body case_args cases opt_rescue exc_list exc_var opt_ensure %type args arg_splat call_args opt_call_args %type paren_args opt_paren_args -%type args_tail block_args_tail block_args-opt_tail +%type args_tail block_args_tail %type command_args aref_args %type opt_block_arg block_arg %type var_ref var_lhs @@ -2798,7 +2798,7 @@ rb_parser_ary_free(rb_parser_t *p, rb_parser_ary_t *ary) %type p_value 
p_primitive p_variable p_var_ref p_expr_ref p_const %type p_kwargs p_kwarg p_kw %type keyword_variable user_variable sym operation2 operation3 -%type cname fname op f_rest_arg f_block_arg opt_f_block_arg f_norm_arg f_bad_arg +%type cname fname op f_rest_arg f_block_arg opt_comma f_norm_arg f_bad_arg %type f_kwrest f_label f_arg_asgn call_op call_op2 reswords relop dot_or_colon %type p_kwrest p_kwnorest p_any_kwrest p_kw_label %type f_no_kwarg f_any_kwrest args_forward excessed_comma nonlocal_var def_name @@ -2923,18 +2923,18 @@ rb_parser_ary_free(rb_parser_t *p, rb_parser_ary_t *ary) } ; -%rule args_tail_basic(value) - : f_kwarg(value) ',' f_kwrest opt_f_block_arg +%rule args_tail_basic(value, trailing) + : f_kwarg(value) ',' f_kwrest opt_f_block_arg(trailing) { $$ = new_args_tail(p, $1, $3, $4, &@3); /*% ripper: [$:1, $:3, $:4] %*/ } - | f_kwarg(value) opt_f_block_arg + | f_kwarg(value) opt_f_block_arg(trailing) { $$ = new_args_tail(p, $1, 0, $2, &@1); /*% ripper: [$:1, Qnil, $:2] %*/ } - | f_any_kwrest opt_f_block_arg + | f_any_kwrest opt_f_block_arg(trailing) { $$ = new_args_tail(p, 0, $1, $2, &@1); /*% ripper: [Qnil, $:1, $:2] %*/ @@ -2946,6 +2946,15 @@ rb_parser_ary_free(rb_parser_t *p, rb_parser_ary_t *ary) } ; +%rule opt_f_block_arg(trailing) + : ',' f_block_arg + { + $$ = $2; + /*% ripper: $:2 %*/ + } + | trailing + ; + %rule def_endless_method(bodystmt) : defn_head[head] f_opt_paren_args[args] '=' bodystmt { @@ -3087,13 +3096,13 @@ rb_parser_ary_free(rb_parser_t *p, rb_parser_ary_t *ary) } ; -%rule opt_args_tail(tail) +%rule opt_args_tail(tail, trailing) : ',' tail { $$ = $tail; /*% ripper: $:tail %*/ } - | /* none */ + | trailing { $$ = new_empty_args_tail(p, &@$); /*% ripper: [Qnil, Qnil, Qnil] %*/ @@ -4973,10 +4982,7 @@ f_any_kwrest : f_kwrest f_eq : {p->ctxt.in_argdef = 0;} '='; -block_args_tail : args_tail_basic(primary_value) - ; - -block_args-opt_tail : opt_args_tail(block_args_tail) +block_args_tail : args_tail_basic(primary_value, none) ; 
excessed_comma : ',' @@ -4987,14 +4993,14 @@ excessed_comma : ',' } ; -block_param : args-list(primary_value, block_args-opt_tail) +block_param : args-list(primary_value, opt_args_tail(block_args_tail, none)) | f_arg[pre] excessed_comma { $$ = new_empty_args_tail(p, &@excessed_comma); $$ = new_args(p, $pre, 0, $excessed_comma, 0, $$, &@$); /*% ripper: params!($:pre, Qnil, $:excessed_comma, Qnil, Qnil, Qnil, Qnil) %*/ } - | f_arg[pre] opt_args_tail(block_args_tail)[tail] + | f_arg[pre] opt_args_tail(block_args_tail, none)[tail] { $$ = new_args(p, $pre, 0, 0, 0, $tail, &@$); /*% ripper: params!($:pre, Qnil, Qnil, Qnil, *$:tail[0..2]) %*/ @@ -6240,7 +6246,7 @@ f_arglist : f_paren_args } ; -args_tail : args_tail_basic(arg_value) +args_tail : args_tail_basic(arg_value, opt_comma) | args_forward { add_forwarding_args(p); @@ -6250,7 +6256,7 @@ args_tail : args_tail_basic(arg_value) } ; -largs_tail : args_tail_basic(arg_value) +largs_tail : args_tail_basic(arg_value, none) | args_forward { yyerror1(&@args_forward, "unexpected ... in lambda argument"); @@ -6331,14 +6337,9 @@ largs_tail : args_tail_basic(arg_value) } ; -%rule f_args-opt_tail(tail) - : opt_args_tail(tail) - ; - - -%rule f_args-list(tail) - : args-list(arg_value, f_args-opt_tail(tail)) - | f_arg[pre] opt_args_tail(tail)[tail] +%rule f_args-list(tail, trailing) + : args-list(arg_value, opt_args_tail(tail, trailing)) + | f_arg[pre] opt_args_tail(tail, trailing)[tail] { $$ = new_args(p, $pre, 0, 0, 0, $tail, &@$); /*% ripper: params!($:pre, Qnil, Qnil, Qnil, *$:tail[0..2]) %*/ @@ -6347,10 +6348,10 @@ largs_tail : args_tail_basic(arg_value) | f_empty_arg ; -f_args : f_args-list(args_tail) +f_args : f_args-list(args_tail, opt_comma) ; -f_largs : f_args-list(largs_tail) +f_largs : f_args-list(largs_tail, none) ; args_forward : tBDOT3 @@ -6538,12 +6539,11 @@ f_block_arg : blkarg_mark tIDENTIFIER } ; -opt_f_block_arg : ',' f_block_arg +opt_comma : ','? 
{ - $$ = $2; - /*% ripper: $:2 %*/ + $$ = 0; + /*% ripper: Qnil %*/ } - | none ; diff --git a/proc.c b/proc.c index 99fb880881b9d8..1550b9ad8c5ec1 100644 --- a/proc.c +++ b/proc.c @@ -106,7 +106,7 @@ const rb_data_type_t ruby_proc_data_type = { proc_memsize, proc_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE }; #define proc_data_type ruby_proc_data_type @@ -285,7 +285,7 @@ const rb_data_type_t ruby_binding_data_type = { binding_memsize, binding_mark_and_move, }, - 0, 0, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; VALUE @@ -1795,7 +1795,7 @@ static const rb_data_type_t method_data_type = { NULL, // No external memory to report, bm_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_FROZEN_SHAREABLE_NO_REC + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_FROZEN_SHAREABLE_NO_REC | RUBY_TYPED_CONCURRENT_FREE_SAFE }; VALUE diff --git a/process.c b/process.c index 126e36ee8d0d2a..be912be27cc754 100644 --- a/process.c +++ b/process.c @@ -597,7 +597,7 @@ static const rb_data_type_t rb_process_status_type = { .dfree = RUBY_DEFAULT_FREE, .dsize = NULL, }, - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; static VALUE @@ -1582,8 +1582,6 @@ before_fork_ruby(void) static void after_fork_ruby(rb_pid_t pid) { - rb_gc_after_fork(pid); - if (pid == 0) { // child clear_pid_cache(); @@ -1593,6 +1591,8 @@ after_fork_ruby(rb_pid_t pid) // parent after_exec(); } + + rb_gc_after_fork(pid); } #endif @@ -1740,7 +1740,7 @@ memsize_exec_arg(const void *ptr) static const 
rb_data_type_t exec_arg_data_type = { "exec_arg", {mark_exec_arg, RUBY_TYPED_DEFAULT_FREE, memsize_exec_arg}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; #ifdef _WIN32 @@ -4131,7 +4131,7 @@ rb_fork_ruby(int *status) struct child_handler_disabler_state old; do { - prefork(); + prefork(); // NOTE: can context switch before_fork_ruby(); rb_thread_acquire_fork_lock(); diff --git a/ractor.c b/ractor.c index 4726cf107bfb03..3deef5f6719410 100644 --- a/ractor.c +++ b/ractor.c @@ -321,7 +321,7 @@ static const rb_data_type_t ractor_data_type = { ractor_memsize, NULL, // update }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY /* | RUBY_TYPED_WB_PROTECTED */ + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE /* | RUBY_TYPED_WB_PROTECTED */ }; bool @@ -2450,7 +2450,7 @@ static const rb_data_type_t cross_ractor_require_data_type = { NULL, // memsize NULL, // compact }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_DECL_MARKING | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_DECL_MARKING | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE diff --git a/ractor_core.h b/ractor_core.h index c692ebbbbfc638..8f53e599bbc3f0 100644 --- a/ractor_core.h +++ b/ractor_core.h @@ -5,6 +5,8 @@ #include "id_table.h" #include "vm_debug.h" +// FIXME: parallel sweep +#define RACTOR_CHECK_MODE 0 #ifndef RACTOR_CHECK_MODE #define RACTOR_CHECK_MODE (VM_CHECK_MODE || RUBY_DEBUG) && (SIZEOF_UINT64_T == SIZEOF_VALUE) #endif diff --git a/ractor_sync.c b/ractor_sync.c index 44c84ded92696f..405a7f8248eb08 100644 --- a/ractor_sync.c +++ b/ractor_sync.c @@ -36,7 +36,7 @@ static const rb_data_type_t ractor_port_data_type = { NULL, // memsize NULL, // update }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE, + 0, 0, 
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE, }; static st_data_t diff --git a/random.c b/random.c index b6c96f1b4d25ff..6795165962fe86 100644 --- a/random.c +++ b/random.c @@ -272,7 +272,7 @@ const rb_data_type_t rb_random_data_type = { random_free, random_memsize, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE }; #define random_mt_mark rb_random_mark @@ -293,7 +293,7 @@ static const rb_data_type_t random_mt_type = { }, &rb_random_data_type, (void *)&random_mt_if, - RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static rb_random_t * @@ -578,7 +578,7 @@ release_crypt(void *p) static const rb_data_type_t crypt_prov_type = { "HCRYPTPROV", {0, release_crypt,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static int diff --git a/regexec.c b/regexec.c index 3210c7cc1b5603..73f49d2963ad5a 100644 --- a/regexec.c +++ b/regexec.c @@ -905,16 +905,13 @@ onig_region_resize(OnigRegion* region, int n) if (n < ONIG_NREGION) n = ONIG_NREGION; + size_t region_half_sz = n * sizeof(OnigPosition); if (region->allocated == 0) { - region->beg = (OnigPosition* )xmalloc(n * sizeof(OnigPosition)); + region->beg = (OnigPosition* )xmalloc(region_half_sz * 2); if (region->beg == 0) return ONIGERR_MEMORY; - region->end = (OnigPosition* )xmalloc(n * sizeof(OnigPosition)); - if (region->end == 0) { - xfree(region->beg); - return ONIGERR_MEMORY; - } + region->end = (OnigPosition* )region->beg + n; region->allocated = n; } @@ -922,20 +919,13 @@ onig_region_resize(OnigRegion* region, int n) OnigPosition *tmp; region->allocated = 0; - tmp = (OnigPosition* )xrealloc(region->beg, n * 
sizeof(OnigPosition)); + tmp = (OnigPosition* )xrealloc(region->beg, region_half_sz * 2); if (tmp == 0) { xfree(region->beg); - xfree(region->end); return ONIGERR_MEMORY; } region->beg = tmp; - tmp = (OnigPosition* )xrealloc(region->end, n * sizeof(OnigPosition)); - if (tmp == 0) { - xfree(region->beg); - xfree(region->end); - return ONIGERR_MEMORY; - } - region->end = tmp; + region->end = (OnigPosition*)region->beg + n; region->allocated = n; } @@ -998,7 +988,6 @@ onig_region_free(OnigRegion* r, int free_self) if (r) { if (r->allocated > 0) { xfree(r->beg); - xfree(r->end); } #ifdef USE_CAPTURE_HISTORY history_root_free(r); diff --git a/ruby_parser.c b/ruby_parser.c index 267f619bf9cd18..d58d69de535f59 100644 --- a/ruby_parser.c +++ b/ruby_parser.c @@ -508,7 +508,7 @@ static const rb_data_type_t ruby_parser_data_type = { parser_free, parser_memsize, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; #ifdef UNIVERSAL_PARSER @@ -736,7 +736,7 @@ static const rb_data_type_t ast_data_type = { ast_free, NULL, // No dsize() because this object does not appear in ObjectSpace. 
}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE diff --git a/scheduler.c b/scheduler.c index c2f370a22aee4e..d542702d45b86d 100644 --- a/scheduler.c +++ b/scheduler.c @@ -90,7 +90,7 @@ static const rb_data_type_t blocking_operation_data_type = { RUBY_DEFAULT_FREE, blocking_operation_memsize, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE }; /* diff --git a/set.c b/set.c index 6bfded02a414ee..fc826aa5f6eeee 100644 --- a/set.c +++ b/set.c @@ -186,7 +186,7 @@ static const rb_data_type_t set_data_type = { .dsize = set_size, .dcompact = set_update_references, }, - .flags = RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE + .flags = RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static inline set_table * diff --git a/shape.c b/shape.c index 90036722f10026..bd9c2fc089c3b3 100644 --- a/shape.c +++ b/shape.c @@ -322,7 +322,7 @@ static const rb_data_type_t shape_tree_type = { .dsize = shape_tree_memsize, .dcompact = shape_tree_mark_and_move, }, - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; diff --git a/string.c b/string.c index 55a229f37c3b5c..a6b6427bc1f5d2 100644 --- a/string.c +++ b/string.c @@ -549,7 +549,7 @@ static const struct rb_concurrent_set_funcs fstring_concurrent_set_funcs = { void Init_fstring_table(void) { - fstring_table_obj = rb_concurrent_set_new(&fstring_concurrent_set_funcs, 8192); + fstring_table_obj = rb_concurrent_set_new(&fstring_concurrent_set_funcs, 8192, T_STRING); rb_gc_register_address(&fstring_table_obj); } @@ -593,13 +593,11 @@ 
rb_obj_is_fstring_table(VALUE obj) void rb_gc_free_fstring(VALUE obj) { - ASSERT_vm_locking_with_barrier(); - RUBY_ASSERT(FL_TEST(obj, RSTRING_FSTR)); RUBY_ASSERT(OBJ_FROZEN(obj)); RUBY_ASSERT(!FL_TEST(obj, STR_SHARED)); - rb_concurrent_set_delete_by_identity(fstring_table_obj, obj); + rb_concurrent_set_delete_by_identity(&fstring_table_obj, obj); RB_DEBUG_COUNTER_INC(obj_str_fstr); @@ -7835,7 +7833,7 @@ mapping_buffer_free(void *p) static const rb_data_type_t mapping_buffer_type = { "mapping_buffer", {0, mapping_buffer_free,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE diff --git a/symbol.c b/symbol.c index d3d7e13ea43626..e7a74b2550e6ce 100644 --- a/symbol.c +++ b/symbol.c @@ -233,17 +233,19 @@ static VALUE dup_string_for_create(VALUE str) { rb_encoding *enc = rb_enc_get(str); + VALUE new_str; - str = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), enc); + new_str = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), enc); + RB_GC_GUARD(str); rb_encoding *ascii = rb_usascii_encoding(); - if (enc != ascii && sym_check_asciionly(str, false)) { - rb_enc_associate(str, ascii); + if (enc != ascii && sym_check_asciionly(new_str, false)) { + rb_enc_associate(new_str, ascii); } - OBJ_FREEZE(str); + OBJ_FREEZE(new_str); - str = rb_fstring(str); - return str; + new_str = rb_fstring(new_str); + return new_str; } static int @@ -338,6 +340,7 @@ sym_set_create(VALUE sym, void *data) RB_VM_LOCKING() { set_id_entry(&ruby_global_symbols, rb_id_to_serial(STATIC_SYM2ID(static_sym)), str, static_sym); } + RB_GC_GUARD(str); return sym_set_static_sym_tag(new_static_sym_entry); } @@ -415,7 +418,7 @@ Init_sym(void) { rb_symbols_t *symbols = &ruby_global_symbols; - symbols->sym_set = rb_concurrent_set_new(&sym_set_funcs, 1024); + symbols->sym_set = rb_concurrent_set_new(&sym_set_funcs, 1024, T_SYMBOL); symbols->ids = rb_ary_hidden_new(0); Init_op_tbl(); @@ 
-950,7 +953,7 @@ rb_gc_free_dsymbol(VALUE sym) VALUE str = RSYMBOL(sym)->fstr; if (str) { - rb_concurrent_set_delete_by_identity(ruby_global_symbols.sym_set, sym); + rb_concurrent_set_delete_by_identity(&ruby_global_symbols.sym_set, sym); RSYMBOL(sym)->fstr = 0; } diff --git a/test/-ext-/tracepoint/test_tracepoint.rb b/test/-ext-/tracepoint/test_tracepoint.rb index 603fd01fd5c7e6..4805b323baa9af 100644 --- a/test/-ext-/tracepoint/test_tracepoint.rb +++ b/test/-ext-/tracepoint/test_tracepoint.rb @@ -47,7 +47,6 @@ def test_tracks_objspace_count assert_operator stat2[:total_allocated_objects] - stat1[:total_allocated_objects], :>=, newobj_count assert_operator 1_000_000, :<=, newobj_count - assert_operator stat2[:total_freed_objects] + stat2[:heap_final_slots] - stat1[:total_freed_objects], :>=, free_count assert_operator stat2[:count] - stat1[:count], :==, gc_start_count assert_operator gc_start_count, :==, gc_end_mark_count diff --git a/test/ruby/test_process.rb b/test/ruby/test_process.rb index d99e356e69bfd4..276a18e931b63f 100644 --- a/test/ruby/test_process.rb +++ b/test/ruby/test_process.rb @@ -1941,7 +1941,7 @@ def test_daemon_no_threads puts Dir.entries("/proc/self/task") - %W[. ..] 
end bug4920 = '[ruby-dev:43873]' - assert_include(1..2, data.size, bug4920) + assert_include(1..3, data.size, bug4920) assert_not_include(data.map(&:to_i), pid) end else # darwin diff --git a/test/ruby/test_syntax.rb b/test/ruby/test_syntax.rb index 70e19568160d0e..2d04858bde46da 100644 --- a/test/ruby/test_syntax.rb +++ b/test/ruby/test_syntax.rb @@ -222,6 +222,16 @@ def test_no_block_argument_in_method assert_raise_with_message(ArgumentError, /block accepted/) {obj.f(&proc {})} end + def test_trailing_comma_in_method_parameters + assert_valid_syntax("def f(a,b,c,); end") + assert_valid_syntax("def f(a,b,*c,); end") + assert_valid_syntax("def f(a,b,*,); end") + assert_valid_syntax("def f(a,b,**c,); end") + assert_valid_syntax("def f(a,b,**,); end") + assert_syntax_error("def f(a,b,&block,); end", /unexpected/) + assert_syntax_error("def f(a,b,...,); end", /unexpected/) + end + def test_no_block_argument_in_block assert_valid_syntax("proc do |&nil| end") assert_valid_syntax("proc do |a, &nil| end") diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index 0c7d76bdf67292..84ded50300b114 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -395,7 +395,9 @@ def array.itself = :not_itself test(array) fxt_files = Dir.glob("/tmp/perfetto-\#{Process.pid}.fxt") - fxt_files.length == 1 && !File.empty?(fxt_files.first) + result = fxt_files.length == 1 && !File.empty?(fxt_files.first) + File.unlink(*fxt_files) + result RUBY end diff --git a/thread.c b/thread.c index f876b4bd05c80e..444cd14d955e3b 100644 --- a/thread.c +++ b/thread.c @@ -446,18 +446,26 @@ rb_threadptr_join_list_wakeup(rb_thread_t *thread) } } +void mutexes_lock_lock(void); +void mutexes_lock_unlock(void); + void rb_threadptr_unlock_all_locking_mutexes(rb_thread_t *th) { + mutexes_lock_lock(); while (th->keeping_mutexes) { rb_mutex_t *mutex = th->keeping_mutexes; - th->keeping_mutexes = mutex->next_mutex; - + rb_mutex_t *next = mutex->next_mutex; + th->keeping_mutexes = next; + 
mutex->next_mutex = NULL; + mutexes_lock_unlock(); // rb_warn("mutex #<%p> was not unlocked by thread #<%p>", (void *)mutex, (void*)th); VM_ASSERT(mutex->ec_serial); - const char *error_message = rb_mutex_unlock_th(mutex, th, 0); + const char *error_message = rb_mutex_unlock_th(mutex, th, 0, false); if (error_message) rb_bug("invalid keeping_mutexes: %s", error_message); + mutexes_lock_lock(); } + mutexes_lock_unlock(); } void @@ -5011,6 +5019,9 @@ rb_thread_atfork_internal(rb_thread_t *th, void (*atfork)(rb_thread_t *, const r rb_thread_reset_timer_thread(); rb_thread_start_timer_thread(); + void mutexes_lock_reset(void); + mutexes_lock_reset(); // TODO: should be on thread + VM_ASSERT(vm->ractor.blocking_cnt == 0); VM_ASSERT(vm->ractor.cnt == 1); } @@ -5081,7 +5092,7 @@ static const rb_data_type_t thgroup_data_type = { RUBY_TYPED_DEFAULT_FREE, NULL, // No external memory to report }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; /* @@ -5250,7 +5261,7 @@ thread_shield_mark(void *ptr) static const rb_data_type_t thread_shield_data_type = { "thread_shield", {thread_shield_mark, 0, 0,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE diff --git a/thread_sync.c b/thread_sync.c index cf4e3843ff6c2f..1ee77b6aeeae0f 100644 --- a/thread_sync.c +++ b/thread_sync.c @@ -80,7 +80,7 @@ static void rb_mutex_abandon_all(rb_mutex_t *mutexes); static void rb_mutex_abandon_keeping_mutexes(rb_thread_t *th); static void rb_mutex_abandon_locking_mutex(rb_thread_t *th); #endif -static const char* rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial); +static const char* rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial, bool unlink_from_keeping); static size_t rb_mutex_num_waiting(rb_mutex_t *mutex) @@ 
-95,7 +95,52 @@ rb_mutex_num_waiting(rb_mutex_t *mutex) return n; } -rb_thread_t* rb_fiber_threadptr(const rb_fiber_t *fiber); +// TODO: mutexes_lock should be per-thread (on rb_thread_struct) +rb_nativethread_lock_t mutexes_lock = PTHREAD_MUTEX_INITIALIZER; +#ifdef RUBY_THREAD_PTHREAD_H +pthread_t mutexes_lock_lock_owner; +#endif + +static inline void +ASSERT_mutexes_lock_locked(void) +{ +#ifdef RUBY_THREAD_PTHREAD_H + VM_ASSERT(pthread_self() == mutexes_lock_lock_owner); +#endif +} + +static inline void +ASSERT_mutexes_lock_unlocked(void) +{ +#ifdef RUBY_THREAD_PTHREAD_H + VM_ASSERT(pthread_self() != mutexes_lock_lock_owner); +#endif +} + +void +mutexes_lock_lock(void) { + ASSERT_mutexes_lock_unlocked(); + rb_native_mutex_lock(&mutexes_lock); +#ifdef RUBY_THREAD_PTHREAD_H + mutexes_lock_lock_owner = pthread_self(); +#endif +} + +void +mutexes_lock_unlock(void) { + ASSERT_mutexes_lock_locked(); +#ifdef RUBY_THREAD_PTHREAD_H + mutexes_lock_lock_owner = 0; +#endif + rb_native_mutex_unlock(&mutexes_lock); +} + +void +mutexes_lock_reset(void) +{ + rb_native_mutex_initialize(&mutexes_lock); +} + static bool mutex_locked_p(rb_mutex_t *mutex) @@ -108,7 +153,7 @@ mutex_free(void *ptr) { rb_mutex_t *mutex = ptr; if (mutex_locked_p(mutex)) { - const char *err = rb_mutex_unlock_th(mutex, mutex->th, 0); + const char *err = rb_mutex_unlock_th(mutex, mutex->th, 0, true); if (err) rb_bug("%s", err); } ruby_xfree(ptr); @@ -123,7 +168,7 @@ mutex_memsize(const void *ptr) static const rb_data_type_t mutex_data_type = { "mutex", {NULL, mutex_free, mutex_memsize,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static rb_mutex_t * @@ -172,27 +217,35 @@ static void thread_mutex_insert(rb_thread_t *thread, rb_mutex_t *mutex) { RUBY_ASSERT(!mutex->next_mutex); - if (thread->keeping_mutexes) { - mutex->next_mutex = thread->keeping_mutexes; - } + mutexes_lock_lock(); + { + if (thread->keeping_mutexes) { + mutex->next_mutex = 
thread->keeping_mutexes; + } - thread->keeping_mutexes = mutex; + thread->keeping_mutexes = mutex; + } + mutexes_lock_unlock(); } static void thread_mutex_remove(rb_thread_t *thread, rb_mutex_t *mutex) { - rb_mutex_t **keeping_mutexes = &thread->keeping_mutexes; + mutexes_lock_lock(); + { + rb_mutex_t **keeping_mutexes = &thread->keeping_mutexes; - while (*keeping_mutexes && *keeping_mutexes != mutex) { - // Move to the next mutex in the list: - keeping_mutexes = &(*keeping_mutexes)->next_mutex; - } + while (*keeping_mutexes && *keeping_mutexes != mutex) { + // Move to the next mutex in the list: + keeping_mutexes = &(*keeping_mutexes)->next_mutex; + } - if (*keeping_mutexes) { - *keeping_mutexes = mutex->next_mutex; - mutex->next_mutex = NULL; + if (*keeping_mutexes) { + *keeping_mutexes = mutex->next_mutex; + mutex->next_mutex = NULL; + } } + mutexes_lock_unlock(); } static void @@ -441,7 +494,10 @@ rb_mutex_owned_p(VALUE self) } static const char * -rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial) +// m = Mutex.new +// m.lock() Thread.current.keeping_mutexes << m +// +rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial, bool unlink_from_keeping) { RUBY_DEBUG_LOG("%p", mutex); @@ -455,7 +511,9 @@ rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial) struct sync_waiter *cur = 0, *next; mutex->ec_serial = 0; - thread_mutex_remove(th, mutex); + if (unlink_from_keeping) { + thread_mutex_remove(th, mutex); + } ccan_list_for_each_safe(&mutex->waitq, cur, next, node) { ccan_list_del_init(&cur->node); @@ -492,7 +550,7 @@ do_mutex_unlock(struct mutex_args *args) rb_mutex_t *mutex = args->mutex; rb_thread_t *th = rb_ec_thread_ptr(args->ec); - err = rb_mutex_unlock_th(mutex, th, rb_ec_serial(args->ec)); + err = rb_mutex_unlock_th(mutex, th, rb_ec_serial(args->ec), true); if (err) rb_raise(rb_eThreadError, "%s", err); } @@ -535,8 +593,12 @@ rb_mut_unlock(rb_execution_context_t *ec, VALUE self) 
static void rb_mutex_abandon_keeping_mutexes(rb_thread_t *th) { - rb_mutex_abandon_all(th->keeping_mutexes); - th->keeping_mutexes = NULL; + mutexes_lock_lock(); + { + rb_mutex_abandon_all(th->keeping_mutexes); + th->keeping_mutexes = NULL; + } + mutexes_lock_unlock(); } static void @@ -727,7 +789,7 @@ static const rb_data_type_t queue_data_type = { .dsize = queue_memsize, .dcompact = queue_mark_and_move, }, - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; static VALUE @@ -833,7 +895,7 @@ static const rb_data_type_t szqueue_data_type = { .dcompact = szqueue_mark_and_move, }, .parent = &queue_data_type, - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; static VALUE @@ -1173,7 +1235,7 @@ condvar_memsize(const void *ptr) static const rb_data_type_t cv_data_type = { "condvar", {0, RUBY_TYPED_DEFAULT_FREE, condvar_memsize,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED|RUBY_TYPED_CONCURRENT_FREE_SAFE }; static struct rb_condvar * diff --git a/time.c b/time.c index c3bda3f6af0472..261437a747a2f7 100644 --- a/time.c +++ b/time.c @@ -1909,7 +1909,7 @@ static const rb_data_type_t time_data_type = { .dsize = NULL, .dcompact = time_mark_and_move, }, - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; static VALUE diff --git a/transcode.c b/transcode.c index f8b0fec42ef275..ede9002d7d8152 100644 --- a/transcode.c +++ b/transcode.c @@ -3019,7 +3019,7 @@ econv_memsize(const void *ptr) static const rb_data_type_t econv_data_type = { "econv", {0, 
econv_free, econv_memsize,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE diff --git a/variable.c b/variable.c index 9d0e4e4a2b9eac..35eb86443a9d75 100644 --- a/variable.c +++ b/variable.c @@ -579,6 +579,7 @@ void rb_free_generic_fields_tbl_(void) { st_free_table(generic_fields_tbl_); + generic_fields_tbl_ = NULL; } static struct rb_global_entry* @@ -1225,11 +1226,71 @@ ivar_ractor_check(VALUE obj, ID id) } } +// TODO: platforms other than pthread +static rb_nativethread_lock_t gen_fields_tbl_lock_ = PTHREAD_MUTEX_INITIALIZER; +#ifdef RUBY_THREAD_PTHREAD_H +static pthread_t gen_fields_tbl_lock_owner; +#endif +static unsigned int gen_fields_tbl_lock_lvl; + +static inline void +ASSERT_gen_fields_tbl_locked(void) +{ +#ifdef RUBY_THREAD_PTHREAD_H + VM_ASSERT(pthread_self() == gen_fields_tbl_lock_owner); +#endif +} + +static inline void +ASSERT_gen_fields_tbl_unlocked(void) +{ +#ifdef RUBY_THREAD_PTHREAD_H + VM_ASSERT(pthread_self() != gen_fields_tbl_lock_owner); +#endif +} + +static inline void +gen_fields_tbl_lock(bool allow_reentry) +{ + if (allow_reentry && pthread_self() == gen_fields_tbl_lock_owner) { + } else { + ASSERT_gen_fields_tbl_unlocked(); + rb_native_mutex_lock(&gen_fields_tbl_lock_); + gen_fields_tbl_lock_owner = pthread_self(); + } + gen_fields_tbl_lock_lvl++; +} + +static inline bool +gen_fields_tbl_trylock(bool allow_reentry) +{ + if (allow_reentry && pthread_self() == gen_fields_tbl_lock_owner) { + } else { + ASSERT_gen_fields_tbl_unlocked(); + if (rb_native_mutex_trylock(&gen_fields_tbl_lock_) == EBUSY) { + return false; + } + gen_fields_tbl_lock_owner = pthread_self(); + } + gen_fields_tbl_lock_lvl++; + return true; +} + +static inline void +gen_fields_tbl_unlock(void) +{ + ASSERT_gen_fields_tbl_locked(); + RUBY_ASSERT(gen_fields_tbl_lock_lvl > 0); + gen_fields_tbl_lock_lvl--; + if (gen_fields_tbl_lock_lvl == 0) { + gen_fields_tbl_lock_owner = 0; + 
rb_native_mutex_unlock(&gen_fields_tbl_lock_); + } +} + static inline struct st_table * generic_fields_tbl_no_ractor_check(void) { - ASSERT_vm_locking(); - return generic_fields_tbl_; } @@ -1243,21 +1304,27 @@ void rb_mark_generic_ivar(VALUE obj) { VALUE data; - // Bypass ASSERT_vm_locking() check because marking may happen concurrently with mmtk - if (st_lookup(generic_fields_tbl_, (st_data_t)obj, (st_data_t *)&data)) { - rb_gc_mark_movable(data); + gen_fields_tbl_lock(true); + { + // Bypass ASSERT_vm_locking() check because marking may happen concurrently with mmtk + if (st_lookup(generic_fields_tbl_, (st_data_t)obj, (st_data_t *)&data)) { + rb_gc_mark_movable(data); + } } + gen_fields_tbl_unlock(); } VALUE rb_obj_fields_generic_uncached(VALUE obj) { VALUE fields_obj = 0; - RB_VM_LOCKING() { + gen_fields_tbl_lock(false); + { if (!st_lookup(generic_fields_tbl_, (st_data_t)obj, (st_data_t *)&fields_obj)) { rb_bug("Object is missing entry in generic_fields_tbl"); } } + gen_fields_tbl_unlock(); return fields_obj; } @@ -1301,9 +1368,10 @@ rb_obj_fields(VALUE obj, ID field_name) return fields_obj; } -void +bool rb_free_generic_ivar(VALUE obj) { + bool result = true; if (rb_obj_gen_fields_p(obj)) { st_data_t key = (st_data_t)obj, value; switch (BUILTIN_TYPE(obj)) { @@ -1324,20 +1392,32 @@ rb_free_generic_ivar(VALUE obj) { // Other EC may have stale caches, so fields_obj should be // invalidated and the GC will replace with Qundef - rb_execution_context_t *ec = GET_EC(); - if (ec->gen_fields_cache.obj == obj) { + rb_execution_context_t *ec = rb_current_execution_context(false); + if (ec && ec->gen_fields_cache.obj == obj) { ec->gen_fields_cache.obj = Qundef; ec->gen_fields_cache.fields_obj = Qundef; } - RB_VM_LOCKING() { + if (ec) { + gen_fields_tbl_lock(true); // needs to be re-entrant + } + else { + bool did_lock = gen_fields_tbl_trylock(false); + // If we can't acquire it, bail (could lead to deadlock) + if (!did_lock) return false; + } + // gen_fields_tbl_lock(); + { 
if (!st_delete(generic_fields_tbl_no_ractor_check(), &key, &value)) { + gen_fields_tbl_unlock(); rb_bug("Object is missing entry in generic_fields_tbl"); } } + gen_fields_tbl_unlock(); } } RBASIC_SET_SHAPE_ID(obj, ROOT_SHAPE_ID); } + return result; } static void @@ -1372,8 +1452,12 @@ rb_obj_set_fields(VALUE obj, VALUE fields_obj, ID field_name, VALUE original_fie default: generic_fields: { - RB_VM_LOCKING() { - st_insert(generic_fields_tbl_, (st_data_t)obj, (st_data_t)fields_obj); + RB_VM_LOCKING() { // needed in case insert triggers GC + gen_fields_tbl_lock(false); + { + st_insert(generic_fields_tbl_, (st_data_t)obj, (st_data_t)fields_obj); + } + gen_fields_tbl_unlock(); } RB_OBJ_WRITTEN(obj, original_fields_obj, fields_obj); @@ -2296,6 +2380,7 @@ rb_replace_generic_ivar(VALUE clone, VALUE obj) { RB_VM_LOCKING() { st_data_t fields_tbl, obj_data = (st_data_t)obj; + // We've STW at this point, no need to lock gen_fields_tbl_lock if (st_delete(generic_fields_tbl_, &obj_data, &fields_tbl)) { st_insert(generic_fields_tbl_, (st_data_t)clone, fields_tbl); RB_OBJ_WRITTEN(clone, Qundef, fields_tbl); @@ -2584,6 +2669,45 @@ rb_mod_const_missing(VALUE klass, VALUE name) UNREACHABLE_RETURN(Qnil); } +rb_nativethread_lock_t autoload_free_lock = PTHREAD_MUTEX_INITIALIZER; +#ifdef RUBY_THREAD_PTHREAD_H +pthread_t autoload_free_lock_owner; +#endif + +static inline void +ASSERT_autoload_free_lock_locked(void) +{ +#ifdef RUBY_THREAD_PTHREAD_H + VM_ASSERT(pthread_self() == autoload_free_lock_owner); +#endif +} + +static inline void +ASSERT_autoload_free_lock_unlocked(void) +{ +#ifdef RUBY_THREAD_PTHREAD_H + VM_ASSERT(pthread_self() != autoload_free_lock_owner); +#endif +} + +static inline void +autoload_free_lock_lock(void) { + ASSERT_autoload_free_lock_unlocked(); + rb_native_mutex_lock(&autoload_free_lock); +#ifdef RUBY_THREAD_PTHREAD_H + autoload_free_lock_owner = pthread_self(); +#endif +} + +static inline void +autoload_free_lock_unlock(void) { + 
ASSERT_autoload_free_lock_locked(); +#ifdef RUBY_THREAD_PTHREAD_H + autoload_free_lock_owner = 0; +#endif + rb_native_mutex_unlock(&autoload_free_lock); +} + static void autoload_table_mark(void *ptr) { @@ -2612,7 +2736,7 @@ autoload_table_compact(void *ptr) static const rb_data_type_t autoload_table_type = { "autoload_table", {autoload_table_mark, autoload_table_free, autoload_table_memsize, autoload_table_compact,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE }; #define check_autoload_table(av) \ @@ -2705,10 +2829,14 @@ autoload_data_free(void *ptr) { struct autoload_data *p = ptr; - struct autoload_const *autoload_const, *next; - ccan_list_for_each_safe(&p->constants, autoload_const, next, cnode) { - ccan_list_del_init(&autoload_const->cnode); + autoload_free_lock_lock(); + { + struct autoload_const *autoload_const, *next; + ccan_list_for_each_safe(&p->constants, autoload_const, next, cnode) { + ccan_list_del_init(&autoload_const->cnode); + } } + autoload_free_lock_unlock(); SIZED_FREE(p); } @@ -2722,7 +2850,7 @@ autoload_data_memsize(const void *ptr) static const rb_data_type_t autoload_data_type = { "autoload_data", {autoload_data_mark_and_move, autoload_data_free, autoload_data_memsize, autoload_data_mark_and_move}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static void @@ -2748,14 +2876,19 @@ autoload_const_free(void *ptr) { struct autoload_const *autoload_const = ptr; - ccan_list_del(&autoload_const->cnode); + autoload_free_lock_lock(); + { + ccan_list_del(&autoload_const->cnode); + } + autoload_free_lock_unlock(); + SIZED_FREE(autoload_const); } static const rb_data_type_t autoload_const_type = { "autoload_const", {autoload_const_mark_and_move, autoload_const_free, autoload_const_memsize, autoload_const_mark_and_move,}, - 0, 0, 
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static struct autoload_data * diff --git a/vm.c b/vm.c index 0398b9f74c9683..0eabdfeaa5f1df 100644 --- a/vm.c +++ b/vm.c @@ -3415,6 +3415,8 @@ ruby_vm_destruct(rb_vm_t *vm) if (vm) { rb_thread_t *th = vm->ractor.main_thread; + void wait_for_background_sweeping_to_finish(void *, bool, bool, const char*); + wait_for_background_sweeping_to_finish(vm->gc.objspace, true, false, "vm_destruct"); if (rb_free_at_exit) { rb_free_encoded_insn_data(); @@ -3559,7 +3561,7 @@ vm_memsize(const void *ptr) const rb_data_type_t ruby_vm_data_type = { "VM", {0, 0, vm_memsize,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; #define vm_data_type ruby_vm_data_type @@ -3897,7 +3899,7 @@ const rb_data_type_t ruby_threadptr_data_type = { thread_memsize, thread_compact, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; VALUE @@ -4724,7 +4726,7 @@ static const rb_data_type_t pin_array_list_type = { .dsize = pin_array_list_memsize, .dcompact = pin_array_list_update_references, }, - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; static VALUE diff --git a/vm_backtrace.c b/vm_backtrace.c index c0bc46b8caf5c7..35faedc6e487e7 100644 --- a/vm_backtrace.c +++ b/vm_backtrace.c @@ -157,7 +157,7 @@ static const rb_data_type_t location_data_type = { NULL, // No external memory to report, location_ref_update, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; int @@ -567,7 +567,7 @@ static const rb_data_type_t 
backtrace_data_type = { /* Cannot set the RUBY_TYPED_EMBEDDABLE flag because the loc of frame_info * points elements in the backtrace array. This can cause the loc to become * incorrect if this backtrace object is moved by compaction. */ - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE }; int diff --git a/vm_core.h b/vm_core.h index 85664e18b8396b..4ca92b431b17e7 100644 --- a/vm_core.h +++ b/vm_core.h @@ -806,6 +806,7 @@ typedef struct rb_vm_struct { void *data; void (*mark_func)(VALUE v, void *data); } *mark_func_data; + pthread_t sweep_thread; } gc; rb_at_exit_list *at_exit; @@ -1631,10 +1632,17 @@ VM_ENV_BOX_UNCHECKED(const VALUE *ep) int rb_vm_ep_in_heap_p(const VALUE *ep); #endif +static rb_execution_context_t *rb_current_execution_context(bool expect_ec); + static inline int VM_ENV_ESCAPED_P(const VALUE *ep) { - VM_ASSERT(rb_vm_ep_in_heap_p(ep) == !!VM_ENV_FLAGS(ep, VM_ENV_FLAG_ESCAPED)); +#if VM_CHECK_MODE > 0 + if (rb_current_execution_context(false)) { + // Can be called from background sweep thread, and this uses GET_EC() + VM_ASSERT(rb_vm_ep_in_heap_p(ep) == !!VM_ENV_FLAGS(ep, VM_ENV_FLAG_ESCAPED)); + } +#endif return VM_ENV_FLAGS(ep, VM_ENV_FLAG_ESCAPED) ? 
1 : 0; } @@ -2158,11 +2166,6 @@ rb_current_ractor_raw(bool expect) } } -static inline rb_ractor_t * -rb_current_ractor(void) -{ - return rb_current_ractor_raw(true); -} static inline rb_vm_t * rb_current_vm(void) @@ -2178,6 +2181,16 @@ rb_current_vm(void) return ruby_current_vm_ptr; } +static inline rb_ractor_t * +rb_current_ractor(void) +{ + rb_vm_t *vm = GET_VM(); + if (vm) { + VM_ASSERT(vm->gc.sweep_thread != pthread_self()); + } + return rb_current_ractor_raw(true); +} + void rb_ec_vm_lock_rec_release(const rb_execution_context_t *ec, unsigned int recorded_lock_rec, unsigned int current_lock_rec); diff --git a/vm_method.c b/vm_method.c index 021b06bf00109b..03038ef688eef9 100644 --- a/vm_method.c +++ b/vm_method.c @@ -135,7 +135,7 @@ static const rb_data_type_t cc_table_type = { .dcompact = vm_cc_table_compact, }, .parent = &rb_managed_id_table_type, - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; VALUE diff --git a/vm_sync.c b/vm_sync.c index aca83dde5a73aa..5b33309ebbd572 100644 --- a/vm_sync.c +++ b/vm_sync.c @@ -8,10 +8,12 @@ void rb_ractor_sched_barrier_start(rb_vm_t *vm, rb_ractor_t *cr); void rb_ractor_sched_barrier_join(rb_vm_t *vm, rb_ractor_t *cr); void rb_ractor_sched_barrier_end(rb_vm_t *vm, rb_ractor_t *cr); +bool is_sweep_thread_p(void); static bool vm_locked(rb_vm_t *vm) { + if (!vm) return false; return vm_locked_by_ractor_p(vm, GET_RACTOR()); } @@ -68,6 +70,7 @@ vm_need_barrier_waiting(const rb_vm_t *vm) static bool vm_need_barrier(bool no_barrier, const rb_ractor_t *cr, const rb_vm_t *vm) { + VM_ASSERT(cr); #ifdef RUBY_THREAD_PTHREAD_H return !no_barrier && cr->threads.sched.running != NULL && vm_need_barrier_waiting(vm); // ractor has running threads. 
#else @@ -80,6 +83,8 @@ vm_lock_enter(rb_ractor_t *cr, rb_vm_t *vm, bool locked, bool no_barrier, unsign { RUBY_DEBUG_LOG2(file, line, "start locked:%d", locked); + VM_ASSERT(!is_sweep_thread_p()); + if (locked) { ASSERT_vm_locking(); } @@ -152,6 +157,7 @@ void rb_vm_lock_enter_body(unsigned int *lev APPEND_LOCATION_ARGS) { rb_vm_t *vm = GET_VM(); + VM_ASSERT(vm); if (vm_locked(vm)) { vm_lock_enter(NULL, vm, true, false, lev APPEND_LOCATION_PARAMS); } @@ -164,6 +170,7 @@ void rb_vm_lock_enter_body_nb(unsigned int *lev APPEND_LOCATION_ARGS) { rb_vm_t *vm = GET_VM(); + VM_ASSERT(vm); if (vm_locked(vm)) { vm_lock_enter(NULL, vm, true, true, lev APPEND_LOCATION_PARAMS); } @@ -176,6 +183,7 @@ void rb_vm_lock_enter_body_cr(rb_ractor_t *cr, unsigned int *lev APPEND_LOCATION_ARGS) { rb_vm_t *vm = GET_VM(); + VM_ASSERT(vm); vm_lock_enter(cr, vm, vm_locked(vm), false, lev APPEND_LOCATION_PARAMS); } @@ -188,13 +196,14 @@ rb_vm_lock_leave_body_nb(unsigned int *lev APPEND_LOCATION_ARGS) void rb_vm_lock_leave_body(unsigned int *lev APPEND_LOCATION_ARGS) { - vm_lock_leave(GET_VM(), false, lev APPEND_LOCATION_PARAMS); + vm_lock_leave(GET_VM(), false, lev APPEND_LOCATION_PARAMS); } void rb_vm_lock_body(LOCATION_ARGS) { rb_vm_t *vm = GET_VM(); + VM_ASSERT(vm); ASSERT_vm_unlocking(); vm_lock_enter(GET_RACTOR(), vm, false, false, &vm->ractor.sync.lock_rec APPEND_LOCATION_PARAMS); @@ -254,6 +263,7 @@ void rb_vm_barrier(void) { RB_DEBUG_COUNTER_INC(vm_sync_barrier); + VM_ASSERT(!is_sweep_thread_p()); if (!rb_multi_ractor_p()) { // no other ractors diff --git a/vm_sync.h b/vm_sync.h index 314a2238a96581..761c1795eeb09d 100644 --- a/vm_sync.h +++ b/vm_sync.h @@ -44,7 +44,7 @@ rb_multi_ractor_p(void) { if (LIKELY(ruby_single_main_ractor)) { // 0 on boot time. 
- RUBY_ASSERT(GET_VM()->ractor.cnt <= 1); + RUBY_ASSERT(!GET_VM() || GET_VM()->ractor.cnt <= 1); return false; } else { diff --git a/vm_trace.c b/vm_trace.c index 42b9991e7141bc..5457cc4627a8e5 100644 --- a/vm_trace.c +++ b/vm_trace.c @@ -905,7 +905,7 @@ static const rb_data_type_t tp_data_type = { RUBY_TYPED_DEFAULT_FREE, NULL, // Nothing allocated externally, so don't need a memsize function }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE diff --git a/weakmap.c b/weakmap.c index 7cef1fd46a63a7..256d0887655a03 100644 --- a/weakmap.c +++ b/weakmap.c @@ -141,7 +141,7 @@ const rb_data_type_t rb_weakmap_type = { wmap_compact, wmap_handle_weak_references, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static int @@ -627,7 +627,7 @@ static const rb_data_type_t rb_weakkeymap_type = { wkmap_compact, wkmap_handle_weak_references, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static int