From 05428c8ec7721069f35bea46c0107a5ea06800da Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 30 Mar 2026 21:31:29 +0900 Subject: [PATCH 01/67] ZJIT: Remove side-exit locations dump after test --- test/ruby/test_zjit.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index 0c7d76bdf67292..84ded50300b114 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -395,7 +395,9 @@ def array.itself = :not_itself test(array) fxt_files = Dir.glob("/tmp/perfetto-\#{Process.pid}.fxt") - fxt_files.length == 1 && !File.empty?(fxt_files.first) + result = fxt_files.length == 1 && !File.empty?(fxt_files.first) + File.unlink(*fxt_files) + result RUBY end From 5b83468b86c5467281b3c39d7349bee486787a8c Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 30 Mar 2026 21:33:48 +0900 Subject: [PATCH 02/67] Adjust indent [ci skip] --- gc.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gc.rb b/gc.rb index 895a82b7343c01..01d798addb1596 100644 --- a/gc.rb +++ b/gc.rb @@ -147,7 +147,7 @@ def self.count # sweeping_time: 0, # heap_allocated_pages: 521, # heap_empty_pages: 0, - # heap_allocatable_bytes: 0, + # heap_allocatable_bytes: 0, # heap_available_slots: 539590, # heap_live_slots: 422243, # heap_free_slots: 117347, From ae9b60f18999479640b5b945b76cd74f124c172d Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 26 Feb 2026 20:09:33 +0900 Subject: [PATCH 03/67] [Feature #19107] parse.y: Allow trailing comma in method signature --- parse.y | 60 ++++++++++++++++++++-------------------- test/ruby/test_syntax.rb | 10 +++++++ 2 files changed, 40 insertions(+), 30 deletions(-) diff --git a/parse.y b/parse.y index bcff7918bfa4c3..170ed08a5e87a7 100644 --- a/parse.y +++ b/parse.y @@ -2773,7 +2773,7 @@ rb_parser_ary_free(rb_parser_t *p, rb_parser_ary_t *ary) %type if_tail opt_else case_body case_args cases opt_rescue exc_list exc_var opt_ensure %type args arg_splat call_args opt_call_args %type paren_args opt_paren_args -%type args_tail block_args_tail block_args-opt_tail +%type args_tail block_args_tail %type command_args aref_args %type opt_block_arg block_arg %type var_ref var_lhs @@ -2798,7 +2798,7 @@ rb_parser_ary_free(rb_parser_t *p, rb_parser_ary_t *ary) %type p_value p_primitive p_variable p_var_ref p_expr_ref p_const %type p_kwargs p_kwarg p_kw %type keyword_variable user_variable sym operation2 operation3 -%type cname fname op f_rest_arg f_block_arg opt_f_block_arg f_norm_arg f_bad_arg +%type cname fname op f_rest_arg f_block_arg opt_comma f_norm_arg f_bad_arg %type f_kwrest f_label f_arg_asgn call_op call_op2 reswords relop dot_or_colon %type p_kwrest p_kwnorest p_any_kwrest p_kw_label %type f_no_kwarg f_any_kwrest args_forward excessed_comma nonlocal_var def_name @@ -2923,18 +2923,18 @@ rb_parser_ary_free(rb_parser_t *p, rb_parser_ary_t *ary) } ; -%rule args_tail_basic(value) - : f_kwarg(value) ',' f_kwrest opt_f_block_arg +%rule args_tail_basic(value, trailing) + : f_kwarg(value) ',' f_kwrest opt_f_block_arg(trailing) { $$ = new_args_tail(p, $1, $3, $4, &@3); /*% ripper: [$:1, $:3, $:4] %*/ } - | f_kwarg(value) opt_f_block_arg + | f_kwarg(value) opt_f_block_arg(trailing) { $$ = new_args_tail(p, $1, 0, $2, &@1); /*% ripper: [$:1, Qnil, $:2] %*/ } - | f_any_kwrest opt_f_block_arg + | f_any_kwrest opt_f_block_arg(trailing) { $$ = new_args_tail(p, 0, $1, $2, &@1); /*% ripper: [Qnil, $:1, $:2] %*/ @@ -2946,6 +2946,15 @@ rb_parser_ary_free(rb_parser_t *p, rb_parser_ary_t *ary) } ; +%rule 
opt_f_block_arg(trailing) + : ',' f_block_arg + { + $$ = $2; + /*% ripper: $:2 %*/ + } + | trailing + ; + %rule def_endless_method(bodystmt) : defn_head[head] f_opt_paren_args[args] '=' bodystmt { @@ -3087,13 +3096,13 @@ rb_parser_ary_free(rb_parser_t *p, rb_parser_ary_t *ary) } ; -%rule opt_args_tail(tail) +%rule opt_args_tail(tail, trailing) : ',' tail { $$ = $tail; /*% ripper: $:tail %*/ } - | /* none */ + | trailing { $$ = new_empty_args_tail(p, &@$); /*% ripper: [Qnil, Qnil, Qnil] %*/ @@ -4973,10 +4982,7 @@ f_any_kwrest : f_kwrest f_eq : {p->ctxt.in_argdef = 0;} '='; -block_args_tail : args_tail_basic(primary_value) - ; - -block_args-opt_tail : opt_args_tail(block_args_tail) +block_args_tail : args_tail_basic(primary_value, none) ; excessed_comma : ',' @@ -4987,14 +4993,14 @@ excessed_comma : ',' } ; -block_param : args-list(primary_value, block_args-opt_tail) +block_param : args-list(primary_value, opt_args_tail(block_args_tail, none)) | f_arg[pre] excessed_comma { $$ = new_empty_args_tail(p, &@excessed_comma); $$ = new_args(p, $pre, 0, $excessed_comma, 0, $$, &@$); /*% ripper: params!($:pre, Qnil, $:excessed_comma, Qnil, Qnil, Qnil, Qnil) %*/ } - | f_arg[pre] opt_args_tail(block_args_tail)[tail] + | f_arg[pre] opt_args_tail(block_args_tail, none)[tail] { $$ = new_args(p, $pre, 0, 0, 0, $tail, &@$); /*% ripper: params!($:pre, Qnil, Qnil, Qnil, *$:tail[0..2]) %*/ @@ -6240,7 +6246,7 @@ f_arglist : f_paren_args } ; -args_tail : args_tail_basic(arg_value) +args_tail : args_tail_basic(arg_value, opt_comma) | args_forward { add_forwarding_args(p); @@ -6250,7 +6256,7 @@ args_tail : args_tail_basic(arg_value) } ; -largs_tail : args_tail_basic(arg_value) +largs_tail : args_tail_basic(arg_value, none) | args_forward { yyerror1(&@args_forward, "unexpected ... in lambda argument"); @@ -6331,14 +6337,9 @@ largs_tail : args_tail_basic(arg_value) } ; -%rule f_args-opt_tail(tail) - : opt_args_tail(tail) - ; - - -%rule f_args-list(tail) - : args-list(arg_value, f_args-opt_tail(tail)) - | f_arg[pre] opt_args_tail(tail)[tail] +%rule f_args-list(tail, trailing) + : args-list(arg_value, opt_args_tail(tail, trailing)) + | f_arg[pre] opt_args_tail(tail, trailing)[tail] { $$ = new_args(p, $pre, 0, 0, 0, $tail, &@$); /*% ripper: params!($:pre, Qnil, Qnil, Qnil, *$:tail[0..2]) %*/ @@ -6347,10 +6348,10 @@ largs_tail : args_tail_basic(arg_value) | f_empty_arg ; -f_args : f_args-list(args_tail) +f_args : f_args-list(args_tail, opt_comma) ; -f_largs : f_args-list(largs_tail) +f_largs : f_args-list(largs_tail, none) ; args_forward : tBDOT3 @@ -6538,12 +6539,11 @@ f_block_arg : blkarg_mark tIDENTIFIER } ; -opt_f_block_arg : ',' f_block_arg +opt_comma : ','? 
{ - $$ = $2; - /*% ripper: $:2 %*/ + $$ = 0; + /*% ripper: Qnil %*/ } - | none ; diff --git a/test/ruby/test_syntax.rb b/test/ruby/test_syntax.rb index 70e19568160d0e..2d04858bde46da 100644 --- a/test/ruby/test_syntax.rb +++ b/test/ruby/test_syntax.rb @@ -222,6 +222,16 @@ def test_no_block_argument_in_method assert_raise_with_message(ArgumentError, /block accepted/) {obj.f(&proc {})} end + def test_trailing_comma_in_method_parameters + assert_valid_syntax("def f(a,b,c,); end") + assert_valid_syntax("def f(a,b,*c,); end") + assert_valid_syntax("def f(a,b,*,); end") + assert_valid_syntax("def f(a,b,**c,); end") + assert_valid_syntax("def f(a,b,**,); end") + assert_syntax_error("def f(a,b,&block,); end", /unexpected/) + assert_syntax_error("def f(a,b,...,); end", /unexpected/) + end + def test_no_block_argument_in_block assert_valid_syntax("proc do |&nil| end") assert_valid_syntax("proc do |a, &nil| end") From f9704cb00aa9dd247d07f930600edce685bd7344 Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Tue, 13 Jan 2026 14:31:46 -0800 Subject: [PATCH 04/67] Only check malloc_increase atomically if necessary Previously we did a non-atomic read of malloc_increase to determine whether or not we needed to gc on malloc. We also should only need to check the value when we have actually flushed/committed a new increase. Use relaxed load in atomic_sub_nounderflow The previous code did a non-atomic load of val, which would work fine (since the value would only be used for an atomic CAS) but resulted in TSAN errors. This alos adjusts the cas to use relaxed memory model, though I'm not sure that actually makes a difference anywhere. Use relaxed atomics for malloc increase as well Use atomics for loading deferred Use atomics for final slot count Use flag for is_lazy_sweeping WIP: simpler background thread (no-op for now) Adjustments Allow one more thread WIP: getting closer to checking end of sweep condition correctly dequeue usleep Attempt pre-sweep Fix accounting of free slots Sweep anything that !needs_cleanup Add TODO Free some T_OBJECTs in background thread get id2ref working Finish background sweeping before compaction gc_abort() waits for background sweeping to finish Add page->page_lock and lock it when changing freelist Right now only the mutator and the background thread need to lock it. We should re-init all these locks on fork due to Ractors (TODO). Make sure not background sweeping during Process.warmup Less locking/unlocking in gc_sweep_step_worker Better sweep_lock management reinit sweep_lock,sweep_cond after fork Allow sweep thread to acquire VM lock. It never joins a VM barrier. Add simply T_DATA freeing Also finish background freeing in ruby_vm_destruct. Allow taking VM lock in sweep thread Add fiber_pool_lock for cont.c Don't take VM lock in background sweep thread Make id2ref_tbl_lock re-entrant We can get the following: 1) RB_VM_LOCK() // need this to allow GC when inserting into tbl 2) id2ref_tbl_lock() // 3) insert into id2ref_tbl, causes GC 4) free object id, which acquires id2ref_tbl_lock again Therefore, the lock needs to be re-entrant. freeing of id2ref object_id in background thread Get gen fields freeing done in background sweep thread First pass at making zombies in background sweep thread add mutexes_lock lock for thread->keeping_mutexes We need to lock this when manipulating this linked list, because freeing a mutex, which can now be done in a background thread, manipulates it. I made this lock global for now, but it should really be either per-ractor or per-thread. 
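A rough sketch of the re-entrant id2ref_tbl locking pattern described above (names and layout here are illustrative and assumed, not the exact code in this series; it also assumes pthread_t can be compared directly, as the rest of the series does):

    typedef struct {
        rb_nativethread_lock_t lock;
        pthread_t owner;       /* 0 while unheld */
        unsigned int level;    /* nesting depth for the owning thread */
    } reentrant_lock_t;

    static void
    reentrant_lock_acquire(reentrant_lock_t *l)
    {
        if (l->owner != pthread_self()) {   /* first acquisition by this thread */
            rb_native_mutex_lock(&l->lock);
            l->owner = pthread_self();
        }
        l->level++;   /* the owner may re-enter, e.g. when an insert triggers GC */
    }

    static void
    reentrant_lock_release(reentrant_lock_t *l)
    {
        if (--l->level == 0) {   /* only the outermost release drops the mutex */
            l->owner = 0;
            rb_native_mutex_unlock(&l->lock);
        }
    }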
Add autoload_free_lock

We can't free autoload_data by one thread while also freeing an autoload_const that's associated with it concurrently. This can happen currently if they're on separate pages.

Add assertions after major GC that background thread is inactive.

I'm going to work on allowing background sweeping during a major (unless explicitly requested via GC.start). This is probably more important even than during minors.

Add more assertions and comments

whitelist making zombies in sweep thread

sweep some imemos in background sweep thread

tmp commit

stuck

tmp

Get 1 pass over pages mostly working

GC compaction tests are still broken. Not sure why.

TODO: when in background thread, never modify the page's freelist directly in case user code is being run. Instead, each page should have a deferred_freelist that the user thread will link in when the page is swept.

Merge freelist and deferred freelist when we process a page

some cleanup

Get GC compaction working, doesn't use background thread

Fix running GC in cleanup finalizers

stuck with GC compact

Fix GC.compact

Remove usage of page_lock mutex as we no longer need it.

Keep actual lock around, but I'll remove it in a separate commit.

GC: Remove unused page->page_lock mutex

cleanup

Remove unused code, add comments

Background thread only sweeps until ruby thread is done with that heap

There are some problems with the current approach:

1) The background thread can get ahead of the ruby thread on the current heap and sweep more than is necessary instead of moving on to the next heap. We should track `incremental_step_freed_objects` for each heap so the ruby thread and background thread are in sync, and the background thread can sweep the next heap when necessary.

2) We need to restart the background sweeping when we exit from GC. There should be an `objspace->background_sweep_mode` flag for after GC exits and background sweeping begins.

checkin

Fix issues with parallel sweep

Issues were:

* post-fork issues
* gc_sweep_dequeue_page/heap_is_sweep_done/has_sweeping_pages trio is tricky
* rb_ec_cleanup issues (aborting bg sweeping, stopping thread)

Fix issue with gc_sweep_rest() that could loop forever

It could happen when background sweeping got ahead of the ruby thread.

Fix more bugs

Attempt to make has_sweeping_pages() faster

We can make it even faster if we always let the ruby thread take the last page. This is what it used to do, and I think it was the right strategy in hindsight, just because of `has_sweeping_pages` and `gc_sweep_finish`. Otherwise, the ruby thread could sometimes need to wait on the background thread when it's called.

Simplify end conditions by ruby thread taking last sweeping_page

This is how it used to work, and I think it's a good idea to simplify checks for when sweeping is finished.

Tracking down allocation bug

Fixed allocation bug

It had to do with adjacent bitfields being the same memory object and used concurrently. Changed them to bools and it fixed the issue.
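A minimal sketch of the deferred-freelist hand-off mentioned in the TODO above (illustrative only; the real heap_page and free_slot structs in this series carry more state than shown, and the helper name is an assumption):

    /* The sweep thread collects reclaimed slots on page->deferred_freelist
     * without touching page->freelist, which user code may be allocating
     * from. When the Ruby thread later claims the swept page, it splices
     * the deferred list onto the regular freelist in one step. */
    static void
    page_merge_deferred_freelist(struct heap_page *page)
    {
        struct free_slot *head = page->deferred_freelist;
        if (head == NULL) return;

        struct free_slot *tail = head;
        while (tail->next) tail = tail->next;   /* find end of deferred list */

        tail->next = page->freelist;            /* link in front of freelist */
        page->freelist = head;
        page->deferred_freelist = NULL;
    }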
Improve efficiency of requesting background sweep help Keep track of heap->latest_swept_page cleanup unlink pages in sweep thread Add to free_pages and pooled_pages in sweep thread remove redundant work Use atomic for heap->foreground_sweep_steps and separate swept_pages lock Add heap->skip_sweep_continue Add parallel sweep lock stats Output sweep time at end of process Add counts of sweep events less conditionals Change PSWEEP_LOCK_STATS to use per-callsite stats Add wall clock psweep timings first pass at rb_garbage_object_p with sweep thread Fix WB issues with sweep thread Use atomic operations for bitmaps that can be read/modified by both the mutator and the sweep thread. Avoid the tricky case of `gc_setup_mark_bits` by deferring it to sweep finish. This way, it doesn't conflict with write barriers. Make page->needs_setup_mark_bits its own memory object tmp commit before pairing Bug fix for mark T_NONE John found the fix. --- cont.c | 276 +-- darray.h | 30 +- error.c | 5 + gc.c | 237 ++- gc/default/default.c | 2038 +++++++++++++++++++++-- gc/gc.h | 3 +- include/ruby/atomic.h | 186 +++ include/ruby/internal/intern/variable.h | 2 +- include/ruby/internal/value_type.h | 1 + process.c | 6 +- ractor_core.h | 2 + symbol.c | 15 +- test/ruby/test_process.rb | 2 +- thread.c | 17 +- thread_sync.c | 102 +- variable.c | 165 +- vm.c | 2 + vm_callinfo.h | 3 + vm_core.h | 25 +- vm_sync.c | 77 +- vm_sync.h | 2 +- 21 files changed, 2825 insertions(+), 371 deletions(-) diff --git a/cont.c b/cont.c index e5239635081629..a1af339d331751 100644 --- a/cont.c +++ b/cont.c @@ -298,6 +298,51 @@ rb_free_shared_fiber_pool(void) static ID fiber_initialize_keywords[3] = {0}; +rb_nativethread_lock_t fiber_lock; +#ifdef RUBY_THREAD_PTHREAD_H +pthread_t fiber_pool_lock_owner; +#endif + +static inline void +ASSERT_fiber_pool_locked(void) +{ +#ifdef RUBY_THREAD_PTHREAD_H + VM_ASSERT(pthread_self() == fiber_pool_lock_owner); +#endif +} + +static inline void +ASSERT_fiber_pool_unlocked(void) +{ +#ifdef RUBY_THREAD_PTHREAD_H + VM_ASSERT(pthread_self() != fiber_pool_lock_owner); +#endif +} + +static inline void +fiber_pool_lock(void) { + ASSERT_fiber_pool_unlocked(); + rb_native_mutex_lock(&fiber_lock); +#ifdef RUBY_THREAD_PTHREAD_H + fiber_pool_lock_owner = pthread_self(); +#endif +} + +static inline void +fiber_pool_unlock(void) { + ASSERT_fiber_pool_locked(); +#ifdef RUBY_THREAD_PTHREAD_H + fiber_pool_lock_owner = 0; +#endif + rb_native_mutex_unlock(&fiber_lock); +} + +void +fiber_pool_lock_reset(void) +{ + rb_native_mutex_initialize(&fiber_lock); +} + /* * FreeBSD require a first (i.e. addr) argument of mmap(2) is not NULL * if MAP_STACK is passed. @@ -426,6 +471,7 @@ fiber_pool_vacancy_remove(struct fiber_pool_vacancy * vacancy) inline static struct fiber_pool_vacancy * fiber_pool_vacancy_pop(struct fiber_pool * pool) { + // fiber_pool_lock is acquired struct fiber_pool_vacancy * vacancy = pool->vacancies; if (vacancy) { @@ -438,6 +484,7 @@ fiber_pool_vacancy_pop(struct fiber_pool * pool) inline static struct fiber_pool_vacancy * fiber_pool_vacancy_pop(struct fiber_pool * pool) { + // fiber_pool_lock is acquired struct fiber_pool_vacancy * vacancy = pool->vacancies; if (vacancy) { @@ -525,117 +572,147 @@ fiber_pool_allocate_memory(size_t * count, size_t stride) // fiber_pool_initialize before the pool is shared across threads. 
// @sa fiber_pool_allocation_free static struct fiber_pool_allocation * -fiber_pool_expand(struct fiber_pool * fiber_pool, size_t count) +fiber_pool_expand(struct fiber_pool * fiber_pool, size_t count, bool needs_lock, bool unlock_before_raise, struct fiber_pool_vacancy **vacancy_out) { if (count == 0) { errno = EAGAIN; return NULL; } - STACK_GROW_DIR_DETECTION; + // Allocate metadata before mmap: ruby_xmalloc (RB_ALLOC) raises on failure and + // must not run after base is mapped, or the region would leak. + struct fiber_pool_allocation * allocation = RB_ALLOC(struct fiber_pool_allocation); - size_t size = fiber_pool->size; - size_t stride = size + RB_PAGE_SIZE; + if (needs_lock) fiber_pool_lock(); + { + STACK_GROW_DIR_DETECTION; - // If the maximum number of stacks is set, and we have reached it, return NULL. - if (fiber_pool->maximum_count > 0) { - if (fiber_pool->count >= fiber_pool->maximum_count) { - errno = EAGAIN; - return NULL; - } - size_t remaining = fiber_pool->maximum_count - fiber_pool->count; - if (count > remaining) { - count = remaining; + size_t size = fiber_pool->size; + size_t stride = size + RB_PAGE_SIZE; + + // If the maximum number of stacks is set, and we have reached it, return NULL. + if (fiber_pool->maximum_count > 0) { + if (fiber_pool->count >= fiber_pool->maximum_count) { + if (unlock_before_raise) fiber_pool_unlock(); + errno = EAGAIN; + return NULL; + } + size_t remaining = fiber_pool->maximum_count - fiber_pool->count; + if (count > remaining) { + count = remaining; + } } - } - // Allocate metadata before mmap: ruby_xmalloc (RB_ALLOC) raises on failure and - // must not run after base is mapped, or the region would leak. - struct fiber_pool_allocation * allocation = RB_ALLOC(struct fiber_pool_allocation); - // Allocate the memory required for the stacks: - void * base = fiber_pool_allocate_memory(&count, stride); + // Allocate the memory required for the stacks: + void * base = fiber_pool_allocate_memory(&count, stride); - if (base == NULL) { - if (!errno) errno = ENOMEM; - ruby_xfree(allocation); - return NULL; - } + if (base == NULL) { + int saved_errno = errno; + if (!saved_errno) saved_errno = ENOMEM; + if (unlock_before_raise) fiber_pool_unlock(); + ruby_xfree(allocation); + errno = saved_errno; + return NULL; + } - struct fiber_pool_vacancy * vacancies = fiber_pool->vacancies; + struct fiber_pool_vacancy * vacancies = fiber_pool->vacancies; - // Initialize fiber pool allocation: - allocation->base = base; - allocation->size = size; - allocation->stride = stride; - allocation->count = count; + // Initialize fiber pool allocation: + allocation->base = base; + allocation->size = size; + allocation->stride = stride; + allocation->count = count; #ifdef FIBER_POOL_ALLOCATION_FREE - allocation->used = 0; + allocation->used = 0; #endif - allocation->pool = fiber_pool; + allocation->pool = fiber_pool; - if (DEBUG_EXPAND) { - fprintf(stderr, "fiber_pool_expand(%"PRIuSIZE"): %p, %"PRIuSIZE"/%"PRIuSIZE" x [%"PRIuSIZE":%"PRIuSIZE"]\n", - count, (void*)fiber_pool, fiber_pool->used, fiber_pool->count, size, fiber_pool->vm_stack_size); - } + if (DEBUG_EXPAND) { + fprintf(stderr, "fiber_pool_expand(%"PRIuSIZE"): %p, %"PRIuSIZE"/%"PRIuSIZE" x [%"PRIuSIZE":%"PRIuSIZE"]\n", + count, (void*)fiber_pool, fiber_pool->used, fiber_pool->count, size, fiber_pool->vm_stack_size); + } - // Iterate over all stacks, initializing the vacancy list: - for (size_t i = 0; i < count; i += 1) { - void * base = (char*)allocation->base + (stride * i); - void * page = (char*)base + 
STACK_DIR_UPPER(size, 0); + // Iterate over all stacks, initializing the vacancy list: + for (size_t i = 0; i < count; i += 1) { + void * base = (char*)allocation->base + (stride * i); + void * page = (char*)base + STACK_DIR_UPPER(size, 0); #if defined(_WIN32) - DWORD old_protect; - - if (!VirtualProtect(page, RB_PAGE_SIZE, PAGE_READWRITE | PAGE_GUARD, &old_protect)) { - int error = rb_w32_map_errno(GetLastError()); - VirtualFree(allocation->base, 0, MEM_RELEASE); - ruby_xfree(allocation); - errno = error; - return NULL; - } + DWORD old_protect; + + if (!VirtualProtect(page, RB_PAGE_SIZE, PAGE_READWRITE | PAGE_GUARD, &old_protect)) { + int error = rb_w32_map_errno(GetLastError()); + if (unlock_before_raise) fiber_pool_unlock(); + VirtualFree(allocation->base, 0, MEM_RELEASE); + ruby_xfree(allocation); + errno = error; + return NULL; + } #elif defined(__wasi__) - // wasi-libc's mprotect emulation doesn't support PROT_NONE. - (void)page; + // wasi-libc's mprotect emulation doesn't support PROT_NONE. + (void)page; #else - if (mprotect(page, RB_PAGE_SIZE, PROT_NONE) < 0) { - int error = errno; - if (!error) error = ENOMEM; - munmap(allocation->base, count*stride); - ruby_xfree(allocation); - errno = error; - return NULL; - } + if (mprotect(page, RB_PAGE_SIZE, PROT_NONE) < 0) { + int error = errno; + if (!error) error = ENOMEM; + if (unlock_before_raise) fiber_pool_unlock(); + munmap(allocation->base, count*stride); + ruby_xfree(allocation); + errno = error; + return NULL; + } #endif - vacancies = fiber_pool_vacancy_initialize( - fiber_pool, vacancies, - (char*)base + STACK_DIR_UPPER(0, RB_PAGE_SIZE), - size - ); + vacancies = fiber_pool_vacancy_initialize( + fiber_pool, vacancies, + (char*)base + STACK_DIR_UPPER(0, RB_PAGE_SIZE), + size + ); #ifdef FIBER_POOL_ALLOCATION_FREE - vacancies->stack.allocation = allocation; + vacancies->stack.allocation = allocation; #endif - } + } - // Insert the allocation into the head of the pool: - allocation->next = fiber_pool->allocations; + // Insert the allocation into the head of the pool: + allocation->next = fiber_pool->allocations; #ifdef FIBER_POOL_ALLOCATION_FREE - if (allocation->next) { - allocation->next->previous = allocation; - } + if (allocation->next) { + allocation->next->previous = allocation; + } - allocation->previous = NULL; + allocation->previous = NULL; #endif - fiber_pool->allocations = allocation; - fiber_pool->vacancies = vacancies; - fiber_pool->count += count; + fiber_pool->allocations = allocation; + fiber_pool->vacancies = vacancies; + fiber_pool->count += count; + + if (vacancy_out) { + *vacancy_out = fiber_pool_vacancy_pop(fiber_pool); + } + + if (needs_lock) fiber_pool_unlock(); + } return allocation; } +static struct fiber_pool_vacancy * +fiber_pool_expand_and_pop(struct fiber_pool * fiber_pool, size_t count, bool needs_lock, bool unlock_before_raise) +{ + struct fiber_pool_vacancy *vacancy_out; + struct fiber_pool_allocation *allocation = fiber_pool_expand(fiber_pool, count, needs_lock, unlock_before_raise, &vacancy_out); + if (allocation) { + return vacancy_out; + } + else { + return NULL; + } + +} + // Initialize the specified fiber pool with the given number of stacks. // @param vm_stack_size The size of the vm stack to allocate. 
static void @@ -654,7 +731,7 @@ fiber_pool_initialize(struct fiber_pool * fiber_pool, size_t size, size_t minimu fiber_pool->vm_stack_size = vm_stack_size; if (fiber_pool->minimum_count > 0) { - if (RB_UNLIKELY(!fiber_pool_expand(fiber_pool, fiber_pool->minimum_count))) { + if (RB_UNLIKELY(!fiber_pool_expand(fiber_pool, fiber_pool->minimum_count, false, false, NULL))) { rb_raise(rb_eFiberError, "can't allocate initial fiber stacks (%"PRIuSIZE" x %"PRIuSIZE" bytes): %s", fiber_pool->minimum_count, fiber_pool->size, strerror(errno)); } } @@ -739,19 +816,24 @@ fiber_pool_stack_expand_count(const struct fiber_pool *pool) static struct fiber_pool_vacancy * fiber_pool_stack_acquire_expand(struct fiber_pool *fiber_pool) { + // fiber_pool_lock acquired size_t count = fiber_pool_stack_expand_count(fiber_pool); if (DEBUG_ACQUIRE) fprintf(stderr, "fiber_pool_stack_acquire: expanding fiber pool by %"PRIuSIZE" stacks\n", count); struct fiber_pool_vacancy *vacancy = NULL; - if (RB_LIKELY(fiber_pool_expand(fiber_pool, count))) { - return fiber_pool_vacancy_pop(fiber_pool); + if (RB_LIKELY((vacancy = fiber_pool_expand_and_pop(fiber_pool, count, false, true)))) { + return vacancy; } else { if (DEBUG_ACQUIRE) fprintf(stderr, "fiber_pool_stack_acquire: expand failed (%s), collecting garbage\n", strerror(errno)); - rb_gc(); + fiber_pool_unlock(); + { + rb_gc(); + } + fiber_pool_lock(); // After running GC, the vacancy list may have some stacks: vacancy = fiber_pool_vacancy_pop(fiber_pool); @@ -763,8 +845,8 @@ fiber_pool_stack_acquire_expand(struct fiber_pool *fiber_pool) count = fiber_pool_stack_expand_count(fiber_pool); // Try to expand the fiber pool again: - if (RB_LIKELY(fiber_pool_expand(fiber_pool, count))) { - return fiber_pool_vacancy_pop(fiber_pool); + if (RB_LIKELY((vacancy = fiber_pool_expand_and_pop(fiber_pool, false, true, count)))) { + return vacancy; } else { // Okay, we really failed to acquire a stack. Give up and return NULL with errno set: @@ -779,8 +861,7 @@ fiber_pool_stack_acquire(struct fiber_pool * fiber_pool) { struct fiber_pool_vacancy * vacancy; - unsigned int lev; - RB_VM_LOCK_ENTER_LEV(&lev); + fiber_pool_lock(); { // Fast path: try to acquire a stack from the vacancy list: vacancy = fiber_pool_vacancy_pop(fiber_pool); @@ -793,7 +874,7 @@ fiber_pool_stack_acquire(struct fiber_pool * fiber_pool) // If expansion failed, raise an error: if (RB_UNLIKELY(!vacancy)) { - RB_VM_LOCK_LEAVE_LEV(&lev); + fiber_pool_unlock(); rb_raise(rb_eFiberError, "can't allocate fiber stack: %s", strerror(errno)); } } @@ -811,10 +892,9 @@ fiber_pool_stack_acquire(struct fiber_pool * fiber_pool) #ifdef FIBER_POOL_ALLOCATION_FREE vacancy->stack.allocation->used += 1; #endif - fiber_pool_stack_reset(&vacancy->stack); } - RB_VM_LOCK_LEAVE_LEV(&lev); + fiber_pool_unlock(); return vacancy->stack; } @@ -880,7 +960,7 @@ fiber_pool_stack_free(struct fiber_pool_stack * stack) #endif } -// Release and return a stack to the vacancy list. +// Release and return a stack to the vacancy list. fiber_lock is acquired upon entry. static void fiber_pool_stack_release(struct fiber_pool_stack * stack) { @@ -1031,17 +1111,6 @@ fiber_stack_release(rb_fiber_t * fiber) rb_ec_clear_vm_stack(ec); } -static void -fiber_stack_release_locked(rb_fiber_t *fiber) -{ - if (!ruby_vm_during_cleanup) { - // We can't try to acquire the VM lock here because MMTK calls free in its own native thread which has no ec. - // This assertion will fail on MMTK but we currently don't have CI for debug releases of MMTK, so we can assert for now. 
- ASSERT_vm_locking_with_barrier(); - } - fiber_stack_release(fiber); -} - static const char * fiber_status_name(enum fiber_status s) { @@ -1204,7 +1273,11 @@ cont_free(void *ptr) else { rb_fiber_t *fiber = (rb_fiber_t*)cont; coroutine_destroy(&fiber->context); - fiber_stack_release_locked(fiber); + fiber_pool_lock(); + { + fiber_stack_release(fiber); + } + fiber_pool_unlock(); } SIZED_FREE_N(cont->saved_vm_stack.ptr, cont->saved_vm_stack.size); @@ -2892,9 +2965,11 @@ fiber_switch(rb_fiber_t *fiber, int argc, const VALUE *argv, int kw_splat, rb_fi // We cannot free the stack until the pthread is joined: #ifndef COROUTINE_PTHREAD_CONTEXT if (FIBER_TERMINATED_P(fiber)) { - RB_VM_LOCKING() { + fiber_pool_lock(); + { fiber_stack_release(fiber); } + fiber_pool_unlock(); } #endif @@ -3651,6 +3726,7 @@ Init_Cont(void) #endif SET_MACHINE_STACK_END(&th->ec->machine.stack_end); + rb_native_mutex_initialize(&fiber_lock); size_t minimum_count = shared_fiber_pool_minimum_count(); size_t maximum_count = shared_fiber_pool_maximum_count(); fiber_pool_initialize(&shared_fiber_pool, stack_size, minimum_count, maximum_count, vm_stack_size); diff --git a/darray.h b/darray.h index 31ab7d412aa441..c65c01df7355ff 100644 --- a/darray.h +++ b/darray.h @@ -48,6 +48,10 @@ #define rb_darray_append_without_gc(ptr_to_ary, element) \ rb_darray_append_impl(ptr_to_ary, element, rb_darray_realloc_mul_add_without_gc) +//#define rb_darray_clear_and_free_without_gc(ptr_to_ary) \ + //rb_darray_size(ptr_to_ary) ? (rb_darray_free_without_gc(ptr_to_ary)) : (void)0 + + #define rb_darray_append_impl(ptr_to_ary, element, realloc_func) do { \ rb_darray_ensure_space((ptr_to_ary), \ sizeof(**(ptr_to_ary)), \ @@ -138,6 +142,21 @@ rb_darray_size(const void *ary) * Useful for TypedData objects. */ #define rb_darray_memsize(ary) (sizeof(*(ary)) + (rb_darray_size(ary) * sizeof((ary)->data[0]))) +/* Remove n items from the beginning of the array */ +#define rb_darray_shift_n(ary, n) rb_darray_shift_n_impl(ary, ary->data, n, sizeof((ary)->data[0])) + +static inline void +rb_darray_shift_n_impl(void *ary, void *data, size_t n, size_t type_sz) +{ + rb_darray_meta_t *meta = ary; + RUBY_ASSERT(meta->size >= n); + char *dst = (char*)data; + if (n > 0) { + memmove(dst, dst + n * type_sz, (meta->size - n) * type_sz); + meta->size -= n; + } +} + static inline void rb_darray_pop(void *ary, size_t count) { @@ -218,6 +237,8 @@ rb_darray_realloc_mul_add(void *orig_ptr, size_t capa, size_t element_size, size return ptr; } +bool is_sweep_thread_p(void); + /* Internal function. Like rb_xrealloc_mul_add but does not trigger GC. */ static inline void * rb_darray_realloc_mul_add_without_gc(void *orig_ptr, size_t x, size_t y, size_t z) @@ -225,7 +246,14 @@ rb_darray_realloc_mul_add_without_gc(void *orig_ptr, size_t x, size_t y, size_t size_t size = rbimpl_size_add_or_raise(rbimpl_size_mul_or_raise(x, y), z); void *ptr = realloc(orig_ptr, size); - if (ptr == NULL) rb_bug("rb_darray_realloc_mul_add_without_gc: failed"); + if (ptr == NULL) { + if (!is_sweep_thread_p()) { + rb_bug("rb_darray_realloc_mul_add_without_gc: failed"); + } + else { + fprintf(stderr, "darray: realloc failed (from sweep thread)\n"); + } + } return ptr; } diff --git a/error.c b/error.c index 52bd3629bf2d13..6e88dcbfaff897 100644 --- a/error.c +++ b/error.c @@ -1117,11 +1117,16 @@ rb_bug_without_die(const char *fmt, ...) va_end(args); } +bool is_sweep_thread_p(void); + void rb_bug(const char *fmt, ...) 
{ va_list args; va_start(args, fmt); + if (is_sweep_thread_p()) { + fprintf(stderr, "rb_bug() called from sweep_thread!\n"); + } rb_bug_without_die_internal(fmt, args); va_end(args); die(); diff --git a/gc.c b/gc.c index d6d517d6a44c9e..fe6f0c9b90e509 100644 --- a/gc.c +++ b/gc.c @@ -151,9 +151,18 @@ rb_gc_vm_unlock(unsigned int lev, const char *file, int line) rb_vm_lock_leave(&lev, file, line); } +bool +is_sweep_thread_p(void) +{ + rb_vm_t *vm = GET_VM(); + if (!vm) return false; + return vm->gc.sweep_thread == pthread_self(); +} + unsigned int rb_gc_cr_lock(const char *file, int line) { + GC_ASSERT(!is_sweep_thread_p()); unsigned int lev; rb_vm_lock_enter_cr(GET_RACTOR(), &lev, file, line); return lev; @@ -162,6 +171,7 @@ rb_gc_cr_lock(const char *file, int line) void rb_gc_cr_unlock(unsigned int lev, const char *file, int line) { + GC_ASSERT(!is_sweep_thread_p()); rb_vm_lock_leave_cr(GET_RACTOR(), &lev, file, line); } @@ -1347,7 +1357,7 @@ rb_gc_obj_needs_cleanup_p(VALUE obj) } shape_id_t shape_id = RBASIC_SHAPE_ID(obj); - if (id2ref_tbl && rb_shape_has_object_id(shape_id)) return true; + if (RUBY_ATOMIC_PTR_LOAD(id2ref_tbl) && rb_shape_has_object_id(shape_id)) return true; switch (flags & RUBY_T_MASK) { case T_OBJECT: @@ -1410,6 +1420,7 @@ make_io_zombie(void *objspace, VALUE obj) rb_gc_impl_make_zombie(objspace, obj, io_fptr_finalize, fptr); } +// Returns whether or not we can add `obj` back to the page's freelist. static bool rb_data_free(void *objspace, VALUE obj) { @@ -1476,6 +1487,7 @@ classext_iclass_free(rb_classext_t *ext, bool is_prime, VALUE box_value, void *a rb_iclass_classext_free(args->klass, ext, is_prime); } +// Returns whether or not we can add `obj` back to the page's freelist. bool rb_gc_obj_free(void *objspace, VALUE obj) { @@ -1665,12 +1677,19 @@ rb_gc_obj_free(void *objspace, VALUE obj) rb_imemo_free((VALUE)obj); break; + case T_ZOMBIE: + GC_ASSERT(FL_TEST(obj, FL_FREEZE)); + GC_ASSERT(!FL_TEST(obj, FL_FINALIZE)); + void rb_gc_impl_free_zombie(rb_objspace_t *, VALUE); + rb_gc_impl_free_zombie(objspace, obj); + break; default: rb_bug("gc_sweep(): unknown data type 0x%x(%p) 0x%"PRIxVALUE, BUILTIN_TYPE(obj), (void*)obj, RBASIC(obj)->flags); } if (FL_TEST_RAW(obj, FL_FINALIZE)) { + GC_ASSERT(BUILTIN_TYPE(obj) != T_ZOMBIE); rb_gc_impl_make_zombie(objspace, obj, 0, 0); return FALSE; } @@ -2057,12 +2076,78 @@ id2ref_tbl_memsize(const void *data) return rb_st_memsize(data); } +// TODO: platforms other than pthread +static rb_nativethread_lock_t id2ref_tbl_lock_ = PTHREAD_MUTEX_INITIALIZER; +#ifdef RUBY_THREAD_PTHREAD_H +static pthread_t id2ref_tbl_lock_owner; +#endif +static unsigned int id2ref_tbl_lock_lvl; + +static inline void +ASSERT_id2ref_tbl_locked(void) +{ +#ifdef RUBY_THREAD_PTHREAD_H + VM_ASSERT(pthread_self() == id2ref_tbl_lock_owner); +#endif +} + +static inline void +ASSERT_id2ref_tbl_unlocked(void) +{ +#ifdef RUBY_THREAD_PTHREAD_H + VM_ASSERT(pthread_self() != id2ref_tbl_lock_owner); +#endif +} + +static inline void +id2ref_tbl_lock(bool allow_reentry) +{ + if (allow_reentry && pthread_self() == id2ref_tbl_lock_owner) { + } else { + ASSERT_id2ref_tbl_unlocked(); + rb_native_mutex_lock(&id2ref_tbl_lock_); + id2ref_tbl_lock_owner = pthread_self(); + } + id2ref_tbl_lock_lvl++; +} + +static inline bool +id2ref_tbl_trylock(bool allow_reentry) +{ + if (allow_reentry && pthread_self() == id2ref_tbl_lock_owner) { + } else { + ASSERT_id2ref_tbl_unlocked(); + if (rb_native_mutex_trylock(&id2ref_tbl_lock_) == EBUSY) { + return false; + } + id2ref_tbl_lock_owner = 
pthread_self(); + } + id2ref_tbl_lock_lvl++; + return true; +} + +static inline void +id2ref_tbl_unlock(void) +{ + ASSERT_id2ref_tbl_locked(); + GC_ASSERT(id2ref_tbl_lock_lvl > 0); + id2ref_tbl_lock_lvl--; + if (id2ref_tbl_lock_lvl == 0) { + id2ref_tbl_lock_owner = 0; + rb_native_mutex_unlock(&id2ref_tbl_lock_); + } +} + static void id2ref_tbl_free(void *data) { - id2ref_tbl = NULL; // clear global ref - st_table *table = (st_table *)data; - st_free_table(table); + id2ref_tbl_lock(true); + { + RUBY_ATOMIC_PTR_SET(id2ref_tbl, NULL); // clear global ref + st_table *table = (st_table *)data; + st_free_table(table); + } + id2ref_tbl_unlock(); } static const rb_data_type_t id2ref_tbl_type = { @@ -2088,8 +2173,14 @@ class_object_id(VALUE klass) if (existing_id) { id = existing_id; } - else if (RB_UNLIKELY(id2ref_tbl)) { - st_insert(id2ref_tbl, id, klass); + else { + if (RB_UNLIKELY(id2ref_tbl)) { + id2ref_tbl_lock(false); + { + st_insert(id2ref_tbl, id, klass); // FIXME: currently needs VM lock for allocation + } + id2ref_tbl_unlock(); + } } RB_GC_VM_UNLOCK(lock_lev); } @@ -2135,9 +2226,13 @@ object_id0(VALUE obj) RUBY_ASSERT(RBASIC_SHAPE_ID(obj) == object_id_shape_id); RUBY_ASSERT(rb_shape_obj_has_id(obj)); - if (RB_UNLIKELY(id2ref_tbl)) { + if (RB_UNLIKELY(RUBY_ATOMIC_PTR_LOAD(id2ref_tbl))) { RB_VM_LOCKING() { - st_insert(id2ref_tbl, (st_data_t)id, (st_data_t)obj); + id2ref_tbl_lock(false); + { + st_insert(id2ref_tbl, (st_data_t)id, (st_data_t)obj); // FIXME: currently needs VM lock for allocation + } + id2ref_tbl_unlock(); } } return id; @@ -2180,19 +2275,31 @@ build_id2ref_i(VALUE obj, void *data) case T_MODULE: RUBY_ASSERT(!rb_objspace_garbage_object_p(obj)); if (RCLASS(obj)->object_id) { - st_insert(id2ref_tbl, RCLASS(obj)->object_id, obj); + id2ref_tbl_lock(false); + { + st_insert(id2ref_tbl, RCLASS(obj)->object_id, obj); + } + id2ref_tbl_unlock(); } break; case T_IMEMO: RUBY_ASSERT(!rb_objspace_garbage_object_p(obj)); if (IMEMO_TYPE_P(obj, imemo_fields) && rb_shape_obj_has_id(obj)) { - st_insert(id2ref_tbl, rb_obj_id(obj), rb_imemo_fields_owner(obj)); + id2ref_tbl_lock(false); + { + st_insert(id2ref_tbl, rb_obj_id(obj), rb_imemo_fields_owner(obj)); + } + id2ref_tbl_unlock(); } break; case T_OBJECT: RUBY_ASSERT(!rb_objspace_garbage_object_p(obj)); if (rb_shape_obj_has_id(obj)) { - st_insert(id2ref_tbl, rb_obj_id(obj), obj); + id2ref_tbl_lock(false); + { + st_insert(id2ref_tbl, rb_obj_id(obj), obj); + } + id2ref_tbl_unlock(); } break; default: @@ -2208,8 +2315,8 @@ object_id_to_ref(void *objspace_ptr, VALUE object_id) unsigned int lev = RB_GC_VM_LOCK(); - if (!id2ref_tbl) { - rb_gc_vm_barrier(); // stop other ractors + if (!RUBY_ATOMIC_PTR_LOAD(id2ref_tbl)) { + rb_gc_vm_barrier(); // stop other ractors, background sweeper could still be running // GC Must not trigger while we build the table, otherwise if we end // up freeing an object that had an ID, we might try to delete it from @@ -2222,16 +2329,21 @@ object_id_to_ref(void *objspace_ptr, VALUE object_id) // By calling rb_gc_disable() we also save having to handle potentially garbage objects. 
bool gc_disabled = RTEST(rb_gc_disable()); { - id2ref_tbl = tmp_id2ref_tbl; id2ref_value = tmp_id2ref_value; - rb_gc_impl_each_object(objspace, build_id2ref_i, (void *)id2ref_tbl); + rb_gc_impl_each_object(objspace, build_id2ref_i, (void *)tmp_id2ref_tbl); + RUBY_ATOMIC_PTR_SET(id2ref_tbl, tmp_id2ref_tbl); } if (!gc_disabled) rb_gc_enable(); } VALUE obj; - bool found = st_lookup(id2ref_tbl, object_id, &obj) && !rb_gc_impl_garbage_object_p(objspace, obj); + bool found; + id2ref_tbl_lock(false); + { + found = st_lookup(id2ref_tbl, object_id, &obj) && !rb_gc_impl_garbage_object_p(objspace, obj); + } + id2ref_tbl_unlock(); RB_GC_VM_UNLOCK(lev); @@ -2247,11 +2359,11 @@ object_id_to_ref(void *objspace_ptr, VALUE object_id) } } -static inline void -obj_free_object_id(VALUE obj) +static VALUE +obj_get_object_id(VALUE obj) { VALUE obj_id = 0; - if (RB_UNLIKELY(id2ref_tbl)) { + if (RB_UNLIKELY(RUBY_ATOMIC_PTR_LOAD(id2ref_tbl))) { switch (BUILTIN_TYPE(obj)) { case T_CLASS: case T_MODULE: @@ -2259,11 +2371,11 @@ obj_free_object_id(VALUE obj) break; case T_IMEMO: if (!IMEMO_TYPE_P(obj, imemo_fields)) { - return; + break; } // fallthrough case T_OBJECT: - { + { shape_id_t shape_id = RBASIC_SHAPE_ID(obj); if (rb_shape_has_object_id(shape_id)) { obj_id = object_id_get(obj, shape_id); @@ -2271,13 +2383,29 @@ obj_free_object_id(VALUE obj) break; } default: + break; // For generic_fields, the T_IMEMO/fields is responsible for freeing the id. - return; } + } + return obj_id; +} + +static inline bool +obj_free_object_id(VALUE obj, bool in_user_gc_thread) +{ + if (RB_UNLIKELY(RUBY_ATOMIC_PTR_LOAD(id2ref_tbl))) { + VALUE obj_id = obj_get_object_id(obj); if (RB_UNLIKELY(obj_id)) { RUBY_ASSERT(FIXNUM_P(obj_id) || RB_TYPE_P(obj_id, T_BIGNUM)); + bool needs_id2ref_tbl_trylock = !in_user_gc_thread; + if (needs_id2ref_tbl_trylock) { + bool did_lock = id2ref_tbl_trylock(false); + if (!did_lock) return false; + } else { + id2ref_tbl_lock(true); + } if (!st_delete(id2ref_tbl, (st_data_t *)&obj_id, NULL)) { // The the object is a T_IMEMO/fields, then it's possible the actual object // has been garbage collected already. 
@@ -2285,15 +2413,61 @@ obj_free_object_id(VALUE obj) rb_bug("Object ID seen, but not in _id2ref table: object_id=%llu object=%s", NUM2ULL(obj_id), rb_obj_info(obj)); } } + id2ref_tbl_unlock(); } } + return true; } -void +bool +rb_gc_obj_has_blacklisted_vm_weak_references(VALUE obj) +{ + switch (BUILTIN_TYPE(obj)) { + case T_STRING: + return FL_TEST_RAW(obj, RSTRING_FSTR); + case T_SYMBOL: + return true; + case T_IMEMO: + switch (imemo_type(obj)) { + case imemo_callcache: { + const struct rb_callcache *cc = (const struct rb_callcache *)obj; + return vm_cc_refinement_p(cc); + } + case imemo_callinfo: + case imemo_ment: + return true; + default: + break; + } + return false; + default: + return false; + } +} + +static bool +rb_gc_obj_free_whitelisted_vm_weak_references_in_sweep_thread(VALUE obj) +{ + VM_ASSERT(pthread_self() == GET_VM()->gc.sweep_thread); + bool result = obj_free_object_id(obj, false); + if (rb_obj_gen_fields_p(obj)) { + bool freed_generic = rb_free_generic_ivar(obj); + if (!freed_generic) result = false; + } + return result; +} + +bool rb_gc_obj_free_vm_weak_references(VALUE obj) { ASSUME(!RB_SPECIAL_CONST_P(obj)); - obj_free_object_id(obj); + + rb_execution_context_t *ec = rb_current_execution_context(false); + if (!ec) { + return rb_gc_obj_free_whitelisted_vm_weak_references_in_sweep_thread(obj); + } + + obj_free_object_id(obj, true); if (rb_obj_gen_fields_p(obj)) { rb_free_generic_ivar(obj); @@ -2323,6 +2497,7 @@ rb_gc_obj_free_vm_weak_references(VALUE obj) default: break; } + return true; } /* @@ -2649,7 +2824,14 @@ count_objects_i(VALUE obj, void *d) struct count_objects_data *data = (struct count_objects_data *)d; if (RBASIC(obj)->flags) { - data->counts[BUILTIN_TYPE(obj)]++; + // This will make sure the count is like the old behavior when we used to turn a zombie into + // T_NONE right after the finalizer and/or free function ran. 
+ if (BUILTIN_TYPE(obj) == T_ZOMBIE && FL_TEST(obj, FL_FREEZE)) { + data->freed++; + } + else { + data->counts[BUILTIN_TYPE(obj)]++; + } } else { data->freed++; @@ -4185,6 +4367,7 @@ vm_weak_table_gen_fields_foreach(st_data_t key, st_data_t value, st_data_t data) if (key != new_key || value != new_value) { DURING_GC_COULD_MALLOC_REGION_START(); { + // We're STW, no need for gen_fields_tbl_lock st_insert(rb_generic_fields_tbl_get(), (st_data_t)new_key, new_value); } DURING_GC_COULD_MALLOC_REGION_END(); @@ -4255,7 +4438,7 @@ rb_gc_vm_weak_table_foreach(vm_table_foreach_callback_func callback, break; } case RB_GC_VM_ID2REF_TABLE: { - if (id2ref_tbl) { + if (id2ref_tbl) { // we're STW, no need for lock st_foreach_with_replace( id2ref_tbl, vm_weak_table_id2ref_foreach, @@ -4267,7 +4450,7 @@ rb_gc_vm_weak_table_foreach(vm_table_foreach_callback_func callback, } case RB_GC_VM_GENERIC_FIELDS_TABLE: { st_table *generic_fields_tbl = rb_generic_fields_tbl_get(); - if (generic_fields_tbl) { + if (generic_fields_tbl) { // we're STW, no need for lock st_foreach( generic_fields_tbl, vm_weak_table_gen_fields_foreach, diff --git a/gc/default/default.c b/gc/default/default.c index 1b7d109ce69a99..e7d1791affd04e 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -25,8 +25,11 @@ #include "ruby/atomic.h" #include "ruby/debug.h" #include "ruby/thread.h" +#include "ruby/thread_native.h" #include "ruby/util.h" #include "ruby/vm.h" + +#include #include "ruby/internal/encoding/string.h" #include "ccan/list/list.h" #include "darray.h" @@ -112,6 +115,22 @@ #ifndef GC_HEAP_INIT_BYTES #define GC_HEAP_INIT_BYTES (2560 * 1024) #endif + +/*#define PSWEEP_DEBUG -6*/ +#if defined(PSWEEP_DEBUG) +#define psweep_debug(lvl, ...) if (lvl <= PSWEEP_DEBUG) fprintf(stderr, __VA_ARGS__) +#else +#define psweep_debug(...) (void)0 +#endif + +/* Define PSWEEP_LOCK_STATS to > 0 to enable lock contention statistics */ +#define PSWEEP_LOCK_STATS 0 +#ifndef PSWEEP_LOCK_STATS +#define PSWEEP_LOCK_STATS 0 +#endif + +#define PSWEEP_COLLECT_TIMINGS 0 + #ifndef GC_HEAP_FREE_SLOTS #define GC_HEAP_FREE_SLOTS 4096 #endif @@ -450,6 +469,11 @@ typedef struct mark_stack { typedef int (*gc_compact_compare_func)(const void *l, const void *r, void *d); +typedef struct { + rb_darray(VALUE) object_list; + rb_nativethread_lock_t lock; +} deferred_sweep_data_t; + typedef struct rb_heap_struct { short slot_size; @@ -467,13 +491,26 @@ typedef struct rb_heap_struct { struct heap_page *free_pages; struct ccan_list_head pages; - struct heap_page *sweeping_page; /* iterator for .pages */ + struct heap_page *sweeping_page; /* iterator for .pages. It always points to the next page to sweep. 
*/ + struct heap_page *pre_sweeping_page; /* Background thread is currently sweeping this page */ + struct heap_page *swept_pages; /* pages claimed and swept by background thread */ + struct heap_page *latest_swept_page; // tail of `swept_pages` struct heap_page *compact_cursor; uintptr_t compact_cursor_index; struct heap_page *pooled_pages; size_t total_pages; /* total page count in a heap */ size_t total_slots; /* total slot count */ + rb_atomic_t foreground_sweep_steps; // incremented by ruby thread, checked by sweep thread + rb_atomic_t background_sweep_steps; // only incremented/checked by sweep thread + rb_nativethread_cond_t sweep_page_cond; // associated with global sweep lock + rb_nativethread_lock_t swept_pages_lock; + size_t pre_swept_slots_nodeferred; + size_t pre_swept_slots_deferred; + deferred_sweep_data_t deferred_sweep_data; + bool is_finished_sweeping; + bool done_background_sweep; + bool skip_sweep_continue; // skip current sweep continue } rb_heap_t; enum { @@ -521,6 +558,9 @@ typedef struct rb_objspace { unsigned int gc_stressful: 1; unsigned int during_minor_gc : 1; unsigned int during_incremental_marking : 1; + unsigned int during_lazy_sweeping : 1; + + unsigned int measure_gc : 1; } flags; @@ -530,6 +570,21 @@ typedef struct rb_objspace { size_t empty_pages_count; struct heap_page *empty_pages; + rb_nativethread_lock_t sweep_lock; + rb_nativethread_cond_t sweep_cond; + pthread_t sweep_thread; + bool sweep_thread_running; + bool sweep_thread_sweep_requested; + bool sweep_thread_sweep_exited; + bool sweep_thread_waiting_request; + bool sweep_thread_sweeping; + bool background_sweep_mode; + bool background_sweep_abort; + bool background_sweep_restart_heaps; + bool use_background_sweep_thread; + bool sweep_rest; + unsigned int heaps_done_background_sweep; + struct { rb_atomic_t finalizing; } atomic_flags; @@ -601,6 +656,14 @@ typedef struct rb_objspace { unsigned long long sweeping_time_ns; struct timespec sweeping_start_time; +#if PSWEEP_COLLECT_TIMINGS > 0 + /* Ruby thread sweep time tracking (always collected) */ + unsigned long long ruby_thread_sweep_cpu_time_ns; + unsigned long long ruby_thread_sweep_wall_time_ns; + struct timespec ruby_thread_sweep_cpu_start_time; + struct timespec ruby_thread_sweep_wall_start_time; +#endif + /* Weak references */ size_t weak_references_count; } profile; @@ -779,11 +842,16 @@ struct heap_page { unsigned short free_slots; unsigned short final_slots; unsigned short pinned_slots; + unsigned short pre_freed_slots; + unsigned short pre_empty_slots; + unsigned short pre_deferred_free_slots; + unsigned short pre_final_slots; struct { - unsigned int before_sweep : 1; unsigned int has_remembered_objects : 1; unsigned int has_uncollectible_wb_unprotected_objects : 1; } flags; + bool needs_setup_mark_bits; + rb_atomic_t before_sweep; // bool rb_heap_t *heap; @@ -791,7 +859,9 @@ struct heap_page { struct heap_page_body *body; uintptr_t start; struct free_slot *freelist; + struct free_slot *deferred_freelist; struct ccan_list_node page_node; + rb_ractor_newobj_heap_cache_t *heap_cache; bits_t wb_unprotected_bits[HEAP_PAGE_BITMAP_LIMIT]; /* the following three bitmaps are cleared at the beginning of full GC */ @@ -824,6 +894,18 @@ asan_unlock_freelist(struct heap_page *page) asan_unpoison_memory_region(&page->freelist, sizeof(struct free_list *), false); } +static void +asan_lock_deferred_freelist(struct heap_page *page) +{ + asan_poison_memory_region(&page->deferred_freelist, sizeof(struct free_list *)); +} + +static void 
+asan_unlock_deferred_freelist(struct heap_page *page) +{ + asan_unpoison_memory_region(&page->deferred_freelist, sizeof(struct free_list *), false); +} + static inline bool heap_page_in_global_empty_pages_pool(rb_objspace_t *objspace, struct heap_page *page) { @@ -872,6 +954,14 @@ slot_index_for_offset(size_t offset, uint32_t div_magic) #define MARK_IN_BITMAP(bits, p) _MARK_IN_BITMAP(bits, GET_HEAP_PAGE(p), p) #define CLEAR_IN_BITMAP(bits, p) _CLEAR_IN_BITMAP(bits, GET_HEAP_PAGE(p), p) +/* Atomic bitmap operations for use during parallel sweep, where the sweep + * thread and mutator write barriers may modify different bits in the same + * bitmap word concurrently. */ +#define _ATOMIC_MARK_IN_BITMAP(bits, page, p) RUBY_ATOMIC_VALUE_OR((bits)[SLOT_BITMAP_INDEX(page, p)], SLOT_BITMAP_BIT(page, p)) +#define _ATOMIC_CLEAR_IN_BITMAP(bits, page, p) RUBY_ATOMIC_VALUE_AND((bits)[SLOT_BITMAP_INDEX(page, p)], ~SLOT_BITMAP_BIT(page, p)) +#define ATOMIC_MARK_IN_BITMAP(bits, p) _ATOMIC_MARK_IN_BITMAP(bits, GET_HEAP_PAGE(p), p) +#define ATOMIC_CLEAR_IN_BITMAP(bits, p) _ATOMIC_CLEAR_IN_BITMAP(bits, GET_HEAP_PAGE(p), p) + #define GET_HEAP_MARK_BITS(x) (&GET_HEAP_PAGE(x)->mark_bits[0]) #define GET_HEAP_PINNED_BITS(x) (&GET_HEAP_PAGE(x)->pinned_bits[0]) #define GET_HEAP_UNCOLLECTIBLE_BITS(x) (&GET_HEAP_PAGE(x)->uncollectible_bits[0]) @@ -983,15 +1073,253 @@ gc_mode_verify(enum gc_mode mode) return mode; } -static inline bool +#if PSWEEP_LOCK_STATS > 0 +/* Lock contention statistics per callsite */ +#define MAX_LOCK_CALLSITES 100 + +typedef struct lock_callsite_stats { + const char *function; + int line; + size_t acquired_without_contention; + size_t contended; +} lock_callsite_stats_t; + +typedef struct lock_stats { + const char *name; + lock_callsite_stats_t callsites[MAX_LOCK_CALLSITES]; + int num_callsites; +} lock_stats_t; + +static lock_stats_t sweep_lock_stats = {"objspace->sweep_lock", {{0}}, 0}; +static lock_stats_t swept_pages_lock_stats = {"heap->swept_pages_lock", {{0}}, 0}; +static lock_stats_t deferred_sweep_data_lock_stats = {"heap->deferred_sweep_data.lock", {{0}}, 0}; + +static lock_callsite_stats_t* +find_or_create_callsite(lock_stats_t *stats, const char *function, int line) +{ + /* Find existing callsite */ + for (int i = 0; i < stats->num_callsites; i++) { + if (stats->callsites[i].function == function && stats->callsites[i].line == line) { + return &stats->callsites[i]; + } + } + + /* Create new callsite if space available */ + if (stats->num_callsites < MAX_LOCK_CALLSITES) { + lock_callsite_stats_t *callsite = &stats->callsites[stats->num_callsites++]; + callsite->function = function; + callsite->line = line; + callsite->acquired_without_contention = 0; + callsite->contended = 0; + return callsite; + } + + /* No space - return last callsite as overflow */ + return &stats->callsites[MAX_LOCK_CALLSITES - 1]; +} + +static void +instrumented_lock_acquire_impl(rb_nativethread_lock_t *lock, lock_stats_t *stats, const char *function, int line) +{ + lock_callsite_stats_t *callsite = find_or_create_callsite(stats, function, line); + + if (rb_native_mutex_trylock(lock) == 0) { + callsite->acquired_without_contention++; + } + else { + callsite->contended++; + rb_native_mutex_lock(lock); + } +} + +/* Macro to automatically pass function and line */ +#define instrumented_lock_acquire(lock, stats) \ + instrumented_lock_acquire_impl(lock, stats, __FUNCTION__, __LINE__) + +static void +print_lock_stats(void) +{ + fprintf(stderr, "\n=== Lock Contention Statistics by Callsite ===\n"); + fprintf(stderr, 
"%-40s %-30s %12s %12s %10s\n", "Lock Name", "Callsite", "Uncontended", "Contended", "Ratio"); + fprintf(stderr, "%-40s %-30s %12s %12s %10s\n", "---------", "--------", "-----------", "---------", "-----"); + + lock_stats_t *all_stats[] = {&sweep_lock_stats, &swept_pages_lock_stats, &deferred_sweep_data_lock_stats}; + + for (int i = 0; i < 3; i++) { + lock_stats_t *stats = all_stats[i]; + + /* Sort callsites by total contentions (descending) */ + for (int j = 0; j < stats->num_callsites - 1; j++) { + for (int k = j + 1; k < stats->num_callsites; k++) { + if (stats->callsites[k].contended > stats->callsites[j].contended) { + lock_callsite_stats_t temp = stats->callsites[j]; + stats->callsites[j] = stats->callsites[k]; + stats->callsites[k] = temp; + } + } + } + + /* Print callsites for this lock */ + for (int j = 0; j < stats->num_callsites; j++) { + lock_callsite_stats_t *cs = &stats->callsites[j]; + size_t total = cs->acquired_without_contention + cs->contended; + if (total > 0) { + char callsite_buf[32]; + snprintf(callsite_buf, sizeof(callsite_buf), "%s:%d", cs->function, cs->line); + + double ratio = (double)cs->contended / total * 100.0; + fprintf(stderr, "%-40s %-30s %12zu %12zu %9.2f%%\n", + j == 0 ? stats->name : "", + callsite_buf, + cs->acquired_without_contention, + cs->contended, + ratio); + } + } + } + fprintf(stderr, "================================================\n\n"); +} +#endif /* PSWEEP_LOCK_STATS > 0 */ + +static pthread_t sweep_lock_owner = 0; + +static inline void +sweep_lock_lock_impl(rb_nativethread_lock_t *sweep_lock, const char *function, int line) +{ + GC_ASSERT(sweep_lock_owner != pthread_self()); +#if PSWEEP_LOCK_STATS > 0 + instrumented_lock_acquire_impl(sweep_lock, &sweep_lock_stats, function, line); +#else + rb_native_mutex_lock(sweep_lock); +#endif + GC_ASSERT(sweep_lock_owner == 0); +#if VM_CHECK_MODE > 0 + sweep_lock_owner = pthread_self(); +#endif +} + +#define sweep_lock_lock(sweep_lock) \ + sweep_lock_lock_impl(sweep_lock, __FUNCTION__, __LINE__) + +static inline void +sweep_lock_unlock(rb_nativethread_lock_t *sweep_lock) +{ + GC_ASSERT(sweep_lock_owner == pthread_self()); +#if VM_CHECK_MODE > 0 + sweep_lock_owner = 0; +#endif + rb_native_mutex_unlock(sweep_lock); +} + +static inline void +sweep_lock_set_locked(void) +{ + GC_ASSERT(sweep_lock_owner == 0); +#if VM_CHECK_MODE > 0 + sweep_lock_owner = pthread_self(); +#endif +} + +static inline void +sweep_lock_set_unlocked(void) +{ + GC_ASSERT(sweep_lock_owner == pthread_self()); +#if VM_CHECK_MODE > 0 + sweep_lock_owner = 0; +#endif +} + +// TODO: put it in objspace struct +/*static rb_nativethread_lock_t gc_data_lock_ = PTHREAD_MUTEX_INITIALIZER;*/ +/*static pthread_t gc_data_lock_owner = 0;*/ +/*static int gc_data_lock_lvl = 0;*/ + +/*static inline void*/ +/*gc_data_lock(rb_objspace_t *objspace, bool allow_reentry)*/ +/*{*/ + /*if (allow_reentry && pthread_self() == gc_data_lock_owner) {*/ + /*}*/ + /*else {*/ + /*GC_ASSERT(gc_data_lock_owner != pthread_self());*/ + /*rb_native_mutex_lock(&gc_data_lock_);*/ + /*gc_data_lock_owner = pthread_self();*/ + /*}*/ + /*gc_data_lock_lvl++;*/ +/*}*/ + +/*static inline void*/ +/*gc_data_unlock(rb_objspace_t *objspace)*/ +/*{*/ + /*GC_ASSERT(gc_data_lock_owner == pthread_self());*/ + /*gc_data_lock_lvl--;*/ + /*if (gc_data_lock_lvl == 0) {*/ + /*gc_data_lock_owner = 0;*/ + /*rb_native_mutex_unlock(&gc_data_lock_);*/ + /*}*/ +/*}*/ + +// Returns true when the background sweep thread and Ruby thread have finished processing +// (background sweeping + ruby thread 
post-processing or deferred freeing) all pages for that heap. +static bool +heap_is_sweep_done(rb_objspace_t *objspace, rb_heap_t *heap) +{ + if (heap->is_finished_sweeping) { + psweep_debug(2, "[gc] heap_is_sweep_done: %d, heap:%p (%ld), heap->is_finished_sweeping\n", true, heap, heap - heaps); + return true; + } + if (!objspace->use_background_sweep_thread) { + bool done = heap->sweeping_page == NULL; + psweep_debug(2, "[gc] heap_is_sweep_done: %d, heap:%p (%ld), !use_background_thread\n", done, heap, heap - heaps); + return done; + } + + // We always dequeue the last page, never the sweep thread. This avoids locking in the common case. + // It should be synchronized, but it's a "benign race" (FIXME: use atomics?) + if (heap->sweeping_page) { + return false; + } + + bool done; + sweep_lock_lock(&objspace->sweep_lock); + if (heap->sweeping_page || heap->swept_pages) { + psweep_debug(2, "heap_is_sweep_done: %d, heap:%p (%ld), swept_pages:%d, sweeping_page:%p\n", false, heap, heap - heaps, heap->swept_pages != 0, heap->sweeping_page); + done = false; + } + else if (heap->pre_sweeping_page) { + sweep_lock_set_unlocked(); + // We need to wait because this is the final page for this heap, and the caller calls us + // like `while (!heap_is_sweep_done(heap)) { gc_sweep_step(heap) }` (we don't want to spin). + rb_native_cond_wait(&heap->sweep_page_cond, &objspace->sweep_lock); + sweep_lock_set_locked(); + GC_ASSERT(heap->swept_pages); + done = false; + } + else { + done = true; + } + sweep_lock_unlock(&objspace->sweep_lock); + return done; +} + +// Does the GC still have pages to sweep? If returns false, then the Ruby thread has fully +// processed all the pages in every heap. +static bool has_sweeping_pages(rb_objspace_t *objspace) { + rb_heap_t *heap_not_finished = NULL; for (int i = 0; i < HEAP_COUNT; i++) { - if ((&heaps[i])->sweeping_page) { - return TRUE; + rb_heap_t *heap = &heaps[i]; + if (!heap->is_finished_sweeping) { + if (heap_not_finished) { + return true; + } + else { + heap_not_finished = heap; + } } } - return FALSE; + if (!heap_not_finished) return false; // all done + return !heap_is_sweep_done(objspace, heap_not_finished); } static inline size_t @@ -1032,7 +1360,7 @@ total_final_slots_count(rb_objspace_t *objspace) size_t count = 0; for (int i = 0; i < HEAP_COUNT; i++) { rb_heap_t *heap = &heaps[i]; - count += heap->final_slots_count; + count += (size_t)RUBY_ATOMIC_VALUE_LOAD(heap->final_slots_count); } return count; } @@ -1048,7 +1376,7 @@ total_final_slots_count(rb_objspace_t *objspace) #define will_be_incremental_marking(objspace) ((objspace)->rgengc.need_major_gc != GPR_FLAG_NONE) #define GC_INCREMENTAL_SWEEP_SLOT_COUNT 2048 #define GC_INCREMENTAL_SWEEP_POOL_SLOT_COUNT 1024 -#define is_lazy_sweeping(objspace) (GC_ENABLE_LAZY_SWEEP && has_sweeping_pages(objspace)) +#define is_lazy_sweeping(objspace) ((objspace)->flags.during_lazy_sweeping != FALSE) /* In lazy sweeping or the previous incremental marking finished and did not yield a free page. 
*/ #define needs_continue_sweeping(objspace, heap) \ ((heap)->free_pages == NULL && is_lazy_sweeping(objspace)) @@ -1093,7 +1421,7 @@ static inline void gc_enter(rb_objspace_t *objspace, enum gc_enter_event event, static inline void gc_exit(rb_objspace_t *objspace, enum gc_enter_event event, unsigned int *lock_lev); static void gc_marking_enter(rb_objspace_t *objspace); static void gc_marking_exit(rb_objspace_t *objspace); -static void gc_sweeping_enter(rb_objspace_t *objspace); +static void gc_sweeping_enter(rb_objspace_t *objspace, const char *from_fn); static void gc_sweeping_exit(rb_objspace_t *objspace); static bool gc_marks_continue(rb_objspace_t *objspace, rb_heap_t *heap); @@ -1598,26 +1926,48 @@ rb_gc_impl_garbage_object_p(void *objspace_ptr, VALUE ptr) bool dead = false; - asan_unpoisoning_object(ptr) { - switch (BUILTIN_TYPE(ptr)) { - case T_NONE: - case T_MOVED: - case T_ZOMBIE: - dead = true; - break; - default: - break; + if (!objspace->background_sweep_mode) { // set to false/true by ruby GC thread when entering/exiting GC + // psweep: not safe to read flags on object if during background sweeping + asan_unpoisoning_object(ptr) { + switch (BUILTIN_TYPE(ptr)) { + case T_NONE: + case T_MOVED: + case T_ZOMBIE: + dead = true; + break; + default: + break; + } } } if (dead) return true; - return is_lazy_sweeping(objspace) && GET_HEAP_PAGE(ptr)->flags.before_sweep && - !RVALUE_MARKED(objspace, ptr); + + struct heap_page *page = GET_HEAP_PAGE(ptr); + bool during_lazy_sweep = is_lazy_sweeping(objspace); + + if (!objspace->background_sweep_mode) { + return during_lazy_sweep && !RVALUE_MARKED(objspace, ptr) && RUBY_ATOMIC_LOAD(page->before_sweep); + } + // we're currently lazy sweeping with the sweep thread in background mode + else if (during_lazy_sweep) { + bool is_before1, is_before2; + // This is technically UB because reading of mark bits is not synchronized, but I think it's fine. 
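The check that follows re-reads page->before_sweep on both sides of the unsynchronized mark-bit read: if the flag was set both before and after, the sweeper cannot have rewritten the bitmap in between, so the unmarked result can be trusted. A minimal standalone sketch of that validation idea, assuming C11 atomics and hypothetical names rather than the Ruby atomics macros:

#include <stdatomic.h>
#include <stdbool.h>

/* Hypothetical stand-ins for a heap page and its mark bitmap. */
struct page {
    atomic_int before_sweep;   /* 1 while the page is still waiting to be swept */
    unsigned long mark_bits;   /* read without synchronization, validated below */
};

/* True only when the racy bitmap read is known to be consistent: the page
 * was "before sweep" both before and after the read, so the sweeper cannot
 * have touched the bitmap while we looked at it. */
static bool
unmarked_and_validated(struct page *p, unsigned long bit)
{
    int before1 = atomic_load(&p->before_sweep);
    bool unmarked = (p->mark_bits & bit) == 0;      /* the unsynchronized read */
    int before2 = atomic_load(&p->before_sweep);

    return before1 && before2 && unmarked;
}

int
main(void)
{
    struct page p = { 1, 0x0 };   /* still before sweep, nothing marked */
    return unmarked_and_validated(&p, 0x1) ? 0 : 1;
}

The branch that follows applies the same idea with RUBY_ATOMIC_LOAD and RVALUE_MARKED, and falls back to a plain type check when the flag changed between the two reads.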
+ bool is_garbage = ((is_before1 = RUBY_ATOMIC_LOAD(page->before_sweep)) && + !RVALUE_MARKED(objspace, ptr) && (is_before2 = RUBY_ATOMIC_LOAD(page->before_sweep))); + if (is_garbage) return true; + if (is_before1 && is_before2) return false; // must be marked (before_sweep and marked) + // already swept page, just check flags + return BUILTIN_TYPE(ptr) == T_NONE || BUILTIN_TYPE(ptr) == T_MOVED || BUILTIN_TYPE(ptr) == T_ZOMBIE; + } + else { + return BUILTIN_TYPE(ptr) == T_NONE || BUILTIN_TYPE(ptr) == T_MOVED || BUILTIN_TYPE(ptr) == T_ZOMBIE; + } } static void free_stack_chunks(mark_stack_t *); static void mark_stack_free_cache(mark_stack_t *); -static void heap_page_free(rb_objspace_t *objspace, struct heap_page *page); +static void heap_page_free(rb_objspace_t *objspace, struct heap_page *page, bool log); static inline void heap_page_add_freeobj(rb_objspace_t *objspace, struct heap_page *page, VALUE obj) @@ -1647,6 +1997,22 @@ heap_page_add_freeobj(rb_objspace_t *objspace, struct heap_page *page, VALUE obj gc_report(3, objspace, "heap_page_add_freeobj: add %p to freelist\n", (void *)obj); } +static inline void +heap_page_add_deferred_freeobj(rb_objspace_t *objspace, struct heap_page *page, VALUE obj) +{ + rb_asan_unpoison_object(obj, false); + + struct free_slot *slot = (struct free_slot *)obj; + slot->flags = 0; + asan_unlock_deferred_freelist(page); + slot->next = page->deferred_freelist; + page->deferred_freelist = slot; + asan_lock_deferred_freelist(page); + + rb_asan_poison_object(obj); + gc_report(3, objspace, "heap_page_add_deferred_freeobj: add %p to deferred_freelist\n", (void *)obj); +} + static void heap_allocatable_bytes_expand(rb_objspace_t *objspace, rb_heap_t *heap, size_t free_slots, size_t total_slots, size_t slot_size) @@ -1693,13 +2059,16 @@ heap_allocatable_bytes_expand(rb_objspace_t *objspace, objspace->heap_pages.allocatable_bytes += extend_slot_count * slot_size; } +/* Add a `page` with some free slots to the beginning of `heap->free_pages` */ static inline void -heap_add_freepage(rb_heap_t *heap, struct heap_page *page) +heap_add_freepage(rb_heap_t *heap, struct heap_page *page, const char *from_func) { asan_unlock_freelist(page); GC_ASSERT(page->free_slots != 0); GC_ASSERT(page->freelist != NULL); + psweep_debug(1, "[gc] heap_add_freepage(heap:%p, page:%p) from %s\n", heap, page, from_func); + page->free_next = heap->free_pages; heap->free_pages = page; @@ -1726,7 +2095,9 @@ static void heap_unlink_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page) { ccan_list_del(&page->page_node); + GC_ASSERT(heap->total_pages > 0); heap->total_pages--; + GC_ASSERT(heap->total_slots >= page->total_slots); heap->total_slots -= page->total_slots; } @@ -1763,11 +2134,12 @@ heap_page_body_free(struct heap_page_body *page_body) } static void -heap_page_free(rb_objspace_t *objspace, struct heap_page *page) +heap_page_free(rb_objspace_t *objspace, struct heap_page *page, bool log) { objspace->heap_pages.freed_pages++; heap_page_body_free(page->body); free(page); + psweep_debug(1, "[gc] heap_page_free heap:%p page:%p\n", page->heap, page); } static void @@ -1783,7 +2155,7 @@ heap_pages_free_unused_pages(rb_objspace_t *objspace) struct heap_page *page = rb_darray_get(objspace->heap_pages.sorted, i); if (heap_page_in_global_empty_pages_pool(objspace, page) && heap_pages_freeable_pages > 0) { - heap_page_free(objspace, page); + heap_page_free(objspace, page, true); heap_pages_freeable_pages--; } else { @@ -1908,6 +2280,8 @@ heap_page_body_allocate(void) return page_body; 
} +/* Try to "resurrect" an empty page by removing it from the `objspace->empty_pages` list */ +/* NOTE: empty pages can go to any heap */ static struct heap_page * heap_page_resurrect(rb_objspace_t *objspace) { @@ -1920,6 +2294,7 @@ heap_page_resurrect(rb_objspace_t *objspace) objspace->empty_pages_count--; page = objspace->empty_pages; objspace->empty_pages = page->free_next; + page->freelist = NULL; } return page; @@ -1973,8 +2348,9 @@ heap_page_allocate(rb_objspace_t *objspace) return page; } +/* Add either an empty page (objspace->empty_pages) or a newly allocated page to a heap. Thread the freelist and set `heap->free_slots` */ static void -heap_add_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page) +heap_add_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page, bool sweep_lock_taken) { /* Adding to eden heap during incremental sweeping is forbidden */ GC_ASSERT(!heap->sweeping_page); @@ -1994,29 +2370,38 @@ heap_add_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page) page->slot_size = heap->slot_size; page->slot_div_magic = slot_div_magics[heap - heaps]; page->heap = heap; + page->free_next = NULL; memset(&page->wb_unprotected_bits[0], 0, HEAP_PAGE_BITMAP_SIZE); memset(&page->age_bits[0], 0, sizeof(page->age_bits)); asan_unlock_freelist(page); + asan_unlock_deferred_freelist(page); page->freelist = NULL; + page->deferred_freelist = NULL; asan_unpoison_memory_region(page->body, HEAP_PAGE_SIZE, false); for (VALUE p = (VALUE)start; p < start + (slot_count * heap->slot_size); p += heap->slot_size) { heap_page_add_freeobj(objspace, page, p); } + asan_lock_deferred_freelist(page); asan_lock_freelist(page); page->free_slots = slot_count; heap->total_allocated_pages++; - ccan_list_add_tail(&heap->pages, &page->page_node); + if (!sweep_lock_taken) sweep_lock_lock(&objspace->sweep_lock); + { + ccan_list_add_tail(&heap->pages, &page->page_node); + } + if (!sweep_lock_taken) sweep_lock_unlock(&objspace->sweep_lock); + heap->total_pages++; heap->total_slots += page->total_slots; } static int -heap_page_allocate_and_initialize(rb_objspace_t *objspace, rb_heap_t *heap) +heap_page_allocate_and_initialize(rb_objspace_t *objspace, rb_heap_t *heap, bool sweep_lock_taken) { gc_report(1, objspace, "heap_page_allocate_and_initialize: rb_darray_size(objspace->heap_pages.sorted): %"PRIdSIZE", " "allocatable_bytes: %"PRIdSIZE", heap->total_pages: %"PRIdSIZE"\n", @@ -2026,6 +2411,7 @@ heap_page_allocate_and_initialize(rb_objspace_t *objspace, rb_heap_t *heap) struct heap_page *page = heap_page_resurrect(objspace); if (page == NULL && objspace->heap_pages.allocatable_bytes > 0) { + psweep_debug(1, "[gc] heap_page_allocate_and_initialize: no empty pages, allocating page\n"); page = heap_page_allocate(objspace); allocated = true; @@ -2033,8 +2419,8 @@ heap_page_allocate_and_initialize(rb_objspace_t *objspace, rb_heap_t *heap) } if (page != NULL) { - heap_add_page(objspace, heap, page); - heap_add_freepage(heap, page); + heap_add_page(objspace, heap, page, sweep_lock_taken); + heap_add_freepage(heap, page, "allocate_and_initialize"); if (allocated) { size_t page_bytes = (size_t)page->total_slots * page->slot_size; @@ -2051,21 +2437,25 @@ heap_page_allocate_and_initialize(rb_objspace_t *objspace, rb_heap_t *heap) } static void -heap_page_allocate_and_initialize_force(rb_objspace_t *objspace, rb_heap_t *heap) +heap_page_allocate_and_initialize_force(rb_objspace_t *objspace, rb_heap_t *heap, bool sweep_lock_taken) { size_t prev_allocatable_bytes = 
objspace->heap_pages.allocatable_bytes; objspace->heap_pages.allocatable_bytes = HEAP_PAGE_SIZE; - heap_page_allocate_and_initialize(objspace, heap); + heap_page_allocate_and_initialize(objspace, heap, sweep_lock_taken); GC_ASSERT(heap->free_pages != NULL); objspace->heap_pages.allocatable_bytes = prev_allocatable_bytes; } +// Run incremental marking and/or sweeping, if in incremental marking or sweeping mode static void gc_continue(rb_objspace_t *objspace, rb_heap_t *heap) { unsigned int lock_lev; bool needs_gc = is_incremental_marking(objspace) || needs_continue_sweeping(objspace, heap); - if (!needs_gc) return; + if (!needs_gc) { + psweep_debug(1, "[gc] gc_continue: !needs_gc\n"); + return; + } gc_enter(objspace, gc_enter_event_continue, &lock_lev); // takes vm barrier, try to avoid @@ -2079,32 +2469,44 @@ gc_continue(rb_objspace_t *objspace, rb_heap_t *heap) if (needs_continue_sweeping(objspace, heap)) { gc_sweep_continue(objspace, heap); } + else { + psweep_debug(-1, "[gc] gc_continue: !needs_continue_sweeping (lazy_sweeping:%d)\n", is_lazy_sweeping(objspace)); + } gc_exit(objspace, gc_enter_event_continue, &lock_lev); } +void wait_for_background_sweeping_to_finish(rb_objspace_t *objspace, bool abort_current_background_sweep, bool exit_sweep_thread, const char *from_fn); + static void heap_prepare(rb_objspace_t *objspace, rb_heap_t *heap) { GC_ASSERT(heap->free_pages == NULL); - if (heap->total_slots < gc_params.heap_init_bytes / heap->slot_size && - heap->sweeping_page == NULL) { - heap_page_allocate_and_initialize_force(objspace, heap); - GC_ASSERT(heap->free_pages != NULL); - return; + sweep_lock_lock(&objspace->sweep_lock); + { + if (heap->total_slots < gc_params.heap_init_bytes / heap->slot_size && + heap->sweeping_page == NULL && heap->swept_pages == NULL && !heap->pre_sweeping_page) { + heap_page_allocate_and_initialize_force(objspace, heap, true); + GC_ASSERT(heap->free_pages != NULL); + sweep_lock_unlock(&objspace->sweep_lock); + return; + } } + sweep_lock_unlock(&objspace->sweep_lock); /* Continue incremental marking or lazy sweeping, if in any of those steps. */ gc_continue(objspace, heap); if (heap->free_pages == NULL) { - heap_page_allocate_and_initialize(objspace, heap); + psweep_debug(1, "[gc] heap_prepare: heap->free_pages is NULL after gc_continue\n"); + heap_page_allocate_and_initialize(objspace, heap, false); } /* If we still don't have a free page and not allowed to create a new page, * we should start a new GC cycle. */ if (heap->free_pages == NULL) { + psweep_debug(1, "[gc] heap_prepare: still no heap_>free_pages even after try allocate!\n"); GC_ASSERT(objspace->empty_pages_count == 0); GC_ASSERT(objspace->heap_pages.allocatable_bytes == 0); @@ -2124,7 +2526,7 @@ heap_prepare(rb_objspace_t *objspace, rb_heap_t *heap) /* If we're not incremental marking (e.g. a minor GC) or finished * sweeping and still don't have a free page, then * gc_sweep_finish_heap should allow us to create a new page. 
*/ - if (heap->free_pages == NULL && !heap_page_allocate_and_initialize(objspace, heap)) { + if (heap->free_pages == NULL && !heap_page_allocate_and_initialize(objspace, heap, false)) { if (gc_needs_major_flags == GPR_FLAG_NONE) { rb_bug("cannot create a new page after GC"); } @@ -2137,7 +2539,7 @@ heap_prepare(rb_objspace_t *objspace, rb_heap_t *heap) gc_continue(objspace, heap); if (heap->free_pages == NULL && - !heap_page_allocate_and_initialize(objspace, heap)) { + !heap_page_allocate_and_initialize(objspace, heap, false)) { rb_bug("cannot create a new page after major GC"); } } @@ -2170,6 +2572,11 @@ rb_gc_impl_source_location_cstr(int *ptr) static inline VALUE newobj_init(VALUE klass, VALUE flags, int wb_protected, rb_objspace_t *objspace, VALUE obj) { +#if VM_CHECK_MODE > 0 + if (BUILTIN_TYPE(obj) != T_NONE) { + fprintf(stderr, "BUILTIN_TYPE(newobj) = %s for obj:%p\n", rb_obj_info(obj), (void*)obj); + } +#endif GC_ASSERT(BUILTIN_TYPE(obj) == T_NONE); GC_ASSERT((flags & FL_WB_PROTECTED) == 0); RBASIC(obj)->flags = flags; @@ -2179,7 +2586,7 @@ newobj_init(VALUE klass, VALUE flags, int wb_protected, rb_objspace_t *objspace, #endif -#if RACTOR_CHECK_MODE +#if RACTOR_CHECK_MODE > 10 void rb_ractor_setup_belonging(VALUE obj); rb_ractor_setup_belonging(obj); #endif @@ -2200,7 +2607,7 @@ newobj_init(VALUE klass, VALUE flags, int wb_protected, rb_objspace_t *objspace, #endif if (RB_UNLIKELY(wb_protected == FALSE)) { - MARK_IN_BITMAP(GET_HEAP_WB_UNPROTECTED_BITS(obj), obj); + ATOMIC_MARK_IN_BITMAP(GET_HEAP_WB_UNPROTECTED_BITS(obj), obj); } #if RGENGC_PROFILE @@ -2281,6 +2688,7 @@ ractor_cache_allocate_slot(rb_objspace_t *objspace, rb_ractor_newobj_cache_t *ca } if (RB_LIKELY(p)) { + psweep_debug(2, "[gc] allocate slot: %p from heap:%p page:%p\n", p, &heaps[heap_idx], heap_cache->using_page); VALUE obj = (VALUE)p; rb_asan_unpoison_object(obj, true); heap_cache->freelist = p->next; @@ -2313,10 +2721,14 @@ heap_next_free_page(rb_objspace_t *objspace, rb_heap_t *heap) heap_prepare(objspace, heap); } - page = heap->free_pages; - heap->free_pages = page->free_next; - - GC_ASSERT(page->free_slots != 0); + sweep_lock_lock(&objspace->sweep_lock); + { + page = heap->free_pages; + heap->free_pages = page->free_next; + } + sweep_lock_unlock(&objspace->sweep_lock); + psweep_debug(1, "[gc] heap_next_free_page heap:%p free_pages:%p -> %p (free_slots:%d)\n", heap, page, heap->free_pages, page->free_slots); + GC_ASSERT(page->free_slots > 0); asan_unlock_freelist(page); @@ -2335,10 +2747,15 @@ ractor_cache_set_page(rb_objspace_t *objspace, rb_ractor_newobj_cache_t *cache, GC_ASSERT(page->free_slots != 0); GC_ASSERT(page->freelist != NULL); + if (heap_cache->using_page) { + heap_cache->using_page->heap_cache = NULL; + } + heap_cache->using_page = page; heap_cache->freelist = page->freelist; page->free_slots = 0; page->freelist = NULL; + page->heap_cache = heap_cache; rb_asan_unpoison_object((VALUE)heap_cache->freelist, false); GC_ASSERT(RB_TYPE_P((VALUE)heap_cache->freelist, T_NONE)); @@ -2451,6 +2868,8 @@ newobj_alloc(rb_objspace_t *objspace, rb_ractor_newobj_cache_t *cache, size_t he ALWAYS_INLINE(static VALUE newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_newobj_cache_t *cache, int wb_protected, size_t heap_idx)); +static const char *type_name(int type, VALUE obj); + static inline VALUE newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_newobj_cache_t *cache, int wb_protected, size_t heap_idx) { @@ -2466,7 +2885,7 @@ newobj_slowpath(VALUE klass, VALUE 
flags, rb_objspace_t *objspace, rb_ractor_new if (rb_memerror_reentered()) { rb_memerror(); } - rb_bug("object allocation during garbage collection phase"); + rb_bug("object allocation during garbage collection phase for klass %s\n", type_name(flags & T_MASK, 0)); } if (ruby_gc_stressful) { @@ -2612,25 +3031,30 @@ rb_gc_impl_pointer_to_heap_p(void *objspace_ptr, const void *ptr) } #define ZOMBIE_OBJ_KEPT_FLAGS (FL_FINALIZE) +// Zombie needs to be put back on the freelist later (during GC) and finalizer has ran +#define ZOMBIE_NEEDS_FREE_FLAG (FL_FREEZE) +#define ZOMBIE_NEEDS_FREE_P(zombie) (FL_TEST(zombie, ZOMBIE_NEEDS_FREE_FLAG)) +#define ZOMBIE_SET_NEEDS_FREE_FLAG(zombie) (FL_SET(zombie, ZOMBIE_NEEDS_FREE_FLAG)) void rb_gc_impl_make_zombie(void *objspace_ptr, VALUE obj, void (*dfree)(void *), void *data) { rb_objspace_t *objspace = objspace_ptr; + struct heap_page *page = GET_HEAP_PAGE(obj); struct RZombie *zombie = RZOMBIE(obj); zombie->flags = T_ZOMBIE | (zombie->flags & ZOMBIE_OBJ_KEPT_FLAGS); zombie->dfree = dfree; zombie->data = data; - VALUE prev, next = heap_pages_deferred_final; + VALUE prev, next = (VALUE)RUBY_ATOMIC_PTR_LOAD(heap_pages_deferred_final); + struct heap_page *page_after = GET_HEAP_PAGE(zombie); + GC_ASSERT(page == page_after); do { zombie->next = prev = next; next = RUBY_ATOMIC_VALUE_CAS(heap_pages_deferred_final, prev, obj); } while (next != prev); - - struct heap_page *page = GET_HEAP_PAGE(obj); - page->final_slots++; - page->heap->final_slots_count++; + page->final_slots++; // NOTE: not synchronized, but either background thread or user thread owns page during free + RUBY_ATOMIC_SIZE_INC(page->heap->final_slots_count); } typedef int each_obj_callback(void *, void *, size_t, void *); @@ -2754,6 +3178,7 @@ objspace_each_exec(bool protected, struct each_obj_data *each_obj_data) static void objspace_each_objects(rb_objspace_t *objspace, each_obj_callback *callback, void *data, bool protected) { + wait_for_background_sweeping_to_finish(objspace, true, false, "objspace_each_objects"); struct each_obj_data each_obj_data = { .objspace = objspace, .each_obj_callback = callback, @@ -2901,30 +3326,33 @@ run_final(rb_objspace_t *objspace, VALUE zombie, unsigned int lev) return lev; } +void +rb_gc_impl_free_zombie(rb_objspace_t *objspace, VALUE obj) +{ + GC_ASSERT(!is_sweep_thread_p()); + struct heap_page *page = GET_HEAP_PAGE(obj); + GC_ASSERT(RUBY_ATOMIC_VALUE_LOAD(page->heap->final_slots_count) > 0); + RUBY_ATOMIC_SIZE_DEC(page->heap->final_slots_count); + GC_ASSERT(page->final_slots > 0); + page->final_slots--; + RVALUE_AGE_SET_BITMAP(obj, 0); +} + static void finalize_list(rb_objspace_t *objspace, VALUE zombie) { while (zombie) { VALUE next_zombie; - struct heap_page *page; rb_asan_unpoison_object(zombie, false); next_zombie = RZOMBIE(zombie)->next; - page = GET_HEAP_PAGE(zombie); unsigned int lev = RB_GC_VM_LOCK(); lev = run_final(objspace, zombie, lev); { GC_ASSERT(BUILTIN_TYPE(zombie) == T_ZOMBIE); - GC_ASSERT(page->heap->final_slots_count > 0); - GC_ASSERT(page->final_slots > 0); - - page->heap->final_slots_count--; - page->final_slots--; - page->free_slots++; - RVALUE_AGE_SET_BITMAP(zombie, 0); - heap_page_add_freeobj(objspace, page, zombie); - page->heap->total_freed_objects++; + GC_ASSERT(!FL_TEST(zombie, FL_FINALIZE)); + ZOMBIE_SET_NEEDS_FREE_FLAG(zombie); } RB_GC_VM_UNLOCK(lev); @@ -2981,15 +3409,27 @@ gc_abort(void *objspace_ptr) objspace->flags.during_incremental_marking = FALSE; } +#if VM_CHECK_MODE > 0 + sweep_lock_lock(&objspace->sweep_lock); + 
GC_ASSERT(!objspace->sweep_rest); + sweep_lock_unlock(&objspace->sweep_lock); +#endif + + wait_for_background_sweeping_to_finish(objspace, true, false, "gc_abort"); + if (is_lazy_sweeping(objspace)) { for (int i = 0; i < HEAP_COUNT; i++) { rb_heap_t *heap = &heaps[i]; - heap->sweeping_page = NULL; + heap->swept_pages = NULL; + + heap->pre_sweeping_page = NULL; + heap->is_finished_sweeping = false; + heap->background_sweep_steps = heap->foreground_sweep_steps; struct heap_page *page = NULL; ccan_list_for_each(&heap->pages, page, page_node) { - page->flags.before_sweep = false; + page->before_sweep = 0; } } } @@ -3043,6 +3483,8 @@ rb_gc_impl_shutdown_call_finalizer_i(st_data_t key, st_data_t val, st_data_t _da return ST_DELETE; } +void rb_gc_stop_background_threads(rb_objspace_t *objspace, const char *from_fn); + void rb_gc_impl_shutdown_call_finalizer(void *objspace_ptr) { @@ -3052,6 +3494,8 @@ rb_gc_impl_shutdown_call_finalizer(void *objspace_ptr) gc_verify_internal_consistency(objspace); #endif + wait_for_background_sweeping_to_finish(objspace, true, false, "shutdown_call_finalizer"); + /* prohibit incremental GC */ objspace->flags.dont_incremental = 1; @@ -3066,7 +3510,6 @@ rb_gc_impl_shutdown_call_finalizer(void *objspace_ptr) st_foreach(finalizer_table, rb_gc_impl_shutdown_call_finalizer_i, 0); } - /* run finalizers */ finalize_deferred(objspace); GC_ASSERT(heap_pages_deferred_final == 0); @@ -3483,6 +3926,8 @@ struct gc_sweep_context { int empty_slots; }; +bool rb_gc_obj_needs_cleanup_p(VALUE obj); + static inline void gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bitset, struct gc_sweep_context *ctx) { @@ -3509,14 +3954,19 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit ctx->empty_slots++; heap_page_add_freeobj(objspace, sweep_page, vp); break; - case T_ZOMBIE: - /* already counted */ + case T_ZOMBIE: // FIXME: no more zombies? 
+ if (ZOMBIE_NEEDS_FREE_P(vp)) { + goto free_object; + } + /* already counted as final slot */ break; case T_NONE: ctx->empty_slots++; /* already freed */ break; default: + free_object: + psweep_debug(0, "[gc] gc_sweep_plane: heap:%p (%ld) freeing obj:%p (%s)\n", heap, heap - heaps, (void*)vp, rb_obj_info(vp)); #if RGENGC_CHECK_MODE if (!is_full_marking(objspace)) { if (RVALUE_OLD_P(objspace, vp)) rb_bug("page_sweep: %p - old while minor GC.", (void *)p); @@ -3567,28 +4017,118 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit } while (bitset); } +void +wait_for_background_sweeping_to_finish(rb_objspace_t *objspace, bool abort_current_background_sweep, bool exit_sweep_thread, const char *from_fn) +{ + if (!objspace->sweep_thread) { + return; + } + sweep_lock_lock(&objspace->sweep_lock); + if (abort_current_background_sweep) { + objspace->background_sweep_abort = true; + objspace->background_sweep_restart_heaps = false; + objspace->sweep_thread_sweep_requested = false; + } + while (objspace->sweep_thread_running && objspace->sweep_thread_sweeping) { + psweep_debug(1, "[gc] Waiting for sweep thread to finish (abort_sweep:%d, from_fn:%s)\n", abort_current_background_sweep, from_fn); + rb_native_cond_signal(&objspace->sweep_cond); + sweep_lock_set_unlocked(); + rb_native_cond_wait(&objspace->sweep_cond, &objspace->sweep_lock); + sweep_lock_set_locked(); + } + if (exit_sweep_thread) { + GC_ASSERT(abort_current_background_sweep); + objspace->sweep_thread_running = false; + while (!objspace->sweep_thread_sweep_exited) { + rb_native_cond_signal(&objspace->sweep_cond); + sweep_lock_set_unlocked(); + rb_native_cond_wait(&objspace->sweep_cond, &objspace->sweep_lock); + sweep_lock_set_locked(); + } + pthread_join(objspace->sweep_thread, NULL); + psweep_debug(0, "Sweep thread joined from %s\n", from_fn); + GET_VM()->gc.sweep_thread = 0; + objspace->sweep_thread = 0; + } + else { + psweep_debug(0, "Waited for sweep thread to finish sweep from %s\n", from_fn); + } + objspace->background_sweep_abort = false; + objspace->background_sweep_mode = false; + sweep_lock_unlock(&objspace->sweep_lock); +} + +// dequeue MIN(left_to_deq, 10) objects from the deferred object list into `obj_buf`, returning the amount dequeued. +static short +deq_deferred_sweep_objects(rb_objspace_t *objspace, rb_heap_t *heap, VALUE obj_buf[10], short left_to_deq) +{ + GC_ASSERT(left_to_deq > 0); + short to_deq = 10; + if (left_to_deq < 10) to_deq = left_to_deq; +#if PSWEEP_LOCK_STATS > 0 + instrumented_lock_acquire(&heap->deferred_sweep_data.lock, &deferred_sweep_data_lock_stats); +#else + rb_native_mutex_lock(&heap->deferred_sweep_data.lock); +#endif + { + if ((size_t)to_deq > rb_darray_size(heap->deferred_sweep_data.object_list)) { + psweep_debug(0, "Error: trying to deq %hi from object_list of size %lu\n", to_deq, rb_darray_size(heap->deferred_sweep_data.object_list)); + } + GC_ASSERT((size_t)to_deq <= rb_darray_size(heap->deferred_sweep_data.object_list)); + for (short i = 0; i < to_deq; i++) { + obj_buf[i] = rb_darray_get(heap->deferred_sweep_data.object_list, i); + } + } + rb_darray_shift_n(heap->deferred_sweep_data.object_list, to_deq); + rb_native_mutex_unlock(&heap->deferred_sweep_data.lock); + return to_deq; +} + +// Free the object in a Ruby thread. Return whether or not we put the slot back on the page's freelist. 
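deq_deferred_sweep_objects above drains at most ten deferred objects per lock acquisition, so the per-heap lock is only held for a short, bounded copy. A minimal sketch of that batched-dequeue pattern over a plain array, using hypothetical names; the patch itself uses rb_darray plus the instrumented lock wrappers:

#include <pthread.h>
#include <string.h>

#define BATCH 10

/* Hypothetical mutex-protected list of pending values. */
struct pending_list {
    pthread_mutex_t lock;
    unsigned long *items;
    size_t len;
};

/* Copy up to BATCH items into buf under the lock, then shift the rest down.
 * Returns the number of items dequeued. */
static size_t
deq_batch(struct pending_list *list, unsigned long buf[BATCH], size_t want)
{
    size_t n = want < BATCH ? want : BATCH;

    pthread_mutex_lock(&list->lock);
    if (n > list->len) n = list->len;
    memcpy(buf, list->items, n * sizeof(buf[0]));
    memmove(list->items, list->items + n, (list->len - n) * sizeof(buf[0]));
    list->len -= n;
    pthread_mutex_unlock(&list->lock);

    return n;
}

int
main(void)
{
    unsigned long items[3] = { 7, 8, 9 }, buf[BATCH];
    struct pending_list list = { PTHREAD_MUTEX_INITIALIZER, items, 3 };
    return deq_batch(&list, buf, 2) == 2 ? 0 : 1;
}

deferred_free, defined next, is what the Ruby thread then calls on each dequeued object.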
+static bool +deferred_free(rb_objspace_t *objspace, VALUE obj) +{ + ASSERT_vm_locking_with_barrier(); + bool result; +#if VM_CHECK_MODE > 0 + MAYBE_UNUSED(const char *obj_info) = rb_obj_info(obj); +#endif + rb_gc_obj_free_vm_weak_references(obj); + if (rb_gc_obj_free(objspace, obj)) { + struct heap_page *page = GET_HEAP_PAGE(obj); + psweep_debug(1, "[gc] deferred free: page(%p) obj(%p) %s (success)\n", page, (void*)obj, obj_info); + RVALUE_AGE_SET_BITMAP(obj, 0); + heap_page_add_freeobj(objspace, page, obj); + (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)obj, page->slot_size); + result = true; + } + else { + result = false; + MAYBE_UNUSED(struct heap_page *page) = GET_HEAP_PAGE(obj); + psweep_debug(1, "[gc] deferred sweep: page(%p) obj(%p) %s (zombie)\n", page, (void*)obj, obj_info); + } + return result; +} + +// Clear bits for the page that was swept by the background thread. static inline void -gc_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct gc_sweep_context *ctx) +gc_post_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *sweep_page, bool force_setup_mark_bits) { - struct heap_page *sweep_page = ctx->page; GC_ASSERT(sweep_page->heap == heap); - uintptr_t p; - bits_t *bits, bitset; + bits_t *bits; - gc_report(2, objspace, "page_sweep: start.\n"); + gc_report(2, objspace, "post_page_sweep: start.\n"); #if RGENGC_CHECK_MODE if (!objspace->flags.immediate_sweep) { - GC_ASSERT(sweep_page->flags.before_sweep == TRUE); + GC_ASSERT(RUBY_ATOMIC_LOAD(sweep_page->before_sweep)); } #endif - sweep_page->flags.before_sweep = FALSE; - sweep_page->free_slots = 0; + RUBY_ATOMIC_SET(sweep_page->before_sweep, 0); - p = (uintptr_t)sweep_page->start; bits = sweep_page->mark_bits; - short slot_size = sweep_page->slot_size; + int total_slots = sweep_page->total_slots; int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH); @@ -3609,6 +4149,59 @@ gc_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct gc_sweep_context } } + if (!heap->compact_cursor) { + if (objspace->background_sweep_mode && !force_setup_mark_bits) { + /* Defer gc_setup_mark_bits to gc_sweep_finish on the GC thread, + * because it overwrites mark_bits which would race with mutator + * write barriers for objects on the same page. */ + sweep_page->needs_setup_mark_bits = true; + } + else { + gc_setup_mark_bits(sweep_page); + sweep_page->needs_setup_mark_bits = false; + } + } + + if (RUBY_ATOMIC_PTR_LOAD(heap_pages_deferred_final) && !finalizing) { + gc_finalize_deferred_register(objspace); + } + + gc_report(2, objspace, "post_page_sweep: end.\n"); +} + +// Sweep a page by the Ruby thread (synchronous freeing). 
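The synchronous sweep path that follows (and gc_pre_sweep_page further down) first pads the last bitmap plane: slots past total_slots are forced to look marked so that inverting the plane never yields a sweep candidate outside the page. A tiny standalone illustration of that mask, assuming 64-bit planes:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
    /* Say the last plane only covers 5 real slots. */
    uint64_t plane = 0;            /* no real slot is marked */
    int in_range = 5;

    /* Force every out-of-range bit to 1 ("marked"). */
    plane |= ~(((uint64_t)1 << in_range) - 1);

    /* Inverting the plane now yields candidates only among real slots. */
    uint64_t sweep_candidates = ~plane;
    assert(sweep_candidates == 0x1f);   /* bits 0..4 only */
    return 0;
}

gc_sweep_page below is the synchronous path that applies this mask before scanning for unmarked slots.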
+static inline void +gc_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct gc_sweep_context *ctx) +{ + struct heap_page *sweep_page = ctx->page; + GC_ASSERT(sweep_page->heap == heap); + + uintptr_t p; + bits_t *bits, bitset; + + gc_report(2, objspace, "page_sweep: start.\n"); + + psweep_debug(1, "[gc] gc_sweep_page: heap:%p (%ld) page:%p\n", heap, heap - heaps, sweep_page); + +#if RGENGC_CHECK_MODE + if (!objspace->flags.immediate_sweep) { + GC_ASSERT(RUBY_ATOMIC_LOAD(sweep_page->before_sweep)); + } +#endif + RUBY_ATOMIC_SET(sweep_page->before_sweep, 0); + sweep_page->free_slots = 0; + + p = (uintptr_t)sweep_page->start; + bits = sweep_page->mark_bits; + short slot_size = sweep_page->slot_size; + int total_slots = sweep_page->total_slots; + int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH); + + int out_of_range_bits = total_slots % BITS_BITLENGTH; + if (out_of_range_bits != 0) { + bits[bitmap_plane_count - 1] |= ~(((bits_t)1 << out_of_range_bits) - 1); + } + for (int i = 0; i < bitmap_plane_count; i++) { bitset = ~bits[i]; if (bitset) { @@ -3617,6 +4210,18 @@ gc_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct gc_sweep_context p += BITS_BITLENGTH * slot_size; } + // Clear wb_unprotected and age bits for all unmarked slots + { + bits_t *wb_unprotected_bits = sweep_page->wb_unprotected_bits; + bits_t *age_bits = sweep_page->age_bits; + for (int i = 0; i < bitmap_plane_count; i++) { + bits_t unmarked = ~bits[i]; + wb_unprotected_bits[i] &= ~unmarked; + age_bits[i * 2] &= ~unmarked; + age_bits[i * 2 + 1] &= ~unmarked; + } + } + if (!heap->compact_cursor) { gc_setup_mark_bits(sweep_page); } @@ -3633,10 +4238,10 @@ gc_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct gc_sweep_context sweep_page->total_slots, ctx->freed_slots, ctx->empty_slots, ctx->final_slots); - sweep_page->free_slots += ctx->freed_slots + ctx->empty_slots; sweep_page->heap->total_freed_objects += ctx->freed_slots; + sweep_page->free_slots = ctx->freed_slots + ctx->empty_slots; - if (heap_pages_deferred_final && !finalizing) { + if (RUBY_ATOMIC_PTR_LOAD(heap_pages_deferred_final) && !finalizing) { gc_finalize_deferred_register(objspace); } @@ -3705,24 +4310,537 @@ heap_page_freelist_append(struct heap_page *page, struct free_slot *freelist) p->next = freelist; rb_asan_poison_object((VALUE)p); } - else { - page->freelist = freelist; + else { + page->freelist = freelist; + } + asan_lock_freelist(page); + } +} + +static void +sweep_in_ruby_thread(rb_objspace_t *objspace, struct heap_page *page, VALUE obj, bool nozombie) +{ + rb_heap_t *heap = page->heap; +#if PSWEEP_LOCK_STATS > 0 + instrumented_lock_acquire(&heap->deferred_sweep_data.lock, &deferred_sweep_data_lock_stats); +#else + rb_native_mutex_lock(&heap->deferred_sweep_data.lock); +#endif + { + page->pre_deferred_free_slots += 1; + psweep_debug(1, "[sweep] register sweep later: page(%p), obj(%p) %s\n", (void*)page, (void*)obj, rb_obj_info(obj)); + GC_ASSERT(BUILTIN_TYPE(obj) != T_NONE); + rb_darray_append_without_gc(&heap->deferred_sweep_data.object_list, obj); + /*if (rb_darray_size(heap->deferred_sweep_data.object_list) > 128) {*/ + /*fprintf(stderr, "deferred sweep data object list size:%lu\n", rb_darray_size(heap->deferred_sweep_data.object_list));*/ + /*}*/ + } + rb_native_mutex_unlock(&heap->deferred_sweep_data.lock); +} + +bool +zombie_needs_deferred_free(VALUE zombie) +{ + return ZOMBIE_NEEDS_FREE_P(zombie); +} + +static inline void +gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page, 
uintptr_t p, bits_t bitset, short slot_size) +{ + unsigned short freed = 0; + unsigned short empties = 0; + unsigned short finals = 0; + do { + VALUE vp = (VALUE)p; + GC_ASSERT(GET_HEAP_PAGE(vp) == page); + + rb_asan_unpoison_object(vp, false); + if (bitset & 1) { + switch (BUILTIN_TYPE(vp)) { + case T_MOVED: { + empties++; + heap_page_add_deferred_freeobj(objspace, page, vp); + (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)vp, page->slot_size); + break; + } + case T_NONE: + /*psweep_debug("[sweep] empty: page(%p), obj(%p)\n", (void*)page, (void*)vp);*/ + empties++; // already in freelist + break; + case T_ZOMBIE: + if (zombie_needs_deferred_free(vp)) { + sweep_in_ruby_thread(objspace, page, vp, false); + } + else { + // already counted as final_slot when made into a zombie + } + break; + case T_DATA: { + void *data = RTYPEDDATA_P(vp) ? RTYPEDDATA_GET_DATA(vp) : DATA_PTR(vp); + if (!data) { + goto free; + } + // NOTE: this repeats code found in `rb_data_free`. This is just for testing purposes. + bool free_immediately = false; + void (*dfree)(void *); + if (RTYPEDDATA_P(vp)) { + free_immediately = (RTYPEDDATA_TYPE(vp)->flags & RUBY_TYPED_FREE_IMMEDIATELY) != 0; + dfree = RTYPEDDATA_TYPE(vp)->function.dfree; + } + else { + dfree = RDATA(vp)->dfree; + } + if (!dfree || dfree == RUBY_DEFAULT_FREE || free_immediately) { + if (rb_gc_obj_has_blacklisted_vm_weak_references(vp)) { + sweep_in_ruby_thread(objspace, page, vp, true); + break; + } + else { + goto free; + } + } + else { + if (rb_gc_obj_has_blacklisted_vm_weak_references(vp)) { + sweep_in_ruby_thread(objspace, page, vp, false); + break; + } + else { + goto free; + } + } + break; + } + case T_IMEMO: { + if (rb_gc_obj_has_blacklisted_vm_weak_references(vp)) { + sweep_in_ruby_thread(objspace, page, vp, true); + break; + } + switch (imemo_type(vp)) { + case imemo_constcache: + case imemo_cref: + case imemo_env: + case imemo_ifunc: + case imemo_memo: + case imemo_svar: + case imemo_throw_data: + case imemo_tmpbuf: + case imemo_fields: + goto free; + default: + sweep_in_ruby_thread(objspace, page, vp, true); + break; + } + break; + } + case T_COMPLEX: + case T_RATIONAL: + case T_FLOAT: + case T_BIGNUM: + case T_OBJECT: + case T_STRING: + case T_ARRAY: + case T_HASH: + case T_STRUCT: + case T_MATCH: + case T_REGEXP: + case T_FILE: { + if (rb_gc_obj_has_blacklisted_vm_weak_references(vp)) { + sweep_in_ruby_thread(objspace, page, vp, true); + break; + } + else { + goto free; + } + break; + } + default: // ex: T_CLASS/T_MODULE/T_ICLASS/T_SYMBOL + if (!rb_gc_obj_needs_cleanup_p(vp)) { + heap_page_add_deferred_freeobj(objspace, page, vp); + psweep_debug(2, "[sweep] freed: page(%p), obj(%p)\n", (void*)page, (void*)vp); + (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)vp, page->slot_size); + freed++; + } + else { + sweep_in_ruby_thread(objspace, page, vp, true); + } + break; + free: { + if (rb_gc_obj_free_vm_weak_references(vp)) { + bool can_put_back_on_freelist = rb_gc_obj_free(objspace, vp); + if (can_put_back_on_freelist) { + heap_page_add_deferred_freeobj(objspace, page, vp); + freed++; + psweep_debug(2, "[sweep] freed: page(%p), obj(%p)\n", (void*)page, (void*)vp); + (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)vp, page->slot_size); + } + else { + // They are zombies now + RUBY_ASSERT(BUILTIN_TYPE(vp) == T_ZOMBIE); + psweep_debug(2, "[sweep] zombie: page(%p), obj(%p)\n", (void*)page, (void*)vp); + finals++; + } + } + else { + GC_ASSERT(BUILTIN_TYPE(vp) != T_NONE); + sweep_in_ruby_thread(objspace, page, vp, true); + } + break; + } + } + } + + p += slot_size; + 
bitset >>= 1; + } while (bitset); + + page->pre_freed_slots += freed; + page->pre_empty_slots += empties; + page->pre_final_slots += finals; +} + +static void +gc_pre_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page) +{ + uintptr_t p = (uintptr_t)page->start; + bits_t *bits = page->mark_bits; + bits_t bitset; + short slot_size = page->slot_size; + int total_slots = page->total_slots; + psweep_debug(1, "[sweep] gc_pre_sweep_page(heap:%p page:%p) start\n", heap, page); + GC_ASSERT(page->heap == heap); + page->pre_deferred_free_slots = 0; + + int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH); + int out_of_range_bits = total_slots % BITS_BITLENGTH; + + if (out_of_range_bits != 0) { + bits[bitmap_plane_count - 1] |= ~(((bits_t)1 << out_of_range_bits) - 1); + } + + for (int i = 0; i < bitmap_plane_count; i++) { + bitset = ~bits[i]; + if (bitset) { + gc_pre_sweep_plane(objspace, heap, page, p, bitset, slot_size); + } + p += BITS_BITLENGTH * slot_size; + } + + psweep_debug(1, "[sweep] gc_pre_sweep_page(heap:%p page:%p) done, deferred free:%d\n", heap, page, page->pre_deferred_free_slots); +} + +static inline bool +done_worker_incremental_sweep_steps_p(rb_objspace_t *objspace, rb_heap_t *heap) +{ + if (ATOMIC_LOAD_RELAXED(heap->foreground_sweep_steps) != heap->background_sweep_steps) { + GC_ASSERT(ATOMIC_LOAD_RELAXED(heap->foreground_sweep_steps) > heap->background_sweep_steps); + return true; + } + return false; +} + +static bool +bitmap_is_all_zero(bits_t *bits, size_t count) +{ + for (size_t i = 0; i < count; i++) { + if (bits[i] != 0) return false; + } + return true; +} + +static void +move_to_empty_pages(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page) +{ + GC_ASSERT(bitmap_is_all_zero(page->mark_bits, HEAP_PAGE_BITMAP_LIMIT)); + GC_ASSERT(bitmap_is_all_zero(page->uncollectible_bits, HEAP_PAGE_BITMAP_LIMIT)); + GC_ASSERT(bitmap_is_all_zero(page->wb_unprotected_bits, HEAP_PAGE_BITMAP_LIMIT)); + GC_ASSERT(bitmap_is_all_zero(page->marking_bits, HEAP_PAGE_BITMAP_LIMIT)); + GC_ASSERT(bitmap_is_all_zero(page->remembered_bits, HEAP_PAGE_BITMAP_LIMIT)); + GC_ASSERT(bitmap_is_all_zero(page->age_bits, HEAP_PAGE_BITMAP_LIMIT * RVALUE_AGE_BIT_COUNT)); + + heap_unlink_page(objspace, heap, page); + + page->start = 0; + page->total_slots = 0; + page->slot_size = 0; + page->heap = NULL; + page->free_slots = 0; + + asan_unlock_freelist(page); + page->freelist = NULL; + asan_lock_freelist(page); + + asan_poison_memory_region(page->body, HEAP_PAGE_SIZE); + + objspace->empty_pages_count++; + page->free_next = objspace->empty_pages; + objspace->empty_pages = page; +} + +static void +clear_pre_sweep_fields(struct heap_page *page) +{ + page->pre_freed_slots = 0; + page->pre_deferred_free_slots = 0; + page->pre_empty_slots = 0; + page->pre_final_slots = 0; +} + +// add beginning of b to end of a +static void +merge_freelists(struct free_slot *a, struct free_slot *b) +{ + if (a && b) { + while (a->next) { + a = a->next; + } + a->next = b; + } +} + +// Perform incremental (lazy) sweep on a heap by the background sweep thread. 
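merge_freelists above splices list b onto the tail of list a by walking a to its last node. A small standalone check of that behaviour, with a hypothetical slot type; callers pass the deferred freelist first and then install it as the page's freelist, so deferred slots end up at the front of the resulting list:

#include <assert.h>
#include <stddef.h>

struct free_slot { struct free_slot *next; };

/* Same shape as merge_freelists above: append list b to the tail of a. */
static void
merge(struct free_slot *a, struct free_slot *b)
{
    if (a && b) {
        while (a->next) a = a->next;
        a->next = b;
    }
}

int
main(void)
{
    struct free_slot d2 = { NULL };
    struct free_slot d1 = { &d2 };      /* deferred list: d1 -> d2 */
    struct free_slot f1 = { NULL };     /* ordinary freelist       */

    merge(&d1, &f1);
    assert(d1.next == &d2 && d2.next == &f1 && f1.next == NULL);
    return 0;
}

gc_sweep_step_worker, which follows, uses this to fold the page's ordinary freelist onto the deferred freelist once post-processing is done.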
+static void +gc_sweep_step_worker(rb_objspace_t *objspace, rb_heap_t *heap) +{ + // sweep_lock is acquired + // + // We're finished either when they are no pages left to pre-sweep, OR: + // 1) When we're not in `sweep_rest` or `background_mode`, if we've encountered a change in `heap->foreground_sweep_steps` + GC_ASSERT(heap->background_sweep_steps <= ATOMIC_LOAD_RELAXED(heap->foreground_sweep_steps)); + if (heap->done_background_sweep) { + psweep_debug(-2, "[sweep] gc_sweep_step_worker: heap:%p (%ld) - done (early return)\n", heap, heap - heaps); + return; + } + else if (heap->skip_sweep_continue) { + psweep_debug(-2, "[sweep] gc_sweep_step_worker: heap:%p (%ld) - skip_continue (early return)\n", heap, heap - heaps); + heap->skip_sweep_continue = false; + return; + } + while (1) { + struct heap_page *sweep_page = heap->sweeping_page; + if (!sweep_page) { + GC_ASSERT(!heap->done_background_sweep); + GC_ASSERT(objspace->heaps_done_background_sweep < HEAP_COUNT); + heap->done_background_sweep = true; + objspace->heaps_done_background_sweep++; + psweep_debug(-2, "[sweep] gc_sweep_step_worker: heap:%p (%ld) - !sweeping_page\n", heap, heap - heaps); + break; + } + sweep_page->free_next = NULL; + struct heap_page *next = ccan_list_next(&heap->pages, sweep_page, page_node); + + if (!next) { + GC_ASSERT(!heap->done_background_sweep); + GC_ASSERT(objspace->heaps_done_background_sweep < HEAP_COUNT); + heap->done_background_sweep = true; + objspace->heaps_done_background_sweep++; + psweep_debug(-2, "[sweep] gc_sweep_step_worker: heap:%p (%ld) - !next\n", heap, heap - heaps); + // Let Ruby thread deal with last page of the heap. + break; + } + + heap->sweeping_page = next; + heap->pre_sweeping_page = sweep_page; + + sweep_lock_unlock(&objspace->sweep_lock); + + gc_pre_sweep_page(objspace, heap, sweep_page); + + sweep_lock_lock(&objspace->sweep_lock); + heap->pre_sweeping_page = NULL; + sweep_page->free_next = NULL; + + int pre_freed_slots = sweep_page->pre_freed_slots; + int pre_empty_slots = sweep_page->pre_empty_slots; + int free_slots = pre_freed_slots + pre_empty_slots; + + if (objspace->background_sweep_mode && sweep_page->pre_deferred_free_slots == 0) { + if (free_slots == sweep_page->total_slots) { + GC_ASSERT(sweep_page->total_slots > 0); + psweep_debug(-6, "[sweep] (bg) gc_sweep_step_worker: heap %ld adding to empty_pages:%p (pre_empty:%d, pre_freed:%d)\n", + heap - heaps, sweep_page, sweep_page->pre_empty_slots, sweep_page->pre_freed_slots); + // We're guaranteed to stay in background mode during this (starting GC requires taking the + // sweep_lock to change sweep background mode to false) + GC_ASSERT(sweep_page->pre_final_slots == 0); + clear_pre_sweep_fields(sweep_page); + gc_post_sweep_page(objspace, heap, sweep_page, true); + move_to_empty_pages(objspace, heap, sweep_page); + continue; + } + else if (free_slots > 0) { + // These are just for statistics, not used in calculations + heap->freed_slots += sweep_page->pre_freed_slots; + heap->empty_slots += sweep_page->pre_empty_slots; + + sweep_page->free_slots = free_slots; + sweep_page->heap->total_freed_objects += sweep_page->pre_freed_slots; + clear_pre_sweep_fields(sweep_page); + gc_post_sweep_page(objspace, heap, sweep_page, false); + if (sweep_page->deferred_freelist) { + merge_freelists(sweep_page->deferred_freelist, sweep_page->freelist); + sweep_page->freelist = sweep_page->deferred_freelist; + } + sweep_page->deferred_freelist = NULL; + if (heap->pre_swept_slots_nodeferred < GC_INCREMENTAL_SWEEP_POOL_SLOT_COUNT) { + 
psweep_debug(-6, "[sweep] (bg) gc_sweep_step_worker: heap %ld adding to pooled pages:%p (pre_empty:%d, pre_freed:%d, pre_swept:%lu->%lu)\n", + heap - heaps, sweep_page, pre_empty_slots, pre_freed_slots, heap->pre_swept_slots_nodeferred, + heap->pre_swept_slots_nodeferred + free_slots); + heap->pre_swept_slots_nodeferred += free_slots; + heap_add_poolpage(objspace, heap, sweep_page); + continue; + } + else { + psweep_debug(-6, "[sweep] (bg) gc_sweep_step_worker: heap %ld adding to free pages:%p (pre_empty:%d, pre_freed:%d, pre_swept:%lu->%lu)\n", + heap - heaps, sweep_page, pre_empty_slots, pre_freed_slots, heap->pre_swept_slots_nodeferred, + heap->pre_swept_slots_nodeferred + free_slots); + heap_add_freepage(heap, sweep_page, "gc_sweep_step_worker"); + heap->pre_swept_slots_nodeferred += free_slots; + if (heap->pre_swept_slots_nodeferred > (GC_INCREMENTAL_SWEEP_SLOT_COUNT + GC_INCREMENTAL_SWEEP_POOL_SLOT_COUNT)) { + heap->pre_swept_slots_nodeferred = 0; + /*break;*/ + } + continue; + } + } + else { + // Don't even add to `swept_pages`, no further processing needed by ruby thread (no free slots) + clear_pre_sweep_fields(sweep_page); + gc_post_sweep_page(objspace, heap, sweep_page, false); + continue; + } + } + + +#if PSWEEP_LOCK_STATS > 0 + instrumented_lock_acquire(&heap->swept_pages_lock, &swept_pages_lock_stats); +#else + rb_native_mutex_lock(&heap->swept_pages_lock); +#endif + { + if (heap->swept_pages) { + // NOTE: heap->swept_pages needs to be in swept order for gc_sweep_step to work properly. + struct heap_page *latest = heap->latest_swept_page; + GC_ASSERT(latest); + latest->free_next = sweep_page; + } + else { + heap->swept_pages = sweep_page; + } + heap->latest_swept_page = sweep_page; + } + rb_native_mutex_unlock(&heap->swept_pages_lock); + + psweep_debug(-2, "[sweep] gc_sweep_step_worker: heap:%p (%ld) - swept page:%p\n", heap, heap - heaps, sweep_page); + + if (!objspace->background_sweep_mode) { + if (!objspace->sweep_rest && done_worker_incremental_sweep_steps_p(objspace, heap)) { + rb_native_cond_broadcast(&heap->sweep_page_cond); + psweep_debug(-2, "[sweep] (fg) gc_sweep_step_worker: done incremental step heap:%p (%ld)\n", heap, heap - heaps); + heap->background_sweep_steps = ATOMIC_LOAD_RELAXED(heap->foreground_sweep_steps); + break; + } + } + else { + heap->pre_swept_slots_deferred += free_slots; + if (RB_UNLIKELY(objspace->background_sweep_abort)) { + psweep_debug(-2, "[sweep] (bg) gc_sweep_step_worker: break early heap:%p (%ld) (abort)\n", heap, heap - heaps); + break; + } + else if (objspace->background_sweep_restart_heaps) { + psweep_debug(-2, "[sweep] (bg) gc_sweep_step_worker: break early heap:%p (%ld) (restart)\n", heap, heap - heaps); + break; + } + } + // notify of newly swept page in case Ruby thread is waiting on us + rb_native_cond_broadcast(&heap->sweep_page_cond); + } + // sweep_lock is acquired +} + +static void * +gc_sweep_thread_func(void *ptr) +{ + rb_objspace_t *objspace = ptr; + + psweep_debug(1, "[sweep] sweep_thread start\n"); + sweep_lock_lock(&objspace->sweep_lock); + objspace->sweep_thread_sweep_exited = false; + + while (objspace->sweep_thread_running) { + while (!objspace->sweep_thread_sweep_requested && objspace->sweep_thread_running) { + psweep_debug(1, "[sweep] sweep_thread wait\n"); + objspace->sweep_thread_waiting_request = true; + sweep_lock_set_unlocked(); + rb_native_cond_wait(&objspace->sweep_cond, &objspace->sweep_lock); + sweep_lock_set_locked(); + objspace->sweep_thread_waiting_request = false; + psweep_debug(1, "[sweep] sweep_thread 
wake\n"); // requested or signalled to exit + } + if (!objspace->sweep_thread_running) { + break; + } + + objspace->sweep_thread_sweep_requested = false; + objspace->sweep_thread_sweeping = true; + + restart_heaps: + for (int i = 0; i < HEAP_COUNT; i++) { + rb_heap_t *heap = &heaps[i]; + if (RB_UNLIKELY(objspace->background_sweep_mode && objspace->background_sweep_abort)) { + psweep_debug(-2, "[sweep] abort: break before sweeping heap:%p (%d)\n", heap, i); + break; + } + if (objspace->background_sweep_mode && objspace->background_sweep_restart_heaps) { + objspace->background_sweep_restart_heaps = false; + psweep_debug(-2, "[sweep] restart heaps from 0 (at %d)\n", i); + goto restart_heaps; + } + psweep_debug(-2, "[sweep] sweep heap:%p (%d)\n", heap, i); + gc_sweep_step_worker(objspace, heap); } - asan_lock_freelist(page); + psweep_debug(1, "[sweep] /sweep_heaps\n"); + + objspace->sweep_thread_sweeping = false; + rb_native_cond_broadcast(&objspace->sweep_cond); } + psweep_debug(-5, "[sweep] sweep_thread exit\n"); + objspace->sweep_thread_sweep_requested = false; + objspace->sweep_thread_sweep_exited = true; + rb_native_cond_broadcast(&objspace->sweep_cond); + sweep_lock_unlock(&objspace->sweep_lock); + + return NULL; } static void gc_sweep_start_heap(rb_objspace_t *objspace, rb_heap_t *heap) { + // Background thread is not sweeping right now heap->sweeping_page = ccan_list_top(&heap->pages, struct heap_page, page_node); heap->free_pages = NULL; + heap->swept_pages = NULL; heap->pooled_pages = NULL; - if (!objspace->flags.immediate_sweep) { - struct heap_page *page = NULL; + heap->latest_swept_page = NULL; + heap->pre_swept_slots_nodeferred = 0; + heap->pre_swept_slots_deferred = 0; + + heap->pre_sweeping_page = NULL; + heap->background_sweep_steps = heap->foreground_sweep_steps; + heap->is_finished_sweeping = false; + heap->done_background_sweep = false; + heap->skip_sweep_continue = false; + // TODO + /*rb_darray_clear_and_free_without_gc(heap->deferred_sweep_data.object_list);*/ + /*if (rb_darray_size(heap->deferred_sweep_data.object_list) > 0) {*/ + /*psweep_debug(-1, "Error: gc_sweep_start_heap with object_list of size %lu\n", rb_darray_size(heap->deferred_sweep_data.object_list));*/ + /*}*/ + /*GC_ASSERT(rb_darray_size(heap->deferred_sweep_data.object_list) == 0);*/ + + struct heap_page *page = NULL; + if (!objspace->flags.immediate_sweep) { ccan_list_for_each(&heap->pages, page, page_node) { - page->flags.before_sweep = TRUE; + page->before_sweep = 1; + GC_ASSERT(page->pre_deferred_free_slots == 0); } } } @@ -3758,6 +4876,7 @@ gc_ractor_newobj_cache_clear(void *c, void *data) heap_page_freelist_append(page, freelist); + if (page) page->heap_cache = NULL; cache->using_page = NULL; cache->freelist = NULL; } @@ -3767,8 +4886,16 @@ static void gc_sweep_start(rb_objspace_t *objspace) { gc_mode_transition(objspace, gc_mode_sweeping); + objspace->flags.during_lazy_sweeping = TRUE; objspace->rincgc.pooled_slots = 0; +// Background sweeping cannot be happening +#if VM_CHECK_MODE > 0 + sweep_lock_lock(&objspace->sweep_lock); + GC_ASSERT(!objspace->sweep_thread_sweeping && !objspace->sweep_thread_sweep_requested); + sweep_lock_unlock(&objspace->sweep_lock); +#endif + #if GC_CAN_COMPILE_COMPACTION if (objspace->flags.during_compacting) { gc_sort_heap_by_compare_func( @@ -3778,6 +4905,8 @@ gc_sweep_start(rb_objspace_t *objspace) } #endif + objspace->heaps_done_background_sweep = 0; + for (int i = 0; i < HEAP_COUNT; i++) { rb_heap_t *heap = &heaps[i]; gc_sweep_start_heap(objspace, heap); @@ -3791,6 
+4920,25 @@ gc_sweep_start(rb_objspace_t *objspace) } rb_gc_ractor_newobj_cache_foreach(gc_ractor_newobj_cache_clear, NULL); + + psweep_debug(1, "[gc] gc_sweep_start\n"); + if (!objspace->flags.during_compacting && + (objspace->profile.latest_gc_info & GPR_FLAG_METHOD) == 0 && + !(objspace->hook_events & RUBY_INTERNAL_EVENT_FREEOBJ)) { + + objspace->use_background_sweep_thread = true; + psweep_debug(-1, "[gc] gc_sweep_start: requesting sweep thread\n"); + sweep_lock_lock(&objspace->sweep_lock); + { + objspace->sweep_thread_sweep_requested = true; + rb_native_cond_broadcast(&objspace->sweep_cond); + } + sweep_lock_unlock(&objspace->sweep_lock); + } + else { + objspace->use_background_sweep_thread = false; + psweep_debug(-1, "[gc] gc_sweep_start: not using background sweep thread\n"); + } } static void @@ -3802,6 +4950,12 @@ gc_sweep_finish_heap(rb_objspace_t *objspace, rb_heap_t *heap) size_t init_slots = gc_params.heap_init_bytes / heap->slot_size; size_t min_free_slots = (size_t)(MAX(total_slots, init_slots) * gc_params.heap_free_slots_min_ratio); + psweep_debug(-1, "[gc] gc_sweep_finish heap:%p (%ld)\n", heap, heap - heaps); + + GC_ASSERT(heap->background_sweep_steps <= ATOMIC_LOAD_RELAXED(heap->foreground_sweep_steps)); + GC_ASSERT(!heap->is_finished_sweeping); + heap->is_finished_sweeping = true; + if (swept_slots < min_free_slots && /* The heap is a growth heap if it freed more slots than had empty slots. */ ((heap->empty_slots == 0 && total_slots > 0) || heap->freed_slots > heap->empty_slots)) { @@ -3812,8 +4966,8 @@ gc_sweep_finish_heap(rb_objspace_t *objspace, rb_heap_t *heap) struct heap_page *resurrected_page; while (swept_slots < min_free_slots && (resurrected_page = heap_page_resurrect(objspace))) { - heap_add_page(objspace, heap, resurrected_page); - heap_add_freepage(heap, resurrected_page); + heap_add_page(objspace, heap, resurrected_page, false); + heap_add_freepage(heap, resurrected_page, "gc_sweep_finish_heap"); swept_slots += resurrected_page->free_slots; } @@ -3839,6 +4993,22 @@ static void gc_sweep_finish(rb_objspace_t *objspace) { gc_report(1, objspace, "gc_sweep_finish\n"); + psweep_debug(-1, "[gc] gc_sweep_finish\n"); + + objspace->use_background_sweep_thread = false; + + /* Run deferred gc_setup_mark_bits for pages swept by the background thread. + * This must run on the GC thread to avoid racing with mutator write barriers + * that modify mark_bits and uncollectible_bits. */ + for (int i = 0; i < HEAP_COUNT; i++) { + struct heap_page *page; + ccan_list_for_each(&heaps[i].pages, page, page_node) { + if (page->needs_setup_mark_bits) { + gc_setup_mark_bits(page); + page->needs_setup_mark_bits = false; + } + } + } gc_prof_set_heap_info(objspace); heap_pages_free_unused_pages(objspace); @@ -3848,6 +5018,9 @@ gc_sweep_finish(rb_objspace_t *objspace) heap->freed_slots = 0; heap->empty_slots = 0; + if (heap->background_sweep_steps < heap->foreground_sweep_steps) { + heap->background_sweep_steps = heap->foreground_sweep_steps; + } if (!will_be_incremental_marking(objspace)) { struct heap_page *end_page = heap->free_pages; @@ -3865,71 +5038,298 @@ gc_sweep_finish(rb_objspace_t *objspace) rb_gc_event_hook(0, RUBY_INTERNAL_EVENT_GC_END_SWEEP); gc_mode_transition(objspace, gc_mode_none); + objspace->flags.during_lazy_sweeping = FALSE; #if RGENGC_CHECK_MODE >= 2 gc_verify_internal_consistency(objspace); #endif } +// Dequeue a page swept by the background thread. If `free_in_user_thread` is true, then +// dequeue an unswept page to be swept by the Ruby thread. 
It can also dequeue an unswept +// page if otherwise it would have to wait for the background thread. In that case, `dequeued_unswept_page` +// is set to true. +// +// It returns NULL when there are no more pages to sweep for the heap, and also when the incremental +// step is finished for the heap (1 incremental step = `gc_continue()`). +static struct heap_page * +gc_sweep_dequeue_page(rb_objspace_t *objspace, rb_heap_t *heap, bool free_in_user_thread, bool *dequeued_unswept_page) +{ + if (free_in_user_thread) { + GC_ASSERT(!objspace->use_background_sweep_thread); + if (heap->sweeping_page == NULL) { + psweep_debug(0, "[gc] gc_sweep_dequeue_page: NULL page (synchronous) from heap(%p) (%ld)\n", heap, heap - heaps); + return NULL; + } + else { + struct heap_page *cur = heap->sweeping_page; + psweep_debug(0, "[gc] gc_sweep_dequeue_page:%p (synchronous) from heap(%p %ld)\n", cur, heap, heap - heaps); + struct heap_page *next = ccan_list_next(&heap->pages, cur, page_node); + heap->sweeping_page = next; + return cur; + } + } + + struct heap_page *page = NULL; + + // Avoid taking the global sweep_lock if we can +#if PSWEEP_LOCK_STATS > 0 + instrumented_lock_acquire(&heap->swept_pages_lock, &swept_pages_lock_stats); +#else + rb_native_mutex_lock(&heap->swept_pages_lock); +#endif + { + if (heap->swept_pages) { + page = heap->swept_pages; + psweep_debug(0, "[gc] gc_sweep_dequeue_page: got page:%p from heap(%p)->swept_pages (swept_pages lock) (heap %ld)\n", page, heap, heap - heaps); + heap->swept_pages = page->free_next; + } + } + rb_native_mutex_unlock(&heap->swept_pages_lock); + if (page) return page; + + sweep_lock_lock(&objspace->sweep_lock); + { + GC_ASSERT(!objspace->background_sweep_mode); + retry_swept_pages: + if (heap->swept_pages) { // grab the earliest page that the sweep thread swept (ie: it dequeues in swept order) + page = heap->swept_pages; + psweep_debug(0, "[gc] gc_sweep_dequeue_page: got page:%p from heap(%p)->swept_pages (sweep_lock) (heap %ld)\n", page, heap, heap - heaps); + heap->swept_pages = page->free_next; + } + else if (!heap->sweeping_page) { // This heap is finished + while (heap->pre_sweeping_page) { + sweep_lock_set_unlocked(); + rb_native_cond_wait(&heap->sweep_page_cond, &objspace->sweep_lock); + sweep_lock_set_locked(); + goto retry_swept_pages; + } + psweep_debug(0, "[gc] gc_sweep_dequeue_page: got nil page from heap(%p) (heap %ld) end\n", heap, heap - heaps); + } + else { + *dequeued_unswept_page = true; + page = heap->sweeping_page; // this could be the last page + heap->sweeping_page = ccan_list_next(&heap->pages, page, page_node); + psweep_debug(0, "[gc] gc_sweep_dequeue_page: dequeued unswept page from heap(%p) (heap %ld)\n", heap, heap - heaps); + } + GC_ASSERT(!objspace->background_sweep_mode); + } + sweep_lock_unlock(&objspace->sweep_lock); + + return page; +} + +MAYBE_UNUSED(static int +freelist_size(struct free_slot *slot)) +{ + if (!slot) return 0; + int size = 0; + while (slot) { + size++; + slot = slot->next; + } + return size; +} + +static inline bool +is_last_heap(rb_objspace_t *objspace, rb_heap_t *heap) +{ + return heap - heaps == (HEAP_COUNT - 1); +} + +// Perform incremental (lazy) sweep on a heap. 
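gc_sweep_dequeue_page above hands swept pages back in exactly the order the background thread finished them (the inline comment notes gc_sweep_step depends on that), which is why the producer keeps heap->latest_swept_page as a tail pointer. A single-lock sketch of that FIFO handoff, with hypothetical names; the patch itself splits the locking between the per-heap swept_pages_lock fast path and the global sweep_lock:

#include <pthread.h>
#include <stddef.h>

/* Hypothetical swept-page queue: the producer appends at the tail and the
 * consumer always pops the head, so pages come out in swept order. */
struct page { struct page *free_next; };

struct swept_queue {
    pthread_mutex_t lock;
    struct page *head;   /* heap->swept_pages in the patch       */
    struct page *tail;   /* heap->latest_swept_page in the patch */
};

static void
push_swept(struct swept_queue *q, struct page *p)
{
    p->free_next = NULL;
    pthread_mutex_lock(&q->lock);
    if (q->tail) q->tail->free_next = p;
    else         q->head = p;
    q->tail = p;
    pthread_mutex_unlock(&q->lock);
}

static struct page *
pop_swept(struct swept_queue *q)
{
    pthread_mutex_lock(&q->lock);
    struct page *p = q->head;
    if (p) {
        q->head = p->free_next;
        if (q->head == NULL) q->tail = NULL;
    }
    pthread_mutex_unlock(&q->lock);
    return p;
}

int
main(void)
{
    struct swept_queue q = { PTHREAD_MUTEX_INITIALIZER, NULL, NULL };
    struct page a, b;
    push_swept(&q, &a);
    push_swept(&q, &b);
    return (pop_swept(&q) == &a && pop_swept(&q) == &b && pop_swept(&q) == NULL) ? 0 : 1;
}

gc_sweep_step, defined next, is the consumer side of this queue.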
static int gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) { - struct heap_page *sweep_page = heap->sweeping_page; int swept_slots = 0; int pooled_slots = 0; + if (heap->pre_swept_slots_nodeferred >= GC_INCREMENTAL_SWEEP_POOL_SLOT_COUNT) { + swept_slots = heap->pre_swept_slots_nodeferred - GC_INCREMENTAL_SWEEP_POOL_SLOT_COUNT; + } + else if (heap->pre_swept_slots_nodeferred > 0) { + pooled_slots = heap->pre_swept_slots_nodeferred; + } + heap->pre_swept_slots_nodeferred = 0; + +#if VM_CHECK_MODE > 0 + sweep_lock_lock(&objspace->sweep_lock); + GC_ASSERT(!objspace->background_sweep_mode); + sweep_lock_unlock(&objspace->sweep_lock); +#endif - if (sweep_page == NULL) return FALSE; + if (heap_is_sweep_done(objspace, heap)) { + psweep_debug(0, "[gc] gc_sweep_step: heap %p (%ld) is heap_is_sweep_done() early!\n", heap, heap - heaps); + GC_ASSERT(heap->sweeping_page == NULL); + GC_ASSERT(heap->is_finished_sweeping); + return heap->free_pages != NULL; + } #if GC_ENABLE_LAZY_SWEEP gc_prof_sweep_timer_start(objspace); #endif + psweep_debug(-2, "[gc] gc_sweep_step heap:%p (%ld) use_sweep_thread:%d\n", heap, heap - heaps, objspace->use_background_sweep_thread); + bool sweep_rest = objspace->sweep_rest; + bool use_bg_thread = objspace->use_background_sweep_thread; + + while (1) { + bool free_in_user_thread_p = !use_bg_thread; + bool dequeued_unswept_page = false; + // NOTE: pages we dequeue from the sweep thread need to be AFTER the list of heap->free_pages so we don't free from pages + // we've allocated from since sweep started. + struct heap_page *sweep_page = gc_sweep_dequeue_page(objspace, heap, free_in_user_thread_p, &free_in_user_thread_p); + if (RB_UNLIKELY(!sweep_page)) { + psweep_debug(-2, "[gc] gc_sweep_step heap:%p (%ld) deq() = nil, break\n", heap, heap - heaps); + break; + } + if (dequeued_unswept_page) { + psweep_debug(-2, "[gc] gc_sweep_step heap:%p (%ld) deq unswept page\n", heap, heap - heaps); + } + else { + psweep_debug(-2, "[gc] gc_sweep_step heap:%p (%ld) deq preswept page\n", heap, heap - heaps); + } + GC_ASSERT(sweep_page->heap == heap); - do { RUBY_DEBUG_LOG("sweep_page:%p", (void *)sweep_page); struct gc_sweep_context ctx = { - .page = sweep_page, - .final_slots = 0, - .freed_slots = 0, - .empty_slots = 0, + .page = sweep_page }; - gc_sweep_page(objspace, heap, &ctx); - int free_slots = ctx.freed_slots + ctx.empty_slots; - heap->sweeping_page = ccan_list_next(&heap->pages, sweep_page, page_node); - - if (free_slots == sweep_page->total_slots) { - /* There are no living objects, so move this page to the global empty pages. 
*/ - heap_unlink_page(objspace, heap, sweep_page); + unsigned short deferred_free_final_slots = 0; + if (free_in_user_thread_p) { + gc_sweep_page(objspace, heap, &ctx); + GC_ASSERT(sweep_page->pre_deferred_free_slots == 0); + } + else { + gc_post_sweep_page(objspace, heap, sweep_page, false); // clear bits + // Process deferred free objects + unsigned short deferred_free_freed = 0; + unsigned short deferred_to_free = sweep_page->pre_deferred_free_slots; + + VALUE obj_buf[10]; + short deq_sz = 0; + psweep_debug(-2, "[gc] gc_sweep_step: (heap:%p %ld, page:%p) free_ruby_th: %d, deferred_to_free:%d, pre_freed:%d, pre_empty:%d\n", + heap, heap - heaps, sweep_page, free_in_user_thread_p, deferred_to_free, sweep_page->pre_freed_slots, sweep_page->pre_empty_slots); + int deferred_processed = 0; + while (deferred_processed < deferred_to_free) { + deq_sz = deq_deferred_sweep_objects(objspace, heap, obj_buf, deferred_to_free - deferred_processed); + psweep_debug(1, "[gc] gc_sweep_step(heap:%p %ld, page:%p) deq:%d\n", heap, heap - heaps, sweep_page, deq_sz); + for (short i = 0; i < deq_sz; i++) { + VALUE obj = obj_buf[i]; +#if VM_CHECK_MODE > 0 + if (GET_HEAP_PAGE(obj) != sweep_page) { + psweep_debug(0, "Error! bad heap page (got:%p, expecting:%p) obj type:%s\n", GET_HEAP_PAGE(obj), sweep_page, rb_obj_info(obj)); + } + GC_ASSERT(GET_HEAP_PAGE(obj) == sweep_page); +#endif + if (deferred_free(objspace, obj)) { + deferred_free_freed++; + } + else { + deferred_free_final_slots++; + } + deferred_processed++; + } + } + ctx.final_slots = sweep_page->pre_final_slots + deferred_free_final_slots; + ctx.freed_slots = sweep_page->pre_freed_slots + deferred_free_freed; + ctx.empty_slots = sweep_page->pre_empty_slots; + } - sweep_page->start = 0; - sweep_page->total_slots = 0; - sweep_page->slot_size = 0; - sweep_page->heap = NULL; - sweep_page->free_slots = 0; + if (0) fprintf(stderr, "gc_sweep_page(%"PRIdSIZE"): total_slots: %d, freed_slots: %d, empty_slots: %d, final_slots: %d\n", + rb_gc_count(), + sweep_page->total_slots, + ctx.freed_slots, ctx.empty_slots, ctx.final_slots); +#if GC_PROFILE_MORE_DETAIL + if (gc_prof_enabled(objspace)) { + gc_profile_record *record = gc_prof_record(objspace); + record->removing_objects += ctx.final_slots + ctx.freed_slots; + record->empty_objects += ctx.empty_slots; + } +#endif + int free_slots = ctx.freed_slots + ctx.empty_slots; + GC_ASSERT(sweep_page->total_slots > 0); + GC_ASSERT(sweep_page->total_slots >= free_slots); + + if (free_in_user_thread_p) { + GC_ASSERT(sweep_page->free_slots == free_slots); // gc_sweep_page() sets sweep_page->free slots + GC_ASSERT(sweep_page->heap->total_freed_objects >= (unsigned long)ctx.freed_slots); + GC_ASSERT(!sweep_page->deferred_freelist); + } else { + sweep_page->free_slots = free_slots; + sweep_page->final_slots += deferred_free_final_slots; + GC_ASSERT(sweep_page->free_slots <= sweep_page->total_slots); + GC_ASSERT(sweep_page->final_slots <= sweep_page->total_slots); + sweep_page->heap->total_freed_objects += ctx.freed_slots; + // merge freelists asan_unlock_freelist(sweep_page); - sweep_page->freelist = NULL; + asan_unlock_deferred_freelist(sweep_page); + struct free_slot *deferred_freelist = sweep_page->deferred_freelist; + psweep_debug(1, "[gc] gc_sweep_step: deferred freelist size:%d, free slots:%d\n", freelist_size(deferred_freelist), free_slots); + if (deferred_freelist) { + struct free_slot *cur_list = sweep_page->freelist; + psweep_debug(1, "[gc] gc_sweep_step: sweep_page->freelist size:%d\n", freelist_size(cur_list)); + if 
(cur_list) { + merge_freelists(deferred_freelist, cur_list); + } + sweep_page->freelist = deferred_freelist; + sweep_page->deferred_freelist = NULL; + } + else { + GC_ASSERT(sweep_page->pre_freed_slots == 0); + } + asan_lock_deferred_freelist(sweep_page); asan_lock_freelist(sweep_page); - asan_poison_memory_region(sweep_page->body, HEAP_PAGE_SIZE); + clear_pre_sweep_fields(sweep_page); + } + + // We never sweep a page that's currently in free_pages, such as a cached page. Our iterator is past those already. + GC_ASSERT(!sweep_page->heap_cache); + +#if RGENGC_CHECK_MODE + short freelist_len = 0; + asan_unlock_freelist(sweep_page); + struct free_slot *ptr = sweep_page->freelist; + while (ptr) { + freelist_len++; + rb_asan_unpoison_object((VALUE)ptr, false); + struct free_slot *next = ptr->next; + rb_asan_poison_object((VALUE)ptr); + ptr = next; + } + asan_lock_freelist(sweep_page); + if (freelist_len != sweep_page->free_slots) { + rb_bug("inconsistent freelist length: expected %d but was %d", sweep_page->free_slots, freelist_len); + } +#endif + + psweep_debug(0, "[gc] gc_sweep_step: dequeued page(heap:%p %ld, page:%p) free_slots:%u,total_slots:%u\n", heap, heap - heaps, sweep_page, free_slots, sweep_page->total_slots); - objspace->empty_pages_count++; - sweep_page->free_next = objspace->empty_pages; - objspace->empty_pages = sweep_page; + if (free_slots == sweep_page->total_slots) { + psweep_debug(0, "[gc] gc_sweep_step: adding to empty_pages:%p\n", sweep_page); + move_to_empty_pages(objspace, heap, sweep_page); } else if (free_slots > 0) { + // These are just for statistics, not used in calculations heap->freed_slots += ctx.freed_slots; heap->empty_slots += ctx.empty_slots; if (pooled_slots < GC_INCREMENTAL_SWEEP_POOL_SLOT_COUNT) { + psweep_debug(0, "[gc] gc_sweep_step: adding pooled_page:%p, pooled_slots:%d\n", sweep_page, pooled_slots); heap_add_poolpage(objspace, heap, sweep_page); pooled_slots += free_slots; } else { - heap_add_freepage(heap, sweep_page); + psweep_debug(0, "[gc] gc_sweep_step: adding freepage:%p, swept_slots:%d\n", sweep_page, swept_slots); + heap_add_freepage(heap, sweep_page, "gc_sweep_step"); swept_slots += free_slots; if (swept_slots > GC_INCREMENTAL_SWEEP_SLOT_COUNT) { + if (!sweep_rest && use_bg_thread) { + RUBY_ATOMIC_INC(heap->foreground_sweep_steps); // signal sweep thread to move on + } + psweep_debug(0, "[gc] gc_sweep_step got to SWEEP_SLOT_COUNT, break\n"); break; } } @@ -3937,13 +5337,14 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) else { sweep_page->free_next = NULL; } - } while ((sweep_page = heap->sweeping_page)); + } - if (!heap->sweeping_page) { + if (heap_is_sweep_done(objspace, heap)) { + psweep_debug(0, "[gc] gc_sweep_step heap:%p (%ld) sweep done\n", heap, heap - heaps); gc_sweep_finish_heap(objspace, heap); if (!has_sweeping_pages(objspace)) { - gc_sweep_finish(objspace); + gc_sweep_finish(objspace); // done, no more pages in any heap } } @@ -3951,36 +5352,132 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) gc_prof_sweep_timer_stop(objspace); #endif + psweep_debug(1, "[gc] gc_sweep_step: finished for heap:%p (%ld), got free page:%d\n", heap, heap - heaps, heap->free_pages != NULL); return heap->free_pages != NULL; } +static bool +background_sweep_done_p(rb_objspace_t *objspace) +{ + // must have sweep_lock acquired (TODO: add assertion) + return objspace->heaps_done_background_sweep == HEAP_COUNT; +} + +unsigned long long sweep_rest_count = 0; + static void gc_sweep_rest(rb_objspace_t *objspace) { + sweep_rest_count++; + 
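+ // Ask the background sweep thread (if enabled) to keep sweeping, then finish any heaps
+ // that still have pages to sweep on this thread below.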
sweep_lock_lock(&objspace->sweep_lock);
+ {
+ if (background_sweep_done_p(objspace)) {
+ psweep_debug(-2, "[gc] gc_sweep_rest: bg done, not requesting\n");
+ }
+ else {
+ objspace->sweep_rest = true; // reset to false in `gc_sweeping_exit`
+ if (objspace->use_background_sweep_thread && !objspace->sweep_thread_sweeping && !objspace->sweep_thread_sweep_requested) {
+ psweep_debug(-2, "[gc] gc_sweep_rest: request sweep thread\n");
+ objspace->sweep_thread_sweep_requested = true;
+ rb_native_cond_broadcast(&objspace->sweep_cond);
+ }
+ else if (objspace->use_background_sweep_thread) {
+ psweep_debug(-2, "[gc] gc_sweep_rest: restart sweep thread\n");
+ objspace->background_sweep_restart_heaps = true; // restart sweeping heaps from heap 0
+ }
+ }
+ }
+ sweep_lock_unlock(&objspace->sweep_lock);
+
+ // NOTE: the background sweep thread walks the heaps forwards from heap 0; we also walk them forwards here, sweeping each heap to completion before moving on.
for (int i = 0; i < HEAP_COUNT; i++) {
rb_heap_t *heap = &heaps[i];
- while (heap->sweeping_page) {
+ while (!heap_is_sweep_done(objspace, heap)) {
+ psweep_debug(0, "[gc] gc_sweep_rest: gc_sweep_step heap:%p (heap %ld)\n", heap, heap - heaps);
gc_sweep_step(objspace, heap);
}
+ GC_ASSERT(heap->is_finished_sweeping);
+ heap->background_sweep_steps = heap->foreground_sweep_steps;
}
+
+ GC_ASSERT(!has_sweeping_pages(objspace));
+ GC_ASSERT(gc_mode(objspace) == gc_mode_none);
}
+unsigned long long sweep_continue_count = 0;
+
static void
gc_sweep_continue(rb_objspace_t *objspace, rb_heap_t *sweep_heap)
{
GC_ASSERT(dont_gc_val() == FALSE || objspace->profile.latest_gc_info & GPR_FLAG_METHOD);
if (!GC_ENABLE_LAZY_SWEEP) return;
- gc_sweeping_enter(objspace);
+ psweep_debug(-2, "[gc] gc_sweep_continue\n");
+
+ sweep_continue_count++;
+
+ gc_sweeping_enter(objspace, "gc_sweep_continue");
+ sweep_lock_lock(&objspace->sweep_lock);
+ {
+ if (objspace->use_background_sweep_thread) {
+ if (background_sweep_done_p(objspace)) {
+ psweep_debug(-2, "[gc] gc_sweep_continue: bg done, not requesting\n");
+ }
+ else {
+ int num_heaps_need_continue = 0;
+ for (int i = 0; i < HEAP_COUNT; i++) {
+ rb_heap_t *heap = &heaps[i];
+ heap->background_sweep_steps = heap->foreground_sweep_steps;
+ if (heap->pre_swept_slots_deferred >= (GC_INCREMENTAL_SWEEP_SLOT_COUNT + GC_INCREMENTAL_SWEEP_POOL_SLOT_COUNT)) {
+ heap->skip_sweep_continue = true;
+ }
+ else {
+ if (!heap->is_finished_sweeping && !heap->done_background_sweep) {
+ num_heaps_need_continue++;
+ }
+ heap->skip_sweep_continue = false;
+ }
+ heap->pre_swept_slots_deferred = 0;
+ }
+ if (num_heaps_need_continue > 0) {
+ if (!objspace->sweep_thread_sweeping && !objspace->sweep_thread_sweep_requested) {
+ psweep_debug(-2, "[gc] gc_sweep_continue: requesting sweep thread\n");
+ objspace->sweep_thread_sweep_requested = true;
+ rb_native_cond_broadcast(&objspace->sweep_cond);
+ }
+ else {
+ psweep_debug(-2, "[gc] gc_sweep_continue: sweep thread restart heaps\n");
+ objspace->background_sweep_restart_heaps = true;
+ }
+ }
+ }
+ }
+ else {
+ psweep_debug(-2, "[gc] gc_sweep_continue: !use_background_sweep_thread\n");
+ }
+ }
+ sweep_lock_unlock(&objspace->sweep_lock);
for (int i = 0; i < 
HEAP_COUNT; i++) { rb_heap_t *heap = &heaps[i]; + if (gc_sweep_step(objspace, heap)) { GC_ASSERT(heap->free_pages != NULL); } else if (heap == sweep_heap) { if (objspace->empty_pages_count > 0 || objspace->heap_pages.allocatable_bytes > 0) { + GC_ASSERT(!sweep_heap->sweeping_page); // went through whole heap, couldn't find free page /* [Bug #21548] * * If this heap is the heap we want to sweep, but we weren't able @@ -3991,7 +5488,7 @@ gc_sweep_continue(rb_objspace_t *objspace, rb_heap_t *sweep_heap) * empty/allocatable pages. If other heaps are not finished sweeping * then we do not finish this GC and we will end up triggering a new * GC cycle during this GC phase. */ - heap_page_allocate_and_initialize(objspace, heap); + heap_page_allocate_and_initialize(objspace, heap, false); GC_ASSERT(heap->free_pages != NULL); } @@ -4103,10 +5600,16 @@ gc_compact_start(rb_objspace_t *objspace) struct heap_page *page = NULL; gc_mode_transition(objspace, gc_mode_compacting); +#if VM_CHECK_MODE > 0 + sweep_lock_lock(&objspace->sweep_lock); + GC_ASSERT(!objspace->sweep_thread_sweeping && !objspace->sweep_thread_sweep_requested); + sweep_lock_unlock(&objspace->sweep_lock); +#endif + for (int i = 0; i < HEAP_COUNT; i++) { rb_heap_t *heap = &heaps[i]; ccan_list_for_each(&heap->pages, page, page_node) { - page->flags.before_sweep = TRUE; + page->before_sweep = 1; } heap->compact_cursor = ccan_list_tail(&heap->pages, struct heap_page, page_node); @@ -4132,10 +5635,10 @@ static void gc_sweep_compact(rb_objspace_t *objspace); static void gc_sweep(rb_objspace_t *objspace) { - gc_sweeping_enter(objspace); - const unsigned int immediate_sweep = objspace->flags.immediate_sweep; + gc_sweeping_enter(objspace, "gc_sweep"); + gc_report(1, objspace, "gc_sweep: immediate: %d\n", immediate_sweep); gc_sweep_start(objspace); @@ -4148,12 +5651,12 @@ gc_sweep(rb_objspace_t *objspace) gc_prof_sweep_timer_start(objspace); #endif gc_sweep_rest(objspace); + #if !GC_ENABLE_LAZY_SWEEP gc_prof_sweep_timer_stop(objspace); #endif } else { - /* Sweep every size pool. */ for (int i = 0; i < HEAP_COUNT; i++) { rb_heap_t *heap = &heaps[i]; @@ -4986,6 +6489,7 @@ verify_internal_consistency_i(void *page_start, void *page_end, size_t stride, { VALUE obj; rb_objspace_t *objspace = data->objspace; + return 0; // FIXME for parallel sweep for (obj = (VALUE)page_start; obj != (VALUE)page_end; obj += stride) { asan_unpoisoning_object(obj) { @@ -5150,6 +6654,7 @@ gc_verify_heap_pages(rb_objspace_t *objspace) static void gc_verify_internal_consistency_(rb_objspace_t *objspace) { + return; // FIXME for parallel sweep struct verify_internal_consistency_struct data = {0}; data.objspace = objspace; @@ -5563,7 +7068,7 @@ gc_compact_move(rb_objspace_t *objspace, rb_heap_t *heap, VALUE src) unlock_page_body(objspace, GET_PAGE_BODY(src)); if (dest_pool->sweeping_page->free_slots > 0) { - heap_add_freepage(dest_pool, dest_pool->sweeping_page); + heap_add_freepage(dest_pool, dest_pool->sweeping_page, "gc_compact_move"); } dest_pool->sweeping_page = ccan_list_next(&dest_pool->pages, dest_pool->sweeping_page, page_node); @@ -5753,6 +7258,7 @@ gc_marks_continue(rb_objspace_t *objspace, rb_heap_t *heap) static void gc_marks_start(rb_objspace_t *objspace, int full_mark) { + // NOTE: background sweeping cannot be running during marking. /* start marking */ gc_report(1, objspace, "gc_marks_start: (%s)\n", full_mark ? 
"full" : "minor"); gc_mode_transition(objspace, gc_mode_marking); @@ -6152,7 +7658,7 @@ rb_gc_impl_writebarrier_unprotect(void *objspace_ptr, VALUE obj) } RB_DEBUG_COUNTER_INC(obj_wb_unprotect); - MARK_IN_BITMAP(GET_HEAP_WB_UNPROTECTED_BITS(obj), obj); + ATOMIC_MARK_IN_BITMAP(GET_HEAP_WB_UNPROTECTED_BITS(obj), obj); } RB_GC_VM_UNLOCK_NO_BARRIER(lev); } @@ -6278,9 +7784,9 @@ static void heap_ready_to_gc(rb_objspace_t *objspace, rb_heap_t *heap) { if (!heap->free_pages) { - if (!heap_page_allocate_and_initialize(objspace, heap)) { + if (!heap_page_allocate_and_initialize(objspace, heap, false)) { objspace->heap_pages.allocatable_bytes = HEAP_PAGE_SIZE; - heap_page_allocate_and_initialize(objspace, heap); + heap_page_allocate_and_initialize(objspace, heap, false); } } } @@ -6400,6 +7906,8 @@ gc_start(rb_objspace_t *objspace, unsigned int reason) if (!rb_darray_size(objspace->heap_pages.sorted)) return TRUE; /* heap is not ready */ if (!(reason & GPR_FLAG_METHOD) && !ready_to_gc(objspace)) return TRUE; /* GC is not allowed */ + wait_for_background_sweeping_to_finish(objspace, true, false, "gc_start"); // in case user called `GC.start` explicitly + GC_ASSERT(gc_mode(objspace) == gc_mode_none, "gc_mode is %s\n", gc_mode_name(gc_mode(objspace))); GC_ASSERT(!is_lazy_sweeping(objspace)); GC_ASSERT(!is_incremental_marking(objspace)); @@ -6531,7 +8039,7 @@ gc_rest(rb_objspace_t *objspace) } if (is_lazy_sweeping(objspace)) { - gc_sweeping_enter(objspace); + gc_sweeping_enter(objspace, "gc_rest"); gc_sweep_rest(objspace); gc_sweeping_exit(objspace); } @@ -6668,6 +8176,30 @@ gc_clock_end(struct timespec *ts) return 0; } +#if PSWEEP_COLLECT_TIMINGS > 0 +/* Wall time clock functions using CLOCK_MONOTONIC */ +static void +gc_wall_clock_start(struct timespec *ts) +{ + if (clock_gettime(CLOCK_MONOTONIC, ts) != 0) { + ts->tv_sec = 0; + ts->tv_nsec = 0; + } +} + +static unsigned long long +gc_wall_clock_end(struct timespec *ts) +{ + struct timespec end_time; + + if ((ts->tv_sec > 0 || ts->tv_nsec > 0) && + clock_gettime(CLOCK_MONOTONIC, &end_time) == 0) { + return (unsigned long long)(end_time.tv_sec - ts->tv_sec) * (1000 * 1000 * 1000) + (end_time.tv_nsec - ts->tv_nsec); + } + return 0; +} +#endif + static inline void gc_enter(rb_objspace_t *objspace, enum gc_enter_event event, unsigned int *lock_lev) { @@ -6688,6 +8220,7 @@ gc_enter(rb_objspace_t *objspace, enum gc_enter_event event, unsigned int *lock_ if (RB_UNLIKELY(during_gc != 0)) rb_bug("during_gc != 0"); if (RGENGC_CHECK_MODE >= 3) gc_verify_internal_consistency(objspace); + GC_ASSERT(!is_sweep_thread_p()); during_gc = TRUE; RUBY_DEBUG_LOG("%s (%s)",gc_enter_event_cstr(event), gc_current_status(objspace)); gc_report(1, objspace, "gc_enter: %s [%s]\n", gc_enter_event_cstr(event), gc_current_status(objspace)); @@ -6706,7 +8239,8 @@ gc_exit(rb_objspace_t *objspace, enum gc_enter_event event, unsigned int *lock_l gc_record(objspace, 1, gc_enter_event_cstr(event)); RUBY_DEBUG_LOG("%s (%s)", gc_enter_event_cstr(event), gc_current_status(objspace)); gc_report(1, objspace, "gc_exit: %s [%s]\n", gc_enter_event_cstr(event), gc_current_status(objspace)); - during_gc = FALSE; + GC_ASSERT(!is_sweep_thread_p()); + during_gc = FALSE; // NOTE: background thread could still be sweeping even if !during_gc RB_GC_VM_UNLOCK(*lock_lev); } @@ -6735,24 +8269,80 @@ gc_marking_exit(rb_objspace_t *objspace) } } +unsigned long long sweeping_enter_count = 0; + static void -gc_sweeping_enter(rb_objspace_t *objspace) +gc_sweeping_enter(rb_objspace_t *objspace, const char *from_fn) 
{ + MAYBE_UNUSED(const unsigned int immediate_sweep) = objspace->flags.immediate_sweep; + psweep_debug(1, "[gc] gc_sweeping_enter from %s (immediate:%u)\n", from_fn, immediate_sweep); GC_ASSERT(during_gc != 0); + sweep_lock_lock(&objspace->sweep_lock); + { + objspace->background_sweep_mode = false; + } + sweep_lock_unlock(&objspace->sweep_lock); + if (MEASURE_GC) { gc_clock_start(&objspace->profile.sweeping_start_time); } + + sweeping_enter_count++; + /* Always track Ruby thread sweep time */ +#if PSWEEP_COLLECT_TIMINGS > 0 + gc_clock_start(&objspace->profile.ruby_thread_sweep_cpu_start_time); + gc_wall_clock_start(&objspace->profile.ruby_thread_sweep_wall_start_time); +#endif } static void gc_sweeping_exit(rb_objspace_t *objspace) { GC_ASSERT(during_gc != 0); + psweep_debug(1, "[gc] gc_sweeping_exit\n"); + MAYBE_UNUSED(bool was_rest) = objspace->sweep_rest; + + bool continue_sweep_in_background = objspace->use_background_sweep_thread && + !objspace->sweep_rest && !dont_gc_val() && is_lazy_sweeping(objspace); + + if (continue_sweep_in_background) { + if (background_sweep_done_p(objspace)) { + psweep_debug(-2, "[gc] gc_sweeping_exit: bg done, not requesting\n"); + } + else { + psweep_debug(-2, "[gc] gc_sweeping_exit: continue in background\n"); + sweep_lock_lock(&objspace->sweep_lock); + objspace->background_sweep_mode = true; + if (!objspace->sweep_thread_sweeping && !objspace->sweep_thread_sweep_requested) { + psweep_debug(-2, "[gc] gc_sweeping_exit: requested\n"); + objspace->sweep_thread_sweep_requested = true; + rb_native_cond_broadcast(&objspace->sweep_cond); + } + else { + psweep_debug(-2, "[gc] gc_sweeping_exit: restart heaps\n"); + objspace->background_sweep_restart_heaps = true; // restart sweeping heaps from heap 0 + } + sweep_lock_unlock(&objspace->sweep_lock); + } + } + else { + GC_ASSERT(!objspace->background_sweep_mode); + psweep_debug(-2, "[gc] gc_sweeping_exit: don't continue (rest:%d, use:%d)\n", was_rest, objspace->use_background_sweep_thread); + sweep_lock_lock(&objspace->sweep_lock); + objspace->sweep_rest = false; + sweep_lock_unlock(&objspace->sweep_lock); + } if (MEASURE_GC) { objspace->profile.sweeping_time_ns += gc_clock_end(&objspace->profile.sweeping_start_time); } + + /* Always track Ruby thread sweep time */ +#if PSWEEP_COLLECT_TIMINGS > 0 + objspace->profile.ruby_thread_sweep_cpu_time_ns += gc_clock_end(&objspace->profile.ruby_thread_sweep_cpu_start_time); + objspace->profile.ruby_thread_sweep_wall_time_ns += gc_wall_clock_end(&objspace->profile.ruby_thread_sweep_wall_start_time); +#endif } static void * @@ -6836,11 +8426,33 @@ rb_gc_impl_start(void *objspace_ptr, bool full_mark, bool immediate_mark, bool i } garbage_collect(objspace, reason); +#if VM_CHECK_MODE > 0 + if (immediate_sweep) { + sweep_lock_lock(&objspace->sweep_lock); + { + GC_ASSERT(!objspace->sweep_thread_sweeping); + GC_ASSERT(!objspace->sweep_thread_sweep_requested); + for (int j = 0; j < HEAP_COUNT; j++) { + rb_heap_t *heap = &heaps[j]; + GC_ASSERT(!heap->swept_pages); + GC_ASSERT(!heap->sweeping_page); + } + } + sweep_lock_unlock(&objspace->sweep_lock); + } +#endif + // NOTE: background sweeping can still be active here. We also may enter a new GC cycle from finalizers below. 
gc_finalize_deferred(objspace); gc_config_full_mark_set(full_marking_p); } +void +rb_gc_stop_background_threads(rb_objspace_t *objspace, const char *from_fn) +{ + wait_for_background_sweeping_to_finish(objspace, true, true, from_fn); +} + void rb_gc_impl_prepare_heap(void *objspace_ptr) { @@ -7064,8 +8676,8 @@ gc_sort_heap_by_compare_func(rb_objspace_t *objspace, gc_compact_compare_func co for (i = 0; i < total_pages; i++) { ccan_list_add(&heap->pages, &page_list[i]->page_node); - if (page_list[i]->free_slots != 0) { - heap_add_freepage(heap, page_list[i]); + if (page_list[i]->free_slots != 0 && page_list[i]->start) { + heap_add_freepage(heap, page_list[i], "sort_by_compare_func"); } } @@ -7109,7 +8721,7 @@ gc_ref_update(void *vstart, void *vend, size_t stride, rb_objspace_t *objspace, if (RVALUE_REMEMBERED(objspace, v)) { page->flags.has_remembered_objects = TRUE; } - if (page->flags.before_sweep) { + if (page->before_sweep) { if (RVALUE_MARKED(objspace, v)) { rb_gc_update_object_references(objspace, v); } @@ -7527,7 +9139,7 @@ ns_to_ms(uint64_t ns) return ns / (1000 * 1000); } -static void malloc_increase_local_flush(rb_objspace_t *objspace); +static size_t malloc_increase_local_flush(rb_objspace_t *objspace); VALUE rb_gc_impl_stat(void *objspace_ptr, VALUE hash_or_sym) @@ -8001,9 +9613,9 @@ atomic_sub_nounderflow(size_t *var, size_t sub) if (sub == 0) return; while (1) { - size_t val = *var; + size_t val = rbimpl_atomic_size_load(var, RBIMPL_ATOMIC_RELAXED); if (val < sub) sub = val; - if (RUBY_ATOMIC_SIZE_CAS(*var, val, val-sub) == val) break; + if (rbimpl_atomic_size_cas(var, val, val-sub, RBIMPL_ATOMIC_RELAXED, RBIMPL_ATOMIC_RELAXED) == val) break; } } @@ -8024,42 +9636,46 @@ objspace_malloc_gc_stress(rb_objspace_t *objspace) } } -static void +static size_t malloc_increase_commit(rb_objspace_t *objspace, size_t new_size, size_t old_size) { if (new_size > old_size) { - RUBY_ATOMIC_SIZE_ADD(malloc_increase, new_size - old_size); + size_t delta = new_size - old_size; + size_t old_val = rbimpl_atomic_size_fetch_add(&malloc_increase, delta, RBIMPL_ATOMIC_RELAXED); #if RGENGC_ESTIMATE_OLDMALLOC - RUBY_ATOMIC_SIZE_ADD(objspace->malloc_counters.oldmalloc_increase, new_size - old_size); + rbimpl_atomic_size_add(&objspace->malloc_counters.oldmalloc_increase, delta, RBIMPL_ATOMIC_RELAXED); #endif + return old_val + delta; } else { atomic_sub_nounderflow(&malloc_increase, old_size - new_size); #if RGENGC_ESTIMATE_OLDMALLOC atomic_sub_nounderflow(&objspace->malloc_counters.oldmalloc_increase, old_size - new_size); #endif + return 0; } } #if USE_MALLOC_INCREASE_LOCAL -static void +static size_t malloc_increase_local_flush(rb_objspace_t *objspace) { int delta = malloc_increase_local; - if (delta == 0) return; + if (delta == 0) return 0; malloc_increase_local = 0; if (delta > 0) { - malloc_increase_commit(objspace, (size_t)delta, 0); + return malloc_increase_commit(objspace, (size_t)delta, 0); } else { - malloc_increase_commit(objspace, 0, (size_t)(-delta)); + return malloc_increase_commit(objspace, 0, (size_t)(-delta)); } } #else -static void +static size_t malloc_increase_local_flush(rb_objspace_t *objspace) { + return 0; } #endif @@ -8078,6 +9694,8 @@ objspace_malloc_increase_report(rb_objspace_t *objspace, void *mem, size_t new_s static bool objspace_malloc_increase_body(rb_objspace_t *objspace, void *mem, size_t new_size, size_t old_size, enum memop_type type, bool gc_allowed) { + size_t current_malloc_increase = 0; + #if USE_MALLOC_INCREASE_LOCAL if (new_size < 
GC_MALLOC_INCREASE_LOCAL_THRESHOLD && old_size < GC_MALLOC_INCREASE_LOCAL_THRESHOLD) { @@ -8085,22 +9703,23 @@ objspace_malloc_increase_body(rb_objspace_t *objspace, void *mem, size_t new_siz if (malloc_increase_local >= GC_MALLOC_INCREASE_LOCAL_THRESHOLD || malloc_increase_local <= -GC_MALLOC_INCREASE_LOCAL_THRESHOLD) { - malloc_increase_local_flush(objspace); + current_malloc_increase = malloc_increase_local_flush(objspace); } } else { malloc_increase_local_flush(objspace); - malloc_increase_commit(objspace, new_size, old_size); + current_malloc_increase = malloc_increase_commit(objspace, new_size, old_size); } #else - malloc_increase_commit(objspace, new_size, old_size); + current_malloc_increase = malloc_increase_commit(objspace, new_size, old_size); #endif if (type == MEMOP_TYPE_MALLOC && gc_allowed) { retry: - if (malloc_increase > malloc_limit && ruby_native_thread_p() && !dont_gc_val()) { + if (current_malloc_increase > malloc_limit && ruby_native_thread_p() && !dont_gc_val()) { if (ruby_thread_has_gvl_p() && is_lazy_sweeping(objspace)) { gc_rest(objspace); /* gc_rest can reduce malloc_increase */ + current_malloc_increase = rbimpl_atomic_size_load(&malloc_increase, RBIMPL_ATOMIC_RELAXED); goto retry; } garbage_collect_with_gvl(objspace, GPR_FLAG_MALLOC); @@ -8168,13 +9787,19 @@ objspace_malloc_prepare(rb_objspace_t *objspace, size_t size) } static bool -malloc_during_gc_p(rb_objspace_t *objspace) +bad_malloc_during_gc_p(rb_objspace_t *objspace) { /* malloc is not allowed during GC when we're not using multiple ractors * (since ractors can run while another thread is sweeping) and when we * have the GVL (since if we don't have the GVL, we'll try to acquire the * GVL which will block and ensure the other thread finishes GC). */ - return during_gc && !dont_gc_val() && !rb_gc_multi_ractor_p() && ruby_thread_has_gvl_p(); + if (is_sweep_thread_p()) { + fprintf(stderr, "ERROR: bad malloc/calloc call family during GC in sweep thread!\n"); + return true; + } + else { + return during_gc && !dont_gc_val() && !rb_gc_multi_ractor_p() && ruby_thread_has_gvl_p(); + } } static inline void * @@ -8233,10 +9858,16 @@ objspace_malloc_fixup(rb_objspace_t *objspace, void *mem, size_t size, bool gc_a static void check_malloc_not_in_gc(rb_objspace_t *objspace, const char *msg) { - if (RB_UNLIKELY(malloc_during_gc_p(objspace))) { - dont_gc_on(); - during_gc = false; - rb_bug("Cannot %s during GC", msg); + if (RB_UNLIKELY(bad_malloc_during_gc_p(objspace))) { + if (is_sweep_thread_p()) { + fprintf(stderr, "Bad %s in sweep thread, exiting\n", msg); + exit(EXIT_FAILURE); + } + else { + dont_gc_on(); + during_gc = false; + rb_bug("Cannot %s during GC", msg); + } } } @@ -8295,11 +9926,16 @@ rb_gc_impl_calloc(void *objspace_ptr, size_t size, bool gc_allowed) { rb_objspace_t *objspace = objspace_ptr; - if (RB_UNLIKELY(malloc_during_gc_p(objspace))) { - rb_warn("calloc during GC detected, this could cause crashes if it triggers another GC"); + if (RB_UNLIKELY(bad_malloc_during_gc_p(objspace))) { + if (is_sweep_thread_p()) { + fprintf(stderr, "calloc in sweep thread detected! 
This could cause crashes!\n");
+ }
+ else {
+ rb_warn("calloc during GC detected, this could cause crashes if it triggers another GC");
#if RGENGC_CHECK_MODE || RUBY_DEBUG
- rb_bug("Cannot calloc during GC");
+ rb_bug("Cannot calloc during GC");
#endif
+ }
}
void *mem;
@@ -9312,7 +10948,7 @@ gc_verify_compaction_references(int argc, VALUE* argv, VALUE self)
*/
objspace->heap_pages.allocatable_bytes = desired_compaction.required_slots[i] * heap->slot_size;
while (objspace->heap_pages.allocatable_bytes > 0) {
- heap_page_allocate_and_initialize(objspace, heap);
+ heap_page_allocate_and_initialize(objspace, heap, false);
}
/*
* Step 3: Add two more pages so that the compact & sweep cursors will meet _after_ all objects
@@ -9321,7 +10957,7 @@
pages_to_add += 2;
for (; pages_to_add > 0; pages_to_add--) {
- heap_page_allocate_and_initialize_force(objspace, heap);
+ heap_page_allocate_and_initialize_force(objspace, heap, false);
}
}
}
@@ -9350,29 +10986,51 @@ rb_gc_impl_objspace_free(void *objspace_ptr)
{
rb_objspace_t *objspace = objspace_ptr;
- if (is_lazy_sweeping(objspace))
- rb_bug("lazy sweeping underway when freeing object space");
+// if (is_lazy_sweeping(objspace))
+// rb_bug("lazy sweeping underway when freeing object space");
+
+ rb_gc_stop_background_threads(objspace, "objspace_free");
+
+#if PSWEEP_LOCK_STATS > 0
+ /* Print lock contention statistics before freeing */
+ print_lock_stats();
+#endif
+
+#if PSWEEP_COLLECT_TIMINGS > 0
+ /* Print Ruby thread sweep time to stderr */
+ double ruby_thread_sweep_cpu_time_ms = (double)(objspace->profile.ruby_thread_sweep_cpu_time_ns) / 1000000.0;
+ double ruby_thread_sweep_wall_time_ms = ((double)objspace->profile.ruby_thread_sweep_wall_time_ns) / 1000000.0;
+ fprintf(stderr, "\nSweep Time (CPU): %.3f ms (%.6f seconds)\n", ruby_thread_sweep_cpu_time_ms, ruby_thread_sweep_cpu_time_ms / 1000.0);
+ fprintf(stderr, "\nSweep Time (Wall): %.3f ms (%.6f seconds)\n", ruby_thread_sweep_wall_time_ms, ruby_thread_sweep_wall_time_ms / 1000.0);
+ fprintf(stderr, "\nSweeping enter count: %llu\n", sweeping_enter_count);
+ fprintf(stderr, "\nSweep continue count: %llu\n", sweep_continue_count);
+ fprintf(stderr, "\nSweep rest count: %llu\n", sweep_rest_count);
+#endif
free(objspace->profile.records);
objspace->profile.records = NULL;
for (size_t i = 0; i < rb_darray_size(objspace->heap_pages.sorted); i++) {
- heap_page_free(objspace, rb_darray_get(objspace->heap_pages.sorted, i));
+ heap_page_free(objspace, rb_darray_get(objspace->heap_pages.sorted, i), false);
}
rb_darray_free_without_gc(objspace->heap_pages.sorted);
heap_pages_lomem = 0;
heap_pages_himem = 0;
+ free_stack_chunks(&objspace->mark_stack);
+ mark_stack_free_cache(&objspace->mark_stack);
+
for (int i = 0; i < HEAP_COUNT; i++) {
rb_heap_t *heap = &heaps[i];
+ rb_native_mutex_destroy(&heap->swept_pages_lock);
+ rb_native_cond_destroy(&heap->sweep_page_cond);
heap->total_pages = 0;
heap->total_slots = 0;
}
- free_stack_chunks(&objspace->mark_stack);
- mark_stack_free_cache(&objspace->mark_stack);
-
rb_darray_free_without_gc(objspace->weak_references);
+ rb_native_cond_destroy(&objspace->sweep_cond);
+ rb_native_mutex_destroy(&objspace->sweep_lock);
free(objspace);
}
@@ -9416,8 +11074,11 @@ rb_gc_impl_before_fork(void *objspace_ptr)
{
rb_objspace_t *objspace = objspace_ptr;
+ wait_for_background_sweeping_to_finish(objspace, true, false, "impl_before_fork");
+
objspace->fork_vm_lock_lev = RB_GC_VM_LOCK();
rb_gc_vm_barrier();
+ 
GC_ASSERT(!during_gc); } void @@ -9428,8 +11089,45 @@ rb_gc_impl_after_fork(void *objspace_ptr, rb_pid_t pid) RB_GC_VM_UNLOCK(objspace->fork_vm_lock_lev); objspace->fork_vm_lock_lev = 0; + void fiber_pool_lock_reset(void); + fiber_pool_lock_reset(); + // TODO: reset the id_table lock in case of Ractors. + + GC_ASSERT(!during_gc); if (pid == 0) { /* child process */ + objspace->sweep_thread = 0; + rb_native_mutex_initialize(&objspace->sweep_lock); + rb_native_cond_initialize(&objspace->sweep_cond); + for (int i = 0; i < HEAP_COUNT; i++) { + rb_heap_t *heap = &heaps[i]; + + rb_native_mutex_initialize(&heap->deferred_sweep_data.lock); + rb_native_mutex_initialize(&heap->swept_pages_lock); + rb_native_cond_initialize(&heap->sweep_page_cond); + heap->pre_sweeping_page = NULL; + heap->background_sweep_steps = heap->foreground_sweep_steps; + } rb_gc_ractor_newobj_cache_foreach(gc_ractor_newobj_cache_clear, NULL); + + sweep_lock_owner = 0; + /* Start the sweep thread after fork */ + objspace->sweep_thread_running = true; + objspace->sweep_thread_sweep_requested = false; + objspace->sweep_thread_sweeping = false; + objspace->sweep_thread_waiting_request = false; + GC_ASSERT(!objspace->background_sweep_mode); + GC_ASSERT(!objspace->background_sweep_abort); + GC_ASSERT(!objspace->background_sweep_restart_heaps); + pthread_create(&objspace->sweep_thread, NULL, gc_sweep_thread_func, objspace); + GET_VM()->gc.sweep_thread = objspace->sweep_thread; + sweep_lock_lock(&objspace->sweep_lock); + // The thread needs to be ready to accept sweep requests. + while (!objspace->sweep_thread_waiting_request) { + sweep_lock_unlock(&objspace->sweep_lock); + usleep(50); + sweep_lock_lock(&objspace->sweep_lock); + } + sweep_lock_unlock(&objspace->sweep_lock); } } @@ -9516,6 +11214,10 @@ rb_gc_impl_objspace_init(void *objspace_ptr) slot_div_magics[i] = (uint32_t)((uint64_t)UINT32_MAX / heap->slot_size + 1); ccan_list_head_init(&heap->pages); + rb_native_mutex_initialize(&heap->deferred_sweep_data.lock); + rb_native_mutex_initialize(&heap->swept_pages_lock); + rb_darray_make_without_gc(&heap->deferred_sweep_data.object_list, 0); + rb_native_cond_initialize(&heap->sweep_page_cond); } init_size_to_heap_idx(); @@ -9536,6 +11238,12 @@ rb_gc_impl_objspace_init(void *objspace_ptr) objspace->profile.invoke_time = getrusage_time(); finalizer_table = st_init_numtable(); + + rb_native_mutex_initialize(&objspace->sweep_lock); + rb_native_cond_initialize(&objspace->sweep_cond); + objspace->sweep_thread_running = true; + pthread_create(&objspace->sweep_thread, NULL, gc_sweep_thread_func, objspace); + GET_VM()->gc.sweep_thread = objspace->sweep_thread; } void diff --git a/gc/gc.h b/gc/gc.h index 469a4902f03365..20d941ef9102fd 100644 --- a/gc/gc.h +++ b/gc/gc.h @@ -81,7 +81,8 @@ MODULAR_GC_FN void *rb_gc_get_objspace(void); MODULAR_GC_FN void rb_gc_run_obj_finalizer(VALUE objid, long count, VALUE (*callback)(long i, void *data), void *data); MODULAR_GC_FN void rb_gc_set_pending_interrupt(void); MODULAR_GC_FN void rb_gc_unset_pending_interrupt(void); -MODULAR_GC_FN void rb_gc_obj_free_vm_weak_references(VALUE obj); +MODULAR_GC_FN bool rb_gc_obj_free_vm_weak_references(VALUE obj); +MODULAR_GC_FN bool rb_gc_obj_has_blacklisted_vm_weak_references(VALUE obj); MODULAR_GC_FN bool rb_gc_obj_free(void *objspace, VALUE obj); MODULAR_GC_FN void rb_gc_save_machine_context(void); MODULAR_GC_FN void rb_gc_mark_roots(void *objspace, const char **categoryp); diff --git a/include/ruby/atomic.h b/include/ruby/atomic.h index 
fcc48f532c89ba..32371953f4037c 100644 --- a/include/ruby/atomic.h +++ b/include/ruby/atomic.h @@ -36,6 +36,7 @@ #if RBIMPL_COMPILER_IS(MSVC) # pragma intrinsic(_InterlockedOr) +# pragma intrinsic(_InterlockedAnd) #elif defined(__sun) && defined(HAVE_ATOMIC_H) # include #endif @@ -140,6 +141,48 @@ typedef unsigned int rb_atomic_t; */ #define RUBY_ATOMIC_OR(var, val) rbimpl_atomic_or(&(var), (val), RBIMPL_ATOMIC_SEQ_CST) +/** + * Atomically replaces the value pointed by `var` with the result of + * bitwise AND between `val` and the old value of `var`. + * + * @param var A variable of ::rb_atomic_t. + * @param val Value to mask. + * @return void + * @post `var` holds `var & val`. + */ +#define RUBY_ATOMIC_AND(var, val) rbimpl_atomic_and(&(var), (val), RBIMPL_ATOMIC_SEQ_CST) + +/** + * Atomically replaces the value pointed by `var` with the result of + * bitwise AND between `val` and the old value of `var`. + * + * @param var A variable of ::rb_atomic_t. + * @param val Value to mask. + * @return What was stored in `var` before the operation. + * @post `var` holds `var & val`. + */ +#define RUBY_ATOMIC_FETCH_AND(var, val) rbimpl_atomic_fetch_and(&(var), (val), RBIMPL_ATOMIC_SEQ_CST) + +/** + * Identical to #RUBY_ATOMIC_OR, except it expects its arguments are ::VALUE. + * + * @param var A variable of ::VALUE. + * @param val Value to mix. + * @return void + * @post `var` holds `var | val`. + */ +#define RUBY_ATOMIC_VALUE_OR(var, val) rbimpl_atomic_size_or((volatile size_t *)&(var), (size_t)(val), RBIMPL_ATOMIC_SEQ_CST) + +/** + * Identical to #RUBY_ATOMIC_AND, except it expects its arguments are ::VALUE. + * + * @param var A variable of ::VALUE. + * @param val Value to mask. + * @return void + * @post `var` holds `var & val`. + */ +#define RUBY_ATOMIC_VALUE_AND(var, val) rbimpl_atomic_size_and((volatile size_t *)&(var), (size_t)(val), RBIMPL_ATOMIC_SEQ_CST) + /** * Atomically replaces the value pointed by `var` with `val`. This is just an * assignment, but you can additionally know the previous value. @@ -559,6 +602,76 @@ rbimpl_atomic_size_add(volatile size_t *ptr, size_t val, int memory_order) #endif } +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline void +rbimpl_atomic_size_or(volatile size_t *ptr, size_t val, int memory_order) +{ + (void)memory_order; +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + __atomic_or_fetch(ptr, val, memory_order); + +#elif defined(HAVE_GCC_SYNC_BUILTINS) + __sync_or_and_fetch(ptr, val); + +#elif defined(_WIN64) + InterlockedOr64(ptr, val); + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) && (defined(_LP64) || defined(_I32LPx)) + atomic_or_ulong(ptr, val); + +#elif defined(_WIN32) || (defined(__sun) && defined(HAVE_ATOMIC_H)) + RBIMPL_STATIC_ASSERT(size_of_size_t, sizeof *ptr == sizeof(rb_atomic_t)); + + volatile rb_atomic_t *const tmp = RBIMPL_CAST((volatile rb_atomic_t *)ptr); + rbimpl_atomic_or(tmp, val, memory_order); + +#elif defined(HAVE_STDATOMIC_H) + atomic_fetch_or_explicit((_Atomic volatile size_t *)ptr, val, memory_order); + +#else +# error Unsupported platform. 
+#endif +} + +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline void +rbimpl_atomic_size_and(volatile size_t *ptr, size_t val, int memory_order) +{ + (void)memory_order; +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + __atomic_and_fetch(ptr, val, memory_order); + +#elif defined(HAVE_GCC_SYNC_BUILTINS) + __sync_and_and_fetch(ptr, val); + +#elif defined(_WIN64) + InterlockedAnd64(ptr, val); + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) && (defined(_LP64) || defined(_I32LPx)) + atomic_and_ulong(ptr, val); + +#elif defined(_WIN32) || (defined(__sun) && defined(HAVE_ATOMIC_H)) + RBIMPL_STATIC_ASSERT(size_of_size_t, sizeof *ptr == sizeof(rb_atomic_t)); + + volatile rb_atomic_t *const tmp = RBIMPL_CAST((volatile rb_atomic_t *)ptr); + rbimpl_atomic_and(tmp, val, memory_order); + +#elif defined(HAVE_STDATOMIC_H) + atomic_fetch_and_explicit((_Atomic volatile size_t *)ptr, val, memory_order); + +#else +# error Unsupported platform. +#endif +} + RBIMPL_ATTR_ARTIFICIAL() RBIMPL_ATTR_NOALIAS() RBIMPL_ATTR_NONNULL((1)) @@ -804,6 +917,70 @@ rbimpl_atomic_or(volatile rb_atomic_t *ptr, rb_atomic_t val, int memory_order) #endif } +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline rb_atomic_t +rbimpl_atomic_fetch_and(volatile rb_atomic_t *ptr, rb_atomic_t val, int memory_order) +{ + (void)memory_order; +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + return __atomic_fetch_and(ptr, val, memory_order); + +#elif defined(HAVE_GCC_SYNC_BUILTINS) + return __sync_fetch_and_and(ptr, val); + +#elif RBIMPL_COMPILER_IS(MSVC) + return _InterlockedAnd(ptr, val); + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) + /* TODO: Solaris atomic_and_uint does not return the old value. + * Using CAS loop as fallback. */ + rb_atomic_t old = *ptr; + while (atomic_cas_uint(ptr, old, old & val) != old) { + old = *ptr; + } + return old; + +#elif !defined(_WIN32) && defined(HAVE_STDATOMIC_H) + return atomic_fetch_and_explicit((_Atomic volatile rb_atomic_t *)ptr, val, memory_order); + +#else +# error Unsupported platform. +#endif +} + +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline void +rbimpl_atomic_and(volatile rb_atomic_t *ptr, rb_atomic_t val, int memory_order) +{ + (void)memory_order; +#if 0 + +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + __atomic_and_fetch(ptr, val, memory_order); + +#elif defined(HAVE_GCC_SYNC_BUILTINS) + __sync_and_and_fetch(ptr, val); + +#elif RBIMPL_COMPILER_IS(MSVC) + _InterlockedAnd(ptr, val); + +#elif defined(__sun) && defined(HAVE_ATOMIC_H) + atomic_and_uint(ptr, val); + +#elif !defined(_WIN32) && defined(HAVE_STDATOMIC_H) + atomic_fetch_and_explicit((_Atomic volatile rb_atomic_t *)ptr, val, memory_order); + +#else +# error Unsupported platform. 
+#endif +} + RBIMPL_ATTR_ARTIFICIAL() RBIMPL_ATTR_NOALIAS() RBIMPL_ATTR_NONNULL((1)) @@ -869,6 +1046,15 @@ rbimpl_atomic_size_exchange(volatile size_t *ptr, size_t val, int memory_order) #endif } +RBIMPL_ATTR_ARTIFICIAL() +RBIMPL_ATTR_NOALIAS() +RBIMPL_ATTR_NONNULL((1)) +static inline size_t +rbimpl_atomic_size_load(volatile size_t *ptr, int memory_order) +{ + return rbimpl_atomic_size_fetch_add(ptr, 0, memory_order); +} + RBIMPL_ATTR_ARTIFICIAL() RBIMPL_ATTR_NOALIAS() RBIMPL_ATTR_NONNULL((1)) diff --git a/include/ruby/internal/intern/variable.h b/include/ruby/internal/intern/variable.h index 479c3950c1e373..d983a0b0ebc30e 100644 --- a/include/ruby/internal/intern/variable.h +++ b/include/ruby/internal/intern/variable.h @@ -214,7 +214,7 @@ void rb_alias_variable(ID dst, ID src); * This just destroys the given object. @shyouhei has no idea why extension * libraries should use this API. */ -void rb_free_generic_ivar(VALUE obj); +bool rb_free_generic_ivar(VALUE obj); /** * Identical to rb_iv_get(), except it accepts the name as an ::ID instead of a diff --git a/include/ruby/internal/value_type.h b/include/ruby/internal/value_type.h index b47d8afb97b2a7..88c9027f7ee537 100644 --- a/include/ruby/internal/value_type.h +++ b/include/ruby/internal/value_type.h @@ -81,6 +81,7 @@ #define T_TRUE RUBY_T_TRUE /**< @old{RUBY_T_TRUE} */ #define T_UNDEF RUBY_T_UNDEF /**< @old{RUBY_T_UNDEF} */ #define T_ZOMBIE RUBY_T_ZOMBIE /**< @old{RUBY_T_ZOMBIE} */ +#define T_LAST RUBY_T_MOVED #define BUILTIN_TYPE RB_BUILTIN_TYPE /**< @old{RB_BUILTIN_TYPE} */ #define DYNAMIC_SYM_P RB_DYNAMIC_SYM_P /**< @old{RB_DYNAMIC_SYM_P} */ diff --git a/process.c b/process.c index 126e36ee8d0d2a..ac2ecfbbe62d71 100644 --- a/process.c +++ b/process.c @@ -1582,8 +1582,6 @@ before_fork_ruby(void) static void after_fork_ruby(rb_pid_t pid) { - rb_gc_after_fork(pid); - if (pid == 0) { // child clear_pid_cache(); @@ -1593,6 +1591,8 @@ after_fork_ruby(rb_pid_t pid) // parent after_exec(); } + + rb_gc_after_fork(pid); } #endif @@ -4131,7 +4131,7 @@ rb_fork_ruby(int *status) struct child_handler_disabler_state old; do { - prefork(); + prefork(); // NOTE: can context switch before_fork_ruby(); rb_thread_acquire_fork_lock(); diff --git a/ractor_core.h b/ractor_core.h index c692ebbbbfc638..8f53e599bbc3f0 100644 --- a/ractor_core.h +++ b/ractor_core.h @@ -5,6 +5,8 @@ #include "id_table.h" #include "vm_debug.h" +// FIXME: parallel sweep +#define RACTOR_CHECK_MODE 0 #ifndef RACTOR_CHECK_MODE #define RACTOR_CHECK_MODE (VM_CHECK_MODE || RUBY_DEBUG) && (SIZEOF_UINT64_T == SIZEOF_VALUE) #endif diff --git a/symbol.c b/symbol.c index d3d7e13ea43626..daadd557b9fa9b 100644 --- a/symbol.c +++ b/symbol.c @@ -233,17 +233,19 @@ static VALUE dup_string_for_create(VALUE str) { rb_encoding *enc = rb_enc_get(str); + VALUE new_str; - str = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), enc); + new_str = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), enc); + RB_GC_GUARD(str); rb_encoding *ascii = rb_usascii_encoding(); - if (enc != ascii && sym_check_asciionly(str, false)) { - rb_enc_associate(str, ascii); + if (enc != ascii && sym_check_asciionly(new_str, false)) { + rb_enc_associate(new_str, ascii); } - OBJ_FREEZE(str); + OBJ_FREEZE(new_str); - str = rb_fstring(str); - return str; + new_str = rb_fstring(new_str); + return new_str; } static int @@ -338,6 +340,7 @@ sym_set_create(VALUE sym, void *data) RB_VM_LOCKING() { set_id_entry(&ruby_global_symbols, rb_id_to_serial(STATIC_SYM2ID(static_sym)), str, static_sym); } + RB_GC_GUARD(str); return 
sym_set_static_sym_tag(new_static_sym_entry); } diff --git a/test/ruby/test_process.rb b/test/ruby/test_process.rb index d99e356e69bfd4..276a18e931b63f 100644 --- a/test/ruby/test_process.rb +++ b/test/ruby/test_process.rb @@ -1941,7 +1941,7 @@ def test_daemon_no_threads puts Dir.entries("/proc/self/task") - %W[. ..] end bug4920 = '[ruby-dev:43873]' - assert_include(1..2, data.size, bug4920) + assert_include(1..3, data.size, bug4920) assert_not_include(data.map(&:to_i), pid) end else # darwin diff --git a/thread.c b/thread.c index f876b4bd05c80e..94432809e39da9 100644 --- a/thread.c +++ b/thread.c @@ -446,18 +446,26 @@ rb_threadptr_join_list_wakeup(rb_thread_t *thread) } } +void mutexes_lock_lock(void); +void mutexes_lock_unlock(void); + void rb_threadptr_unlock_all_locking_mutexes(rb_thread_t *th) { + mutexes_lock_lock(); while (th->keeping_mutexes) { rb_mutex_t *mutex = th->keeping_mutexes; - th->keeping_mutexes = mutex->next_mutex; - + rb_mutex_t *next = mutex->next_mutex; + th->keeping_mutexes = next; + mutex->next_mutex = NULL; + mutexes_lock_unlock(); // rb_warn("mutex #<%p> was not unlocked by thread #<%p>", (void *)mutex, (void*)th); VM_ASSERT(mutex->ec_serial); - const char *error_message = rb_mutex_unlock_th(mutex, th, 0); + const char *error_message = rb_mutex_unlock_th(mutex, th, 0, false); if (error_message) rb_bug("invalid keeping_mutexes: %s", error_message); + mutexes_lock_lock(); } + mutexes_lock_unlock(); } void @@ -5011,6 +5019,9 @@ rb_thread_atfork_internal(rb_thread_t *th, void (*atfork)(rb_thread_t *, const r rb_thread_reset_timer_thread(); rb_thread_start_timer_thread(); + void mutexes_lock_reset(void); + mutexes_lock_reset(); // TODO: should be on thread + VM_ASSERT(vm->ractor.blocking_cnt == 0); VM_ASSERT(vm->ractor.cnt == 1); } diff --git a/thread_sync.c b/thread_sync.c index cf4e3843ff6c2f..4d39b924e462d3 100644 --- a/thread_sync.c +++ b/thread_sync.c @@ -80,7 +80,7 @@ static void rb_mutex_abandon_all(rb_mutex_t *mutexes); static void rb_mutex_abandon_keeping_mutexes(rb_thread_t *th); static void rb_mutex_abandon_locking_mutex(rb_thread_t *th); #endif -static const char* rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial); +static const char* rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial, bool unlink_from_keeping); static size_t rb_mutex_num_waiting(rb_mutex_t *mutex) @@ -95,7 +95,52 @@ rb_mutex_num_waiting(rb_mutex_t *mutex) return n; } -rb_thread_t* rb_fiber_threadptr(const rb_fiber_t *fiber); +// TODO: mutexes_lock should be per-thread (on rb_thread_struct) +rb_nativethread_lock_t mutexes_lock = PTHREAD_MUTEX_INITIALIZER; +#ifdef RUBY_THREAD_PTHREAD_H +pthread_t mutexes_lock_lock_owner; +#endif + +static inline void +ASSERT_mutexes_lock_locked(void) +{ +#ifdef RUBY_THREAD_PTHREAD_H + VM_ASSERT(pthread_self() == mutexes_lock_lock_owner); +#endif +} + +static inline void +ASSERT_mutexes_lock_unlocked(void) +{ +#ifdef RUBY_THREAD_PTHREAD_H + VM_ASSERT(pthread_self() != mutexes_lock_lock_owner); +#endif +} + +void +mutexes_lock_lock(void) { + ASSERT_mutexes_lock_unlocked(); + rb_native_mutex_lock(&mutexes_lock); +#ifdef RUBY_THREAD_PTHREAD_H + mutexes_lock_lock_owner = pthread_self(); +#endif +} + +void +mutexes_lock_unlock(void) { + ASSERT_mutexes_lock_locked(); +#ifdef RUBY_THREAD_PTHREAD_H + mutexes_lock_lock_owner = 0; +#endif + rb_native_mutex_unlock(&mutexes_lock); +} + +void +mutexes_lock_reset(void) +{ + rb_native_mutex_initialize(&mutexes_lock); +} + static bool mutex_locked_p(rb_mutex_t *mutex) @@ 
-108,7 +153,7 @@ mutex_free(void *ptr) { rb_mutex_t *mutex = ptr; if (mutex_locked_p(mutex)) { - const char *err = rb_mutex_unlock_th(mutex, mutex->th, 0); + const char *err = rb_mutex_unlock_th(mutex, mutex->th, 0, true); if (err) rb_bug("%s", err); } ruby_xfree(ptr); @@ -172,27 +217,35 @@ static void thread_mutex_insert(rb_thread_t *thread, rb_mutex_t *mutex) { RUBY_ASSERT(!mutex->next_mutex); - if (thread->keeping_mutexes) { - mutex->next_mutex = thread->keeping_mutexes; - } + mutexes_lock_lock(); + { + if (thread->keeping_mutexes) { + mutex->next_mutex = thread->keeping_mutexes; + } - thread->keeping_mutexes = mutex; + thread->keeping_mutexes = mutex; + } + mutexes_lock_unlock(); } static void thread_mutex_remove(rb_thread_t *thread, rb_mutex_t *mutex) { - rb_mutex_t **keeping_mutexes = &thread->keeping_mutexes; + mutexes_lock_lock(); + { + rb_mutex_t **keeping_mutexes = &thread->keeping_mutexes; - while (*keeping_mutexes && *keeping_mutexes != mutex) { - // Move to the next mutex in the list: - keeping_mutexes = &(*keeping_mutexes)->next_mutex; - } + while (*keeping_mutexes && *keeping_mutexes != mutex) { + // Move to the next mutex in the list: + keeping_mutexes = &(*keeping_mutexes)->next_mutex; + } - if (*keeping_mutexes) { - *keeping_mutexes = mutex->next_mutex; - mutex->next_mutex = NULL; + if (*keeping_mutexes) { + *keeping_mutexes = mutex->next_mutex; + mutex->next_mutex = NULL; + } } + mutexes_lock_unlock(); } static void @@ -441,7 +494,10 @@ rb_mutex_owned_p(VALUE self) } static const char * -rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial) +// m = Mutex.new +// m.lock() Thread.current.keeping_mutexes << m +// +rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial, bool unlink_from_keeping) { RUBY_DEBUG_LOG("%p", mutex); @@ -455,7 +511,9 @@ rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial) struct sync_waiter *cur = 0, *next; mutex->ec_serial = 0; - thread_mutex_remove(th, mutex); + if (unlink_from_keeping) { + thread_mutex_remove(th, mutex); + } ccan_list_for_each_safe(&mutex->waitq, cur, next, node) { ccan_list_del_init(&cur->node); @@ -492,7 +550,7 @@ do_mutex_unlock(struct mutex_args *args) rb_mutex_t *mutex = args->mutex; rb_thread_t *th = rb_ec_thread_ptr(args->ec); - err = rb_mutex_unlock_th(mutex, th, rb_ec_serial(args->ec)); + err = rb_mutex_unlock_th(mutex, th, rb_ec_serial(args->ec), true); if (err) rb_raise(rb_eThreadError, "%s", err); } @@ -535,8 +593,12 @@ rb_mut_unlock(rb_execution_context_t *ec, VALUE self) static void rb_mutex_abandon_keeping_mutexes(rb_thread_t *th) { - rb_mutex_abandon_all(th->keeping_mutexes); - th->keeping_mutexes = NULL; + mutexes_lock_lock(); + { + rb_mutex_abandon_all(th->keeping_mutexes); + th->keeping_mutexes = NULL; + } + mutexes_lock_unlock(); } static void diff --git a/variable.c b/variable.c index 9d0e4e4a2b9eac..96f38da135d36f 100644 --- a/variable.c +++ b/variable.c @@ -579,6 +579,7 @@ void rb_free_generic_fields_tbl_(void) { st_free_table(generic_fields_tbl_); + generic_fields_tbl_ = NULL; } static struct rb_global_entry* @@ -1225,11 +1226,71 @@ ivar_ractor_check(VALUE obj, ID id) } } +// TODO: platforms other than pthread +static rb_nativethread_lock_t gen_fields_tbl_lock_ = PTHREAD_MUTEX_INITIALIZER; +#ifdef RUBY_THREAD_PTHREAD_H +static pthread_t gen_fields_tbl_lock_owner; +#endif +static unsigned int gen_fields_tbl_lock_lvl; + +static inline void +ASSERT_gen_fields_tbl_locked(void) +{ +#ifdef RUBY_THREAD_PTHREAD_H + VM_ASSERT(pthread_self() == 
gen_fields_tbl_lock_owner); +#endif +} + +static inline void +ASSERT_gen_fields_tbl_unlocked(void) +{ +#ifdef RUBY_THREAD_PTHREAD_H + VM_ASSERT(pthread_self() != gen_fields_tbl_lock_owner); +#endif +} + +static inline void +gen_fields_tbl_lock(bool allow_reentry) +{ + if (allow_reentry && pthread_self() == gen_fields_tbl_lock_owner) { + } else { + ASSERT_gen_fields_tbl_unlocked(); + rb_native_mutex_lock(&gen_fields_tbl_lock_); + gen_fields_tbl_lock_owner = pthread_self(); + } + gen_fields_tbl_lock_lvl++; +} + +static inline bool +gen_fields_tbl_trylock(bool allow_reentry) +{ + if (allow_reentry && pthread_self() == gen_fields_tbl_lock_owner) { + } else { + ASSERT_gen_fields_tbl_unlocked(); + if (rb_native_mutex_trylock(&gen_fields_tbl_lock_) == EBUSY) { + return false; + } + gen_fields_tbl_lock_owner = pthread_self(); + } + gen_fields_tbl_lock_lvl++; + return true; +} + +static inline void +gen_fields_tbl_unlock(void) +{ + ASSERT_gen_fields_tbl_locked(); + RUBY_ASSERT(gen_fields_tbl_lock_lvl > 0); + gen_fields_tbl_lock_lvl--; + if (gen_fields_tbl_lock_lvl == 0) { + gen_fields_tbl_lock_owner = 0; + rb_native_mutex_unlock(&gen_fields_tbl_lock_); + } +} + static inline struct st_table * generic_fields_tbl_no_ractor_check(void) { - ASSERT_vm_locking(); - return generic_fields_tbl_; } @@ -1243,21 +1304,27 @@ void rb_mark_generic_ivar(VALUE obj) { VALUE data; - // Bypass ASSERT_vm_locking() check because marking may happen concurrently with mmtk - if (st_lookup(generic_fields_tbl_, (st_data_t)obj, (st_data_t *)&data)) { - rb_gc_mark_movable(data); + gen_fields_tbl_lock(true); + { + // Bypass ASSERT_vm_locking() check because marking may happen concurrently with mmtk + if (st_lookup(generic_fields_tbl_, (st_data_t)obj, (st_data_t *)&data)) { + rb_gc_mark_movable(data); + } } + gen_fields_tbl_unlock(); } VALUE rb_obj_fields_generic_uncached(VALUE obj) { VALUE fields_obj = 0; - RB_VM_LOCKING() { + gen_fields_tbl_lock(false); + { if (!st_lookup(generic_fields_tbl_, (st_data_t)obj, (st_data_t *)&fields_obj)) { rb_bug("Object is missing entry in generic_fields_tbl"); } } + gen_fields_tbl_unlock(); return fields_obj; } @@ -1301,9 +1368,10 @@ rb_obj_fields(VALUE obj, ID field_name) return fields_obj; } -void +bool rb_free_generic_ivar(VALUE obj) { + bool result = true; if (rb_obj_gen_fields_p(obj)) { st_data_t key = (st_data_t)obj, value; switch (BUILTIN_TYPE(obj)) { @@ -1324,20 +1392,32 @@ rb_free_generic_ivar(VALUE obj) { // Other EC may have stale caches, so fields_obj should be // invalidated and the GC will replace with Qundef - rb_execution_context_t *ec = GET_EC(); - if (ec->gen_fields_cache.obj == obj) { + rb_execution_context_t *ec = rb_current_execution_context(false); + if (ec && ec->gen_fields_cache.obj == obj) { ec->gen_fields_cache.obj = Qundef; ec->gen_fields_cache.fields_obj = Qundef; } - RB_VM_LOCKING() { + if (ec) { + gen_fields_tbl_lock(true); // needs to be re-entrant + } + else { + bool did_lock = gen_fields_tbl_trylock(false); + // If we can't acquire it, bail (could lead to deadlock) + if (!did_lock) return false; + } + // gen_fields_tbl_lock(); + { if (!st_delete(generic_fields_tbl_no_ractor_check(), &key, &value)) { + gen_fields_tbl_unlock(); rb_bug("Object is missing entry in generic_fields_tbl"); } } + gen_fields_tbl_unlock(); } } RBASIC_SET_SHAPE_ID(obj, ROOT_SHAPE_ID); } + return result; } static void @@ -1372,8 +1452,12 @@ rb_obj_set_fields(VALUE obj, VALUE fields_obj, ID field_name, VALUE original_fie default: generic_fields: { - RB_VM_LOCKING() { - 
st_insert(generic_fields_tbl_, (st_data_t)obj, (st_data_t)fields_obj); + RB_VM_LOCKING() { // needed in case insert triggers GC + gen_fields_tbl_lock(false); + { + st_insert(generic_fields_tbl_, (st_data_t)obj, (st_data_t)fields_obj); + } + gen_fields_tbl_unlock(); } RB_OBJ_WRITTEN(obj, original_fields_obj, fields_obj); @@ -2296,6 +2380,7 @@ rb_replace_generic_ivar(VALUE clone, VALUE obj) { RB_VM_LOCKING() { st_data_t fields_tbl, obj_data = (st_data_t)obj; + // We've STW at this point, no need to lock gen_fields_tbl_lock if (st_delete(generic_fields_tbl_, &obj_data, &fields_tbl)) { st_insert(generic_fields_tbl_, (st_data_t)clone, fields_tbl); RB_OBJ_WRITTEN(clone, Qundef, fields_tbl); @@ -2584,6 +2669,45 @@ rb_mod_const_missing(VALUE klass, VALUE name) UNREACHABLE_RETURN(Qnil); } +rb_nativethread_lock_t autoload_free_lock = PTHREAD_MUTEX_INITIALIZER; +#ifdef RUBY_THREAD_PTHREAD_H +pthread_t autoload_free_lock_owner; +#endif + +static inline void +ASSERT_autoload_free_lock_locked(void) +{ +#ifdef RUBY_THREAD_PTHREAD_H + VM_ASSERT(pthread_self() == autoload_free_lock_owner); +#endif +} + +static inline void +ASSERT_autoload_free_lock_unlocked(void) +{ +#ifdef RUBY_THREAD_PTHREAD_H + VM_ASSERT(pthread_self() != autoload_free_lock_owner); +#endif +} + +static inline void +autoload_free_lock_lock(void) { + ASSERT_autoload_free_lock_unlocked(); + rb_native_mutex_lock(&autoload_free_lock); +#ifdef RUBY_THREAD_PTHREAD_H + autoload_free_lock_owner = pthread_self(); +#endif +} + +static inline void +autoload_free_lock_unlock(void) { + ASSERT_autoload_free_lock_locked(); +#ifdef RUBY_THREAD_PTHREAD_H + autoload_free_lock_owner = 0; +#endif + rb_native_mutex_unlock(&autoload_free_lock); +} + static void autoload_table_mark(void *ptr) { @@ -2705,10 +2829,14 @@ autoload_data_free(void *ptr) { struct autoload_data *p = ptr; - struct autoload_const *autoload_const, *next; - ccan_list_for_each_safe(&p->constants, autoload_const, next, cnode) { - ccan_list_del_init(&autoload_const->cnode); + autoload_free_lock_lock(); + { + struct autoload_const *autoload_const, *next; + ccan_list_for_each_safe(&p->constants, autoload_const, next, cnode) { + ccan_list_del_init(&autoload_const->cnode); + } } + autoload_free_lock_unlock(); SIZED_FREE(p); } @@ -2748,7 +2876,12 @@ autoload_const_free(void *ptr) { struct autoload_const *autoload_const = ptr; - ccan_list_del(&autoload_const->cnode); + autoload_free_lock_lock(); + { + ccan_list_del(&autoload_const->cnode); + } + autoload_free_lock_unlock(); + SIZED_FREE(autoload_const); } diff --git a/vm.c b/vm.c index 0398b9f74c9683..c88137366b31a2 100644 --- a/vm.c +++ b/vm.c @@ -3415,6 +3415,8 @@ ruby_vm_destruct(rb_vm_t *vm) if (vm) { rb_thread_t *th = vm->ractor.main_thread; + void wait_for_background_sweeping_to_finish(void *, bool, bool, const char*); + wait_for_background_sweeping_to_finish(vm->gc.objspace, true, false, "vm_destruct"); if (rb_free_at_exit) { rb_free_encoded_insn_data(); diff --git a/vm_callinfo.h b/vm_callinfo.h index 9f147522815d50..5168ce57670745 100644 --- a/vm_callinfo.h +++ b/vm_callinfo.h @@ -399,6 +399,9 @@ vm_cc_refinement_p(const struct rb_callcache *cc) static inline bool vm_cc_class_check(const struct rb_callcache *cc, VALUE klass) { + if (!IMEMO_TYPE_P(cc, imemo_callcache)) { + fprintf(stderr, "Error: vm_cc_class_check called on %s (%p)\n", rb_obj_info((VALUE)cc), (void*)cc); + } VM_ASSERT(IMEMO_TYPE_P(cc, imemo_callcache)); VM_ASSERT(cc_check_class(cc->klass)); return cc->klass == klass; diff --git a/vm_core.h b/vm_core.h index 
85664e18b8396b..4ca92b431b17e7 100644 --- a/vm_core.h +++ b/vm_core.h @@ -806,6 +806,7 @@ typedef struct rb_vm_struct { void *data; void (*mark_func)(VALUE v, void *data); } *mark_func_data; + pthread_t sweep_thread; } gc; rb_at_exit_list *at_exit; @@ -1631,10 +1632,17 @@ VM_ENV_BOX_UNCHECKED(const VALUE *ep) int rb_vm_ep_in_heap_p(const VALUE *ep); #endif +static rb_execution_context_t *rb_current_execution_context(bool expect_ec); + static inline int VM_ENV_ESCAPED_P(const VALUE *ep) { - VM_ASSERT(rb_vm_ep_in_heap_p(ep) == !!VM_ENV_FLAGS(ep, VM_ENV_FLAG_ESCAPED)); +#if VM_CHECK_MODE > 0 + if (rb_current_execution_context(false)) { + // Can be called from background sweep thread, and this uses GET_EC() + VM_ASSERT(rb_vm_ep_in_heap_p(ep) == !!VM_ENV_FLAGS(ep, VM_ENV_FLAG_ESCAPED)); + } +#endif return VM_ENV_FLAGS(ep, VM_ENV_FLAG_ESCAPED) ? 1 : 0; } @@ -2158,11 +2166,6 @@ rb_current_ractor_raw(bool expect) } } -static inline rb_ractor_t * -rb_current_ractor(void) -{ - return rb_current_ractor_raw(true); -} static inline rb_vm_t * rb_current_vm(void) @@ -2178,6 +2181,16 @@ rb_current_vm(void) return ruby_current_vm_ptr; } +static inline rb_ractor_t * +rb_current_ractor(void) +{ + rb_vm_t *vm = GET_VM(); + if (vm) { + VM_ASSERT(vm->gc.sweep_thread != pthread_self()); + } + return rb_current_ractor_raw(true); +} + void rb_ec_vm_lock_rec_release(const rb_execution_context_t *ec, unsigned int recorded_lock_rec, unsigned int current_lock_rec); diff --git a/vm_sync.c b/vm_sync.c index aca83dde5a73aa..457af1ec215a8a 100644 --- a/vm_sync.c +++ b/vm_sync.c @@ -9,10 +9,24 @@ void rb_ractor_sched_barrier_start(rb_vm_t *vm, rb_ractor_t *cr); void rb_ractor_sched_barrier_join(rb_vm_t *vm, rb_ractor_t *cr); void rb_ractor_sched_barrier_end(rb_vm_t *vm, rb_ractor_t *cr); +static bool +is_sweep_thread_p(void) +{ + rb_vm_t *vm = GET_VM(); + if (!vm) return false; + return vm->gc.sweep_thread == pthread_self(); +} + static bool vm_locked(rb_vm_t *vm) { - return vm_locked_by_ractor_p(vm, GET_RACTOR()); + if (!vm) return false; + if (is_sweep_thread_p()) { + return vm->ractor.sync.lock_owner == (void*)-1; + } + else { + return vm_locked_by_ractor_p(vm, GET_RACTOR()); + } } #if RUBY_DEBUG > 0 @@ -68,6 +82,7 @@ vm_need_barrier_waiting(const rb_vm_t *vm) static bool vm_need_barrier(bool no_barrier, const rb_ractor_t *cr, const rb_vm_t *vm) { + VM_ASSERT(cr); #ifdef RUBY_THREAD_PTHREAD_H return !no_barrier && cr->threads.sched.running != NULL && vm_need_barrier_waiting(vm); // ractor has running threads. 
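
Aside: the vm_sync.c changes above let the background sweep thread, which has no ractor and no execution context, take the VM lock by recording a sentinel owner value instead of a ractor pointer. Below is a minimal, self-contained sketch of that sentinel-owner pattern, not the patch's implementation; fake_vm, SWEEP_OWNER and the helper names are hypothetical stand-ins, and the real code additionally handles lock recursion, barriers and debug logging.

#include <assert.h>
#include <pthread.h>
#include <stddef.h>

#define SWEEP_OWNER ((void *)-1)   /* stands in for "locked by the sweep thread" */

struct fake_vm {
    pthread_mutex_t lock;
    void *lock_owner;              /* a ractor pointer, SWEEP_OWNER, or NULL */
    pthread_t sweep_thread;
};

static void
fake_vm_lock(struct fake_vm *vm, void *current_ractor)
{
    /* The sweep thread has no ractor, so it records the sentinel instead. */
    int from_sweep_thread = pthread_equal(pthread_self(), vm->sweep_thread);
    pthread_mutex_lock(&vm->lock);
    assert(vm->lock_owner == NULL);
    vm->lock_owner = from_sweep_thread ? SWEEP_OWNER : current_ractor;
}

static void
fake_vm_unlock(struct fake_vm *vm, void *current_ractor)
{
    if (pthread_equal(pthread_self(), vm->sweep_thread)) {
        assert(vm->lock_owner == SWEEP_OWNER);
    }
    else {
        assert(vm->lock_owner == current_ractor);
    }
    vm->lock_owner = NULL;
    pthread_mutex_unlock(&vm->lock);
}

int
main(void)
{
    struct fake_vm vm;
    pthread_mutex_init(&vm.lock, NULL);
    vm.lock_owner = NULL;
    vm.sweep_thread = pthread_self();  /* pretend the current thread is the sweep thread */

    fake_vm_lock(&vm, NULL);           /* no ractor available: owner becomes SWEEP_OWNER */
    assert(vm.lock_owner == SWEEP_OWNER);
    fake_vm_unlock(&vm, NULL);
    assert(vm.lock_owner == NULL);

    pthread_mutex_destroy(&vm.lock);
    return 0;
}
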
#else @@ -76,7 +91,7 @@ vm_need_barrier(bool no_barrier, const rb_ractor_t *cr, const rb_vm_t *vm) } static void -vm_lock_enter(rb_ractor_t *cr, rb_vm_t *vm, bool locked, bool no_barrier, unsigned int *lev APPEND_LOCATION_ARGS) +vm_lock_enter(rb_ractor_t *cr, rb_vm_t *vm, bool locked, bool no_barrier, bool is_sweep_thread, unsigned int *lev APPEND_LOCATION_ARGS) { RUBY_DEBUG_LOG2(file, line, "start locked:%d", locked); @@ -85,8 +100,15 @@ vm_lock_enter(rb_ractor_t *cr, rb_vm_t *vm, bool locked, bool no_barrier, unsign } else { #if RACTOR_CHECK_MODE - // locking ractor and acquire VM lock will cause deadlock - VM_ASSERT(cr->sync.locked_by != rb_ractor_self(cr)); + if (is_sweep_thread) { + VM_ASSERT(0); + VM_ASSERT(cr == 0); + VM_ASSERT(vm->ractor.sync.lock_owner != (void*)-1); + } + else { + // locking ractor and acquire VM lock will cause deadlock + VM_ASSERT(cr->sync.locked_by != rb_ractor_self(cr)); + } #endif // lock rb_native_mutex_lock(&vm->ractor.sync.lock); @@ -94,7 +116,7 @@ vm_lock_enter(rb_ractor_t *cr, rb_vm_t *vm, bool locked, bool no_barrier, unsign VM_ASSERT(vm->ractor.sync.lock_rec == 0); // barrier - if (vm_need_barrier(no_barrier, cr, vm)) { + if (!is_sweep_thread && vm_need_barrier(no_barrier, cr, vm)) { rb_execution_context_t *ec = GET_EC(); RB_VM_SAVE_MACHINE_CONTEXT(rb_ec_thread_ptr(ec)); @@ -107,18 +129,20 @@ vm_lock_enter(rb_ractor_t *cr, rb_vm_t *vm, bool locked, bool no_barrier, unsign VM_ASSERT(vm->ractor.sync.lock_rec == 0); VM_ASSERT(vm->ractor.sync.lock_owner == NULL); - vm->ractor.sync.lock_owner = cr; + vm->ractor.sync.lock_owner = is_sweep_thread ? (rb_ractor_t*)-1 : cr; } vm->ractor.sync.lock_rec++; *lev = vm->ractor.sync.lock_rec; - RUBY_DEBUG_LOG2(file, line, "rec:%u owner:%u", vm->ractor.sync.lock_rec, - (unsigned int)rb_ractor_id(vm->ractor.sync.lock_owner)); + if (!is_sweep_thread) { + RUBY_DEBUG_LOG2(file, line, "rec:%u owner:%u", vm->ractor.sync.lock_rec, + (unsigned int)rb_ractor_id(vm->ractor.sync.lock_owner)); + } } static void -vm_lock_leave(rb_vm_t *vm, bool no_barrier, unsigned int *lev APPEND_LOCATION_ARGS) +vm_lock_leave(rb_vm_t *vm, bool no_barrier, bool is_sweep_thread, unsigned int *lev APPEND_LOCATION_ARGS) { MAYBE_UNUSED(rb_ractor_t *cr = vm->ractor.sync.lock_owner); @@ -129,10 +153,15 @@ vm_lock_leave(rb_vm_t *vm, bool no_barrier, unsigned int *lev APPEND_LOCATION_AR ASSERT_vm_locking(); VM_ASSERT(vm->ractor.sync.lock_rec > 0); VM_ASSERT(vm->ractor.sync.lock_rec == *lev); - VM_ASSERT(cr == GET_RACTOR()); + if (is_sweep_thread) { + VM_ASSERT(cr == (void*)-1); + } + else { + VM_ASSERT(cr == GET_RACTOR()); + } #ifdef RUBY_THREAD_PTHREAD_H - if (vm->ractor.sched.barrier_ractor == cr && + if (!is_sweep_thread && vm->ractor.sched.barrier_ractor == cr && vm->ractor.sched.barrier_lock_rec == vm->ractor.sync.lock_rec) { VM_ASSERT(!no_barrier); rb_ractor_sched_barrier_end(vm, cr); @@ -152,11 +181,13 @@ void rb_vm_lock_enter_body(unsigned int *lev APPEND_LOCATION_ARGS) { rb_vm_t *vm = GET_VM(); + VM_ASSERT(vm); if (vm_locked(vm)) { - vm_lock_enter(NULL, vm, true, false, lev APPEND_LOCATION_PARAMS); + vm_lock_enter(NULL, vm, true, false, is_sweep_thread_p(), lev APPEND_LOCATION_PARAMS); } else { - vm_lock_enter(GET_RACTOR(), vm, false, false, lev APPEND_LOCATION_PARAMS); + bool is_sweep_th = is_sweep_thread_p(); + vm_lock_enter(is_sweep_th ? 
NULL : GET_RACTOR(), vm, false, false, is_sweep_th, lev APPEND_LOCATION_PARAMS); } } @@ -164,11 +195,13 @@ void rb_vm_lock_enter_body_nb(unsigned int *lev APPEND_LOCATION_ARGS) { rb_vm_t *vm = GET_VM(); + VM_ASSERT(vm); if (vm_locked(vm)) { - vm_lock_enter(NULL, vm, true, true, lev APPEND_LOCATION_PARAMS); + vm_lock_enter(NULL, vm, true, true, is_sweep_thread_p(), lev APPEND_LOCATION_PARAMS); } else { - vm_lock_enter(GET_RACTOR(), vm, false, true, lev APPEND_LOCATION_PARAMS); + bool is_sweep_th = is_sweep_thread_p(); + vm_lock_enter(is_sweep_th ? NULL : GET_RACTOR(), vm, false, true, is_sweep_th, lev APPEND_LOCATION_PARAMS); } } @@ -176,28 +209,31 @@ void rb_vm_lock_enter_body_cr(rb_ractor_t *cr, unsigned int *lev APPEND_LOCATION_ARGS) { rb_vm_t *vm = GET_VM(); - vm_lock_enter(cr, vm, vm_locked(vm), false, lev APPEND_LOCATION_PARAMS); + VM_ASSERT(vm); + vm_lock_enter(cr, vm, vm_locked(vm), false, false, lev APPEND_LOCATION_PARAMS); } void rb_vm_lock_leave_body_nb(unsigned int *lev APPEND_LOCATION_ARGS) { - vm_lock_leave(GET_VM(), true, lev APPEND_LOCATION_PARAMS); + vm_lock_leave(GET_VM(), true, is_sweep_thread_p(), lev APPEND_LOCATION_PARAMS); } void rb_vm_lock_leave_body(unsigned int *lev APPEND_LOCATION_ARGS) { - vm_lock_leave(GET_VM(), false, lev APPEND_LOCATION_PARAMS); + vm_lock_leave(GET_VM(), false, is_sweep_thread_p(), lev APPEND_LOCATION_PARAMS); } void rb_vm_lock_body(LOCATION_ARGS) { rb_vm_t *vm = GET_VM(); + VM_ASSERT(vm); ASSERT_vm_unlocking(); - vm_lock_enter(GET_RACTOR(), vm, false, false, &vm->ractor.sync.lock_rec APPEND_LOCATION_PARAMS); + bool is_sweep_th = is_sweep_thread_p(); + vm_lock_enter(is_sweep_th ? NULL : GET_RACTOR(), vm, false, false, is_sweep_th, &vm->ractor.sync.lock_rec APPEND_LOCATION_PARAMS); } void @@ -206,7 +242,7 @@ rb_vm_unlock_body(LOCATION_ARGS) rb_vm_t *vm = GET_VM(); ASSERT_vm_locking(); VM_ASSERT(vm->ractor.sync.lock_rec == 1); - vm_lock_leave(vm, false, &vm->ractor.sync.lock_rec APPEND_LOCATION_PARAMS); + vm_lock_leave(vm, false, is_sweep_thread_p(), &vm->ractor.sync.lock_rec APPEND_LOCATION_PARAMS); } static void @@ -254,6 +290,7 @@ void rb_vm_barrier(void) { RB_DEBUG_COUNTER_INC(vm_sync_barrier); + VM_ASSERT(!is_sweep_thread_p()); if (!rb_multi_ractor_p()) { // no other ractors diff --git a/vm_sync.h b/vm_sync.h index 314a2238a96581..761c1795eeb09d 100644 --- a/vm_sync.h +++ b/vm_sync.h @@ -44,7 +44,7 @@ rb_multi_ractor_p(void) { if (LIKELY(ruby_single_main_ractor)) { // 0 on boot time. - RUBY_ASSERT(GET_VM()->ractor.cnt <= 1); + RUBY_ASSERT(!GET_VM() || GET_VM()->ractor.cnt <= 1); return false; } else { From 5163a3a143710b7de4a2755dfbcbea65710c3e34 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Mon, 16 Mar 2026 11:26:05 -0400 Subject: [PATCH 05/67] Fix age_bits --- gc/default/default.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gc/default/default.c b/gc/default/default.c index e7d1791affd04e..f52c116c2488bf 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -4110,6 +4110,20 @@ deferred_free(rb_objspace_t *objspace, VALUE obj) return result; } +// Spread N bits into 2N bits: bit k → bits 2k and 2k+1. +// e.g. 0b1010 → 0b11001100 +static inline bits_t +spread_bits(bits_t x) +{ + bits_t result = 0; + for (int b = 0; b < BITS_BITLENGTH / 2; b++) { + if (x & ((bits_t)1 << b)) { + result |= (bits_t)RVALUE_AGE_BIT_MASK << (b * RVALUE_AGE_BIT_COUNT); + } + } + return result; +} + // Clear bits for the page that was swept by the background thread. 
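
Aside: the spread_bits() helper added above spreads each set bit of a bitmap word into the matching two-bit group. The standalone check below reproduces its documented example under the assumption of a 64-bit bits_t and a two-bit age field with mask 0x3, which is what the 0b1010 to 0b11001100 comment implies; spread_bits_sketch and the macro names here are hypothetical, not the GC's own.

#include <assert.h>
#include <stdint.h>

typedef uint64_t bits_t;                 /* assumes a 64-bit bitmap word */
#define AGE_BIT_COUNT 2
#define AGE_BIT_MASK  ((bits_t)0x3)

static bits_t
spread_bits_sketch(bits_t x)
{
    bits_t result = 0;
    for (int b = 0; b < 32; b++) {       /* BITS_BITLENGTH / 2 for a 64-bit word */
        if (x & ((bits_t)1 << b)) {
            result |= AGE_BIT_MASK << (b * AGE_BIT_COUNT);
        }
    }
    return result;
}

int
main(void)
{
    assert(spread_bits_sketch(0x0) == 0x0);
    assert(spread_bits_sketch(0xA) == 0xCC);   /* 0b1010 becomes 0b11001100 */
    assert(spread_bits_sketch(0x1) == 0x3);    /* lowest bit fills the lowest pair */
    return 0;
}
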
static inline void gc_post_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *sweep_page, bool force_setup_mark_bits) From 66d5b87aea5f934c4e115503dacaa52886b0e03b Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Mon, 16 Mar 2026 11:43:38 -0400 Subject: [PATCH 06/67] Use planar age bits like 4.1.0 --- gc/default/default.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index f52c116c2488bf..e7d1791affd04e 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -4110,20 +4110,6 @@ deferred_free(rb_objspace_t *objspace, VALUE obj) return result; } -// Spread N bits into 2N bits: bit k → bits 2k and 2k+1. -// e.g. 0b1010 → 0b11001100 -static inline bits_t -spread_bits(bits_t x) -{ - bits_t result = 0; - for (int b = 0; b < BITS_BITLENGTH / 2; b++) { - if (x & ((bits_t)1 << b)) { - result |= (bits_t)RVALUE_AGE_BIT_MASK << (b * RVALUE_AGE_BIT_COUNT); - } - } - return result; -} - // Clear bits for the page that was swept by the background thread. static inline void gc_post_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *sweep_page, bool force_setup_mark_bits) From 188bc3dbaa9350aed5abcd6530c5fcdc16d3da98 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Mon, 16 Mar 2026 14:15:57 -0400 Subject: [PATCH 07/67] Add RUBY_TYPED_CONCURRENT_FREE_SAFE flag Add this flag to all internal types. Internal extension types are skipped for now. --- array.c | 2 +- ast.c | 4 ++-- box.c | 6 +++--- compile.c | 8 ++++---- concurrent_set.c | 2 +- cont.c | 6 +++--- dir.c | 2 +- encoding.c | 2 +- enumerator.c | 16 ++++++++-------- error.c | 2 +- file.c | 2 +- gc.c | 2 +- hash.c | 2 +- id_table.c | 2 +- include/ruby/internal/core/rtypeddata.h | 9 +++++++++ io.c | 2 +- io_buffer.c | 2 +- iseq.c | 6 +++--- marshal.c | 6 +++--- memory_view.c | 4 ++-- proc.c | 6 +++--- process.c | 4 ++-- ractor.c | 4 ++-- ractor_sync.c | 2 +- random.c | 6 +++--- ruby_parser.c | 4 ++-- scheduler.c | 2 +- set.c | 2 +- shape.c | 2 +- string.c | 2 +- thread.c | 4 ++-- thread_sync.c | 8 ++++---- time.c | 2 +- transcode.c | 2 +- variable.c | 6 +++--- vm.c | 6 +++--- vm_backtrace.c | 4 ++-- vm_method.c | 2 +- vm_trace.c | 2 +- weakmap.c | 4 ++-- 40 files changed, 85 insertions(+), 76 deletions(-) diff --git a/array.c b/array.c index fbb712c7262624..71286d2d8d0669 100644 --- a/array.c +++ b/array.c @@ -6875,7 +6875,7 @@ static const rb_data_type_t ary_sample_memo_type = { .function = { .dfree = (RUBY_DATA_FUNC)st_free_table, }, - .flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY + .flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE diff --git a/ast.c b/ast.c index 5357aa38a5ae09..3321ae069f77a3 100644 --- a/ast.c +++ b/ast.c @@ -45,7 +45,7 @@ static const rb_data_type_t rb_node_type = { "AST/node", {node_gc_mark, RUBY_TYPED_DEFAULT_FREE, node_memsize,}, 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY, + RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; struct ASTLocationData { @@ -70,7 +70,7 @@ static const rb_data_type_t rb_location_type = { "AST/location", {location_gc_mark, RUBY_TYPED_DEFAULT_FREE, location_memsize,}, 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY, + RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; diff --git a/box.c b/box.c index fba494f7ad2e1c..88be74a0a9bfb1 100644 --- a/box.c +++ b/box.c @@ -300,7 +300,7 @@ static const rb_data_type_t rb_box_data_type = { box_entry_memsize, rb_box_gc_update_references, }, - 0, 0, 
RUBY_TYPED_FREE_IMMEDIATELY // TODO: enable RUBY_TYPED_WB_PROTECTED when inserting write barriers + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE // TODO: enable RUBY_TYPED_WB_PROTECTED when inserting write barriers }; static const rb_data_type_t rb_root_box_data_type = { @@ -311,7 +311,7 @@ static const rb_data_type_t rb_root_box_data_type = { box_entry_memsize, rb_box_gc_update_references, }, - &rb_box_data_type, 0, RUBY_TYPED_FREE_IMMEDIATELY // TODO: enable RUBY_TYPED_WB_PROTECTED when inserting write barriers + &rb_box_data_type, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE // TODO: enable RUBY_TYPED_WB_PROTECTED when inserting write barriers }; VALUE @@ -755,7 +755,7 @@ box_ext_cleanup_free(void *p) static const rb_data_type_t box_ext_cleanup_type = { "box_ext_cleanup", {box_ext_cleanup_mark, box_ext_cleanup_free}, - .flags = RUBY_TYPED_FREE_IMMEDIATELY, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; void diff --git a/compile.c b/compile.c index 100ab126ed152e..bad52f6620310a 100644 --- a/compile.c +++ b/compile.c @@ -12312,7 +12312,7 @@ static const rb_data_type_t labels_wrapper_type = { .dmark = (RUBY_DATA_FUNC)rb_mark_set, .dfree = (RUBY_DATA_FUNC)st_free_table, }, - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; void @@ -12573,7 +12573,7 @@ static const rb_data_type_t pinned_list_type = { RUBY_DEFAULT_FREE, NULL, // No external memory to report, }, - 0, 0, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE @@ -14724,7 +14724,7 @@ ibf_dump_memsize(const void *ptr) static const rb_data_type_t ibf_dump_type = { "ibf_dump", {ibf_dump_mark, ibf_dump_free, ibf_dump_memsize,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static void @@ -14961,7 +14961,7 @@ ibf_loader_memsize(const void *ptr) static const rb_data_type_t ibf_load_type = { "ibf_loader", {ibf_loader_mark, ibf_loader_free, ibf_loader_memsize,}, - 0, 0, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; const rb_iseq_t * diff --git a/concurrent_set.c b/concurrent_set.c index c8b0c73881a85d..d6c1457e3822f8 100644 --- a/concurrent_set.c +++ b/concurrent_set.c @@ -91,7 +91,7 @@ static const rb_data_type_t concurrent_set_type = { .dsize = concurrent_set_size, }, /* Hack: NOT WB_PROTECTED on purpose (see above) */ - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; VALUE diff --git a/cont.c b/cont.c index a1af339d331751..bb3bc9ab106f81 100644 --- a/cont.c +++ b/cont.c @@ -1446,7 +1446,7 @@ cont_handle_weak_references(void *ptr) static const rb_data_type_t rb_cont_data_type = { "continuation", {cont_mark, cont_free, cont_memsize, cont_compact, cont_handle_weak_references}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static inline void @@ -2207,7 +2207,7 @@ fiber_handle_weak_references(void *ptr) static const rb_data_type_t rb_fiber_data_type = { "fiber", {fiber_mark, fiber_free, fiber_memsize, 
fiber_compact, fiber_handle_weak_references}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE @@ -3615,7 +3615,7 @@ fiber_pool_memsize(const void *ptr) static const rb_data_type_t FiberPoolDataType = { "fiber_pool", {NULL, fiber_pool_free, fiber_pool_memsize,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE diff --git a/dir.c b/dir.c index d67de8cf06c830..72496d0906dbc0 100644 --- a/dir.c +++ b/dir.c @@ -545,7 +545,7 @@ static const rb_data_type_t dir_data_type = { dir_free, NULL, // Nothing allocated externally, so don't need a memsize function }, - 0, NULL, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_DECL_MARKING | RUBY_TYPED_EMBEDDABLE + 0, NULL, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_DECL_MARKING | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE dir_close(VALUE); diff --git a/encoding.c b/encoding.c index 8bb393b471ed54..04f5269d63f5ea 100644 --- a/encoding.c +++ b/encoding.c @@ -122,7 +122,7 @@ static int filesystem_encindex = ENCINDEX_ASCII_8BIT; static const rb_data_type_t encoding_data_type = { "encoding", {0, 0, 0,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE }; #define is_encoding_type(obj) (RTYPEDDATA_TYPE(obj) == &encoding_data_type) diff --git a/enumerator.c b/enumerator.c index 81b71bd8b43b29..2f181918f08cb2 100644 --- a/enumerator.c +++ b/enumerator.c @@ -280,7 +280,7 @@ static const rb_data_type_t enumerator_data_type = { NULL, // Nothing allocated externally, so don't need a memsize function NULL, }, - 0, NULL, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_DECL_MARKING | RUBY_TYPED_EMBEDDABLE + 0, NULL, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_DECL_MARKING | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static struct enumerator * @@ -311,7 +311,7 @@ static const rb_data_type_t proc_entry_data_type = { NULL, // Nothing allocated externally, so don't need a memsize function proc_entry_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static struct proc_entry * @@ -1323,7 +1323,7 @@ static const rb_data_type_t yielder_data_type = { NULL, yielder_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static struct yielder * @@ -1447,7 +1447,7 @@ static const rb_data_type_t generator_data_type = { NULL, generator_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static struct generator * @@ -2978,7 +2978,7 @@ static const rb_data_type_t producer_data_type = { producer_memsize, producer_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static struct producer * @@ -3196,7 +3196,7 @@ static const 
rb_data_type_t enum_chain_data_type = { enum_chain_memsize, enum_chain_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static struct enum_chain * @@ -3511,7 +3511,7 @@ static const rb_data_type_t enum_product_data_type = { enum_product_memsize, enum_product_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static struct enum_product * @@ -3849,7 +3849,7 @@ static const rb_data_type_t arith_seq_data_type = { NULL, }, .parent = &enumerator_data_type, - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_DECL_MARKING | RUBY_TYPED_EMBEDDABLE + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_DECL_MARKING | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE diff --git a/error.c b/error.c index 6e88dcbfaff897..10162bd0e1b46b 100644 --- a/error.c +++ b/error.c @@ -2535,7 +2535,7 @@ static const rb_data_type_t name_err_mesg_data_type = { NULL, // No external memory to report, name_err_mesg_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; /* :nodoc: */ diff --git a/file.c b/file.c index e40f67ec73817a..c90a499e6603ff 100644 --- a/file.c +++ b/file.c @@ -535,7 +535,7 @@ static const rb_data_type_t stat_data_type = { RUBY_TYPED_DEFAULT_FREE, NULL, // No external memory to report }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; struct rb_stat { diff --git a/gc.c b/gc.c index fe6f0c9b90e509..94bd70ec23a837 100644 --- a/gc.c +++ b/gc.c @@ -2159,7 +2159,7 @@ static const rb_data_type_t id2ref_tbl_type = { // dcompact function not required because the table is reference updated // in rb_gc_vm_weak_table_foreach }, - .flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY + .flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE diff --git a/hash.c b/hash.c index 773df7e78d8c7f..0df553db67853b 100644 --- a/hash.c +++ b/hash.c @@ -6905,7 +6905,7 @@ static const rb_data_type_t env_data_type = { NULL, NULL, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED, + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; /* diff --git a/id_table.c b/id_table.c index 76841d0cff8d07..c15867cc8715e1 100644 --- a/id_table.c +++ b/id_table.c @@ -349,7 +349,7 @@ const rb_data_type_t rb_managed_id_table_type = { .dfree = managed_id_table_free, .dsize = managed_id_table_memsize, }, - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; static inline struct rb_id_table * diff --git a/include/ruby/internal/core/rtypeddata.h b/include/ruby/internal/core/rtypeddata.h index 22bf46eb031bba..204cf0b539c689 100644 --- a/include/ruby/internal/core/rtypeddata.h +++ b/include/ruby/internal/core/rtypeddata.h @@ -120,6 +120,7 @@ static inline VALUE rbimpl_check_external_typeddata(VALUE obj); * 
Macros to see if each corresponding flag is defined. */ #define RUBY_TYPED_FREE_IMMEDIATELY RUBY_TYPED_FREE_IMMEDIATELY +#define RUBY_TYPED_CONCURRENT_FREE_SAFE RUBY_TYPED_CONCURRENT_FREE_SAFE #define RUBY_TYPED_FROZEN_SHAREABLE RUBY_TYPED_FROZEN_SHAREABLE #define RUBY_TYPED_WB_PROTECTED RUBY_TYPED_WB_PROTECTED #define RUBY_TYPED_EMBEDDABLE RUBY_TYPED_EMBEDDABLE @@ -164,6 +165,14 @@ rbimpl_typeddata_flags { */ RUBY_TYPED_EMBEDDABLE = 2, + /** + * This flag indicates that the dfree function for this type is safe to + * call concurrently from a background sweep thread. When set, the GC + * may free objects of this type without holding the GVL. Only set this + * flag if the dfree function does not access shared mutable state. + */ + RUBY_TYPED_CONCURRENT_FREE_SAFE = 4, + /** * This flag has something to do with Ractor. Multiple Ractors run without * protecting each other. Sharing an object among Ractors is basically diff --git a/io.c b/io.c index ab04d8df22864c..596f7db352bee4 100644 --- a/io.c +++ b/io.c @@ -10017,7 +10017,7 @@ argf_memsize(const void *ptr) static const rb_data_type_t argf_type = { "ARGF", {argf_mark_and_move, RUBY_TYPED_DEFAULT_FREE, argf_memsize, argf_mark_and_move}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static inline void diff --git a/io_buffer.c b/io_buffer.c index 3c7b3eb16a756b..684bb8e1c53717 100644 --- a/io_buffer.c +++ b/io_buffer.c @@ -332,7 +332,7 @@ static const rb_data_type_t rb_io_buffer_type = { .dcompact = rb_io_buffer_type_compact, }, .data = NULL, - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; static inline enum rb_io_buffer_flags diff --git a/iseq.c b/iseq.c index 6f87b2df3e085b..2c4ecb1caedf4a 100644 --- a/iseq.c +++ b/iseq.c @@ -1606,7 +1606,7 @@ static const rb_data_type_t iseqw_data_type = { iseqw_memsize, iseqw_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED|RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE @@ -2846,7 +2846,7 @@ iseq_inspect(const rb_iseq_t *iseq) static const rb_data_type_t tmp_set = { "tmpset", {(void (*)(void *))rb_mark_set, (void (*)(void *))st_free_table, 0, 0,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE @@ -3324,7 +3324,7 @@ cdhash_each(VALUE key, VALUE value, VALUE ary) static const rb_data_type_t label_wrapper = { "label_wrapper", {(void (*)(void *))rb_mark_tbl, (void (*)(void *))st_free_table, 0, 0,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; #define DECL_ID(name) \ diff --git a/marshal.c b/marshal.c index 967855529e6d76..c592f7fe387b4e 100644 --- a/marshal.c +++ b/marshal.c @@ -237,7 +237,7 @@ memsize_dump_arg(const void *ptr) static const rb_data_type_t dump_arg_data = { "dump_arg", {mark_dump_arg, free_dump_arg, memsize_dump_arg,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE @@ -1317,7 +1317,7 @@ memsize_load_arg(const void *ptr) static const rb_data_type_t load_arg_data = { "load_arg", {mark_load_arg, free_load_arg, memsize_load_arg,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | 
RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; #define r_entry(v, arg) r_entry0((v), (arg)->data->num_entries, (arg)) @@ -2626,7 +2626,7 @@ static const rb_data_type_t marshal_compat_type = { .dsize = marshal_compat_table_memsize, .dcompact = marshal_compat_table_mark_and_move, }, - .flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY, + .flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; static st_table * diff --git a/memory_view.c b/memory_view.c index 9f5d6715804b22..f360c6c88091de 100644 --- a/memory_view.c +++ b/memory_view.c @@ -65,7 +65,7 @@ const rb_data_type_t rb_memory_view_exported_object_registry_data_type = { exported_object_registry_free, 0, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static int @@ -124,7 +124,7 @@ static const rb_data_type_t memory_view_entry_data_type = { 0, 0, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; /* Register memory view functions for the given class */ diff --git a/proc.c b/proc.c index 99fb880881b9d8..1550b9ad8c5ec1 100644 --- a/proc.c +++ b/proc.c @@ -106,7 +106,7 @@ const rb_data_type_t ruby_proc_data_type = { proc_memsize, proc_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE }; #define proc_data_type ruby_proc_data_type @@ -285,7 +285,7 @@ const rb_data_type_t ruby_binding_data_type = { binding_memsize, binding_mark_and_move, }, - 0, 0, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; VALUE @@ -1795,7 +1795,7 @@ static const rb_data_type_t method_data_type = { NULL, // No external memory to report, bm_mark_and_move, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_FROZEN_SHAREABLE_NO_REC + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_FROZEN_SHAREABLE_NO_REC | RUBY_TYPED_CONCURRENT_FREE_SAFE }; VALUE diff --git a/process.c b/process.c index ac2ecfbbe62d71..be912be27cc754 100644 --- a/process.c +++ b/process.c @@ -597,7 +597,7 @@ static const rb_data_type_t rb_process_status_type = { .dfree = RUBY_DEFAULT_FREE, .dsize = NULL, }, - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; static VALUE @@ -1740,7 +1740,7 @@ memsize_exec_arg(const void *ptr) static const rb_data_type_t exec_arg_data_type = { "exec_arg", {mark_exec_arg, RUBY_TYPED_DEFAULT_FREE, memsize_exec_arg}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; #ifdef _WIN32 diff --git a/ractor.c b/ractor.c index 4726cf107bfb03..3deef5f6719410 100644 --- a/ractor.c +++ b/ractor.c @@ -321,7 +321,7 @@ static const rb_data_type_t ractor_data_type = { ractor_memsize, NULL, // update }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY /* | RUBY_TYPED_WB_PROTECTED */ + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE /* | RUBY_TYPED_WB_PROTECTED */ }; bool @@ -2450,7 +2450,7 @@ static const rb_data_type_t cross_ractor_require_data_type = { NULL, // memsize NULL, // compact }, - 0, 0, 
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_DECL_MARKING | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_DECL_MARKING | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE diff --git a/ractor_sync.c b/ractor_sync.c index 44c84ded92696f..405a7f8248eb08 100644 --- a/ractor_sync.c +++ b/ractor_sync.c @@ -36,7 +36,7 @@ static const rb_data_type_t ractor_port_data_type = { NULL, // memsize NULL, // update }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE, + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE, }; static st_data_t diff --git a/random.c b/random.c index b6c96f1b4d25ff..6795165962fe86 100644 --- a/random.c +++ b/random.c @@ -272,7 +272,7 @@ const rb_data_type_t rb_random_data_type = { random_free, random_memsize, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE }; #define random_mt_mark rb_random_mark @@ -293,7 +293,7 @@ static const rb_data_type_t random_mt_type = { }, &rb_random_data_type, (void *)&random_mt_if, - RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static rb_random_t * @@ -578,7 +578,7 @@ release_crypt(void *p) static const rb_data_type_t crypt_prov_type = { "HCRYPTPROV", {0, release_crypt,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static int diff --git a/ruby_parser.c b/ruby_parser.c index 267f619bf9cd18..d58d69de535f59 100644 --- a/ruby_parser.c +++ b/ruby_parser.c @@ -508,7 +508,7 @@ static const rb_data_type_t ruby_parser_data_type = { parser_free, parser_memsize, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; #ifdef UNIVERSAL_PARSER @@ -736,7 +736,7 @@ static const rb_data_type_t ast_data_type = { ast_free, NULL, // No dsize() because this object does not appear in ObjectSpace. 
}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE diff --git a/scheduler.c b/scheduler.c index c2f370a22aee4e..d542702d45b86d 100644 --- a/scheduler.c +++ b/scheduler.c @@ -90,7 +90,7 @@ static const rb_data_type_t blocking_operation_data_type = { RUBY_DEFAULT_FREE, blocking_operation_memsize, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE }; /* diff --git a/set.c b/set.c index 6bfded02a414ee..fc826aa5f6eeee 100644 --- a/set.c +++ b/set.c @@ -186,7 +186,7 @@ static const rb_data_type_t set_data_type = { .dsize = set_size, .dcompact = set_update_references, }, - .flags = RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE + .flags = RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static inline set_table * diff --git a/shape.c b/shape.c index 90036722f10026..bd9c2fc089c3b3 100644 --- a/shape.c +++ b/shape.c @@ -322,7 +322,7 @@ static const rb_data_type_t shape_tree_type = { .dsize = shape_tree_memsize, .dcompact = shape_tree_mark_and_move, }, - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; diff --git a/string.c b/string.c index 55a229f37c3b5c..6f69c7472223e0 100644 --- a/string.c +++ b/string.c @@ -7835,7 +7835,7 @@ mapping_buffer_free(void *p) static const rb_data_type_t mapping_buffer_type = { "mapping_buffer", {0, mapping_buffer_free,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE diff --git a/thread.c b/thread.c index 94432809e39da9..444cd14d955e3b 100644 --- a/thread.c +++ b/thread.c @@ -5092,7 +5092,7 @@ static const rb_data_type_t thgroup_data_type = { RUBY_TYPED_DEFAULT_FREE, NULL, // No external memory to report }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; /* @@ -5261,7 +5261,7 @@ thread_shield_mark(void *ptr) static const rb_data_type_t thread_shield_data_type = { "thread_shield", {thread_shield_mark, 0, 0,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE diff --git a/thread_sync.c b/thread_sync.c index 4d39b924e462d3..1ee77b6aeeae0f 100644 --- a/thread_sync.c +++ b/thread_sync.c @@ -168,7 +168,7 @@ mutex_memsize(const void *ptr) static const rb_data_type_t mutex_data_type = { "mutex", {NULL, mutex_free, mutex_memsize,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static rb_mutex_t * @@ -789,7 +789,7 @@ static const rb_data_type_t queue_data_type = { .dsize = queue_memsize, .dcompact = queue_mark_and_move, }, - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; static VALUE @@ -895,7 +895,7 @@ static const rb_data_type_t szqueue_data_type = { .dcompact = szqueue_mark_and_move, }, .parent = &queue_data_type, - .flags = 
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; static VALUE @@ -1235,7 +1235,7 @@ condvar_memsize(const void *ptr) static const rb_data_type_t cv_data_type = { "condvar", {0, RUBY_TYPED_DEFAULT_FREE, condvar_memsize,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED|RUBY_TYPED_CONCURRENT_FREE_SAFE }; static struct rb_condvar * diff --git a/time.c b/time.c index c3bda3f6af0472..261437a747a2f7 100644 --- a/time.c +++ b/time.c @@ -1909,7 +1909,7 @@ static const rb_data_type_t time_data_type = { .dsize = NULL, .dcompact = time_mark_and_move, }, - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; static VALUE diff --git a/transcode.c b/transcode.c index f8b0fec42ef275..ede9002d7d8152 100644 --- a/transcode.c +++ b/transcode.c @@ -3019,7 +3019,7 @@ econv_memsize(const void *ptr) static const rb_data_type_t econv_data_type = { "econv", {0, econv_free, econv_memsize,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE diff --git a/variable.c b/variable.c index 96f38da135d36f..35eb86443a9d75 100644 --- a/variable.c +++ b/variable.c @@ -2736,7 +2736,7 @@ autoload_table_compact(void *ptr) static const rb_data_type_t autoload_table_type = { "autoload_table", {autoload_table_mark, autoload_table_free, autoload_table_memsize, autoload_table_compact,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE }; #define check_autoload_table(av) \ @@ -2850,7 +2850,7 @@ autoload_data_memsize(const void *ptr) static const rb_data_type_t autoload_data_type = { "autoload_data", {autoload_data_mark_and_move, autoload_data_free, autoload_data_memsize, autoload_data_mark_and_move}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static void @@ -2888,7 +2888,7 @@ autoload_const_free(void *ptr) static const rb_data_type_t autoload_const_type = { "autoload_const", {autoload_const_mark_and_move, autoload_const_free, autoload_const_memsize, autoload_const_mark_and_move,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static struct autoload_data * diff --git a/vm.c b/vm.c index c88137366b31a2..0eabdfeaa5f1df 100644 --- a/vm.c +++ b/vm.c @@ -3561,7 +3561,7 @@ vm_memsize(const void *ptr) const rb_data_type_t ruby_vm_data_type = { "VM", {0, 0, vm_memsize,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; #define vm_data_type ruby_vm_data_type @@ -3899,7 +3899,7 @@ const rb_data_type_t ruby_threadptr_data_type = { thread_memsize, thread_compact, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; VALUE @@ -4726,7 +4726,7 @@ static const rb_data_type_t pin_array_list_type = { .dsize = pin_array_list_memsize, .dcompact = pin_array_list_update_references, }, - .flags = RUBY_TYPED_FREE_IMMEDIATELY | 
RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; static VALUE diff --git a/vm_backtrace.c b/vm_backtrace.c index c0bc46b8caf5c7..35faedc6e487e7 100644 --- a/vm_backtrace.c +++ b/vm_backtrace.c @@ -157,7 +157,7 @@ static const rb_data_type_t location_data_type = { NULL, // No external memory to report, location_ref_update, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; int @@ -567,7 +567,7 @@ static const rb_data_type_t backtrace_data_type = { /* Cannot set the RUBY_TYPED_EMBEDDABLE flag because the loc of frame_info * points elements in the backtrace array. This can cause the loc to become * incorrect if this backtrace object is moved by compaction. */ - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_CONCURRENT_FREE_SAFE }; int diff --git a/vm_method.c b/vm_method.c index 021b06bf00109b..03038ef688eef9 100644 --- a/vm_method.c +++ b/vm_method.c @@ -135,7 +135,7 @@ static const rb_data_type_t cc_table_type = { .dcompact = vm_cc_table_compact, }, .parent = &rb_managed_id_table_type, - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE, }; VALUE diff --git a/vm_trace.c b/vm_trace.c index 42b9991e7141bc..5457cc4627a8e5 100644 --- a/vm_trace.c +++ b/vm_trace.c @@ -905,7 +905,7 @@ static const rb_data_type_t tp_data_type = { RUBY_TYPED_DEFAULT_FREE, NULL, // Nothing allocated externally, so don't need a memsize function }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static VALUE diff --git a/weakmap.c b/weakmap.c index 7cef1fd46a63a7..256d0887655a03 100644 --- a/weakmap.c +++ b/weakmap.c @@ -141,7 +141,7 @@ const rb_data_type_t rb_weakmap_type = { wmap_compact, wmap_handle_weak_references, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static int @@ -627,7 +627,7 @@ static const rb_data_type_t rb_weakkeymap_type = { wkmap_compact, wkmap_handle_weak_references, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE }; static int From 4de7e589f7d008d89b379e2e2032416321932dde Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Mon, 16 Mar 2026 14:36:01 -0400 Subject: [PATCH 08/67] Don't free in sweep thread for typeddata that aren't concur free safe --- gc/default/default.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index e7d1791affd04e..31f0c0bde56ee5 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -4384,7 +4384,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p bool free_immediately = false; void (*dfree)(void *); if (RTYPEDDATA_P(vp)) { - free_immediately = 
(RTYPEDDATA_TYPE(vp)->flags & RUBY_TYPED_FREE_IMMEDIATELY) != 0; + free_immediately = (RTYPEDDATA_TYPE(vp)->flags & RUBY_TYPED_FREE_IMMEDIATELY) != 0 && (RTYPEDDATA_TYPE(vp)->flags & RUBY_TYPED_CONCURRENT_FREE_SAFE) != 0; dfree = RTYPEDDATA_TYPE(vp)->function.dfree; } else { @@ -4400,13 +4400,8 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p } } else { - if (rb_gc_obj_has_blacklisted_vm_weak_references(vp)) { - sweep_in_ruby_thread(objspace, page, vp, false); - break; - } - else { - goto free; - } + sweep_in_ruby_thread(objspace, page, vp, false); + break; } break; } @@ -5258,7 +5253,7 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) GC_ASSERT(!sweep_page->deferred_freelist); } else { sweep_page->free_slots = free_slots; - sweep_page->final_slots += deferred_free_final_slots; + // NOTE: sweep_page->final slots have already been updated by make_zombie GC_ASSERT(sweep_page->free_slots <= sweep_page->total_slots); GC_ASSERT(sweep_page->final_slots <= sweep_page->total_slots); sweep_page->heap->total_freed_objects += ctx.freed_slots; From 75b792df1c9b2d198d19e844994343510f917877 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Mon, 16 Mar 2026 15:29:37 -0400 Subject: [PATCH 09/67] Add a few concurrent free safe flags to ext typeddatas --- ext/date/date_core.c | 2 +- ext/digest/digest.c | 2 +- ext/socket/raddrinfo.c | 2 +- ext/stringio/stringio.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ext/date/date_core.c b/ext/date/date_core.c index f37c1a54e5f53e..f85dc3083a61be 100644 --- a/ext/date/date_core.c +++ b/ext/date/date_core.c @@ -3222,7 +3222,7 @@ static const rb_data_type_t d_lite_type = { "Date", {d_lite_gc_mark, RUBY_TYPED_DEFAULT_FREE, d_lite_memsize,}, 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED|RUBY_TYPED_FROZEN_SHAREABLE, + RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_CONCURRENT_FREE_SAFE|RUBY_TYPED_WB_PROTECTED|RUBY_TYPED_FROZEN_SHAREABLE, }; inline static VALUE diff --git a/ext/digest/digest.c b/ext/digest/digest.c index bd8d3e815ffe6a..e54f0d7bda8e7c 100644 --- a/ext/digest/digest.c +++ b/ext/digest/digest.c @@ -619,7 +619,7 @@ static const rb_data_type_t digest_type = { "digest", {0, RUBY_TYPED_DEFAULT_FREE, 0,}, 0, 0, - (RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED), + (RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_CONCURRENT_FREE_SAFE|RUBY_TYPED_WB_PROTECTED), }; static inline void diff --git a/ext/socket/raddrinfo.c b/ext/socket/raddrinfo.c index 6cdf5c6abc40e7..53a4e7f4564c11 100644 --- a/ext/socket/raddrinfo.c +++ b/ext/socket/raddrinfo.c @@ -1295,7 +1295,7 @@ addrinfo_memsize(const void *ptr) static const rb_data_type_t addrinfo_type = { "socket/addrinfo", {addrinfo_mark, addrinfo_free, addrinfo_memsize,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_WB_PROTECTED, + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_WB_PROTECTED, }; static VALUE diff --git a/ext/stringio/stringio.c b/ext/stringio/stringio.c index 09757a283eaf7c..fdb7f0e6550e14 100644 --- a/ext/stringio/stringio.c +++ b/ext/stringio/stringio.c @@ -119,7 +119,7 @@ static const rb_data_type_t strio_data_type = { strio_free, strio_memsize, }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED // uses reference count, not concurrent free safe }; #define check_strio(self) ((struct StringIO*)rb_check_typeddata((self), &strio_data_type)) From 
4fbf5611985a00b1cbb73ef3af138b902923b25e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Barri=C3=A9?= Date: Wed, 18 Mar 2026 17:05:54 +0100 Subject: [PATCH 10/67] Fix warnings --- gc/default/default.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 31f0c0bde56ee5..f2d5ee8c08acc1 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -5136,8 +5136,8 @@ is_last_heap(rb_objspace_t *objspace, rb_heap_t *heap) static int gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) { - int swept_slots = 0; - int pooled_slots = 0; + size_t swept_slots = 0; + size_t pooled_slots = 0; if (heap->pre_swept_slots_nodeferred >= GC_INCREMENTAL_SWEEP_POOL_SLOT_COUNT) { swept_slots = heap->pre_swept_slots_nodeferred - GC_INCREMENTAL_SWEEP_POOL_SLOT_COUNT; } From 04e2ceeee6a9f7c5080cd1cb643ccd069f2f5a2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Barri=C3=A9?= Date: Wed, 25 Mar 2026 14:55:43 +0100 Subject: [PATCH 11/67] Add GC.stat about pages swept by sweep thread --- gc/default/default.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/gc/default/default.c b/gc/default/default.c index f2d5ee8c08acc1..de6dd1380cfcb8 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -663,6 +663,8 @@ typedef struct rb_objspace { struct timespec ruby_thread_sweep_cpu_start_time; struct timespec ruby_thread_sweep_wall_start_time; #endif + size_t pages_swept_by_sweep_thread; + size_t pages_swept_by_sweep_thread_had_deferred_free_objects; /* Weak references */ size_t weak_references_count; @@ -4519,7 +4521,10 @@ gc_pre_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *pa } p += BITS_BITLENGTH * slot_size; } - + objspace->profile.pages_swept_by_sweep_thread++; + if (page->pre_deferred_free_slots > 0) { + objspace->profile.pages_swept_by_sweep_thread_had_deferred_free_objects++; + } psweep_debug(1, "[sweep] gc_pre_sweep_page(heap:%p page:%p) done, deferred free:%d\n", heap, page, page->pre_deferred_free_slots); } @@ -9062,6 +9067,8 @@ enum gc_stat_sym { gc_stat_sym_remembered_wb_unprotected_objects_limit, gc_stat_sym_old_objects, gc_stat_sym_old_objects_limit, + gc_stat_sym_pages_swept_by_sweep_thread, + gc_stat_sym_pages_swept_by_sweep_thread_had_deferred_free_objects, #if RGENGC_ESTIMATE_OLDMALLOC gc_stat_sym_oldmalloc_increase_bytes, gc_stat_sym_oldmalloc_increase_bytes_limit, @@ -9112,6 +9119,8 @@ setup_gc_stat_symbols(void) S(remembered_wb_unprotected_objects_limit); S(old_objects); S(old_objects_limit); + S(pages_swept_by_sweep_thread); + S(pages_swept_by_sweep_thread_had_deferred_free_objects); #if RGENGC_ESTIMATE_OLDMALLOC S(oldmalloc_increase_bytes); S(oldmalloc_increase_bytes_limit); @@ -9193,6 +9202,8 @@ rb_gc_impl_stat(void *objspace_ptr, VALUE hash_or_sym) SET(remembered_wb_unprotected_objects_limit, objspace->rgengc.uncollectible_wb_unprotected_objects_limit); SET(old_objects, objspace->rgengc.old_objects); SET(old_objects_limit, objspace->rgengc.old_objects_limit); + SET(pages_swept_by_sweep_thread, objspace->profile.pages_swept_by_sweep_thread); + SET(pages_swept_by_sweep_thread_had_deferred_free_objects, objspace->profile.pages_swept_by_sweep_thread_had_deferred_free_objects); #if RGENGC_ESTIMATE_OLDMALLOC SET(oldmalloc_increase_bytes, objspace->malloc_counters.oldmalloc_increase); SET(oldmalloc_increase_bytes_limit, objspace->rgengc.oldmalloc_increase_limit); From 7395b9e2ba7ddb858af4a845c3611e55533ec3ac Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: 
Thu, 26 Mar 2026 09:33:42 -0400 Subject: [PATCH 12/67] Turn off background sweep page bookkeeping --- gc/default/default.c | 66 -------------------------------------------- 1 file changed, 66 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index de6dd1380cfcb8..48af60fd4ebada 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -505,7 +505,6 @@ typedef struct rb_heap_struct { rb_atomic_t background_sweep_steps; // only incremented/checked by sweep thread rb_nativethread_cond_t sweep_page_cond; // associated with global sweep lock rb_nativethread_lock_t swept_pages_lock; - size_t pre_swept_slots_nodeferred; size_t pre_swept_slots_deferred; deferred_sweep_data_t deferred_sweep_data; bool is_finished_sweeping; @@ -4653,63 +4652,6 @@ gc_sweep_step_worker(rb_objspace_t *objspace, rb_heap_t *heap) int pre_empty_slots = sweep_page->pre_empty_slots; int free_slots = pre_freed_slots + pre_empty_slots; - if (objspace->background_sweep_mode && sweep_page->pre_deferred_free_slots == 0) { - if (free_slots == sweep_page->total_slots) { - GC_ASSERT(sweep_page->total_slots > 0); - psweep_debug(-6, "[sweep] (bg) gc_sweep_step_worker: heap %ld adding to empty_pages:%p (pre_empty:%d, pre_freed:%d)\n", - heap - heaps, sweep_page, sweep_page->pre_empty_slots, sweep_page->pre_freed_slots); - // We're guaranteed to stay in background mode during this (starting GC requires taking the - // sweep_lock to change sweep background mode to false) - GC_ASSERT(sweep_page->pre_final_slots == 0); - clear_pre_sweep_fields(sweep_page); - gc_post_sweep_page(objspace, heap, sweep_page, true); - move_to_empty_pages(objspace, heap, sweep_page); - continue; - } - else if (free_slots > 0) { - // These are just for statistics, not used in calculations - heap->freed_slots += sweep_page->pre_freed_slots; - heap->empty_slots += sweep_page->pre_empty_slots; - - sweep_page->free_slots = free_slots; - sweep_page->heap->total_freed_objects += sweep_page->pre_freed_slots; - clear_pre_sweep_fields(sweep_page); - gc_post_sweep_page(objspace, heap, sweep_page, false); - if (sweep_page->deferred_freelist) { - merge_freelists(sweep_page->deferred_freelist, sweep_page->freelist); - sweep_page->freelist = sweep_page->deferred_freelist; - } - sweep_page->deferred_freelist = NULL; - if (heap->pre_swept_slots_nodeferred < GC_INCREMENTAL_SWEEP_POOL_SLOT_COUNT) { - psweep_debug(-6, "[sweep] (bg) gc_sweep_step_worker: heap %ld adding to pooled pages:%p (pre_empty:%d, pre_freed:%d, pre_swept:%lu->%lu)\n", - heap - heaps, sweep_page, pre_empty_slots, pre_freed_slots, heap->pre_swept_slots_nodeferred, - heap->pre_swept_slots_nodeferred + free_slots); - heap->pre_swept_slots_nodeferred += free_slots; - heap_add_poolpage(objspace, heap, sweep_page); - continue; - } - else { - psweep_debug(-6, "[sweep] (bg) gc_sweep_step_worker: heap %ld adding to free pages:%p (pre_empty:%d, pre_freed:%d, pre_swept:%lu->%lu)\n", - heap - heaps, sweep_page, pre_empty_slots, pre_freed_slots, heap->pre_swept_slots_nodeferred, - heap->pre_swept_slots_nodeferred + free_slots); - heap_add_freepage(heap, sweep_page, "gc_sweep_step_worker"); - heap->pre_swept_slots_nodeferred += free_slots; - if (heap->pre_swept_slots_nodeferred > (GC_INCREMENTAL_SWEEP_SLOT_COUNT + GC_INCREMENTAL_SWEEP_POOL_SLOT_COUNT)) { - heap->pre_swept_slots_nodeferred = 0; - /*break;*/ - } - continue; - } - } - else { - // Don't even add to `swept_pages`, no further processing needed by ruby thread (no free slots) - clear_pre_sweep_fields(sweep_page); - 
gc_post_sweep_page(objspace, heap, sweep_page, false); - continue; - } - } - - #if PSWEEP_LOCK_STATS > 0 instrumented_lock_acquire(&heap->swept_pages_lock, &swept_pages_lock_stats); #else @@ -4820,7 +4762,6 @@ gc_sweep_start_heap(rb_objspace_t *objspace, rb_heap_t *heap) heap->swept_pages = NULL; heap->pooled_pages = NULL; heap->latest_swept_page = NULL; - heap->pre_swept_slots_nodeferred = 0; heap->pre_swept_slots_deferred = 0; heap->pre_sweeping_page = NULL; @@ -5143,13 +5084,6 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) { size_t swept_slots = 0; size_t pooled_slots = 0; - if (heap->pre_swept_slots_nodeferred >= GC_INCREMENTAL_SWEEP_POOL_SLOT_COUNT) { - swept_slots = heap->pre_swept_slots_nodeferred - GC_INCREMENTAL_SWEEP_POOL_SLOT_COUNT; - } - else if (heap->pre_swept_slots_nodeferred > 0) { - pooled_slots = heap->pre_swept_slots_nodeferred; - } - heap->pre_swept_slots_nodeferred = 0; #if VM_CHECK_MODE > 0 sweep_lock_lock(&objspace->sweep_lock); From d2f95d65c8d75ed62ff073c0d1196205aebfb194 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 26 Mar 2026 09:46:12 -0400 Subject: [PATCH 13/67] Update concurrent set for concurrent deletions --- concurrent_set.c | 526 +++++++++++++++++++++++++++++--------- internal/concurrent_set.h | 4 +- string.c | 4 +- symbol.c | 4 +- 4 files changed, 417 insertions(+), 121 deletions(-) diff --git a/concurrent_set.c b/concurrent_set.c index d6c1457e3822f8..1b630ad38ccdf5 100644 --- a/concurrent_set.c +++ b/concurrent_set.c @@ -4,14 +4,21 @@ #include "ruby/atomic.h" #include "vm_sync.h" -#define CONCURRENT_SET_CONTINUATION_BIT ((VALUE)1 << (sizeof(VALUE) * CHAR_BIT - 1)) -#define CONCURRENT_SET_HASH_MASK (~CONCURRENT_SET_CONTINUATION_BIT) +// insertion probes have gone past this slot +#define CONCURRENT_SET_CONTINUATION_BIT ((VALUE)0x2) +#define CONCURRENT_SET_KEY_MASK (~CONCURRENT_SET_CONTINUATION_BIT) +// This slot's hash can be reclaimed if and only if the key is EMPTY and it doesn't have a continuation bit. If the key is something +// else, this bit on the hash has no meaning and is ignored. 
+#define CONCURRENT_SET_HASH_RECLAIMABLE_BIT ((VALUE)1 << (sizeof(VALUE) * CHAR_BIT - 1)) +#define CONCURRENT_SET_HASH_MASK (~CONCURRENT_SET_HASH_RECLAIMABLE_BIT) + +#define CONCURRENT_SET_DEBUG 0 enum concurrent_set_special_values { - CONCURRENT_SET_EMPTY, - CONCURRENT_SET_DELETED, - CONCURRENT_SET_MOVED, - CONCURRENT_SET_SPECIAL_VALUE_COUNT + CONCURRENT_SET_EMPTY = 0, + CONCURRENT_SET_TOMBSTONE = 1, + CONCURRENT_SET_MOVED = 5, // continuation bit is 0x02, so 0x05 doesn't have bits in conflict with it + CONCURRENT_SET_SPECIAL_VALUE_COUNT = 6 }; struct concurrent_set_entry { @@ -22,26 +29,39 @@ struct concurrent_set_entry { struct concurrent_set { rb_atomic_t size; unsigned int capacity; - unsigned int deleted_entries; + rb_atomic_t deleted_entries; const struct rb_concurrent_set_funcs *funcs; struct concurrent_set_entry *entries; + int key_type; +#if CONCURRENT_SET_DEBUG + rb_atomic_t find_count; + rb_atomic_t find_probe_total; + rb_atomic_t find_probe_max; + rb_atomic_t insert_count; + rb_atomic_t insert_probe_total; + rb_atomic_t insert_probe_max; +#endif }; -static void -concurrent_set_mark_continuation(struct concurrent_set_entry *entry, VALUE curr_hash_and_flags) +static bool +concurrent_set_mark_continuation(struct concurrent_set_entry *entry, VALUE raw_key) { - if (curr_hash_and_flags & CONCURRENT_SET_CONTINUATION_BIT) return; - - RUBY_ASSERT((curr_hash_and_flags & CONCURRENT_SET_HASH_MASK) != 0); + if (raw_key & CONCURRENT_SET_CONTINUATION_BIT) return true; - VALUE new_hash = curr_hash_and_flags | CONCURRENT_SET_CONTINUATION_BIT; - VALUE prev_hash = rbimpl_atomic_value_cas(&entry->hash, curr_hash_and_flags, new_hash, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_RELAXED); + VALUE new_key = raw_key | CONCURRENT_SET_CONTINUATION_BIT; // NOTE: raw_key can be CONCURRENT_SET_EMPTY + VALUE prev_key = rbimpl_atomic_value_cas(&entry->key, raw_key, new_key, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_ACQUIRE); - // At the moment we only expect to be racing concurrently against another - // thread also setting the continuation bit. - // In the future if deletion is concurrent this will need adjusting - RUBY_ASSERT(prev_hash == curr_hash_and_flags || prev_hash == new_hash); - (void)prev_hash; + if (prev_key == raw_key || prev_key == new_key) { + return true; + } + else if ((prev_key & CONCURRENT_SET_KEY_MASK) == CONCURRENT_SET_TOMBSTONE) { + return true; + } + else { + // * key could have been made EMPTY, and anything could have happened to this slot since then. Need to retry. 
+ // * key could have been moved during resize + return false; + } } static VALUE @@ -49,11 +69,9 @@ concurrent_set_hash(const struct concurrent_set *set, VALUE key) { VALUE hash = set->funcs->hash(key); hash &= CONCURRENT_SET_HASH_MASK; - if (hash == 0) { - hash ^= CONCURRENT_SET_HASH_MASK; - } + if (hash == 0) hash = ~(VALUE)0 & CONCURRENT_SET_HASH_MASK; RUBY_ASSERT(hash != 0); - RUBY_ASSERT(!(hash & CONCURRENT_SET_CONTINUATION_BIT)); + RUBY_ASSERT(!(hash & CONCURRENT_SET_HASH_RECLAIMABLE_BIT)); return hash; } @@ -91,20 +109,31 @@ static const rb_data_type_t concurrent_set_type = { .dsize = concurrent_set_size, }, /* Hack: NOT WB_PROTECTED on purpose (see above) */ - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_CONCURRENT_FREE_SAFE + /* NOTE: don't make embedded due to compaction */ + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE }; VALUE -rb_concurrent_set_new(const struct rb_concurrent_set_funcs *funcs, int capacity) +rb_concurrent_set_new(const struct rb_concurrent_set_funcs *funcs, int capacity, int key_type) { struct concurrent_set *set; VALUE obj = TypedData_Make_Struct(0, struct concurrent_set, &concurrent_set_type, set); set->funcs = funcs; set->entries = ZALLOC_N(struct concurrent_set_entry, capacity); set->capacity = capacity; + (void)key_type; +#if CONCURRENT_SET_DEBUG + set->key_type = key_type; +#endif return obj; } +void * +rb_concurrent_set_get_data(VALUE set_obj) +{ + return RTYPEDDATA_GET_DATA(set_obj); +} + rb_atomic_t rb_concurrent_set_size(VALUE set_obj) { @@ -113,6 +142,50 @@ rb_concurrent_set_size(VALUE set_obj) return RUBY_ATOMIC_LOAD(set->size); } +unsigned int +rb_concurrent_set_capacity(VALUE set_obj) +{ + struct concurrent_set *set = RTYPEDDATA_GET_DATA(set_obj); + + return set->capacity; +} + +void +rb_concurrent_set_probe_stats(VALUE set_obj, + rb_atomic_t *find_count, rb_atomic_t *find_probe_total, rb_atomic_t *find_probe_max, + rb_atomic_t *insert_count, rb_atomic_t *insert_probe_total, rb_atomic_t *insert_probe_max) +{ +#if CONCURRENT_SET_DEBUG + struct concurrent_set *set = RTYPEDDATA_GET_DATA(set_obj); + *find_count = RUBY_ATOMIC_LOAD(set->find_count); + *find_probe_total = RUBY_ATOMIC_LOAD(set->find_probe_total); + *find_probe_max = RUBY_ATOMIC_LOAD(set->find_probe_max); + *insert_count = RUBY_ATOMIC_LOAD(set->insert_count); + *insert_probe_total = RUBY_ATOMIC_LOAD(set->insert_probe_total); + *insert_probe_max = RUBY_ATOMIC_LOAD(set->insert_probe_max); +#else + *find_count = 0; + *find_probe_total = 0; + *find_probe_max = 0; + *insert_count = 0; + *insert_probe_total = 0; + *insert_probe_max = 0; +#endif +} + +#if CONCURRENT_SET_DEBUG +static void +concurrent_set_atomic_max(rb_atomic_t *target, rb_atomic_t val) +{ + rb_atomic_t cur = RUBY_ATOMIC_LOAD(*target); + while (val > cur) { + rb_atomic_t prev = rbimpl_atomic_cas(target, cur, val, RBIMPL_ATOMIC_RELAXED, RBIMPL_ATOMIC_RELAXED); + if (prev == cur) break; + cur = prev; + } +} +#endif + struct concurrent_set_probe { int idx; int d; @@ -138,45 +211,35 @@ concurrent_set_probe_next(struct concurrent_set_probe *probe) } static void -concurrent_set_try_resize_without_locking(VALUE old_set_obj, VALUE *set_obj_ptr) +concurrent_set_try_resize_locked(VALUE old_set_obj, VALUE *set_obj_ptr, VALUE new_set_obj, int old_capacity) { - // Check if another thread has already resized. 
- if (rbimpl_atomic_value_load(set_obj_ptr, RBIMPL_ATOMIC_ACQUIRE) != old_set_obj) { - return; - } - struct concurrent_set *old_set = RTYPEDDATA_GET_DATA(old_set_obj); - - // This may overcount by up to the number of threads concurrently attempting to insert - // GC may also happen between now and the set being rebuilt - int expected_size = rbimpl_atomic_load(&old_set->size, RBIMPL_ATOMIC_RELAXED) - old_set->deleted_entries; - - // NOTE: new capacity must make sense with load factor, don't change one without checking the other. struct concurrent_set_entry *old_entries = old_set->entries; - int old_capacity = old_set->capacity; - int new_capacity = old_capacity * 2; - if (new_capacity > expected_size * 8) { - new_capacity = old_capacity / 2; - } - else if (new_capacity > expected_size * 4) { - new_capacity = old_capacity; - } - - // May cause GC and therefore deletes, so must happen first. - VALUE new_set_obj = rb_concurrent_set_new(old_set->funcs, new_capacity); struct concurrent_set *new_set = RTYPEDDATA_GET_DATA(new_set_obj); for (int i = 0; i < old_capacity; i++) { struct concurrent_set_entry *old_entry = &old_entries[i]; - VALUE key = rbimpl_atomic_value_exchange(&old_entry->key, CONCURRENT_SET_MOVED, RBIMPL_ATOMIC_ACQUIRE); - RUBY_ASSERT(key != CONCURRENT_SET_MOVED); + VALUE prev_key_raw = rbimpl_atomic_value_exchange(&old_entry->key, CONCURRENT_SET_MOVED, RBIMPL_ATOMIC_ACQUIRE); + VALUE prev_key = prev_key_raw & CONCURRENT_SET_KEY_MASK; + RUBY_ASSERT(prev_key != CONCURRENT_SET_MOVED); - if (key < CONCURRENT_SET_SPECIAL_VALUE_COUNT) continue; - if (!RB_SPECIAL_CONST_P(key) && rb_objspace_garbage_object_p(key)) continue; + if (prev_key < CONCURRENT_SET_SPECIAL_VALUE_COUNT) continue; - VALUE hash = rbimpl_atomic_value_load(&old_entry->hash, RBIMPL_ATOMIC_RELAXED) & CONCURRENT_SET_HASH_MASK; - RUBY_ASSERT(hash != 0); - RUBY_ASSERT(hash == concurrent_set_hash(old_set, key)); + if (!RB_SPECIAL_CONST_P(prev_key) && rb_objspace_garbage_object_p(prev_key)) continue; + +#if CONCURRENT_SET_DEBUG + if (new_set->key_type == T_STRING) { + RUBY_ASSERT(BUILTIN_TYPE(prev_key) == T_STRING); + RUBY_ASSERT(FL_TEST(prev_key, RSTRING_FSTR)); + } + else { + RUBY_ASSERT(STATIC_SYM_P(prev_key)); + } +#endif + + VALUE hash = rbimpl_atomic_value_load(&old_entry->hash, RBIMPL_ATOMIC_ACQUIRE) & CONCURRENT_SET_HASH_MASK; + if (hash == 0) continue; + RUBY_ASSERT(concurrent_set_hash(old_set, prev_key) == hash); // Insert key into new_set. struct concurrent_set_probe probe; @@ -185,19 +248,19 @@ concurrent_set_try_resize_without_locking(VALUE old_set_obj, VALUE *set_obj_ptr) while (true) { struct concurrent_set_entry *entry = &new_set->entries[idx]; - if (entry->hash == CONCURRENT_SET_EMPTY) { + if (entry->hash == 0) { RUBY_ASSERT(entry->key == CONCURRENT_SET_EMPTY); new_set->size++; RUBY_ASSERT(new_set->size <= new_set->capacity / 2); - entry->key = key; + entry->key = prev_key; // no continuation bit entry->hash = hash; break; } RUBY_ASSERT(entry->key >= CONCURRENT_SET_SPECIAL_VALUE_COUNT); - entry->hash |= CONCURRENT_SET_CONTINUATION_BIT; + entry->key |= CONCURRENT_SET_CONTINUATION_BIT; idx = concurrent_set_probe_next(&probe); } } @@ -207,12 +270,101 @@ concurrent_set_try_resize_without_locking(VALUE old_set_obj, VALUE *set_obj_ptr) RB_GC_GUARD(old_set_obj); } +// FIXME: cross-platform initializer. 
Also, we don't need rwlock anymore, just normal mutex will do +static pthread_rwlock_t resize_lock = PTHREAD_RWLOCK_INITIALIZER; +static pthread_t resize_lock_owner; +static unsigned int resize_lock_lvl; + +static inline void +resize_lock_wrlock(bool allow_reentry) +{ + if (allow_reentry && pthread_self() == resize_lock_owner) { + // Already held by this thread. + } + else { + int r; + if ((r = pthread_rwlock_wrlock(&resize_lock))) { + rb_bug_errno("pthread_rwlock_wrlock", r); + } + resize_lock_owner = pthread_self(); + } + resize_lock_lvl++; +} + +static inline void +resize_lock_wrunlock(void) +{ + RUBY_ASSERT(resize_lock_lvl > 0); + resize_lock_lvl--; + if (resize_lock_lvl == 0) { + resize_lock_owner = 0; + int r; + if ((r = pthread_rwlock_unlock(&resize_lock))) { + rb_bug_errno("pthread_rwlock_unlock", r); + } + } +} + +static inline bool +resize_lock_rdlock(void) +{ + if (resize_lock_owner == pthread_self()) { // we have the write lock, don't take it + return false; + } + int r; + if ((r = pthread_rwlock_rdlock(&resize_lock))) { + rb_bug_errno("pthread_rwlock_rdlock", r); + } + return true; +} + +static inline void +resize_lock_rdunlock(void) +{ + int r; + if ((r = pthread_rwlock_unlock(&resize_lock))) { + rb_bug_errno("pthread_rwlock_unlock", r); + } +} + static void concurrent_set_try_resize(VALUE old_set_obj, VALUE *set_obj_ptr) { - RB_VM_LOCKING() { - concurrent_set_try_resize_without_locking(old_set_obj, set_obj_ptr); + unsigned int lev; + RB_VM_LOCK_ENTER_LEV(&lev); + { + // Check if another thread has already resized. + if (rbimpl_atomic_value_load(set_obj_ptr, RBIMPL_ATOMIC_ACQUIRE) != old_set_obj) { + RB_VM_LOCK_LEAVE_LEV(&lev); + return; + } + struct concurrent_set *old_set = RTYPEDDATA_GET_DATA(old_set_obj); + + // This may overcount by up to the number of threads concurrently attempting to insert + // GC may also happen between now and the set being rebuilt + int expected_size = rbimpl_atomic_load(&old_set->size, RBIMPL_ATOMIC_RELAXED) - old_set->deleted_entries; + + // NOTE: new capacity must make sense with load factor, don't change one without checking the other. + int old_capacity = old_set->capacity; + int new_capacity = old_capacity * 2; + if (new_capacity > expected_size * 8) { + new_capacity = old_capacity / 2; + } + else if (new_capacity > expected_size * 4) { + new_capacity = old_capacity; + } + + // May cause GC and therefore deletes, so must happen first. 
+ VALUE new_set_obj = rb_concurrent_set_new(old_set->funcs, new_capacity, old_set->key_type); + /*fprintf(stderr, "concurrent set resize from %d to %d\n", old_capacity, new_capacity);*/ + // deletes from sweep thread must not happen during resize and sweep thread can't take VM lock so it takes the resize lock + resize_lock_wrlock(true); + { + concurrent_set_try_resize_locked(old_set_obj, set_obj_ptr, new_set_obj, old_capacity); + } + resize_lock_wrunlock(); } + RB_VM_LOCK_LEAVE_LEV(&lev); } VALUE @@ -242,29 +394,39 @@ rb_concurrent_set_find(VALUE *set_obj_ptr, VALUE key) while (true) { struct concurrent_set_entry *entry = &set->entries[idx]; - VALUE curr_hash_and_flags = rbimpl_atomic_value_load(&entry->hash, RBIMPL_ATOMIC_ACQUIRE); - VALUE curr_hash = curr_hash_and_flags & CONCURRENT_SET_HASH_MASK; - bool continuation = curr_hash_and_flags & CONCURRENT_SET_CONTINUATION_BIT; - - if (curr_hash_and_flags == CONCURRENT_SET_EMPTY) { + VALUE curr_hash = rbimpl_atomic_value_load(&entry->hash, RBIMPL_ATOMIC_ACQUIRE) & CONCURRENT_SET_HASH_MASK; + + if (curr_hash == 0) { +#if CONCURRENT_SET_DEBUG + rbimpl_atomic_fetch_add(&set->find_count, 1, RBIMPL_ATOMIC_RELAXED); + rbimpl_atomic_fetch_add(&set->find_probe_total, probe.d, RBIMPL_ATOMIC_RELAXED); + concurrent_set_atomic_max(&set->find_probe_max, probe.d); +#endif return 0; } + VALUE raw_key = rbimpl_atomic_value_load(&entry->key, RBIMPL_ATOMIC_ACQUIRE); + VALUE curr_key = raw_key & CONCURRENT_SET_KEY_MASK; + bool continuation = raw_key & CONCURRENT_SET_CONTINUATION_BIT; + if (curr_hash != hash) { if (!continuation) { +#if CONCURRENT_SET_DEBUG + rbimpl_atomic_fetch_add(&set->find_count, 1, RBIMPL_ATOMIC_RELAXED); + rbimpl_atomic_fetch_add(&set->find_probe_total, probe.d, RBIMPL_ATOMIC_RELAXED); + concurrent_set_atomic_max(&set->find_probe_max, probe.d); +#endif return 0; } idx = concurrent_set_probe_next(&probe); continue; } - VALUE curr_key = rbimpl_atomic_value_load(&entry->key, RBIMPL_ATOMIC_ACQUIRE); - switch (curr_key) { case CONCURRENT_SET_EMPTY: - // In-progress insert: hash written but key not yet + // In-progress insert: hash written but key not yet. break; - case CONCURRENT_SET_DELETED: + case CONCURRENT_SET_TOMBSTONE: break; case CONCURRENT_SET_MOVED: // Wait @@ -280,11 +442,21 @@ rb_concurrent_set_find(VALUE *set_obj_ptr, VALUE key) if (set->funcs->cmp(key, curr_key)) { // We've found a match. 
+#if CONCURRENT_SET_DEBUG + rbimpl_atomic_fetch_add(&set->find_count, 1, RBIMPL_ATOMIC_RELAXED); + rbimpl_atomic_fetch_add(&set->find_probe_total, probe.d, RBIMPL_ATOMIC_RELAXED); + concurrent_set_atomic_max(&set->find_probe_max, probe.d); +#endif RB_GC_GUARD(set_obj); return curr_key; } if (!continuation) { +#if CONCURRENT_SET_DEBUG + rbimpl_atomic_fetch_add(&set->find_count, 1, RBIMPL_ATOMIC_RELAXED); + rbimpl_atomic_fetch_add(&set->find_probe_total, probe.d, RBIMPL_ATOMIC_RELAXED); + concurrent_set_atomic_max(&set->find_probe_max, probe.d); +#endif return 0; } @@ -312,7 +484,7 @@ rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data) RUBY_ASSERT(set_obj); struct concurrent_set *set = RTYPEDDATA_GET_DATA(set_obj); - key = set->funcs->create(key, data); + key = set->funcs->create(key, data); // this can join GC (takes VM Lock) VALUE hash = concurrent_set_hash(set, key); struct concurrent_set_probe probe; @@ -333,33 +505,40 @@ rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data) while (true) { struct concurrent_set_entry *entry = &set->entries[idx]; - VALUE curr_hash_and_flags = rbimpl_atomic_value_load(&entry->hash, RBIMPL_ATOMIC_ACQUIRE); - VALUE curr_hash = curr_hash_and_flags & CONCURRENT_SET_HASH_MASK; - bool continuation = curr_hash_and_flags & CONCURRENT_SET_CONTINUATION_BIT; - - if (curr_hash_and_flags == CONCURRENT_SET_EMPTY) { + bool can_continue_probing; + VALUE raw_hash = rbimpl_atomic_value_load(&entry->hash, RBIMPL_ATOMIC_ACQUIRE); + VALUE curr_hash = raw_hash & CONCURRENT_SET_HASH_MASK; + if (raw_hash == 0) { // Reserve this slot for our hash value - curr_hash_and_flags = rbimpl_atomic_value_cas(&entry->hash, CONCURRENT_SET_EMPTY, hash, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_RELAXED); - if (curr_hash_and_flags != CONCURRENT_SET_EMPTY) { + raw_hash = rbimpl_atomic_value_cas(&entry->hash, 0, hash, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_ACQUIRE); + if (raw_hash != 0) { // Lost race, retry same slot to check winner's hash continue; } - - // CAS succeeded, so these are the values stored - curr_hash_and_flags = hash; + raw_hash = hash; curr_hash = hash; - // Fall through to try to claim key } - if (curr_hash != hash) { - goto probe_next; - } - - VALUE curr_key = rbimpl_atomic_value_load(&entry->key, RBIMPL_ATOMIC_ACQUIRE); + VALUE raw_key = rbimpl_atomic_value_load(&entry->key, RBIMPL_ATOMIC_ACQUIRE); + VALUE curr_key = raw_key & CONCURRENT_SET_KEY_MASK; + bool continuation = raw_key & CONCURRENT_SET_CONTINUATION_BIT; switch (curr_key) { case CONCURRENT_SET_EMPTY: { + if ((raw_hash & CONCURRENT_SET_HASH_RECLAIMABLE_BIT) && !continuation) { + // Reclaim this reclaimable slot by clearing the reclaimable bit + VALUE prev_hash = rbimpl_atomic_value_cas(&entry->hash, raw_hash, hash, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_ACQUIRE); + if (prev_hash != raw_hash) { + // Lost race, retry same slot + continue; + } + curr_hash = hash; + raw_hash = hash; + } + if (curr_hash != hash) { + goto probe_next; + } rb_atomic_t prev_size = rbimpl_atomic_fetch_add(&set->size, 1, RBIMPL_ATOMIC_RELAXED); // Load_factor reached at 75% full. ex: prev_size: 32, capacity: 64, load_factor: 50%. 
@@ -370,9 +549,13 @@ rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data) goto retry; } - VALUE prev_key = rbimpl_atomic_value_cas(&entry->key, CONCURRENT_SET_EMPTY, key, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_RELAXED); - if (prev_key == CONCURRENT_SET_EMPTY) { - RUBY_ASSERT(rb_concurrent_set_find(set_obj_ptr, key) == key); + VALUE prev_raw_key = rbimpl_atomic_value_cas(&entry->key, raw_key, key | (continuation ? CONCURRENT_SET_CONTINUATION_BIT : 0), RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_ACQUIRE); + if (prev_raw_key == raw_key) { +#if CONCURRENT_SET_DEBUG + rbimpl_atomic_fetch_add(&set->insert_count, 1, RBIMPL_ATOMIC_RELAXED); + rbimpl_atomic_fetch_add(&set->insert_probe_total, probe.d, RBIMPL_ATOMIC_RELAXED); + concurrent_set_atomic_max(&set->insert_probe_max, probe.d); +#endif RB_GC_GUARD(set_obj); return key; } @@ -380,31 +563,41 @@ rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data) // Entry was not inserted. rbimpl_atomic_sub(&set->size, 1, RBIMPL_ATOMIC_RELAXED); - // Another thread won the race, try again at the same location. + // * Another thread with the same hash could have won the race, try again at the same location, we might find it. + // * A resize could also be underway, and `prev_raw_key` could be CONCURRENT_SET_MOVED. + // * The continuation bit could also have been set on the key just now, in which case we'll retry continue; } } - case CONCURRENT_SET_DELETED: + case CONCURRENT_SET_TOMBSTONE: break; case CONCURRENT_SET_MOVED: // Wait RB_VM_LOCKING(); goto retry; default: - // We're never GC during our search + // what about if hash is marked reclaimed but key is not cleared yet + if (curr_hash != hash) { + goto probe_next; + } // If the continuation bit wasn't set at the start of our search, - // any concurrent find with the same hash value would also look at + // any concurrent find_or_insert with the same hash value would also look at // this location and try to swap curr_key if (UNLIKELY(!RB_SPECIAL_CONST_P(curr_key) && rb_objspace_garbage_object_p(curr_key))) { if (continuation) { goto probe_next; } - rbimpl_atomic_value_cas(&entry->key, curr_key, CONCURRENT_SET_EMPTY, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_RELAXED); + rbimpl_atomic_value_cas(&entry->key, raw_key, CONCURRENT_SET_EMPTY, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_RELAXED); continue; } if (set->funcs->cmp(key, curr_key)) { // We've found a live match. 
+#if CONCURRENT_SET_DEBUG + rbimpl_atomic_fetch_add(&set->insert_count, 1, RBIMPL_ATOMIC_RELAXED); + rbimpl_atomic_fetch_add(&set->insert_probe_total, probe.d, RBIMPL_ATOMIC_RELAXED); + concurrent_set_atomic_max(&set->insert_probe_max, probe.d); +#endif RB_GC_GUARD(set_obj); // We created key using set->funcs->create, but we didn't end @@ -418,8 +611,10 @@ rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data) } probe_next: - RUBY_ASSERT(curr_hash_and_flags != CONCURRENT_SET_EMPTY); - concurrent_set_mark_continuation(entry, curr_hash_and_flags); + can_continue_probing = concurrent_set_mark_continuation(entry, raw_key); + if (!can_continue_probing) { + continue; + } idx = concurrent_set_probe_next(&probe); } } @@ -429,22 +624,21 @@ concurrent_set_delete_entry_locked(struct concurrent_set *set, struct concurrent { ASSERT_vm_locking_with_barrier(); - if (entry->hash & CONCURRENT_SET_CONTINUATION_BIT) { - entry->hash = CONCURRENT_SET_CONTINUATION_BIT; - entry->key = CONCURRENT_SET_DELETED; + if (entry->key & CONCURRENT_SET_CONTINUATION_BIT) { + entry->key = CONCURRENT_SET_TOMBSTONE | CONCURRENT_SET_CONTINUATION_BIT; set->deleted_entries++; } else { - entry->hash = CONCURRENT_SET_EMPTY; + entry->hash = 0; entry->key = CONCURRENT_SET_EMPTY; set->size--; } } -VALUE -rb_concurrent_set_delete_by_identity(VALUE set_obj, VALUE key) + +static VALUE +rb_concurrent_set_delete_by_identity_locked(VALUE set_obj, VALUE key) { - ASSERT_vm_locking_with_barrier(); struct concurrent_set *set = RTYPEDDATA_GET_DATA(set_obj); @@ -452,25 +646,70 @@ rb_concurrent_set_delete_by_identity(VALUE set_obj, VALUE key) struct concurrent_set_probe probe; int idx = concurrent_set_probe_start(&probe, set, hash); + bool hash_cleared = false; + VALUE prev_hash = 0; while (true) { struct concurrent_set_entry *entry = &set->entries[idx]; - VALUE curr_key = entry->key; + VALUE raw_key = rbimpl_atomic_value_load(&entry->key, RBIMPL_ATOMIC_ACQUIRE); + VALUE loaded_hash_raw = rbimpl_atomic_value_load(&entry->hash, RBIMPL_ATOMIC_ACQUIRE); + VALUE loaded_hash = loaded_hash_raw & CONCURRENT_SET_HASH_MASK; + bool continuation = raw_key & CONCURRENT_SET_CONTINUATION_BIT; + VALUE curr_key = raw_key & CONCURRENT_SET_KEY_MASK; switch (curr_key) { case CONCURRENT_SET_EMPTY: - // We didn't find our entry to delete. 
- return 0; - case CONCURRENT_SET_DELETED: + if (!continuation) { + return 0; + } + break; + case CONCURRENT_SET_TOMBSTONE: break; case CONCURRENT_SET_MOVED: rb_bug("rb_concurrent_set_delete_by_identity: moved entry"); break; default: if (key == curr_key) { - RUBY_ASSERT((entry->hash & CONCURRENT_SET_HASH_MASK) == hash); - concurrent_set_delete_entry_locked(set, entry); - return curr_key; + VALUE new_key; + RUBY_ASSERT(hash_cleared || loaded_hash == hash); + if (continuation) { + new_key = CONCURRENT_SET_TOMBSTONE | CONCURRENT_SET_CONTINUATION_BIT; + } + else { + new_key = CONCURRENT_SET_EMPTY; + } + + if (!hash_cleared) { + // Hashes only change here and they get reclaimed in find_or_insert + prev_hash = rbimpl_atomic_value_cas(&entry->hash, loaded_hash_raw, hash | CONCURRENT_SET_HASH_RECLAIMABLE_BIT, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_ACQUIRE); + RUBY_ASSERT(prev_hash == hash || prev_hash == (hash | CONCURRENT_SET_HASH_RECLAIMABLE_BIT)); + hash_cleared = true; + } + VALUE prev_key = rbimpl_atomic_value_cas(&entry->key, raw_key, new_key, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_ACQUIRE); + if (prev_key == raw_key) { + if (continuation) { + rbimpl_atomic_add(&set->deleted_entries, 1, RBIMPL_ATOMIC_RELAXED); + } + else { + rbimpl_atomic_sub(&set->size, 1, RBIMPL_ATOMIC_RELAXED); + } + return curr_key; + } + else if (!continuation && prev_key == (raw_key | CONCURRENT_SET_CONTINUATION_BIT)) { + continue; // try again, the continuation bit was just set on this key so we can tombstone it + } + else if ((prev_key & CONCURRENT_SET_KEY_MASK) == CONCURRENT_SET_EMPTY || (prev_key & CONCURRENT_SET_KEY_MASK) == CONCURRENT_SET_TOMBSTONE) { + return curr_key; // the key was deleted by another thread + } + else { + // the key was changed to EMPTY by being garbage during find_or_insert and then a new key was put at the same slot. It's okay + // that the hash was marked reclaimable above. + RUBY_ASSERT(prev_hash != 0); + return curr_key; + } + } + else if (!continuation) { + return 0; } break; } @@ -479,8 +718,41 @@ rb_concurrent_set_delete_by_identity(VALUE set_obj, VALUE key) } } -void -rb_concurrent_set_foreach_with_replace(VALUE set_obj, int (*callback)(VALUE *key, void *data), void *data) +// This can be called concurrently by a ruby GC thread and the sweep thread. 
+VALUE +rb_concurrent_set_delete_by_identity(VALUE *set_obj_ptr, VALUE key) +{ + VALUE result; + bool is_sweep_thread_p(void); + + VALUE set_obj = rbimpl_atomic_value_load(set_obj_ptr, RBIMPL_ATOMIC_ACQUIRE); + + if (is_sweep_thread_p()) { + while (1) { + bool lock_taken = resize_lock_rdlock(); + { + VALUE current_set_obj = rbimpl_atomic_value_load(set_obj_ptr, RBIMPL_ATOMIC_ACQUIRE); + if (current_set_obj != set_obj) { + set_obj = current_set_obj; + // retry - resize happened + } + else { + result = rb_concurrent_set_delete_by_identity_locked(set_obj, key); + if (lock_taken) resize_lock_rdunlock(); + break; + } + } + if (lock_taken) resize_lock_rdunlock(); + } + } + else { + result = rb_concurrent_set_delete_by_identity_locked(set_obj, key); + } + return result; +} + +static void +rb_concurrent_set_foreach_with_replace_locked(VALUE set_obj, int (*callback)(VALUE *key, void *data), void *data) { ASSERT_vm_locking_with_barrier(); @@ -488,26 +760,50 @@ rb_concurrent_set_foreach_with_replace(VALUE set_obj, int (*callback)(VALUE *key for (unsigned int i = 0; i < set->capacity; i++) { struct concurrent_set_entry *entry = &set->entries[i]; - VALUE key = entry->key; + VALUE raw_key = entry->key; + bool continuation = raw_key & CONCURRENT_SET_CONTINUATION_BIT; + VALUE key = raw_key & CONCURRENT_SET_KEY_MASK; switch (key) { case CONCURRENT_SET_EMPTY: - case CONCURRENT_SET_DELETED: + case CONCURRENT_SET_TOMBSTONE: continue; case CONCURRENT_SET_MOVED: rb_bug("rb_concurrent_set_foreach_with_replace: moved entry"); break; default: { - int ret = callback(&entry->key, data); + VALUE cb_key = key; + int ret = callback(&cb_key, data); switch (ret) { case ST_STOP: return; case ST_DELETE: concurrent_set_delete_entry_locked(set, entry); break; + case ST_CONTINUE: + if (cb_key != key) { + // Key was replaced by callback + entry->key = cb_key | (continuation ? CONCURRENT_SET_CONTINUATION_BIT : 0); + } + break; + case ST_REPLACE: + rb_bug("unexpected concurrent_set callback return value: ST_REPLACE"); } break; } } } } + +void +rb_concurrent_set_foreach_with_replace(VALUE set_obj, int (*callback)(VALUE *key, void *data), void *data) +{ + RB_VM_LOCKING() { + // Don't allow concurrent deletes from sweep thread during this time. Maybe we can loosen this restriction. 
+ resize_lock_wrlock(true); + { + rb_concurrent_set_foreach_with_replace_locked(set_obj, callback, data); + } + resize_lock_wrunlock(); + } +} diff --git a/internal/concurrent_set.h b/internal/concurrent_set.h index 76cbefab0413ec..ce0b366a3cdc66 100644 --- a/internal/concurrent_set.h +++ b/internal/concurrent_set.h @@ -11,11 +11,11 @@ struct rb_concurrent_set_funcs { void (*free)(VALUE key); }; -VALUE rb_concurrent_set_new(const struct rb_concurrent_set_funcs *funcs, int capacity); +VALUE rb_concurrent_set_new(const struct rb_concurrent_set_funcs *funcs, int capacity, int key_type); rb_atomic_t rb_concurrent_set_size(VALUE set_obj); VALUE rb_concurrent_set_find(VALUE *set_obj_ptr, VALUE key); VALUE rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data); -VALUE rb_concurrent_set_delete_by_identity(VALUE set_obj, VALUE key); +VALUE rb_concurrent_set_delete_by_identity(VALUE *set_obj_ptr, VALUE key); void rb_concurrent_set_foreach_with_replace(VALUE set_obj, int (*callback)(VALUE *key, void *data), void *data); #endif diff --git a/string.c b/string.c index 6f69c7472223e0..56dd88c8b2007b 100644 --- a/string.c +++ b/string.c @@ -549,7 +549,7 @@ static const struct rb_concurrent_set_funcs fstring_concurrent_set_funcs = { void Init_fstring_table(void) { - fstring_table_obj = rb_concurrent_set_new(&fstring_concurrent_set_funcs, 8192); + fstring_table_obj = rb_concurrent_set_new(&fstring_concurrent_set_funcs, 8192, T_STRING); rb_gc_register_address(&fstring_table_obj); } @@ -599,7 +599,7 @@ rb_gc_free_fstring(VALUE obj) RUBY_ASSERT(OBJ_FROZEN(obj)); RUBY_ASSERT(!FL_TEST(obj, STR_SHARED)); - rb_concurrent_set_delete_by_identity(fstring_table_obj, obj); + rb_concurrent_set_delete_by_identity(&fstring_table_obj, obj); RB_DEBUG_COUNTER_INC(obj_str_fstr); diff --git a/symbol.c b/symbol.c index daadd557b9fa9b..e7a74b2550e6ce 100644 --- a/symbol.c +++ b/symbol.c @@ -418,7 +418,7 @@ Init_sym(void) { rb_symbols_t *symbols = &ruby_global_symbols; - symbols->sym_set = rb_concurrent_set_new(&sym_set_funcs, 1024); + symbols->sym_set = rb_concurrent_set_new(&sym_set_funcs, 1024, T_SYMBOL); symbols->ids = rb_ary_hidden_new(0); Init_op_tbl(); @@ -953,7 +953,7 @@ rb_gc_free_dsymbol(VALUE sym) VALUE str = RSYMBOL(sym)->fstr; if (str) { - rb_concurrent_set_delete_by_identity(ruby_global_symbols.sym_set, sym); + rb_concurrent_set_delete_by_identity(&ruby_global_symbols.sym_set, sym); RSYMBOL(sym)->fstr = 0; } From c6bfdfa861ae6ae91c72a46ef5d9271753a2366e Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 26 Mar 2026 09:46:27 -0400 Subject: [PATCH 14/67] gc: remove atomic operations on bitmaps --- gc/default/default.c | 41 +++++------------------------------------ 1 file changed, 5 insertions(+), 36 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 48af60fd4ebada..cf8d686d387678 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -851,7 +851,6 @@ struct heap_page { unsigned int has_remembered_objects : 1; unsigned int has_uncollectible_wb_unprotected_objects : 1; } flags; - bool needs_setup_mark_bits; rb_atomic_t before_sweep; // bool rb_heap_t *heap; @@ -955,14 +954,6 @@ slot_index_for_offset(size_t offset, uint32_t div_magic) #define MARK_IN_BITMAP(bits, p) _MARK_IN_BITMAP(bits, GET_HEAP_PAGE(p), p) #define CLEAR_IN_BITMAP(bits, p) _CLEAR_IN_BITMAP(bits, GET_HEAP_PAGE(p), p) -/* Atomic bitmap operations for use during parallel sweep, where the sweep - * thread and mutator write barriers may modify different bits in the same - * bitmap word concurrently. 
*/ -#define _ATOMIC_MARK_IN_BITMAP(bits, page, p) RUBY_ATOMIC_VALUE_OR((bits)[SLOT_BITMAP_INDEX(page, p)], SLOT_BITMAP_BIT(page, p)) -#define _ATOMIC_CLEAR_IN_BITMAP(bits, page, p) RUBY_ATOMIC_VALUE_AND((bits)[SLOT_BITMAP_INDEX(page, p)], ~SLOT_BITMAP_BIT(page, p)) -#define ATOMIC_MARK_IN_BITMAP(bits, p) _ATOMIC_MARK_IN_BITMAP(bits, GET_HEAP_PAGE(p), p) -#define ATOMIC_CLEAR_IN_BITMAP(bits, p) _ATOMIC_CLEAR_IN_BITMAP(bits, GET_HEAP_PAGE(p), p) - #define GET_HEAP_MARK_BITS(x) (&GET_HEAP_PAGE(x)->mark_bits[0]) #define GET_HEAP_PINNED_BITS(x) (&GET_HEAP_PAGE(x)->pinned_bits[0]) #define GET_HEAP_UNCOLLECTIBLE_BITS(x) (&GET_HEAP_PAGE(x)->uncollectible_bits[0]) @@ -2608,7 +2599,7 @@ newobj_init(VALUE klass, VALUE flags, int wb_protected, rb_objspace_t *objspace, #endif if (RB_UNLIKELY(wb_protected == FALSE)) { - ATOMIC_MARK_IN_BITMAP(GET_HEAP_WB_UNPROTECTED_BITS(obj), obj); + MARK_IN_BITMAP(GET_HEAP_WB_UNPROTECTED_BITS(obj), obj); } #if RGENGC_PROFILE @@ -4113,7 +4104,7 @@ deferred_free(rb_objspace_t *objspace, VALUE obj) // Clear bits for the page that was swept by the background thread. static inline void -gc_post_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *sweep_page, bool force_setup_mark_bits) +gc_post_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *sweep_page) { GC_ASSERT(sweep_page->heap == heap); @@ -4151,16 +4142,7 @@ gc_post_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *s } if (!heap->compact_cursor) { - if (objspace->background_sweep_mode && !force_setup_mark_bits) { - /* Defer gc_setup_mark_bits to gc_sweep_finish on the GC thread, - * because it overwrites mark_bits which would race with mutator - * write barriers for objects on the same page. */ - sweep_page->needs_setup_mark_bits = true; - } - else { - gc_setup_mark_bits(sweep_page); - sweep_page->needs_setup_mark_bits = false; - } + gc_setup_mark_bits(sweep_page); } if (RUBY_ATOMIC_PTR_LOAD(heap_pages_deferred_final) && !finalizing) { @@ -4938,19 +4920,6 @@ gc_sweep_finish(rb_objspace_t *objspace) objspace->use_background_sweep_thread = false; - /* Run deferred gc_setup_mark_bits for pages swept by the background thread. - * This must run on the GC thread to avoid racing with mutator write barriers - * that modify mark_bits and uncollectible_bits. 
*/ - for (int i = 0; i < HEAP_COUNT; i++) { - struct heap_page *page; - ccan_list_for_each(&heaps[i].pages, page, page_node) { - if (page->needs_setup_mark_bits) { - gc_setup_mark_bits(page); - page->needs_setup_mark_bits = false; - } - } - } - gc_prof_set_heap_info(objspace); heap_pages_free_unused_pages(objspace); @@ -5135,7 +5104,7 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) GC_ASSERT(sweep_page->pre_deferred_free_slots == 0); } else { - gc_post_sweep_page(objspace, heap, sweep_page, false); // clear bits + gc_post_sweep_page(objspace, heap, sweep_page); // clear bits // Process deferred free objects unsigned short deferred_free_freed = 0; unsigned short deferred_to_free = sweep_page->pre_deferred_free_slots; @@ -7592,7 +7561,7 @@ rb_gc_impl_writebarrier_unprotect(void *objspace_ptr, VALUE obj) } RB_DEBUG_COUNTER_INC(obj_wb_unprotect); - ATOMIC_MARK_IN_BITMAP(GET_HEAP_WB_UNPROTECTED_BITS(obj), obj); + MARK_IN_BITMAP(GET_HEAP_WB_UNPROTECTED_BITS(obj), obj); } RB_GC_VM_UNLOCK_NO_BARRIER(lev); } From a3a803b6742c0feeb8f46073c832bad61877419b Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 26 Mar 2026 09:51:40 -0400 Subject: [PATCH 15/67] Turn on concurrent set deletions in sweep thread --- gc.c | 16 ++++++++++++---- gc/default/default.c | 3 ++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/gc.c b/gc.c index 94bd70ec23a837..2f0171d04761e2 100644 --- a/gc.c +++ b/gc.c @@ -2423,10 +2423,6 @@ bool rb_gc_obj_has_blacklisted_vm_weak_references(VALUE obj) { switch (BUILTIN_TYPE(obj)) { - case T_STRING: - return FL_TEST_RAW(obj, RSTRING_FSTR); - case T_SYMBOL: - return true; case T_IMEMO: switch (imemo_type(obj)) { case imemo_callcache: { @@ -2454,6 +2450,18 @@ rb_gc_obj_free_whitelisted_vm_weak_references_in_sweep_thread(VALUE obj) bool freed_generic = rb_free_generic_ivar(obj); if (!freed_generic) result = false; } + switch (BUILTIN_TYPE(obj)) { + case T_STRING: + if (FL_TEST_RAW(obj, RSTRING_FSTR)) { + rb_gc_free_fstring(obj); + } + break; + case T_SYMBOL: + rb_gc_free_dsymbol(obj); + break; + default: + break; + } return result; } diff --git a/gc/default/default.c b/gc/default/default.c index cf8d686d387678..5aabb270c80bad 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -4416,6 +4416,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p case T_BIGNUM: case T_OBJECT: case T_STRING: + case T_SYMBOL: case T_ARRAY: case T_HASH: case T_STRUCT: @@ -4431,7 +4432,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p } break; } - default: // ex: T_CLASS/T_MODULE/T_ICLASS/T_SYMBOL + default: // ex: T_CLASS/T_MODULE/T_ICLASS if (!rb_gc_obj_needs_cleanup_p(vp)) { heap_page_add_deferred_freeobj(objspace, page, vp); psweep_debug(2, "[sweep] freed: page(%p), obj(%p)\n", (void*)page, (void*)vp); From 713019e7279f8d69e4e3aff885f49465db19cca8 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 26 Mar 2026 11:48:34 -0400 Subject: [PATCH 16/67] Fix assertion in string.c --- string.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/string.c b/string.c index 56dd88c8b2007b..a6b6427bc1f5d2 100644 --- a/string.c +++ b/string.c @@ -593,8 +593,6 @@ rb_obj_is_fstring_table(VALUE obj) void rb_gc_free_fstring(VALUE obj) { - ASSERT_vm_locking_with_barrier(); - RUBY_ASSERT(FL_TEST(obj, RSTRING_FSTR)); RUBY_ASSERT(OBJ_FROZEN(obj)); RUBY_ASSERT(!FL_TEST(obj, STR_SHARED)); From b9bbab43f301b720380185c5014abc9303b22aae Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 26 Mar 2026 11:49:46 -0400 Subject: 
[PATCH 17/67] Fix age bits --- gc/default/default.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 5aabb270c80bad..d38c3c85c38d05 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -2570,6 +2570,7 @@ newobj_init(VALUE klass, VALUE flags, int wb_protected, rb_objspace_t *objspace, } #endif GC_ASSERT(BUILTIN_TYPE(obj) == T_NONE); + GC_ASSERT(RVALUE_AGE_GET(obj) == 0); GC_ASSERT((flags & FL_WB_PROTECTED) == 0); RBASIC(obj)->flags = flags; *((VALUE *)&RBASIC(obj)->klass) = klass; @@ -3966,6 +3967,8 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit } #endif + if (RVALUE_WB_UNPROTECTED(objspace, vp)) CLEAR_IN_BITMAP(GET_HEAP_WB_UNPROTECTED_BITS(vp), vp); + #if RGENGC_CHECK_MODE #define CHECK(x) if (x(objspace, vp) != FALSE) rb_bug("obj_free: " #x "(%s) != FALSE", rb_obj_info(vp)) CHECK(RVALUE_WB_UNPROTECTED); @@ -3980,9 +3983,10 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit rb_gc_event_hook(vp, RUBY_INTERNAL_EVENT_FREEOBJ); } - (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)p, slot_size); + RVALUE_AGE_SET_BITMAP(vp, 0); heap_page_add_freeobj(objspace, sweep_page, vp); gc_report(3, objspace, "page_sweep: %s (fast path) added to freelist\n", rb_obj_info(vp)); + (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)vp, slot_size); ctx->freed_slots++; } else { @@ -3992,9 +3996,10 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit rb_gc_obj_free_vm_weak_references(vp); if (rb_gc_obj_free(objspace, vp)) { - (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)p, slot_size); + RVALUE_AGE_SET_BITMAP(vp, 0); heap_page_add_freeobj(objspace, sweep_page, vp); gc_report(3, objspace, "page_sweep: %s is added to freelist\n", rb_obj_info(vp)); + (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)vp, slot_size); ctx->freed_slots++; } else { From 40ddc683af0e3e815df5ad837573f5600aade13d Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 26 Mar 2026 14:34:28 -0400 Subject: [PATCH 18/67] Add RGENGC assertions to pre_sweep_plane --- gc/default/default.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/gc/default/default.c b/gc/default/default.c index d38c3c85c38d05..bf89396c81f122 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -4332,6 +4332,26 @@ zombie_needs_deferred_free(VALUE zombie) return ZOMBIE_NEEDS_FREE_P(zombie); } +#if RGENGC_CHECK_MODE +static void +debug_free_check(rb_objspace_t *objspace, VALUE vp) +{ + if (!is_full_marking(objspace)) { + if (RVALUE_OLD_P(objspace, vp)) rb_bug("page_sweep: %p - old while minor GC.", (void *)p); + if (RVALUE_REMEMBERED(objspace, vp)) rb_bug("page_sweep: %p - remembered.", (void *)p); + } + if (RVALUE_WB_UNPROTECTED(objspace, vp)) CLEAR_IN_BITMAP(GET_HEAP_WB_UNPROTECTED_BITS(vp), vp); +#define CHECK(x) if (x(objspace, vp) != FALSE) rb_bug("obj_free: " #x "(%s) != FALSE", rb_obj_info(vp)) + CHECK(RVALUE_WB_UNPROTECTED); + CHECK(RVALUE_MARKED); + CHECK(RVALUE_MARKING); + CHECK(RVALUE_UNCOLLECTIBLE); +#undef CHECK +} +#else +#define debug_free_check(...) (void)0 +#endif + static inline void gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page, uintptr_t p, bits_t bitset, short slot_size) { @@ -4364,6 +4384,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p } break; case T_DATA: { + debug_free_check(objspace, vp); void *data = RTYPEDDATA_P(vp) ? 
RTYPEDDATA_GET_DATA(vp) : DATA_PTR(vp); if (!data) { goto free; @@ -4394,6 +4415,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p break; } case T_IMEMO: { + debug_free_check(objspace, vp); if (rb_gc_obj_has_blacklisted_vm_weak_references(vp)) { sweep_in_ruby_thread(objspace, page, vp, true); break; @@ -4428,6 +4450,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p case T_MATCH: case T_REGEXP: case T_FILE: { + debug_free_check(objspace, vp); if (rb_gc_obj_has_blacklisted_vm_weak_references(vp)) { sweep_in_ruby_thread(objspace, page, vp, true); break; @@ -4438,6 +4461,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p break; } default: // ex: T_CLASS/T_MODULE/T_ICLASS + debug_free_check(objspace, vp); if (!rb_gc_obj_needs_cleanup_p(vp)) { heap_page_add_deferred_freeobj(objspace, page, vp); psweep_debug(2, "[sweep] freed: page(%p), obj(%p)\n", (void*)page, (void*)vp); @@ -4449,6 +4473,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p } break; free: { + debug_free_check(objspace, vp); if (rb_gc_obj_free_vm_weak_references(vp)) { bool can_put_back_on_freelist = rb_gc_obj_free(objspace, vp); if (can_put_back_on_freelist) { From c1c1e28f6db8aae43564ae3ea0f21d7e46268494 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 26 Mar 2026 14:38:46 -0400 Subject: [PATCH 19/67] Add major GC reasons to GC.stat --- gc/default/default.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/gc/default/default.c b/gc/default/default.c index bf89396c81f122..c4c7f75189bd4a 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -622,6 +622,11 @@ typedef struct rb_objspace { size_t minor_gc_count; size_t major_gc_count; + size_t major_gc_count_by_nofree; + size_t major_gc_count_by_oldgen; + size_t major_gc_count_by_shady; + size_t major_gc_count_by_force; + size_t major_gc_count_by_oldmalloc; size_t compact_count; size_t read_barrier_faults; #if RGENGC_PROFILE > 0 @@ -7921,6 +7926,11 @@ gc_start(rb_objspace_t *objspace, unsigned int reason) #if RGENGC_ESTIMATE_OLDMALLOC (void)RB_DEBUG_COUNTER_INC_IF(gc_major_oldmalloc, reason & GPR_FLAG_MAJOR_BY_OLDMALLOC); #endif + if (reason & GPR_FLAG_MAJOR_BY_NOFREE) objspace->profile.major_gc_count_by_nofree++; + if (reason & GPR_FLAG_MAJOR_BY_OLDGEN) objspace->profile.major_gc_count_by_oldgen++; + if (reason & GPR_FLAG_MAJOR_BY_SHADY) objspace->profile.major_gc_count_by_shady++; + if (reason & GPR_FLAG_MAJOR_BY_FORCE) objspace->profile.major_gc_count_by_force++; + if (reason & GPR_FLAG_MAJOR_BY_OLDMALLOC) objspace->profile.major_gc_count_by_oldmalloc++; } else { (void)RB_DEBUG_COUNTER_INC_IF(gc_minor_newobj, reason & GPR_FLAG_NEWOBJ); @@ -8994,6 +9004,11 @@ enum gc_stat_sym { gc_stat_sym_malloc_increase_bytes_limit, gc_stat_sym_minor_gc_count, gc_stat_sym_major_gc_count, + gc_stat_sym_major_gc_count_by_nofree, + gc_stat_sym_major_gc_count_by_oldgen, + gc_stat_sym_major_gc_count_by_shady, + gc_stat_sym_major_gc_count_by_force, + gc_stat_sym_major_gc_count_by_oldmalloc, gc_stat_sym_compact_count, gc_stat_sym_read_barrier_faults, gc_stat_sym_total_moved_objects, @@ -9046,6 +9061,11 @@ setup_gc_stat_symbols(void) S(malloc_increase_bytes_limit); S(minor_gc_count); S(major_gc_count); + S(major_gc_count_by_nofree); + S(major_gc_count_by_oldgen); + S(major_gc_count_by_shady); + S(major_gc_count_by_force); + S(major_gc_count_by_oldmalloc); S(compact_count); S(read_barrier_faults); S(total_moved_objects); @@ 
-9129,6 +9149,11 @@ rb_gc_impl_stat(void *objspace_ptr, VALUE hash_or_sym) SET(malloc_increase_bytes_limit, malloc_limit); SET(minor_gc_count, objspace->profile.minor_gc_count); SET(major_gc_count, objspace->profile.major_gc_count); + SET(major_gc_count_by_nofree, objspace->profile.major_gc_count_by_nofree); + SET(major_gc_count_by_oldgen, objspace->profile.major_gc_count_by_oldgen); + SET(major_gc_count_by_shady, objspace->profile.major_gc_count_by_shady); + SET(major_gc_count_by_force, objspace->profile.major_gc_count_by_force); + SET(major_gc_count_by_oldmalloc, objspace->profile.major_gc_count_by_oldmalloc); SET(compact_count, objspace->profile.compact_count); SET(read_barrier_faults, objspace->profile.read_barrier_faults); SET(total_moved_objects, objspace->rcompactor.total_moved); From b3ab6f779821164b890b5c0e9e2871374d6c757e Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 26 Mar 2026 16:37:20 -0400 Subject: [PATCH 20/67] Add DEBUG_SWEEP_BOOKKEEPING assertions --- gc/default/default.c | 76 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/gc/default/default.c b/gc/default/default.c index c4c7f75189bd4a..c848405a6c0ba8 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -852,6 +852,7 @@ struct heap_page { unsigned short pre_empty_slots; unsigned short pre_deferred_free_slots; unsigned short pre_final_slots; + unsigned short pre_zombie_slots; struct { unsigned int has_remembered_objects : 1; unsigned int has_uncollectible_wb_unprotected_objects : 1; @@ -946,6 +947,16 @@ slot_index_for_offset(size_t offset, uint32_t div_magic) return (size_t)(((uint64_t)offset * div_magic) >> 32); } +static inline int +popcount_bits(bits_t x) +{ +#if SIZEOF_VOIDP == 8 + return __builtin_popcountl(x); +#else + return __builtin_popcount(x); +#endif +} + #define SLOT_INDEX(page, p) slot_index_for_offset((uintptr_t)(p) - (page)->start, (page)->slot_div_magic) #define SLOT_BITMAP_INDEX(page, p) (SLOT_INDEX(page, p) / BITS_BITLENGTH) #define SLOT_BITMAP_OFFSET(page, p) (SLOT_INDEX(page, p) & (BITS_BITLENGTH - 1)) @@ -3922,6 +3933,7 @@ struct gc_sweep_context { int final_slots; int freed_slots; int empty_slots; + int zombie_slots; /* pre-existing zombies not yet ready to free */ }; bool rb_gc_obj_needs_cleanup_p(VALUE obj); @@ -3957,6 +3969,7 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit goto free_object; } /* already counted as final slot */ + ctx->zombie_slots++; break; case T_NONE: ctx->empty_slots++; /* already freed */ @@ -4215,6 +4228,35 @@ gc_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct gc_sweep_context } } +#ifdef DEBUG_SWEEP_BOOKKEEPING + { + /* Assert that all unmarked slots with live objects were either freed or made into zombies. + * Count unmarked slot-aligned bits the same way the sweep loop does. 
*/ + int unmarked_slots = 0; + uintptr_t vp = (uintptr_t)sweep_page->start; + + bits_t bs = ~bits[0]; + bs >>= NUM_IN_PAGE(vp); + bs &= slot_mask; + unmarked_slots += popcount_bits(bs); + + for (int i = 1; i < bitmap_plane_count; i++) { + bs = ~bits[i] & slot_mask; + unmarked_slots += popcount_bits(bs); + } + + int freed_or_zombie = ctx->freed_slots + ctx->final_slots; + int unmarked_live = unmarked_slots - ctx->empty_slots - ctx->zombie_slots; + if (freed_or_zombie != unmarked_live) { + rb_bug("gc_sweep_page: unmarked live slot count mismatch: " + "unmarked_slots=%d - empty_slots=%d - zombie_slots=%d = %d unmarked live, " + "but freed_slots=%d + final_slots=%d = %d", + unmarked_slots, ctx->empty_slots, ctx->zombie_slots, unmarked_live, + ctx->freed_slots, ctx->final_slots, freed_or_zombie); + } + } +#endif + if (!heap->compact_cursor) { gc_setup_mark_bits(sweep_page); } @@ -4363,6 +4405,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p unsigned short freed = 0; unsigned short empties = 0; unsigned short finals = 0; + unsigned short zombies = 0; do { VALUE vp = (VALUE)p; GC_ASSERT(GET_HEAP_PAGE(vp) == page); @@ -4386,6 +4429,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p } else { // already counted as final_slot when made into a zombie + zombies++; } break; case T_DATA: { @@ -4510,6 +4554,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p page->pre_freed_slots += freed; page->pre_empty_slots += empties; page->pre_final_slots += finals; + page->pre_zombie_slots += zombies; } static void @@ -4523,6 +4568,7 @@ gc_pre_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *pa psweep_debug(1, "[sweep] gc_pre_sweep_page(heap:%p page:%p) start\n", heap, page); GC_ASSERT(page->heap == heap); page->pre_deferred_free_slots = 0; + page->pre_zombie_slots = 0; int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH); int out_of_range_bits = total_slots % BITS_BITLENGTH; @@ -4542,6 +4588,35 @@ gc_pre_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *pa if (page->pre_deferred_free_slots > 0) { objspace->profile.pages_swept_by_sweep_thread_had_deferred_free_objects++; } + +#ifdef DEBUG_SWEEP_BOOKKEEPING + { + /* Assert that all unmarked slots with live objects were either freed, made into + * zombies, or deferred to the Ruby thread. 
*/ + int unmarked_slots = 0; + + bits_t bs = ~bits[0]; + bs >>= NUM_IN_PAGE((uintptr_t)page->start); + bs &= slot_mask; + unmarked_slots += popcount_bits(bs); + + for (int i = 1; i < bitmap_plane_count; i++) { + bs = ~bits[i] & slot_mask; + unmarked_slots += popcount_bits(bs); + } + + int freed_or_zombie = page->pre_freed_slots + page->pre_final_slots + page->pre_deferred_free_slots; + int unmarked_live = unmarked_slots - page->pre_empty_slots - page->pre_zombie_slots; + if (freed_or_zombie != unmarked_live) { + rb_bug("gc_pre_sweep_page: unmarked live slot count mismatch: " + "unmarked_slots=%d - empty_slots=%d - zombie_slots=%d = %d unmarked live, " + "but freed_slots=%d + final_slots=%d + deferred_free_slots=%d = %d", + unmarked_slots, page->pre_empty_slots, page->pre_zombie_slots, unmarked_live, + page->pre_freed_slots, page->pre_final_slots, page->pre_deferred_free_slots, freed_or_zombie); + } + } +#endif + psweep_debug(1, "[sweep] gc_pre_sweep_page(heap:%p page:%p) done, deferred free:%d\n", heap, page, page->pre_deferred_free_slots); } @@ -4600,6 +4675,7 @@ clear_pre_sweep_fields(struct heap_page *page) page->pre_deferred_free_slots = 0; page->pre_empty_slots = 0; page->pre_final_slots = 0; + page->pre_zombie_slots = 0; } // add beginning of b to end of a From 343912d1418b1bd065905a37cfd7bb0fdfb24573 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 26 Mar 2026 16:47:53 -0400 Subject: [PATCH 21/67] Add GC sweep bookkeeping assertions on sweep_finish --- gc/default/default.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/gc/default/default.c b/gc/default/default.c index c848405a6c0ba8..8915cc38744e4e 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -5038,6 +5038,18 @@ gc_sweep_finish(rb_objspace_t *objspace) for (int i = 0; i < HEAP_COUNT; i++) { rb_heap_t *heap = &heaps[i]; +#ifdef DEBUG_SWEEP_BOOKKEEPING + { + /* Assert that every page in this heap was swept. */ + struct heap_page *page; + ccan_list_for_each(&heap->pages, page, page_node) { + if (RUBY_ATOMIC_LOAD(page->before_sweep)) { + rb_bug("gc_sweep_finish: page %p in heap %d still has before_sweep set", (void *)page, i); + } + } + } +#endif + heap->freed_slots = 0; heap->empty_slots = 0; if (heap->background_sweep_steps < heap->foreground_sweep_steps) { From 6f9427c0fae3fb72951191701f20b7aaabe0209a Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 26 Mar 2026 22:32:45 -0400 Subject: [PATCH 22/67] Call post_sweep_page after freeing all deferred objects. This is so that garbage_object_p() will work correctly. 
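
A minimal, hypothetical C sketch of the ordering above (not the actual GC code; the struct and function names are invented stand-ins): the page stops advertising "before sweep" only after the deferred rb_gc_obj_free() calls have run, so a garbage_object_p()-style check never reports an unmarked, still-unfreed object as live.

    #include <stdbool.h>
    #include <stdio.h>

    /* Toy model of one page: before_sweep is what a garbage_object_p()-style
     * check consults, deferred_left counts unmarked objects whose free was
     * deferred to the Ruby thread and has not run yet. */
    struct page_model {
        bool before_sweep;
        int  deferred_left;
    };

    static bool model_garbage_p(const struct page_model *page, bool marked)
    {
        /* Unmarked object on a page that is still "before sweep" is garbage. */
        return !marked && page->before_sweep;
    }

    static void free_deferred(struct page_model *page)
    {
        page->deferred_left = 0;     /* run the deferred frees */
    }

    static void post_sweep_page_model(struct page_model *page)
    {
        page->before_sweep = false;  /* page no longer reports dead objects */
    }

    int main(void)
    {
        struct page_model page = { .before_sweep = true, .deferred_left = 3 };

        free_deferred(&page);            /* first: free everything deferred  */
        post_sweep_page_model(&page);    /* only then clear the page state   */

        printf("garbage_p(unmarked) = %d\n", model_garbage_p(&page, false));
        return 0;
    }
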
--- gc/default/default.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 8915cc38744e4e..e84528c33b8a87 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -4108,7 +4108,9 @@ deferred_free(rb_objspace_t *objspace, VALUE obj) #if VM_CHECK_MODE > 0 MAYBE_UNUSED(const char *obj_info) = rb_obj_info(obj); #endif - rb_gc_obj_free_vm_weak_references(obj); + bool freed_weakrefs = rb_gc_obj_free_vm_weak_references(obj); + (void)freed_weakrefs; + GC_ASSERT(freed_weakrefs); if (rb_gc_obj_free(objspace, obj)) { struct heap_page *page = GET_HEAP_PAGE(obj); psweep_debug(1, "[gc] deferred free: page(%p) obj(%p) %s (success)\n", page, (void*)obj, obj_info); @@ -5203,12 +5205,13 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) bool dequeued_unswept_page = false; // NOTE: pages we dequeue from the sweep thread need to be AFTER the list of heap->free_pages so we don't free from pages // we've allocated from since sweep started. - struct heap_page *sweep_page = gc_sweep_dequeue_page(objspace, heap, free_in_user_thread_p, &free_in_user_thread_p); + struct heap_page *sweep_page = gc_sweep_dequeue_page(objspace, heap, free_in_user_thread_p, &dequeued_unswept_page); if (RB_UNLIKELY(!sweep_page)) { psweep_debug(-2, "[gc] gc_sweep_step heap:%p (%ld) deq() = nil, break\n", heap, heap - heaps); break; } if (dequeued_unswept_page) { + free_in_user_thread_p = true; psweep_debug(-2, "[gc] gc_sweep_step heap:%p (%ld) deq unswept page\n", heap, heap - heaps); } else { @@ -5228,8 +5231,6 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) GC_ASSERT(sweep_page->pre_deferred_free_slots == 0); } else { - gc_post_sweep_page(objspace, heap, sweep_page); // clear bits - // Process deferred free objects unsigned short deferred_free_freed = 0; unsigned short deferred_to_free = sweep_page->pre_deferred_free_slots; @@ -5261,6 +5262,8 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) ctx.final_slots = sweep_page->pre_final_slots + deferred_free_final_slots; ctx.freed_slots = sweep_page->pre_freed_slots + deferred_free_freed; ctx.empty_slots = sweep_page->pre_empty_slots; + + gc_post_sweep_page(objspace, heap, sweep_page); // clear bits } if (0) fprintf(stderr, "gc_sweep_page(%"PRIdSIZE"): total_slots: %d, freed_slots: %d, empty_slots: %d, final_slots: %d\n", From fb0d274dcf7656b7ea33a91f90ea40493d58a5ca Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 26 Mar 2026 22:55:33 -0400 Subject: [PATCH 23/67] Fix garbage_object_p() to load and use background_sweep_thread atomically The use here is not protected by the sweep lock, so we should. Also, use atomic load for checking if object is marked so that it's not re-ordered past the next atomic load of page->before_sweep. 
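
A minimal, hypothetical C11 sketch of the load ordering described above (not the Ruby GC code; the variable names are invented, and plain acquire ordering is shown where the patch uses __ATOMIC_SEQ_CST): making the mark-bit load an atomic acquire load keeps the following atomic load of the page's before_sweep flag from being hoisted above it, so the pair of observed values stays consistent with the sweeper's publication order.

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Invented stand-ins for one mark bit and one page flag. */
    static atomic_int mark_bit     = 0;  /* 0 = unmarked, 1 = marked */
    static atomic_int before_sweep = 1;  /* 1 = page not swept yet   */

    /* Sweeper side: publish the new mark state, then retire the page from
     * the "before sweep" state; the release store keeps the mark_bit store
     * from being reordered after it. */
    static void sweeper_finish_page(void)
    {
        atomic_store_explicit(&mark_bit, 1, memory_order_relaxed);
        atomic_store_explicit(&before_sweep, 0, memory_order_release);
    }

    /* Reader side, shaped like a garbage_object_p() check: the acquire load
     * of mark_bit cannot have the following before_sweep load reordered
     * before it, which is the reordering the commit message is about. */
    static bool looks_like_garbage(void)
    {
        int marked = atomic_load_explicit(&mark_bit, memory_order_acquire);
        int before = atomic_load_explicit(&before_sweep, memory_order_acquire);
        return !marked && before;
    }

    int main(void)
    {
        /* Sequential driver so the sketch runs standalone; in the GC the two
         * functions execute on different threads. */
        printf("before page is finished: garbage=%d\n", looks_like_garbage());
        sweeper_finish_page();
        printf("after page is finished:  garbage=%d\n", looks_like_garbage());
        return 0;
    }
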
--- gc/default/default.c | 63 +++++++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 24 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index e84528c33b8a87..8798726c6f66fa 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -555,13 +555,16 @@ typedef struct rb_objspace { unsigned int during_compacting : 1; unsigned int during_reference_updating : 1; unsigned int gc_stressful: 1; - unsigned int during_minor_gc : 1; + unsigned int has_newobj_hook: 1; unsigned int during_incremental_marking : 1; - unsigned int during_lazy_sweeping : 1; - unsigned int measure_gc : 1; } flags; + // This can't be a bitfield because it's accessed in garbage_object_p() from the sweep thread + // while the ruby GC thread could be running and changing other bitfields. + bool during_lazy_sweeping; + // This one too, it's accessed in debug_free_check + bool during_minor_gc; rb_event_flag_t hook_events; @@ -577,10 +580,10 @@ typedef struct rb_objspace { bool sweep_thread_sweep_exited; bool sweep_thread_waiting_request; bool sweep_thread_sweeping; + rb_atomic_t use_background_sweep_thread; bool background_sweep_mode; bool background_sweep_abort; bool background_sweep_restart_heaps; - bool use_background_sweep_thread; bool sweep_rest; unsigned int heaps_done_background_sweep; @@ -1379,12 +1382,12 @@ total_final_slots_count(rb_objspace_t *objspace) #define is_marking(objspace) (gc_mode(objspace) == gc_mode_marking) #define is_sweeping(objspace) (gc_mode(objspace) == gc_mode_sweeping) -#define is_full_marking(objspace) ((objspace)->flags.during_minor_gc == FALSE) +#define is_full_marking(objspace) ((objspace)->during_minor_gc == FALSE) #define is_incremental_marking(objspace) ((objspace)->flags.during_incremental_marking != FALSE) #define will_be_incremental_marking(objspace) ((objspace)->rgengc.need_major_gc != GPR_FLAG_NONE) #define GC_INCREMENTAL_SWEEP_SLOT_COUNT 2048 #define GC_INCREMENTAL_SWEEP_POOL_SLOT_COUNT 1024 -#define is_lazy_sweeping(objspace) ((objspace)->flags.during_lazy_sweeping != FALSE) +#define is_lazy_sweeping(objspace) ((objspace)->during_lazy_sweeping != FALSE) /* In lazy sweeping or the previous incremental marking finished and did not yield a free page. */ #define needs_continue_sweeping(objspace, heap) \ ((heap)->free_pages == NULL && is_lazy_sweeping(objspace)) @@ -1587,6 +1590,15 @@ RVALUE_MARKED(rb_objspace_t *objspace, VALUE obj) return RVALUE_MARKED_BITMAP(obj) != 0; } +static inline int +RVALUE_MARKED_ATOMIC(rb_objspace_t *objspace, VALUE obj) +{ + bits_t *bits = GET_HEAP_MARK_BITS(obj); + struct heap_page *page = GET_HEAP_PAGE(obj); + bits_t word = __atomic_load_n(&bits[SLOT_BITMAP_INDEX(page, obj)], __ATOMIC_SEQ_CST); + return (word & SLOT_BITMAP_BIT(page, obj)) != 0; +} + static inline int RVALUE_PINNED(rb_objspace_t *objspace, VALUE obj) { @@ -1934,8 +1946,11 @@ rb_gc_impl_garbage_object_p(void *objspace_ptr, VALUE ptr) bool dead = false; - if (!objspace->background_sweep_mode) { // set to false/true by ruby GC thread when entering/exiting GC - // psweep: not safe to read flags on object if during background sweeping + // Set to false/true by the ruby GC thread when entering/exiting GC, so shouldn't change throughout this call. 
+ rb_atomic_t use_sweep_thread = RUBY_ATOMIC_LOAD(objspace->use_background_sweep_thread); + + if (!use_sweep_thread) { + // It's not safe to read flags on an object if the sweep thread is running asan_unpoisoning_object(ptr) { switch (BUILTIN_TYPE(ptr)) { case T_NONE: @@ -1954,17 +1969,19 @@ rb_gc_impl_garbage_object_p(void *objspace_ptr, VALUE ptr) struct heap_page *page = GET_HEAP_PAGE(ptr); bool during_lazy_sweep = is_lazy_sweeping(objspace); - if (!objspace->background_sweep_mode) { - return during_lazy_sweep && !RVALUE_MARKED(objspace, ptr) && RUBY_ATOMIC_LOAD(page->before_sweep); + if (!use_sweep_thread) { + // The ruby GC thread or a user thread called us + bool marked = RVALUE_MARKED(objspace, ptr); + GC_ASSERT(marked == RVALUE_MARKED_ATOMIC(objspace, ptr)); + return during_lazy_sweep && !marked && RUBY_ATOMIC_LOAD(page->before_sweep); } // we're currently lazy sweeping with the sweep thread in background mode else if (during_lazy_sweep) { - bool is_before1, is_before2; - // This is technically UB because reading of mark bits is not synchronized, but I think it's fine. - bool is_garbage = ((is_before1 = RUBY_ATOMIC_LOAD(page->before_sweep)) && - !RVALUE_MARKED(objspace, ptr) && (is_before2 = RUBY_ATOMIC_LOAD(page->before_sweep))); + bool marked = RVALUE_MARKED_ATOMIC(objspace, ptr); // load it atomically so it can't be re-ordered past the next atomic load + bool before_sweep = RUBY_ATOMIC_LOAD(page->before_sweep); + bool is_garbage = !marked && before_sweep; if (is_garbage) return true; - if (is_before1 && is_before2) return false; // must be marked (before_sweep and marked) + if (marked && before_sweep) return false; // already swept page, just check flags return BUILTIN_TYPE(ptr) == T_NONE || BUILTIN_TYPE(ptr) == T_MOVED || BUILTIN_TYPE(ptr) == T_ZOMBIE; } @@ -4389,9 +4406,7 @@ debug_free_check(rb_objspace_t *objspace, VALUE vp) if (RVALUE_OLD_P(objspace, vp)) rb_bug("page_sweep: %p - old while minor GC.", (void *)p); if (RVALUE_REMEMBERED(objspace, vp)) rb_bug("page_sweep: %p - remembered.", (void *)p); } - if (RVALUE_WB_UNPROTECTED(objspace, vp)) CLEAR_IN_BITMAP(GET_HEAP_WB_UNPROTECTED_BITS(vp), vp); #define CHECK(x) if (x(objspace, vp) != FALSE) rb_bug("obj_free: " #x "(%s) != FALSE", rb_obj_info(vp)) - CHECK(RVALUE_WB_UNPROTECTED); CHECK(RVALUE_MARKED); CHECK(RVALUE_MARKING); CHECK(RVALUE_UNCOLLECTIBLE); @@ -4923,7 +4938,7 @@ static void gc_sweep_start(rb_objspace_t *objspace) { gc_mode_transition(objspace, gc_mode_sweeping); - objspace->flags.during_lazy_sweeping = TRUE; + objspace->during_lazy_sweeping = TRUE; objspace->rincgc.pooled_slots = 0; // Background sweeping cannot be happening @@ -4963,7 +4978,7 @@ gc_sweep_start(rb_objspace_t *objspace) (objspace->profile.latest_gc_info & GPR_FLAG_METHOD) == 0 && !(objspace->hook_events & RUBY_INTERNAL_EVENT_FREEOBJ)) { - objspace->use_background_sweep_thread = true; + RUBY_ATOMIC_SET(objspace->use_background_sweep_thread, true); psweep_debug(-1, "[gc] gc_sweep_start: requesting sweep thread\n"); sweep_lock_lock(&objspace->sweep_lock); { @@ -4973,7 +4988,7 @@ gc_sweep_start(rb_objspace_t *objspace) sweep_lock_unlock(&objspace->sweep_lock); } else { - objspace->use_background_sweep_thread = false; + RUBY_ATOMIC_SET(objspace->use_background_sweep_thread, false); psweep_debug(-1, "[gc] gc_sweep_start: not using background sweep thread\n"); } } @@ -5032,7 +5047,7 @@ gc_sweep_finish(rb_objspace_t *objspace) gc_report(1, objspace, "gc_sweep_finish\n"); psweep_debug(-1, "[gc] gc_sweep_finish\n"); - objspace->use_background_sweep_thread 
= false; + RUBY_ATOMIC_SET(objspace->use_background_sweep_thread, false); gc_prof_set_heap_info(objspace); heap_pages_free_unused_pages(objspace); @@ -5074,7 +5089,7 @@ gc_sweep_finish(rb_objspace_t *objspace) rb_gc_event_hook(0, RUBY_INTERNAL_EVENT_GC_END_SWEEP); gc_mode_transition(objspace, gc_mode_none); - objspace->flags.during_lazy_sweeping = FALSE; + objspace->during_lazy_sweeping = FALSE; #if RGENGC_CHECK_MODE >= 2 gc_verify_internal_consistency(objspace); @@ -7301,7 +7316,7 @@ gc_marks_start(rb_objspace_t *objspace, int full_mark) "objspace->rincgc.pooled_page_num: %"PRIdSIZE", " "objspace->rincgc.step_slots: %"PRIdSIZE", \n", objspace->marked_slots, objspace->rincgc.pooled_slots, objspace->rincgc.step_slots); - objspace->flags.during_minor_gc = FALSE; + objspace->during_minor_gc = FALSE; if (ruby_enable_autocompact) { objspace->flags.during_compacting |= TRUE; } @@ -7326,7 +7341,7 @@ gc_marks_start(rb_objspace_t *objspace, int full_mark) } } else { - objspace->flags.during_minor_gc = TRUE; + objspace->during_minor_gc = TRUE; objspace->marked_slots = objspace->rgengc.old_objects + objspace->rgengc.uncollectible_wb_unprotected_objects; /* uncollectible objects are marked already */ objspace->profile.minor_gc_count++; From 62b195a061363a7b8b0cd44fc3e7ffb9e452ae8b Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Fri, 27 Mar 2026 12:17:27 -0400 Subject: [PATCH 24/67] Add per-page `pre_freed_malloc_bytes` to deal with malloc_increase issue --- gc/default/default.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 8798726c6f66fa..5d96a3739e9016 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -184,6 +184,7 @@ #ifdef RB_THREAD_LOCAL_SPECIFIER #define USE_MALLOC_INCREASE_LOCAL 1 static RB_THREAD_LOCAL_SPECIFIER int malloc_increase_local; +static RB_THREAD_LOCAL_SPECIFIER struct heap_page *current_sweep_thread_page; #else #define USE_MALLOC_INCREASE_LOCAL 0 #endif @@ -856,6 +857,7 @@ struct heap_page { unsigned short pre_deferred_free_slots; unsigned short pre_final_slots; unsigned short pre_zombie_slots; + size_t pre_freed_malloc_bytes; struct { unsigned int has_remembered_objects : 1; unsigned int has_uncollectible_wb_unprotected_objects : 1; @@ -1420,6 +1422,8 @@ static int garbage_collect(rb_objspace_t *, unsigned int reason); static int gc_start(rb_objspace_t *objspace, unsigned int reason); static void gc_rest(rb_objspace_t *objspace); +static inline void atomic_sub_nounderflow(size_t *var, size_t sub); +static size_t malloc_increase_local_flush(rb_objspace_t *objspace); enum gc_enter_event { gc_enter_event_start, @@ -4586,6 +4590,8 @@ gc_pre_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *pa GC_ASSERT(page->heap == heap); page->pre_deferred_free_slots = 0; page->pre_zombie_slots = 0; + page->pre_freed_malloc_bytes = 0; + current_sweep_thread_page = page; int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH); int out_of_range_bits = total_slots % BITS_BITLENGTH; @@ -4634,6 +4640,9 @@ gc_pre_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *pa } #endif + malloc_increase_local_flush(objspace); + current_sweep_thread_page = NULL; + psweep_debug(1, "[sweep] gc_pre_sweep_page(heap:%p page:%p) done, deferred free:%d\n", heap, page, page->pre_deferred_free_slots); } @@ -4693,6 +4702,7 @@ clear_pre_sweep_fields(struct heap_page *page) page->pre_empty_slots = 0; page->pre_final_slots = 0; page->pre_zombie_slots = 0; + 
page->pre_freed_malloc_bytes = 0; } // add beginning of b to end of a @@ -5327,6 +5337,12 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) asan_lock_deferred_freelist(sweep_page); asan_lock_freelist(sweep_page); + if (sweep_page->pre_freed_malloc_bytes > 0) { + atomic_sub_nounderflow(&malloc_increase, sweep_page->pre_freed_malloc_bytes); +#if RGENGC_ESTIMATE_OLDMALLOC + atomic_sub_nounderflow(&objspace->malloc_counters.oldmalloc_increase, sweep_page->pre_freed_malloc_bytes); +#endif + } clear_pre_sweep_fields(sweep_page); } @@ -9711,6 +9727,7 @@ static size_t malloc_increase_commit(rb_objspace_t *objspace, size_t new_size, size_t old_size) { if (new_size > old_size) { + GC_ASSERT(!is_sweep_thread_p()); size_t delta = new_size - old_size; size_t old_val = rbimpl_atomic_size_fetch_add(&malloc_increase, delta, RBIMPL_ATOMIC_RELAXED); #if RGENGC_ESTIMATE_OLDMALLOC @@ -9719,10 +9736,16 @@ malloc_increase_commit(rb_objspace_t *objspace, size_t new_size, size_t old_size return old_val + delta; } else { - atomic_sub_nounderflow(&malloc_increase, old_size - new_size); + size_t delta = old_size - new_size; + if (current_sweep_thread_page) { + current_sweep_thread_page->pre_freed_malloc_bytes += delta; + } + else { + atomic_sub_nounderflow(&malloc_increase, delta); #if RGENGC_ESTIMATE_OLDMALLOC - atomic_sub_nounderflow(&objspace->malloc_counters.oldmalloc_increase, old_size - new_size); + atomic_sub_nounderflow(&objspace->malloc_counters.oldmalloc_increase, delta); #endif + } return 0; } } From 4fc9c580ff59bb7edbe6aa5a6da7edb4c5a7239e Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Fri, 27 Mar 2026 13:54:09 -0400 Subject: [PATCH 25/67] Add deferred free object bitmap per page --- gc/default/default.c | 139 ++++++++++++++++++------------------------- 1 file changed, 59 insertions(+), 80 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 5d96a3739e9016..b093c92ebf0137 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -470,11 +470,6 @@ typedef struct mark_stack { typedef int (*gc_compact_compare_func)(const void *l, const void *r, void *d); -typedef struct { - rb_darray(VALUE) object_list; - rb_nativethread_lock_t lock; -} deferred_sweep_data_t; - typedef struct rb_heap_struct { short slot_size; @@ -507,7 +502,6 @@ typedef struct rb_heap_struct { rb_nativethread_cond_t sweep_page_cond; // associated with global sweep lock rb_nativethread_lock_t swept_pages_lock; size_t pre_swept_slots_deferred; - deferred_sweep_data_t deferred_sweep_data; bool is_finished_sweeping; bool done_background_sweep; bool skip_sweep_continue; // skip current sweep continue @@ -885,6 +879,7 @@ struct heap_page { /* If set, the object is not movable */ bits_t pinned_bits[HEAP_PAGE_BITMAP_LIMIT]; bits_t age_bits[HEAP_PAGE_BITMAP_LIMIT * RVALUE_AGE_BIT_COUNT]; + bits_t deferred_free_bits[HEAP_PAGE_BITMAP_LIMIT]; }; /* @@ -1105,7 +1100,7 @@ typedef struct lock_stats { static lock_stats_t sweep_lock_stats = {"objspace->sweep_lock", {{0}}, 0}; static lock_stats_t swept_pages_lock_stats = {"heap->swept_pages_lock", {{0}}, 0}; -static lock_stats_t deferred_sweep_data_lock_stats = {"heap->deferred_sweep_data.lock", {{0}}, 0}; + static lock_callsite_stats_t* find_or_create_callsite(lock_stats_t *stats, const char *function, int line) @@ -1156,9 +1151,9 @@ print_lock_stats(void) fprintf(stderr, "%-40s %-30s %12s %12s %10s\n", "Lock Name", "Callsite", "Uncontended", "Contended", "Ratio"); fprintf(stderr, "%-40s %-30s %12s %12s %10s\n", "---------", "--------", "-----------", "---------", 
"-----"); - lock_stats_t *all_stats[] = {&sweep_lock_stats, &swept_pages_lock_stats, &deferred_sweep_data_lock_stats}; + lock_stats_t *all_stats[] = {&sweep_lock_stats, &swept_pages_lock_stats}; - for (int i = 0; i < 3; i++) { + for (int i = 0; i < 2; i++) { lock_stats_t *stats = all_stats[i]; /* Sort callsites by total contentions (descending) */ @@ -1979,8 +1974,8 @@ rb_gc_impl_garbage_object_p(void *objspace_ptr, VALUE ptr) GC_ASSERT(marked == RVALUE_MARKED_ATOMIC(objspace, ptr)); return during_lazy_sweep && !marked && RUBY_ATOMIC_LOAD(page->before_sweep); } - // we're currently lazy sweeping with the sweep thread in background mode else if (during_lazy_sweep) { + // we're currently lazy sweeping with the sweep thread bool marked = RVALUE_MARKED_ATOMIC(objspace, ptr); // load it atomically so it can't be re-ordered past the next atomic load bool before_sweep = RUBY_ATOMIC_LOAD(page->before_sweep); bool is_garbage = !marked && before_sweep; @@ -4094,32 +4089,6 @@ wait_for_background_sweeping_to_finish(rb_objspace_t *objspace, bool abort_curre sweep_lock_unlock(&objspace->sweep_lock); } -// dequeue MIN(left_to_deq, 10) objects from the deferred object list into `obj_buf`, returning the amount dequeued. -static short -deq_deferred_sweep_objects(rb_objspace_t *objspace, rb_heap_t *heap, VALUE obj_buf[10], short left_to_deq) -{ - GC_ASSERT(left_to_deq > 0); - short to_deq = 10; - if (left_to_deq < 10) to_deq = left_to_deq; -#if PSWEEP_LOCK_STATS > 0 - instrumented_lock_acquire(&heap->deferred_sweep_data.lock, &deferred_sweep_data_lock_stats); -#else - rb_native_mutex_lock(&heap->deferred_sweep_data.lock); -#endif - { - if ((size_t)to_deq > rb_darray_size(heap->deferred_sweep_data.object_list)) { - psweep_debug(0, "Error: trying to deq %hi from object_list of size %lu\n", to_deq, rb_darray_size(heap->deferred_sweep_data.object_list)); - } - GC_ASSERT((size_t)to_deq <= rb_darray_size(heap->deferred_sweep_data.object_list)); - for (short i = 0; i < to_deq; i++) { - obj_buf[i] = rb_darray_get(heap->deferred_sweep_data.object_list, i); - } - } - rb_darray_shift_n(heap->deferred_sweep_data.object_list, to_deq); - rb_native_mutex_unlock(&heap->deferred_sweep_data.lock); - return to_deq; -} - // Free the object in a Ruby thread. Return whether or not we put the slot back on the page's freelist. 
static bool deferred_free(rb_objspace_t *objspace, VALUE obj) @@ -4378,22 +4347,10 @@ heap_page_freelist_append(struct heap_page *page, struct free_slot *freelist) static void sweep_in_ruby_thread(rb_objspace_t *objspace, struct heap_page *page, VALUE obj, bool nozombie) { - rb_heap_t *heap = page->heap; -#if PSWEEP_LOCK_STATS > 0 - instrumented_lock_acquire(&heap->deferred_sweep_data.lock, &deferred_sweep_data_lock_stats); -#else - rb_native_mutex_lock(&heap->deferred_sweep_data.lock); -#endif - { - page->pre_deferred_free_slots += 1; - psweep_debug(1, "[sweep] register sweep later: page(%p), obj(%p) %s\n", (void*)page, (void*)obj, rb_obj_info(obj)); - GC_ASSERT(BUILTIN_TYPE(obj) != T_NONE); - rb_darray_append_without_gc(&heap->deferred_sweep_data.object_list, obj); - /*if (rb_darray_size(heap->deferred_sweep_data.object_list) > 128) {*/ - /*fprintf(stderr, "deferred sweep data object list size:%lu\n", rb_darray_size(heap->deferred_sweep_data.object_list));*/ - /*}*/ - } - rb_native_mutex_unlock(&heap->deferred_sweep_data.lock); + page->pre_deferred_free_slots += 1; + psweep_debug(1, "[sweep] register sweep later: page(%p), obj(%p) %s\n", (void*)page, (void*)obj, rb_obj_info(obj)); + GC_ASSERT(BUILTIN_TYPE(obj) != T_NONE); + MARK_IN_BITMAP(page->deferred_free_bits, obj); } bool @@ -4589,6 +4546,7 @@ gc_pre_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *pa psweep_debug(1, "[sweep] gc_pre_sweep_page(heap:%p page:%p) start\n", heap, page); GC_ASSERT(page->heap == heap); page->pre_deferred_free_slots = 0; + memset(page->deferred_free_bits, 0, sizeof(page->deferred_free_bits)); page->pre_zombie_slots = 0; page->pre_freed_malloc_bytes = 0; current_sweep_thread_page = page; @@ -4699,6 +4657,7 @@ clear_pre_sweep_fields(struct heap_page *page) { page->pre_freed_slots = 0; page->pre_deferred_free_slots = 0; + memset(page->deferred_free_bits, 0, sizeof(page->deferred_free_bits)); page->pre_empty_slots = 0; page->pre_final_slots = 0; page->pre_zombie_slots = 0; @@ -4781,6 +4740,7 @@ gc_sweep_step_worker(rb_objspace_t *objspace, rb_heap_t *heap) { if (heap->swept_pages) { // NOTE: heap->swept_pages needs to be in swept order for gc_sweep_step to work properly. 
+ // TODO: Change to LIFO to get better shared memory cache benefits across threads (L2/L3) struct heap_page *latest = heap->latest_swept_page; GC_ASSERT(latest); latest->free_next = sweep_page; @@ -4890,12 +4850,6 @@ gc_sweep_start_heap(rb_objspace_t *objspace, rb_heap_t *heap) heap->is_finished_sweeping = false; heap->done_background_sweep = false; heap->skip_sweep_continue = false; - // TODO - /*rb_darray_clear_and_free_without_gc(heap->deferred_sweep_data.object_list);*/ - /*if (rb_darray_size(heap->deferred_sweep_data.object_list) > 0) {*/ - /*psweep_debug(-1, "Error: gc_sweep_start_heap with object_list of size %lu\n", rb_darray_size(heap->deferred_sweep_data.object_list));*/ - /*}*/ - /*GC_ASSERT(rb_darray_size(heap->deferred_sweep_data.object_list) == 0);*/ struct heap_page *page = NULL; @@ -5259,31 +5213,59 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) unsigned short deferred_free_freed = 0; unsigned short deferred_to_free = sweep_page->pre_deferred_free_slots; - VALUE obj_buf[10]; - short deq_sz = 0; psweep_debug(-2, "[gc] gc_sweep_step: (heap:%p %ld, page:%p) free_ruby_th: %d, deferred_to_free:%d, pre_freed:%d, pre_empty:%d\n", heap, heap - heaps, sweep_page, free_in_user_thread_p, deferred_to_free, sweep_page->pre_freed_slots, sweep_page->pre_empty_slots); - int deferred_processed = 0; - while (deferred_processed < deferred_to_free) { - deq_sz = deq_deferred_sweep_objects(objspace, heap, obj_buf, deferred_to_free - deferred_processed); - psweep_debug(1, "[gc] gc_sweep_step(heap:%p %ld, page:%p) deq:%d\n", heap, heap - heaps, sweep_page, deq_sz); - for (short i = 0; i < deq_sz; i++) { - VALUE obj = obj_buf[i]; -#if VM_CHECK_MODE > 0 - if (GET_HEAP_PAGE(obj) != sweep_page) { - psweep_debug(0, "Error! bad heap page (got:%p, expecting:%p) obj type:%s\n", GET_HEAP_PAGE(obj), sweep_page, rb_obj_info(obj)); - } - GC_ASSERT(GET_HEAP_PAGE(obj) == sweep_page); -#endif - if (deferred_free(objspace, obj)) { - deferred_free_freed++; + + if (deferred_to_free > 0) { + uintptr_t p = (uintptr_t)sweep_page->start; + bits_t *deferred_bits = sweep_page->deferred_free_bits; + short slot_size = sweep_page->slot_size; + short slot_bits = slot_size / BASE_SLOT_SIZE; + bits_t slot_mask = heap->slot_bits_mask; + + int page_rvalue_count = sweep_page->total_slots * slot_bits; + int bitmap_plane_count = CEILDIV(NUM_IN_PAGE(p) + page_rvalue_count, BITS_BITLENGTH); + + // First plane: skip out-of-range slots at head of page + bits_t bitset = deferred_bits[0]; + bitset >>= NUM_IN_PAGE(p); + bitset &= slot_mask; + while (bitset) { + if (bitset & 1) { + VALUE obj = (VALUE)p; + GC_ASSERT(GET_HEAP_PAGE(obj) == sweep_page); + if (deferred_free(objspace, obj)) { + deferred_free_freed++; + } + else { + deferred_free_final_slots++; + } } - else { - deferred_free_final_slots++; + p += slot_size; + bitset >>= slot_bits; + } + p = (uintptr_t)sweep_page->start + (BITS_BITLENGTH - NUM_IN_PAGE((uintptr_t)sweep_page->start)) * BASE_SLOT_SIZE; + + for (int i = 1; i < bitmap_plane_count; i++) { + bitset = deferred_bits[i] & slot_mask; + while (bitset) { + if (bitset & 1) { + VALUE obj = (VALUE)p; + GC_ASSERT(GET_HEAP_PAGE(obj) == sweep_page); + if (deferred_free(objspace, obj)) { + deferred_free_freed++; + } + else { + deferred_free_final_slots++; + } + } + p += slot_size; + bitset >>= slot_bits; } - deferred_processed++; + p = (uintptr_t)sweep_page->start + (BITS_BITLENGTH * (i + 1) - NUM_IN_PAGE((uintptr_t)sweep_page->start)) * BASE_SLOT_SIZE; } } + ctx.final_slots = sweep_page->pre_final_slots + 
deferred_free_final_slots; ctx.freed_slots = sweep_page->pre_freed_slots + deferred_free_freed; ctx.empty_slots = sweep_page->pre_empty_slots; @@ -11195,7 +11177,6 @@ rb_gc_impl_after_fork(void *objspace_ptr, rb_pid_t pid) for (int i = 0; i < HEAP_COUNT; i++) { rb_heap_t *heap = &heaps[i]; - rb_native_mutex_initialize(&heap->deferred_sweep_data.lock); rb_native_mutex_initialize(&heap->swept_pages_lock); rb_native_cond_initialize(&heap->sweep_page_cond); heap->pre_sweeping_page = NULL; @@ -11308,9 +11289,7 @@ rb_gc_impl_objspace_init(void *objspace_ptr) slot_div_magics[i] = (uint32_t)((uint64_t)UINT32_MAX / heap->slot_size + 1); ccan_list_head_init(&heap->pages); - rb_native_mutex_initialize(&heap->deferred_sweep_data.lock); rb_native_mutex_initialize(&heap->swept_pages_lock); - rb_darray_make_without_gc(&heap->deferred_sweep_data.object_list, 0); rb_native_cond_initialize(&heap->sweep_page_cond); } From ebf3453ec9a8f097ea4bb1b8a09defb6536f5ed1 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Fri, 27 Mar 2026 18:22:26 -0400 Subject: [PATCH 26/67] Add concurrent_set.c debugging facilities --- concurrent_set.c | 52 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/concurrent_set.c b/concurrent_set.c index 1b630ad38ccdf5..215682fd1e0f47 100644 --- a/concurrent_set.c +++ b/concurrent_set.c @@ -13,6 +13,9 @@ #define CONCURRENT_SET_HASH_MASK (~CONCURRENT_SET_HASH_RECLAIMABLE_BIT) #define CONCURRENT_SET_DEBUG 0 +#define CONCURRENT_SET_DEBUG_STATS 0 +#define CONCURRENT_SET_DEBUG_DUPLICATES 0 +#define CONCURRENT_SET_DEBUG_BAD_HASH_FN 0 enum concurrent_set_special_values { CONCURRENT_SET_EMPTY = 0, @@ -33,7 +36,7 @@ struct concurrent_set { const struct rb_concurrent_set_funcs *funcs; struct concurrent_set_entry *entries; int key_type; -#if CONCURRENT_SET_DEBUG +#if CONCURRENT_SET_DEBUG_STATS rb_atomic_t find_count; rb_atomic_t find_probe_total; rb_atomic_t find_probe_max; @@ -68,6 +71,10 @@ static VALUE concurrent_set_hash(const struct concurrent_set *set, VALUE key) { VALUE hash = set->funcs->hash(key); +#if CONCURRENT_SET_DEBUG_BAD_HASH_FN + hash = hash % 1024; + if (hash == 0) hash = 1; +#endif hash &= CONCURRENT_SET_HASH_MASK; if (hash == 0) hash = ~(VALUE)0 & CONCURRENT_SET_HASH_MASK; RUBY_ASSERT(hash != 0); @@ -155,7 +162,7 @@ rb_concurrent_set_probe_stats(VALUE set_obj, rb_atomic_t *find_count, rb_atomic_t *find_probe_total, rb_atomic_t *find_probe_max, rb_atomic_t *insert_count, rb_atomic_t *insert_probe_total, rb_atomic_t *insert_probe_max) { -#if CONCURRENT_SET_DEBUG +#if CONCURRENT_SET_DEBUG_STATS struct concurrent_set *set = RTYPEDDATA_GET_DATA(set_obj); *find_count = RUBY_ATOMIC_LOAD(set->find_count); *find_probe_total = RUBY_ATOMIC_LOAD(set->find_probe_total); @@ -173,7 +180,7 @@ rb_concurrent_set_probe_stats(VALUE set_obj, #endif } -#if CONCURRENT_SET_DEBUG +#if CONCURRENT_SET_DEBUG_STATS static void concurrent_set_atomic_max(rb_atomic_t *target, rb_atomic_t val) { @@ -244,6 +251,7 @@ concurrent_set_try_resize_locked(VALUE old_set_obj, VALUE *set_obj_ptr, VALUE ne // Insert key into new_set. 
struct concurrent_set_probe probe; int idx = concurrent_set_probe_start(&probe, new_set, hash); + int start_idx = idx; while (true) { struct concurrent_set_entry *entry = &new_set->entries[idx]; @@ -262,6 +270,7 @@ concurrent_set_try_resize_locked(VALUE old_set_obj, VALUE *set_obj_ptr, VALUE ne RUBY_ASSERT(entry->key >= CONCURRENT_SET_SPECIAL_VALUE_COUNT); entry->key |= CONCURRENT_SET_CONTINUATION_BIT; idx = concurrent_set_probe_next(&probe); + RUBY_ASSERT(idx != start_idx); } } @@ -397,7 +406,7 @@ rb_concurrent_set_find(VALUE *set_obj_ptr, VALUE key) VALUE curr_hash = rbimpl_atomic_value_load(&entry->hash, RBIMPL_ATOMIC_ACQUIRE) & CONCURRENT_SET_HASH_MASK; if (curr_hash == 0) { -#if CONCURRENT_SET_DEBUG +#if CONCURRENT_SET_DEBUG_STATS rbimpl_atomic_fetch_add(&set->find_count, 1, RBIMPL_ATOMIC_RELAXED); rbimpl_atomic_fetch_add(&set->find_probe_total, probe.d, RBIMPL_ATOMIC_RELAXED); concurrent_set_atomic_max(&set->find_probe_max, probe.d); @@ -411,7 +420,7 @@ rb_concurrent_set_find(VALUE *set_obj_ptr, VALUE key) if (curr_hash != hash) { if (!continuation) { -#if CONCURRENT_SET_DEBUG +#if CONCURRENT_SET_DEBUG_STATS rbimpl_atomic_fetch_add(&set->find_count, 1, RBIMPL_ATOMIC_RELAXED); rbimpl_atomic_fetch_add(&set->find_probe_total, probe.d, RBIMPL_ATOMIC_RELAXED); concurrent_set_atomic_max(&set->find_probe_max, probe.d); @@ -442,7 +451,7 @@ rb_concurrent_set_find(VALUE *set_obj_ptr, VALUE key) if (set->funcs->cmp(key, curr_key)) { // We've found a match. -#if CONCURRENT_SET_DEBUG +#if CONCURRENT_SET_DEBUG_STATS rbimpl_atomic_fetch_add(&set->find_count, 1, RBIMPL_ATOMIC_RELAXED); rbimpl_atomic_fetch_add(&set->find_probe_total, probe.d, RBIMPL_ATOMIC_RELAXED); concurrent_set_atomic_max(&set->find_probe_max, probe.d); @@ -452,7 +461,7 @@ rb_concurrent_set_find(VALUE *set_obj_ptr, VALUE key) } if (!continuation) { -#if CONCURRENT_SET_DEBUG +#if CONCURRENT_SET_DEBUG_STATS rbimpl_atomic_fetch_add(&set->find_count, 1, RBIMPL_ATOMIC_RELAXED); rbimpl_atomic_fetch_add(&set->find_probe_total, probe.d, RBIMPL_ATOMIC_RELAXED); concurrent_set_atomic_max(&set->find_probe_max, probe.d); @@ -551,10 +560,35 @@ rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data) VALUE prev_raw_key = rbimpl_atomic_value_cas(&entry->key, raw_key, key | (continuation ? 
CONCURRENT_SET_CONTINUATION_BIT : 0), RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_ACQUIRE); if (prev_raw_key == raw_key) { -#if CONCURRENT_SET_DEBUG +#if CONCURRENT_SET_DEBUG_STATS rbimpl_atomic_fetch_add(&set->insert_count, 1, RBIMPL_ATOMIC_RELAXED); rbimpl_atomic_fetch_add(&set->insert_probe_total, probe.d, RBIMPL_ATOMIC_RELAXED); concurrent_set_atomic_max(&set->insert_probe_max, probe.d); +#endif +#if CONCURRENT_SET_DEBUG_DUPLICATES + { + // Probe further to verify no duplicate of our key exists + struct concurrent_set_probe dup_probe = probe; + int dup_idx = concurrent_set_probe_next(&dup_probe); + int dup_idx_start = dup_idx; + while (true) { + struct concurrent_set_entry *dup_entry = &set->entries[dup_idx]; + VALUE dup_raw_key = rbimpl_atomic_value_load(&dup_entry->key, RBIMPL_ATOMIC_ACQUIRE); + VALUE dup_key = dup_raw_key & CONCURRENT_SET_KEY_MASK; + + if (dup_key == CONCURRENT_SET_EMPTY) break; + if (dup_key == CONCURRENT_SET_MOVED) break; + + if (dup_key >= CONCURRENT_SET_SPECIAL_VALUE_COUNT && dup_key == key) { + rb_bug("concurrent_set_find_or_insert: duplicate key %p found at index %d after inserting at index %d", + (void *)key, dup_idx, idx); + } + int next_dup_idx = concurrent_set_probe_next(&dup_probe); + if (dup_idx < dup_idx_start && next_dup_idx >= dup_idx_start) break; + if (next_dup_idx == dup_idx_start) break; + dup_idx = next_dup_idx; + } + } #endif RB_GC_GUARD(set_obj); return key; @@ -593,7 +627,7 @@ rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data) if (set->funcs->cmp(key, curr_key)) { // We've found a live match. -#if CONCURRENT_SET_DEBUG +#if CONCURRENT_SET_DEBUG_STATS rbimpl_atomic_fetch_add(&set->insert_count, 1, RBIMPL_ATOMIC_RELAXED); rbimpl_atomic_fetch_add(&set->insert_probe_total, probe.d, RBIMPL_ATOMIC_RELAXED); concurrent_set_atomic_max(&set->insert_probe_max, probe.d); From e7a7070da8fa69990dcd9104cbbc580703a5ad97 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Sat, 28 Mar 2026 10:48:30 -0400 Subject: [PATCH 27/67] concurrent_set: change CAS memory order --- concurrent_set.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/concurrent_set.c b/concurrent_set.c index 215682fd1e0f47..6eb76d9b3647f2 100644 --- a/concurrent_set.c +++ b/concurrent_set.c @@ -519,7 +519,7 @@ rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data) VALUE curr_hash = raw_hash & CONCURRENT_SET_HASH_MASK; if (raw_hash == 0) { // Reserve this slot for our hash value - raw_hash = rbimpl_atomic_value_cas(&entry->hash, 0, hash, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_ACQUIRE); + raw_hash = rbimpl_atomic_value_cas(&entry->hash, 0, hash, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_RELAXED); if (raw_hash != 0) { // Lost race, retry same slot to check winner's hash continue; @@ -537,7 +537,7 @@ rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data) case CONCURRENT_SET_EMPTY: { if ((raw_hash & CONCURRENT_SET_HASH_RECLAIMABLE_BIT) && !continuation) { // Reclaim this reclaimable slot by clearing the reclaimable bit - VALUE prev_hash = rbimpl_atomic_value_cas(&entry->hash, raw_hash, hash, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_ACQUIRE); + VALUE prev_hash = rbimpl_atomic_value_cas(&entry->hash, raw_hash, hash, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_RELAXED); if (prev_hash != raw_hash) { // Lost race, retry same slot continue; @@ -558,7 +558,7 @@ rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data) goto retry; } - VALUE prev_raw_key = rbimpl_atomic_value_cas(&entry->key, raw_key, key | (continuation ? 
CONCURRENT_SET_CONTINUATION_BIT : 0), RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_ACQUIRE); + VALUE prev_raw_key = rbimpl_atomic_value_cas(&entry->key, raw_key, key | (continuation ? CONCURRENT_SET_CONTINUATION_BIT : 0), RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_RELAXED); if (prev_raw_key == raw_key) { #if CONCURRENT_SET_DEBUG_STATS rbimpl_atomic_fetch_add(&set->insert_count, 1, RBIMPL_ATOMIC_RELAXED); @@ -715,7 +715,7 @@ rb_concurrent_set_delete_by_identity_locked(VALUE set_obj, VALUE key) if (!hash_cleared) { // Hashes only change here and they get reclaimed in find_or_insert - prev_hash = rbimpl_atomic_value_cas(&entry->hash, loaded_hash_raw, hash | CONCURRENT_SET_HASH_RECLAIMABLE_BIT, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_ACQUIRE); + prev_hash = rbimpl_atomic_value_cas(&entry->hash, loaded_hash_raw, hash | CONCURRENT_SET_HASH_RECLAIMABLE_BIT, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_RELAXED); RUBY_ASSERT(prev_hash == hash || prev_hash == (hash | CONCURRENT_SET_HASH_RECLAIMABLE_BIT)); hash_cleared = true; } From 01895435643d47e99870468b7cdf0e2ab6e6266b Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Sat, 28 Mar 2026 10:49:10 -0400 Subject: [PATCH 28/67] gc: change some atomic memory orderings --- gc/default/default.c | 74 +++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 45 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index b093c92ebf0137..39ee15a1c7a608 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -1594,7 +1594,7 @@ RVALUE_MARKED_ATOMIC(rb_objspace_t *objspace, VALUE obj) { bits_t *bits = GET_HEAP_MARK_BITS(obj); struct heap_page *page = GET_HEAP_PAGE(obj); - bits_t word = __atomic_load_n(&bits[SLOT_BITMAP_INDEX(page, obj)], __ATOMIC_SEQ_CST); + bits_t word = rbimpl_atomic_value_load((VALUE*)&bits[SLOT_BITMAP_INDEX(page, obj)], RBIMPL_ATOMIC_ACQUIRE); return (word & SLOT_BITMAP_BIT(page, obj)) != 0; } @@ -1946,7 +1946,7 @@ rb_gc_impl_garbage_object_p(void *objspace_ptr, VALUE ptr) bool dead = false; // Set to false/true by the ruby GC thread when entering/exiting GC, so shouldn't change throughout this call. 
- rb_atomic_t use_sweep_thread = RUBY_ATOMIC_LOAD(objspace->use_background_sweep_thread); + rb_atomic_t use_sweep_thread = rbimpl_atomic_load(&objspace->use_background_sweep_thread, RBIMPL_ATOMIC_RELAXED); if (!use_sweep_thread) { // It's not safe to read flags on an object if the sweep thread is running @@ -1971,13 +1971,12 @@ rb_gc_impl_garbage_object_p(void *objspace_ptr, VALUE ptr) if (!use_sweep_thread) { // The ruby GC thread or a user thread called us bool marked = RVALUE_MARKED(objspace, ptr); - GC_ASSERT(marked == RVALUE_MARKED_ATOMIC(objspace, ptr)); - return during_lazy_sweep && !marked && RUBY_ATOMIC_LOAD(page->before_sweep); + return during_lazy_sweep && !marked && rbimpl_atomic_load(&page->before_sweep, RBIMPL_ATOMIC_RELAXED); } else if (during_lazy_sweep) { // we're currently lazy sweeping with the sweep thread bool marked = RVALUE_MARKED_ATOMIC(objspace, ptr); // load it atomically so it can't be re-ordered past the next atomic load - bool before_sweep = RUBY_ATOMIC_LOAD(page->before_sweep); + rb_atomic_t before_sweep = rbimpl_atomic_load(&page->before_sweep, RBIMPL_ATOMIC_ACQUIRE); bool is_garbage = !marked && before_sweep; if (is_garbage) return true; if (marked && before_sweep) return false; @@ -4132,7 +4131,7 @@ gc_post_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *s GC_ASSERT(RUBY_ATOMIC_LOAD(sweep_page->before_sweep)); } #endif - RUBY_ATOMIC_SET(sweep_page->before_sweep, 0); + rbimpl_atomic_store(&sweep_page->before_sweep, 0, RBIMPL_ATOMIC_RELEASE); bits = sweep_page->mark_bits; @@ -4186,7 +4185,7 @@ gc_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct gc_sweep_context GC_ASSERT(RUBY_ATOMIC_LOAD(sweep_page->before_sweep)); } #endif - RUBY_ATOMIC_SET(sweep_page->before_sweep, 0); + rbimpl_atomic_store(&sweep_page->before_sweep, 0, RBIMPL_ATOMIC_RELEASE); sweep_page->free_slots = 0; p = (uintptr_t)sweep_page->start; @@ -4607,7 +4606,7 @@ gc_pre_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *pa static inline bool done_worker_incremental_sweep_steps_p(rb_objspace_t *objspace, rb_heap_t *heap) { - if (ATOMIC_LOAD_RELAXED(heap->foreground_sweep_steps) != heap->background_sweep_steps) { + if (rbimpl_atomic_load(&heap->foreground_sweep_steps, RBIMPL_ATOMIC_ACQUIRE) != heap->background_sweep_steps) { GC_ASSERT(ATOMIC_LOAD_RELAXED(heap->foreground_sweep_steps) > heap->background_sweep_steps); return true; } @@ -4942,7 +4941,7 @@ gc_sweep_start(rb_objspace_t *objspace) (objspace->profile.latest_gc_info & GPR_FLAG_METHOD) == 0 && !(objspace->hook_events & RUBY_INTERNAL_EVENT_FREEOBJ)) { - RUBY_ATOMIC_SET(objspace->use_background_sweep_thread, true); + rbimpl_atomic_store(&objspace->use_background_sweep_thread, true, RBIMPL_ATOMIC_RELEASE); psweep_debug(-1, "[gc] gc_sweep_start: requesting sweep thread\n"); sweep_lock_lock(&objspace->sweep_lock); { @@ -4952,7 +4951,7 @@ gc_sweep_start(rb_objspace_t *objspace) sweep_lock_unlock(&objspace->sweep_lock); } else { - RUBY_ATOMIC_SET(objspace->use_background_sweep_thread, false); + rbimpl_atomic_store(&objspace->use_background_sweep_thread, false, RBIMPL_ATOMIC_RELEASE); psweep_debug(-1, "[gc] gc_sweep_start: not using background sweep thread\n"); } } @@ -5011,7 +5010,7 @@ gc_sweep_finish(rb_objspace_t *objspace) gc_report(1, objspace, "gc_sweep_finish\n"); psweep_debug(-1, "[gc] gc_sweep_finish\n"); - RUBY_ATOMIC_SET(objspace->use_background_sweep_thread, false); + rbimpl_atomic_store(&objspace->use_background_sweep_thread, false, RBIMPL_ATOMIC_RELEASE); 
gc_prof_set_heap_info(objspace); heap_pages_free_unused_pages(objspace); @@ -5019,9 +5018,8 @@ gc_sweep_finish(rb_objspace_t *objspace) for (int i = 0; i < HEAP_COUNT; i++) { rb_heap_t *heap = &heaps[i]; -#ifdef DEBUG_SWEEP_BOOKKEEPING +#if VM_CHECK_MODE > 0 { - /* Assert that every page in this heap was swept. */ struct heap_page *page; ccan_list_for_each(&heap->pages, page, page_node) { if (RUBY_ATOMIC_LOAD(page->before_sweep)) { @@ -5177,10 +5175,10 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) #endif psweep_debug(-2, "[gc] gc_sweep_step heap:%p (%ld) use_sweep_thread:%d\n", heap, heap - heaps, objspace->use_background_sweep_thread); bool sweep_rest = objspace->sweep_rest; - bool use_bg_thread = objspace->use_background_sweep_thread; + bool use_sweep_thread = objspace->use_background_sweep_thread; while (1) { - bool free_in_user_thread_p = !use_bg_thread; + bool free_in_user_thread_p = !use_sweep_thread; bool dequeued_unswept_page = false; // NOTE: pages we dequeue from the sweep thread need to be AFTER the list of heap->free_pages so we don't free from pages // we've allocated from since sweep started. @@ -5219,39 +5217,25 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) if (deferred_to_free > 0) { uintptr_t p = (uintptr_t)sweep_page->start; bits_t *deferred_bits = sweep_page->deferred_free_bits; + int total_slots = sweep_page->total_slots; short slot_size = sweep_page->slot_size; - short slot_bits = slot_size / BASE_SLOT_SIZE; - bits_t slot_mask = heap->slot_bits_mask; - - int page_rvalue_count = sweep_page->total_slots * slot_bits; - int bitmap_plane_count = CEILDIV(NUM_IN_PAGE(p) + page_rvalue_count, BITS_BITLENGTH); - - // First plane: skip out-of-range slots at head of page - bits_t bitset = deferred_bits[0]; - bitset >>= NUM_IN_PAGE(p); - bitset &= slot_mask; - while (bitset) { - if (bitset & 1) { - VALUE obj = (VALUE)p; - GC_ASSERT(GET_HEAP_PAGE(obj) == sweep_page); - if (deferred_free(objspace, obj)) { - deferred_free_freed++; - } - else { - deferred_free_final_slots++; - } - } - p += slot_size; - bitset >>= slot_bits; + + int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH); + int out_of_range_bits = total_slots % BITS_BITLENGTH; + bits_t bitset; + + if (out_of_range_bits != 0) { + deferred_bits[bitmap_plane_count - 1] &= (((bits_t)1 << out_of_range_bits) - 1); } - p = (uintptr_t)sweep_page->start + (BITS_BITLENGTH - NUM_IN_PAGE((uintptr_t)sweep_page->start)) * BASE_SLOT_SIZE; - for (int i = 1; i < bitmap_plane_count; i++) { - bitset = deferred_bits[i] & slot_mask; + for (int i = 0; i < bitmap_plane_count; i++) { + bitset = deferred_bits[i]; + p = (uintptr_t)sweep_page->start + (i * BITS_BITLENGTH * slot_size); while (bitset) { if (bitset & 1) { VALUE obj = (VALUE)p; GC_ASSERT(GET_HEAP_PAGE(obj) == sweep_page); + GC_ASSERT(!RVALUE_MARKED(objspace, obj)); if (deferred_free(objspace, obj)) { deferred_free_freed++; } @@ -5260,11 +5244,11 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) } } p += slot_size; - bitset >>= slot_bits; + bitset >>= 1; } - p = (uintptr_t)sweep_page->start + (BITS_BITLENGTH * (i + 1) - NUM_IN_PAGE((uintptr_t)sweep_page->start)) * BASE_SLOT_SIZE; } } + GC_ASSERT(deferred_to_free == (deferred_free_freed + deferred_free_final_slots)); ctx.final_slots = sweep_page->pre_final_slots + deferred_free_final_slots; ctx.freed_slots = sweep_page->pre_freed_slots + deferred_free_freed; @@ -5369,8 +5353,8 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) heap_add_freepage(heap, sweep_page, "gc_sweep_step"); swept_slots 
+= free_slots; if (swept_slots > GC_INCREMENTAL_SWEEP_SLOT_COUNT) { - if (!sweep_rest && use_bg_thread) { - RUBY_ATOMIC_INC(heap->foreground_sweep_steps); // signal sweep thread to move on + if (!sweep_rest && use_sweep_thread) { + rbimpl_atomic_inc(&heap->foreground_sweep_steps, RBIMPL_ATOMIC_RELEASE); // signal sweep thread to move on } psweep_debug(0, "[gc] gc_sweep_step got to SWEEP_SLOT_COUNT, break\n"); break; From f7ff576467f6e5402ce4f985c188f2a72d3c4dda Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Mon, 30 Mar 2026 15:32:06 -0400 Subject: [PATCH 29/67] assert all unmarked slots are freed if RGENGC_CHECK_MODE > 0 Make popcount_bits work for all platforms --- gc/default/default.c | 40 +++++++++++----------------------------- 1 file changed, 11 insertions(+), 29 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 39ee15a1c7a608..c55d52dfbaf9e6 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -947,14 +947,10 @@ slot_index_for_offset(size_t offset, uint32_t div_magic) return (size_t)(((uint64_t)offset * div_magic) >> 32); } -static inline int +static inline unsigned popcount_bits(bits_t x) { -#if SIZEOF_VOIDP == 8 - return __builtin_popcountl(x); -#else - return __builtin_popcount(x); -#endif + return rb_popcount_intptr((uintptr_t)x); } #define SLOT_INDEX(page, p) slot_index_for_offset((uintptr_t)(p) - (page)->start, (page)->slot_div_magic) @@ -4219,21 +4215,13 @@ gc_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct gc_sweep_context } } -#ifdef DEBUG_SWEEP_BOOKKEEPING +#if RGENGC_CHECK_MODE { - /* Assert that all unmarked slots with live objects were either freed or made into zombies. - * Count unmarked slot-aligned bits the same way the sweep loop does. */ + /* Assert that all unmarked slots with live objects were either freed or made into zombies. */ int unmarked_slots = 0; - uintptr_t vp = (uintptr_t)sweep_page->start; - - bits_t bs = ~bits[0]; - bs >>= NUM_IN_PAGE(vp); - bs &= slot_mask; - unmarked_slots += popcount_bits(bs); - - for (int i = 1; i < bitmap_plane_count; i++) { - bs = ~bits[i] & slot_mask; - unmarked_slots += popcount_bits(bs); + for (int i = 0; i < bitmap_plane_count; i++) { + bits_t unmarked = ~bits[i]; + unmarked_slots += (int)popcount_bits(unmarked); } int freed_or_zombie = ctx->freed_slots + ctx->final_slots; @@ -4569,20 +4557,14 @@ gc_pre_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *pa objspace->profile.pages_swept_by_sweep_thread_had_deferred_free_objects++; } -#ifdef DEBUG_SWEEP_BOOKKEEPING +#if RGENGC_CHECK_MODE { /* Assert that all unmarked slots with live objects were either freed, made into * zombies, or deferred to the Ruby thread. 
*/ int unmarked_slots = 0; - - bits_t bs = ~bits[0]; - bs >>= NUM_IN_PAGE((uintptr_t)page->start); - bs &= slot_mask; - unmarked_slots += popcount_bits(bs); - - for (int i = 1; i < bitmap_plane_count; i++) { - bs = ~bits[i] & slot_mask; - unmarked_slots += popcount_bits(bs); + for (int i = 0; i < bitmap_plane_count; i++) { + bits_t unmarked = ~bits[i]; + unmarked_slots += (int)popcount_bits(unmarked); } int freed_or_zombie = page->pre_freed_slots + page->pre_final_slots + page->pre_deferred_free_slots; From 5d496a7ddc5fcce6af9cc2692f83f8600971d122 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Tue, 31 Mar 2026 08:48:18 -0400 Subject: [PATCH 30/67] Remove a bitfield that shouldn't be there --- gc/default/default.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index c55d52dfbaf9e6..dfc485c6a04a84 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -550,9 +550,7 @@ typedef struct rb_objspace { unsigned int during_compacting : 1; unsigned int during_reference_updating : 1; unsigned int gc_stressful: 1; - unsigned int has_newobj_hook: 1; unsigned int during_incremental_marking : 1; - unsigned int measure_gc : 1; } flags; // This can't be a bitfield because it's accessed in garbage_object_p() from the sweep thread From 603ac4284bf58673373d21b0afc8336bcc80d659 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Tue, 31 Mar 2026 09:09:55 -0400 Subject: [PATCH 31/67] parallel sweep: imemo_callcache handled by weakref cleanup --- gc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/gc.c b/gc.c index 2f0171d04761e2..247f27ffc5419e 100644 --- a/gc.c +++ b/gc.c @@ -2425,10 +2425,6 @@ rb_gc_obj_has_blacklisted_vm_weak_references(VALUE obj) switch (BUILTIN_TYPE(obj)) { case T_IMEMO: switch (imemo_type(obj)) { - case imemo_callcache: { - const struct rb_callcache *cc = (const struct rb_callcache *)obj; - return vm_cc_refinement_p(cc); - } case imemo_callinfo: case imemo_ment: return true; From 0c3bb5f84e975e51917f35d9e9235a81cb7e932d Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Tue, 31 Mar 2026 09:40:39 -0400 Subject: [PATCH 32/67] Parallel sweep: add more fiber pool lock assertions to cont.c --- cont.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/cont.c b/cont.c index bb3bc9ab106f81..4ada1ba00cef36 100644 --- a/cont.c +++ b/cont.c @@ -298,16 +298,28 @@ rb_free_shared_fiber_pool(void) static ID fiber_initialize_keywords[3] = {0}; +// We don't use the VM lock to protect the shared fiber pool because the sweep +// thread needs to be able to free fibers and it can't take the VM lock. 
rb_nativethread_lock_t fiber_lock; #ifdef RUBY_THREAD_PTHREAD_H pthread_t fiber_pool_lock_owner; #endif +MAYBE_UNUSED(static inline bool +fiber_pool_locked_p(bool fallback)) +{ +#ifdef RUBY_THREAD_PTHREAD_H + return pthread_self() == fiber_pool_lock_owner; +#else + return fallback; +#endif +} + static inline void ASSERT_fiber_pool_locked(void) { #ifdef RUBY_THREAD_PTHREAD_H - VM_ASSERT(pthread_self() == fiber_pool_lock_owner); + VM_ASSERT(fiber_pool_locked_p(true)); #endif } @@ -315,7 +327,7 @@ static inline void ASSERT_fiber_pool_unlocked(void) { #ifdef RUBY_THREAD_PTHREAD_H - VM_ASSERT(pthread_self() != fiber_pool_lock_owner); + VM_ASSERT(!fiber_pool_locked_p(false)); #endif } @@ -439,6 +451,7 @@ fiber_pool_vacancy_reset(struct fiber_pool_vacancy * vacancy) inline static struct fiber_pool_vacancy * fiber_pool_vacancy_push(struct fiber_pool_vacancy * vacancy, struct fiber_pool_vacancy * head) { + ASSERT_fiber_pool_locked(); vacancy->next = head; #ifdef FIBER_POOL_ALLOCATION_FREE @@ -471,7 +484,7 @@ fiber_pool_vacancy_remove(struct fiber_pool_vacancy * vacancy) inline static struct fiber_pool_vacancy * fiber_pool_vacancy_pop(struct fiber_pool * pool) { - // fiber_pool_lock is acquired + ASSERT_fiber_pool_locked(); struct fiber_pool_vacancy * vacancy = pool->vacancies; if (vacancy) { @@ -484,7 +497,7 @@ fiber_pool_vacancy_pop(struct fiber_pool * pool) inline static struct fiber_pool_vacancy * fiber_pool_vacancy_pop(struct fiber_pool * pool) { - // fiber_pool_lock is acquired + ASSERT_fiber_pool_locked(); struct fiber_pool_vacancy * vacancy = pool->vacancies; if (vacancy) { @@ -583,7 +596,7 @@ fiber_pool_expand(struct fiber_pool * fiber_pool, size_t count, bool needs_lock, // must not run after base is mapped, or the region would leak. struct fiber_pool_allocation * allocation = RB_ALLOC(struct fiber_pool_allocation); - if (needs_lock) fiber_pool_lock(); + if (needs_lock) fiber_pool_lock(); // no xmalloc allocations can occur with this lock held { STACK_GROW_DIR_DETECTION; @@ -702,9 +715,11 @@ fiber_pool_expand(struct fiber_pool * fiber_pool, size_t count, bool needs_lock, static struct fiber_pool_vacancy * fiber_pool_expand_and_pop(struct fiber_pool * fiber_pool, size_t count, bool needs_lock, bool unlock_before_raise) { - struct fiber_pool_vacancy *vacancy_out; + RUBY_ASSERT(needs_lock || (!needs_lock && fiber_pool_locked_p(true))); + struct fiber_pool_vacancy *vacancy_out = NULL; struct fiber_pool_allocation *allocation = fiber_pool_expand(fiber_pool, count, needs_lock, unlock_before_raise, &vacancy_out); if (allocation) { + RUBY_ASSERT(vacancy_out); return vacancy_out; } else { @@ -731,7 +746,7 @@ fiber_pool_initialize(struct fiber_pool * fiber_pool, size_t size, size_t minimu fiber_pool->vm_stack_size = vm_stack_size; if (fiber_pool->minimum_count > 0) { - if (RB_UNLIKELY(!fiber_pool_expand(fiber_pool, fiber_pool->minimum_count, false, false, NULL))) { + if (RB_UNLIKELY(!fiber_pool_expand(fiber_pool, fiber_pool->minimum_count, true, true, NULL))) { rb_raise(rb_eFiberError, "can't allocate initial fiber stacks (%"PRIuSIZE" x %"PRIuSIZE" bytes): %s", fiber_pool->minimum_count, fiber_pool->size, strerror(errno)); } } @@ -786,6 +801,7 @@ fiber_pool_allocation_free(struct fiber_pool_allocation * allocation) static size_t fiber_pool_stack_expand_count(const struct fiber_pool *pool) { + ASSERT_fiber_pool_locked(); const size_t maximum_allocations = FIBER_POOL_MAXIMUM_ALLOCATIONS; const size_t minimum_count = FIBER_POOL_MINIMUM_COUNT; @@ -964,6 +980,7 @@ fiber_pool_stack_free(struct 
fiber_pool_stack * stack) static void fiber_pool_stack_release(struct fiber_pool_stack * stack) { + ASSERT_fiber_pool_locked(); struct fiber_pool * pool = stack->pool; struct fiber_pool_vacancy * vacancy = fiber_pool_vacancy_pointer(stack->base, stack->size); From 504a426a1bebefeb62aefdd8bc5fea59b7fa34bd Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Tue, 31 Mar 2026 13:56:49 -0400 Subject: [PATCH 33/67] Fix RGENGC_CHECK_MODE without VM_CHECK_MODE --- gc/default/default.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index dfc485c6a04a84..e305355fe92e67 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -1206,8 +1206,8 @@ sweep_lock_lock_impl(rb_nativethread_lock_t *sweep_lock, const char *function, i static inline void sweep_lock_unlock(rb_nativethread_lock_t *sweep_lock) { - GC_ASSERT(sweep_lock_owner == pthread_self()); #if VM_CHECK_MODE > 0 + GC_ASSERT(sweep_lock_owner == pthread_self()); sweep_lock_owner = 0; #endif rb_native_mutex_unlock(sweep_lock); @@ -1225,8 +1225,8 @@ sweep_lock_set_locked(void) static inline void sweep_lock_set_unlocked(void) { - GC_ASSERT(sweep_lock_owner == pthread_self()); #if VM_CHECK_MODE > 0 + GC_ASSERT(sweep_lock_owner == pthread_self()); sweep_lock_owner = 0; #endif } @@ -1640,6 +1640,10 @@ check_rvalue_consistency_force(rb_objspace_t *objspace, const VALUE obj, int ter { int err = 0; + + rb_execution_context_t *ec = rb_current_execution_context(false); + if (!ec) return 0; // sweep thread + int lev = RB_GC_VM_LOCK_NO_BARRIER(); { if (SPECIAL_CONST_P(obj)) { @@ -4349,8 +4353,8 @@ static void debug_free_check(rb_objspace_t *objspace, VALUE vp) { if (!is_full_marking(objspace)) { - if (RVALUE_OLD_P(objspace, vp)) rb_bug("page_sweep: %p - old while minor GC.", (void *)p); - if (RVALUE_REMEMBERED(objspace, vp)) rb_bug("page_sweep: %p - remembered.", (void *)p); + if (RVALUE_OLD_P(objspace, vp)) rb_bug("page_sweep: %p - old while minor GC.", (void *)vp); + if (RVALUE_REMEMBERED(objspace, vp)) rb_bug("page_sweep: %p - remembered.", (void *)vp); } #define CHECK(x) if (x(objspace, vp) != FALSE) rb_bug("obj_free: " #x "(%s) != FALSE", rb_obj_info(vp)) CHECK(RVALUE_MARKED); @@ -4433,6 +4437,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p break; } switch (imemo_type(vp)) { + case imemo_callcache: case imemo_constcache: case imemo_cref: case imemo_env: From 6f6a1d9cd6aa5bd9dd781d48adb5e5c79608879e Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Tue, 31 Mar 2026 14:27:52 -0400 Subject: [PATCH 34/67] Remove commented out function in darray.h --- darray.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/darray.h b/darray.h index c65c01df7355ff..1c2485b935ed1b 100644 --- a/darray.h +++ b/darray.h @@ -48,10 +48,6 @@ #define rb_darray_append_without_gc(ptr_to_ary, element) \ rb_darray_append_impl(ptr_to_ary, element, rb_darray_realloc_mul_add_without_gc) -//#define rb_darray_clear_and_free_without_gc(ptr_to_ary) \ - //rb_darray_size(ptr_to_ary) ? 
(rb_darray_free_without_gc(ptr_to_ary)) : (void)0 - - #define rb_darray_append_impl(ptr_to_ary, element, realloc_func) do { \ rb_darray_ensure_space((ptr_to_ary), \ sizeof(**(ptr_to_ary)), \ From 947a598062b18c8a2706ec5920861554ee82c7a2 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Tue, 31 Mar 2026 15:22:34 -0400 Subject: [PATCH 35/67] parallel sweep: fix for id2ref_tbl being managed table --- gc.c | 70 +++++++++++++++++++++++++++--------------------------------- 1 file changed, 31 insertions(+), 39 deletions(-) diff --git a/gc.c b/gc.c index 247f27ffc5419e..22365e04e956c0 100644 --- a/gc.c +++ b/gc.c @@ -2141,13 +2141,9 @@ id2ref_tbl_unlock(void) static void id2ref_tbl_free(void *data) { - id2ref_tbl_lock(true); - { - RUBY_ATOMIC_PTR_SET(id2ref_tbl, NULL); // clear global ref - st_table *table = (st_table *)data; - st_free_table(table); - } - id2ref_tbl_unlock(); + st_table *table = (st_table *)data; + st_free_table(table); + RUBY_ATOMIC_PTR_SET(id2ref_tbl, NULL); // clear global ref } static const rb_data_type_t id2ref_tbl_type = { @@ -2159,7 +2155,7 @@ static const rb_data_type_t id2ref_tbl_type = { // dcompact function not required because the table is reference updated // in rb_gc_vm_weak_table_foreach }, - .flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_CONCURRENT_FREE_SAFE + .flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY }; static VALUE @@ -2270,36 +2266,26 @@ build_id2ref_i(VALUE obj, void *data) { st_table *id2ref_tbl = (st_table *)data; + if (rb_objspace_garbage_object_p(obj)) return; + switch (BUILTIN_TYPE(obj)) { case T_CLASS: case T_MODULE: RUBY_ASSERT(!rb_objspace_garbage_object_p(obj)); if (RCLASS(obj)->object_id) { - id2ref_tbl_lock(false); - { - st_insert(id2ref_tbl, RCLASS(obj)->object_id, obj); - } - id2ref_tbl_unlock(); + st_insert(id2ref_tbl, RCLASS(obj)->object_id, obj); } break; case T_IMEMO: RUBY_ASSERT(!rb_objspace_garbage_object_p(obj)); if (IMEMO_TYPE_P(obj, imemo_fields) && rb_shape_obj_has_id(obj)) { - id2ref_tbl_lock(false); - { - st_insert(id2ref_tbl, rb_obj_id(obj), rb_imemo_fields_owner(obj)); - } - id2ref_tbl_unlock(); + st_insert(id2ref_tbl, rb_obj_id(obj), rb_imemo_fields_owner(obj)); } break; case T_OBJECT: RUBY_ASSERT(!rb_objspace_garbage_object_p(obj)); if (rb_shape_obj_has_id(obj)) { - id2ref_tbl_lock(false); - { - st_insert(id2ref_tbl, rb_obj_id(obj), obj); - } - id2ref_tbl_unlock(); + st_insert(id2ref_tbl, rb_obj_id(obj), obj); } break; default: @@ -2317,24 +2303,30 @@ object_id_to_ref(void *objspace_ptr, VALUE object_id) if (!RUBY_ATOMIC_PTR_LOAD(id2ref_tbl)) { rb_gc_vm_barrier(); // stop other ractors, background sweeper could still be running + if (!RUBY_ATOMIC_PTR_LOAD(id2ref_tbl)) { + + // GC Must not trigger while we build the table, otherwise if we end + // up freeing an object that had an ID, we might try to delete it from + // the table even though it wasn't inserted yet. + st_table *tmp_id2ref_tbl = st_init_table(&object_id_hash_type); + VALUE tmp_id2ref_value = TypedData_Wrap_Struct(0, &id2ref_tbl_type, tmp_id2ref_tbl); + + // build_id2ref_i will most certainly malloc, which could trigger GC and sweep + // objects we just added to the table. + // By calling rb_gc_disable() we also save having to handle potentially garbage objects. 
+ bool gc_disabled = RTEST(rb_gc_disable()); + { + id2ref_value = tmp_id2ref_value; - // GC Must not trigger while we build the table, otherwise if we end - // up freeing an object that had an ID, we might try to delete it from - // the table even though it wasn't inserted yet. - st_table *tmp_id2ref_tbl = st_init_table(&object_id_hash_type); - VALUE tmp_id2ref_value = TypedData_Wrap_Struct(0, &id2ref_tbl_type, tmp_id2ref_tbl); - - // build_id2ref_i will most certainly malloc, which could trigger GC and sweep - // objects we just added to the table. - // By calling rb_gc_disable() we also save having to handle potentially garbage objects. - bool gc_disabled = RTEST(rb_gc_disable()); - { - id2ref_value = tmp_id2ref_value; - - rb_gc_impl_each_object(objspace, build_id2ref_i, (void *)tmp_id2ref_tbl); - RUBY_ATOMIC_PTR_SET(id2ref_tbl, tmp_id2ref_tbl); + id2ref_tbl_lock(false); + { + rb_gc_impl_each_object(objspace, build_id2ref_i, (void *)tmp_id2ref_tbl); + } + id2ref_tbl_unlock(); + RUBY_ATOMIC_PTR_SET(id2ref_tbl, tmp_id2ref_tbl); + } + if (!gc_disabled) rb_gc_enable(); } - if (!gc_disabled) rb_gc_enable(); } VALUE obj; From 19c82be6293836cb73345811ede9bfeb95f2c4fe Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Tue, 31 Mar 2026 16:16:41 -0400 Subject: [PATCH 36/67] Parallel sweep: fix rb_gc_obj_needs_cleanup_p() for T_ZOMBIE --- gc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gc.c b/gc.c index 22365e04e956c0..7c7349c6c1d203 100644 --- a/gc.c +++ b/gc.c @@ -1402,6 +1402,10 @@ rb_gc_obj_needs_cleanup_p(VALUE obj) case T_COMPLEX: return rb_shape_has_fields(shape_id); + case T_ZOMBIE: + RUBY_ASSERT(flags & FL_FREEZE); + return true; + default: UNREACHABLE_RETURN(true); } From 82933ebdbc28659388f4d932b84484e8a31046e8 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Wed, 1 Apr 2026 10:19:35 -0400 Subject: [PATCH 37/67] gc: Add more assertions --- gc.c | 2 +- gc/default/default.c | 21 ++++++++++++++++----- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/gc.c b/gc.c index 7c7349c6c1d203..15f95455237f2a 100644 --- a/gc.c +++ b/gc.c @@ -1407,7 +1407,7 @@ rb_gc_obj_needs_cleanup_p(VALUE obj) return true; default: - UNREACHABLE_RETURN(true); + rb_bug("bad object type in needs_cleanup_p: %lu", flags & RUBY_T_MASK); } } diff --git a/gc/default/default.c b/gc/default/default.c index e305355fe92e67..a44159790d91ea 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -1683,7 +1683,7 @@ check_rvalue_consistency_force(rb_objspace_t *objspace, const VALUE obj, int ter fprintf(stderr, "check_rvalue_consistency: %s is T_NONE.\n", rb_obj_info(obj)); err++; } - if (BUILTIN_TYPE(obj) == T_ZOMBIE) { + if (BUILTIN_TYPE(obj) == T_ZOMBIE && !FL_TEST(obj, FL_FREEZE)) { fprintf(stderr, "check_rvalue_consistency: %s is T_ZOMBIE.\n", rb_obj_info(obj)); err++; } @@ -3977,7 +3977,7 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit ctx->empty_slots++; heap_page_add_freeobj(objspace, sweep_page, vp); break; - case T_ZOMBIE: // FIXME: no more zombies? 
+ case T_ZOMBIE: if (ZOMBIE_NEEDS_FREE_P(vp)) { goto free_object; } @@ -3993,8 +3993,8 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit psweep_debug(0, "[gc] gc_sweep_plane: heap:%p (%ld) freeing obj:%p (%s)\n", heap, heap - heaps, (void*)vp, rb_obj_info(vp)); #if RGENGC_CHECK_MODE if (!is_full_marking(objspace)) { - if (RVALUE_OLD_P(objspace, vp)) rb_bug("page_sweep: %p - old while minor GC.", (void *)p); - if (RVALUE_REMEMBERED(objspace, vp)) rb_bug("page_sweep: %p - remembered.", (void *)p); + if (RVALUE_OLD_P(objspace, vp)) rb_bug("page_sweep: %p - old while minor GC.", (void *)vp); + if (RVALUE_REMEMBERED(objspace, vp)) rb_bug("page_sweep: %p - remembered.", (void *)vp); } #endif @@ -4021,7 +4021,7 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit ctx->freed_slots++; } else { - gc_report(2, objspace, "page_sweep: free %p\n", (void *)p); + gc_report(2, objspace, "page_sweep: free %p\n", (void *)vp); rb_gc_event_hook(vp, RUBY_INTERNAL_EVENT_FREEOBJ); @@ -4040,6 +4040,9 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit break; } } + else { + GC_ASSERT(RVALUE_MARKED(objspace, vp)); + } p += slot_size; bitset >>= 1; } while (bitset); @@ -4107,6 +4110,11 @@ deferred_free(rb_objspace_t *objspace, VALUE obj) result = true; } else { +#if RUBY_DEBUG + if (!(BUILTIN_TYPE(obj) == T_ZOMBIE && !FL_TEST(obj, FL_FREEZE))) { + rb_bug("should be unfreeable zombie"); + } +#endif result = false; MAYBE_UNUSED(struct heap_page *page) = GET_HEAP_PAGE(obj); psweep_debug(1, "[gc] deferred sweep: page(%p) obj(%p) %s (zombie)\n", page, (void*)obj, obj_info); @@ -4514,6 +4522,9 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p } } } + else { + GC_ASSERT(RVALUE_MARKED(objspace, vp)); + } p += slot_size; bitset >>= 1; From a43e9829d76f7bf7d673e4a1a4684897b647e6e5 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Wed, 1 Apr 2026 11:22:45 -0400 Subject: [PATCH 38/67] gc: remove comments and change VM_CHECK_MODE --- gc/default/default.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index a44159790d91ea..71c211467c9642 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -116,7 +116,6 @@ #define GC_HEAP_INIT_BYTES (2560 * 1024) #endif -/*#define PSWEEP_DEBUG -6*/ #if defined(PSWEEP_DEBUG) #define psweep_debug(lvl, ...) 
if (lvl <= PSWEEP_DEBUG) fprintf(stderr, __VA_ARGS__) #else @@ -3431,7 +3430,7 @@ gc_abort(void *objspace_ptr) objspace->flags.during_incremental_marking = FALSE; } -#if VM_CHECK_MODE > 0 +#if RUBY_DEBUG sweep_lock_lock(&objspace->sweep_lock); GC_ASSERT(!objspace->sweep_rest); sweep_lock_unlock(&objspace->sweep_lock); @@ -4095,7 +4094,7 @@ deferred_free(rb_objspace_t *objspace, VALUE obj) { ASSERT_vm_locking_with_barrier(); bool result; -#if VM_CHECK_MODE > 0 +#ifdef PSWEEP_DEBUG MAYBE_UNUSED(const char *obj_info) = rb_obj_info(obj); #endif bool freed_weakrefs = rb_gc_obj_free_vm_weak_references(obj); @@ -4387,6 +4386,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p rb_asan_unpoison_object(vp, false); if (bitset & 1) { + GC_ASSERT(!RVALUE_MARKED(objspace, vp)); switch (BUILTIN_TYPE(vp)) { case T_MOVED: { empties++; @@ -4395,7 +4395,6 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p break; } case T_NONE: - /*psweep_debug("[sweep] empty: page(%p), obj(%p)\n", (void*)page, (void*)vp);*/ empties++; // already in freelist break; case T_ZOMBIE: @@ -5014,7 +5013,7 @@ gc_sweep_finish(rb_objspace_t *objspace) for (int i = 0; i < HEAP_COUNT; i++) { rb_heap_t *heap = &heaps[i]; -#if VM_CHECK_MODE > 0 +#if RUBY_DEBUG { struct heap_page *page; ccan_list_for_each(&heap->pages, page, page_node) { @@ -5153,7 +5152,7 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) size_t swept_slots = 0; size_t pooled_slots = 0; -#if VM_CHECK_MODE > 0 +#if RUBY_DEBUG sweep_lock_lock(&objspace->sweep_lock); GC_ASSERT(!objspace->background_sweep_mode); sweep_lock_unlock(&objspace->sweep_lock); @@ -5412,7 +5411,6 @@ gc_sweep_rest(rb_objspace_t *objspace) } sweep_lock_unlock(&objspace->sweep_lock); - // We go backwards because the sweep thread goes forwards, and we want to avoid lock contention for (int i = 0; i < HEAP_COUNT; i++) { rb_heap_t *heap = &heaps[i]; @@ -5424,16 +5422,6 @@ gc_sweep_rest(rb_objspace_t *objspace) heap->background_sweep_steps = heap->foreground_sweep_steps; } - /*for (int i = 0; i < HEAP_COUNT; i++) {*/ - /*rb_heap_t *heap = &heaps[i];*/ - - /*while (!heap_is_sweep_done(objspace, heap)) {*/ - /*psweep_debug(0, "[gc] gc_sweep_rest: gc_sweep_step heap:%p (heap %ld)\n", heap, heap - heaps);*/ - /*gc_sweep_step(objspace, heap, false);*/ - /*}*/ - /*GC_ASSERT(heap->is_finished_sweeping);*/ - /*heap->background_sweep_steps = heap->foreground_sweep_steps;*/ - /*}*/ GC_ASSERT(!has_sweeping_pages(objspace)); GC_ASSERT(gc_mode(objspace) == gc_mode_none); } @@ -5623,7 +5611,7 @@ gc_compact_start(rb_objspace_t *objspace) struct heap_page *page = NULL; gc_mode_transition(objspace, gc_mode_compacting); -#if VM_CHECK_MODE > 0 +#if RUBY_DEBUG sweep_lock_lock(&objspace->sweep_lock); GC_ASSERT(!objspace->sweep_thread_sweeping && !objspace->sweep_thread_sweep_requested); sweep_lock_unlock(&objspace->sweep_lock); @@ -8454,7 +8442,7 @@ rb_gc_impl_start(void *objspace_ptr, bool full_mark, bool immediate_mark, bool i } garbage_collect(objspace, reason); -#if VM_CHECK_MODE > 0 +#if RUBY_DEBUG if (immediate_sweep) { sweep_lock_lock(&objspace->sweep_lock); { From d50cb3312c17609a1a59db9f164e3ac8580d14a0 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Wed, 1 Apr 2026 14:04:32 -0400 Subject: [PATCH 39/67] Debugging issues after rebase --- gc/default/default.c | 73 ++++++++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 20 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 
71c211467c9642..a29e1dbbb12cef 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -495,6 +495,8 @@ typedef struct rb_heap_struct { struct heap_page *pooled_pages; size_t total_pages; /* total page count in a heap */ size_t total_slots; /* total slot count */ + unsigned short made_zombies; + unsigned short freed_zombies; rb_atomic_t foreground_sweep_steps; // incremented by ruby thread, checked by sweep thread rb_atomic_t background_sweep_steps; // only incremented/checked by sweep thread @@ -543,9 +545,7 @@ typedef struct rb_objspace { struct { unsigned int mode : 2; unsigned int immediate_sweep : 1; - unsigned int dont_gc : 1; unsigned int dont_incremental : 1; - unsigned int during_gc : 1; unsigned int during_compacting : 1; unsigned int during_reference_updating : 1; unsigned int gc_stressful: 1; @@ -557,6 +557,10 @@ typedef struct rb_objspace { bool during_lazy_sweeping; // This one too, it's accessed in debug_free_check bool during_minor_gc; + bool during_gc; + bool dont_gc; + size_t will_be_swept_slots; + size_t have_swept_slots; rb_event_flag_t hook_events; @@ -1017,7 +1021,7 @@ RVALUE_AGE_SET(VALUE obj, int age) #define heap_pages_freeable_pages objspace->heap_pages.freeable_pages #define heap_pages_deferred_final objspace->heap_pages.deferred_final #define heaps objspace->heaps -#define during_gc objspace->flags.during_gc +#define during_gc objspace->during_gc #define finalizing objspace->atomic_flags.finalizing #define finalizer_table objspace->finalizer_table #define ruby_gc_stressful objspace->flags.gc_stressful @@ -1031,15 +1035,15 @@ RVALUE_AGE_SET(VALUE obj, int age) #endif #if 0 -#define dont_gc_on() (fprintf(stderr, "dont_gc_on@%s:%d\n", __FILE__, __LINE__), objspace->flags.dont_gc = 1) -#define dont_gc_off() (fprintf(stderr, "dont_gc_off@%s:%d\n", __FILE__, __LINE__), objspace->flags.dont_gc = 0) -#define dont_gc_set(b) (fprintf(stderr, "dont_gc_set(%d)@%s:%d\n", __FILE__, __LINE__), objspace->flags.dont_gc = (int)(b)) -#define dont_gc_val() (objspace->flags.dont_gc) +#define dont_gc_on() (fprintf(stderr, "dont_gc_on@%s:%d\n", __FILE__, __LINE__), objspace->dont_gc = 1) +#define dont_gc_off() (fprintf(stderr, "dont_gc_off@%s:%d\n", __FILE__, __LINE__), objspace->dont_gc = 0) +#define dont_gc_set(b) (fprintf(stderr, "dont_gc_set(%d)@%s:%d\n", __FILE__, __LINE__), objspace->dont_gc = (int)(b)) +#define dont_gc_val() (objspace->dont_gc) #else -#define dont_gc_on() (objspace->flags.dont_gc = 1) -#define dont_gc_off() (objspace->flags.dont_gc = 0) -#define dont_gc_set(b) (objspace->flags.dont_gc = (int)(b)) -#define dont_gc_val() (objspace->flags.dont_gc) +#define dont_gc_on() (objspace->dont_gc = 1) +#define dont_gc_off() (objspace->dont_gc = 0) +#define dont_gc_set(b) (objspace->dont_gc = (bool)(b)) +#define dont_gc_val() (objspace->dont_gc) #endif #define gc_config_full_mark_set(b) (objspace->gc_config.full_mark = (int)(b)) @@ -1934,6 +1938,12 @@ rb_gc_impl_get_measure_total_time(void *objspace_ptr) return objspace->flags.measure_gc; } +#define ZOMBIE_OBJ_KEPT_FLAGS (FL_FINALIZE) +// Zombie needs to be put back on the freelist later (during GC) and finalizer has ran +#define ZOMBIE_NEEDS_FREE_FLAG (FL_FREEZE) +#define ZOMBIE_NEEDS_FREE_P(zombie) (FL_TEST(zombie, ZOMBIE_NEEDS_FREE_FLAG)) +#define ZOMBIE_SET_NEEDS_FREE_FLAG(zombie) (FL_SET(zombie, ZOMBIE_NEEDS_FREE_FLAG)) + /* garbage objects will be collected soon. 
*/ bool rb_gc_impl_garbage_object_p(void *objspace_ptr, VALUE ptr) @@ -1951,9 +1961,11 @@ rb_gc_impl_garbage_object_p(void *objspace_ptr, VALUE ptr) switch (BUILTIN_TYPE(ptr)) { case T_NONE: case T_MOVED: - case T_ZOMBIE: dead = true; break; + case T_ZOMBIE: + dead = ZOMBIE_NEEDS_FREE_P(ptr); + break; default: break; } @@ -1978,10 +1990,10 @@ rb_gc_impl_garbage_object_p(void *objspace_ptr, VALUE ptr) if (is_garbage) return true; if (marked && before_sweep) return false; // already swept page, just check flags - return BUILTIN_TYPE(ptr) == T_NONE || BUILTIN_TYPE(ptr) == T_MOVED || BUILTIN_TYPE(ptr) == T_ZOMBIE; + return BUILTIN_TYPE(ptr) == T_NONE || BUILTIN_TYPE(ptr) == T_MOVED || (BUILTIN_TYPE(ptr) == T_ZOMBIE && ZOMBIE_NEEDS_FREE_P(ptr)); } else { - return BUILTIN_TYPE(ptr) == T_NONE || BUILTIN_TYPE(ptr) == T_MOVED || BUILTIN_TYPE(ptr) == T_ZOMBIE; + return BUILTIN_TYPE(ptr) == T_NONE || BUILTIN_TYPE(ptr) == T_MOVED || (BUILTIN_TYPE(ptr) == T_ZOMBIE && ZOMBIE_NEEDS_FREE_P(ptr)); } } @@ -3051,11 +3063,6 @@ rb_gc_impl_pointer_to_heap_p(void *objspace_ptr, const void *ptr) return is_pointer_to_heap(objspace_ptr, ptr); } -#define ZOMBIE_OBJ_KEPT_FLAGS (FL_FINALIZE) -// Zombie needs to be put back on the freelist later (during GC) and finalizer has ran -#define ZOMBIE_NEEDS_FREE_FLAG (FL_FREEZE) -#define ZOMBIE_NEEDS_FREE_P(zombie) (FL_TEST(zombie, ZOMBIE_NEEDS_FREE_FLAG)) -#define ZOMBIE_SET_NEEDS_FREE_FLAG(zombie) (FL_SET(zombie, ZOMBIE_NEEDS_FREE_FLAG)) void rb_gc_impl_make_zombie(void *objspace_ptr, VALUE obj, void (*dfree)(void *), void *data) @@ -3075,6 +3082,7 @@ rb_gc_impl_make_zombie(void *objspace_ptr, VALUE obj, void (*dfree)(void *), voi next = RUBY_ATOMIC_VALUE_CAS(heap_pages_deferred_final, prev, obj); } while (next != prev); page->final_slots++; // NOTE: not synchronized, but either background thread or user thread owns page during free + page->heap->made_zombies++; RUBY_ATOMIC_SIZE_INC(page->heap->final_slots_count); } @@ -3355,6 +3363,7 @@ rb_gc_impl_free_zombie(rb_objspace_t *objspace, VALUE obj) GC_ASSERT(RUBY_ATOMIC_VALUE_LOAD(page->heap->final_slots_count) > 0); RUBY_ATOMIC_SIZE_DEC(page->heap->final_slots_count); GC_ASSERT(page->final_slots > 0); + page->heap->freed_zombies++; page->final_slots--; RVALUE_AGE_SET_BITMAP(obj, 0); } @@ -4838,6 +4847,8 @@ gc_sweep_start_heap(rb_objspace_t *objspace, rb_heap_t *heap) heap->pooled_pages = NULL; heap->latest_swept_page = NULL; heap->pre_swept_slots_deferred = 0; + heap->made_zombies = 0; + heap->freed_zombies = 0; heap->pre_sweeping_page = NULL; heap->background_sweep_steps = heap->foreground_sweep_steps; @@ -4962,6 +4973,12 @@ gc_sweep_finish_heap(rb_objspace_t *objspace, rb_heap_t *heap) psweep_debug(-1, "[gc] gc_sweep_finish heap:%p (%ld)\n", heap, heap - heaps); + /*fprintf(stderr, "swept heap %d, freed:%lu out of %lu\n", heap - heaps, swept_slots, total_slots);*/ + if (is_full_marking(objspace)) { + objspace->have_swept_slots += swept_slots; + objspace->have_swept_slots += heap->made_zombies; + } + GC_ASSERT(heap->background_sweep_steps <= ATOMIC_LOAD_RELAXED(heap->foreground_sweep_steps)); GC_ASSERT(!heap->is_finished_sweeping); heap->is_finished_sweeping = true; @@ -5007,6 +5024,13 @@ gc_sweep_finish(rb_objspace_t *objspace) rbimpl_atomic_store(&objspace->use_background_sweep_thread, false, RBIMPL_ATOMIC_RELEASE); + if (is_full_marking(objspace)) { + if (objspace->will_be_swept_slots != objspace->have_swept_slots) { + fprintf(stderr, "Expecting to free %lu slots, freed %lu slots\n", objspace->will_be_swept_slots, 
objspace->have_swept_slots); + rb_bug("woops"); + } + } + gc_prof_set_heap_info(objspace); heap_pages_free_unused_pages(objspace); @@ -5330,11 +5354,14 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) psweep_debug(0, "[gc] gc_sweep_step: dequeued page(heap:%p %ld, page:%p) free_slots:%u,total_slots:%u\n", heap, heap - heaps, sweep_page, free_slots, sweep_page->total_slots); if (free_slots == sweep_page->total_slots) { + if (sweep_page->total_slots == 0) { + rb_bug("?"); + } + objspace->have_swept_slots += sweep_page->total_slots; psweep_debug(0, "[gc] gc_sweep_step: adding to empty_pages:%p\n", sweep_page); move_to_empty_pages(objspace, heap, sweep_page); } else if (free_slots > 0) { - // These are just for statistics, not used in calculations heap->freed_slots += ctx.freed_slots; heap->empty_slots += ctx.empty_slots; @@ -6943,7 +6970,13 @@ gc_marks_finish(rb_objspace_t *objspace) min_free_slots = gc_params.heap_free_slots * r_mul; } + int full_marking = is_full_marking(objspace); + if (full_marking) { + objspace->have_swept_slots = 0; + objspace->will_be_swept_slots = sweep_slots; + /*fprintf(stderr, "Full marking end. total_slots:%lu, marked:%lu, to sweep:%lu\n", total_slots, objspace->marked_slots, sweep_slots);*/ + } GC_ASSERT(objspace_available_slots(objspace) >= objspace->marked_slots); From 9b584ad046847d21fd5527a32fbb5eab95290bc0 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Wed, 1 Apr 2026 17:18:48 -0400 Subject: [PATCH 40/67] before claude code help --- gc.c | 5 +++-- gc/default/default.c | 13 ++++++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/gc.c b/gc.c index 15f95455237f2a..66aa134a35f83f 100644 --- a/gc.c +++ b/gc.c @@ -1596,7 +1596,7 @@ rb_gc_obj_free(void *objspace, VALUE obj) } break; case T_DATA: - if (!rb_data_free(objspace, obj)) return false; + if (!rb_data_free(objspace, obj)) return FALSE; break; case T_MATCH: { @@ -1686,7 +1686,7 @@ rb_gc_obj_free(void *objspace, VALUE obj) GC_ASSERT(!FL_TEST(obj, FL_FINALIZE)); void rb_gc_impl_free_zombie(rb_objspace_t *, VALUE); rb_gc_impl_free_zombie(objspace, obj); - break; + return TRUE; default: rb_bug("gc_sweep(): unknown data type 0x%x(%p) 0x%"PRIxVALUE, BUILTIN_TYPE(obj), (void*)obj, RBASIC(obj)->flags); @@ -2406,6 +2406,7 @@ obj_free_object_id(VALUE obj, bool in_user_gc_thread) // The the object is a T_IMEMO/fields, then it's possible the actual object // has been garbage collected already. 
if (!RB_TYPE_P(obj, T_IMEMO)) { + id2ref_tbl_unlock(); rb_bug("Object ID seen, but not in _id2ref table: object_id=%llu object=%s", NUM2ULL(obj_id), rb_obj_info(obj)); } } diff --git a/gc/default/default.c b/gc/default/default.c index a29e1dbbb12cef..a259f6feff0555 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -496,7 +496,7 @@ typedef struct rb_heap_struct { size_t total_pages; /* total page count in a heap */ size_t total_slots; /* total slot count */ unsigned short made_zombies; - unsigned short freed_zombies; + unsigned short to_free_zombies; rb_atomic_t foreground_sweep_steps; // incremented by ruby thread, checked by sweep thread rb_atomic_t background_sweep_steps; // only incremented/checked by sweep thread @@ -3363,7 +3363,6 @@ rb_gc_impl_free_zombie(rb_objspace_t *objspace, VALUE obj) GC_ASSERT(RUBY_ATOMIC_VALUE_LOAD(page->heap->final_slots_count) > 0); RUBY_ATOMIC_SIZE_DEC(page->heap->final_slots_count); GC_ASSERT(page->final_slots > 0); - page->heap->freed_zombies++; page->final_slots--; RVALUE_AGE_SET_BITMAP(obj, 0); } @@ -4848,7 +4847,6 @@ gc_sweep_start_heap(rb_objspace_t *objspace, rb_heap_t *heap) heap->latest_swept_page = NULL; heap->pre_swept_slots_deferred = 0; heap->made_zombies = 0; - heap->freed_zombies = 0; heap->pre_sweeping_page = NULL; heap->background_sweep_steps = heap->foreground_sweep_steps; @@ -4975,9 +4973,10 @@ gc_sweep_finish_heap(rb_objspace_t *objspace, rb_heap_t *heap) /*fprintf(stderr, "swept heap %d, freed:%lu out of %lu\n", heap - heaps, swept_slots, total_slots);*/ if (is_full_marking(objspace)) { - objspace->have_swept_slots += swept_slots; + objspace->have_swept_slots += swept_slots - heap->to_free_zombies; objspace->have_swept_slots += heap->made_zombies; } + heap->to_free_zombies = heap->made_zombies; GC_ASSERT(heap->background_sweep_steps <= ATOMIC_LOAD_RELAXED(heap->foreground_sweep_steps)); GC_ASSERT(!heap->is_finished_sweeping); @@ -5357,7 +5356,7 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) if (sweep_page->total_slots == 0) { rb_bug("?"); } - objspace->have_swept_slots += sweep_page->total_slots; + objspace->have_swept_slots += free_slots; psweep_debug(0, "[gc] gc_sweep_step: adding to empty_pages:%p\n", sweep_page); move_to_empty_pages(objspace, heap, sweep_page); } @@ -6975,6 +6974,10 @@ gc_marks_finish(rb_objspace_t *objspace) if (full_marking) { objspace->have_swept_slots = 0; objspace->will_be_swept_slots = sweep_slots; + for (int i = 0; i < HEAP_COUNT; i++) { + GC_ASSERT((&heaps[i])->empty_slots == 0); + GC_ASSERT((&heaps[i])->freed_slots == 0); + } /*fprintf(stderr, "Full marking end. 
total_slots:%lu, marked:%lu, to sweep:%lu\n", total_slots, objspace->marked_slots, sweep_slots);*/ } From a87512f17edd13b9ad116485f116d61c13ed8338 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Wed, 1 Apr 2026 22:54:26 -0400 Subject: [PATCH 41/67] More debugging code --- gc/default/default.c | 45 ++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index a259f6feff0555..cb5ae6bddaea32 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -483,6 +483,7 @@ typedef struct rb_heap_struct { /* Sweeping statistics */ size_t freed_slots; size_t empty_slots; + size_t zombie_slots; // pre-existing zombies not ready yet to free struct heap_page *free_pages; struct ccan_list_head pages; @@ -495,7 +496,7 @@ typedef struct rb_heap_struct { struct heap_page *pooled_pages; size_t total_pages; /* total page count in a heap */ size_t total_slots; /* total slot count */ - unsigned short made_zombies; + rb_atomic_t made_zombies; unsigned short to_free_zombies; rb_atomic_t foreground_sweep_steps; // incremented by ruby thread, checked by sweep thread @@ -547,8 +548,8 @@ typedef struct rb_objspace { unsigned int immediate_sweep : 1; unsigned int dont_incremental : 1; unsigned int during_compacting : 1; + unsigned int was_compacting: 1; unsigned int during_reference_updating : 1; - unsigned int gc_stressful: 1; unsigned int during_incremental_marking : 1; unsigned int measure_gc : 1; } flags; @@ -559,6 +560,7 @@ typedef struct rb_objspace { bool during_minor_gc; bool during_gc; bool dont_gc; + bool gc_stressful; size_t will_be_swept_slots; size_t have_swept_slots; @@ -1024,7 +1026,7 @@ RVALUE_AGE_SET(VALUE obj, int age) #define during_gc objspace->during_gc #define finalizing objspace->atomic_flags.finalizing #define finalizer_table objspace->finalizer_table -#define ruby_gc_stressful objspace->flags.gc_stressful +#define ruby_gc_stressful objspace->gc_stressful #define ruby_gc_stress_mode objspace->gc_stress_mode #if GC_DEBUG_STRESS_TO_CLASS #define stress_to_class objspace->stress_to_class @@ -2412,9 +2414,12 @@ heap_add_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page, page->freelist = NULL; page->deferred_freelist = NULL; asan_unpoison_memory_region(page->body, HEAP_PAGE_SIZE, false); + int i = 0; for (VALUE p = (VALUE)start; p < start + (slot_count * heap->slot_size); p += heap->slot_size) { + i++; heap_page_add_freeobj(objspace, page, p); } + GC_ASSERT(i == slot_count); asan_lock_deferred_freelist(page); asan_lock_freelist(page); @@ -2429,6 +2434,7 @@ heap_add_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page, if (!sweep_lock_taken) sweep_lock_unlock(&objspace->sweep_lock); heap->total_pages++; + GC_ASSERT(page->total_slots == page->free_slots); heap->total_slots += page->total_slots; } @@ -3082,7 +3088,7 @@ rb_gc_impl_make_zombie(void *objspace_ptr, VALUE obj, void (*dfree)(void *), voi next = RUBY_ATOMIC_VALUE_CAS(heap_pages_deferred_final, prev, obj); } while (next != prev); page->final_slots++; // NOTE: not synchronized, but either background thread or user thread owns page during free - page->heap->made_zombies++; + RUBY_ATOMIC_INC(page->heap->made_zombies); RUBY_ATOMIC_SIZE_INC(page->heap->final_slots_count); } @@ -4971,12 +4977,11 @@ gc_sweep_finish_heap(rb_objspace_t *objspace, rb_heap_t *heap) psweep_debug(-1, "[gc] gc_sweep_finish heap:%p (%ld)\n", heap, heap - heaps); - /*fprintf(stderr, "swept heap %d, freed:%lu out of %lu\n", heap - heaps, 
swept_slots, total_slots);*/ - if (is_full_marking(objspace)) { - objspace->have_swept_slots += swept_slots - heap->to_free_zombies; + if (!objspace->flags.during_compacting) { + objspace->have_swept_slots += swept_slots; objspace->have_swept_slots += heap->made_zombies; + objspace->will_be_swept_slots -= heap->zombie_slots; } - heap->to_free_zombies = heap->made_zombies; GC_ASSERT(heap->background_sweep_steps <= ATOMIC_LOAD_RELAXED(heap->foreground_sweep_steps)); GC_ASSERT(!heap->is_finished_sweeping); @@ -5023,12 +5028,19 @@ gc_sweep_finish(rb_objspace_t *objspace) rbimpl_atomic_store(&objspace->use_background_sweep_thread, false, RBIMPL_ATOMIC_RELEASE); - if (is_full_marking(objspace)) { + if ((!objspace->flags.was_compacting) && gc_config_full_mark_val) { if (objspace->will_be_swept_slots != objspace->have_swept_slots) { - fprintf(stderr, "Expecting to free %lu slots, freed %lu slots\n", objspace->will_be_swept_slots, objspace->have_swept_slots); - rb_bug("woops"); + fprintf(stderr, "Expecting to free %lu slots, freed %lu slots (major:%d)\n", objspace->will_be_swept_slots, objspace->have_swept_slots, is_full_marking(objspace)); + for (int i = 0; i < HEAP_COUNT; i++) { + rb_heap_t *heap = &heaps[i]; + fprintf(stderr, "heap %ld zombies_created:%u freed_slots:%lu empty_slots:%lu zombie_slots:%lu, total_slots:%lu\n", + heap - heaps, heap->made_zombies, heap->freed_slots, heap->empty_slots, heap->zombie_slots, heap->total_slots); + } + + rb_bug("MISMATCH: marked_slots:%lu, pooled_slots:%lu, empty_pages:%lu", objspace->marked_slots, objspace->rincgc.pooled_slots, objspace->empty_pages_count); } } + objspace->flags.was_compacting = FALSE; gc_prof_set_heap_info(objspace); heap_pages_free_unused_pages(objspace); @@ -5049,6 +5061,7 @@ gc_sweep_finish(rb_objspace_t *objspace) heap->freed_slots = 0; heap->empty_slots = 0; + heap->zombie_slots = 0; if (heap->background_sweep_steps < heap->foreground_sweep_steps) { heap->background_sweep_steps = heap->foreground_sweep_steps; } @@ -5271,6 +5284,7 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) ctx.final_slots = sweep_page->pre_final_slots + deferred_free_final_slots; ctx.freed_slots = sweep_page->pre_freed_slots + deferred_free_freed; ctx.empty_slots = sweep_page->pre_empty_slots; + ctx.zombie_slots = sweep_page->pre_zombie_slots; gc_post_sweep_page(objspace, heap, sweep_page); // clear bits } @@ -5352,6 +5366,8 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) psweep_debug(0, "[gc] gc_sweep_step: dequeued page(heap:%p %ld, page:%p) free_slots:%u,total_slots:%u\n", heap, heap - heaps, sweep_page, free_slots, sweep_page->total_slots); + heap->zombie_slots += ctx.zombie_slots; + if (free_slots == sweep_page->total_slots) { if (sweep_page->total_slots == 0) { rb_bug("?"); @@ -6971,14 +6987,13 @@ gc_marks_finish(rb_objspace_t *objspace) int full_marking = is_full_marking(objspace); - if (full_marking) { + if (!objspace->flags.during_compacting) { objspace->have_swept_slots = 0; objspace->will_be_swept_slots = sweep_slots; for (int i = 0; i < HEAP_COUNT; i++) { GC_ASSERT((&heaps[i])->empty_slots == 0); GC_ASSERT((&heaps[i])->freed_slots == 0); } - /*fprintf(stderr, "Full marking end. 
total_slots:%lu, marked:%lu, to sweep:%lu\n", total_slots, objspace->marked_slots, sweep_slots);*/ } GC_ASSERT(objspace_available_slots(objspace) >= objspace->marked_slots); @@ -8006,12 +8021,14 @@ gc_start(rb_objspace_t *objspace, unsigned int reason) /* Explicitly enable compaction (GC.compact) */ if (do_full_mark && ruby_enable_autocompact) { objspace->flags.during_compacting = TRUE; + objspace->flags.was_compacting = TRUE; #if RGENGC_CHECK_MODE objspace->rcompactor.compare_func = ruby_autocompact_compare_func; #endif } else { objspace->flags.during_compacting = !!(reason & GPR_FLAG_COMPACT); + objspace->flags.was_compacting = objspace->flags.during_compacting; } if (!GC_ENABLE_LAZY_SWEEP || objspace->flags.dont_incremental) { @@ -9479,7 +9496,7 @@ rb_gc_impl_stress_set(void *objspace_ptr, VALUE flag) { rb_objspace_t *objspace = objspace_ptr; - objspace->flags.gc_stressful = RTEST(flag); + objspace->gc_stressful = RTEST(flag); objspace->gc_stress_mode = flag; } From f36b8afae087ae371508b54e84099bddce5c50fb Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Wed, 1 Apr 2026 23:23:06 -0400 Subject: [PATCH 42/67] gc/default.c: Put debugging behind RUBY_DEBUG --- gc/default/default.c | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index cb5ae6bddaea32..713b69babe57de 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -483,7 +483,9 @@ typedef struct rb_heap_struct { /* Sweeping statistics */ size_t freed_slots; size_t empty_slots; +#if RUBY_DEBUG size_t zombie_slots; // pre-existing zombies not ready yet to free +#endif struct heap_page *free_pages; struct ccan_list_head pages; @@ -496,8 +498,9 @@ typedef struct rb_heap_struct { struct heap_page *pooled_pages; size_t total_pages; /* total page count in a heap */ size_t total_slots; /* total slot count */ +#if RUBY_DEBUG rb_atomic_t made_zombies; - unsigned short to_free_zombies; +#endif rb_atomic_t foreground_sweep_steps; // incremented by ruby thread, checked by sweep thread rb_atomic_t background_sweep_steps; // only incremented/checked by sweep thread @@ -548,7 +551,9 @@ typedef struct rb_objspace { unsigned int immediate_sweep : 1; unsigned int dont_incremental : 1; unsigned int during_compacting : 1; +#if RUBY_DEBUG unsigned int was_compacting: 1; +#endif unsigned int during_reference_updating : 1; unsigned int during_incremental_marking : 1; unsigned int measure_gc : 1; @@ -561,8 +566,10 @@ typedef struct rb_objspace { bool during_gc; bool dont_gc; bool gc_stressful; +#if RUBY_DEBUG size_t will_be_swept_slots; size_t have_swept_slots; +#endif rb_event_flag_t hook_events; @@ -3088,7 +3095,9 @@ rb_gc_impl_make_zombie(void *objspace_ptr, VALUE obj, void (*dfree)(void *), voi next = RUBY_ATOMIC_VALUE_CAS(heap_pages_deferred_final, prev, obj); } while (next != prev); page->final_slots++; // NOTE: not synchronized, but either background thread or user thread owns page during free +#if RUBY_DEBUG RUBY_ATOMIC_INC(page->heap->made_zombies); +#endif RUBY_ATOMIC_SIZE_INC(page->heap->final_slots_count); } @@ -4852,7 +4861,9 @@ gc_sweep_start_heap(rb_objspace_t *objspace, rb_heap_t *heap) heap->pooled_pages = NULL; heap->latest_swept_page = NULL; heap->pre_swept_slots_deferred = 0; +#if RUBY_DEBUG heap->made_zombies = 0; +#endif heap->pre_sweeping_page = NULL; heap->background_sweep_steps = heap->foreground_sweep_steps; @@ -4977,11 +4988,13 @@ gc_sweep_finish_heap(rb_objspace_t *objspace, rb_heap_t *heap) psweep_debug(-1, "[gc] 
gc_sweep_finish heap:%p (%ld)\n", heap, heap - heaps); +#if RUBY_DEBUG if (!objspace->flags.during_compacting) { objspace->have_swept_slots += swept_slots; objspace->have_swept_slots += heap->made_zombies; objspace->will_be_swept_slots -= heap->zombie_slots; } +#endif GC_ASSERT(heap->background_sweep_steps <= ATOMIC_LOAD_RELAXED(heap->foreground_sweep_steps)); GC_ASSERT(!heap->is_finished_sweeping); @@ -5028,7 +5041,8 @@ gc_sweep_finish(rb_objspace_t *objspace) rbimpl_atomic_store(&objspace->use_background_sweep_thread, false, RBIMPL_ATOMIC_RELEASE); - if ((!objspace->flags.was_compacting) && gc_config_full_mark_val) { +#if RUBY_DEBUG + if (!objspace->flags.was_compacting) { if (objspace->will_be_swept_slots != objspace->have_swept_slots) { fprintf(stderr, "Expecting to free %lu slots, freed %lu slots (major:%d)\n", objspace->will_be_swept_slots, objspace->have_swept_slots, is_full_marking(objspace)); for (int i = 0; i < HEAP_COUNT; i++) { @@ -5041,6 +5055,7 @@ gc_sweep_finish(rb_objspace_t *objspace) } } objspace->flags.was_compacting = FALSE; +#endif gc_prof_set_heap_info(objspace); heap_pages_free_unused_pages(objspace); @@ -5057,11 +5072,11 @@ gc_sweep_finish(rb_objspace_t *objspace) } } } + heap->zombie_slots = 0; #endif heap->freed_slots = 0; heap->empty_slots = 0; - heap->zombie_slots = 0; if (heap->background_sweep_steps < heap->foreground_sweep_steps) { heap->background_sweep_steps = heap->foreground_sweep_steps; } @@ -5366,13 +5381,17 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) psweep_debug(0, "[gc] gc_sweep_step: dequeued page(heap:%p %ld, page:%p) free_slots:%u,total_slots:%u\n", heap, heap - heaps, sweep_page, free_slots, sweep_page->total_slots); +#if RUBY_DEBUG heap->zombie_slots += ctx.zombie_slots; +#endif if (free_slots == sweep_page->total_slots) { if (sweep_page->total_slots == 0) { rb_bug("?"); } +#if RUBY_DEBUG objspace->have_swept_slots += free_slots; +#endif psweep_debug(0, "[gc] gc_sweep_step: adding to empty_pages:%p\n", sweep_page); move_to_empty_pages(objspace, heap, sweep_page); } @@ -6987,14 +7006,12 @@ gc_marks_finish(rb_objspace_t *objspace) int full_marking = is_full_marking(objspace); +#if RUBY_DEBUG if (!objspace->flags.during_compacting) { objspace->have_swept_slots = 0; objspace->will_be_swept_slots = sweep_slots; - for (int i = 0; i < HEAP_COUNT; i++) { - GC_ASSERT((&heaps[i])->empty_slots == 0); - GC_ASSERT((&heaps[i])->freed_slots == 0); - } } +#endif GC_ASSERT(objspace_available_slots(objspace) >= objspace->marked_slots); @@ -8021,14 +8038,18 @@ gc_start(rb_objspace_t *objspace, unsigned int reason) /* Explicitly enable compaction (GC.compact) */ if (do_full_mark && ruby_enable_autocompact) { objspace->flags.during_compacting = TRUE; +#if RUBY_DEBUG objspace->flags.was_compacting = TRUE; +#endif #if RGENGC_CHECK_MODE objspace->rcompactor.compare_func = ruby_autocompact_compare_func; #endif } else { objspace->flags.during_compacting = !!(reason & GPR_FLAG_COMPACT); +#if RUBY_DEBUG objspace->flags.was_compacting = objspace->flags.during_compacting; +#endif } if (!GC_ENABLE_LAZY_SWEEP || objspace->flags.dont_incremental) { From 295aecfa4bd2d4151ba3ddcd38bb8ba11e836008 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Wed, 1 Apr 2026 23:23:22 -0400 Subject: [PATCH 43/67] Better sweep lock management --- gc/default/default.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 713b69babe57de..331f8adc6f3d25 100644 --- a/gc/default/default.c 
+++ b/gc/default/default.c @@ -2528,17 +2528,24 @@ heap_prepare(rb_objspace_t *objspace, rb_heap_t *heap) { GC_ASSERT(heap->free_pages == NULL); - sweep_lock_lock(&objspace->sweep_lock); - { - if (heap->total_slots < gc_params.heap_init_bytes / heap->slot_size && - heap->sweeping_page == NULL && heap->swept_pages == NULL && !heap->pre_sweeping_page) { - heap_page_allocate_and_initialize_force(objspace, heap, true); - GC_ASSERT(heap->free_pages != NULL); - sweep_lock_unlock(&objspace->sweep_lock); - return; + if (heap->is_finished_sweeping && heap->total_slots < (gc_params.heap_init_bytes / heap->slot_size)) { + heap_page_allocate_and_initialize_force(objspace, heap, false); + GC_ASSERT(heap->free_pages != NULL); + return; + } + else { + sweep_lock_lock(&objspace->sweep_lock); + { + if (heap->total_slots < (gc_params.heap_init_bytes / heap->slot_size) && + heap->sweeping_page == NULL && heap->swept_pages == NULL && !heap->pre_sweeping_page) { + heap_page_allocate_and_initialize_force(objspace, heap, true); + GC_ASSERT(heap->free_pages != NULL); + sweep_lock_unlock(&objspace->sweep_lock); + return; + } } + sweep_lock_unlock(&objspace->sweep_lock); } - sweep_lock_unlock(&objspace->sweep_lock); /* Continue incremental marking or lazy sweeping, if in any of those steps. */ gc_continue(objspace, heap); @@ -2767,12 +2774,8 @@ heap_next_free_page(rb_objspace_t *objspace, rb_heap_t *heap) heap_prepare(objspace, heap); } - sweep_lock_lock(&objspace->sweep_lock); - { - page = heap->free_pages; - heap->free_pages = page->free_next; - } - sweep_lock_unlock(&objspace->sweep_lock); + page = heap->free_pages; + heap->free_pages = page->free_next; psweep_debug(1, "[gc] heap_next_free_page heap:%p free_pages:%p -> %p (free_slots:%d)\n", heap, page, heap->free_pages, page->free_slots); GC_ASSERT(page->free_slots > 0); From ab9962fe894876d7b093b7a958bca14f0ba568f4 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 2 Apr 2026 00:29:32 -0400 Subject: [PATCH 44/67] cleanup PSWEEP debug macros --- gc/default/default.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 331f8adc6f3d25..32d0fe5552571c 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -116,18 +116,13 @@ #define GC_HEAP_INIT_BYTES (2560 * 1024) #endif -#if defined(PSWEEP_DEBUG) +#define PSWEEP_DEBUG 0 +#if PSWEEP_DEBUG #define psweep_debug(lvl, ...) if (lvl <= PSWEEP_DEBUG) fprintf(stderr, __VA_ARGS__) #else #define psweep_debug(...) 
(void)0 #endif - -/* Define PSWEEP_LOCK_STATS to > 0 to enable lock contention statistics */ -#define PSWEEP_LOCK_STATS 0 -#ifndef PSWEEP_LOCK_STATS #define PSWEEP_LOCK_STATS 0 -#endif - #define PSWEEP_COLLECT_TIMINGS 0 #ifndef GC_HEAP_FREE_SLOTS From 8f48f3c1612abe7391ed4f3d87257184bdf0657d Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 2 Apr 2026 00:30:24 -0400 Subject: [PATCH 45/67] Fix psweep if not USE_MALLOC_INCREASE_LOCAL --- gc/default/default.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 32d0fe5552571c..78b68f1ae05907 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -181,6 +181,7 @@ static RB_THREAD_LOCAL_SPECIFIER int malloc_increase_local; static RB_THREAD_LOCAL_SPECIFIER struct heap_page *current_sweep_thread_page; #else #define USE_MALLOC_INCREASE_LOCAL 0 +static struct heap_page *current_sweep_thread_page; #endif #ifndef GC_CAN_COMPILE_COMPACTION @@ -4613,7 +4614,9 @@ gc_pre_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *pa } #endif +#if USE_MALLOC_INCREASE_LOCAL malloc_increase_local_flush(objspace); +#endif current_sweep_thread_page = NULL; psweep_debug(1, "[sweep] gc_pre_sweep_page(heap:%p page:%p) done, deferred free:%d\n", heap, page, page->pre_deferred_free_slots); @@ -9746,7 +9749,7 @@ objspace_malloc_gc_stress(rb_objspace_t *objspace) } static size_t -malloc_increase_commit(rb_objspace_t *objspace, size_t new_size, size_t old_size) +malloc_increase_commit(rb_objspace_t *objspace, size_t new_size, size_t old_size, struct heap_page *sweep_thread_page) { if (new_size > old_size) { GC_ASSERT(!is_sweep_thread_p()); @@ -9759,8 +9762,8 @@ malloc_increase_commit(rb_objspace_t *objspace, size_t new_size, size_t old_size } else { size_t delta = old_size - new_size; - if (current_sweep_thread_page) { - current_sweep_thread_page->pre_freed_malloc_bytes += delta; + if (sweep_thread_page) { + sweep_thread_page->pre_freed_malloc_bytes += delta; } else { atomic_sub_nounderflow(&malloc_increase, delta); @@ -9781,10 +9784,10 @@ malloc_increase_local_flush(rb_objspace_t *objspace) malloc_increase_local = 0; if (delta > 0) { - return malloc_increase_commit(objspace, (size_t)delta, 0); + return malloc_increase_commit(objspace, (size_t)delta, 0, NULL); } else { - return malloc_increase_commit(objspace, 0, (size_t)(-delta)); + return malloc_increase_commit(objspace, 0, (size_t)(-delta), current_sweep_thread_page); } } #else @@ -9824,10 +9827,10 @@ objspace_malloc_increase_body(rb_objspace_t *objspace, void *mem, size_t new_siz } else { malloc_increase_local_flush(objspace); - current_malloc_increase = malloc_increase_commit(objspace, new_size, old_size); + current_malloc_increase = malloc_increase_commit(objspace, new_size, old_size, current_sweep_thread_page); } #else - current_malloc_increase = malloc_increase_commit(objspace, new_size, old_size); + current_malloc_increase = malloc_increase_commit(objspace, new_size, old_size, is_sweep_thread_p() ? 
current_sweep_thread_page : NULL); #endif if (type == MEMOP_TYPE_MALLOC && gc_allowed) { From c8e503330da74acd9b948d163f19e540fd4eaadb Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 2 Apr 2026 00:30:49 -0400 Subject: [PATCH 46/67] Remove old rb_bug --- gc/default/default.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 78b68f1ae05907..438919c73d7694 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -5387,9 +5387,6 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) #endif if (free_slots == sweep_page->total_slots) { - if (sweep_page->total_slots == 0) { - rb_bug("?"); - } #if RUBY_DEBUG objspace->have_swept_slots += free_slots; #endif From 1a8274c1002580f89c991b0f4af7203395b09cc8 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 2 Apr 2026 10:04:02 -0400 Subject: [PATCH 47/67] Remove unused parameter --- gc/default/default.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 438919c73d7694..b82c2452da22d3 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -4363,7 +4363,7 @@ heap_page_freelist_append(struct heap_page *page, struct free_slot *freelist) } static void -sweep_in_ruby_thread(rb_objspace_t *objspace, struct heap_page *page, VALUE obj, bool nozombie) +sweep_in_ruby_thread(rb_objspace_t *objspace, struct heap_page *page, VALUE obj) { page->pre_deferred_free_slots += 1; psweep_debug(1, "[sweep] register sweep later: page(%p), obj(%p) %s\n", (void*)page, (void*)obj, rb_obj_info(obj)); @@ -4421,7 +4421,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p break; case T_ZOMBIE: if (zombie_needs_deferred_free(vp)) { - sweep_in_ruby_thread(objspace, page, vp, false); + sweep_in_ruby_thread(objspace, page, vp); } else { // already counted as final_slot when made into a zombie @@ -4446,7 +4446,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p } if (!dfree || dfree == RUBY_DEFAULT_FREE || free_immediately) { if (rb_gc_obj_has_blacklisted_vm_weak_references(vp)) { - sweep_in_ruby_thread(objspace, page, vp, true); + sweep_in_ruby_thread(objspace, page, vp); break; } else { @@ -4454,7 +4454,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p } } else { - sweep_in_ruby_thread(objspace, page, vp, false); + sweep_in_ruby_thread(objspace, page, vp); break; } break; @@ -4462,7 +4462,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p case T_IMEMO: { debug_free_check(objspace, vp); if (rb_gc_obj_has_blacklisted_vm_weak_references(vp)) { - sweep_in_ruby_thread(objspace, page, vp, true); + sweep_in_ruby_thread(objspace, page, vp); break; } switch (imemo_type(vp)) { @@ -4478,7 +4478,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p case imemo_fields: goto free; default: - sweep_in_ruby_thread(objspace, page, vp, true); + sweep_in_ruby_thread(objspace, page, vp); break; } break; @@ -4498,7 +4498,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p case T_FILE: { debug_free_check(objspace, vp); if (rb_gc_obj_has_blacklisted_vm_weak_references(vp)) { - sweep_in_ruby_thread(objspace, page, vp, true); + sweep_in_ruby_thread(objspace, page, vp); break; } else { @@ -4506,7 +4506,9 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p } break; } - default: // ex: T_CLASS/T_MODULE/T_ICLASS + 
case T_CLASS: + case T_MODULE: + case T_ICLASS: debug_free_check(objspace, vp); if (!rb_gc_obj_needs_cleanup_p(vp)) { heap_page_add_deferred_freeobj(objspace, page, vp); @@ -4515,7 +4517,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p freed++; } else { - sweep_in_ruby_thread(objspace, page, vp, true); + sweep_in_ruby_thread(objspace, page, vp); } break; free: { @@ -4529,7 +4531,6 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)vp, page->slot_size); } else { - // They are zombies now RUBY_ASSERT(BUILTIN_TYPE(vp) == T_ZOMBIE); psweep_debug(2, "[sweep] zombie: page(%p), obj(%p)\n", (void*)page, (void*)vp); finals++; @@ -4537,10 +4538,12 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p } else { GC_ASSERT(BUILTIN_TYPE(vp) != T_NONE); - sweep_in_ruby_thread(objspace, page, vp, true); + sweep_in_ruby_thread(objspace, page, vp); } break; } + default: + rb_bug("unexpected type: %d\n", BUILTIN_TYPE(vp)); } } else { From 84fd23556685131fa79c559405497edc64b5a034 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 2 Apr 2026 10:05:25 -0400 Subject: [PATCH 48/67] GC: better id2ref_lock management with comments --- gc.c | 59 +++++++++++++++++++++++++++++++---------------------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/gc.c b/gc.c index 66aa134a35f83f..ef888afc8632de 100644 --- a/gc.c +++ b/gc.c @@ -2145,9 +2145,13 @@ id2ref_tbl_unlock(void) static void id2ref_tbl_free(void *data) { - st_table *table = (st_table *)data; - st_free_table(table); - RUBY_ATOMIC_PTR_SET(id2ref_tbl, NULL); // clear global ref + id2ref_tbl_lock(true); + { + st_table *table = (st_table *)data; + st_free_table(table); + RUBY_ATOMIC_PTR_SET(id2ref_tbl, NULL); // clear global ref + } + id2ref_tbl_unlock(); } static const rb_data_type_t id2ref_tbl_type = { @@ -2159,6 +2163,8 @@ static const rb_data_type_t id2ref_tbl_type = { // dcompact function not required because the table is reference updated // in rb_gc_vm_weak_table_foreach }, + // Not marked concurrent free safe so that we can know that when we take the VM lock and check for + // the id2ref_tbl, it won't be deleted out from under us while the VM lock is held. .flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY }; @@ -2177,7 +2183,7 @@ class_object_id(VALUE klass) if (RB_UNLIKELY(id2ref_tbl)) { id2ref_tbl_lock(false); { - st_insert(id2ref_tbl, id, klass); // FIXME: currently needs VM lock for allocation + st_insert(id2ref_tbl, id, klass); // needs VM lock for allocation } id2ref_tbl_unlock(); } @@ -2230,7 +2236,7 @@ object_id0(VALUE obj) RB_VM_LOCKING() { id2ref_tbl_lock(false); { - st_insert(id2ref_tbl, (st_data_t)id, (st_data_t)obj); // FIXME: currently needs VM lock for allocation + st_insert(id2ref_tbl, (st_data_t)id, (st_data_t)obj); // needs VM lock for allocation } id2ref_tbl_unlock(); } @@ -2306,31 +2312,25 @@ object_id_to_ref(void *objspace_ptr, VALUE object_id) unsigned int lev = RB_GC_VM_LOCK(); if (!RUBY_ATOMIC_PTR_LOAD(id2ref_tbl)) { - rb_gc_vm_barrier(); // stop other ractors, background sweeper could still be running - if (!RUBY_ATOMIC_PTR_LOAD(id2ref_tbl)) { - - // GC Must not trigger while we build the table, otherwise if we end - // up freeing an object that had an ID, we might try to delete it from - // the table even though it wasn't inserted yet. 
- st_table *tmp_id2ref_tbl = st_init_table(&object_id_hash_type); - VALUE tmp_id2ref_value = TypedData_Wrap_Struct(0, &id2ref_tbl_type, tmp_id2ref_tbl); - - // build_id2ref_i will most certainly malloc, which could trigger GC and sweep - // objects we just added to the table. - // By calling rb_gc_disable() we also save having to handle potentially garbage objects. - bool gc_disabled = RTEST(rb_gc_disable()); - { - id2ref_value = tmp_id2ref_value; + rb_gc_vm_barrier(); // stop other ractors but sweep thread could still be running + + // GC Must not trigger while we build the table, otherwise if we end + // up freeing an object that had an ID, we might try to delete it from + // the table even though it wasn't inserted yet. + st_table *tmp_id2ref_tbl = st_init_table(&object_id_hash_type); + VALUE tmp_id2ref_value = TypedData_Wrap_Struct(0, &id2ref_tbl_type, tmp_id2ref_tbl); + + // build_id2ref_i will most certainly malloc, which could trigger GC and sweep + // objects we just added to the table. The sweep thread could still be running so + // we need to handle garbage objects. + bool gc_disabled = RTEST(rb_gc_disable()); + { + id2ref_value = tmp_id2ref_value; - id2ref_tbl_lock(false); - { - rb_gc_impl_each_object(objspace, build_id2ref_i, (void *)tmp_id2ref_tbl); - } - id2ref_tbl_unlock(); - RUBY_ATOMIC_PTR_SET(id2ref_tbl, tmp_id2ref_tbl); - } - if (!gc_disabled) rb_gc_enable(); + rb_gc_impl_each_object(objspace, build_id2ref_i, (void *)tmp_id2ref_tbl); + RUBY_ATOMIC_PTR_SET(id2ref_tbl, tmp_id2ref_tbl); } + if (!gc_disabled) rb_gc_enable(); } VALUE obj; @@ -2395,6 +2395,9 @@ obj_free_object_id(VALUE obj, bool in_user_gc_thread) if (RB_UNLIKELY(obj_id)) { RUBY_ASSERT(FIXNUM_P(obj_id) || RB_TYPE_P(obj_id, T_BIGNUM)); + // If we're in the sweep thread, we must use trylock because GC could have been + // triggered by inserting into the id2ref_tbl, which means the GC thread holds the + // lock and we can't wait on it. bool needs_id2ref_tbl_trylock = !in_user_gc_thread; if (needs_id2ref_tbl_trylock) { bool did_lock = id2ref_tbl_trylock(false); From 6936e4c362d63074a38dd44f76d7e75d85f80634 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 2 Apr 2026 12:25:34 -0400 Subject: [PATCH 49/67] parallel sweep: more checks for garbage needed in rb_obj_info --- gc.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/gc.c b/gc.c index ef888afc8632de..ce63a3a8338076 100644 --- a/gc.c +++ b/gc.c @@ -5029,7 +5029,7 @@ rb_method_type_name(rb_method_type_t type) static void rb_raw_iseq_info(char *const buff, const size_t buff_size, const rb_iseq_t *iseq) { - if (buff_size > 0 && ISEQ_BODY(iseq) && ISEQ_BODY(iseq)->location.label && !RB_TYPE_P(ISEQ_BODY(iseq)->location.pathobj, T_MOVED)) { + if (buff_size > 0 && ISEQ_BODY(iseq) && ISEQ_BODY(iseq)->location.label && !rb_objspace_garbage_object_p(ISEQ_BODY(iseq)->location.pathobj)) { VALUE path = rb_iseq_path(iseq); int n = ISEQ_BODY(iseq)->location.first_lineno; snprintf(buff, buff_size, " %s@%s:%d", @@ -5060,7 +5060,7 @@ str_len_no_raise(VALUE str) #define C(c, s) ((c) != 0 ? 
(s) : " ") static size_t -rb_raw_obj_info_common(char *const buff, const size_t buff_size, const VALUE obj) +rb_raw_obj_info_common(char *const buff, const size_t buff_size, const VALUE obj, bool *is_garbage_out) { size_t pos = 0; @@ -5103,6 +5103,10 @@ rb_raw_obj_info_common(char *const buff, const size_t buff_size, const VALUE obj else if (RBASIC(obj)->klass == 0) { APPEND_S("(temporary internal)"); } + else if (rb_objspace_garbage_object_p(RBASIC(obj)->klass)) { + APPEND_S("(garbage class)"); + *is_garbage_out = true; + } else if (RTEST(RBASIC(obj)->klass)) { VALUE class_path = rb_class_path_cached(RBASIC(obj)->klass); if (!NIL_P(class_path)) { @@ -5201,9 +5205,14 @@ rb_raw_obj_info_buitin_type(char *const buff, const size_t buff_size, const VALU } case T_ICLASS: { - VALUE class_path = rb_class_path_cached(RBASIC_CLASS(obj)); - if (!NIL_P(class_path)) { - APPEND_F("src:%s", RSTRING_PTR(class_path)); + if (rb_objspace_garbage_object_p(RBASIC_CLASS(obj))) { + APPEND_S("src: garbage"); + } + else { + VALUE class_path = rb_class_path_cached(RBASIC_CLASS(obj)); + if (!NIL_P(class_path)) { + APPEND_F("src:%s", RSTRING_PTR(class_path)); + } } break; } @@ -5344,8 +5353,11 @@ rb_asan_poisoned_object_p(VALUE obj) static void raw_obj_info(char *const buff, const size_t buff_size, VALUE obj) { - size_t pos = rb_raw_obj_info_common(buff, buff_size, obj); - pos = rb_raw_obj_info_buitin_type(buff, buff_size, obj, pos); + bool is_garbage = false; + size_t pos = rb_raw_obj_info_common(buff, buff_size, obj, &is_garbage); + if (!is_garbage) { + pos = rb_raw_obj_info_buitin_type(buff, buff_size, obj, pos); + } if (pos >= buff_size) {} // truncated } @@ -5360,11 +5372,9 @@ rb_raw_obj_info(char *const buff, const size_t buff_size, VALUE obj) else if (!rb_gc_impl_pointer_to_heap_p(objspace, (const void *)obj)) { snprintf(buff, buff_size, "out-of-heap:%p", (void *)obj); } -#if 0 // maybe no need to check it? 
- else if (0 && rb_gc_impl_garbage_object_p(objspace, obj)) { + else if (rb_gc_impl_garbage_object_p(objspace, obj)) { snprintf(buff, buff_size, "garbage:%p", (void *)obj); } -#endif else { asan_unpoisoning_object(obj) { raw_obj_info(buff, buff_size, obj); From 5f78bb34e1f22bcbe612ae7226127ed14d8b77d6 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 2 Apr 2026 12:26:12 -0400 Subject: [PATCH 50/67] Remove unneeded code --- gc/default/default.c | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index b82c2452da22d3..45703215d4f51a 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -1239,35 +1239,6 @@ sweep_lock_set_unlocked(void) #endif } -// TODO: put it in objspace struct -/*static rb_nativethread_lock_t gc_data_lock_ = PTHREAD_MUTEX_INITIALIZER;*/ -/*static pthread_t gc_data_lock_owner = 0;*/ -/*static int gc_data_lock_lvl = 0;*/ - -/*static inline void*/ -/*gc_data_lock(rb_objspace_t *objspace, bool allow_reentry)*/ -/*{*/ - /*if (allow_reentry && pthread_self() == gc_data_lock_owner) {*/ - /*}*/ - /*else {*/ - /*GC_ASSERT(gc_data_lock_owner != pthread_self());*/ - /*rb_native_mutex_lock(&gc_data_lock_);*/ - /*gc_data_lock_owner = pthread_self();*/ - /*}*/ - /*gc_data_lock_lvl++;*/ -/*}*/ - -/*static inline void*/ -/*gc_data_unlock(rb_objspace_t *objspace)*/ -/*{*/ - /*GC_ASSERT(gc_data_lock_owner == pthread_self());*/ - /*gc_data_lock_lvl--;*/ - /*if (gc_data_lock_lvl == 0) {*/ - /*gc_data_lock_owner = 0;*/ - /*rb_native_mutex_unlock(&gc_data_lock_);*/ - /*}*/ -/*}*/ - // Returns true when the background sweep thread and Ruby thread have finished processing // (background sweeping + ruby thread post-processing or deferred freeing) all pages for that heap. 
static bool @@ -2620,11 +2591,6 @@ rb_gc_impl_source_location_cstr(int *ptr) static inline VALUE newobj_init(VALUE klass, VALUE flags, int wb_protected, rb_objspace_t *objspace, VALUE obj) { -#if VM_CHECK_MODE > 0 - if (BUILTIN_TYPE(obj) != T_NONE) { - fprintf(stderr, "BUILTIN_TYPE(newobj) = %s for obj:%p\n", rb_obj_info(obj), (void*)obj); - } -#endif GC_ASSERT(BUILTIN_TYPE(obj) == T_NONE); GC_ASSERT(RVALUE_AGE_GET(obj) == 0); GC_ASSERT((flags & FL_WB_PROTECTED) == 0); From 8f857aa86a4e7b90e9b727bb219fd11f1aa6dff3 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 2 Apr 2026 12:26:51 -0400 Subject: [PATCH 51/67] move gc_report to before making object T_NONE --- gc/default/default.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 45703215d4f51a..aadf43422f437c 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -4001,9 +4001,9 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit rb_gc_event_hook(vp, RUBY_INTERNAL_EVENT_FREEOBJ); } + gc_report(3, objspace, "page_sweep: %s (fast path) added to freelist\n", rb_obj_info(vp)); RVALUE_AGE_SET_BITMAP(vp, 0); heap_page_add_freeobj(objspace, sweep_page, vp); - gc_report(3, objspace, "page_sweep: %s (fast path) added to freelist\n", rb_obj_info(vp)); (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)vp, slot_size); ctx->freed_slots++; } @@ -4014,9 +4014,9 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit rb_gc_obj_free_vm_weak_references(vp); if (rb_gc_obj_free(objspace, vp)) { + gc_report(3, objspace, "page_sweep: %s is added to freelist\n", rb_obj_info(vp)); RVALUE_AGE_SET_BITMAP(vp, 0); heap_page_add_freeobj(objspace, sweep_page, vp); - gc_report(3, objspace, "page_sweep: %s is added to freelist\n", rb_obj_info(vp)); (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)vp, slot_size); ctx->freed_slots++; } From 316ece5eac4e17717d0346a8e5f40d6095484bf7 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 2 Apr 2026 12:27:33 -0400 Subject: [PATCH 52/67] Fix issue with gc_rest() and MISMATCH debug --- gc/default/default.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gc/default/default.c b/gc/default/default.c index aadf43422f437c..5f50c5fc5bb72c 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -5012,7 +5012,9 @@ gc_sweep_finish(rb_objspace_t *objspace) rbimpl_atomic_store(&objspace->use_background_sweep_thread, false, RBIMPL_ATOMIC_RELEASE); #if RUBY_DEBUG - if (!objspace->flags.was_compacting) { + // When calling GC.start, if in the middle of a non-full mark it will be set as full mark in gc_rest() so the numbers + // will be off. 
+ if (!objspace->flags.was_compacting && !objspace->sweep_rest) { if (objspace->will_be_swept_slots != objspace->have_swept_slots) { fprintf(stderr, "Expecting to free %lu slots, freed %lu slots (major:%d)\n", objspace->will_be_swept_slots, objspace->have_swept_slots, is_full_marking(objspace)); for (int i = 0; i < HEAP_COUNT; i++) { From 384577812465c3d287cf13236e182e11923300a0 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 2 Apr 2026 13:48:31 -0400 Subject: [PATCH 53/67] More debug fixes --- gc/default/default.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 5f50c5fc5bb72c..f3c3aef40dd8f4 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -4618,7 +4618,9 @@ move_to_empty_pages(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page * GC_ASSERT(bitmap_is_all_zero(page->wb_unprotected_bits, HEAP_PAGE_BITMAP_LIMIT)); GC_ASSERT(bitmap_is_all_zero(page->marking_bits, HEAP_PAGE_BITMAP_LIMIT)); GC_ASSERT(bitmap_is_all_zero(page->remembered_bits, HEAP_PAGE_BITMAP_LIMIT)); + GC_ASSERT(bitmap_is_all_zero(page->deferred_free_bits, HEAP_PAGE_BITMAP_LIMIT)); GC_ASSERT(bitmap_is_all_zero(page->age_bits, HEAP_PAGE_BITMAP_LIMIT * RVALUE_AGE_BIT_COUNT)); + // NOTE: pinned bits can still be set, but it's okay because they are cleared when compaction starts heap_unlink_page(objspace, heap, page); @@ -5014,7 +5016,7 @@ gc_sweep_finish(rb_objspace_t *objspace) #if RUBY_DEBUG // When calling GC.start, if in the middle of a non-full mark it will be set as full mark in gc_rest() so the numbers // will be off. - if (!objspace->flags.was_compacting && !objspace->sweep_rest) { + if (!objspace->flags.was_compacting && !objspace->sweep_rest && gc_config_full_mark_val) { if (objspace->will_be_swept_slots != objspace->have_swept_slots) { fprintf(stderr, "Expecting to free %lu slots, freed %lu slots (major:%d)\n", objspace->will_be_swept_slots, objspace->have_swept_slots, is_full_marking(objspace)); for (int i = 0; i < HEAP_COUNT; i++) { @@ -5076,13 +5078,12 @@ gc_sweep_finish(rb_objspace_t *objspace) #endif } -// Dequeue a page swept by the background thread. If `free_in_user_thread` is true, then +// Dequeue a page swept by the sweep thread. If `free_in_user_thread` is true, then // dequeue an unswept page to be swept by the Ruby thread. It can also dequeue an unswept -// page if otherwise it would have to wait for the background thread. In that case, `dequeued_unswept_page` +// page if otherwise it would have to wait for the sweep thread. In that case, `dequeued_unswept_page` // is set to true. // -// It returns NULL when there are no more pages to sweep for the heap, and also when the incremental -// step is finished for the heap (1 incremental step = `gc_continue()`). +// It returns NULL when there are no more pages to sweep for the heap. 
static struct heap_page * gc_sweep_dequeue_page(rb_objspace_t *objspace, rb_heap_t *heap, bool free_in_user_thread, bool *dequeued_unswept_page) { @@ -5423,11 +5424,11 @@ gc_sweep_rest(rb_objspace_t *objspace) sweep_rest_count++; sweep_lock_lock(&objspace->sweep_lock); { + objspace->sweep_rest = true; // reset to false in `gc_sweeping_exit` if (background_sweep_done_p(objspace)) { psweep_debug(-2, "[gc] gc_sweep_rest: bg done, not requesting\n"); } else { - objspace->sweep_rest = true; // reset to false in `gc_sweeping_exit` if (objspace->use_background_sweep_thread && !objspace->sweep_thread_sweeping && !objspace->sweep_thread_sweep_requested) { psweep_debug(-2, "[gc] gc_sweep_rest: request sweep thread\n"); objspace->sweep_thread_sweep_requested = true; From f08c4fb92ae669e6c9bd965653673f4521c45b05 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 2 Apr 2026 18:10:43 -0400 Subject: [PATCH 54/67] tmp commit --- gc/default/default.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index f3c3aef40dd8f4..d33023b8bdd50d 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -2056,7 +2056,7 @@ heap_allocatable_bytes_expand(rb_objspace_t *objspace, } if (gc_params.growth_max_bytes > 0) { - size_t max_total_slots = total_slots + gc_params.growth_max_bytes / slot_size; + size_t max_total_slots = total_slots + (gc_params.growth_max_bytes / slot_size); if (target_total_slots > max_total_slots) target_total_slots = max_total_slots; } @@ -2106,6 +2106,7 @@ heap_unlink_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *pag GC_ASSERT(heap->total_pages > 0); heap->total_pages--; GC_ASSERT(heap->total_slots >= page->total_slots); + GC_ASSERT(page->total_slots > 0); heap->total_slots -= page->total_slots; } @@ -3053,8 +3054,7 @@ rb_gc_impl_make_zombie(void *objspace_ptr, VALUE obj, void (*dfree)(void *), voi zombie->dfree = dfree; zombie->data = data; VALUE prev, next = (VALUE)RUBY_ATOMIC_PTR_LOAD(heap_pages_deferred_final); - struct heap_page *page_after = GET_HEAP_PAGE(zombie); - GC_ASSERT(page == page_after); + GC_ASSERT(page == GET_HEAP_PAGE(zombie)); do { zombie->next = prev = next; next = RUBY_ATOMIC_VALUE_CAS(heap_pages_deferred_final, prev, obj); @@ -7019,7 +7019,13 @@ gc_marks_finish(rb_objspace_t *objspace) } if (full_marking) { - heap_allocatable_bytes_expand(objspace, NULL, sweep_slots, total_slots, heaps[0].slot_size); + /* Use weighted average slot size since total_slots spans all heaps */ + size_t total_heap_bytes = 0; + for (int i = 0; i < HEAP_COUNT; i++) { + total_heap_bytes += heaps[i].total_slots * heaps[i].slot_size; + } + size_t avg_slot_size = total_slots > 0 ? 
total_heap_bytes / total_slots : heaps[0].slot_size; + heap_allocatable_bytes_expand(objspace, NULL, sweep_slots, total_slots, avg_slot_size); } } From 8bfa0f5e715b559faccaf68bc903cd13d72214ec Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 9 Apr 2026 11:52:03 -0400 Subject: [PATCH 55/67] parallel sweep: blacklist imemo types specifically --- gc/default/default.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/gc/default/default.c b/gc/default/default.c index d33023b8bdd50d..f674ec2144ee25 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -4443,9 +4443,13 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p case imemo_tmpbuf: case imemo_fields: goto free; - default: + case imemo_callinfo: + case imemo_ment: + case imemo_iseq: sweep_in_ruby_thread(objspace, page, vp); break; + default: + rb_bug("Unknown imemo type: %d\n", imemo_type(vp)); } break; } From 78cce78cc615de28b80406fd9f0f9317803bda12 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 9 Apr 2026 11:52:42 -0400 Subject: [PATCH 56/67] parallel sweep: remove a test from test_tracepoint.rb that no longer makes sense --- test/-ext-/tracepoint/test_tracepoint.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/test/-ext-/tracepoint/test_tracepoint.rb b/test/-ext-/tracepoint/test_tracepoint.rb index 603fd01fd5c7e6..4805b323baa9af 100644 --- a/test/-ext-/tracepoint/test_tracepoint.rb +++ b/test/-ext-/tracepoint/test_tracepoint.rb @@ -47,7 +47,6 @@ def test_tracks_objspace_count assert_operator stat2[:total_allocated_objects] - stat1[:total_allocated_objects], :>=, newobj_count assert_operator 1_000_000, :<=, newobj_count - assert_operator stat2[:total_freed_objects] + stat2[:heap_final_slots] - stat1[:total_freed_objects], :>=, free_count assert_operator stat2[:count] - stat1[:count], :==, gc_start_count assert_operator gc_start_count, :==, gc_end_mark_count From 2180916f60f7daca7fbeb2374284261a1ad200ed Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 9 Apr 2026 12:05:38 -0400 Subject: [PATCH 57/67] Parallel Sweep: no longer user blacklisted_vm_weak_references --- gc/default/default.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index f674ec2144ee25..2c43bccb597f00 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -4411,13 +4411,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p dfree = RDATA(vp)->dfree; } if (!dfree || dfree == RUBY_DEFAULT_FREE || free_immediately) { - if (rb_gc_obj_has_blacklisted_vm_weak_references(vp)) { - sweep_in_ruby_thread(objspace, page, vp); - break; - } - else { - goto free; - } + goto free; } else { sweep_in_ruby_thread(objspace, page, vp); @@ -4427,10 +4421,6 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p } case T_IMEMO: { debug_free_check(objspace, vp); - if (rb_gc_obj_has_blacklisted_vm_weak_references(vp)) { - sweep_in_ruby_thread(objspace, page, vp); - break; - } switch (imemo_type(vp)) { case imemo_callcache: case imemo_constcache: @@ -4442,10 +4432,11 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p case imemo_throw_data: case imemo_tmpbuf: case imemo_fields: + case imemo_iseq: goto free; case imemo_callinfo: case imemo_ment: - case imemo_iseq: + // blacklisted due to vm weak references sweep_in_ruby_thread(objspace, page, vp); break; default: @@ -4467,14 +4458,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, 
rb_heap_t *heap, struct heap_page *p case T_REGEXP: case T_FILE: { debug_free_check(objspace, vp); - if (rb_gc_obj_has_blacklisted_vm_weak_references(vp)) { - sweep_in_ruby_thread(objspace, page, vp); - break; - } - else { - goto free; - } - break; + goto free; } case T_CLASS: case T_MODULE: From 4de9fe0ae92e140211263b93b8999d415ab06d39 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 9 Apr 2026 13:03:54 -0400 Subject: [PATCH 58/67] Pull out deferred freeing into its own function in from gc_sweep_step() Also, don't call GET_EC() for every call to free_vm_weak_references. We know in advance when this is called from the sweep thread, so just call the correct function in pre_sweep_plane. --- gc.c | 29 ++----------- gc/default/default.c | 96 ++++++++++++++++++++++---------------------- gc/gc.h | 1 - 3 files changed, 52 insertions(+), 74 deletions(-) diff --git a/gc.c b/gc.c index ce63a3a8338076..cc2beeb490f690 100644 --- a/gc.c +++ b/gc.c @@ -1596,7 +1596,7 @@ rb_gc_obj_free(void *objspace, VALUE obj) } break; case T_DATA: - if (!rb_data_free(objspace, obj)) return FALSE; + if (!RB_LIKELY(rb_data_free(objspace, obj))) return FALSE; break; case T_MATCH: { @@ -2420,29 +2420,11 @@ obj_free_object_id(VALUE obj, bool in_user_gc_thread) } bool -rb_gc_obj_has_blacklisted_vm_weak_references(VALUE obj) -{ - switch (BUILTIN_TYPE(obj)) { - case T_IMEMO: - switch (imemo_type(obj)) { - case imemo_callinfo: - case imemo_ment: - return true; - default: - break; - } - return false; - default: - return false; - } -} - -static bool rb_gc_obj_free_whitelisted_vm_weak_references_in_sweep_thread(VALUE obj) { VM_ASSERT(pthread_self() == GET_VM()->gc.sweep_thread); bool result = obj_free_object_id(obj, false); - if (rb_obj_gen_fields_p(obj)) { + if (RB_UNLIKELY(rb_obj_gen_fields_p(obj))) { bool freed_generic = rb_free_generic_ivar(obj); if (!freed_generic) result = false; } @@ -2466,14 +2448,9 @@ rb_gc_obj_free_vm_weak_references(VALUE obj) { ASSUME(!RB_SPECIAL_CONST_P(obj)); - rb_execution_context_t *ec = rb_current_execution_context(false); - if (!ec) { - return rb_gc_obj_free_whitelisted_vm_weak_references_in_sweep_thread(obj); - } - obj_free_object_id(obj, true); - if (rb_obj_gen_fields_p(obj)) { + if (RB_UNLIKELY(rb_obj_gen_fields_p(obj))) { rb_free_generic_ivar(obj); } diff --git a/gc/default/default.c b/gc/default/default.c index 2c43bccb597f00..c60ca451e8d0f9 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -872,7 +872,6 @@ struct heap_page { struct free_slot *freelist; struct free_slot *deferred_freelist; struct ccan_list_node page_node; - rb_ractor_newobj_heap_cache_t *heap_cache; bits_t wb_unprotected_bits[HEAP_PAGE_BITMAP_LIMIT]; /* the following three bitmaps are cleared at the beginning of full GC */ @@ -1255,7 +1254,7 @@ heap_is_sweep_done(rb_objspace_t *objspace, rb_heap_t *heap) } // We always dequeue the last page, never the sweep thread. This avoids locking in the common case. - // It should be synchronized, but it's a "benign race" (FIXME: use atomics?) + // It should be synchronized, but it's a "benign race". 
if (heap->sweeping_page) { return false; } @@ -2759,15 +2758,10 @@ ractor_cache_set_page(rb_objspace_t *objspace, rb_ractor_newobj_cache_t *cache, GC_ASSERT(page->free_slots != 0); GC_ASSERT(page->freelist != NULL); - if (heap_cache->using_page) { - heap_cache->using_page->heap_cache = NULL; - } - heap_cache->using_page = page; heap_cache->freelist = page->freelist; page->free_slots = 0; page->freelist = NULL; - page->heap_cache = heap_cache; rb_asan_unpoison_object((VALUE)heap_cache->freelist, false); GC_ASSERT(RB_TYPE_P((VALUE)heap_cache->freelist, T_NONE)); @@ -3952,7 +3946,7 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit if (bitset & 1) { switch (BUILTIN_TYPE(vp)) { case T_MOVED: - if (objspace->flags.during_compacting) { + if (RB_UNLIKELY(objspace->flags.during_compacting)) { /* The sweep cursor shouldn't have made it to any * T_MOVED slots while the compact flag is enabled. * The sweep cursor and compact cursor move in @@ -4337,7 +4331,7 @@ sweep_in_ruby_thread(rb_objspace_t *objspace, struct heap_page *page, VALUE obj) MARK_IN_BITMAP(page->deferred_free_bits, obj); } -bool +static inline bool zombie_needs_deferred_free(VALUE zombie) { return ZOMBIE_NEEDS_FREE_P(zombie); @@ -4361,6 +4355,8 @@ debug_free_check(rb_objspace_t *objspace, VALUE vp) #define debug_free_check(...) (void)0 #endif +bool rb_gc_obj_free_whitelisted_vm_weak_references_in_sweep_thread(VALUE obj); + static inline void gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page, uintptr_t p, bits_t bitset, short slot_size) { @@ -4476,7 +4472,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p break; free: { debug_free_check(objspace, vp); - if (rb_gc_obj_free_vm_weak_references(vp)) { + if (RB_LIKELY(rb_gc_obj_free_whitelisted_vm_weak_references_in_sweep_thread(vp))) { bool can_put_back_on_freelist = rb_gc_obj_free(objspace, vp); if (can_put_back_on_freelist) { heap_page_add_deferred_freeobj(objspace, page, vp); @@ -4872,7 +4868,6 @@ gc_ractor_newobj_cache_clear(void *c, void *data) heap_page_freelist_append(page, freelist); - if (page) page->heap_cache = NULL; cache->using_page = NULL; cache->freelist = NULL; } @@ -5157,6 +5152,47 @@ is_last_heap(rb_objspace_t *objspace, rb_heap_t *heap) return heap - heaps == (HEAP_COUNT - 1); } +static void +gc_sweep_step_deferred_free(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *sweep_page, unsigned short *freed_out, unsigned short *finals_out) +{ + unsigned short freed = 0; + unsigned short finals = 0; + uintptr_t p = (uintptr_t)sweep_page->start; + bits_t *deferred_bits = sweep_page->deferred_free_bits; + int total_slots = sweep_page->total_slots; + short slot_size = sweep_page->slot_size; + + int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH); + int out_of_range_bits = total_slots % BITS_BITLENGTH; + bits_t bitset; + + if (out_of_range_bits != 0) { + deferred_bits[bitmap_plane_count - 1] &= (((bits_t)1 << out_of_range_bits) - 1); + } + + for (int i = 0; i < bitmap_plane_count; i++) { + bitset = deferred_bits[i]; + p = (uintptr_t)sweep_page->start + (i * BITS_BITLENGTH * slot_size); + while (bitset) { + if (bitset & 1) { + VALUE obj = (VALUE)p; + GC_ASSERT(GET_HEAP_PAGE(obj) == sweep_page); + GC_ASSERT(!RVALUE_MARKED(objspace, obj)); + if (deferred_free(objspace, obj)) { + freed++; + } + else { + finals++; + } + } + p += slot_size; + bitset >>= 1; + } + } + *freed_out = freed; + *finals_out = finals; +} + // Perform incremental (lazy) sweep on a heap. 
static int gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) @@ -5209,51 +5245,20 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) .page = sweep_page }; - unsigned short deferred_free_final_slots = 0; if (free_in_user_thread_p) { gc_sweep_page(objspace, heap, &ctx); GC_ASSERT(sweep_page->pre_deferred_free_slots == 0); } else { unsigned short deferred_free_freed = 0; + unsigned short deferred_free_final_slots = 0; unsigned short deferred_to_free = sweep_page->pre_deferred_free_slots; psweep_debug(-2, "[gc] gc_sweep_step: (heap:%p %ld, page:%p) free_ruby_th: %d, deferred_to_free:%d, pre_freed:%d, pre_empty:%d\n", heap, heap - heaps, sweep_page, free_in_user_thread_p, deferred_to_free, sweep_page->pre_freed_slots, sweep_page->pre_empty_slots); if (deferred_to_free > 0) { - uintptr_t p = (uintptr_t)sweep_page->start; - bits_t *deferred_bits = sweep_page->deferred_free_bits; - int total_slots = sweep_page->total_slots; - short slot_size = sweep_page->slot_size; - - int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH); - int out_of_range_bits = total_slots % BITS_BITLENGTH; - bits_t bitset; - - if (out_of_range_bits != 0) { - deferred_bits[bitmap_plane_count - 1] &= (((bits_t)1 << out_of_range_bits) - 1); - } - - for (int i = 0; i < bitmap_plane_count; i++) { - bitset = deferred_bits[i]; - p = (uintptr_t)sweep_page->start + (i * BITS_BITLENGTH * slot_size); - while (bitset) { - if (bitset & 1) { - VALUE obj = (VALUE)p; - GC_ASSERT(GET_HEAP_PAGE(obj) == sweep_page); - GC_ASSERT(!RVALUE_MARKED(objspace, obj)); - if (deferred_free(objspace, obj)) { - deferred_free_freed++; - } - else { - deferred_free_final_slots++; - } - } - p += slot_size; - bitset >>= 1; - } - } + gc_sweep_step_deferred_free(objspace, heap, sweep_page, &deferred_free_freed, &deferred_free_final_slots); } GC_ASSERT(deferred_to_free == (deferred_free_freed + deferred_free_final_slots)); @@ -5320,9 +5325,6 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) clear_pre_sweep_fields(sweep_page); } - // We never sweep a page that's currently in free_pages, such as a cached page. Our iterator is past those already. - GC_ASSERT(!sweep_page->heap_cache); - #if RGENGC_CHECK_MODE short freelist_len = 0; asan_unlock_freelist(sweep_page); diff --git a/gc/gc.h b/gc/gc.h index 20d941ef9102fd..e6856986d4042c 100644 --- a/gc/gc.h +++ b/gc/gc.h @@ -82,7 +82,6 @@ MODULAR_GC_FN void rb_gc_run_obj_finalizer(VALUE objid, long count, VALUE (*call MODULAR_GC_FN void rb_gc_set_pending_interrupt(void); MODULAR_GC_FN void rb_gc_unset_pending_interrupt(void); MODULAR_GC_FN bool rb_gc_obj_free_vm_weak_references(VALUE obj); -MODULAR_GC_FN bool rb_gc_obj_has_blacklisted_vm_weak_references(VALUE obj); MODULAR_GC_FN bool rb_gc_obj_free(void *objspace, VALUE obj); MODULAR_GC_FN void rb_gc_save_machine_context(void); MODULAR_GC_FN void rb_gc_mark_roots(void *objspace, const char **categoryp); From c3be4297d8018607851124a95831c9f7da48e0ac Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 9 Apr 2026 13:29:25 -0400 Subject: [PATCH 59/67] Remove deferred freelist We no longer need it because we changed how zombies are handled. 
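For reference, the deferred-free pass added earlier in this series (gc_sweep_step_deferred_free) scans each page's deferred_free_bits one machine word at a time and frees the flagged slots from the user thread. Below is a minimal, self-contained sketch of that bit-scanning pattern; it is illustrative only and is not part of this patch, and names such as demo_plane, slot_size and page_start are hypothetical placeholders rather than identifiers from this series.

    /*
     * Illustrative sketch only: walk one bitmap plane and visit every slot
     * whose bit is set, the same pattern gc_sweep_step_deferred_free uses.
     */
    #include <stdio.h>
    #include <stdint.h>

    typedef uint64_t bits_t;

    int
    main(void)
    {
        /* bits 0, 3 and 63 stand in for slots flagged for deferred freeing */
        bits_t demo_plane = ((bits_t)1 << 0) | ((bits_t)1 << 3) | ((bits_t)1 << 63);
        int slot_size = 40;               /* assumed slot size in bytes */
        uintptr_t page_start = 0x10000;   /* assumed page start address */

        bits_t bitset = demo_plane;
        uintptr_t p = page_start;
        while (bitset) {
            if (bitset & 1) {
                /* in the GC this is where deferred_free() runs for the object at p */
                printf("would free slot at %p\n", (void *)p);
            }
            p += slot_size;
            bitset >>= 1;
        }
        return 0;
    }

Because set bits are consumed with a shift, the loop exits as soon as no flagged slots remain in the word, which keeps the user-thread portion of the deferred free cheap even on mostly-empty planes.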
--- gc/default/default.c | 94 ++++++++------------------------------------ 1 file changed, 16 insertions(+), 78 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index c60ca451e8d0f9..2e5133def3ffb7 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -870,7 +870,6 @@ struct heap_page { struct heap_page_body *body; uintptr_t start; struct free_slot *freelist; - struct free_slot *deferred_freelist; struct ccan_list_node page_node; bits_t wb_unprotected_bits[HEAP_PAGE_BITMAP_LIMIT]; @@ -905,18 +904,6 @@ asan_unlock_freelist(struct heap_page *page) asan_unpoison_memory_region(&page->freelist, sizeof(struct free_list *), false); } -static void -asan_lock_deferred_freelist(struct heap_page *page) -{ - asan_poison_memory_region(&page->deferred_freelist, sizeof(struct free_list *)); -} - -static void -asan_unlock_deferred_freelist(struct heap_page *page) -{ - asan_unpoison_memory_region(&page->deferred_freelist, sizeof(struct free_list *), false); -} - static inline bool heap_page_in_global_empty_pages_pool(rb_objspace_t *objspace, struct heap_page *page) { @@ -1977,7 +1964,7 @@ static void mark_stack_free_cache(mark_stack_t *); static void heap_page_free(rb_objspace_t *objspace, struct heap_page *page, bool log); static inline void -heap_page_add_freeobj(rb_objspace_t *objspace, struct heap_page *page, VALUE obj) +heap_page_add_freeobj(rb_objspace_t *objspace, struct heap_page *page, VALUE obj, bool from_sweep_thread) { rb_asan_unpoison_object(obj, false); @@ -1989,8 +1976,10 @@ heap_page_add_freeobj(rb_objspace_t *objspace, struct heap_page *page, VALUE obj page->freelist = slot; asan_lock_freelist(page); - // Should have already been reset - GC_ASSERT(RVALUE_AGE_GET(obj) == 0); + if (!from_sweep_thread) { + // Should have already been reset + GC_ASSERT(RVALUE_AGE_GET(obj) == 0); + } if (RGENGC_CHECK_MODE && /* obj should belong to page */ @@ -2004,22 +1993,6 @@ heap_page_add_freeobj(rb_objspace_t *objspace, struct heap_page *page, VALUE obj gc_report(3, objspace, "heap_page_add_freeobj: add %p to freelist\n", (void *)obj); } -static inline void -heap_page_add_deferred_freeobj(rb_objspace_t *objspace, struct heap_page *page, VALUE obj) -{ - rb_asan_unpoison_object(obj, false); - - struct free_slot *slot = (struct free_slot *)obj; - slot->flags = 0; - asan_unlock_deferred_freelist(page); - slot->next = page->deferred_freelist; - page->deferred_freelist = slot; - asan_lock_deferred_freelist(page); - - rb_asan_poison_object(obj); - gc_report(3, objspace, "heap_page_add_deferred_freeobj: add %p to deferred_freelist\n", (void *)obj); -} - static void heap_allocatable_bytes_expand(rb_objspace_t *objspace, rb_heap_t *heap, size_t free_slots, size_t total_slots, size_t slot_size) @@ -2384,17 +2357,14 @@ heap_add_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page, memset(&page->age_bits[0], 0, sizeof(page->age_bits)); asan_unlock_freelist(page); - asan_unlock_deferred_freelist(page); page->freelist = NULL; - page->deferred_freelist = NULL; asan_unpoison_memory_region(page->body, HEAP_PAGE_SIZE, false); int i = 0; for (VALUE p = (VALUE)start; p < start + (slot_count * heap->slot_size); p += heap->slot_size) { i++; - heap_page_add_freeobj(objspace, page, p); + heap_page_add_freeobj(objspace, page, p, false); } GC_ASSERT(i == slot_count); - asan_lock_deferred_freelist(page); asan_lock_freelist(page); page->free_slots = slot_count; @@ -3956,7 +3926,7 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit } gc_report(3, objspace, 
"page_sweep: %s is added to freelist\n", rb_obj_info(vp)); ctx->empty_slots++; - heap_page_add_freeobj(objspace, sweep_page, vp); + heap_page_add_freeobj(objspace, sweep_page, vp, false); break; case T_ZOMBIE: if (ZOMBIE_NEEDS_FREE_P(vp)) { @@ -3997,7 +3967,7 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit gc_report(3, objspace, "page_sweep: %s (fast path) added to freelist\n", rb_obj_info(vp)); RVALUE_AGE_SET_BITMAP(vp, 0); - heap_page_add_freeobj(objspace, sweep_page, vp); + heap_page_add_freeobj(objspace, sweep_page, vp, false); (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)vp, slot_size); ctx->freed_slots++; } @@ -4010,7 +3980,7 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit if (rb_gc_obj_free(objspace, vp)) { gc_report(3, objspace, "page_sweep: %s is added to freelist\n", rb_obj_info(vp)); RVALUE_AGE_SET_BITMAP(vp, 0); - heap_page_add_freeobj(objspace, sweep_page, vp); + heap_page_add_freeobj(objspace, sweep_page, vp, false); (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)vp, slot_size); ctx->freed_slots++; } @@ -4086,7 +4056,7 @@ deferred_free(rb_objspace_t *objspace, VALUE obj) struct heap_page *page = GET_HEAP_PAGE(obj); psweep_debug(1, "[gc] deferred free: page(%p) obj(%p) %s (success)\n", page, (void*)obj, obj_info); RVALUE_AGE_SET_BITMAP(obj, 0); - heap_page_add_freeobj(objspace, page, obj); + heap_page_add_freeobj(objspace, page, obj, false); (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)obj, page->slot_size); result = true; } @@ -4322,7 +4292,7 @@ heap_page_freelist_append(struct heap_page *page, struct free_slot *freelist) } } -static void +static inline void sweep_in_ruby_thread(rb_objspace_t *objspace, struct heap_page *page, VALUE obj) { page->pre_deferred_free_slots += 1; @@ -4374,7 +4344,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p switch (BUILTIN_TYPE(vp)) { case T_MOVED: { empties++; - heap_page_add_deferred_freeobj(objspace, page, vp); + heap_page_add_freeobj(objspace, page, vp, true); (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)vp, page->slot_size); break; } @@ -4428,9 +4398,9 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p case imemo_throw_data: case imemo_tmpbuf: case imemo_fields: - case imemo_iseq: goto free; case imemo_callinfo: + case imemo_iseq: // calls rb_yjit_iseq_free which is not concurrency safe case imemo_ment: // blacklisted due to vm weak references sweep_in_ruby_thread(objspace, page, vp); @@ -4461,7 +4431,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p case T_ICLASS: debug_free_check(objspace, vp); if (!rb_gc_obj_needs_cleanup_p(vp)) { - heap_page_add_deferred_freeobj(objspace, page, vp); + heap_page_add_freeobj(objspace, page, vp, true); psweep_debug(2, "[sweep] freed: page(%p), obj(%p)\n", (void*)page, (void*)vp); (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)vp, page->slot_size); freed++; @@ -4475,7 +4445,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p if (RB_LIKELY(rb_gc_obj_free_whitelisted_vm_weak_references_in_sweep_thread(vp))) { bool can_put_back_on_freelist = rb_gc_obj_free(objspace, vp); if (can_put_back_on_freelist) { - heap_page_add_deferred_freeobj(objspace, page, vp); + heap_page_add_freeobj(objspace, page, vp, true); freed++; psweep_debug(2, "[sweep] freed: page(%p), obj(%p)\n", (void*)page, (void*)vp); (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)vp, page->slot_size); @@ -4637,18 +4607,6 @@ clear_pre_sweep_fields(struct heap_page *page) 
page->pre_freed_malloc_bytes = 0; } -// add beginning of b to end of a -static void -merge_freelists(struct free_slot *a, struct free_slot *b) -{ - if (a && b) { - while (a->next) { - a = a->next; - } - a->next = b; - } -} - // Perform incremental (lazy) sweep on a heap by the background sweep thread. static void gc_sweep_step_worker(rb_objspace_t *objspace, rb_heap_t *heap) @@ -5289,32 +5247,12 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap) if (free_in_user_thread_p) { GC_ASSERT(sweep_page->free_slots == free_slots); // gc_sweep_page() sets sweep_page->free slots GC_ASSERT(sweep_page->heap->total_freed_objects >= (unsigned long)ctx.freed_slots); - GC_ASSERT(!sweep_page->deferred_freelist); } else { sweep_page->free_slots = free_slots; // NOTE: sweep_page->final slots have already been updated by make_zombie GC_ASSERT(sweep_page->free_slots <= sweep_page->total_slots); GC_ASSERT(sweep_page->final_slots <= sweep_page->total_slots); sweep_page->heap->total_freed_objects += ctx.freed_slots; - // merge freelists - asan_unlock_freelist(sweep_page); - asan_unlock_deferred_freelist(sweep_page); - struct free_slot *deferred_freelist = sweep_page->deferred_freelist; - psweep_debug(1, "[gc] gc_sweep_step: deferred freelist size:%d, free slots:%d\n", freelist_size(deferred_freelist), free_slots); - if (deferred_freelist) { - struct free_slot *cur_list = sweep_page->freelist; - psweep_debug(1, "[gc] gc_sweep_step: sweep_page->freelist size:%d\n", freelist_size(cur_list)); - if (cur_list) { - merge_freelists(deferred_freelist, cur_list); - } - sweep_page->freelist = deferred_freelist; - sweep_page->deferred_freelist = NULL; - } - else { - GC_ASSERT(sweep_page->pre_freed_slots == 0); - } - asan_lock_deferred_freelist(sweep_page); - asan_lock_freelist(sweep_page); if (sweep_page->pre_freed_malloc_bytes > 0) { atomic_sub_nounderflow(&malloc_increase, sweep_page->pre_freed_malloc_bytes); @@ -5588,7 +5526,7 @@ invalidate_moved_plane(rb_objspace_t *objspace, struct heap_page *page, uintptr_ struct heap_page *orig_page = GET_HEAP_PAGE(object); orig_page->free_slots++; RVALUE_AGE_SET_BITMAP(object, 0); - heap_page_add_freeobj(objspace, orig_page, object); + heap_page_add_freeobj(objspace, orig_page, object, false); GC_ASSERT(RVALUE_MARKED(objspace, forwarding_object)); GC_ASSERT(BUILTIN_TYPE(forwarding_object) != T_MOVED); From f78728f817944a3d534486cef3c3db42c67c0c2f Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 9 Apr 2026 13:33:58 -0400 Subject: [PATCH 60/67] Change name of free_whitelisted_vm_weak_references_from_sweep_thread Since MMTK might also use this, called it something more generic. It's now `rb_gc_obj_free_concurrency_safe_vm_weak_references`. 
--- gc.c | 3 +-- gc/default/default.c | 4 +--- gc/gc.h | 1 + 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/gc.c b/gc.c index cc2beeb490f690..817037553b67f9 100644 --- a/gc.c +++ b/gc.c @@ -2420,9 +2420,8 @@ obj_free_object_id(VALUE obj, bool in_user_gc_thread) } bool -rb_gc_obj_free_whitelisted_vm_weak_references_in_sweep_thread(VALUE obj) +rb_gc_obj_free_concurrency_safe_vm_weak_references(VALUE obj) { - VM_ASSERT(pthread_self() == GET_VM()->gc.sweep_thread); bool result = obj_free_object_id(obj, false); if (RB_UNLIKELY(rb_obj_gen_fields_p(obj))) { bool freed_generic = rb_free_generic_ivar(obj); diff --git a/gc/default/default.c b/gc/default/default.c index 2e5133def3ffb7..ee244ae542c4af 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -4325,8 +4325,6 @@ debug_free_check(rb_objspace_t *objspace, VALUE vp) #define debug_free_check(...) (void)0 #endif -bool rb_gc_obj_free_whitelisted_vm_weak_references_in_sweep_thread(VALUE obj); - static inline void gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page, uintptr_t p, bits_t bitset, short slot_size) { @@ -4442,7 +4440,7 @@ gc_pre_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *p break; free: { debug_free_check(objspace, vp); - if (RB_LIKELY(rb_gc_obj_free_whitelisted_vm_weak_references_in_sweep_thread(vp))) { + if (RB_LIKELY(rb_gc_obj_free_concurrency_safe_vm_weak_references(vp))) { bool can_put_back_on_freelist = rb_gc_obj_free(objspace, vp); if (can_put_back_on_freelist) { heap_page_add_freeobj(objspace, page, vp, true); diff --git a/gc/gc.h b/gc/gc.h index e6856986d4042c..44ff018aa123e7 100644 --- a/gc/gc.h +++ b/gc/gc.h @@ -82,6 +82,7 @@ MODULAR_GC_FN void rb_gc_run_obj_finalizer(VALUE objid, long count, VALUE (*call MODULAR_GC_FN void rb_gc_set_pending_interrupt(void); MODULAR_GC_FN void rb_gc_unset_pending_interrupt(void); MODULAR_GC_FN bool rb_gc_obj_free_vm_weak_references(VALUE obj); +MODULAR_GC_FN bool rb_gc_obj_free_concurrency_safe_vm_weak_references(VALUE obj); MODULAR_GC_FN bool rb_gc_obj_free(void *objspace, VALUE obj); MODULAR_GC_FN void rb_gc_save_machine_context(void); MODULAR_GC_FN void rb_gc_mark_roots(void *objspace, const char **categoryp); From 9dbf1d713c2b31656e16f58e886d830c3b249b44 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 9 Apr 2026 14:27:33 -0400 Subject: [PATCH 61/67] Fix concurrent_set.c when found garbage object --- concurrent_set.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/concurrent_set.c b/concurrent_set.c index 6eb76d9b3647f2..227f1b53f9f02b 100644 --- a/concurrent_set.c +++ b/concurrent_set.c @@ -610,7 +610,6 @@ rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data) RB_VM_LOCKING(); goto retry; default: - // what about if hash is marked reclaimed but key is not cleared yet if (curr_hash != hash) { goto probe_next; } @@ -621,8 +620,13 @@ rb_concurrent_set_find_or_insert(VALUE *set_obj_ptr, VALUE key, void *data) if (continuation) { goto probe_next; } - rbimpl_atomic_value_cas(&entry->key, raw_key, CONCURRENT_SET_EMPTY, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_RELAXED); - continue; + { + VALUE prev = rbimpl_atomic_value_cas(&entry->key, raw_key, CONCURRENT_SET_EMPTY, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_RELAXED); + if (prev == raw_key) { + rbimpl_atomic_sub(&set->size, 1, RBIMPL_ATOMIC_RELAXED); + } + } + continue; // try to reclaim same slot, because the hash is the same and it's now EMPTY } if (set->funcs->cmp(key, curr_key)) { From 
65e09d41d0b52cab159c246c186af63c5bc51ec1 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Fri, 10 Apr 2026 15:05:42 -0400 Subject: [PATCH 62/67] Didn't mean to merge this a while back --- vm_callinfo.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/vm_callinfo.h b/vm_callinfo.h index 5168ce57670745..9f147522815d50 100644 --- a/vm_callinfo.h +++ b/vm_callinfo.h @@ -399,9 +399,6 @@ vm_cc_refinement_p(const struct rb_callcache *cc) static inline bool vm_cc_class_check(const struct rb_callcache *cc, VALUE klass) { - if (!IMEMO_TYPE_P(cc, imemo_callcache)) { - fprintf(stderr, "Error: vm_cc_class_check called on %s (%p)\n", rb_obj_info((VALUE)cc), (void*)cc); - } VM_ASSERT(IMEMO_TYPE_P(cc, imemo_callcache)); VM_ASSERT(cc_check_class(cc->klass)); return cc->klass == klass; From b8760d68df44e91e8edc866804a5e86595c05157 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Fri, 10 Apr 2026 15:28:54 -0400 Subject: [PATCH 63/67] Go from 2 to 1 allocations in onig_region_resize --- regexec.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/regexec.c b/regexec.c index 3210c7cc1b5603..73f49d2963ad5a 100644 --- a/regexec.c +++ b/regexec.c @@ -905,16 +905,13 @@ onig_region_resize(OnigRegion* region, int n) if (n < ONIG_NREGION) n = ONIG_NREGION; + size_t region_half_sz = n * sizeof(OnigPosition); if (region->allocated == 0) { - region->beg = (OnigPosition* )xmalloc(n * sizeof(OnigPosition)); + region->beg = (OnigPosition* )xmalloc(region_half_sz * 2); if (region->beg == 0) return ONIGERR_MEMORY; - region->end = (OnigPosition* )xmalloc(n * sizeof(OnigPosition)); - if (region->end == 0) { - xfree(region->beg); - return ONIGERR_MEMORY; - } + region->end = (OnigPosition* )region->beg + n; region->allocated = n; } @@ -922,20 +919,13 @@ onig_region_resize(OnigRegion* region, int n) OnigPosition *tmp; region->allocated = 0; - tmp = (OnigPosition* )xrealloc(region->beg, n * sizeof(OnigPosition)); + tmp = (OnigPosition* )xrealloc(region->beg, region_half_sz * 2); if (tmp == 0) { xfree(region->beg); - xfree(region->end); return ONIGERR_MEMORY; } region->beg = tmp; - tmp = (OnigPosition* )xrealloc(region->end, n * sizeof(OnigPosition)); - if (tmp == 0) { - xfree(region->beg); - xfree(region->end); - return ONIGERR_MEMORY; - } - region->end = tmp; + region->end = (OnigPosition*)region->beg + n; region->allocated = n; } @@ -998,7 +988,6 @@ onig_region_free(OnigRegion* r, int free_self) if (r) { if (r->allocated > 0) { xfree(r->beg); - xfree(r->end); } #ifdef USE_CAPTURE_HISTORY history_root_free(r); From ba09ff97b782086e76df8c58c4a2db3186e09134 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Mon, 13 Apr 2026 11:25:41 -0400 Subject: [PATCH 64/67] Remove old is_sweep_thread_p checks in vm_sync.c --- vm_sync.c | 73 ++++++++++++++++++------------------------------------- 1 file changed, 23 insertions(+), 50 deletions(-) diff --git a/vm_sync.c b/vm_sync.c index 457af1ec215a8a..5b33309ebbd572 100644 --- a/vm_sync.c +++ b/vm_sync.c @@ -8,25 +8,13 @@ void rb_ractor_sched_barrier_start(rb_vm_t *vm, rb_ractor_t *cr); void rb_ractor_sched_barrier_join(rb_vm_t *vm, rb_ractor_t *cr); void rb_ractor_sched_barrier_end(rb_vm_t *vm, rb_ractor_t *cr); - -static bool -is_sweep_thread_p(void) -{ - rb_vm_t *vm = GET_VM(); - if (!vm) return false; - return vm->gc.sweep_thread == pthread_self(); -} +bool is_sweep_thread_p(void); static bool vm_locked(rb_vm_t *vm) { if (!vm) return false; - if (is_sweep_thread_p()) { - return vm->ractor.sync.lock_owner == (void*)-1; - } - else { - return 
vm_locked_by_ractor_p(vm, GET_RACTOR()); - } + return vm_locked_by_ractor_p(vm, GET_RACTOR()); } #if RUBY_DEBUG > 0 @@ -91,24 +79,19 @@ vm_need_barrier(bool no_barrier, const rb_ractor_t *cr, const rb_vm_t *vm) } static void -vm_lock_enter(rb_ractor_t *cr, rb_vm_t *vm, bool locked, bool no_barrier, bool is_sweep_thread, unsigned int *lev APPEND_LOCATION_ARGS) +vm_lock_enter(rb_ractor_t *cr, rb_vm_t *vm, bool locked, bool no_barrier, unsigned int *lev APPEND_LOCATION_ARGS) { RUBY_DEBUG_LOG2(file, line, "start locked:%d", locked); + VM_ASSERT(!is_sweep_thread_p()); + if (locked) { ASSERT_vm_locking(); } else { #if RACTOR_CHECK_MODE - if (is_sweep_thread) { - VM_ASSERT(0); - VM_ASSERT(cr == 0); - VM_ASSERT(vm->ractor.sync.lock_owner != (void*)-1); - } - else { - // locking ractor and acquire VM lock will cause deadlock - VM_ASSERT(cr->sync.locked_by != rb_ractor_self(cr)); - } + // locking ractor and acquire VM lock will cause deadlock + VM_ASSERT(cr->sync.locked_by != rb_ractor_self(cr)); #endif // lock rb_native_mutex_lock(&vm->ractor.sync.lock); @@ -116,7 +99,7 @@ vm_lock_enter(rb_ractor_t *cr, rb_vm_t *vm, bool locked, bool no_barrier, bool i VM_ASSERT(vm->ractor.sync.lock_rec == 0); // barrier - if (!is_sweep_thread && vm_need_barrier(no_barrier, cr, vm)) { + if (vm_need_barrier(no_barrier, cr, vm)) { rb_execution_context_t *ec = GET_EC(); RB_VM_SAVE_MACHINE_CONTEXT(rb_ec_thread_ptr(ec)); @@ -129,20 +112,18 @@ vm_lock_enter(rb_ractor_t *cr, rb_vm_t *vm, bool locked, bool no_barrier, bool i VM_ASSERT(vm->ractor.sync.lock_rec == 0); VM_ASSERT(vm->ractor.sync.lock_owner == NULL); - vm->ractor.sync.lock_owner = is_sweep_thread ? (rb_ractor_t*)-1 : cr; + vm->ractor.sync.lock_owner = cr; } vm->ractor.sync.lock_rec++; *lev = vm->ractor.sync.lock_rec; - if (!is_sweep_thread) { - RUBY_DEBUG_LOG2(file, line, "rec:%u owner:%u", vm->ractor.sync.lock_rec, - (unsigned int)rb_ractor_id(vm->ractor.sync.lock_owner)); - } + RUBY_DEBUG_LOG2(file, line, "rec:%u owner:%u", vm->ractor.sync.lock_rec, + (unsigned int)rb_ractor_id(vm->ractor.sync.lock_owner)); } static void -vm_lock_leave(rb_vm_t *vm, bool no_barrier, bool is_sweep_thread, unsigned int *lev APPEND_LOCATION_ARGS) +vm_lock_leave(rb_vm_t *vm, bool no_barrier, unsigned int *lev APPEND_LOCATION_ARGS) { MAYBE_UNUSED(rb_ractor_t *cr = vm->ractor.sync.lock_owner); @@ -153,15 +134,10 @@ vm_lock_leave(rb_vm_t *vm, bool no_barrier, bool is_sweep_thread, unsigned int * ASSERT_vm_locking(); VM_ASSERT(vm->ractor.sync.lock_rec > 0); VM_ASSERT(vm->ractor.sync.lock_rec == *lev); - if (is_sweep_thread) { - VM_ASSERT(cr == (void*)-1); - } - else { - VM_ASSERT(cr == GET_RACTOR()); - } + VM_ASSERT(cr == GET_RACTOR()); #ifdef RUBY_THREAD_PTHREAD_H - if (!is_sweep_thread && vm->ractor.sched.barrier_ractor == cr && + if (vm->ractor.sched.barrier_ractor == cr && vm->ractor.sched.barrier_lock_rec == vm->ractor.sync.lock_rec) { VM_ASSERT(!no_barrier); rb_ractor_sched_barrier_end(vm, cr); @@ -183,11 +159,10 @@ rb_vm_lock_enter_body(unsigned int *lev APPEND_LOCATION_ARGS) rb_vm_t *vm = GET_VM(); VM_ASSERT(vm); if (vm_locked(vm)) { - vm_lock_enter(NULL, vm, true, false, is_sweep_thread_p(), lev APPEND_LOCATION_PARAMS); + vm_lock_enter(NULL, vm, true, false, lev APPEND_LOCATION_PARAMS); } else { - bool is_sweep_th = is_sweep_thread_p(); - vm_lock_enter(is_sweep_th ? 
NULL : GET_RACTOR(), vm, false, false, is_sweep_th, lev APPEND_LOCATION_PARAMS); + vm_lock_enter(GET_RACTOR(), vm, false, false, lev APPEND_LOCATION_PARAMS); } } @@ -197,11 +172,10 @@ rb_vm_lock_enter_body_nb(unsigned int *lev APPEND_LOCATION_ARGS) rb_vm_t *vm = GET_VM(); VM_ASSERT(vm); if (vm_locked(vm)) { - vm_lock_enter(NULL, vm, true, true, is_sweep_thread_p(), lev APPEND_LOCATION_PARAMS); + vm_lock_enter(NULL, vm, true, true, lev APPEND_LOCATION_PARAMS); } else { - bool is_sweep_th = is_sweep_thread_p(); - vm_lock_enter(is_sweep_th ? NULL : GET_RACTOR(), vm, false, true, is_sweep_th, lev APPEND_LOCATION_PARAMS); + vm_lock_enter(GET_RACTOR(), vm, false, true, lev APPEND_LOCATION_PARAMS); } } @@ -210,19 +184,19 @@ rb_vm_lock_enter_body_cr(rb_ractor_t *cr, unsigned int *lev APPEND_LOCATION_ARGS { rb_vm_t *vm = GET_VM(); VM_ASSERT(vm); - vm_lock_enter(cr, vm, vm_locked(vm), false, false, lev APPEND_LOCATION_PARAMS); + vm_lock_enter(cr, vm, vm_locked(vm), false, lev APPEND_LOCATION_PARAMS); } void rb_vm_lock_leave_body_nb(unsigned int *lev APPEND_LOCATION_ARGS) { - vm_lock_leave(GET_VM(), true, is_sweep_thread_p(), lev APPEND_LOCATION_PARAMS); + vm_lock_leave(GET_VM(), true, lev APPEND_LOCATION_PARAMS); } void rb_vm_lock_leave_body(unsigned int *lev APPEND_LOCATION_ARGS) { - vm_lock_leave(GET_VM(), false, is_sweep_thread_p(), lev APPEND_LOCATION_PARAMS); + vm_lock_leave(GET_VM(), false, lev APPEND_LOCATION_PARAMS); } void @@ -232,8 +206,7 @@ rb_vm_lock_body(LOCATION_ARGS) VM_ASSERT(vm); ASSERT_vm_unlocking(); - bool is_sweep_th = is_sweep_thread_p(); - vm_lock_enter(is_sweep_th ? NULL : GET_RACTOR(), vm, false, false, is_sweep_th, &vm->ractor.sync.lock_rec APPEND_LOCATION_PARAMS); + vm_lock_enter(GET_RACTOR(), vm, false, false, &vm->ractor.sync.lock_rec APPEND_LOCATION_PARAMS); } void @@ -242,7 +215,7 @@ rb_vm_unlock_body(LOCATION_ARGS) rb_vm_t *vm = GET_VM(); ASSERT_vm_locking(); VM_ASSERT(vm->ractor.sync.lock_rec == 1); - vm_lock_leave(vm, false, is_sweep_thread_p(), &vm->ractor.sync.lock_rec APPEND_LOCATION_PARAMS); + vm_lock_leave(vm, false, &vm->ractor.sync.lock_rec APPEND_LOCATION_PARAMS); } static void From f2f93581c1acf12d4804fd286f988528e5e75671 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Mon, 13 Apr 2026 11:26:00 -0400 Subject: [PATCH 65/67] Remove old is_sweep_pthread_p check in darray.h --- darray.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/darray.h b/darray.h index 1c2485b935ed1b..08d79a45c27bd7 100644 --- a/darray.h +++ b/darray.h @@ -233,8 +233,6 @@ rb_darray_realloc_mul_add(void *orig_ptr, size_t capa, size_t element_size, size return ptr; } -bool is_sweep_thread_p(void); - /* Internal function. Like rb_xrealloc_mul_add but does not trigger GC. 
*/ static inline void * rb_darray_realloc_mul_add_without_gc(void *orig_ptr, size_t x, size_t y, size_t z) @@ -243,12 +241,7 @@ rb_darray_realloc_mul_add_without_gc(void *orig_ptr, size_t x, size_t y, size_t void *ptr = realloc(orig_ptr, size); if (ptr == NULL) { - if (!is_sweep_thread_p()) { - rb_bug("rb_darray_realloc_mul_add_without_gc: failed"); - } - else { - fprintf(stderr, "darray: realloc failed (from sweep thread)\n"); - } + rb_bug("rb_darray_realloc_mul_add_without_gc: failed"); } return ptr; From 729063033016080216597fc91474b8321c79e2ea Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Mon, 13 Apr 2026 13:50:07 -0400 Subject: [PATCH 66/67] fix GC.verify_internal_consistency with Parallel Sweep It also works with RGENGC_CHECK_MODE=1 --- gc/default/default.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index ee244ae542c4af..4746959b3d061f 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -6402,6 +6402,7 @@ struct verify_internal_consistency_struct { int err_count; size_t live_object_count; size_t zombie_object_count; + size_t zombie_ran_finalizer_object_count; VALUE parent; size_t old_object_count; @@ -6457,7 +6458,6 @@ verify_internal_consistency_i(void *page_start, void *page_end, size_t stride, { VALUE obj; rb_objspace_t *objspace = data->objspace; - return 0; // FIXME for parallel sweep for (obj = (VALUE)page_start; obj != (VALUE)page_end; obj += stride) { asan_unpoisoning_object(obj) { @@ -6499,7 +6499,11 @@ verify_internal_consistency_i(void *page_start, void *page_end, size_t stride, if (BUILTIN_TYPE(obj) == T_ZOMBIE) { data->zombie_object_count++; - if ((RBASIC(obj)->flags & ~ZOMBIE_OBJ_KEPT_FLAGS) != T_ZOMBIE) { + if (FL_TEST(obj, ZOMBIE_NEEDS_FREE_FLAG)) { + data->zombie_ran_finalizer_object_count++; + } + + if ((RBASIC(obj)->flags & ~(ZOMBIE_OBJ_KEPT_FLAGS|ZOMBIE_NEEDS_FREE_FLAG)) != T_ZOMBIE) { fprintf(stderr, "verify_internal_consistency_i: T_ZOMBIE has extra flags set: %s\n", rb_obj_info(obj)); data->err_count++; @@ -6622,7 +6626,6 @@ gc_verify_heap_pages(rb_objspace_t *objspace) static void gc_verify_internal_consistency_(rb_objspace_t *objspace) { - return; // FIXME for parallel sweep struct verify_internal_consistency_struct data = {0}; data.objspace = objspace; @@ -6637,6 +6640,7 @@ gc_verify_internal_consistency_(rb_objspace_t *objspace) uintptr_t end = start + page->total_slots * slot_size; verify_internal_consistency_i((void *)start, (void *)end, slot_size, &data); + data.live_object_count += (page->pre_freed_slots + page->pre_final_slots + page->pre_zombie_slots); } if (data.err_count != 0) { @@ -6689,7 +6693,7 @@ gc_verify_internal_consistency_(rb_objspace_t *objspace) } if (total_final_slots_count(objspace) != data.zombie_object_count || - total_final_slots_count(objspace) != list_count) { + (data.zombie_object_count - data.zombie_ran_finalizer_object_count) != list_count) { rb_bug("inconsistent finalizing object count:\n" " expect %"PRIuSIZE"\n" @@ -6714,6 +6718,7 @@ gc_verify_internal_consistency(void *objspace_ptr) rb_gc_vm_barrier(); // stop other ractors unsigned int prev_during_gc = during_gc; + wait_for_background_sweeping_to_finish(objspace, true, false, "verify_internal_consistency"); during_gc = FALSE; // stop gc here { gc_verify_internal_consistency_(objspace); From bbe44b9d0b7216604e48ada8d65f5e81833aabef Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Mon, 13 Apr 2026 15:49:09 -0400 Subject: [PATCH 67/67] Enable sweep thread when calling GC.start --- 
gc/default/default.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 4746959b3d061f..40f8d4501d2068 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -4869,10 +4869,7 @@ gc_sweep_start(rb_objspace_t *objspace) rb_gc_ractor_newobj_cache_foreach(gc_ractor_newobj_cache_clear, NULL); psweep_debug(1, "[gc] gc_sweep_start\n"); - if (!objspace->flags.during_compacting && - (objspace->profile.latest_gc_info & GPR_FLAG_METHOD) == 0 && - !(objspace->hook_events & RUBY_INTERNAL_EVENT_FREEOBJ)) { - + if (!objspace->flags.during_compacting && !(objspace->hook_events & RUBY_INTERNAL_EVENT_FREEOBJ)) { rbimpl_atomic_store(&objspace->use_background_sweep_thread, true, RBIMPL_ATOMIC_RELEASE); psweep_debug(-1, "[gc] gc_sweep_start: requesting sweep thread\n"); sweep_lock_lock(&objspace->sweep_lock); @@ -8428,7 +8425,6 @@ rb_gc_impl_start(void *objspace_ptr, bool full_mark, bool immediate_mark, bool i sweep_lock_lock(&objspace->sweep_lock); { GC_ASSERT(!objspace->sweep_thread_sweeping); - GC_ASSERT(!objspace->sweep_thread_sweep_requested); for (int j = 0; j < HEAP_COUNT; j++) { rb_heap_t *heap = &heaps[j]; GC_ASSERT(!heap->swept_pages);