From 27de2abd501a2157f1450c370c064483cbeabe58 Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Tue, 26 May 2026 10:00:02 +0200 Subject: [PATCH 01/32] Rewrite Model with bitmask wave and AC-4 propagator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the per-cell Tile-array design with a flat bitmask wave, a precomputed `propagator[dir][tile]` table, and an AC-4 compatible counter stored as a byte buffer. Propagation is iterative on an explicit stack; entropy is updated incrementally with a noise tiebreak; the chosen tile is cached per cell so `grid` lookups are O(1). On a contradiction the wave state is rebuilt and the run restarts. The original `Model` and `Cell` are preserved verbatim as `LegacyModel` and `LegacyCell` for direct benchmark comparison; `Tile` now stores raw edge signatures so both implementations can share it. The Window calls `model.grid` once per draw rather than receiving a grid from every `iterate`. Speedup (Ruby 4.0.1 + YJIT, arm64-darwin): 20×20: 1.70s -> 0.37s (4.5x) 30×30: 4.06s -> 0.86s (4.7x) The new Model scales to 100×100 in ~10s and 150×150 in ~27s, sizes at which the legacy implementation is impractical. `bin/benchmark` now compares both models across grid sizes; CI JSON contract preserved. 
--- bin/benchmark | 103 ++-- bin/profile | 40 ++ lib/wave_function_collapse.rb | 2 +- lib/wave_function_collapse/cell.rb | 52 -- lib/wave_function_collapse/legacy_model.rb | 217 ++++++++ lib/wave_function_collapse/model.rb | 571 +++++++++++++++++---- lib/wave_function_collapse/tile.rb | 12 +- lib/wave_function_collapse/window.rb | 10 +- test/test_model.rb | 19 +- 9 files changed, 818 insertions(+), 208 deletions(-) create mode 100755 bin/profile delete mode 100644 lib/wave_function_collapse/cell.rb create mode 100644 lib/wave_function_collapse/legacy_model.rb diff --git a/bin/benchmark b/bin/benchmark index 260200c..e046ace 100755 --- a/bin/benchmark +++ b/bin/benchmark @@ -3,17 +3,13 @@ $LOAD_PATH.unshift File.expand_path("../lib", __dir__) -require "benchmark" require "json" require "wave_function_collapse" -WIDTH = 20 -HEIGHT = 20 +TILE_PATH = File.expand_path("../assets/map.tsj", __dir__) -srand(WIDTH * HEIGHT) - -json = JSON.load_file!("assets/map.tsj") -tiles = +def build_tiles + json = JSON.load_file(TILE_PATH) json["wangsets"].last["wangtiles"].map do |tile| prob = json["tiles"]&.find { |t| t["id"] == tile["tileid"] }&.fetch("probability") WaveFunctionCollapse::Tile.new( @@ -22,33 +18,82 @@ tiles = probability: prob ) end -times = [] +end -puts RUBY_DESCRIPTION unless ENV["CI"] +def run_once(klass, tiles, w, h) + t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC) + model = klass.new(tiles, w, h) + model.solve + iters = 1 + until model.complete? + model.iterate + iters += 1 + end + [Process.clock_gettime(Process::CLOCK_MONOTONIC) - t0, iters] +end -times = 10.times.map { |i| - time = Benchmark.realtime { +def median(xs) + s = xs.sort + n = s.length + n.odd? ? s[n / 2] : (s[n / 2 - 1] + s[n / 2]) / 2.0 +end + +tiles = build_tiles + +if ENV["CI"] + # CI contract: 10 runs of 20x20 on the new Model, JSON output for the + # github-action-benchmark workflow. Do not change without updating the workflow. 
+ WIDTH = 20 + HEIGHT = 20 + srand(WIDTH * HEIGHT) + times = 10.times.map { + t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC) model = WaveFunctionCollapse::Model.new(tiles, WIDTH, HEIGHT) - print "Run ##{i + 1}: Benchmark for Model(grid=#{model.width}x#{model.height} entropy=#{model.max_entropy})… " unless ENV["CI"] model.solve - until model.complete? - model.iterate - end + model.iterate until model.complete? + Process.clock_gettime(Process::CLOCK_MONOTONIC) - t0 } - puts "Finished in #{time.round(2)}s" unless ENV["CI"] - time -} - -average = times.sum / times.size -if ENV["CI"] puts JSON.dump([ - { name: "Average time", unit: "Seconds", value: average }, - { name: "Slowest time", unit: "Seconds", value: times.max }, - { name: "Fastest time", unit: "Seconds", value: times.min }, - { name: "P90", unit: "Seconds", value: times.sort[8] } + {name: "Average time", unit: "Seconds", value: times.sum / times.size}, + {name: "Slowest time", unit: "Seconds", value: times.max}, + {name: "Fastest time", unit: "Seconds", value: times.min}, + {name: "P90", unit: "Seconds", value: times.sort[8]} ]) -else - puts "Average time: #{average}" - puts "Slowest time: #{times.max}" - puts "Fastest time: #{times.min}" + exit +end + +yjit_status = defined?(RubyVM::YJIT) && RubyVM::YJIT.enabled? ? "on" : "off" +puts RUBY_DESCRIPTION +puts "Tiles loaded: #{tiles.size}, YJIT=#{yjit_status}" +puts + +legacy_sizes = (ENV["LEGACY_SIZES"] || "10,15,20,25,30").split(",").map(&:to_i) +new_sizes = (ENV["NEW_SIZES"] || "10,15,20,30,50,75,100").split(",").map(&:to_i) +runs = Integer(ENV["RUNS"] || "3") +skip_legacy = ARGV.include?("--no-legacy") + +printf("%-8s %-9s %-7s %-10s %-10s %-12s %-10s\n", + "Model", "Grid", "Cells", "Median", "Best", "Obs/sec", "Iters") +printf("%-8s %-9s %-7s %-10s %-10s %-12s %-10s\n", + "-----", "----", "-----", "------", "----", "-------", "-----") + +[ + [WaveFunctionCollapse::LegacyModel, skip_legacy ? 
[] : legacy_sizes, "Legacy"], + [WaveFunctionCollapse::Model, new_sizes, "New"] +].each do |klass, sizes, label| + sizes.each do |s| + times = [] + iters_seen = 0 + runs.times do |r| + srand(s * 1000 + r + 1) + elapsed, iters = run_once(klass, tiles, s, s) + times << elapsed + iters_seen = iters + end + med = median(times) + best = times.min + printf("%-8s %-9s %-7d %9.3fs %9.3fs %12.0f %10d\n", + label, "#{s}x#{s}", s * s, med, best, iters_seen / med, iters_seen) + end + puts end diff --git a/bin/profile b/bin/profile new file mode 100755 index 0000000..ff4bfd6 --- /dev/null +++ b/bin/profile @@ -0,0 +1,40 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +$LOAD_PATH.unshift File.expand_path("../lib", __dir__) + +require "json" +require "wave_function_collapse" +require "ruby-prof" + +WIDTH = 20 +HEIGHT = 20 + +srand(WIDTH * HEIGHT) + +json = JSON.load_file!("assets/map.tsj") +tiles = + json["wangsets"].last["wangtiles"].map do |tile| + prob = json["tiles"]&.find { |t| t["id"] == tile["tileid"] }&.fetch("probability") + WaveFunctionCollapse::Tile.new( + tileid: tile["tileid"], + wangid: tile["wangid"], + probability: prob + ) + end + +# Profile +profile = RubyProf::Profile.new +profile.start + +model = WaveFunctionCollapse::Model.new(tiles, WIDTH, HEIGHT) +model.solve +until model.complete? 
+ model.iterate +end + +result = profile.stop + +# Print a flat profile to text +printer = RubyProf::FlatPrinter.new(result) +printer.print(STDOUT, min_percent: 1) diff --git a/lib/wave_function_collapse.rb b/lib/wave_function_collapse.rb index a121d59..ea989d5 100644 --- a/lib/wave_function_collapse.rb +++ b/lib/wave_function_collapse.rb @@ -3,7 +3,7 @@ module WaveFunctionCollapse class Error < StandardError; end - autoload :Cell, "wave_function_collapse/cell" + autoload :LegacyModel, "wave_function_collapse/legacy_model" autoload :Model, "wave_function_collapse/model" autoload :Tile, "wave_function_collapse/tile" autoload :Window, "wave_function_collapse/window" diff --git a/lib/wave_function_collapse/cell.rb b/lib/wave_function_collapse/cell.rb deleted file mode 100644 index 5dc6c4f..0000000 --- a/lib/wave_function_collapse/cell.rb +++ /dev/null @@ -1,52 +0,0 @@ -module WaveFunctionCollapse - class Cell < BasicObject - @@cellid = 0 - attr_reader :tiles, :cellid - attr_accessor :collapsed, :entropy, :x, :y - alias_method :collapsed?, :collapsed - - def initialize(x, y, tiles) - @cellid = @@cellid - @collapsed = tiles.size == 1 - @entropy = tiles.size - @tiles = tiles - @neighbors = {} - @x = x - @y = y - @@cellid = @@cellid.succ - end - - def ==(other) - @cellid == other.cellid - end - - def tiles=(new_tiles) - @tiles = new_tiles - update - end - - def update - @entropy = @tiles.size - @collapsed = @entropy == 1 - end - - def tile - @tiles[0] if @collapsed - end - - def collapse - self.tiles = [@tiles.max_by { |t| ::Kernel.rand**(1.0 / t.probability) }] - end - - def neighbors(model) - @neighbors[model.width * y + x] ||= begin - up = model.cell_at(@x, @y + 1) if @y < model.height - 1 - down = model.cell_at(@x, @y - 1) if @y.positive? - right = model.cell_at(@x + 1, @y) if @x < model.width - 1 - left = model.cell_at(@x - 1, @y) if @x.positive? 
- - {up: up, down: down, right: right, left: left} - end - end - end -end diff --git a/lib/wave_function_collapse/legacy_model.rb b/lib/wave_function_collapse/legacy_model.rb new file mode 100644 index 0000000..ec6a3e1 --- /dev/null +++ b/lib/wave_function_collapse/legacy_model.rb @@ -0,0 +1,217 @@ +# frozen_string_literal: true + +module WaveFunctionCollapse + # Snapshot of the original Model and Cell implementation, preserved verbatim + # so the benchmark can measure the speedup of the rewrite. Not used at runtime + # by the Window or the default `WaveFunctionCollapse::Model`. + class LegacyCell < BasicObject + @@cellid = 0 + attr_reader :tiles, :cellid + attr_accessor :collapsed, :entropy, :x, :y + alias_method :collapsed?, :collapsed + + def initialize(x, y, tiles) + @cellid = @@cellid + @collapsed = tiles.size == 1 + @entropy = tiles.size + @tiles = tiles + @neighbors = {} + @x = x + @y = y + @@cellid = @@cellid.succ + end + + def ==(other) + @cellid == other.cellid + end + + def tiles=(new_tiles) + @tiles = new_tiles + update + end + + def update + @entropy = @tiles.size + @collapsed = @entropy == 1 + end + + def tile + @tiles[0] if @collapsed + end + + def collapse + self.tiles = [@tiles.max_by { |t| ::Kernel.rand**(1.0 / t.probability) }] + end + + def neighbors(model) + @neighbors[model.width * y + x] ||= begin + up = model.cell_at(@x, @y + 1) if @y < model.height - 1 + down = model.cell_at(@x, @y - 1) if @y.positive? + right = model.cell_at(@x + 1, @y) if @x < model.width - 1 + left = model.cell_at(@x - 1, @y) if @x.positive? 
+ + {up: up, down: down, right: right, left: left} + end + end + end + + class LegacyModel + DIRECTION_TO_INDEXES = { + up: [7, 0, 1], + right: [1, 2, 3], + down: [5, 4, 3], + left: [7, 6, 5] + }.freeze + + OPPOSITE_OF = { + up: :down, + right: :left, + down: :up, + left: :right + }.freeze + + attr_reader :tiles, :width, :height, :cells, :max_entropy + + def initialize(tiles, width, height) + @tiles = tiles + @width = width.to_i + @height = height.to_i + @cells = [] + @height.times { |y| @width.times { |x| @cells << LegacyCell.new(x, y, @tiles.shuffle) } } + @uncollapsed_cells = @cells.reject(&:collapsed) + @max_entropy = @tiles.length + end + + def cell_at(x, y) + @cells[@width * y + x] + end + + def complete? + @uncollapsed_cells.empty? + end + + def percent + ((@width * @height) - @uncollapsed_cells.length.to_f) / (@width * @height) * 100 + end + + def solve + cell = random_cell + process_cell(cell) + generate_grid + end + + def iterate + return false if @uncollapsed_cells.empty? + + next_cell = find_lowest_entropy + return false unless next_cell + + process_cell(next_cell) + generate_grid + end + + def prepend_empty_row + @cells = @cells.drop(@width) + @cells.each { |cell| cell.y -= 1 } + x = 0 + while x < @width + new_cell = LegacyCell.new(x, @height - 1, @tiles) + @cells << new_cell + @uncollapsed_cells << new_cell + x = x.succ + end + @width.times { |x| + evaluate_neighbor(cell_at(x, @height - 2), :up) + } + end + + def random_cell + @uncollapsed_cells.sample + end + + def generate_grid + x = 0 + result = [] + + while x < @width + rx = result[x] = [] + y = 0 + + while y < @height + rx[y] = cell_at(x, y).tile + y = y.succ + end + x = x.succ + end + + result + end + + def process_cell(cell) + cell.collapse + @uncollapsed_cells.delete(cell) + return if @uncollapsed_cells.empty? 
+ + propagate(cell) + end + + def propagate(source_cell) + evaluate_neighbor(source_cell, :up) + evaluate_neighbor(source_cell, :right) + evaluate_neighbor(source_cell, :down) + evaluate_neighbor(source_cell, :left) + end + + def evaluate_neighbor(source_cell, evaluation_direction) + neighbor_cell = source_cell.neighbors(self)[evaluation_direction] || return + return if neighbor_cell.collapsed + + original_tile_count = neighbor_cell.tiles.length + opposite_direction = OPPOSITE_OF[evaluation_direction] + + valid_edges = {} + source_cell.tiles.each do |source_tile| + valid_edges[source_tile.__send__(evaluation_direction)] = true + end + + neighbor_tiles = neighbor_cell.tiles + new_tiles = [] + i = 0 + ntc = neighbor_tiles.length + while i < ntc + tile = neighbor_tiles[i] + new_tiles << tile if valid_edges[tile.__send__(opposite_direction)] + i = i.succ + end + + neighbor_cell.tiles = new_tiles unless new_tiles.empty? + @uncollapsed_cells.delete(neighbor_cell) if neighbor_cell.collapsed + + propagate(neighbor_cell) if neighbor_cell.tiles.length != original_tile_count + end + + def find_lowest_entropy + ucg = @uncollapsed_cells + i = 0 + l = ucg.length + min_e = ucg[0].entropy + acc = [] + while i < l + cc = ucg[i] + next i = i.succ if !cc + + ce = cc.entropy + if ce < min_e + min_e = ce + acc.clear + acc << i + elsif ce == min_e + acc << i + end + + i = i.succ + end + ucg[acc.sample] + end + end +end diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index a24b790..c11366f 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -1,166 +1,527 @@ +# frozen_string_literal: true + module WaveFunctionCollapse + # Wave Function Collapse — bitmask wave + AC-4 compatible counter. + # + # Each cell's domain is a single Integer bitmask (`@wave[c]`). Adjacency is + # precomputed as `@propagator[d][t]` masks and `@propagator_lists[d][t]` + # index arrays. 
Supporter counts are kept in a flat byte buffer + # (`@compatible`, addressed via `setbyte`/`getbyte`) — when a count hits + # zero the tile is banned at that cell, which kicks off iterative + # propagation through an explicit stack. Entropy is maintained + # incrementally per cell. class Model - MAX_ITERATIONS = 5_000 - - DIRECTION_TO_INDEXES = { - up: [7, 0, 1], - right: [1, 2, 3], - down: [5, 4, 3], - left: [7, 6, 5] - }.freeze - - OPPOSITE_OF = { - up: :down, - right: :left, - down: :up, - left: :right - }.freeze + DX = [0, 1, 0, -1].freeze + DY = [1, 0, -1, 0].freeze + OPP = [2, 3, 0, 1].freeze - attr_reader :tiles, :width, :height, :cells, :max_entropy + attr_reader :tiles, :width, :height, :max_entropy def initialize(tiles, width, height) @tiles = tiles @width = width.to_i @height = height.to_i - @cells = [] - @height.times { |y| @width.times { |x| @cells << Cell.new(x, y, @tiles.shuffle) } } - @uncollapsed_cells = @cells.reject(&:collapsed) - @max_entropy = @tiles.length - end + @num_tiles = tiles.length + @max_entropy = @num_tiles + @cells_count = @width * @height - def cell_at(x, y) - @cells[@width * y + x] + build_propagator + build_initial_state + setup_wave_state end def complete? - @uncollapsed_cells.empty? + @uncollapsed_count == 0 end def percent - ((@width * @height) - @uncollapsed_cells.length.to_f) / (@width * @height) * 100 + (@cells_count - @uncollapsed_count).to_f / @cells_count * 100 + end + + def entropy_at(x, y) + @remaining[y * @width + x] end def solve - cell = random_cell - process_cell(cell) - generate_grid + observe_and_propagate + true end def iterate - return false if @uncollapsed_cells.empty? - - next_cell = find_lowest_entropy - return false unless next_cell + return false if complete? + observe_and_propagate + true + end - process_cell(next_cell) + # Returns a 2-D array indexed [x][y] of tiles (nil for uncollapsed cells). + # Called on demand by the renderer; not by iterate/solve. 
+ def grid generate_grid end def prepend_empty_row - @cells = @cells.drop(@width) - @cells.each { |cell| cell.y -= 1 } - x = 0 - while x < @width - new_cell = Cell.new(x, @height - 1, @tiles) - @cells << new_cell - @uncollapsed_cells << new_cell - x = x.succ + w = @width + n = @cells_count + + # Shift state down: drop bottom row (cells 0..w-1), append new top row. + @wave = @wave[w, n - w] + Array.new(w, @full_mask) + @remaining = @remaining[w, n - w] + Array.new(w, @num_tiles) + @sum_w = @sum_w[w, n - w] + Array.new(w, @initial_sum_w) + @sum_w_log_w = @sum_w_log_w[w, n - w] + Array.new(w, @initial_sum_w_log_w) + @entropies = @entropies[w, n - w] + Array.new(w, @initial_entropy) + @noise = @noise[w, n - w] + Array.new(w) { ::Kernel.rand * 1e-6 } + @chosen_tile = @chosen_tile[w, n - w] + Array.new(w, -1) + + @uncollapsed_count = 0 + c = 0 + while c < n + @uncollapsed_count += 1 if @remaining[c] > 1 + c += 1 end - @width.times { |x| - evaluate_neighbor(cell_at(x, @height - 2), :up) - } - end - def random_cell - @uncollapsed_cells.sample + rebuild_compatible_from_wave + orphan_ban_pass + propagate + true end + # Returns a 2-D array indexed [x][y] of tiles, or nil for uncollapsed cells. def generate_grid + result = ::Array.new(@width) x = 0 - result = [] - while x < @width - rx = result[x] = [] + col = result[x] = ::Array.new(@height) y = 0 - while y < @height - rx[y] = cell_at(x, y).tile - y = y.succ + col[y] = tile_at(x, y) + y += 1 end - x = x.succ + x += 1 end - result end - def process_cell(cell) - cell.collapse - @uncollapsed_cells.delete(cell) - return if @uncollapsed_cells.empty? + private + + # ---- one-time precomputation ------------------------------------------------ + + def build_propagator + tiles = @tiles + t_max = @num_tiles + + # Canonical integer ID per unique edge signature (Array of 3 ints). 
+ edge_id = {} + ups = ::Array.new(t_max) + rights = ::Array.new(t_max) + downs = ::Array.new(t_max) + lefts = ::Array.new(t_max) + t = 0 + while t < t_max + tile = tiles[t] + ups[t] = (edge_id[tile.up] ||= edge_id.size) + rights[t] = (edge_id[tile.right] ||= edge_id.size) + downs[t] = (edge_id[tile.down] ||= edge_id.size) + lefts[t] = (edge_id[tile.left] ||= edge_id.size) + t += 1 + end + + # Edge per (tile, direction). Index by direction id: 0=up,1=right,2=down,3=left. + edges_per_dir = [ups, rights, downs, lefts] - propagate(cell) + # propagator[d][a] = bitmask of b such that match(a, d, b) — i.e. + # tile a's edge in dir d equals tile b's edge in opposite(d). + propagator = ::Array.new(4) { ::Array.new(t_max, 0) } + propagator_lists = ::Array.new(4) { ::Array.new(t_max) } + + d = 0 + while d < 4 + opp_d = OPP[d] + my_edges = edges_per_dir[d] + opp_edges = edges_per_dir[opp_d] + a = 0 + while a < t_max + my_edge = my_edges[a] + mask = 0 + list = [] + b = 0 + while b < t_max + if opp_edges[b] == my_edge + mask |= (1 << b) + list << b + end + b += 1 + end + propagator[d][a] = mask + propagator_lists[d][a] = list.freeze + a += 1 + end + propagator[d].freeze + propagator_lists[d].freeze + d += 1 + end + + @propagator = propagator.freeze + @propagator_lists = propagator_lists.freeze + + # Weights + weights = ::Array.new(t_max) + log_weights = ::Array.new(t_max) + weights_log_weights = ::Array.new(t_max) + sum_w = 0.0 + sum_w_log_w = 0.0 + t = 0 + while t < t_max + w = tiles[t].probability.to_f + weights[t] = w + lw = ::Math.log(w) + log_weights[t] = lw + weights_log_weights[t] = w * lw + sum_w += w + sum_w_log_w += w * lw + t += 1 + end + @weights = weights.freeze + @weights_log_weights = weights_log_weights.freeze + @initial_sum_w = sum_w + @initial_sum_w_log_w = sum_w_log_w + @initial_entropy = ::Math.log(sum_w) - sum_w_log_w / sum_w + + @full_mask = (1 << t_max) - 1 + + # Precompute the 4-byte-per-tile block representing an interior cell's + # initial supporter 
counts (one byte per direction). Used to build the + # @compatible buffer quickly. + block = ::String.new(::String.new.b, capacity: t_max * 4) + block.force_encoding(::Encoding::BINARY) + t = 0 + while t < t_max + block << propagator_lists[0][t].length.chr + block << propagator_lists[1][t].length.chr + block << propagator_lists[2][t].length.chr + block << propagator_lists[3][t].length.chr + t += 1 + end + @interior_block = block.freeze end - def propagate(source_cell) - evaluate_neighbor(source_cell, :up) - evaluate_neighbor(source_cell, :right) - evaluate_neighbor(source_cell, :down) - evaluate_neighbor(source_cell, :left) + def build_initial_state + # Stack buffers reused across propagations. + @prop_cells = [] + @prop_tiles = [] end - def evaluate_neighbor(source_cell, evaluation_direction) - neighbor_cell = source_cell.neighbors(self)[evaluation_direction] || return - return if neighbor_cell.collapsed + # ---- per-run state (resettable on contradiction/restart) --------------------- + + def setup_wave_state + n = @cells_count + t_max = @num_tiles + + @wave = ::Array.new(n, @full_mask) + @remaining = ::Array.new(n, t_max) + @sum_w = ::Array.new(n, @initial_sum_w) + @sum_w_log_w = ::Array.new(n, @initial_sum_w_log_w) + @entropies = ::Array.new(n, @initial_entropy) + @noise = ::Array.new(n) { ::Kernel.rand * 1e-6 } + @chosen_tile = ::Array.new(n, -1) + @uncollapsed_count = n + @contradiction = false + @prop_cells.clear + @prop_tiles.clear - original_tile_count = neighbor_cell.tiles.length - opposite_direction = OPPOSITE_OF[evaluation_direction] + build_initial_compatible + orphan_ban_pass + propagate + end + + def build_initial_compatible + n = @cells_count + t_max = @num_tiles + w = @width + h = @height - # Build set of valid edges from source cell - valid_edges = {} - source_cell.tiles.each do |source_tile| - valid_edges[source_tile.__send__(evaluation_direction)] = true + buf = ::String.new(::String.new.b, capacity: n * t_max * 4) + 
buf.force_encoding(::Encoding::BINARY) + c = 0 + while c < n + buf << @interior_block + c += 1 end - # Filter neighbor tiles that have matching edges - neighbor_tiles = neighbor_cell.tiles - new_tiles = [] - i = 0 - ntc = neighbor_tiles.length - while i < ntc - tile = neighbor_tiles[i] - new_tiles << tile if valid_edges[tile.__send__(opposite_direction)] - i = i.succ + # Patch border cells: missing directions get sentinel 255. + c = 0 + while c < n + cx = c % w + cy = c / w + d = 0 + while d < 4 + nx = cx + DX[d] + ny = cy + DY[d] + unless nx >= 0 && nx < w && ny >= 0 && ny < h + base = (c * t_max) * 4 + d + t = 0 + while t < t_max + buf.setbyte(base + t * 4, 255) + t += 1 + end + end + d += 1 + end + c += 1 end - neighbor_cell.tiles = new_tiles unless new_tiles.empty? - @uncollapsed_cells.delete(neighbor_cell) if neighbor_cell.collapsed + @compatible = buf + end + + def rebuild_compatible_from_wave + n = @cells_count + t_max = @num_tiles + w = @width + h = @height + propagator = @propagator + wave = @wave + + buf = ::String.new(::String.new.b, capacity: n * t_max * 4) + buf.force_encoding(::Encoding::BINARY) + buf << "\xff".b * (n * t_max * 4) - # if the number of tiles changed, we need to evaluate current cell's neighbors now - propagate(neighbor_cell) if neighbor_cell.tiles.length != original_tile_count + c = 0 + while c < n + cx = c % w + cy = c / w + d = 0 + while d < 4 + nx = cx + DX[d] + ny = cy + DY[d] + if nx >= 0 && nx < w && ny >= 0 && ny < h + nc = ny * w + nx + wmask = wave[nc] + t = 0 + while t < t_max + cnt = popcount(propagator[d][t] & wmask) + cnt = 255 if cnt > 255 + buf.setbyte((c * t_max + t) * 4 + d, cnt) + t += 1 + end + end + d += 1 + end + c += 1 + end + + @compatible = buf end - def find_lowest_entropy - ucg = @uncollapsed_cells - i = 0 - l = ucg.length - min_e = ucg[0].entropy - acc = [] - while i < l - cc = ucg[i] - next i = i.succ if !cc + def orphan_ban_pass + n = @cells_count + t_max = @num_tiles + compatible = @compatible + wave = 
@wave - ce = cc.entropy - if ce < min_e - min_e = ce - acc.clear - acc << i - elsif ce == min_e - acc << i + c = 0 + while c < n + t = 0 + while t < t_max + bit = 1 << t + if (wave[c] & bit) != 0 + base = (c * t_max + t) * 4 + if compatible.getbyte(base) == 0 || + compatible.getbyte(base + 1) == 0 || + compatible.getbyte(base + 2) == 0 || + compatible.getbyte(base + 3) == 0 + ban(c, t) + end + end + t += 1 end + c += 1 + end + end + + # ---- core observe / ban / propagate ----------------------------------------- + + def observe_and_propagate + loop do + c = find_lowest_entropy_cell + return false unless c + + observe(c) + propagate + + if @contradiction + # Restart: rebuild wave state and try again. + setup_wave_state + next + end + return true + end + end + + def find_lowest_entropy_cell + n = @cells_count + remaining = @remaining + entropies = @entropies + noise = @noise + best_c = -1 + best_e = ::Float::INFINITY + c = 0 + while c < n + if remaining[c] > 1 + e = entropies[c] + noise[c] + if e < best_e + best_e = e + best_c = c + end + end + c += 1 + end + best_c < 0 ? nil : best_c + end + + def observe(c) + wmask = @wave[c] + total = @sum_w[c] + r = ::Kernel.rand * total + + weights = @weights + chosen = -1 + mask = wmask + t = 0 + while mask > 0 + if (mask & 1) != 0 + r -= weights[t] + if r <= 0 + chosen = t + break + end + end + mask >>= 1 + t += 1 + end + + if chosen < 0 + # Floating-point edge: pick the last set bit in wmask. + mask = wmask + t = 0 + while mask > 0 + chosen = t if (mask & 1) != 0 + mask >>= 1 + t += 1 + end + end + + # Ban every other tile at this cell. 
+ mask = wmask + t = 0 + while mask > 0 + if (mask & 1) != 0 && t != chosen + ban(c, t) + return if @contradiction + end + mask >>= 1 + t += 1 + end + end + + def ban(c, t) + bit = 1 << t + wave = @wave + return if (wave[c] & bit) == 0 + + wave[c] = wave[c] ^ bit + @remaining[c] -= 1 + + w = @weights[t] + wlogw = @weights_log_weights[t] + @sum_w[c] -= w + @sum_w_log_w[c] -= wlogw + + r = @remaining[c] + if r == 0 + @contradiction = true + return + end + + s = @sum_w[c] + @entropies[c] = ::Math.log(s) - @sum_w_log_w[c] / s + + if r == 1 + @uncollapsed_count -= 1 + # Record the single remaining tile so grid() is O(1) per cell. + mask = @wave[c] + tt = 0 + while mask > 0 + if (mask & 1) != 0 + @chosen_tile[c] = tt + break + end + mask >>= 1 + tt += 1 + end + end + + @prop_cells.push(c) + @prop_tiles.push(t) + end + + def propagate + prop_cells = @prop_cells + prop_tiles = @prop_tiles + propagator_lists = @propagator_lists + compatible = @compatible + wave = @wave + t_max = @num_tiles + w = @width + h = @height + + until prop_cells.empty? + return if @contradiction + t = prop_tiles.pop + c = prop_cells.pop + + cx = c % w + cy = c / w + + d = 0 + while d < 4 + nx = cx + DX[d] + ny = cy + DY[d] + if nx >= 0 && nx < w && ny >= 0 && ny < h + nc = ny * w + nx + list = propagator_lists[d][t] + opp_d = OPP[d] + i = 0 + len = list.length + while i < len + tp = list[i] + idx = (nc * t_max + tp) * 4 + opp_d + count = compatible.getbyte(idx) - 1 + compatible.setbyte(idx, count) + if count == 0 && (wave[nc] & (1 << tp)) != 0 + ban(nc, tp) + return if @contradiction + end + i += 1 + end + end + d += 1 + end + end + end + + # ---- helpers ---------------------------------------------------------------- + + def tile_at(x, y) + t = @chosen_tile[y * @width + x] + t < 0 ? 
nil : @tiles[t] + end - i = i.succ + def popcount(x) + c = 0 + while x > 0 + c += 1 if (x & 1) != 0 + x >>= 1 end - ucg[acc.sample] + c end end end diff --git a/lib/wave_function_collapse/tile.rb b/lib/wave_function_collapse/tile.rb index a50d6c4..37b2728 100644 --- a/lib/wave_function_collapse/tile.rb +++ b/lib/wave_function_collapse/tile.rb @@ -1,14 +1,16 @@ +# frozen_string_literal: true + module WaveFunctionCollapse - class Tile < BasicObject + class Tile attr_reader :tileid, :probability, :up, :right, :down, :left def initialize(tileid:, wangid:, probability: 1.0) @tileid = tileid @probability = probability || 1.0 - @up = wangid.values_at(7, 0, 1).hash - @right = wangid.values_at(1, 2, 3).hash - @down = wangid.values_at(5, 4, 3).hash - @left = wangid.values_at(7, 6, 5).hash + @up = wangid.values_at(7, 0, 1).freeze + @right = wangid.values_at(1, 2, 3).freeze + @down = wangid.values_at(5, 4, 3).freeze + @left = wangid.values_at(7, 6, 5).freeze end end end diff --git a/lib/wave_function_collapse/window.rb b/lib/wave_function_collapse/window.rb index d09166a..1807a46 100644 --- a/lib/wave_function_collapse/window.rb +++ b/lib/wave_function_collapse/window.rb @@ -33,14 +33,18 @@ def defaults def update @labels = [] - @map = @model.solve if @map.nil? + if @map.nil? + @model.solve + @map = @model.grid + end return if @paused unless @model.complete? 
time_start = Process.clock_gettime(Process::CLOCK_MONOTONIC) - @map = @model.iterate + @model.iterate @times << Process.clock_gettime(Process::CLOCK_MONOTONIC) - time_start + @map = @model.grid end end @@ -122,7 +126,7 @@ def draw_map column.reverse.each_with_index do |tile, y| inverted_y = (y - @model.height + 1).abs - entropy = @model.cell_at(x, inverted_y).entropy + entropy = @model.entropy_at(x, inverted_y) if entropy > 1 percent_entropy = (entropy.to_f / @model.max_entropy * 255).round diff --git a/test/test_model.rb b/test/test_model.rb index 96f503e..ff632cf 100644 --- a/test/test_model.rb +++ b/test/test_model.rb @@ -13,9 +13,8 @@ def test_initialize assert_equal 320, model.width assert_equal 240, model.height - assert_equal 320 * 240, model.cells.size assert_equal 3, model.max_entropy - assert_equal 0, model.percent + assert_in_delta(100.0 / (320 * 240), model.percent, 0.01) refute model.complete? assert model.solve assert model.iterate @@ -37,20 +36,14 @@ def test_prepend_empty_row model.prepend_empty_row - assert_equal 4, model.cells.size - assert_equal 1, model.cells[0].entropy - assert_equal 1, model.cells[1].entropy - assert_predicate model.cells[0], :collapsed? - assert_predicate model.cells[0], :collapsed? - assert_equal 3, model.cells[2].entropy - assert_equal 3, model.cells[3].entropy - refute_predicate model.cells[2], :collapsed? - refute_predicate model.cells[3], :collapsed? 
+ assert_equal 1, model.entropy_at(0, 0) + assert_equal 1, model.entropy_at(1, 0) + assert_equal 3, model.entropy_at(0, 1) + assert_equal 3, model.entropy_at(1, 1) assert_equal 2, model.width assert_equal 2, model.height - assert_equal 2 * 2, model.cells.size assert_equal 3, model.max_entropy - assert_equal 50, model.percent + assert_equal 50.0, model.percent end end From 4738796ff1e96854c648f480a76d0e1797ca63de Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Tue, 26 May 2026 12:07:52 +0200 Subject: [PATCH 02/32] Skip supporter decrement for already-banned tiles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The propagator inner loop decremented `compatible[nc][tp][d]` for every neighbour tile `tp` even when `tp` had already been banned at `nc`. `String#setbyte` wraps on underflow rather than raising, so this was silent — but the count drifted from reality and the work was wasted, since the `wave[nc] & bit` check below it would suppress the ban call anyway. Hoist that bit check above the byte read so banned tiles are skipped entirely. --- lib/wave_function_collapse/model.rb | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index c11366f..679f5de 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -493,12 +493,18 @@ def propagate len = list.length while i < len tp = list[i] - idx = (nc * t_max + tp) * 4 + opp_d - count = compatible.getbyte(idx) - 1 - compatible.setbyte(idx, count) - if count == 0 && (wave[nc] & (1 << tp)) != 0 - ban(nc, tp) - return if @contradiction + # Skip tiles already banned at the neighbour — decrementing + # their supporter count would silently wrap past zero and + # waste work; the bit check below would suppress the ban + # anyway. 
+ if (wave[nc] & (1 << tp)) != 0 + idx = (nc * t_max + tp) * 4 + opp_d + count = compatible.getbyte(idx) - 1 + compatible.setbyte(idx, count) + if count == 0 + ban(nc, tp) + return if @contradiction + end end i += 1 end From 5cf00c0327cca04e3f9f11353063be5985842f02 Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Tue, 26 May 2026 12:08:11 +0200 Subject: [PATCH 03/32] Drop unused log_weights array in propagator setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `log_weights` was allocated and populated alongside `weights_log_weights` but never read after assignment — the per-iteration `lw` local was used both for the array slot and the `w * lw` product, then thrown away. Fold the multiplication into a single local and remove the array. --- lib/wave_function_collapse/model.rb | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index 679f5de..e871bcf 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -165,7 +165,6 @@ def build_propagator # Weights weights = ::Array.new(t_max) - log_weights = ::Array.new(t_max) weights_log_weights = ::Array.new(t_max) sum_w = 0.0 sum_w_log_w = 0.0 @@ -173,11 +172,10 @@ def build_propagator while t < t_max w = tiles[t].probability.to_f weights[t] = w - lw = ::Math.log(w) - log_weights[t] = lw - weights_log_weights[t] = w * lw + wlogw = w * ::Math.log(w) + weights_log_weights[t] = wlogw sum_w += w - sum_w_log_w += w * lw + sum_w_log_w += wlogw t += 1 end @weights = weights.freeze From 10ee256ca0b3e5b88bb707a8488c889d1ea22d00 Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Tue, 26 May 2026 12:08:30 +0200 Subject: [PATCH 04/32] Assert percent is exactly zero before solving MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The old assertion used `assert_in_delta(100.0 / (320 * 240), …, 0.01)`, which only passed 
because the delta was wider than the value it was comparing against — masking the fact that the new Model reports 0.0% progress until the first observation. Pin the expected value directly. --- test/test_model.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_model.rb b/test/test_model.rb index ff632cf..2e9fb04 100644 --- a/test/test_model.rb +++ b/test/test_model.rb @@ -14,7 +14,7 @@ def test_initialize assert_equal 320, model.width assert_equal 240, model.height assert_equal 3, model.max_entropy - assert_in_delta(100.0 / (320 * 240), model.percent, 0.01) + assert_equal 0.0, model.percent refute model.complete? assert model.solve assert model.iterate From f2169996a13c6dedcf1defad865cd42c1930efaf Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Tue, 26 May 2026 12:09:13 +0200 Subject: [PATCH 05/32] Terminate when the tileset has only one tile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `@uncollapsed_count` was set to `n` unconditionally, so when `tiles.size` was 1 — every cell already trivially collapsed — `complete?` stayed false and `iterate until model.complete?` looped forever. Detect the single-tile case during state setup, pre-fill `@chosen_tile` with the sole tile index, and start with zero uncollapsed cells so `grid` returns a fully populated result on the first call. --- lib/wave_function_collapse/model.rb | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index e871bcf..4f20bbc 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -221,7 +221,16 @@ def setup_wave_state @entropies = ::Array.new(n, @initial_entropy) @noise = ::Array.new(n) { ::Kernel.rand * 1e-6 } @chosen_tile = ::Array.new(n, -1) - @uncollapsed_count = n + # When the tileset has a single tile every cell is born collapsed, + # so `complete?` must report true immediately. 
Fill `@chosen_tile` + # for any cell whose wave already has exactly one bit and count + # only the genuinely undetermined cells. + if t_max == 1 + @chosen_tile.fill(0) + @uncollapsed_count = 0 + else + @uncollapsed_count = n + end @contradiction = false @prop_cells.clear @prop_tiles.clear From 8a0434b7661fa3f16c0338fffb5fdf42ceea6041 Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Tue, 26 May 2026 12:58:59 +0200 Subject: [PATCH 06/32] Precompute per-tile bitmasks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `1 << t` was being recomputed on every propagation inner iteration, every observation, and every ban — and for tile indices ≥ 62 those left-shifts allocated a fresh Bignum each time. Materialise the masks once at construction (`@bit[t]`) and read them by index from then on. --- lib/wave_function_collapse/model.rb | 39 ++++++++++++++++------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index 4f20bbc..4947e9a 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -185,6 +185,9 @@ def build_propagator @initial_entropy = ::Math.log(sum_w) - sum_w_log_w / sum_w @full_mask = (1 << t_max) - 1 + # Precomputed `1 << t` per tile — saves a Bignum allocation per + # propagation inner iteration and ban call. + @bit = ::Array.new(t_max) { |t| 1 << t }.freeze # Precompute the 4-byte-per-tile block representing an interior cell's # initial supporter counts (one byte per direction). 
Used to build the @@ -323,13 +326,13 @@ def orphan_ban_pass t_max = @num_tiles compatible = @compatible wave = @wave + bit_table = @bit c = 0 while c < n t = 0 while t < t_max - bit = 1 << t - if (wave[c] & bit) != 0 + if (wave[c] & bit_table[t]) != 0 base = (c * t_max + t) * 4 if compatible.getbyte(base) == 0 || compatible.getbyte(base + 1) == 0 || @@ -390,47 +393,46 @@ def observe(c) r = ::Kernel.rand * total weights = @weights + bit_table = @bit + t_max = @num_tiles chosen = -1 - mask = wmask t = 0 - while mask > 0 - if (mask & 1) != 0 + while t < t_max + if (wmask & bit_table[t]) != 0 r -= weights[t] if r <= 0 chosen = t break end end - mask >>= 1 t += 1 end if chosen < 0 # Floating-point edge: pick the last set bit in wmask. - mask = wmask - t = 0 - while mask > 0 - chosen = t if (mask & 1) != 0 - mask >>= 1 - t += 1 + t = t_max - 1 + while t >= 0 + if (wmask & bit_table[t]) != 0 + chosen = t + break + end + t -= 1 end end # Ban every other tile at this cell. - mask = wmask t = 0 - while mask > 0 - if (mask & 1) != 0 && t != chosen + while t < t_max + if t != chosen && (wmask & bit_table[t]) != 0 ban(c, t) return if @contradiction end - mask >>= 1 t += 1 end end def ban(c, t) - bit = 1 << t + bit = @bit[t] wave = @wave return if (wave[c] & bit) == 0 @@ -476,6 +478,7 @@ def propagate propagator_lists = @propagator_lists compatible = @compatible wave = @wave + bit_table = @bit t_max = @num_tiles w = @width h = @height @@ -504,7 +507,7 @@ def propagate # their supporter count would silently wrap past zero and # waste work; the bit check below would suppress the ban # anyway. 
- if (wave[nc] & (1 << tp)) != 0 + if (wave[nc] & bit_table[tp]) != 0 idx = (nc * t_max + tp) * 4 + opp_d count = compatible.getbyte(idx) - 1 compatible.setbyte(idx, count) From c951f6c17da6cfd38478d402bfab84de9222b01c Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Tue, 26 May 2026 12:59:45 +0200 Subject: [PATCH 07/32] Precompute per-cell neighbour table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `c % w`, `c / w`, the `cx + DX[d]` / `cy + DY[d]` recomputation, the four bounds compares, and `ny * w + nx` all run on every direction iteration of the propagation, border-patch, and rebuild loops — and none of it depends on the wave state. Cache the resolved neighbour index per `(cell, direction)` at construction in a flat `@neighbours[c * 4 + d]` table (-1 for off-grid) and let the hot loops do a single Array lookup instead. --- lib/wave_function_collapse/model.rb | 62 ++++++++++++++++++----------- 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index 4947e9a..426de0d 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -209,6 +209,36 @@ def build_initial_state # Stack buffers reused across propagations. @prop_cells = [] @prop_tiles = [] + build_neighbours + end + + # Precompute the neighbour cell index for every (cell, direction) pair, + # stored flat at `@neighbours[c * 4 + d]`. Missing neighbours (off-grid) + # are encoded as -1. Replaces per-iteration `c % w`, `c / w`, bounds + # checks, and `ny * w + nx` in every hot loop that walks neighbours. 
+ def build_neighbours + n = @cells_count + w = @width + h = @height + neighbours = ::Array.new(n * 4) + c = 0 + while c < n + cx = c % w + cy = c / w + d = 0 + while d < 4 + nx = cx + DX[d] + ny = cy + DY[d] + neighbours[c * 4 + d] = if nx >= 0 && nx < w && ny >= 0 && ny < h + ny * w + nx + else + -1 + end + d += 1 + end + c += 1 + end + @neighbours = neighbours.freeze end # ---- per-run state (resettable on contradiction/restart) --------------------- @@ -246,8 +276,7 @@ def setup_wave_state def build_initial_compatible n = @cells_count t_max = @num_tiles - w = @width - h = @height + neighbours = @neighbours buf = ::String.new(::String.new.b, capacity: n * t_max * 4) buf.force_encoding(::Encoding::BINARY) @@ -260,13 +289,9 @@ def build_initial_compatible # Patch border cells: missing directions get sentinel 255. c = 0 while c < n - cx = c % w - cy = c / w d = 0 while d < 4 - nx = cx + DX[d] - ny = cy + DY[d] - unless nx >= 0 && nx < w && ny >= 0 && ny < h + if neighbours[c * 4 + d] < 0 base = (c * t_max) * 4 + d t = 0 while t < t_max @@ -285,8 +310,7 @@ def build_initial_compatible def rebuild_compatible_from_wave n = @cells_count t_max = @num_tiles - w = @width - h = @height + neighbours = @neighbours propagator = @propagator wave = @wave @@ -296,14 +320,10 @@ def rebuild_compatible_from_wave c = 0 while c < n - cx = c % w - cy = c / w d = 0 while d < 4 - nx = cx + DX[d] - ny = cy + DY[d] - if nx >= 0 && nx < w && ny >= 0 && ny < h - nc = ny * w + nx + nc = neighbours[c * 4 + d] + if nc >= 0 wmask = wave[nc] t = 0 while t < t_max @@ -479,24 +499,18 @@ def propagate compatible = @compatible wave = @wave bit_table = @bit + neighbours = @neighbours t_max = @num_tiles - w = @width - h = @height until prop_cells.empty? 
return if @contradiction t = prop_tiles.pop c = prop_cells.pop - cx = c % w - cy = c / w - d = 0 while d < 4 - nx = cx + DX[d] - ny = cy + DY[d] - if nx >= 0 && nx < w && ny >= 0 && ny < h - nc = ny * w + nx + nc = neighbours[c * 4 + d] + if nc >= 0 list = propagator_lists[d][t] opp_d = OPP[d] i = 0 From 444d2b69bbb1fafdbff40efb31f2f6bc5c5f70cc Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Tue, 26 May 2026 13:00:03 +0200 Subject: [PATCH 08/32] Resolve chosen tile via Integer#bit_length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a cell's `remaining` count hits 1, the surviving tile was found by walking the wave mask one bit at a time — average ~T/2 iterations, which is ~94 for the current tileset and fires once per uncollapsed cell. The wave is a single-bit value at that point, so `mask.bit_length - 1` gives the index directly via a C-implemented Integer primitive. --- lib/wave_function_collapse/model.rb | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index 426de0d..83b34ff 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -475,17 +475,10 @@ def ban(c, t) if r == 1 @uncollapsed_count -= 1 - # Record the single remaining tile so grid() is O(1) per cell. - mask = @wave[c] - tt = 0 - while mask > 0 - if (mask & 1) != 0 - @chosen_tile[c] = tt - break - end - mask >>= 1 - tt += 1 - end + # Single bit left — `bit_length` returns its position + 1, so + # subtracting one gives the tile index in O(1) instead of + # scanning the mask bit by bit. 
+ @chosen_tile[c] = wave[c].bit_length - 1 end @prop_cells.push(c) From aebf07e419183c311edd8f3b5ee91a8bec0479dc Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Tue, 26 May 2026 13:00:38 +0200 Subject: [PATCH 09/32] Cache the initial supporter-count buffer for restarts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `build_initial_compatible` ran on every contradiction restart, rebuilding the full N×T×4 byte buffer from the interior block and re-applying the border sentinels with `setbyte`. The result is fully determined by the tileset and grid dimensions, so build it once at construction (`@initial_compatible`) and `dup` it into `@compatible` on each restart — a flat memcpy beats the per-byte border pass, and the win grows with the grid. --- lib/wave_function_collapse/model.rb | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index 83b34ff..667de38 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -210,6 +210,7 @@ def build_initial_state @prop_cells = [] @prop_tiles = [] build_neighbours + build_initial_compatible_template end # Precompute the neighbour cell index for every (cell, direction) pair, @@ -268,12 +269,15 @@ def setup_wave_state @prop_cells.clear @prop_tiles.clear - build_initial_compatible + @compatible = @initial_compatible.dup orphan_ban_pass propagate end - def build_initial_compatible + # The initial supporter-count buffer is fully determined by the tileset + # and grid dimensions, so build it once and `dup` per run instead of + # repeating the border-patch pass on every contradiction restart. 
+ def build_initial_compatible_template n = @cells_count t_max = @num_tiles neighbours = @neighbours @@ -304,7 +308,7 @@ def build_initial_compatible c += 1 end - @compatible = buf + @initial_compatible = buf.freeze end def rebuild_compatible_from_wave From 4d4fafdad18f1a7767906ef79622610be9638800 Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Tue, 26 May 2026 13:11:25 +0200 Subject: [PATCH 10/32] Use setbyte for interior_block construction Replaces 4*t_max one-byte `.chr` String allocations in build_propagator with a single pre-sized zero-filled buffer written via setbyte. One-time setup cost only, but removes setup-phase allocation noise. --- lib/wave_function_collapse/model.rb | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index 667de38..1e12b1b 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -191,15 +191,16 @@ def build_propagator # Precompute the 4-byte-per-tile block representing an interior cell's # initial supporter counts (one byte per direction). Used to build the - # @compatible buffer quickly. - block = ::String.new(::String.new.b, capacity: t_max * 4) - block.force_encoding(::Encoding::BINARY) + # @compatible buffer quickly. Sized via a single fill string, then + # written by setbyte — avoids 4*t_max one-byte `.chr` allocations. 
+ block = "\x00".b * (t_max * 4) t = 0 while t < t_max - block << propagator_lists[0][t].length.chr - block << propagator_lists[1][t].length.chr - block << propagator_lists[2][t].length.chr - block << propagator_lists[3][t].length.chr + base = t * 4 + block.setbyte(base, propagator_lists[0][t].length) + block.setbyte(base + 1, propagator_lists[1][t].length) + block.setbyte(base + 2, propagator_lists[2][t].length) + block.setbyte(base + 3, propagator_lists[3][t].length) t += 1 end @interior_block = block.freeze From 6ea4d9fd24fbaf7a3ce14db2e9ed3111cdd02280 Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Tue, 26 May 2026 13:11:51 +0200 Subject: [PATCH 11/32] Intern tile edge signatures via class-level cache Tilesets share edge signatures across many tiles. A class-level cache keyed by packed wang IDs collapses 4-per-tile array allocations down to one per unique signature. Array#hash is value-based so build_propagator's edge_id dedup keeps working unchanged. --- lib/wave_function_collapse/tile.rb | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/lib/wave_function_collapse/tile.rb b/lib/wave_function_collapse/tile.rb index 37b2728..b89a90b 100644 --- a/lib/wave_function_collapse/tile.rb +++ b/lib/wave_function_collapse/tile.rb @@ -4,13 +4,22 @@ module WaveFunctionCollapse class Tile attr_reader :tileid, :probability, :up, :right, :down, :left + # Tilesets typically share edge signatures across many tiles, so intern + # the 3-element edge arrays in a class-level cache keyed by the packed + # wang IDs. Collapses 4-per-tile array allocations to one per unique + # signature. Array#hash is value-based, so consumers that key off these + # arrays (build_propagator's edge_id dedup) keep working unchanged. 
+ def self.intern_edge(a, b, c) + (@edges ||= {})[(a << 16) | (b << 8) | c] ||= [a, b, c].freeze + end + def initialize(tileid:, wangid:, probability: 1.0) @tileid = tileid @probability = probability || 1.0 - @up = wangid.values_at(7, 0, 1).freeze - @right = wangid.values_at(1, 2, 3).freeze - @down = wangid.values_at(5, 4, 3).freeze - @left = wangid.values_at(7, 6, 5).freeze + @up = Tile.intern_edge(wangid[7], wangid[0], wangid[1]) + @right = Tile.intern_edge(wangid[1], wangid[2], wangid[3]) + @down = Tile.intern_edge(wangid[5], wangid[4], wangid[3]) + @left = Tile.intern_edge(wangid[7], wangid[6], wangid[5]) end end end From ddf3b50881d2b294f8d6d1072e84b0e45e026ecc Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Tue, 26 May 2026 13:12:26 +0200 Subject: [PATCH 12/32] Reuse per-cell state arrays across restarts Pre-allocate the seven per-cell state arrays once in build_initial_state and reset them in place via Array#fill (and a tight while loop for the noise refresh) in setup_wave_state. Contradiction restarts no longer discard and reallocate ~7*n slots' worth of Array objects. --- lib/wave_function_collapse/model.rb | 35 ++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index 1e12b1b..0a7072b 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -207,9 +207,19 @@ def build_propagator end def build_initial_state + n = @cells_count # Stack buffers reused across propagations. @prop_cells = [] @prop_tiles = [] + # Pre-allocate per-cell state arrays once; setup_wave_state resets + # them in place via Array#fill (no per-restart allocations). 
+ @wave = ::Array.new(n) + @remaining = ::Array.new(n) + @sum_w = ::Array.new(n) + @sum_w_log_w = ::Array.new(n) + @entropies = ::Array.new(n) + @noise = ::Array.new(n) + @chosen_tile = ::Array.new(n) build_neighbours build_initial_compatible_template end @@ -249,17 +259,22 @@ def setup_wave_state n = @cells_count t_max = @num_tiles - @wave = ::Array.new(n, @full_mask) - @remaining = ::Array.new(n, t_max) - @sum_w = ::Array.new(n, @initial_sum_w) - @sum_w_log_w = ::Array.new(n, @initial_sum_w_log_w) - @entropies = ::Array.new(n, @initial_entropy) - @noise = ::Array.new(n) { ::Kernel.rand * 1e-6 } - @chosen_tile = ::Array.new(n, -1) + # Reset per-cell state in place — buffers are pre-allocated in + # build_initial_state, so contradiction restarts don't churn the GC. + @wave.fill(@full_mask) + @remaining.fill(t_max) + @sum_w.fill(@initial_sum_w) + @sum_w_log_w.fill(@initial_sum_w_log_w) + @entropies.fill(@initial_entropy) + @chosen_tile.fill(-1) + i = 0 + while i < n + @noise[i] = ::Kernel.rand * 1e-6 + i += 1 + end # When the tileset has a single tile every cell is born collapsed, - # so `complete?` must report true immediately. Fill `@chosen_tile` - # for any cell whose wave already has exactly one bit and count - # only the genuinely undetermined cells. + # so `complete?` must report true immediately. Fill must come after + # the generic `-1` fill above so we overwrite, not the other way. if t_max == 1 @chosen_tile.fill(0) @uncollapsed_count = 0 From a1c1dc99b823c48764334b2780a93363989d71a1 Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Tue, 26 May 2026 13:13:09 +0200 Subject: [PATCH 13/32] Reuse compatible buffer via String#replace Allocate @compatible once in build_initial_state and reset it via String#replace on every restart instead of dup'ing @initial_compatible into a fresh n*t_max*4-byte String. Same idea applies to rebuild_compatible_from_wave, which now clears via a frozen 0xFF sentinel (built once) and writes directly into @compatible. 
--- lib/wave_function_collapse/model.rb | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index 0a7072b..cab22af 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -222,6 +222,11 @@ def build_initial_state @chosen_tile = ::Array.new(n) build_neighbours build_initial_compatible_template + # Persistent supporter-count buffer: sized once, reset via + # String#replace on every restart so we never allocate a fresh + # n*t_max*4-byte String on a contradiction. + @compatible = ::String.new(capacity: @cells_count * @num_tiles * 4, encoding: ::Encoding::BINARY) + @compatible << @initial_compatible end # Precompute the neighbour cell index for every (cell, direction) pair, @@ -285,7 +290,7 @@ def setup_wave_state @prop_cells.clear @prop_tiles.clear - @compatible = @initial_compatible.dup + @compatible.replace(@initial_compatible) orphan_ban_pass propagate end @@ -325,6 +330,10 @@ def build_initial_compatible_template end @initial_compatible = buf.freeze + # Frozen 0xFF-filled sentinel of the same size, reused by + # rebuild_compatible_from_wave to clear @compatible in place + # without allocating an intermediate fill string. 
+ @compatible_fill = ("\xff".b * (n * t_max * 4)).freeze end def rebuild_compatible_from_wave @@ -333,10 +342,9 @@ def rebuild_compatible_from_wave neighbours = @neighbours propagator = @propagator wave = @wave + buf = @compatible - buf = ::String.new(::String.new.b, capacity: n * t_max * 4) - buf.force_encoding(::Encoding::BINARY) - buf << "\xff".b * (n * t_max * 4) + buf.replace(@compatible_fill) c = 0 while c < n @@ -357,8 +365,6 @@ def rebuild_compatible_from_wave end c += 1 end - - @compatible = buf end def orphan_ban_pass From 2dd31e5e665166802b4303b166dda41ebb9a3149 Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Tue, 26 May 2026 13:13:57 +0200 Subject: [PATCH 14/32] Shift prepend_empty_row state in place Replace each `arr[w, n-w] + Array.new(w, val)` (three array allocations per per-cell array, seven arrays total) with an in-place shift_uniform! helper. Copies low-to-high so each source index is read before its destination is overwritten. Adds a 3x3 prepend test to guard the shift direction. --- lib/wave_function_collapse/model.rb | 43 +++++++++++++++++++++++------ test/test_model.rb | 25 +++++++++++++++++ 2 files changed, 60 insertions(+), 8 deletions(-) diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index cab22af..1d7c06c 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -62,15 +62,28 @@ def grid def prepend_empty_row w = @width n = @cells_count + shift_count = n - w + + # Shift state down in place: drop bottom row (cells 0..w-1), fill + # the new top row with default values. Copying low-to-high is safe + # because each source index (i + w) is greater than its destination. 
+ shift_uniform!(@wave, shift_count, @full_mask) + shift_uniform!(@remaining, shift_count, @num_tiles) + shift_uniform!(@sum_w, shift_count, @initial_sum_w) + shift_uniform!(@sum_w_log_w, shift_count, @initial_sum_w_log_w) + shift_uniform!(@entropies, shift_count, @initial_entropy) + shift_uniform!(@chosen_tile, shift_count, -1) - # Shift state down: drop bottom row (cells 0..w-1), append new top row. - @wave = @wave[w, n - w] + Array.new(w, @full_mask) - @remaining = @remaining[w, n - w] + Array.new(w, @num_tiles) - @sum_w = @sum_w[w, n - w] + Array.new(w, @initial_sum_w) - @sum_w_log_w = @sum_w_log_w[w, n - w] + Array.new(w, @initial_sum_w_log_w) - @entropies = @entropies[w, n - w] + Array.new(w, @initial_entropy) - @noise = @noise[w, n - w] + Array.new(w) { ::Kernel.rand * 1e-6 } - @chosen_tile = @chosen_tile[w, n - w] + Array.new(w, -1) + noise = @noise + i = 0 + while i < shift_count + noise[i] = noise[i + w] + i += 1 + end + while i < n + noise[i] = ::Kernel.rand * 1e-6 + i += 1 + end @uncollapsed_count = 0 c = 0 @@ -564,6 +577,20 @@ def tile_at(x, y) t < 0 ? nil : @tiles[t] end + def shift_uniform!(arr, shift_count, fill_value) + w = @width + i = 0 + while i < shift_count + arr[i] = arr[i + w] + i += 1 + end + n = arr.length + while i < n + arr[i] = fill_value + i += 1 + end + end + def popcount(x) c = 0 while x > 0 diff --git a/test/test_model.rb b/test/test_model.rb index 2e9fb04..d06005d 100644 --- a/test/test_model.rb +++ b/test/test_model.rb @@ -46,4 +46,29 @@ def test_prepend_empty_row assert_equal 3, model.max_entropy assert_equal 50.0, model.percent end + + def test_prepend_empty_row_3x3 + tiles = [ + Tile.new(tileid: 0, wangid: [0, 0, 0, 0, 0, 0, 0, 0]), + Tile.new(tileid: 1, wangid: [0, 0, 0, 0, 0, 0, 0, 0]), + Tile.new(tileid: 2, wangid: [0, 0, 0, 0, 0, 0, 0, 0]) + ] + model = Model.new(tiles, 3, 3) + model.iterate until model.complete? + assert model.complete? 
+ + model.prepend_empty_row + + # Bottom two rows were the top two of the prior 3x3 (collapsed); the prior bottom row was dropped. + # Top row is the freshly-inserted empty row (entropy == max_entropy == 3). + assert_equal 1, model.entropy_at(0, 0) + assert_equal 1, model.entropy_at(1, 0) + assert_equal 1, model.entropy_at(2, 0) + assert_equal 1, model.entropy_at(0, 1) + assert_equal 1, model.entropy_at(1, 1) + assert_equal 1, model.entropy_at(2, 1) + assert_equal 3, model.entropy_at(0, 2) + assert_equal 3, model.entropy_at(1, 2) + assert_equal 3, model.entropy_at(2, 2) + end end From c0c7c4f2ec1a42329ed43e4fd68998acd38833f1 Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Tue, 26 May 2026 13:14:21 +0200 Subject: [PATCH 15/32] Compact heap after one-time model setup Trigger GC.compact at the tail of initialize so the frozen long-lived data (propagator, neighbours, bit table, compatible template + fill sentinel, weights) settles into old gen and doesn't fragment the heap as solves allocate and free young-gen objects. Skipped on tiny grids where compaction cost outweighs the win. --- lib/wave_function_collapse/model.rb | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index 1d7c06c..3f2ae12 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -28,6 +28,14 @@ def initialize(tiles, width, height) build_propagator build_initial_state setup_wave_state + + # All long-lived precomputed data (propagator, neighbours, bit + # tables, compatible template + fill sentinel, weights) is frozen + # and lives for the model's lifetime. Compact once now so it + # settles into old gen and doesn't fragment the heap as solves + # churn young-gen objects. Skipped for tiny grids where compaction + # cost outweighs the win. + ::GC.compact if @cells_count >= 400 end def complete? 
From a52b8afc2a83c39ec75d9b500ed3773cdda36a0a Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Tue, 26 May 2026 13:15:43 +0200 Subject: [PATCH 16/32] Emit optional GC stats from bin/benchmark MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When GC_STATS=1 is set, bin/benchmark wraps each run_once with a GC.start + GC.stat snapshot and reports per-run total_allocated_objects, malloc_increase_bytes, minor_gc_count, and major_gc_count alongside the existing timing columns. The CI JSON contract is preserved exactly — the ENV["CI"] branch is unchanged. --- bin/benchmark | 46 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/bin/benchmark b/bin/benchmark index e046ace..3ef4d1c 100755 --- a/bin/benchmark +++ b/bin/benchmark @@ -20,7 +20,15 @@ def build_tiles end end +GC_STAT_KEYS = %i[total_allocated_objects malloc_increase_bytes minor_gc_count major_gc_count].freeze + +def gc_delta(before, after) + GC_STAT_KEYS.each_with_object({}) { |k, h| h[k] = after[k] - before[k] } +end + def run_once(klass, tiles, w, h) + GC.start + gc_before = GC.stat t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC) model = klass.new(tiles, w, h) model.solve @@ -29,7 +37,9 @@ def run_once(klass, tiles, w, h) model.iterate iters += 1 end - [Process.clock_gettime(Process::CLOCK_MONOTONIC) - t0, iters] + elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - t0 + gc_after = GC.stat + [elapsed, iters, gc_delta(gc_before, gc_after)] end def median(xs) @@ -71,11 +81,21 @@ legacy_sizes = (ENV["LEGACY_SIZES"] || "10,15,20,25,30").split(",").map(&:to_i) new_sizes = (ENV["NEW_SIZES"] || "10,15,20,30,50,75,100").split(",").map(&:to_i) runs = Integer(ENV["RUNS"] || "3") skip_legacy = ARGV.include?("--no-legacy") +show_gc = ENV["GC_STATS"] -printf("%-8s %-9s %-7s %-10s %-10s %-12s %-10s\n", - "Model", "Grid", "Cells", "Median", "Best", "Obs/sec", "Iters") -printf("%-8s %-9s %-7s %-10s %-10s %-12s 
%-10s\n", - "-----", "----", "-----", "------", "----", "-------", "-----") +if show_gc + printf("%-8s %-9s %-7s %-10s %-10s %-12s %-10s %-12s %-14s %-8s %-8s\n", + "Model", "Grid", "Cells", "Median", "Best", "Obs/sec", "Iters", + "Alloc/run", "Malloc B/run", "MinorGC", "MajorGC") + printf("%-8s %-9s %-7s %-10s %-10s %-12s %-10s %-12s %-14s %-8s %-8s\n", + "-----", "----", "-----", "------", "----", "-------", "-----", + "---------", "------------", "-------", "-------") +else + printf("%-8s %-9s %-7s %-10s %-10s %-12s %-10s\n", + "Model", "Grid", "Cells", "Median", "Best", "Obs/sec", "Iters") + printf("%-8s %-9s %-7s %-10s %-10s %-12s %-10s\n", + "-----", "----", "-----", "------", "----", "-------", "-----") +end [ [WaveFunctionCollapse::LegacyModel, skip_legacy ? [] : legacy_sizes, "Legacy"], @@ -84,16 +104,26 @@ printf("%-8s %-9s %-7s %-10s %-10s %-12s %-10s\n", sizes.each do |s| times = [] iters_seen = 0 + gc_deltas = [] runs.times do |r| srand(s * 1000 + r + 1) - elapsed, iters = run_once(klass, tiles, s, s) + elapsed, iters, gc = run_once(klass, tiles, s, s) times << elapsed iters_seen = iters + gc_deltas << gc end med = median(times) best = times.min - printf("%-8s %-9s %-7d %9.3fs %9.3fs %12.0f %10d\n", - label, "#{s}x#{s}", s * s, med, best, iters_seen / med, iters_seen) + if show_gc + avg = ->(k) { gc_deltas.sum { |g| g[k] } / gc_deltas.size } + printf("%-8s %-9s %-7d %9.3fs %9.3fs %12.0f %10d %12d %14d %8d %8d\n", + label, "#{s}x#{s}", s * s, med, best, iters_seen / med, iters_seen, + avg.call(:total_allocated_objects), avg.call(:malloc_increase_bytes), + avg.call(:minor_gc_count), avg.call(:major_gc_count)) + else + printf("%-8s %-9s %-7d %9.3fs %9.3fs %12.0f %10d\n", + label, "#{s}x#{s}", s * s, med, best, iters_seen / med, iters_seen) + end end puts end From d149ff3340c29cf618356dbd9b987c335c8c60b2 Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Tue, 26 May 2026 13:16:31 +0200 Subject: [PATCH 17/32] Add bin/memory_benchmark for allocation 
profiling Dedicated allocation-only measurement: N cold solves (Model.new + solve to completion) and a streaming sub-bench (prepend_empty_row + iterate on a single model, repeated). Reports median/p95/min/max for total_allocated_objects, malloc_increase_bytes, minor/major GC counts, and new shape transitions per measure block. Optional JSON dump via JSON=path/to/out.json. --- bin/memory_benchmark | 113 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100755 bin/memory_benchmark diff --git a/bin/memory_benchmark b/bin/memory_benchmark new file mode 100755 index 0000000..e4ea10b --- /dev/null +++ b/bin/memory_benchmark @@ -0,0 +1,113 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +$LOAD_PATH.unshift File.expand_path("../lib", __dir__) + +require "json" +require "wave_function_collapse" + +TILE_PATH = File.expand_path("../assets/map.tsj", __dir__) + +def build_tiles + json = JSON.load_file(TILE_PATH) + json["wangsets"].last["wangtiles"].map do |tile| + prob = json["tiles"]&.find { |t| t["id"] == tile["tileid"] }&.fetch("probability") + WaveFunctionCollapse::Tile.new( + tileid: tile["tileid"], + wangid: tile["wangid"], + probability: prob + ) + end +end + +GC_KEYS = %i[total_allocated_objects malloc_increase_bytes minor_gc_count major_gc_count].freeze + +def measure + GC.start + before_gc = GC.stat + before_shape = RubyVM.stat[:next_shape_id] + yield + after_gc = GC.stat + after_shape = RubyVM.stat[:next_shape_id] + out = {new_shapes: after_shape - before_shape} + GC_KEYS.each { |k| out[k] = after_gc[k] - before_gc[k] } + out +end + +def median(xs) + s = xs.sort + n = s.length + n.odd? ? 
s[n / 2] : (s[n / 2 - 1] + s[n / 2]) / 2.0 +end + +def p95(xs) + s = xs.sort + s[(s.length * 0.95).ceil - 1] || s.last +end + +WIDTH = Integer(ENV["WIDTH"] || 20) +HEIGHT = Integer(ENV["HEIGHT"] || 20) +RUNS = Integer(ENV["RUNS"] || 20) +STREAM_ROWS = Integer(ENV["STREAM_ROWS"] || 50) + +tiles = build_tiles +yjit_status = defined?(RubyVM::YJIT) && RubyVM::YJIT.enabled? ? "on" : "off" +puts RUBY_DESCRIPTION +puts "Tiles loaded: #{tiles.size}, YJIT=#{yjit_status}" +puts "Grid: #{WIDTH}x#{HEIGHT}, Runs: #{RUNS}, Stream rows: #{STREAM_ROWS}" +puts + +# Section 1: cold solves — full lifecycle per sample. +cold = RUNS.times.map do |i| + srand(WIDTH * 1000 + i + 1) + measure do + model = WaveFunctionCollapse::Model.new(tiles, WIDTH, HEIGHT) + model.solve + model.iterate until model.complete? + end +end + +puts "== Cold solves (Model.new + solve to completion) ==" +GC_KEYS.each do |k| + xs = cold.map { |s| s[k] } + printf(" %-26s median=%-12d p95=%-12d min=%-12d max=%-12d\n", + k.to_s, median(xs), p95(xs), xs.min, xs.max) +end +shape_xs = cold.map { |s| s[:new_shapes] } +printf(" %-26s median=%-12d p95=%-12d min=%-12d max=%-12d\n", + "new_shapes", median(shape_xs), p95(shape_xs), shape_xs.min, shape_xs.max) +puts + +# Section 2: streaming via prepend_empty_row on a single model. +srand(12345) +model = WaveFunctionCollapse::Model.new(tiles, WIDTH, HEIGHT) +model.solve +model.iterate until model.complete? + +stream = STREAM_ROWS.times.map do + measure do + model.prepend_empty_row + model.iterate until model.complete? 
+ end +end + +puts "== Streaming (#{STREAM_ROWS} x prepend_empty_row + solve) ==" +GC_KEYS.each do |k| + xs = stream.map { |s| s[k] } + printf(" %-26s median=%-12d p95=%-12d min=%-12d max=%-12d\n", + k.to_s, median(xs), p95(xs), xs.min, xs.max) +end +shape_xs = stream.map { |s| s[:new_shapes] } +printf(" %-26s median=%-12d p95=%-12d min=%-12d max=%-12d\n", + "new_shapes", median(shape_xs), p95(shape_xs), shape_xs.min, shape_xs.max) +puts + +if ENV["JSON"] + payload = { + config: {width: WIDTH, height: HEIGHT, runs: RUNS, stream_rows: STREAM_ROWS}, + cold: cold, + stream: stream + } + File.write(ENV["JSON"], JSON.dump(payload)) + puts "Wrote raw samples to #{ENV["JSON"]}" +end From 22a0a0c9a7452b0e19f63fbf5c7953e36c758720 Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Tue, 26 May 2026 13:33:30 +0200 Subject: [PATCH 18/32] Inline ban into propagate's inner loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `ban` was called from the inner propagation loop on every supporter that hit zero — millions of times per large solve. Each call paid for method dispatch and re-read several instance variables that were already lifted to locals in `propagate` (wave, bit table, etc.), plus the `wave[c] & bit == 0` guard that the caller had just verified. Inline the fast-path body directly at the call site. `ban` itself stays callable for `orphan_ban_pass` and `observe`, where the guard is still load-bearing. Measured (Ruby 4.0.1 + YJIT, arm64-darwin, 3 runs each, medians): 20×20: 0.359s -> 0.209s (~42% faster) 30×30: 0.806s -> 0.467s 50×50: 2.263s -> 1.303s 75×75: 5.344s -> 3.146s 100×100: 9.715s -> 5.927s obs/sec lifted from ~850 to ~1500 across the table. 
--- lib/wave_function_collapse/model.rb | 38 +++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index 3f2ae12..fbf95f3 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -541,6 +541,13 @@ def propagate bit_table = @bit neighbours = @neighbours t_max = @num_tiles + remaining = @remaining + sum_w = @sum_w + sum_w_log_w = @sum_w_log_w + entropies = @entropies + weights = @weights + weights_log_weights = @weights_log_weights + chosen_tile = @chosen_tile until prop_cells.empty? return if @contradiction @@ -557,17 +564,38 @@ def propagate len = list.length while i < len tp = list[i] + bit_tp = bit_table[tp] # Skip tiles already banned at the neighbour — decrementing # their supporter count would silently wrap past zero and - # waste work; the bit check below would suppress the ban - # anyway. - if (wave[nc] & bit_table[tp]) != 0 + # waste work. + if (wave[nc] & bit_tp) != 0 idx = (nc * t_max + tp) * 4 + opp_d count = compatible.getbyte(idx) - 1 compatible.setbyte(idx, count) if count == 0 - ban(nc, tp) - return if @contradiction + # Inlined fast-path of `ban(nc, tp)`. We already know + # `bit_tp` is set in `wave[nc]` from the check above, + # so the redundant gate in `ban` is skipped. The same + # state updates run; `ban` itself stays callable for + # `orphan_ban_pass` and `observe` where the gate is + # still needed. 
+ wave[nc] = wave[nc] ^ bit_tp + new_remaining = remaining[nc] - 1 + remaining[nc] = new_remaining + sum_w[nc] -= weights[tp] + sum_w_log_w[nc] -= weights_log_weights[tp] + if new_remaining == 0 + @contradiction = true + return + end + s = sum_w[nc] + entropies[nc] = ::Math.log(s) - sum_w_log_w[nc] / s + if new_remaining == 1 + @uncollapsed_count -= 1 + chosen_tile[nc] = wave[nc].bit_length - 1 + end + prop_cells.push(nc) + prop_tiles.push(tp) end end i += 1 From 9f803ad5b67f97d5241386eb2a5819688df9ef44 Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Tue, 26 May 2026 15:02:21 +0200 Subject: [PATCH 19/32] Add serena --- .serena/.gitignore | 2 + .serena/project.yml | 133 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 .serena/.gitignore create mode 100644 .serena/project.yml diff --git a/.serena/.gitignore b/.serena/.gitignore new file mode 100644 index 0000000..2e510af --- /dev/null +++ b/.serena/.gitignore @@ -0,0 +1,2 @@ +/cache +/project.local.yml diff --git a/.serena/project.yml b/.serena/project.yml new file mode 100644 index 0000000..6ddd4b3 --- /dev/null +++ b/.serena/project.yml @@ -0,0 +1,133 @@ +# the name by which the project can be referenced within Serena +project_name: "wave-function-collapse-ruby" + + +# list of languages for which language servers are started; choose from: +# al angular ansible bash clojure +# cpp cpp_ccls crystal csharp csharp_omnisharp +# dart elixir elm erlang fortran +# fsharp go groovy haskell haxe +# hlsl html java json julia +# kotlin lean4 lua luau markdown +# matlab msl nix ocaml pascal +# perl php php_phpactor powershell python +# python_jedi python_ty r rego ruby +# ruby_solargraph rust scala scss solidity +# svelte swift systemverilog terraform toml +# typescript typescript_vts vue yaml zig +# (This list may be outdated. 
For the current list, see values of Language enum here: +# https://github.com/oraios/serena/blob/main/src/solidlsp/ls_config.py +# For some languages, there are alternative language servers, e.g. csharp_omnisharp, ruby_solargraph.) +# Note: +# - For C, use cpp +# - For JavaScript, use typescript +# - For Angular projects, use angular (subsumes typescript+html; requires `npm install` in the project root) +# - For Svelte projects, use svelte (subsumes typescript/javascript for .svelte projects; requires npm) +# - For SCSS / Sass / plain CSS, use scss (some-sass-language-server handles all three) +# - For Free Pascal/Lazarus, use pascal +# Special requirements: +# Some languages require additional setup/installations. +# See here for details: https://oraios.github.io/serena/01-about/020_programming-languages.html#language-servers +# When using multiple languages, the first language server that supports a given file will be used for that file. +# The first language is the default language and the respective language server will be used as a fallback. +# Note that when using the JetBrains backend, language servers are not used and this list is correspondingly ignored. +languages: +- ruby + +# the encoding used by text files in the project +# For a list of possible encodings, see https://docs.python.org/3.11/library/codecs.html#standard-encodings +encoding: "utf-8" + +# line ending convention to use when writing source files. +# Possible values: unset (use global setting), "lf", "crlf", or "native" (platform default) +# This does not affect Serena's own files (e.g. memories and configuration files), which always use native line endings. +line_ending: + +# The language backend to use for this project. +# If not set, the global setting from serena_config.yml is used. +# Valid values: LSP, JetBrains +# Note: the backend is fixed at startup. If a project with a different backend +# is activated post-init, an error will be returned. 
+language_backend: + +# whether to use project's .gitignore files to ignore files +ignore_all_files_in_gitignore: true + +# advanced configuration option allowing to configure language server-specific options. +# Maps the language key to the options. +# Have a look at the docstring of the constructors of the LS implementations within solidlsp (e.g., for C# or PHP) to see which options are available. +# No documentation on options means no options are available. +ls_specific_settings: {} + +# list of additional workspace folder paths for cross-package reference support (e.g. in monorepos). +# Paths can be absolute or relative to the project root. +# Each folder is registered as an LSP workspace folder, enabling language servers to discover +# symbols and references across package boundaries. +# Currently supported for: TypeScript. +# Example: +# additional_workspace_folders: +# - ../sibling-package +# - ../shared-lib +additional_workspace_folders: [] + +# list of additional paths to ignore in this project. +# Same syntax as gitignore, so you can use * and **. +# Note: global ignored_paths from serena_config.yml are also applied additively. +ignored_paths: [] + +# whether the project is in read-only mode +# If set to true, all editing tools will be disabled and attempts to use them will result in an error +# Added on 2025-04-18 +read_only: false + +# list of tool names to exclude. +# This extends the existing exclusions (e.g. from the global configuration) +# Find the list of tools here: https://oraios.github.io/serena/01-about/035_tools.html +excluded_tools: [] + +# list of tools to include that would otherwise be disabled (particularly optional tools that are disabled by default). +# This extends the existing inclusions (e.g. from the global configuration). 
+# Find the list of tools here: https://oraios.github.io/serena/01-about/035_tools.html +included_optional_tools: [] + +# fixed set of tools to use as the base tool set (if non-empty), replacing Serena's default set of tools. +# This cannot be combined with non-empty excluded_tools or included_optional_tools. +# Find the list of tools here: https://oraios.github.io/serena/01-about/035_tools.html +fixed_tools: [] + +# list of mode names that are to be activated by default, overriding the setting in the global configuration. +# The full set of modes to be activated is base_modes (from global config) + default_modes + added_modes. +# If the setting is undefined/empty, the default_modes from the global configuration (serena_config.yml) apply. +# Otherwise, this overrides the setting from the global configuration (serena_config.yml). +# Therefore, you can set this to [] if you do not want the default modes defined in the global config to apply +# for this project. +# This setting can, in turn, be overridden by CLI parameters (--mode). +# See https://oraios.github.io/serena/02-usage/050_configuration.html#modes +default_modes: + +# list of mode names to be activated additionally for this project, e.g. ["query-projects"] +# The full set of modes to be activated is base_modes (from global config) + default_modes + added_modes. +# See https://oraios.github.io/serena/02-usage/050_configuration.html#modes +added_modes: + +# initial prompt for the project. It will always be given to the LLM upon activating the project +# (contrary to the memories, which are loaded on demand). +initial_prompt: "" + +# time budget (seconds) per tool call for the retrieval of additional symbol information +# such as docstrings or parameter information. +# This overrides the corresponding setting in the global configuration; see the documentation there. +# If null or missing, use the setting from the global configuration. 
+symbol_info_budget: + +# list of regex patterns which, when matched, mark a memory entry as read‑only. +# Extends the list from the global configuration, merging the two lists. +read_only_memory_patterns: [] + +# list of regex patterns for memories to completely ignore. +# Matching memories will not appear in list_memories or activate_project output +# and cannot be accessed via read_memory or write_memory. +# To access ignored memory files, use the read_file tool on the raw file path. +# Extends the list from the global configuration, merging the two lists. +# Example: ["_archive/.*", "_episodes/.*"] +ignored_memory_patterns: [] From 6cc5bd33d33a3511fa2125848adb3034171f35ba Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Tue, 26 May 2026 23:32:58 +0200 Subject: [PATCH 20/32] Handle zero-probability tiles without producing NaN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `weights_log_weights[t] = w * Math.log(w)` is NaN when w is 0, because Math.log(0) is -Infinity and 0 * -Infinity is NaN under IEEE 754. That NaN flows into `@initial_sum_w_log_w`, every cell's entropy, and the comparison inside `find_lowest_entropy_cell` (NaN < x is false for any x), so the solver silently picks nothing and `model.iterate until model.complete?` spins forever. The mathematical limit of `w * log(w)` as `w → 0` is 0, so use that explicitly. Add a regression test that exercises a zero-probability tile end-to-end. 
--- lib/wave_function_collapse/model.rb | 6 +++++- test/test_model.rb | 14 ++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index fbf95f3..17fec1f 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -193,7 +193,11 @@ def build_propagator while t < t_max w = tiles[t].probability.to_f weights[t] = w - wlogw = w * ::Math.log(w) + # `w * Math.log(w)` is NaN for w == 0 (since 0 * -Infinity = NaN), + # which would then propagate into every cell's entropy and make + # `find_lowest_entropy_cell` return nothing forever. The limit + # lim_{w→0} w*log(w) is 0, so use that. + wlogw = (w == 0.0) ? 0.0 : w * ::Math.log(w) weights_log_weights[t] = wlogw sum_w += w sum_w_log_w += wlogw diff --git a/test/test_model.rb b/test/test_model.rb index d06005d..2234448 100644 --- a/test/test_model.rb +++ b/test/test_model.rb @@ -71,4 +71,18 @@ def test_prepend_empty_row_3x3 assert_equal 3, model.entropy_at(1, 2) assert_equal 3, model.entropy_at(2, 2) end + + def test_zero_probability_tile_does_not_poison_entropy + # `w * Math.log(w)` is NaN for w == 0; if that leaks into the entropy + # table, `find_lowest_entropy_cell` never picks a cell and the solve + # loop spins forever. Regression for the bug found by /pr-bug-hunt. + tiles = [ + Tile.new(tileid: 0, wangid: [0, 0, 0, 0, 0, 0, 0, 0], probability: 1.0), + Tile.new(tileid: 1, wangid: [0, 0, 0, 0, 0, 0, 0, 0], probability: 0.0) + ] + model = Model.new(tiles, 2, 2) + model.iterate until model.complete? + assert model.complete? 
+ model.grid.each { |col| col.each { |t| refute_nil t } } + end end From 25b831e073891a33d6a15a816af671e4b5b87e90 Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Tue, 26 May 2026 23:33:41 +0200 Subject: [PATCH 21/32] Reject tilesets whose supporter counts would overflow a byte MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `@compatible` packs supporter counts into one byte per `(cell, tile, direction)` entry. If any propagator list contains more than 255 entries, `String#setbyte` writes `length & 0xff` at build time — silently wrapping. The downstream `orphan_ban_pass` then sees a fresh cell whose counts are already zero and bans every tile, which can drive `@uncollapsed_count` to 0 and `@contradiction` to true at the same time. `complete?` returned `true` while the wave was actually broken — a public-API invariant violation. Validate the propagator lists at construction and raise `WaveFunctionCollapse::Error` with a clear message when a list exceeds 255. Also strengthen `complete?` to require `!@contradiction`, so any future hole in the wrapping argument can't make the public signal lie. --- lib/wave_function_collapse/model.rb | 24 +++++++++++++++++++++++- test/test_model.rb | 7 +++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index 17fec1f..41d9bad 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -39,7 +39,7 @@ def initialize(tiles, width, height) end def complete? 
- @uncollapsed_count == 0 + @uncollapsed_count == 0 && !@contradiction end def percent @@ -184,6 +184,28 @@ def build_propagator @propagator = propagator.freeze @propagator_lists = propagator_lists.freeze + # Supporter counts live in a byte buffer (`@compatible`), so any + # propagator list above 255 entries would silently wrap modulo 256 + # at build time and corrupt the AC-4 invariants — `complete?` can + # even start returning true while the wave is actually contradicted. + # Reject these tilesets up front rather than producing wrong output. + d = 0 + while d < 4 + a = 0 + while a < t_max + if propagator_lists[d][a].length > 255 + ::Kernel.raise( + ::WaveFunctionCollapse::Error, + "tile #{a} has #{propagator_lists[d][a].length} compatible " \ + "neighbours in direction #{d}; the byte-packed supporter " \ + "counter only fits 0..255" + ) + end + a += 1 + end + d += 1 + end + # Weights weights = ::Array.new(t_max) weights_log_weights = ::Array.new(t_max) diff --git a/test/test_model.rb b/test/test_model.rb index 2234448..0b28d01 100644 --- a/test/test_model.rb +++ b/test/test_model.rb @@ -72,6 +72,13 @@ def test_prepend_empty_row_3x3 assert_equal 3, model.entropy_at(2, 2) end + def test_rejects_tileset_that_overflows_supporter_byte + # Supporter counts are stored as bytes (0..255). 256 mutually-compatible + # tiles would wrap and silently corrupt the wave; reject up front. 
+ tiles = 256.times.map { |i| Tile.new(tileid: i, wangid: [0, 0, 0, 0, 0, 0, 0, 0]) } + assert_raises(WaveFunctionCollapse::Error) { Model.new(tiles, 2, 1) } + end + def test_zero_probability_tile_does_not_poison_entropy # `w * Math.log(w)` is NaN for w == 0; if that leaks into the entropy # table, `find_lowest_entropy_cell` never picks a cell and the solve From 28a120a8e1d3ac37cfce610f55156f897945bcce Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Tue, 26 May 2026 23:36:26 +0200 Subject: [PATCH 22/32] Recover instead of silently resetting when prepend_empty_row contradicts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `prepend_empty_row` calls `rebuild_compatible_from_wave`, `orphan_ban_pass`, and `propagate`, any of which can set `@contradiction = true`. The method never inspected the flag, so it returned `true` even on failure and left the wave in a half-mutated state. The next `iterate` would then enter `observe_and_propagate`, see `@contradiction == true`, call `setup_wave_state`, and silently wipe every previously-streamed row — the exact thing the streaming contract was supposed to prevent. Clear the flag and the propagation stacks on entry so a leftover state from a prior failure can't poison the new pass, then check the flag after `propagate`. On contradiction, restore the model to a clean blank state via `setup_wave_state` and return `false` so callers know the prepend failed; on success, return `true` as before. 
--- lib/wave_function_collapse/model.rb | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index 41d9bad..d6d82df 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -72,6 +72,13 @@ def prepend_empty_row n = @cells_count shift_count = n - w + # Don't carry a leftover flag from an earlier failed run into the + # new pass — and reset the propagation stacks too, since they may + # still hold entries from a contradiction that returned early. + @contradiction = false + @prop_cells.clear + @prop_tiles.clear + # Shift state down in place: drop bottom row (cells 0..w-1), fill # the new top row with default values. Copying low-to-high is safe # because each source index (i + w) is greater than its destination. @@ -103,6 +110,17 @@ def prepend_empty_row rebuild_compatible_from_wave orphan_ban_pass propagate + + if @contradiction + # The new row can't be reconciled with the row below it. The + # wave is now half-mutated — if we returned anyway, the next + # `iterate` would observe `@contradiction == true`, call + # `setup_wave_state`, and silently wipe every streamed row. + # Reset to a clean blank state and tell the caller the prepend + # failed so it can decide what to do. + setup_wave_state + return false + end true end From 57f465cb5a128f3921f011456be69c4a5dbc0bb0 Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Wed, 27 May 2026 01:58:36 +0200 Subject: [PATCH 23/32] Materialise chosen_tile for the new row of single-tile tilesets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `prepend_empty_row` filled the new row's `@chosen_tile` slots with the generic `-1` sentinel, but `setup_wave_state` has a special branch that pre-fills `0` when `t_max == 1` (every cell is born collapsed). 
Without the matching branch in the prepend path, `complete?` reported true while `grid` returned `nil` for every new cell — the inserted row rendered as a blank strip. --- lib/wave_function_collapse/model.rb | 7 ++++++- test/test_model.rb | 10 ++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index d6d82df..ffdb0b3 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -87,7 +87,12 @@ def prepend_empty_row shift_uniform!(@sum_w, shift_count, @initial_sum_w) shift_uniform!(@sum_w_log_w, shift_count, @initial_sum_w_log_w) shift_uniform!(@entropies, shift_count, @initial_entropy) - shift_uniform!(@chosen_tile, shift_count, -1) + # When the tileset has a single tile every cell is born collapsed, + # so the new row's chosen_tile must point at tile 0 rather than the + # generic "uncollapsed" sentinel — mirroring the t_max==1 branch in + # setup_wave_state. Without this, complete? returns true (because + # remaining[c] == 1) while grid returns nil for the whole new row. + shift_uniform!(@chosen_tile, shift_count, (@num_tiles == 1) ? 0 : -1) noise = @noise i = 0 diff --git a/test/test_model.rb b/test/test_model.rb index 0b28d01..0b9fa56 100644 --- a/test/test_model.rb +++ b/test/test_model.rb @@ -72,6 +72,16 @@ def test_prepend_empty_row_3x3 assert_equal 3, model.entropy_at(2, 2) end + def test_prepend_empty_row_fills_chosen_tile_for_single_tile_set + # With a single-tile tileset every cell is already collapsed, so the + # new row inserted by prepend_empty_row must materialise tile 0 in + # `grid` — not return nil because chosen_tile is still -1. 
+ tiles = [Tile.new(tileid: 42, wangid: [0, 0, 0, 0, 0, 0, 0, 0])] + model = Model.new(tiles, 2, 2) + model.prepend_empty_row + model.grid.each { |col| col.each { |t| assert_equal 42, t.tileid } } + end + def test_rejects_tileset_that_overflows_supporter_byte # Supporter counts are stored as bytes (0..255). 256 mutually-compatible # tiles would wrap and silently corrupt the wave; reject up front. From c27ee0c9254c0c422b485509baf6315e2264a91c Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Wed, 27 May 2026 01:59:25 +0200 Subject: [PATCH 24/32] Key intern_edge by the triple itself instead of a packed integer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `(a << 16) | (b << 8) | c` only encodes the triple losslessly when each component fits in 8 bits, so two distinct edges — for example `[1, 0, 256]` and `[1, 1, 0]` — collided to the same cache key. The hash then handed out one frozen Array for both, the propagator's edge-ID dedup treated incompatible edges as equal, and the algorithm silently allowed illegal adjacencies for any Wang ID ≥ 256. Use the triple itself as the key. Array#hash and #eql? are content-based, so the cache still dedupes correctly and the lookup is collision-free for any integer components. --- lib/wave_function_collapse/tile.rb | 7 ++++--- test/test_tile.rb | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) create mode 100644 test/test_tile.rb diff --git a/lib/wave_function_collapse/tile.rb b/lib/wave_function_collapse/tile.rb index b89a90b..736e918 100644 --- a/lib/wave_function_collapse/tile.rb +++ b/lib/wave_function_collapse/tile.rb @@ -5,12 +5,13 @@ class Tile attr_reader :tileid, :probability, :up, :right, :down, :left # Tilesets typically share edge signatures across many tiles, so intern - # the 3-element edge arrays in a class-level cache keyed by the packed - # wang IDs. 
Collapses 4-per-tile array allocations to one per unique + # the 3-element edge arrays in a class-level cache keyed by the triple + # itself. Collapses 4-per-tile array allocations to one per unique # signature. Array#hash is value-based, so consumers that key off these # arrays (build_propagator's edge_id dedup) keep working unchanged. def self.intern_edge(a, b, c) - (@edges ||= {})[(a << 16) | (b << 8) | c] ||= [a, b, c].freeze + key = [a, b, c] + (@edges ||= {})[key] ||= key.freeze end def initialize(tileid:, wangid:, probability: 1.0) diff --git a/test/test_tile.rb b/test/test_tile.rb new file mode 100644 index 0000000..06b62d4 --- /dev/null +++ b/test/test_tile.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +require "test_helper" + +class TestTile < Minitest::Test + def test_intern_edge_does_not_collide_for_wide_wang_ids + # Earlier the cache key was packed as `(a << 16) | (b << 8) | c`, which + # silently collided once any component exceeded 255. Regression: the + # interning cache must distinguish triples with components ≥ 256. + e1 = Tile.intern_edge(1, 0, 256) + e2 = Tile.intern_edge(1, 1, 0) + assert_equal [1, 0, 256], e1 + assert_equal [1, 1, 0], e2 + refute_equal e1, e2 + end + + def test_intern_edge_returns_same_object_for_equal_triples + e1 = Tile.intern_edge(3, 4, 5) + e2 = Tile.intern_edge(3, 4, 5) + assert_same e1, e2 + end +end From 1b804ce7eae0e40cdeb3c9c98231731a157e98c9 Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Wed, 27 May 2026 02:00:22 +0200 Subject: [PATCH 25/32] Cap consecutive restarts in observe_and_propagate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A tileset that orphan_ban_pass can't fully reject — i.e. one where setup_wave_state leaves some cells uncollapsed but the wave is unsolvable — would push observe_and_propagate into an unbounded observe → contradiction → setup_wave_state loop. The caller's `iterate until complete?` then hangs with no progress and no signal. 
Cap the loop at MAX_RESTARTS=100 consecutive contradictions and raise `WaveFunctionCollapse::Error` instead of looping. Solvable tilesets finish well under the cap; broken inputs now fail visibly. --- lib/wave_function_collapse/model.rb | 16 ++++++++++++++++ test/test_model.rb | 22 ++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index ffdb0b3..4dc314a 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -15,6 +15,12 @@ class Model DY = [1, 0, -1, 0].freeze OPP = [2, 3, 0, 1].freeze + # Upper bound on consecutive contradiction restarts before + # `observe_and_propagate` gives up. Solvable tilesets almost always + # succeed in one or two attempts; inherently-broken inputs would + # otherwise loop forever. + MAX_RESTARTS = 100 + attr_reader :tiles, :width, :height, :max_entropy def initialize(tiles, width, height) @@ -466,6 +472,7 @@ def orphan_ban_pass # ---- core observe / ban / propagate ----------------------------------------- def observe_and_propagate + restarts = 0 loop do c = find_lowest_entropy_cell return false unless c @@ -474,6 +481,15 @@ def observe_and_propagate propagate if @contradiction + restarts += 1 + if restarts > MAX_RESTARTS + ::Kernel.raise( + ::WaveFunctionCollapse::Error, + "exceeded #{MAX_RESTARTS} consecutive contradiction " \ + "restarts; the tileset may be inherently unsolvable on " \ + "this grid" + ) + end # Restart: rebuild wave state and try again. 
setup_wave_state next diff --git a/test/test_model.rb b/test/test_model.rb index 0b9fa56..e918c8a 100644 --- a/test/test_model.rb +++ b/test/test_model.rb @@ -89,6 +89,28 @@ def test_rejects_tileset_that_overflows_supporter_byte assert_raises(WaveFunctionCollapse::Error) { Model.new(tiles, 2, 1) } end + def test_observe_and_propagate_caps_consecutive_restarts + # If the tileset is inherently unsolvable but orphan_ban_pass leaves + # some cells with multiple candidates, the solver would otherwise + # cycle observe → contradiction → setup_wave_state forever. Force + # that condition by overriding setup_wave_state to always end in a + # contradiction state, and verify the cap fires instead of hanging. + klass = Class.new(Model) do + def setup_wave_state + super + @contradiction = true + end + end + tiles = [ + Tile.new(tileid: 0, wangid: [0, 0, 0, 0, 0, 0, 0, 0]), + Tile.new(tileid: 1, wangid: [0, 0, 0, 0, 0, 0, 0, 0]) + ] + model = klass.new(tiles, 2, 2) + assert_raises(WaveFunctionCollapse::Error) do + model.iterate until model.complete? + end + end + def test_zero_probability_tile_does_not_poison_entropy # `w * Math.log(w)` is NaN for w == 0; if that leaks into the entropy # table, `find_lowest_entropy_cell` never picks a cell and the solve From 1813bb07d33b4a37dcc896378315de94539f6041 Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Wed, 27 May 2026 09:42:56 +0200 Subject: [PATCH 26/32] Cache map into a Gosu macro and batch iterates per frame Renderer hot path was the bottleneck for bin/run: - @model.grid allocated a fresh 2-D Array every update - draw_map iterated 64x36 cells and called draw_text_rel per uncollapsed cell, each frame - @times.sort ran on an unbounded array every frame for P90/P99 - iterate was capped at one call per update tick (~60Hz) Cache the grid into a Gosu macro via record(); rebuild only when Model#generation advances. 
Run iterate in a per-update time budget (ITERATE_BUDGET) and drop update_interval so the framework no longer sleeps between ticks. Throttle map redraws to ~30Hz via needs_redraw? to keep the macro rebuild from contending with iterate. Add Model#generation (bumped from observe_and_propagate, setup_wave_state, and prepend_empty_row success paths) and Model#tile_id_at so the window can read state without going through the allocating #grid accessor. Pre-build the 256 entropy overlay colors. Cap @times to a 240-entry rolling buffer. Add E key to toggle the entropy overlay. --- lib/wave_function_collapse/model.rb | 13 +- lib/wave_function_collapse/window.rb | 176 +++++++++++++++++++++------ 2 files changed, 148 insertions(+), 41 deletions(-) diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index 4dc314a..403171a 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -21,7 +21,7 @@ class Model # otherwise loop forever. MAX_RESTARTS = 100 - attr_reader :tiles, :width, :height, :max_entropy + attr_reader :tiles, :width, :height, :max_entropy, :generation def initialize(tiles, width, height) @tiles = tiles @@ -30,6 +30,7 @@ def initialize(tiles, width, height) @num_tiles = tiles.length @max_entropy = @num_tiles @cells_count = @width * @height + @generation = 0 build_propagator build_initial_state @@ -56,6 +57,13 @@ def entropy_at(x, y) @remaining[y * @width + x] end + # Tileset asset id (Integer) at (x, y), or nil if uncollapsed. Lighter + # than `tile_at` for hot draw paths that only need the asset id. + def tile_id_at(x, y) + t = @chosen_tile[y * @width + x] + t < 0 ? 
nil : @tiles[t].tileid + end + def solve observe_and_propagate true @@ -132,6 +140,7 @@ def prepend_empty_row setup_wave_state return false end + @generation += 1 true end @@ -369,6 +378,7 @@ def setup_wave_state @compatible.replace(@initial_compatible) orphan_ban_pass propagate + @generation += 1 end # The initial supporter-count buffer is fully determined by the tileset @@ -494,6 +504,7 @@ def observe_and_propagate setup_wave_state next end + @generation += 1 return true end end diff --git a/lib/wave_function_collapse/window.rb b/lib/wave_function_collapse/window.rb index 1807a46..27c9437 100644 --- a/lib/wave_function_collapse/window.rb +++ b/lib/wave_function_collapse/window.rb @@ -5,8 +5,29 @@ module WaveFunctionCollapse class Window < Gosu::Window WIDTH = 1280 HEIGHT = 720 + # Rolling window for per-iteration timing stats. Bounded so that + # sorting for P90/P99 each frame stays O(TIMES_CAPACITY log N) instead + # of drifting up with run length. + TIMES_CAPACITY = 240 + # Per-`update` budget (seconds) for running model iterations. With + # update_interval lowered below, `update` is effectively called as + # fast as it returns, so this controls how long we batch model work + # before yielding back for a possible redraw. + ITERATE_BUDGET = 0.014 + # Minimum gap (seconds) between consecutive map redraws while + # generation is in progress. ~30 Hz is plenty for watching the wave + # collapse, and keeps the macro rebuild from eating into iterate + # time. + DRAW_INTERVAL = 1.0 / 30 + # Gosu's main loop sleeps until the next update_interval boundary. + # Default is ~16.6 ms (60 Hz); lowering it removes that idle gap so + # we can spend the wall-clock time iterating instead. The draw rate + # is throttled separately via `needs_redraw?`. 
+ UPDATE_INTERVAL_MS = 2 + def initialize super(WIDTH, HEIGHT) + self.update_interval = UPDATE_INTERVAL_MS self.caption = "Wave Function Collapse in Ruby" @font = Gosu::Font.new(14) @small_font = Gosu::Font.new(12) @@ -16,9 +37,17 @@ def initialize @tiles = Gosu::Image.load_tiles("assets/#{@map_json["image"]}", @tile_width, @tile_height, tileable: true) @times = [] @paused = false + @show_entropy = true + @last_iterates_per_frame = 0 @labels = [] + # Pre-build the 256 entropy overlay colors so the per-cell text draw + # doesn't allocate a Gosu::Color each call. + @entropy_colors = Array.new(256) { |i| Gosu::Color.new(160, i, 255 - i, 0) } @model = nil - @map = nil + @map_macro = nil + @last_rendered_generation = -1 + @force_redraw = true + @last_drawn_at = 0.0 @started_at = nil @finished_at = nil defaults @@ -26,29 +55,56 @@ def initialize def defaults @model = Model.new(build_tiles, WIDTH.div(@tile_width), HEIGHT.div(@tile_height)) - @map = nil @started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC) @finished_at = nil + @map_macro = nil + @last_rendered_generation = -1 + @force_redraw = true + @last_iterates_per_frame = 0 + @last_drawn_at = 0.0 end def update - @labels = [] - if @map.nil? - @model.solve - @map = @model.grid - end - return if @paused + return if @model.complete? - unless @model.complete? - time_start = Process.clock_gettime(Process::CLOCK_MONOTONIC) + frame_start = Process.clock_gettime(Process::CLOCK_MONOTONIC) + iters = 0 + until @model.complete? + t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC) @model.iterate - @times << Process.clock_gettime(Process::CLOCK_MONOTONIC) - time_start - @map = @model.grid + elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - t0 + @times << elapsed + @times.shift if @times.size > TIMES_CAPACITY + iters += 1 + break if (Process.clock_gettime(Process::CLOCK_MONOTONIC) - frame_start) >= ITERATE_BUDGET + end + @last_iterates_per_frame = iters + end + + def needs_redraw? 
+ return true if @force_redraw + return false if @paused + if @model.complete? + return @model.generation != @last_rendered_generation end + # Throttle redraws while generating so the macro rebuild doesn't + # contend with iterate for CPU. The first draw of a new state and + # any externally-forced redraw still go through. + (Process.clock_gettime(Process::CLOCK_MONOTONIC) - @last_drawn_at) >= DRAW_INTERVAL end def draw + @last_drawn_at = Process.clock_gettime(Process::CLOCK_MONOTONIC) + @force_redraw = false + @labels.clear + + if @model.generation != @last_rendered_generation + rebuild_map_macro + @last_rendered_generation = @model.generation + end + @map_macro&.draw(0, 0, ZOrder::MAP) + if @model.complete? @finished_at ||= Process.clock_gettime(Process::CLOCK_MONOTONIC) time = @finished_at - @started_at @@ -57,23 +113,30 @@ def draw else time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - @started_at add_label("Generating #{@model.width}x#{@model.height}. Elapsed #{"%02.2f" % time}s. #{"%02.2f" % @model.percent}% complete.") - add_label("Press P to pause/unpause, R to restart.") + add_label("Press P to pause/unpause, R to restart, E to toggle entropy overlay.") end + + add_label("Iterates/frame: #{@last_iterates_per_frame} | Entropy overlay: #{@show_entropy ? "ON" : "OFF"} (E)") + if (last_time = @times.last) mss = last_time * 1000 color = (mss > 16) ? Gosu::Color::RED : Gosu::Color::GREEN add_label("Last iteration: #{"%03.2f" % mss}ms", color) end - draw_map - average_time_mss = (@times.sum / @times.size.to_f) * 1000 - add_label("AVG(mss)=#{"%03.2f" % average_time_mss}ms", (average_time_mss > 16) ? Gosu::Color::RED : Gosu::Color::GREEN) + unless @times.empty? + sorted = @times.sort + avg_mss = (@times.sum / @times.size.to_f) * 1000 + add_label("AVG(mss)=#{"%03.2f" % avg_mss}ms", (avg_mss > 16) ? Gosu::Color::RED : Gosu::Color::GREEN) - p90_time = @times.sort[(@times.size * 0.9).to_i] * 1000 - add_label("P90(mss)=#{"%03.2f" % p90_time}ms", (p90_time > 16) ? 
Gosu::Color::RED : Gosu::Color::GREEN) + p90_mss = sorted[(sorted.size * 0.9).to_i] * 1000 + add_label("P90(mss)=#{"%03.2f" % p90_mss}ms", (p90_mss > 16) ? Gosu::Color::RED : Gosu::Color::GREEN) - p99_time = @times.sort[(@times.size * 0.99).to_i] * 1000 - add_label("P99(mss)=#{"%03.2f" % p99_time}ms", (p99_time > 16) ? Gosu::Color::RED : Gosu::Color::GREEN) + p99_mss = sorted[(sorted.size * 0.99).to_i] * 1000 + add_label("P99(mss)=#{"%03.2f" % p99_mss}ms", (p99_mss > 16) ? Gosu::Color::RED : Gosu::Color::GREEN) + end + + add_label("FPS: #{Gosu.fps}") if @paused @font.draw_text_rel("Paused", WIDTH / 2, HEIGHT / 2, ZOrder::UI, 0.5, 0.5) @@ -86,18 +149,29 @@ def button_down(id) case id when Gosu::KB_R puts "Restarting..." + @times = [] defaults when Gosu::KB_A if @model.complete? puts "Adding empty row..." @times = [] @model.prepend_empty_row + @finished_at = nil + @started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC) + @force_redraw = true end when Gosu::KB_P @paused = !@paused + @force_redraw = true + when Gosu::KB_E + @show_entropy = !@show_entropy + # Invalidate the cached macro so the toggle takes effect immediately. + @last_rendered_generation = -1 + @force_redraw = true when Gosu::KB_S puts "Solving..." 
@model.solve + @force_redraw = true end end @@ -121,28 +195,50 @@ def draw_labels @small_font.draw_text_rel(version_label, WIDTH - 4, HEIGHT - 2, ZOrder::UI, 1.0, 1.0, 1, 1, Gosu::Color::GRAY) end - def draw_map - @map.each_with_index do |column, x| - column.reverse.each_with_index do |tile, y| - inverted_y = (y - @model.height + 1).abs - - entropy = @model.entropy_at(x, inverted_y) - - if entropy > 1 - percent_entropy = (entropy.to_f / @model.max_entropy * 255).round - color = Gosu::Color.new(160, percent_entropy, 255 - percent_entropy, 0) - @small_font.draw_text_rel( - entropy, - x * @tile_width + (@tile_width / 2), - y * @tile_height + (@tile_height / 2), - ZOrder::MAP, 0.5, 0.5, 1, 1, color - ) - end + # Re-record the full grid into a Gosu macro so subsequent frames replay + # it as one batched draw instead of per-cell Ruby calls. Called only + # when @model.generation advances (i.e. iterate ran), and we also lazily + # skip the costly entropy overlay unless the user has toggled it on. 
+ def rebuild_map_macro + model = @model + width = model.width + height = model.height + tw = @tile_width + th = @tile_height + tiles = @tiles + show_entropy = @show_entropy + max_entropy = model.max_entropy.to_f + small_font = @small_font + entropy_colors = @entropy_colors - next unless tile + @map_macro = record(WIDTH, HEIGHT) do + x = 0 + while x < width + screen_x = x * tw + y = 0 + while y < height + screen_y = (height - 1 - y) * th - image = @tiles[tile.tileid] - image.draw(x * @tile_width, y * @tile_height, ZOrder::MAP) + tile_id = model.tile_id_at(x, y) + if tile_id + tiles[tile_id].draw(screen_x, screen_y, 0) + elsif show_entropy + entropy = model.entropy_at(x, y) + if entropy > 1 + percent_entropy = (entropy / max_entropy * 255).to_i + percent_entropy = 255 if percent_entropy > 255 + percent_entropy = 0 if percent_entropy < 0 + small_font.draw_text_rel( + entropy, + screen_x + (tw / 2), + screen_y + (th / 2), + 0, 0.5, 0.5, 1, 1, entropy_colors[percent_entropy] + ) + end + end + y += 1 + end + x += 1 end end end From b033fb1dffdbf731e67cf3d47d8df8002f97cb71 Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Wed, 27 May 2026 09:43:08 +0200 Subject: [PATCH 27/32] Support WxH sizes and stream progress in bin/benchmark - Accept "WxH" in SIZES/LEGACY_SIZES/NEW_SIZES, not just square N - Add a unified SIZES env var; each model also falls back to the other's *_SIZES when only one is set - Print the model/grid/cells prefix before runs so it's visible which size is being measured rather than appearing only after it finishes - Right-align header columns to match the %9.3fs data columns --- bin/benchmark | 52 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/bin/benchmark b/bin/benchmark index 3ef4d1c..c64b139 100755 --- a/bin/benchmark +++ b/bin/benchmark @@ -77,23 +77,45 @@ puts RUBY_DESCRIPTION puts "Tiles loaded: #{tiles.size}, YJIT=#{yjit_status}" puts -legacy_sizes = (ENV["LEGACY_SIZES"] || 
"10,15,20,25,30").split(",").map(&:to_i) -new_sizes = (ENV["NEW_SIZES"] || "10,15,20,30,50,75,100").split(",").map(&:to_i) +# Accepts either "N" (treated as NxN) or "WxH" entries. +def parse_sizes(str) + str.split(",").map do |tok| + tok = tok.strip + if tok.include?("x") + w, h = tok.split("x", 2).map(&:to_i) + [w, h] + else + n = tok.to_i + [n, n] + end + end +end + +# SIZES overrides both lists. Otherwise each model uses its own *_SIZES var if +# set, or falls back to the other model's setting, or finally its own default. +shared = ENV["SIZES"] +legacy_env = ENV["LEGACY_SIZES"] +new_env = ENV["NEW_SIZES"] +legacy_sizes = parse_sizes(shared || legacy_env || new_env || "10,15,20,25,30") +new_sizes = parse_sizes(shared || new_env || legacy_env || "10,15,20,30,50,64x36,75,100") runs = Integer(ENV["RUNS"] || "3") skip_legacy = ARGV.include?("--no-legacy") show_gc = ENV["GC_STATS"] +# Header widths match the data printf below so columns align. +# Data: "%-8s %-9s %-7d %9.3fs %9.3fs %12.0f %10d" (+ GC: %12d %14d %8d %8d) +# %9.3fs renders as 10 chars (9 numeric + literal 's'). 
if show_gc - printf("%-8s %-9s %-7s %-10s %-10s %-12s %-10s %-12s %-14s %-8s %-8s\n", + printf("%-8s %-9s %-7s %10s %10s %12s %10s %12s %14s %8s %8s\n", "Model", "Grid", "Cells", "Median", "Best", "Obs/sec", "Iters", "Alloc/run", "Malloc B/run", "MinorGC", "MajorGC") - printf("%-8s %-9s %-7s %-10s %-10s %-12s %-10s %-12s %-14s %-8s %-8s\n", + printf("%-8s %-9s %-7s %10s %10s %12s %10s %12s %14s %8s %8s\n", "-----", "----", "-----", "------", "----", "-------", "-----", "---------", "------------", "-------", "-------") else - printf("%-8s %-9s %-7s %-10s %-10s %-12s %-10s\n", + printf("%-8s %-9s %-7s %10s %10s %12s %10s\n", "Model", "Grid", "Cells", "Median", "Best", "Obs/sec", "Iters") - printf("%-8s %-9s %-7s %-10s %-10s %-12s %-10s\n", + printf("%-8s %-9s %-7s %10s %10s %12s %10s\n", "-----", "----", "-----", "------", "----", "-------", "-----") end @@ -101,13 +123,17 @@ end [WaveFunctionCollapse::LegacyModel, skip_legacy ? [] : legacy_sizes, "Legacy"], [WaveFunctionCollapse::Model, new_sizes, "New"] ].each do |klass, sizes, label| - sizes.each do |s| + sizes.each do |(w, h)| + cells = w * h + grid = "#{w}x#{h}" + printf("%-8s %-9s %-7d ", label, grid, cells) + $stdout.flush times = [] iters_seen = 0 gc_deltas = [] runs.times do |r| - srand(s * 1000 + r + 1) - elapsed, iters, gc = run_once(klass, tiles, s, s) + srand(cells * 1000 + r + 1) + elapsed, iters, gc = run_once(klass, tiles, w, h) times << elapsed iters_seen = iters gc_deltas << gc @@ -116,13 +142,13 @@ end best = times.min if show_gc avg = ->(k) { gc_deltas.sum { |g| g[k] } / gc_deltas.size } - printf("%-8s %-9s %-7d %9.3fs %9.3fs %12.0f %10d %12d %14d %8d %8d\n", - label, "#{s}x#{s}", s * s, med, best, iters_seen / med, iters_seen, + printf("%9.3fs %9.3fs %12.0f %10d %12d %14d %8d %8d\n", + med, best, iters_seen / med, iters_seen, avg.call(:total_allocated_objects), avg.call(:malloc_increase_bytes), avg.call(:minor_gc_count), avg.call(:major_gc_count)) else - printf("%-8s %-9s %-7d %9.3fs %9.3fs 
%12.0f %10d\n", - label, "#{s}x#{s}", s * s, med, best, iters_seen / med, iters_seen) + printf("%9.3fs %9.3fs %12.0f %10d\n", + med, best, iters_seen / med, iters_seen) end end puts From 35cee4d785db85da986f89d4c58cfffd9324703c Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Wed, 27 May 2026 12:23:39 +0200 Subject: [PATCH 28/32] Split wave into chunked Fixnums for allocation-free propagation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hot path was bottlenecked on Bignum arithmetic: with 188 tiles, the wave mask was a 188-bit Bignum, so every `wave[c] & bit` and `wave[c] ^ bit` in propagate/ban/observe allocated a fresh Bignum. Profiling showed Integer#& at 31% of total wall time and ~11% in GC. Store the wave as @chunk_count parallel Fixnum arrays of 62-bit chunks (WAVE_CHUNK_BITS = 62 keeps each chunk in tagged-integer land on MRI). Precompute @propagator_chunks[d][t][ch] in the same layout so the inner loop in propagate becomes a Fixnum AND of the wave chunk with the propagator mask, iterated via `m & -m` / `m ^= lowest`. This walks exactly the tiles that still need a supporter-count decrement and skips already-banned tiles for free — no per-tile bit test, no Bignum allocations. Observe, ban, orphan_ban_pass, and prepend_empty_row updated to the same chunked iteration. rebuild_compatible_from_wave sums popcounts per chunk instead of operating on a full-width Bignum. At 64x36 with YJIT: 1.193s -> 0.438s median (~2.7x), 0 minor GCs per run, ~19k allocations per run. 
--- lib/wave_function_collapse/model.rb | 313 ++++++++++++++++++---------- 1 file changed, 201 insertions(+), 112 deletions(-) diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index 403171a..4b3a7ae 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -1,20 +1,27 @@ # frozen_string_literal: true module WaveFunctionCollapse - # Wave Function Collapse — bitmask wave + AC-4 compatible counter. + # Wave Function Collapse — chunked-Fixnum wave + AC-4 compatible counter. # - # Each cell's domain is a single Integer bitmask (`@wave[c]`). Adjacency is - # precomputed as `@propagator[d][t]` masks and `@propagator_lists[d][t]` - # index arrays. Supporter counts are kept in a flat byte buffer - # (`@compatible`, addressed via `setbyte`/`getbyte`) — when a count hits - # zero the tile is banned at that cell, which kicks off iterative - # propagation through an explicit stack. Entropy is maintained - # incrementally per cell. + # Each cell's domain is split across `@chunk_count` parallel Fixnum arrays + # (`@wave_chunks[ch][c]`), each chunk holding up to `WAVE_CHUNK_BITS` tiles. + # Keeping every chunk a Fixnum lets the hot propagation loop AND a wave + # chunk with a precomputed `@propagator_chunks[d][t][ch]` mask and iterate + # the resulting set bits — no Bignum allocations on the inner path. + # Supporter counts live in a flat byte buffer (`@compatible`); when a count + # hits zero the tile is banned at that cell, which kicks off iterative + # propagation through an explicit stack. Entropy is maintained incrementally + # per cell. class Model DX = [0, 1, 0, -1].freeze DY = [1, 0, -1, 0].freeze OPP = [2, 3, 0, 1].freeze + # Bits per wave chunk. MRI's Fixnum holds 62 unsigned bits before + # promoting to Bignum, so 62 keeps every wave/propagator chunk in + # tagged-integer land and every `&`/`^`/`& -m`/`bit_length` op cheap. 
+ WAVE_CHUNK_BITS = 62 + # Upper bound on consecutive contradiction restarts before # `observe_and_propagate` gives up. Solvable tilesets almost always # succeed in one or two attempts; inherently-broken inputs would @@ -31,6 +38,7 @@ def initialize(tiles, width, height) @max_entropy = @num_tiles @cells_count = @width * @height @generation = 0 + @chunk_count = (@num_tiles + WAVE_CHUNK_BITS - 1) / WAVE_CHUNK_BITS build_propagator build_initial_state @@ -96,7 +104,11 @@ def prepend_empty_row # Shift state down in place: drop bottom row (cells 0..w-1), fill # the new top row with default values. Copying low-to-high is safe # because each source index (i + w) is greater than its destination. - shift_uniform!(@wave, shift_count, @full_mask) + ch = 0 + while ch < @chunk_count + shift_uniform!(@wave_chunks[ch], shift_count, @full_chunk_masks[ch]) + ch += 1 + end shift_uniform!(@remaining, shift_count, @num_tiles) shift_uniform!(@sum_w, shift_count, @initial_sum_w) shift_uniform!(@sum_w_log_w, shift_count, @initial_sum_w_log_w) @@ -167,6 +179,7 @@ def generate_grid def build_propagator tiles = @tiles t_max = @num_tiles + chunk_count = @chunk_count # Canonical integer ID per unique edge signature (Array of 3 ints). edge_id = {} @@ -187,10 +200,15 @@ def build_propagator # Edge per (tile, direction). Index by direction id: 0=up,1=right,2=down,3=left. edges_per_dir = [ups, rights, downs, lefts] - # propagator[d][a] = bitmask of b such that match(a, d, b) — i.e. - # tile a's edge in dir d equals tile b's edge in opposite(d). + # propagator_chunks[d][a][ch] = Fixnum mask of tiles in chunk `ch` + # such that match(a, d, b) — i.e. tile a's edge in dir d equals + # tile b's edge in opposite(d). Also keep `propagator_counts[d][a]` + # = popcount of all chunks (initial supporter count for an interior + # cell). Bignum `propagator[d][a]` is built once for + # `rebuild_compatible_from_wave` (cold path) only. 
propagator = ::Array.new(4) { ::Array.new(t_max, 0) } - propagator_lists = ::Array.new(4) { ::Array.new(t_max) } + propagator_chunks = ::Array.new(4) { ::Array.new(t_max) { ::Array.new(chunk_count, 0) } } + propagator_counts = ::Array.new(4) { ::Array.new(t_max, 0) } d = 0 while d < 4 @@ -201,40 +219,47 @@ def build_propagator while a < t_max my_edge = my_edges[a] mask = 0 - list = [] + count = 0 + chunks = propagator_chunks[d][a] b = 0 while b < t_max if opp_edges[b] == my_edge mask |= (1 << b) - list << b + ch = b / WAVE_CHUNK_BITS + chunks[ch] |= (1 << (b - ch * WAVE_CHUNK_BITS)) + count += 1 end b += 1 end propagator[d][a] = mask - propagator_lists[d][a] = list.freeze + propagator_counts[d][a] = count + chunks.freeze a += 1 end propagator[d].freeze - propagator_lists[d].freeze + propagator_chunks[d].each(&:freeze) + propagator_chunks[d].freeze + propagator_counts[d].freeze d += 1 end @propagator = propagator.freeze - @propagator_lists = propagator_lists.freeze + @propagator_chunks = propagator_chunks.freeze + @propagator_counts = propagator_counts.freeze # Supporter counts live in a byte buffer (`@compatible`), so any - # propagator list above 255 entries would silently wrap modulo 256 - # at build time and corrupt the AC-4 invariants — `complete?` can + # propagator count above 255 would silently wrap modulo 256 at + # build time and corrupt the AC-4 invariants — `complete?` can # even start returning true while the wave is actually contradicted. # Reject these tilesets up front rather than producing wrong output. 
d = 0 while d < 4 a = 0 while a < t_max - if propagator_lists[d][a].length > 255 + if propagator_counts[d][a] > 255 ::Kernel.raise( ::WaveFunctionCollapse::Error, - "tile #{a} has #{propagator_lists[d][a].length} compatible " \ + "tile #{a} has #{propagator_counts[d][a]} compatible " \ "neighbours in direction #{d}; the byte-packed supporter " \ "counter only fits 0..255" ) @@ -269,10 +294,18 @@ def build_propagator @initial_sum_w_log_w = sum_w_log_w @initial_entropy = ::Math.log(sum_w) - sum_w_log_w / sum_w - @full_mask = (1 << t_max) - 1 - # Precomputed `1 << t` per tile — saves a Bignum allocation per - # propagation inner iteration and ban call. - @bit = ::Array.new(t_max) { |t| 1 << t }.freeze + # Per-tile chunk index + Fixnum bit-within-chunk mask. Used by `ban` + # and `observe`, where iteration is by absolute tile index. + @chunk_of = ::Array.new(t_max) { |i| i / WAVE_CHUNK_BITS }.freeze + @bit_in_chunk = ::Array.new(t_max) { |i| 1 << (i - (i / WAVE_CHUNK_BITS) * WAVE_CHUNK_BITS) }.freeze + + # Full-domain mask per chunk. The last chunk only has `t_max % + # WAVE_CHUNK_BITS` tiles; everything else is 62 bits. + @full_chunk_masks = ::Array.new(chunk_count) { |ch| + bits = t_max - ch * WAVE_CHUNK_BITS + bits = WAVE_CHUNK_BITS if bits > WAVE_CHUNK_BITS + (1 << bits) - 1 + }.freeze # Precompute the 4-byte-per-tile block representing an interior cell's # initial supporter counts (one byte per direction). 
Used to build the @@ -282,10 +315,10 @@ def build_propagator t = 0 while t < t_max base = t * 4 - block.setbyte(base, propagator_lists[0][t].length) - block.setbyte(base + 1, propagator_lists[1][t].length) - block.setbyte(base + 2, propagator_lists[2][t].length) - block.setbyte(base + 3, propagator_lists[3][t].length) + block.setbyte(base, propagator_counts[0][t]) + block.setbyte(base + 1, propagator_counts[1][t]) + block.setbyte(base + 2, propagator_counts[2][t]) + block.setbyte(base + 3, propagator_counts[3][t]) t += 1 end @interior_block = block.freeze @@ -298,7 +331,7 @@ def build_initial_state @prop_tiles = [] # Pre-allocate per-cell state arrays once; setup_wave_state resets # them in place via Array#fill (no per-restart allocations). - @wave = ::Array.new(n) + @wave_chunks = ::Array.new(@chunk_count) { ::Array.new(n) } @remaining = ::Array.new(n) @sum_w = ::Array.new(n) @sum_w_log_w = ::Array.new(n) @@ -351,7 +384,11 @@ def setup_wave_state # Reset per-cell state in place — buffers are pre-allocated in # build_initial_state, so contradiction restarts don't churn the GC. 
- @wave.fill(@full_mask) + ch = 0 + while ch < @chunk_count + @wave_chunks[ch].fill(@full_chunk_masks[ch]) + ch += 1 + end @remaining.fill(t_max) @sum_w.fill(@initial_sum_w) @sum_w_log_w.fill(@initial_sum_w_log_w) @@ -426,8 +463,9 @@ def rebuild_compatible_from_wave n = @cells_count t_max = @num_tiles neighbours = @neighbours - propagator = @propagator - wave = @wave + propagator_chunks = @propagator_chunks + wave_chunks = @wave_chunks + chunk_count = @chunk_count buf = @compatible buf.replace(@compatible_fill) @@ -438,10 +476,14 @@ def rebuild_compatible_from_wave while d < 4 nc = neighbours[c * 4 + d] if nc >= 0 - wmask = wave[nc] t = 0 while t < t_max - cnt = popcount(propagator[d][t] & wmask) + cnt = 0 + ch = 0 + while ch < chunk_count + cnt += popcount(propagator_chunks[d][t][ch] & wave_chunks[ch][nc]) + ch += 1 + end cnt = 255 if cnt > 255 buf.setbyte((c * t_max + t) * 4 + d, cnt) t += 1 @@ -457,23 +499,29 @@ def orphan_ban_pass n = @cells_count t_max = @num_tiles compatible = @compatible - wave = @wave - bit_table = @bit + wave_chunks = @wave_chunks + chunk_count = @chunk_count c = 0 while c < n - t = 0 - while t < t_max - if (wave[c] & bit_table[t]) != 0 - base = (c * t_max + t) * 4 + base_c = c * t_max * 4 + ch = 0 + while ch < chunk_count + v = wave_chunks[ch][c] + tile_offset = ch * WAVE_CHUNK_BITS + while v != 0 + lowest = v & -v + t = tile_offset + lowest.bit_length - 1 + base = base_c + (t << 2) if compatible.getbyte(base) == 0 || compatible.getbyte(base + 1) == 0 || compatible.getbyte(base + 2) == 0 || compatible.getbyte(base + 3) == 0 ban(c, t) end + v ^= lowest end - t += 1 + ch += 1 end c += 1 end @@ -531,55 +579,74 @@ def find_lowest_entropy_cell end def observe(c) - wmask = @wave[c] total = @sum_w[c] r = ::Kernel.rand * total weights = @weights - bit_table = @bit - t_max = @num_tiles + wave_chunks = @wave_chunks + chunk_count = @chunk_count + chosen = -1 - t = 0 - while t < t_max - if (wmask & bit_table[t]) != 0 + ch = 0 + while ch < chunk_count 
+ v = wave_chunks[ch][c] + tile_offset = ch * WAVE_CHUNK_BITS + while v != 0 + lowest = v & -v + t = tile_offset + lowest.bit_length - 1 r -= weights[t] if r <= 0 chosen = t break end + v ^= lowest end - t += 1 + break if chosen >= 0 + ch += 1 end if chosen < 0 - # Floating-point edge: pick the last set bit in wmask. - t = t_max - 1 - while t >= 0 - if (wmask & bit_table[t]) != 0 - chosen = t + # Floating-point edge: pick the highest set bit across chunks. + ch = chunk_count - 1 + while ch >= 0 + v = wave_chunks[ch][c] + if v != 0 + chosen = ch * WAVE_CHUNK_BITS + v.bit_length - 1 break end - t -= 1 + ch -= 1 end end - # Ban every other tile at this cell. - t = 0 - while t < t_max - if t != chosen && (wmask & bit_table[t]) != 0 - ban(c, t) - return if @contradiction + # Ban every other tile at this cell. Snapshot chunks before iterating + # because `ban` mutates them in place — we want to ban every tile that + # was alive *before* this observation, not the shrinking set. + ch = 0 + while ch < chunk_count + v = wave_chunks[ch][c] + tile_offset = ch * WAVE_CHUNK_BITS + while v != 0 + lowest = v & -v + t = tile_offset + lowest.bit_length - 1 + if t != chosen + ban(c, t) + return if @contradiction + end + v ^= lowest end - t += 1 + ch += 1 end end def ban(c, t) - bit = @bit[t] - wave = @wave - return if (wave[c] & bit) == 0 - - wave[c] = wave[c] ^ bit + ch = @chunk_of[t] + b = @bit_in_chunk[t] + wave_ch = @wave_chunks[ch] + v = wave_ch[c] + return if (v & b) == 0 + + v ^= b + wave_ch[c] = v @remaining[c] -= 1 w = @weights[t] @@ -598,23 +665,32 @@ def ban(c, t) if r == 1 @uncollapsed_count -= 1 - # Single bit left — `bit_length` returns its position + 1, so - # subtracting one gives the tile index in O(1) instead of - # scanning the mask bit by bit. - @chosen_tile[c] = wave[c].bit_length - 1 + @chosen_tile[c] = find_single_tile(c) end @prop_cells.push(c) @prop_tiles.push(t) end + # Locate the single remaining tile across `@wave_chunks` for cell `c`. 
+ # Only called when `@remaining[c]` just dropped to 1, so exactly one + # chunk has a non-zero entry with a single set bit. + def find_single_tile(c) + ch = @chunk_count - 1 + while ch >= 0 + v = @wave_chunks[ch][c] + return ch * WAVE_CHUNK_BITS + v.bit_length - 1 if v != 0 + ch -= 1 + end + -1 + end + def propagate prop_cells = @prop_cells prop_tiles = @prop_tiles - propagator_lists = @propagator_lists + propagator_chunks = @propagator_chunks compatible = @compatible - wave = @wave - bit_table = @bit + wave_chunks = @wave_chunks neighbours = @neighbours t_max = @num_tiles remaining = @remaining @@ -624,57 +700,70 @@ def propagate weights = @weights weights_log_weights = @weights_log_weights chosen_tile = @chosen_tile + chunk_count = @chunk_count + t_max4 = t_max * 4 + chunk_bits = WAVE_CHUNK_BITS until prop_cells.empty? return if @contradiction t = prop_tiles.pop c = prop_cells.pop + c4 = c * 4 + prop_dir = propagator_chunks d = 0 while d < 4 - nc = neighbours[c * 4 + d] + nc = neighbours[c4 + d] if nc >= 0 - list = propagator_lists[d][t] opp_d = OPP[d] - i = 0 - len = list.length - while i < len - tp = list[i] - bit_tp = bit_table[tp] - # Skip tiles already banned at the neighbour — decrementing - # their supporter count would silently wrap past zero and - # waste work. - if (wave[nc] & bit_tp) != 0 - idx = (nc * t_max + tp) * 4 + opp_d - count = compatible.getbyte(idx) - 1 - compatible.setbyte(idx, count) - if count == 0 - # Inlined fast-path of `ban(nc, tp)`. We already know - # `bit_tp` is set in `wave[nc]` from the check above, - # so the redundant gate in `ban` is skipped. The same - # state updates run; `ban` itself stays callable for - # `orphan_ban_pass` and `observe` where the gate is - # still needed. 
- wave[nc] = wave[nc] ^ bit_tp - new_remaining = remaining[nc] - 1 - remaining[nc] = new_remaining - sum_w[nc] -= weights[tp] - sum_w_log_w[nc] -= weights_log_weights[tp] - if new_remaining == 0 - @contradiction = true - return - end - s = sum_w[nc] - entropies[nc] = ::Math.log(s) - sum_w_log_w[nc] / s - if new_remaining == 1 - @uncollapsed_count -= 1 - chosen_tile[nc] = wave[nc].bit_length - 1 + nc_base = nc * t_max4 + opp_d + prop_dt = prop_dir[d][t] + + ch = 0 + while ch < chunk_count + prop_mask = prop_dt[ch] + if prop_mask != 0 + wave_ch = wave_chunks[ch] + # Intersection: tiles that are both still alive at the + # neighbour and compatible with originating tile t in + # direction d. Iterating set bits of `m` walks exactly + # the tiles that need a supporter-count decrement — no + # per-tile bit test, no work for already-banned tiles. + m = wave_ch[nc] & prop_mask + tile_offset = ch * chunk_bits + while m != 0 + lowest = m & -m + tp = tile_offset + lowest.bit_length - 1 + idx = nc_base + (tp << 2) + count = compatible.getbyte(idx) - 1 + compatible.setbyte(idx, count) + if count == 0 + # Inlined fast-path of `ban(nc, tp)`. We know the bit + # is set in this chunk (from the intersection), so + # XOR-clearing it is correct without re-testing. 
+ new_chunk = wave_ch[nc] ^ lowest + wave_ch[nc] = new_chunk + new_remaining = remaining[nc] - 1 + remaining[nc] = new_remaining + sum_w[nc] -= weights[tp] + sum_w_log_w[nc] -= weights_log_weights[tp] + if new_remaining == 0 + @contradiction = true + return + end + s = sum_w[nc] + entropies[nc] = ::Math.log(s) - sum_w_log_w[nc] / s + if new_remaining == 1 + @uncollapsed_count -= 1 + chosen_tile[nc] = find_single_tile(nc) + end + prop_cells.push(nc) + prop_tiles.push(tp) end - prop_cells.push(nc) - prop_tiles.push(tp) + m ^= lowest end end - i += 1 + ch += 1 end end d += 1 From 08d85137276e89d577713f8fa51a83b17a3c8709 Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Wed, 27 May 2026 12:26:47 +0200 Subject: [PATCH 29/32] Merge entropy and noise into a single array for the lowest-entropy scan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Noise (random jitter for tie-breaking among equal entropies) is set once at setup and never changes. The find_lowest_entropy_cell scan was reading both arrays and adding them every iteration — O(n) cells per observe, ~3.7M cell scans total on a 64x36 solve. Store `entropy + noise` directly in @entropy_noise and update it whenever the entropy itself changes (ban + inlined ban in propagate). @noise stays around so the addition can be recomputed when the entropy is updated, but find_lowest_entropy_cell now reads one array per cell instead of two. At 64x36 over 30 runs: median 0.426s, best 0.399s. 
--- lib/wave_function_collapse/model.rb | 39 +++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index 4b3a7ae..023fff4 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -112,7 +112,6 @@ def prepend_empty_row shift_uniform!(@remaining, shift_count, @num_tiles) shift_uniform!(@sum_w, shift_count, @initial_sum_w) shift_uniform!(@sum_w_log_w, shift_count, @initial_sum_w_log_w) - shift_uniform!(@entropies, shift_count, @initial_entropy) # When the tileset has a single tile every cell is born collapsed, # so the new row's chosen_tile must point at tile 0 rather than the # generic "uncollapsed" sentinel — mirroring the t_max==1 branch in @@ -121,13 +120,22 @@ def prepend_empty_row shift_uniform!(@chosen_tile, shift_count, (@num_tiles == 1) ? 0 : -1) noise = @noise + entropy_noise = @entropy_noise + initial_entropy = @initial_entropy i = 0 + # Carry both the noise and the merged `entropy + noise` value down + # so existing rows keep their evolved entropies, then mint fresh + # noise (and corresponding initial entropy_noise) for the new top + # rows. while i < shift_count noise[i] = noise[i + w] + entropy_noise[i] = entropy_noise[i + w] i += 1 end while i < n - noise[i] = ::Kernel.rand * 1e-6 + nz = ::Kernel.rand * 1e-6 + noise[i] = nz + entropy_noise[i] = initial_entropy + nz i += 1 end @@ -335,7 +343,12 @@ def build_initial_state @remaining = ::Array.new(n) @sum_w = ::Array.new(n) @sum_w_log_w = ::Array.new(n) - @entropies = ::Array.new(n) + # `entropy + noise` for find_lowest_entropy_cell. Maintained + # eagerly on every ban so the lowest-entropy scan reads a single + # array. Noise (jitter for tie-breaking) is baked in once at setup + # and stays constant per cell for the run, so the addition only + # has to happen when the entropy itself changes. 
+ @entropy_noise = ::Array.new(n) @noise = ::Array.new(n) @chosen_tile = ::Array.new(n) build_neighbours @@ -392,11 +405,15 @@ def setup_wave_state @remaining.fill(t_max) @sum_w.fill(@initial_sum_w) @sum_w_log_w.fill(@initial_sum_w_log_w) - @entropies.fill(@initial_entropy) @chosen_tile.fill(-1) + noise = @noise + entropy_noise = @entropy_noise + initial_entropy = @initial_entropy i = 0 while i < n - @noise[i] = ::Kernel.rand * 1e-6 + nz = ::Kernel.rand * 1e-6 + noise[i] = nz + entropy_noise[i] = initial_entropy + nz i += 1 end # When the tileset has a single tile every cell is born collapsed, @@ -560,14 +577,13 @@ def observe_and_propagate def find_lowest_entropy_cell n = @cells_count remaining = @remaining - entropies = @entropies - noise = @noise + entropy_noise = @entropy_noise best_c = -1 best_e = ::Float::INFINITY c = 0 while c < n if remaining[c] > 1 - e = entropies[c] + noise[c] + e = entropy_noise[c] if e < best_e best_e = e best_c = c @@ -661,7 +677,7 @@ def ban(c, t) end s = @sum_w[c] - @entropies[c] = ::Math.log(s) - @sum_w_log_w[c] / s + @entropy_noise[c] = ::Math.log(s) - @sum_w_log_w[c] / s + @noise[c] if r == 1 @uncollapsed_count -= 1 @@ -696,7 +712,8 @@ def propagate remaining = @remaining sum_w = @sum_w sum_w_log_w = @sum_w_log_w - entropies = @entropies + entropy_noise = @entropy_noise + noise = @noise weights = @weights weights_log_weights = @weights_log_weights chosen_tile = @chosen_tile @@ -752,7 +769,7 @@ def propagate return end s = sum_w[nc] - entropies[nc] = ::Math.log(s) - sum_w_log_w[nc] / s + entropy_noise[nc] = ::Math.log(s) - sum_w_log_w[nc] / s + noise[nc] if new_remaining == 1 @uncollapsed_count -= 1 chosen_tile[nc] = find_single_tile(nc) From 1d93616484211c601b10a43ee8f29a4951d2c4ad Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Wed, 27 May 2026 12:33:57 +0200 Subject: [PATCH 30/32] Re-enable GC in bin/run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The disable was a workaround for 
the original solver's heavy Bignum allocation churn during propagation — pausing GC traded memory growth for smoother frames. With the chunked-Fixnum wave the hot path allocates almost nothing (a 64x36 solve runs with zero minor GCs), so GC has nothing to do during a solve and leaving it disabled just lets the process bloat over long sessions. --- bin/run | 3 --- 1 file changed, 3 deletions(-) diff --git a/bin/run b/bin/run index 404fb19..7f09d76 100755 --- a/bin/run +++ b/bin/run @@ -6,7 +6,4 @@ $LOAD_PATH.unshift File.expand_path("../lib", __dir__) require "bundler/setup" require "wave_function_collapse" -# Disable GC - -GC.disable WaveFunctionCollapse::Window.new.show From dd5350b830a124ebd493c5710af20a3c65327446 Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Wed, 27 May 2026 13:04:14 +0200 Subject: [PATCH 31/32] Split compatible per direction and tighten propagate inner loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three independent wins, primarily helping the non-YJIT interpreted path: - Compatible-count storage was a single interleaved String of bytes indexed `(c * t_max + t) * 4 + d`. The inner loop's setbyte/getbyte calls are ~30% slower than Array `[]`/`[]=` in interpreted Ruby (per microbench). Split into four parallel Arrays of Fixnums, one per direction, indexed by the flat `c * t_max + t`. The inner loop no longer needs the `<<2` and `+ opp_d` arithmetic. - Pre-permute by `OPP` into `@compatible_per_opp` so the propagation loop reads `compatible_per_opp[d]` directly instead of `compatible_per_dir[OPP[d]]` — one Array#[] instead of two. - Replace the `m ^= lowest` advance with `m -= lowest`. Both clear the lowest set bit (we just extracted it), but the subtract form is consistently faster in the interpreter on this branch. The trade-off: ~14 MB of compatible storage instead of 1.7 MB (Fixnum slot is 8 bytes vs 1 byte) — still well within L3 on modern hardware. 
64x36 timings: - Non-YJIT: 1.326s → ~1.11s median (16% faster) - YJIT: 0.426s → ~0.418s median (within noise) --- lib/wave_function_collapse/model.rb | 162 +++++++++++++++------------- 1 file changed, 87 insertions(+), 75 deletions(-) diff --git a/lib/wave_function_collapse/model.rb b/lib/wave_function_collapse/model.rb index 023fff4..dbb7f10 100644 --- a/lib/wave_function_collapse/model.rb +++ b/lib/wave_function_collapse/model.rb @@ -315,21 +315,6 @@ def build_propagator (1 << bits) - 1 }.freeze - # Precompute the 4-byte-per-tile block representing an interior cell's - # initial supporter counts (one byte per direction). Used to build the - # @compatible buffer quickly. Sized via a single fill string, then - # written by setbyte — avoids 4*t_max one-byte `.chr` allocations. - block = "\x00".b * (t_max * 4) - t = 0 - while t < t_max - base = t * 4 - block.setbyte(base, propagator_counts[0][t]) - block.setbyte(base + 1, propagator_counts[1][t]) - block.setbyte(base + 2, propagator_counts[2][t]) - block.setbyte(base + 3, propagator_counts[3][t]) - t += 1 - end - @interior_block = block.freeze end def build_initial_state @@ -353,11 +338,16 @@ def build_initial_state @chosen_tile = ::Array.new(n) build_neighbours build_initial_compatible_template - # Persistent supporter-count buffer: sized once, reset via - # String#replace on every restart so we never allocate a fresh - # n*t_max*4-byte String on a contradiction. - @compatible = ::String.new(capacity: @cells_count * @num_tiles * 4, encoding: ::Encoding::BINARY) - @compatible << @initial_compatible + # Supporter-count storage. Split per direction so the inner + # propagation loop can index by a flat `(c * t_max + t)` integer + # (no `<<2`, no `+ opp_d`). Each is pre-allocated and reset in + # place via Array#replace on every restart — no per-restart + # allocations. 
+ @compatible_per_dir = ::Array.new(4) { ::Array.new(@cells_count * @num_tiles) } + # Same arrays in OPP order so the propagation loop can do a single + # array lookup per direction (`compatible_per_opp[d]`) rather than + # `compatible_per_dir[OPP[d]]` — two Array#[] reads per direction. + @compatible_per_opp = OPP.map { |opp_d| @compatible_per_dir[opp_d] }.freeze end # Precompute the neighbour cell index for every (cell, direction) pair, @@ -429,51 +419,56 @@ def setup_wave_state @prop_cells.clear @prop_tiles.clear - @compatible.replace(@initial_compatible) + d = 0 + while d < 4 + @compatible_per_dir[d].replace(@initial_compatible_per_dir[d]) + d += 1 + end orphan_ban_pass propagate @generation += 1 end - # The initial supporter-count buffer is fully determined by the tileset - # and grid dimensions, so build it once and `dup` per run instead of - # repeating the border-patch pass on every contradiction restart. + # The initial supporter-count arrays are fully determined by the tileset + # and grid dimensions, so build them once and `replace` per run instead + # of repeating the border-patch pass on every contradiction restart. + # One Array per direction; index by `c * t_max + t`. def build_initial_compatible_template n = @cells_count t_max = @num_tiles neighbours = @neighbours - - buf = ::String.new(::String.new.b, capacity: n * t_max * 4) - buf.force_encoding(::Encoding::BINARY) - c = 0 - while c < n - buf << @interior_block - c += 1 - end - - # Patch border cells: missing directions get sentinel 255. - c = 0 - while c < n - d = 0 - while d < 4 + propagator_counts = @propagator_counts + + @initial_compatible_per_dir = ::Array.new(4) do |d| + counts = propagator_counts[d] + arr = ::Array.new(n * t_max) + c = 0 + while c < n + base = c * t_max if neighbours[c * 4 + d] < 0 - base = (c * t_max) * 4 + d + # Missing neighbour in this direction: sentinel 255 so + # `orphan_ban_pass` can't see a zero and incorrectly ban + # tiles at the boundary. 
+ t = 0 + while t < t_max + arr[base + t] = 255 + t += 1 + end + else t = 0 while t < t_max - buf.setbyte(base + t * 4, 255) + arr[base + t] = counts[t] t += 1 end end - d += 1 + c += 1 end - c += 1 + arr.freeze end - - @initial_compatible = buf.freeze - # Frozen 0xFF-filled sentinel of the same size, reused by - # rebuild_compatible_from_wave to clear @compatible in place - # without allocating an intermediate fill string. - @compatible_fill = ("\xff".b * (n * t_max * 4)).freeze + @initial_compatible_per_dir.freeze + # Sentinel 255-filled array reused by rebuild_compatible_from_wave + # to clear arrays in place without allocating an intermediate. + @compatible_fill = ::Array.new(n * t_max, 255).freeze end def rebuild_compatible_from_wave @@ -483,9 +478,13 @@ def rebuild_compatible_from_wave propagator_chunks = @propagator_chunks wave_chunks = @wave_chunks chunk_count = @chunk_count - buf = @compatible + compatible_per_dir = @compatible_per_dir - buf.replace(@compatible_fill) + d = 0 + while d < 4 + compatible_per_dir[d].replace(@compatible_fill) + d += 1 + end c = 0 while c < n @@ -493,6 +492,8 @@ def rebuild_compatible_from_wave while d < 4 nc = neighbours[c * 4 + d] if nc >= 0 + compat_d = compatible_per_dir[d] + base_c = c * t_max t = 0 while t < t_max cnt = 0 @@ -502,7 +503,7 @@ def rebuild_compatible_from_wave ch += 1 end cnt = 255 if cnt > 255 - buf.setbyte((c * t_max + t) * 4 + d, cnt) + compat_d[base_c + t] = cnt t += 1 end end @@ -515,13 +516,17 @@ def rebuild_compatible_from_wave def orphan_ban_pass n = @cells_count t_max = @num_tiles - compatible = @compatible + compatible_per_dir = @compatible_per_dir + compat_d0 = compatible_per_dir[0] + compat_d1 = compatible_per_dir[1] + compat_d2 = compatible_per_dir[2] + compat_d3 = compatible_per_dir[3] wave_chunks = @wave_chunks chunk_count = @chunk_count c = 0 while c < n - base_c = c * t_max * 4 + base_c = c * t_max ch = 0 while ch < chunk_count v = wave_chunks[ch][c] @@ -529,11 +534,11 @@ def orphan_ban_pass 
while v != 0 lowest = v & -v t = tile_offset + lowest.bit_length - 1 - base = base_c + (t << 2) - if compatible.getbyte(base) == 0 || - compatible.getbyte(base + 1) == 0 || - compatible.getbyte(base + 2) == 0 || - compatible.getbyte(base + 3) == 0 + base = base_c + t + if compat_d0[base] == 0 || + compat_d1[base] == 0 || + compat_d2[base] == 0 || + compat_d3[base] == 0 ban(c, t) end v ^= lowest @@ -705,7 +710,7 @@ def propagate prop_cells = @prop_cells prop_tiles = @prop_tiles propagator_chunks = @propagator_chunks - compatible = @compatible + compatible_per_opp = @compatible_per_opp wave_chunks = @wave_chunks neighbours = @neighbours t_max = @num_tiles @@ -718,7 +723,6 @@ def propagate weights_log_weights = @weights_log_weights chosen_tile = @chosen_tile chunk_count = @chunk_count - t_max4 = t_max * 4 chunk_bits = WAVE_CHUNK_BITS until prop_cells.empty? @@ -726,40 +730,44 @@ def propagate t = prop_tiles.pop c = prop_cells.pop c4 = c * 4 - prop_dir = propagator_chunks d = 0 while d < 4 nc = neighbours[c4 + d] if nc >= 0 - opp_d = OPP[d] - nc_base = nc * t_max4 + opp_d - prop_dt = prop_dir[d][t] + # Single Array indexed by `nc * t_max + tp` for this + # direction's supporter counts. Avoids the `<<2` + `+opp_d` + # arithmetic the interleaved-byte-buffer layout required. + compat = compatible_per_opp[d] + nc_base = nc * t_max + prop_dt = propagator_chunks[d][t] ch = 0 while ch < chunk_count prop_mask = prop_dt[ch] if prop_mask != 0 wave_ch = wave_chunks[ch] + wnc = wave_ch[nc] # Intersection: tiles that are both still alive at the # neighbour and compatible with originating tile t in # direction d. Iterating set bits of `m` walks exactly - # the tiles that need a supporter-count decrement — no - # per-tile bit test, no work for already-banned tiles. - m = wave_ch[nc] & prop_mask + # the tiles that need a supporter-count decrement. 
+ m = wnc & prop_mask tile_offset = ch * chunk_bits + nc_base_ch = nc_base + tile_offset while m != 0 lowest = m & -m - tp = tile_offset + lowest.bit_length - 1 - idx = nc_base + (tp << 2) - count = compatible.getbyte(idx) - 1 - compatible.setbyte(idx, count) + bit_pos = lowest.bit_length - 1 + idx = nc_base_ch + bit_pos + count = compat[idx] - 1 + compat[idx] = count if count == 0 # Inlined fast-path of `ban(nc, tp)`. We know the bit # is set in this chunk (from the intersection), so - # XOR-clearing it is correct without re-testing. - new_chunk = wave_ch[nc] ^ lowest - wave_ch[nc] = new_chunk + # subtracting the single-bit `lowest` flips it off. + wnc -= lowest + wave_ch[nc] = wnc + tp = tile_offset + bit_pos new_remaining = remaining[nc] - 1 remaining[nc] = new_remaining sum_w[nc] -= weights[tp] @@ -777,7 +785,11 @@ def propagate prop_cells.push(nc) prop_tiles.push(tp) end - m ^= lowest + # `m -= lowest` clears the lowest set bit (the bit we + # just processed) without touching the others — same + # effect as `m ^= lowest` but consistently faster in + # the interpreter on this branch. + m -= lowest end end ch += 1 From 9ea443cc5640fab0dcf8b5c86ab6623885e4f09b Mon Sep 17 00:00:00 2001 From: Piotr Usewicz Date: Wed, 27 May 2026 13:06:39 +0200 Subject: [PATCH 32/32] Add ZJIT status --- bin/benchmark | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/benchmark b/bin/benchmark index c64b139..48e5574 100755 --- a/bin/benchmark +++ b/bin/benchmark @@ -73,8 +73,9 @@ if ENV["CI"] end yjit_status = defined?(RubyVM::YJIT) && RubyVM::YJIT.enabled? ? "on" : "off" +zjit_status = defined?(RubyVM::ZJIT) && RubyVM::ZJIT.enabled? ? "on" : "off" puts RUBY_DESCRIPTION -puts "Tiles loaded: #{tiles.size}, YJIT=#{yjit_status}" +puts "Tiles loaded: #{tiles.size}, YJIT=#{yjit_status}, ZJIT=#{zjit_status}" puts # Accepts either "N" (treated as NxN) or "WxH" entries.