Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions lib/aerospike/cluster.rb
Original file line number Diff line number Diff line change
Expand Up @@ -369,13 +369,17 @@ def launch_tend_thread
@tend_thread = Thread.new do
Thread.current.abort_on_exception = false
loop do

# Sleep is unconditional: the previous structure put `sleep` on the
# success path of an implicit `begin/rescue`, so a persistent error
# (e.g. a poisoned `@cluster_nodes` entry) busy-looped at full CPU
# and produced ~50 errors/sec. The rescue must not skip the backoff.
begin
tend
sleep(@tend_interval / 1000.0)
rescue => e
rescue => e
Aerospike.logger.error("Exception occured during tend: #{e}")
Aerospike.logger.debug { e.backtrace.join("\n") }

end
sleep(@tend_interval / 1000.0)
end
end
end
Expand Down
13 changes: 13 additions & 0 deletions lib/aerospike/node/refresh/peers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,19 @@ def call(node, peers)
begin
nv = NodeValidator.new(cluster, host, cluster.connection_timeout, cluster.cluster_name, cluster.tls_options)

# Defense-in-depth at the integration point: refuse a half-initialized
# validator before it reaches `cluster.create_node`. A `nv` with empty
# aliases produces a `Node` with `@host = nil` that crashes every
# subsequent tend cycle. Must run before the mismatch check below — the
# `name == peer.node_name && aliases.empty?` case slips past it cleanly.
if nv.name.nil? || nv.aliases.empty?
::Aerospike.logger.warn(
"Skipping peer #{peer.node_name} at #{host}: validator returned " \
"name=#{nv.name.inspect} aliases=#{nv.aliases.size}"
)
next
end

if nv.name != peer.node_name
::Aerospike.logger.warn("Peer node #{peer.node_name} is different than actual node #{nv.name} for host #{host}");
# Must look for new node name in the unlikely event that node names do not agree.
Expand Down
13 changes: 11 additions & 2 deletions lib/aerospike/node_validator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,15 @@ def initialize(cluster, host, timeout, cluster_name, tls_options = {})
resolve(host.name).each do |address|
@aliases += get_hosts(address)
end

# Reject a half-initialized validator: `get_hosts` swallows connection
# errors silently, so without this guard a degenerate `nv` (no name, no
# aliases) escapes to `Cluster#create_node` and produces a `Node` with
# `@host = nil` that crashes on the next tend cycle.
if @name.nil? || @aliases.empty?
raise ::Aerospike::Exceptions::InvalidNode,
"Failed to validate node at #{host}: name=#{@name.inspect} aliases=#{@aliases.size}"
end
end

private
Expand Down Expand Up @@ -66,8 +75,8 @@ def get_hosts(address)
end

res = aliases.map { |al| Host.new(al[:address], al[:port], host.tls_name) }
rescue
# we don't care about the actual connection error; Just need to continue
rescue => e
::Aerospike.logger.debug { "NodeValidator: get_hosts(#{address}) failed: #{e.class}: #{e.message}" }
ensure
conn.close if conn
end
Expand Down
33 changes: 33 additions & 0 deletions spec/aerospike/cluster_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -144,4 +144,37 @@
it { is_expected.to be false }
end
end

describe '#launch_tend_thread (Fix C: rescue path must not skip sleep)' do
subject(:launch!) { instance.send(:launch_tend_thread) }

let(:tend_calls) { ::Aerospike::Atomic.new(0) }
let(:sleep_calls) { ::Aerospike::Atomic.new(0) }

before do
instance.instance_variable_set(:@tend_interval, 1)
allow(instance).to receive(:tend) do
tend_calls.update { |c| c + 1 }
raise StandardError, 'persistent tend failure'
end
allow(instance).to receive(:sleep) do |_|
calls = sleep_calls.update { |c| c + 1 }
Thread.exit if calls >= 3
end
allow(::Aerospike.logger).to receive(:error)
allow(::Aerospike.logger).to receive(:debug)
end

after do
instance.instance_variable_get(:@tend_thread)&.kill
end

it 'sleeps once per iteration even when tend raises persistently (no busy-loop)' do
launch!
instance.instance_variable_get(:@tend_thread).join(2)

expect(sleep_calls.value).to eq(tend_calls.value)
expect(sleep_calls.value).to be >= 3
end
end
end
152 changes: 152 additions & 0 deletions spec/aerospike/node/refresh/peers_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
# frozen_string_literal: true

# Copyright 2026 Aerospike, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.

RSpec.describe Aerospike::Node::Refresh::Peers do
subject(:call!) { described_class.call(node, peers) }

let(:node) { double('node') }
let(:cluster) { double('cluster') }
let(:tend_connection) { double('tend_connection') }
let(:peers) { ::Aerospike::Peers.new }
let(:peers_generation) { ::Aerospike::Node::Generation.new }
let(:peers_count) { ::Aerospike::Atomic.new(0) }

let(:peer_node_name) { 'BB92118BAB14E12' }
let(:peer_host) { ::Aerospike::Host.new('172.31.73.252', 3000) }
let(:peer) do
::Aerospike::Peer.new.tap do |p|
p.node_name = peer_node_name
p.hosts = [peer_host]
end
end

let(:fetch_collection) do
instance_double(
::Aerospike::Peers::Parse::Object,
generation: 1,
peers: [peer]
)
end

before do
allow(node).to receive(:cluster).and_return(cluster)
allow(node).to receive(:tend_connection).and_return(tend_connection)
allow(node).to receive(:peers_count).and_return(peers_count)
allow(node).to receive(:peers_generation).and_return(peers_generation)
allow(node).to receive(:failures).and_return(::Aerospike::Atomic.new(0))
allow(node).to receive(:active?).and_return(true)
allow(node).to receive(:name).and_return('node-self')

allow(cluster).to receive(:connection_timeout).and_return(1000)
allow(cluster).to receive(:cluster_name).and_return('test')
allow(cluster).to receive(:tls_options).and_return({})
allow(cluster).to receive(:find_node_by_name).and_return(nil)
allow(cluster).to receive(:create_node)

allow(::Aerospike::Peers::Fetch).to receive(:call).and_return(fetch_collection)
allow(::Aerospike::Cluster::FindNode).to receive(:call).and_return(nil)
allow(::Aerospike::Node::Refresh::Failed).to receive(:call)
allow(::Aerospike.logger).to receive(:warn)
end

context 'happy path: validator returns matching name and non-empty aliases' do
let(:new_node) { instance_double(::Aerospike::Node) }
let(:nv) do
instance_double(
::Aerospike::NodeValidator,
name: peer_node_name,
aliases: [peer_host]
)
end

before do
allow(::Aerospike::NodeValidator).to receive(:new).and_return(nv)
allow(cluster).to receive(:create_node).with(nv).and_return(new_node)
call!
end

it 'creates the node and stores it in peers under the validator name' do
expect(cluster).to have_received(:create_node).with(nv)
expect(peers.nodes[peer_node_name]).to eq(new_node)
expect(peers_generation.changed?).to be(true)
end
end

context 'guard fires when validator name is nil' do
let(:nv) do
instance_double(::Aerospike::NodeValidator, name: nil, aliases: [])
end

before do
allow(::Aerospike::NodeValidator).to receive(:new).and_return(nv)
call!
end

it 'skips create_node, leaves peers.nodes empty, and warns' do
expect(cluster).not_to have_received(:create_node)
expect(peers.nodes).to be_empty
expect(::Aerospike.logger).to have_received(:warn).with(/Skipping peer.*name=nil/)
end
end

context 'guard fires when name matches but aliases is empty (path 4)' do
let(:nv) do
instance_double(::Aerospike::NodeValidator, name: peer_node_name, aliases: [])
end

before do
allow(::Aerospike::NodeValidator).to receive(:new).and_return(nv)
call!
end

it 'still skips create_node — mismatch check below cannot catch this' do
expect(cluster).not_to have_received(:create_node)
expect(peers.nodes).to be_empty
expect(::Aerospike.logger).to have_received(:warn).with(/aliases=0/)
end
end

context 'continues iterating peer.hosts after a guarded skip (next, not break)' do
let(:peer_host_b) { ::Aerospike::Host.new('172.31.73.253', 3000) }
let(:peer) do
::Aerospike::Peer.new.tap do |p|
p.node_name = peer_node_name
p.hosts = [peer_host, peer_host_b]
end
end

let(:bad_nv) do
instance_double(::Aerospike::NodeValidator, name: nil, aliases: [])
end
let(:good_nv) do
instance_double(::Aerospike::NodeValidator, name: peer_node_name, aliases: [peer_host_b])
end
let(:new_node) { instance_double(::Aerospike::Node) }

before do
allow(::Aerospike::NodeValidator).to receive(:new) do |_, host, *|
host == peer_host ? bad_nv : good_nv
end
allow(cluster).to receive(:create_node).with(good_nv).and_return(new_node)
call!
end

it 'validates the second host and stores its node' do
expect(cluster).to have_received(:create_node).with(good_nv)
expect(peers.nodes[peer_node_name]).to eq(new_node)
end
end
end
48 changes: 34 additions & 14 deletions spec/aerospike/node_validator_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,9 @@
let(:hosts) { [::Aerospike::Host.new('127.0.0.1', '3000')] }

before do
allow(socket).to receive(:write).and_return(nil)
allow(socket).to receive(:read).and_return(nil)
allow(socket).to receive(:timeout=).and_return(nil)
expect(::Aerospike::Cluster::CreateConnection).to receive(:call).and_return(socket)
allow(::Aerospike::Cluster::CreateConnection).to receive(:call).and_return(socket)
allow(::Aerospike::Info).to receive(:request).and_return({ 'node' => 'test-node' })
end

it { expect(validator.aliases).to eq(hosts) }
Expand All @@ -35,7 +34,7 @@
allow(socket).to receive(:timeout=).and_return(nil)
allow(::Aerospike::Cluster::CreateConnection).to receive(:call).and_return(socket)

expect(::Aerospike::Info).to receive(:request).and_return(
allow(::Aerospike::Info).to receive(:request).and_return(
{
'node' => 'test-node',
'service-clear-std' => '192.168.1.1:3000'
Expand All @@ -53,15 +52,13 @@
before do
allow(socket).to receive(:timeout=).and_return(nil)
allow(::Aerospike::Cluster::CreateConnection).to receive(:call).and_return(socket)
expect(Resolv).to receive(:getaddresses).and_return(['101.1.1.1', '102.1.1.1'])
allow(Resolv).to receive(:getaddresses).and_return(['101.1.1.1', '102.1.1.1'])

expect(::Aerospike::Info).to receive(:request).and_return(
allow(::Aerospike::Info).to receive(:request).and_return(
{
'node' => 'test-node',
'service-clear-std' => '101.1.1.2:3002,101.1.1.3:3003'
}
)
expect(::Aerospike::Info).to receive(:request).and_return(
},
{
'node' => 'test-node',
'service-clear-std' => '102.1.1.2:3002,102.1.1.3:3003'
Expand All @@ -84,15 +81,13 @@
before do
allow(socket).to receive(:timeout=).and_return(nil)
allow(::Aerospike::Cluster::CreateConnection).to receive(:call).and_return(socket)
expect(Resolv).to receive(:getaddresses).and_return(['101.1.1.1', '102.1.1.1'])
allow(Resolv).to receive(:getaddresses).and_return(['101.1.1.1', '102.1.1.1'])

expect(::Aerospike::Info).to receive(:request).and_return(
allow(::Aerospike::Info).to receive(:request).and_return(
{
'node' => 'test-node',
'service-tls-std' => '101.1.1.2:3002,101.1.1.3:3003'
}
)
expect(::Aerospike::Info).to receive(:request).and_return(
},
{
'node' => 'test-node',
'service-tls-std' => '102.1.1.2:3002,102.1.1.3:3003'
Expand All @@ -107,4 +102,29 @@
)
end
end

describe 'post-condition: refuses half-initialized validators' do
let(:hosts) { [::Aerospike::Host.new('192.168.1.1', '3000')] }

before do
allow(socket).to receive(:timeout=).and_return(nil)
allow(::Aerospike::Cluster::CreateConnection).to receive(:call).and_return(socket)
end

context 'when get_hosts fails before assigning @name' do
before { allow(::Aerospike::Info).to receive(:request).and_raise(Errno::ETIMEDOUT) }

it 'raises InvalidNode' do
expect { validator }.to raise_error(::Aerospike::Exceptions::InvalidNode, /name=nil aliases=0/)
end
end

context 'when @name is set but aliases stay empty (path 4)' do
before { allow(::Aerospike::Info).to receive(:request).and_return({ 'node' => 'test-node' }) }

it 'raises InvalidNode even though @name was assigned' do
expect { validator }.to raise_error(::Aerospike::Exceptions::InvalidNode, /aliases=0/)
end
end
end
end
Loading