diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0ef39ac2f..441e2948a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -126,7 +126,8 @@ jobs: - name: Test # Limit test threads to reduce resource contention on high-core-count runners # Without this, 64+ parallel tests cause timing-sensitive network tests to fail - run: cargo test --workspace --no-default-features --features trace,websocket,redb -- --test-threads=8 + # --nocapture shows logs in real-time instead of buffering until test completion + run: cargo test --workspace --no-default-features --features trace,websocket,redb -- --test-threads=8 --nocapture six_peer_regression: name: six-peer-regression diff --git a/crates/core/src/contract/executor.rs b/crates/core/src/contract/executor.rs index 538a100fe..826881fa1 100644 --- a/crates/core/src/contract/executor.rs +++ b/crates/core/src/contract/executor.rs @@ -462,35 +462,6 @@ impl ComposeNetworkMessage for SubscribeCont } } -#[allow(unused)] -struct PutContract { - contract: ContractContainer, - state: WrappedState, - related_contracts: RelatedContracts<'static>, -} - -impl ComposeNetworkMessage for PutContract { - fn initiate_op(self, op_manager: &OpManager) -> operations::put::PutOp { - let PutContract { - contract, - state, - related_contracts, - } = self; - operations::put::start_op( - contract, - related_contracts, - state, - op_manager.ring.max_hops_to_live, - false, - ) - } - - async fn resume_op(op: operations::put::PutOp, op_manager: &OpManager) -> Result<(), OpError> { - operations::put::request_put(op_manager, op).await - } -} - -#[allow(unused)] struct UpdateContract { key: ContractKey, new_state: WrappedState, diff --git a/crates/core/src/message.rs b/crates/core/src/message.rs index 6b572b15a..503dc40b2 100644 --- a/crates/core/src/message.rs +++ b/crates/core/src/message.rs @@ -3,7 +3,7 @@ //! See `architecture.md`. use std::{ - borrow::{Borrow, Cow}, + borrow::Cow, fmt::Display, net::SocketAddr, time::{Duration, SystemTime}, @@ -98,7 +98,8 @@ impl Transaction { self.id.0.to_le_bytes() } - fn elapsed(&self) -> Duration { + /// Returns the elapsed time since this transaction was created. + pub fn elapsed(&self) -> Duration { let current_unix_epoch_ts = SystemTime::now() .duration_since(SystemTime::UNIX_EPOCH) .expect("now should be always be later than unix epoch") @@ -264,8 +265,6 @@ mod sealed_msg_type { pub(crate) trait MessageStats { fn id(&self) -> &Transaction; - fn target(&self) -> Option; - fn requested_location(&self) -> Option; } @@ -397,8 +396,6 @@ impl From for semver::Version { pub trait InnerMessage: Into { fn id(&self) -> &Transaction; - fn target(&self) -> Option>; - fn requested_location(&self) -> Option; } @@ -551,12 +548,6 @@ impl MessageStats for NetMessage { } } - fn target(&self) -> Option { - match self { - NetMessage::V1(msg) => msg.target(), - } - } - fn requested_location(&self) -> Option { match self { NetMessage::V1(msg) => msg.requested_location(), @@ -578,19 +569,6 @@ impl MessageStats for NetMessageV1 { } } - fn target(&self) -> Option { - match self { - NetMessageV1::Connect(op) => op.target().cloned(), - NetMessageV1::Put(op) => op.target().as_ref().map(|b| b.borrow().clone()), - NetMessageV1::Get(op) => op.target().as_ref().map(|b| b.borrow().clone()), - NetMessageV1::Subscribe(op) => op.target().as_ref().map(|b| b.borrow().clone()), - NetMessageV1::Update(op) => op.target().as_ref().map(|b| b.borrow().clone()), - NetMessageV1::Aborted(_) => None, - NetMessageV1::Unsubscribed { .. } => None, - NetMessageV1::ProximityCache { .. } => None, - } - } - fn requested_location(&self) -> Option { match self { NetMessageV1::Connect(op) => op.requested_location(), diff --git a/crates/core/src/node/network_bridge/p2p_protoc.rs b/crates/core/src/node/network_bridge/p2p_protoc.rs index 6f322df0d..6b729be56 100644 --- a/crates/core/src/node/network_bridge/p2p_protoc.rs +++ b/crates/core/src/node/network_bridge/p2p_protoc.rs @@ -39,7 +39,7 @@ use crate::{ ContractHandlerChannel, ExecutorToEventLoopChannel, NetworkEventListenerHalve, WaitingResolution, }, - message::{MessageStats, NetMessage, NodeEvent, Transaction}, + message::{MessageStats, NetMessage, NodeEvent, Transaction, TransactionType}, node::{ handle_aborted_op, process_message_decoupled, NetEventRegister, NodeConfig, OpManager, PeerId, @@ -403,71 +403,34 @@ impl P2pConnManager { tracing::error!(%tx, "Aborted transaction"); } ConnEvent::OutboundMessage(msg) => { - let Some(target_peer) = msg.target() else { - let id = *msg.id(); - tracing::error!(%id, %msg, "Target peer not set, must be set for connection outbound message"); - ctx.bridge.op_manager.completed(id); - continue; - }; - - // Check if message targets self - if so, process locally instead of sending over network - let self_addr = ctx - .bridge - .op_manager - .ring - .connection_manager - .get_own_addr() - .expect("own address should be set"); - if target_peer.socket_addr() == Some(self_addr) { - tracing::error!( - tx = %msg.id(), - msg_type = %msg, - target_peer = %target_peer, - self_addr = %self_addr, - "BUG: OutboundMessage targets self! This indicates a routing logic error - messages should not reach OutboundMessage handler if they target self" - ); - // Convert to InboundMessage and process locally (no remote source) - ctx.handle_inbound_message(msg, None, &op_manager, &mut state) - .await?; - continue; - } - - tracing::info!( + // With hop-by-hop routing, messages should go through OutboundMessageWithTarget. + // This path should not be reached - if we get here, it means a message was sent + // without proper target extraction from operation state. + tracing::error!( tx = %msg.id(), msg_type = %msg, - target_peer = %target_peer, - "Sending outbound message to peer" + "OutboundMessage received but no target - this is a bug. \ + With hop-by-hop routing, all outbound messages should use \ + OutboundMessageWithTarget. Processing locally instead." ); - // IMPORTANT: Use a single get() call to avoid TOCTOU race - // between contains_key() and get(). The connection can be - // removed by another task between those two calls. - let peer_connection = ctx - .connections - .get(&target_peer.socket_addr().expect("target peer should have address")) - .or_else(|| { - if target_peer.socket_addr().expect("target peer should have address").ip().is_unspecified() { - ctx.connection_entry_by_pub_key(target_peer.pub_key()) - .map(|(resolved_addr, entry)| { - tracing::info!( - tx = %msg.id(), - target_peer = %target_peer.pub_key(), - resolved_addr = %resolved_addr, - "Resolved outbound connection using peer public key due to unspecified address" - ); - entry - }) - } else { - None - } - }); - tracing::debug!( + // Process locally as a fallback + ctx.handle_inbound_message(msg, None, &op_manager, &mut state) + .await?; + } + ConnEvent::OutboundMessageWithTarget { target_addr, msg } => { + // This variant uses an explicit target address from P2pBridge::send(), + // which is critical for NAT scenarios where the address in the message + // differs from the actual transport address we should send to. + tracing::info!( tx = %msg.id(), - self_peer = %ctx.bridge.op_manager.ring.connection_manager.pub_key, - target = %target_peer.pub_key(), - conn_map_size = ctx.connections.len(), - has_connection = peer_connection.is_some(), - "[CONN_TRACK] LOOKUP: Checking for existing connection in HashMap" + msg_type = %msg, + target_addr = %target_addr, + "Sending outbound message with explicit target address (hop-by-hop routing)" ); + + // Look up the connection using the explicit target address + let peer_connection = ctx.connections.get(&target_addr); + match peer_connection { Some(peer_connection) => { if let Err(e) = @@ -475,35 +438,52 @@ impl P2pConnManager { { tracing::error!( tx = %msg.id(), + target_addr = %target_addr, "Failed to send message to peer: {}", e ); } else { tracing::info!( tx = %msg.id(), - target_peer = %target_peer, - "Message successfully sent to peer connection" + target_addr = %target_addr, + "Message successfully sent to peer connection via explicit address" ); } } None => { + // No existing connection - need to establish one first. + // This happens for initial Connect requests to a gateway. + let tx = *msg.id(); + + // Try to get full peer info from operation state (needed for handshake) + // Uses peek method to avoid pop/push overhead + let target_peer = ctx.bridge.op_manager.peek_target_peer(&tx); + + let Some(target_peer) = target_peer else { + // Can't establish connection without peer info + // This shouldn't happen for hop-by-hop routing since + // we should have the connection from a previous request + tracing::error!( + tx = %tx, + target_addr = %target_addr, + connections = ?ctx.connections.keys().collect::>(), + "No connection and no peer info available - cannot establish connection" + ); + ctx.bridge.op_manager.completed(tx); + continue; + }; + tracing::warn!( - id = %msg.id(), - target = %target_peer.pub_key(), - "No existing outbound connection, establishing connection first" + tx = %tx, + target_addr = %target_addr, + target_peer = %target_peer, + connections = ?ctx.connections.keys().collect::>(), + "No existing connection, establishing connection first" ); // Queue the message for sending after connection is established - let tx = *msg.id(); - let (callback, mut result) = tokio::sync::mpsc::channel(10); - let target_peer_loc = target_peer.clone(); + let (callback, mut result) = mpsc::channel(10); let msg_clone = msg.clone(); let bridge_sender = ctx.bridge.ev_listener_tx.clone(); - let self_addr = ctx - .bridge - .op_manager - .ring - .connection_manager - .get_own_addr(); let op_manager = ctx.bridge.op_manager.clone(); let gateways = ctx.gateways.clone(); @@ -520,31 +500,40 @@ impl P2pConnManager { tracing::info!( tx = %tx, - target = %target_peer_loc, - "connect_peer: dispatched connect request, waiting asynchronously" + target_addr = %target_addr, + "Dispatched ConnectPeer, waiting asynchronously for connection" ); + // Spawn a task to wait for connection and then send the message + let target_peer_for_resend = target_peer.clone(); tokio::spawn(async move { match timeout(Duration::from_secs(20), result.recv()).await { - Ok(Some(Ok(_))) => { + Ok(Some(Ok((connected_addr, _)))) => { tracing::info!( tx = %tx, - target = %target_peer_loc, - self_addr = ?self_addr, - "connect_peer: connection established, rescheduling message send" + target_addr = %target_addr, + connected_addr = %connected_addr, + "Connection established, rescheduling message send" ); + // Construct PeerKeyLocation with the connected address + // and the original public key + let connected_peer = PeerKeyLocation::new( + target_peer_for_resend.pub_key().clone(), + connected_addr, + ); + // Re-send via P2pBridge which will route correctly if let Err(e) = bridge_sender .send(Left(( - target_peer_loc.clone(), + connected_peer, Box::new(msg_clone), ))) .await { tracing::error!( tx = %tx, - target = %target_peer_loc, - "connect_peer: failed to reschedule message after connection: {:?}", + target_addr = %target_addr, + "Failed to reschedule message after connection: {:?}", e ); } @@ -552,8 +541,8 @@ impl P2pConnManager { Ok(Some(Err(e))) => { tracing::error!( tx = %tx, - target = %target_peer_loc, - "connect_peer: connection attempt returned error: {:?}", + target_addr = %target_addr, + "Connection attempt failed: {:?}", e ); if let Err(err) = @@ -562,17 +551,16 @@ impl P2pConnManager { { tracing::warn!( tx = %tx, - target = %target_peer_loc, ?err, - "connect_peer: failed to propagate aborted operation" + "Failed to propagate aborted operation" ); } } Ok(None) => { tracing::error!( tx = %tx, - target = %target_peer_loc, - "connect_peer: response channel closed before connection result" + target_addr = %target_addr, + "Response channel closed before connection result" ); if let Err(err) = handle_aborted_op(tx, &op_manager, &gateways) @@ -580,17 +568,16 @@ impl P2pConnManager { { tracing::warn!( tx = %tx, - target = %target_peer_loc, ?err, - "connect_peer: failed to propagate aborted operation" + "Failed to propagate aborted operation" ); } } Err(_) => { tracing::error!( tx = %tx, - target = %target_peer_loc, - "connect_peer: timeout waiting for connection result" + target_addr = %target_addr, + "Timeout waiting for connection result" ); if let Err(err) = handle_aborted_op(tx, &op_manager, &gateways) @@ -598,9 +585,8 @@ impl P2pConnManager { { tracing::warn!( tx = %tx, - target = %target_peer_loc, ?err, - "connect_peer: failed to propagate aborted operation" + "Failed to propagate aborted operation" ); } } @@ -609,53 +595,6 @@ impl P2pConnManager { } } } - ConnEvent::OutboundMessageWithTarget { target_addr, msg } => { - // This variant uses an explicit target address from P2pBridge::send(), - // which is critical for NAT scenarios where the address in the message - // differs from the actual transport address we should send to. - tracing::info!( - tx = %msg.id(), - msg_type = %msg, - target_addr = %target_addr, - msg_target = ?msg.target().and_then(|t| t.socket_addr()), - "Sending outbound message with explicit target address (NAT routing)" - ); - - // Look up the connection using the explicit target address - let peer_connection = ctx.connections.get(&target_addr); - - match peer_connection { - Some(peer_connection) => { - if let Err(e) = - peer_connection.sender.send(Left(msg.clone())).await - { - tracing::error!( - tx = %msg.id(), - target_addr = %target_addr, - "Failed to send message to peer: {}", e - ); - } else { - tracing::info!( - tx = %msg.id(), - target_addr = %target_addr, - "Message successfully sent to peer connection via explicit address" - ); - } - } - None => { - // No existing connection - this is unexpected for NAT scenarios - // since we should have the connection from the original request - tracing::error!( - tx = %msg.id(), - target_addr = %target_addr, - msg_target = ?msg.target().and_then(|t| t.socket_addr()), - connections = ?ctx.connections.keys().collect::>(), - "No connection found for explicit target address - NAT routing failed" - ); - ctx.bridge.op_manager.completed(*msg.id()); - } - } - } ConnEvent::TransportClosed { remote_addr, error } => { tracing::debug!( remote = %remote_addr, @@ -888,6 +827,7 @@ impl P2pConnManager { } NodeEvent::QueryConnections { callback } => { // Reconstruct PeerKeyLocations from stored connections + let total_connections = ctx.connections.len(); let connections: Vec = ctx .connections .iter() @@ -898,6 +838,12 @@ impl P2pConnManager { }) }) .collect(); + let with_pub_key = connections.len(); + tracing::info!( + total_connections, + with_pub_key, + "QueryConnections: returning connections" + ); match timeout( Duration::from_secs(1), callback.send(QueryResult::Connections(connections)), @@ -1500,24 +1446,32 @@ impl P2pConnManager { .socket_addr() .unwrap_or_else(|| SocketAddr::new(IpAddr::V4(Ipv4Addr::UNSPECIFIED), 0)); - if peer_addr.ip().is_unspecified() { - if let Some((existing_addr, _)) = self.connection_entry_by_pub_key(peer.pub_key()) { - peer_addr = existing_addr; - peer = PeerKeyLocation::new(peer.pub_key().clone(), existing_addr); + // IMPORTANT: Always try pub_key lookup first, not just for unspecified addresses. + // The peer's advertised address (from PeerKeyLocation) may differ from the actual + // TCP connection's remote address stored in self.connections. For example: + // - PeerKeyLocation may have the peer's listening port (e.g., 37791) + // - self.connections is keyed by the actual TCP source port (ephemeral port) + // By looking up via pub_key (populated when we receive the first message from this + // peer), we can find the correct connection entry regardless of address mismatch. + if let Some((existing_addr, _)) = self.connection_entry_by_pub_key(peer.pub_key()) { + if existing_addr != peer_addr { tracing::info!( tx = %tx, remote = %peer, - fallback_addr = %peer_addr, + advertised_addr = %peer_addr, + actual_addr = %existing_addr, transient, - "ConnectPeer provided unspecified address; using existing connection address" - ); - } else { - tracing::debug!( - tx = %tx, - transient, - "ConnectPeer received unspecified address without existing connection reference" + "ConnectPeer: using existing connection address (differs from advertised)" ); } + peer_addr = existing_addr; + peer = PeerKeyLocation::new(peer.pub_key().clone(), existing_addr); + } else if peer_addr.ip().is_unspecified() { + tracing::debug!( + tx = %tx, + transient, + "ConnectPeer received unspecified address without existing connection reference" + ); } tracing::info!( @@ -1630,13 +1584,26 @@ impl P2pConnManager { entry.pub_key = Some(peer.pub_key().clone()); self.addr_by_pub_key .insert(peer.pub_key().clone(), peer_addr); - tracing::debug!( + tracing::info!( tx = %tx, %peer_addr, pub_key = %peer.pub_key(), "connect_peer: updated transport entry with peer identity" ); + } else { + tracing::info!( + tx = %tx, + %peer_addr, + existing_pub_key = ?entry.pub_key, + "connect_peer: transport entry already has pub_key" + ); } + } else { + tracing::warn!( + tx = %tx, + %peer_addr, + "connect_peer: no transport entry found for peer_addr" + ); } // Return the remote peer's address we are connected to. @@ -2328,37 +2295,41 @@ impl P2pConnManager { fn handle_notification_msg(&self, msg: Option>) -> EventResult { match msg { Some(Left(msg)) => { - // Check if message has a target peer - if so, route as outbound, otherwise process locally - if let Some(target) = msg.target() { - let self_pub_key: &TransportPublicKey = - &self.bridge.op_manager.ring.connection_manager.pub_key; - + // With hop-by-hop routing, messages no longer embed target. + // For initial requests (GET, PUT, Subscribe, Connect), extract next hop from operation state. + // For other messages, process locally (they'll be routed through network_bridge.send()). + let tx = *msg.id(); + + // Try to get next hop from operation state for initial outbound requests + // Uses peek methods to avoid pop/push overhead + if let Some(next_hop_addr) = self.bridge.op_manager.peek_next_hop_addr(&tx) { tracing::debug!( tx = %msg.id(), msg_type = %msg, - target_peer = %target, - self_peer = ?self_pub_key, - target_equals_self = (target.pub_key() == self_pub_key), - "[ROUTING] handle_notification_msg: Checking if message targets self" + next_hop = %next_hop_addr, + "handle_notification_msg: Found next hop in operation state, routing as OutboundMessage" ); - - if target.pub_key() != self_pub_key { - // Message targets another peer - send as outbound - tracing::info!( - tx = %msg.id(), - msg_type = %msg, - target_peer = %target, - "handle_notification_msg: Message has target peer, routing as OutboundMessage" - ); - return EventResult::Event(ConnEvent::OutboundMessage(msg).into()); + // For UPDATE operations only: mark complete after routing because UPDATE + // uses fire-and-forget semantics (client result already sent). + // Other operations (GET, PUT, Subscribe, Connect) expect responses and + // must remain in the state map until their response handling completes. + if tx.transaction_type() == TransactionType::Update { + self.bridge.op_manager.completed(tx); } + return EventResult::Event( + ConnEvent::OutboundMessageWithTarget { + target_addr: next_hop_addr, + msg, + } + .into(), + ); } - // Message targets self or has no target - process locally + // Message has no next hop or couldn't get from op state - process locally tracing::debug!( tx = %msg.id(), msg_type = %msg, - "handle_notification_msg: Received NetMessage notification, converting to InboundMessage" + "handle_notification_msg: No next hop found, processing locally" ); EventResult::Event(ConnEvent::InboundMessage(msg.into()).into()) } @@ -2879,21 +2850,13 @@ fn decode_msg(data: &[u8]) -> Result { fn extract_sender_from_message(msg: &NetMessage) -> Option { match msg { NetMessage::V1(msg_v1) => match msg_v1 { - NetMessageV1::Connect(connect_msg) => match connect_msg { - // Connect Request/Response no longer have from/sender fields - - // use connection-based routing from transport layer source address - ConnectMsg::Response { .. } => None, - ConnectMsg::Request { .. } => None, - ConnectMsg::ObservedAddress { target, .. } => Some(target.clone()), - }, - // Get messages no longer have sender - use connection-based routing - NetMessageV1::Get(_) => None, - // Put messages no longer have sender - use connection-based routing - NetMessageV1::Put(_) => None, - // Update messages no longer have sender - use connection-based routing - NetMessageV1::Update(_) => None, - // Subscribe messages no longer have sender - use connection-based routing - NetMessageV1::Subscribe(_) => None, + // All operations now use hop-by-hop routing via upstream_addr in operation state. + // No sender/target is embedded in messages - routing is determined by transport layer. + NetMessageV1::Connect(_) + | NetMessageV1::Get(_) + | NetMessageV1::Put(_) + | NetMessageV1::Update(_) + | NetMessageV1::Subscribe(_) => None, // Other message types don't have sender info _ => None, }, @@ -2903,21 +2866,13 @@ fn extract_sender_from_message(msg: &NetMessage) -> Option { fn extract_sender_from_message_mut(msg: &mut NetMessage) -> Option<&mut PeerKeyLocation> { match msg { NetMessage::V1(msg_v1) => match msg_v1 { - NetMessageV1::Connect(connect_msg) => match connect_msg { - // Connect Request/Response no longer have from/sender fields - - // use connection-based routing from transport layer source address - ConnectMsg::Response { .. } => None, - ConnectMsg::Request { .. } => None, - ConnectMsg::ObservedAddress { target, .. } => Some(target), - }, - // Get messages no longer have sender - use connection-based routing - NetMessageV1::Get(_) => None, - // Put messages no longer have sender - use connection-based routing - NetMessageV1::Put(_) => None, - // Update messages no longer have sender - use connection-based routing - NetMessageV1::Update(_) => None, - // Subscribe messages no longer have sender - use connection-based routing - NetMessageV1::Subscribe(_) => None, + // All operations now use hop-by-hop routing via upstream_addr in operation state. + // No sender/target is embedded in messages - routing is determined by transport layer. + NetMessageV1::Connect(_) + | NetMessageV1::Get(_) + | NetMessageV1::Put(_) + | NetMessageV1::Update(_) + | NetMessageV1::Subscribe(_) => None, _ => None, }, } diff --git a/crates/core/src/node/op_state_manager.rs b/crates/core/src/node/op_state_manager.rs index 3556fb798..c301e19eb 100644 --- a/crates/core/src/node/op_state_manager.rs +++ b/crates/core/src/node/op_state_manager.rs @@ -477,6 +477,49 @@ impl OpManager { Ok(()) } + /// Peek at the next hop address for an operation without removing it. + /// Used by hop-by-hop routing to determine where to send initial outbound messages. + /// Returns None if the operation doesn't exist or doesn't have a next hop address. + pub fn peek_next_hop_addr(&self, id: &Transaction) -> Option { + if self.ops.completed.contains(id) || self.ops.under_progress.contains(id) { + return None; + } + match id.transaction_type() { + TransactionType::Connect => self + .ops + .connect + .get(id) + .and_then(|op| op.get_next_hop_addr()), + TransactionType::Put => self.ops.put.get(id).and_then(|op| op.get_next_hop_addr()), + TransactionType::Get => self.ops.get.get(id).and_then(|op| op.get_next_hop_addr()), + TransactionType::Subscribe => self + .ops + .subscribe + .get(id) + .and_then(|op| op.get_next_hop_addr()), + TransactionType::Update => self + .ops + .update + .get(id) + .and_then(|op| op.get_next_hop_addr()), + } + } + + /// Peek at the full target peer (including public key) without removing the operation. + /// Used when establishing new connections where we need the public key for handshake. + pub fn peek_target_peer(&self, id: &Transaction) -> Option { + if self.ops.completed.contains(id) || self.ops.under_progress.contains(id) { + return None; + } + match id.transaction_type() { + TransactionType::Connect => { + self.ops.connect.get(id).and_then(|op| op.get_target_peer()) + } + // Other operations only store addresses, not full peer info + _ => None, + } + } + pub fn pop(&self, id: &Transaction) -> Result, OpNotAvailable> { if self.ops.completed.contains(id) { return Err(OpNotAvailable::Completed); @@ -653,9 +696,11 @@ impl OpManager { /// Notify the operation manager that a transaction is being transacted over the network. pub fn sending_transaction(&self, peer: &PeerKeyLocation, msg: &NetMessage) { let transaction = msg.id(); - if let (Some(recipient), Some(target)) = (msg.target(), msg.requested_location()) { + // With hop-by-hop routing, record the request using the peer we're sending to + // and the message's requested location (contract location) + if let Some(target_loc) = msg.requested_location() { self.ring - .record_request(recipient.clone(), target, transaction.transaction_type()); + .record_request(peer.clone(), target_loc, transaction.transaction_type()); } if let Some(peer_addr) = peer.socket_addr() { self.ring @@ -767,7 +812,13 @@ async fn garbage_cleanup_task( } else { ops.under_progress.remove(&tx); ops.completed.remove(&tx); - tracing::debug!("Transaction timed out: {tx}"); + tracing::info!( + tx = %tx, + tx_type = ?tx.transaction_type(), + elapsed_ms = tx.elapsed().as_millis(), + ttl_ms = crate::config::OPERATION_TTL.as_millis(), + "Transaction timed out" + ); // Check if this is a child operation and propagate timeout to parent if let Some(parent_tx) = sub_op_tracker.get_parent(tx) { @@ -812,7 +863,13 @@ async fn garbage_cleanup_task( TransactionType::Update => ops.update.remove(&tx).is_some(), }; if removed { - tracing::debug!("Transaction timed out: {tx}"); + tracing::info!( + tx = %tx, + tx_type = ?tx.transaction_type(), + elapsed_ms = tx.elapsed().as_millis(), + ttl_ms = crate::config::OPERATION_TTL.as_millis(), + "Transaction timed out" + ); // Check if this is a child operation and propagate timeout to parent if let Some(parent_tx) = sub_op_tracker.get_parent(tx) { diff --git a/crates/core/src/operations/connect.rs b/crates/core/src/operations/connect.rs index eccee8e8e..c5f8d41d6 100644 --- a/crates/core/src/operations/connect.rs +++ b/crates/core/src/operations/connect.rs @@ -33,23 +33,23 @@ const RECENCY_COOLDOWN: Duration = Duration::from_secs(30); #[derive(Debug, Clone, Serialize, Deserialize)] pub(crate) enum ConnectMsg { /// Join request that travels *towards* the target location. + /// Routing is determined by `payload.desired_location`, not an embedded target. /// The sender is determined from the transport layer's source address. Request { id: Transaction, - target: PeerKeyLocation, payload: ConnectRequest, }, /// Join acceptance that travels back along the discovered path. + /// Routing uses hop-by-hop via `upstream_addr` stored in operation state. /// The sender is determined from the transport layer's source address. Response { id: Transaction, - target: PeerKeyLocation, payload: ConnectResponse, }, /// Informational packet letting the joiner know the address a peer observed. + /// Routing uses hop-by-hop via `upstream_addr` stored in operation state. ObservedAddress { id: Transaction, - target: PeerKeyLocation, address: SocketAddr, }, } @@ -63,15 +63,6 @@ impl InnerMessage for ConnectMsg { } } - #[allow(refining_impl_trait)] - fn target(&self) -> Option<&PeerKeyLocation> { - match self { - ConnectMsg::Request { target, .. } - | ConnectMsg::Response { target, .. } - | ConnectMsg::ObservedAddress { target, .. } => Some(target), - } - } - fn requested_location(&self) -> Option { match self { ConnectMsg::Request { payload, .. } => Some(payload.desired_location), @@ -83,40 +74,17 @@ impl InnerMessage for ConnectMsg { impl fmt::Display for ConnectMsg { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - ConnectMsg::Request { - target, payload, .. - } => write!( + ConnectMsg::Request { payload, .. } => write!( f, - "ConnectRequest {{ target: {target}, desired: {}, ttl: {}, joiner: {} }}", + "ConnectRequest {{ desired: {}, ttl: {}, joiner: {} }}", payload.desired_location, payload.ttl, payload.joiner ), - ConnectMsg::Response { - target, payload, .. - } => write!( - f, - "ConnectResponse {{ target: {target}, acceptor: {} }}", - payload.acceptor, - ), - ConnectMsg::ObservedAddress { - target, address, .. - } => { - write!( - f, - "ObservedAddress {{ target: {target}, address: {address} }}" - ) + ConnectMsg::Response { payload, .. } => { + write!(f, "ConnectResponse {{ acceptor: {} }}", payload.acceptor,) + } + ConnectMsg::ObservedAddress { address, .. } => { + write!(f, "ObservedAddress {{ address: {address} }}") } - } - } -} - -impl ConnectMsg { - /// Returns the socket address of the target peer for routing. - /// Used by OperationResult to determine where to send the message. - pub fn target_addr(&self) -> Option { - match self { - ConnectMsg::Request { target, .. } - | ConnectMsg::Response { target, .. } - | ConnectMsg::ObservedAddress { target, .. } => target.socket_addr(), } } } @@ -200,8 +168,6 @@ pub(crate) struct RelayActions { pub expect_connection_from: Option, pub forward: Option<(PeerKeyLocation, ConnectRequest)>, pub observed_address: Option<(PeerKeyLocation, SocketAddr)>, - /// The target to send the ConnectResponse to (with observed external address). - pub response_target: Option, } #[derive(Debug, Clone)] @@ -309,8 +275,7 @@ impl RelayState { acceptor: acceptor.clone(), }); actions.expect_connection_from = Some(self.request.joiner.clone()); - // Use the joiner with updated observed address for response routing - actions.response_target = Some(self.request.joiner.clone()); + // Response is routed hop-by-hop via upstream_addr, no target embedded in message tracing::info!( acceptor_pub_key = %self_loc.pub_key(), joiner_pub_key = %self.request.joiner.pub_key(), @@ -518,7 +483,8 @@ impl JoinerState { pub(crate) struct ConnectOp { pub(crate) id: Transaction, pub(crate) state: Option, - pub(crate) gateway: Option>, + /// The peer we sent/forwarded the connect request to (first hop from joiner's perspective). + pub(crate) first_hop: Option>, pub(crate) backoff: Option, pub(crate) desired_location: Option, /// Tracks when we last forwarded this connect to a peer, to avoid hammering the same @@ -570,7 +536,7 @@ impl ConnectOp { Self { id, state: Some(state), - gateway: gateway.map(Box::new), + first_hop: gateway.map(Box::new), backoff, desired_location: Some(desired_location), recency: HashMap::new(), @@ -595,7 +561,7 @@ impl ConnectOp { Self { id, state: Some(state), - gateway: None, + first_hop: None, backoff: None, desired_location: None, recency: HashMap::new(), @@ -629,7 +595,19 @@ impl ConnectOp { } pub(crate) fn gateway(&self) -> Option<&PeerKeyLocation> { - self.gateway.as_deref() + self.first_hop.as_deref() + } + + /// Get the next hop address if this operation is in a state that needs to send + /// an outbound message. For Connect, this is the first hop peer we're connecting through. + pub(crate) fn get_next_hop_addr(&self) -> Option { + self.first_hop.as_deref().and_then(|g| g.socket_addr()) + } + + /// Get the full target peer (including public key) for connection establishment. + /// For Connect operations, this returns the gateway peer. + pub(crate) fn get_target_peer(&self) -> Option { + self.first_hop.as_deref().cloned() } fn take_desired_location(&mut self) -> Option { @@ -677,7 +655,6 @@ impl ConnectOp { let msg = ConnectMsg::Request { id: tx, - target, payload: request, }; @@ -694,13 +671,27 @@ impl ConnectOp { tracing::info!( acceptor_pub_key = %response.acceptor.pub_key(), acceptor_loc = ?response.acceptor.location(), + target_connections = state.target_connections, + accepted_count = state.accepted.len(), "connect: joiner received ConnectResponse" ); let result = state.register_acceptance(response, now); if let Some(new_acceptor) = &result.new_acceptor { self.recency.remove(&new_acceptor.peer); } + tracing::info!( + tx = %self.id, + satisfied = result.satisfied, + accepted_count = state.accepted.len(), + target_connections = state.target_connections, + "connect: register_acceptance result" + ); if result.satisfied { + tracing::info!( + tx = %self.id, + elapsed_ms = self.id.elapsed().as_millis(), + "Connect operation completed" + ); self.state = Some(ConnectState::Completed); } Some(result) @@ -824,14 +815,12 @@ impl Operation for ConnectOp { let actions = self.handle_request(&env, upstream_addr, payload.clone(), &estimator); - if let Some((target, address)) = actions.observed_address { + if let Some((_target, address)) = actions.observed_address { let msg = ConnectMsg::ObservedAddress { id: self.id, - target: target.clone(), address, }; - // Route through upstream (where the request came from) since we may - // not have a direct connection to the target. + // Route through upstream (where the request came from) using hop-by-hop routing. // Note: upstream_addr is already validated from source_addr at the start of this match arm. network_bridge .send(upstream_addr, NetMessage::V1(NetMessageV1::Connect(msg))) @@ -900,7 +889,6 @@ impl Operation for ConnectOp { self.recency.insert(next.clone(), Instant::now()); let forward_msg = ConnectMsg::Request { id: self.id, - target: next.clone(), payload: request, }; if let Some(next_addr) = next.socket_addr() { @@ -914,20 +902,12 @@ impl Operation for ConnectOp { } if let Some(response) = actions.accept_response { - // response_target has the joiner's address (filled in from packet source) - let response_target = actions.response_target.ok_or_else(|| { - OpError::from(ConnectionError::TransportError( - "ConnectMsg::Request: accept_response but no response_target" - .into(), - )) - })?; let response_msg = ConnectMsg::Response { id: self.id, - target: response_target, payload: response, }; // Route the response through upstream (where the request came from) - // since we may not have a direct connection to the joiner. + // using hop-by-hop routing. We don't embed target in the message. // Note: upstream_addr is already validated from source_addr at the start of this match arm. network_bridge .send( @@ -1009,11 +989,13 @@ impl Operation for ConnectOp { tracing::info!( %peer_id, tx=%self.id, + elapsed_ms = self.id.elapsed().as_millis(), "connect joined peer" ); } else { tracing::warn!( tx=%self.id, + elapsed_ms = self.id.elapsed().as_millis(), "connect ConnectPeer failed" ); } @@ -1027,14 +1009,13 @@ impl Operation for ConnectOp { Ok(store_operation_state(&mut self)) } else if let Some(ConnectState::Relaying(state)) = self.state.as_mut() { - // Relay: forward response toward joiner - let (forwarded, desired, upstream_addr, joiner) = { + // Relay: forward response toward joiner via hop-by-hop routing + let (forwarded, desired, upstream_addr) = { let st = state; ( st.forwarded_to.clone(), st.request.desired_location, st.upstream_addr, - st.request.joiner.clone(), ) }; if let Some(fwd) = forwarded { @@ -1046,10 +1027,9 @@ impl Operation for ConnectOp { acceptor_pub_key = %payload.acceptor.pub_key(), "connect: forwarding response towards joiner" ); - // Forward response toward the joiner via upstream + // Forward response toward the joiner via upstream using hop-by-hop routing let forward_msg = ConnectMsg::Response { id: self.id, - target: joiner, payload, }; network_bridge @@ -1116,16 +1096,16 @@ fn store_operation_state(op: &mut ConnectOp) -> OperationResult { fn store_operation_state_with_msg(op: &mut ConnectOp, msg: Option) -> OperationResult { let state_clone = op.state.clone(); - // Extract target address from the message for routing - let target_addr = msg.as_ref().and_then(|m| m.target_addr()); + // Hop-by-hop routing: messages are sent directly via network_bridge.send() with + // explicit target addresses. No next_hop is embedded in the result. OperationResult { return_msg: msg.map(|m| NetMessage::V1(NetMessageV1::Connect(m))), - target_addr, + next_hop: None, state: state_clone.map(|state| { OpEnum::Connect(Box::new(ConnectOp { id: op.id, state: Some(state), - gateway: op.gateway.clone(), + first_hop: op.first_hop.clone(), backoff: op.backoff.clone(), desired_location: op.desired_location, recency: op.recency.clone(), @@ -1202,12 +1182,18 @@ pub(crate) async fn join_ring_request( op_manager.connect_forward_estimator.clone(), ); - op.gateway = Some(Box::new(gateway.clone())); + op.first_hop = Some(Box::new(gateway.clone())); if let Some(backoff) = backoff { op.backoff = Some(backoff); } - tracing::info!(gateway = %gateway.pub_key(), tx = %tx, "Attempting network join using connect"); + tracing::info!( + gateway = %gateway.pub_key(), + tx = %tx, + target_connections, + ttl, + "Attempting network join using connect" + ); op_manager .notify_op_change( @@ -1627,12 +1613,7 @@ mod tests { ); match msg { - ConnectMsg::Request { - target: msg_target, - payload, - .. - } => { - assert_eq!(msg_target.pub_key(), target.pub_key()); + ConnectMsg::Request { payload, .. } => { assert_eq!(payload.desired_location, desired); assert_eq!(payload.ttl, ttl); // visited now contains SocketAddr, not PeerKeyLocation @@ -1713,10 +1694,10 @@ mod tests { assert_eq!(expect_conn.pub_key(), joiner.pub_key()); } - /// Regression test for issue #2141: ConnectResponse must be sent to the joiner's - /// observed external address, not the original private/NAT address. + /// Regression test for issue #2141: expect_connection_from must have the joiner's + /// observed external address for NAT hole-punching to work. #[test] - fn connect_response_uses_observed_address_not_private() { + fn connect_expect_connection_uses_observed_address() { // Joiner behind NAT: original creation used private address, but the network bridge // fills in the observed public address from the packet source. let private_addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(192, 168, 1, 100)), 9000); @@ -1756,16 +1737,17 @@ mod tests { "relay should accept joiner" ); - // Critical: response_target must have the observed public address, not private - let response_target = actions - .response_target - .expect("response_target should be set when accepting"); + // Critical: expect_connection_from must have the observed public address + // so the acceptor can initiate hole-punching to the correct address + let expect_conn = actions + .expect_connection_from + .expect("expect_connection_from should be set when accepting"); assert_eq!( - response_target + expect_conn .socket_addr() - .expect("response target must have address"), + .expect("expect_connection_from must have address"), observed_public_addr, - "response_target must use observed external address ({}) not private address ({})", + "expect_connection_from must use observed external address ({}) not private address ({})", observed_public_addr, private_addr ); diff --git a/crates/core/src/operations/get.rs b/crates/core/src/operations/get.rs index 52d32ee6f..81a1928d3 100644 --- a/crates/core/src/operations/get.rs +++ b/crates/core/src/operations/get.rs @@ -211,18 +211,18 @@ pub(crate) async fn request_get( requester: None, current_hop: op_manager.ring.max_hops_to_live, subscribe, - current_target: target.clone(), + next_hop: target.clone(), tried_peers, alternatives: candidates, attempts_at_hop: 1, skip_list: skip_list.clone(), }); - let msg = GetMsg::RequestGet { + let msg = GetMsg::Request { id, key: key_val, - target: target.clone(), fetch_contract, + htl: op_manager.ring.max_hops_to_live, skip_list, }; @@ -273,7 +273,7 @@ enum GetState { /// Note: With connection-based routing, this is only used for state tracking, /// not for response routing (which uses upstream_addr instead). #[allow(dead_code)] - current_target: PeerKeyLocation, + next_hop: PeerKeyLocation, /// Peers we've already tried at this hop level tried_peers: HashSet, /// Alternative peers we could still try at this hop @@ -391,24 +391,22 @@ impl GetOp { pub(crate) async fn handle_abort(self, op_manager: &OpManager) -> Result<(), OpError> { if let Some(GetState::AwaitingResponse { key, - current_target: _, - skip_list, + next_hop: _, + skip_list: _, .. }) = &self.state { - // We synthesize an empty ReturnGet back to ourselves to reuse the existing + // We synthesize an empty Response back to ourselves to reuse the existing // fallback path that tries the next candidate. The state stays // AwaitingResponse so the retry logic can pick up from the stored // alternatives/skip list. - let return_msg = GetMsg::ReturnGet { + let return_msg = GetMsg::Response { id: self.id, key: *key, value: StoreResponse { state: None, contract: None, }, - target: op_manager.ring.connection_manager.own_location(), - skip_list: skip_list.clone(), }; op_manager @@ -446,6 +444,15 @@ impl GetOp { .into()), } } + + /// Get the next hop address if this operation is in a state that needs to send + /// an outbound message. Used for hop-by-hop routing. + pub(crate) fn get_next_hop_addr(&self) -> Option { + match &self.state { + Some(GetState::AwaitingResponse { next_hop, .. }) => next_hop.socket_addr(), + _ => None, + } + } } impl Operation for GetOp { @@ -524,13 +531,20 @@ impl Operation for GetOp { }); match input { - GetMsg::RequestGet { + GetMsg::Request { key, id, - target, fetch_contract, + htl, skip_list, } => { + // Handle GET request - either initial or forwarded + let ring_max_htl = op_manager.ring.max_hops_to_live.max(1); + let htl = (*htl).min(ring_max_htl); + let id = *id; + let key: ContractKey = *key; + let fetch_contract = *fetch_contract; + // Use sender_from_addr for logging (falls back to source_addr if lookup fails) let sender_display = sender_from_addr .as_ref() @@ -543,11 +557,11 @@ impl Operation for GetOp { tracing::info!( tx = %id, %key, - target = %target, sender = %sender_display, - fetch_contract = *fetch_contract, + fetch_contract, + htl, skip = ?skip_list, - "GET: received RequestGet" + "GET: received Request" ); // Use sender_from_addr (looked up from source_addr) instead of message field @@ -555,7 +569,7 @@ impl Operation for GetOp { tracing::warn!( tx = %id, %key, - "GET: RequestGet without sender lookup - cannot process" + "GET: Request without sender lookup - cannot process" ); return Err(OpError::invalid_transition(self.id)); }; @@ -564,12 +578,35 @@ impl Operation for GetOp { if matches!(self.state, Some(GetState::Finished { .. })) { tracing::debug!( tx = %id, - "GET: RequestGet received for already completed operation, ignoring duplicate request" + "GET: Request received for already completed operation, ignoring duplicate request" ); // Return the operation in its current state new_state = self.state; return_msg = None; result = self.result; + } else if htl == 0 { + // HTL exhausted - return empty response + tracing::warn!( + tx = %id, + %key, + sender = %sender_display, + "Dropping GET Request with zero HTL" + ); + return build_op_result( + id, + None, + Some(GetMsg::Response { + id, + key, + value: StoreResponse { + state: None, + contract: None, + }, + }), + None, + stats, + self.upstream_addr, + ); } else { // Normal case: operation should be in ReceivedRequest or AwaitingResponse state debug_assert!(matches!( @@ -580,24 +617,35 @@ impl Operation for GetOp { tracing::debug!( tx = %id, %key, - target = %target, - "GET: RequestGet processing in state {:?}", + htl, + "GET: Request processing in state {:?}", self.state ); - // Initialize stats for tracking the operation - stats = Some(Box::new(GetStats { - contract_location: Location::from(key), - next_peer: None, - transfer_time: None, - first_response_time: None, - })); + // Initialize/update stats for tracking the operation + let this_peer = op_manager.ring.connection_manager.own_location(); + if stats.is_none() { + stats = Some(Box::new(GetStats { + contract_location: Location::from(&key), + next_peer: Some(this_peer.clone()), + transfer_time: None, + first_response_time: None, + })); + } else if let Some(s) = stats.as_mut() { + s.next_peer = Some(this_peer.clone()); + } + + // Update skip list with current peer address + let mut new_skip_list = skip_list.clone(); + if let Some(addr) = this_peer.socket_addr() { + new_skip_list.insert(addr); + } // First check if we have the contract locally before forwarding let get_result = op_manager .notify_contract_handler(ContractHandlerEvent::GetQuery { - key: *key, - return_contract_code: *fetch_contract, + key, + return_contract_code: fetch_contract, }) .await; @@ -610,11 +658,12 @@ impl Operation for GetOp { }), .. }) => { - if *fetch_contract && contract.is_none() { + if fetch_contract && contract.is_none() { tracing::debug!( tx = %id, %key, - "GET: state available locally but contract code missing; continuing search" + %this_peer, + "GET: state available locally but contract code missing; continuing search" ); None } else { @@ -629,8 +678,8 @@ impl Operation for GetOp { tracing::info!( tx = %id, %key, - fetch_contract = *fetch_contract, - "GET: contract found locally in RequestGet handler" + fetch_contract, + "GET: contract found locally" ); // Check if this is a forwarded request or a local request @@ -638,27 +687,55 @@ impl Operation for GetOp { Some(GetState::ReceivedRequest { requester }) if requester.is_some() => { - // This is a forwarded request - send result back to requester - let requester = requester.clone().unwrap(); - tracing::debug!(tx = %id, "Returning contract {} to requester {}", key, requester); + // This is a forwarded request - send result back to upstream + tracing::debug!(tx = %id, "Returning contract {} to upstream", key); + new_state = None; + return_msg = Some(GetMsg::Response { + id, + key, + value: StoreResponse { + state: Some(state), + contract, + }, + }); + } + Some(GetState::AwaitingResponse { requester, .. }) + if requester.is_some() => + { + // Forward contract to upstream new_state = None; - return_msg = Some(GetMsg::ReturnGet { - id: *id, - key: *key, + tracing::debug!(tx = %id, "Returning contract {} to upstream", key); + return_msg = Some(GetMsg::Response { + id, + key, value: StoreResponse { state: Some(state), contract, }, - target: requester, - skip_list: skip_list.clone(), + }); + } + Some(GetState::AwaitingResponse { + requester: None, .. + }) => { + // Operation completed for original requester + tracing::debug!( + tx = %id, + "Completed operation, get response received for contract {key}" + ); + new_state = Some(GetState::Finished { key }); + return_msg = None; + result = Some(GetResult { + key, + state, + contract, }); } _ => { // This is the original requester (locally initiated request) - new_state = Some(GetState::Finished { key: *key }); + new_state = Some(GetState::Finished { key }); return_msg = None; result = Some(GetResult { - key: *key, + key, state, contract, }); @@ -669,23 +746,18 @@ impl Operation for GetOp { tracing::debug!( tx = %id, %key, - "Contract not found locally (or missing code), forwarding to {}", - target + %this_peer, + "Contract not found @ peer {}, forwarding to other peers", + sender ); - // Prepare skip list with own address - let own_loc = op_manager.ring.connection_manager.own_location(); - let mut new_skip_list = skip_list.clone(); - if let Some(addr) = own_loc.socket_addr() { - new_skip_list.insert(addr); - } - // Forward using standard routing helper + // Note: target is determined by routing, sender from source_addr return try_forward_or_return( - *id, - *key, - (op_manager.ring.max_hops_to_live.max(1), *fetch_contract), - (target.clone(), sender.clone()), + id, + key, + (htl, fetch_contract), + (this_peer, sender.clone()), new_skip_list, op_manager, stats, @@ -695,177 +767,10 @@ impl Operation for GetOp { } } } - GetMsg::SeekNode { - key, - id, - fetch_contract, - target, - htl, - skip_list, - } => { - let ring_max_htl = op_manager.ring.max_hops_to_live.max(1); - let htl = (*htl).min(ring_max_htl); - let id = *id; - let key: ContractKey = *key; - let fetch_contract = *fetch_contract; - let this_peer = target.clone(); - - // Use sender_from_addr (looked up from source_addr) instead of message field - let Some(sender) = sender_from_addr.clone() else { - tracing::warn!( - tx = %id, - %key, - "GET: SeekNode without sender lookup - cannot process" - ); - return Err(OpError::invalid_transition(self.id)); - }; - - if htl == 0 { - let sender_display = sender.to_string(); - tracing::warn!( - tx = %id, - %key, - sender = %sender_display, - "Dropping GET SeekNode with zero HTL" - ); - return build_op_result( - id, - None, - Some(GetMsg::ReturnGet { - id, - key, - value: StoreResponse { - state: None, - contract: None, - }, - target: sender.clone(), - skip_list: skip_list.clone(), - }), - None, - stats, - self.upstream_addr, - ); - } - - // Update stats with next peer - if let Some(s) = stats.as_mut() { - s.next_peer = Some(this_peer.clone()); - } - - // Update skip list with current peer address - let mut new_skip_list = skip_list.clone(); - if let Some(addr) = this_peer.socket_addr() { - new_skip_list.insert(addr); - } - - // Try to get contract from local storage - let get_result = op_manager - .notify_contract_handler(ContractHandlerEvent::GetQuery { - key, - return_contract_code: fetch_contract, - }) - .await; - - // Process get result - let local_value = match get_result { - Ok(ContractHandlerEvent::GetResponse { - response: - Ok(StoreResponse { - state: Some(state), - contract, - }), - .. - }) => { - if fetch_contract && contract.is_none() { - tracing::debug!( - tx = %id, - %key, - %this_peer, - "Contract state available but code missing @ peer {}, retrying", - sender - ); - None - } else { - Some((state, contract)) - } - } - _ => None, - }; - - if let Some((state, contract)) = local_value { - tracing::debug!(tx = %id, "Contract {key} found @ peer {}", target); - - match self.state { - Some(GetState::AwaitingResponse { requester, .. }) => { - if let Some(requester) = requester { - // Forward contract to requester - new_state = None; - tracing::debug!(tx = %id, "Returning contract {} to {}", key, requester); - return_msg = Some(GetMsg::ReturnGet { - id, - key, - value: StoreResponse { - state: Some(state), - contract, - }, - target: requester, - skip_list: skip_list.clone(), - }); - } else { - // Operation completed for original requester - tracing::debug!( - tx = %id, - "Completed operation, get response received for contract {key}" - ); - new_state = None; - return_msg = None; - } - } - Some(GetState::ReceivedRequest { .. }) => { - // Return contract to sender - new_state = None; - tracing::debug!(tx = %id, "Returning contract {} to {}", key, sender); - return_msg = Some(GetMsg::ReturnGet { - id, - key, - value: StoreResponse { - state: Some(state), - contract, - }, - target: sender.clone(), - skip_list: skip_list.clone(), - }); - } - _ => return Err(OpError::invalid_transition(self.id)), - } - } else { - // Contract not found locally, try forwarding to other peers - tracing::debug!( - tx = %id, - %key, - %this_peer, - "Contract not found @ peer {}, retrying with other peers", - sender - ); - return try_forward_or_return( - id, - key, - (htl, fetch_contract), - (this_peer, sender.clone()), - new_skip_list, - op_manager, - stats, - self.upstream_addr, - ) - .await; - } - } - GetMsg::ReturnGet { + GetMsg::Response { id, key, value: StoreResponse { state: None, .. }, - target, - skip_list, } => { let id = *id; let key = *key; @@ -875,7 +780,7 @@ impl Operation for GetOp { tracing::warn!( tx = %id, %key, - "GET: ReturnGet without sender lookup - cannot process" + "GET: Response without sender lookup - cannot process" ); return Err(OpError::invalid_transition(self.id)); }; @@ -884,12 +789,10 @@ impl Operation for GetOp { tx = %id, %key, from = %sender, - to = %target, - skip = ?skip_list, - "GET: ReturnGet received with empty value" + "GET: Response received with empty value" ); // Handle case where neither contract nor state was found - let this_peer = target; + let this_peer = op_manager.ring.connection_manager.own_location(); tracing::warn!( tx = %id, %key, @@ -909,7 +812,7 @@ impl Operation for GetOp { mut tried_peers, mut alternatives, attempts_at_hop, - current_target: _, + next_hop: _, skip_list, .. }) => { @@ -937,10 +840,9 @@ impl Operation for GetOp { "Trying alternative peer at same hop level" ); - return_msg = Some(GetMsg::SeekNode { + return_msg = Some(GetMsg::Request { id, key, - target: next_target.clone(), fetch_contract, htl: current_hop, skip_list: tried_peers.clone(), @@ -961,7 +863,7 @@ impl Operation for GetOp { alternatives, attempts_at_hop: attempts_at_hop + 1, key, - current_target: next_target, + next_hop: next_target, // Preserve the accumulated skip_list so future candidate // selection still avoids already-specified peers; tried_peers // tracks attempts at this hop. @@ -993,10 +895,9 @@ impl Operation for GetOp { if !new_candidates.is_empty() { // Try with the best new peer let target = new_candidates.remove(0); - return_msg = Some(GetMsg::SeekNode { + return_msg = Some(GetMsg::Request { id, key, - target: target.clone(), fetch_contract, htl: current_hop, skip_list: new_skip_list.clone(), @@ -1018,7 +919,7 @@ impl Operation for GetOp { alternatives: new_candidates, attempts_at_hop: 1, key, - current_target: target, + next_hop: target, skip_list: new_skip_list.clone(), }); } else if let Some(requester_peer) = requester.clone() { @@ -1032,15 +933,13 @@ impl Operation for GetOp { skip = ?new_skip_list, "No other peers found while trying to get the contract, returning response to requester" ); - return_msg = Some(GetMsg::ReturnGet { + return_msg = Some(GetMsg::Response { id, key, value: StoreResponse { state: None, contract: None, }, - target: requester_peer, - skip_list: new_skip_list.clone(), }); } else { // Original requester, operation failed @@ -1079,15 +978,13 @@ impl Operation for GetOp { skip = ?skip_list, "No other peers found while trying to get the contract, returning response to requester" ); - return_msg = Some(GetMsg::ReturnGet { + return_msg = Some(GetMsg::Response { id, key, value: StoreResponse { state: None, contract: None, }, - target: requester_peer, - skip_list: skip_list.clone(), }); new_state = None; } else { @@ -1111,21 +1008,19 @@ impl Operation for GetOp { // Return failure to sender tracing::debug!(tx = %id, "Returning contract {} to {}", key, sender); new_state = None; - return_msg = Some(GetMsg::ReturnGet { + return_msg = Some(GetMsg::Response { id, key, value: StoreResponse { state: None, contract: None, }, - target: sender.clone(), - skip_list: skip_list.clone(), }); } _ => return Err(OpError::invalid_transition(self.id)), }; } - GetMsg::ReturnGet { + GetMsg::Response { id, key, value: @@ -1133,8 +1028,6 @@ impl Operation for GetOp { state: Some(value), contract, }, - target: _, - skip_list, } => { let id = *id; let key = *key; @@ -1144,12 +1037,12 @@ impl Operation for GetOp { tracing::warn!( tx = %id, %key, - "GET: ReturnGet without sender lookup - cannot process" + "GET: Response without sender lookup - cannot process" ); return Err(OpError::invalid_transition(self.id)); }; - tracing::info!(tx = %id, %key, "Received get response with state: {:?}", self.state.as_ref().unwrap()); + tracing::info!(tx = %id, %key, "GET: Response received with state: {:?}", self.state.as_ref().unwrap()); // Check if contract is required let require_contract = matches!( @@ -1171,7 +1064,7 @@ impl Operation for GetOp { // Handle case where contract is required but not provided if require_contract && contract.is_none() { - if let Some(requester) = requester { + if let Some(_requester) = requester { // no contract, consider this like an error ignoring the incoming update value tracing::warn!( tx = %id, @@ -1179,31 +1072,23 @@ impl Operation for GetOp { sender ); - let mut new_skip_list = skip_list.clone(); - if let Some(addr) = sender.socket_addr() { - new_skip_list.insert(addr); - } - tracing::warn!( tx = %id, %key, at = %sender, - target = %requester, - "Contract not received while required, returning response to requester", + "Contract not received while required, returning response to upstream", ); // Forward error to requester op_manager .notify_op_change( - NetMessage::from(GetMsg::ReturnGet { + NetMessage::from(GetMsg::Response { id, key, value: StoreResponse { state: None, contract: None, }, - target: requester.clone(), - skip_list: new_skip_list, }), OpEnum::Get(GetOp { id, @@ -1351,15 +1236,13 @@ impl Operation for GetOp { // Forward response to requester tracing::info!(tx = %id, %key, "Get response received for contract at hop peer"); new_state = None; - return_msg = Some(GetMsg::ReturnGet { + return_msg = Some(GetMsg::Response { id, key, value: StoreResponse { state: Some(value.clone()), contract: contract.clone(), }, - target: requester.clone(), - skip_list: skip_list.clone(), }); tracing::debug!(tx = %id, %key, target = %requester, "Returning contract to requester"); result = Some(GetResult { @@ -1372,15 +1255,13 @@ impl Operation for GetOp { // Return response to sender tracing::info!(tx = %id, "Returning contract {} to {}", key, sender); new_state = None; - return_msg = Some(GetMsg::ReturnGet { + return_msg = Some(GetMsg::Response { id, key, value: StoreResponse { state: Some(value.clone()), contract: contract.clone(), }, - target: sender.clone(), - skip_list: skip_list.clone(), }); } Some(other) => { @@ -1414,13 +1295,15 @@ fn build_op_result( stats: Option>, upstream_addr: Option, ) -> Result { - // For response messages (ReturnGet), use upstream_addr directly for routing. - // This is more reliable than extracting from the message's target field, which - // may have been looked up from connection_manager (subject to race conditions). - // For forward messages (SeekNode, RequestGet), use the message's target. - let target_addr = match &msg { - Some(GetMsg::ReturnGet { .. }) => upstream_addr, - _ => msg.as_ref().and_then(|m| m.target_addr()), + // Determine the next hop for sending the message: + // - For Response messages: route back to upstream_addr (who sent us the request) + // - For Request messages being forwarded: use next_hop from state + let next_hop = match (&msg, &state) { + (Some(GetMsg::Response { .. }), _) => upstream_addr, + (Some(GetMsg::Request { .. }), Some(GetState::AwaitingResponse { next_hop, .. })) => { + next_hop.socket_addr() + } + _ => None, }; let output_op = state.map(|state| GetOp { @@ -1432,7 +1315,7 @@ fn build_op_result( }); Ok(OperationResult { return_msg: msg.map(NetMessage::from), - target_addr, + next_hop, state: output_op.map(OpEnum::Get), }) } @@ -1510,17 +1393,16 @@ async fn try_forward_or_return( fetch_contract, current_hop: new_htl, subscribe: false, - current_target: target.clone(), + next_hop: target.clone(), tried_peers, alternatives, attempts_at_hop: 1, skip_list: new_skip_list.clone(), }), - Some(GetMsg::SeekNode { + Some(GetMsg::Request { id, key, fetch_contract, - target, htl: new_htl, skip_list: new_skip_list, }), @@ -1538,15 +1420,13 @@ async fn try_forward_or_return( build_op_result( id, None, - Some(GetMsg::ReturnGet { + Some(GetMsg::Response { key, id, value: StoreResponse { state: None, contract: None, }, - target: sender, - skip_list: new_skip_list, }), None, stats, @@ -1562,74 +1442,47 @@ impl IsOperationCompleted for GetOp { } mod messages { - use std::{borrow::Borrow, fmt::Display}; + use std::fmt::Display; use serde::{Deserialize, Serialize}; use super::*; + /// GET operation messages. + /// + /// Uses hop-by-hop routing: each node stores `upstream_addr` from the transport layer + /// to route responses back. No `PeerKeyLocation` is embedded in wire messages. #[derive(Debug, Serialize, Deserialize, Clone)] pub(crate) enum GetMsg { - RequestGet { + /// Request to retrieve a contract. Forwarded hop-by-hop toward contract location. + Request { id: Transaction, - target: PeerKeyLocation, key: ContractKey, fetch_contract: bool, - skip_list: HashSet, - }, - SeekNode { - id: Transaction, - key: ContractKey, - fetch_contract: bool, - target: PeerKeyLocation, + /// Hops to live - decremented at each hop. When 0, stop forwarding. htl: usize, skip_list: HashSet, }, - ReturnGet { + /// Response containing the contract data. Routed hop-by-hop back to originator. + Response { id: Transaction, key: ContractKey, value: StoreResponse, - target: PeerKeyLocation, - skip_list: HashSet, }, } impl InnerMessage for GetMsg { fn id(&self) -> &Transaction { match self { - Self::RequestGet { id, .. } => id, - Self::SeekNode { id, .. } => id, - Self::ReturnGet { id, .. } => id, - } - } - - fn target(&self) -> Option> { - match self { - Self::SeekNode { target, .. } => Some(target), - Self::RequestGet { target, .. } => Some(target), - Self::ReturnGet { target, .. } => Some(target), + Self::Request { id, .. } | Self::Response { id, .. } => id, } } fn requested_location(&self) -> Option { match self { - GetMsg::RequestGet { key, .. } => Some(Location::from(key.id())), - GetMsg::SeekNode { key, .. } => Some(Location::from(key.id())), - GetMsg::ReturnGet { key, .. } => Some(Location::from(key.id())), - } - } - } - - impl GetMsg { - // sender() method removed - use connection-based routing via upstream_addr instead - - /// Returns the socket address of the target peer for routing. - /// Used by OperationResult to determine where to send the message. - pub fn target_addr(&self) -> Option { - match self { - Self::RequestGet { target, .. } - | Self::SeekNode { target, .. } - | Self::ReturnGet { target, .. } => target.socket_addr(), + Self::Request { key, .. } | Self::Response { key, .. } => { + Some(Location::from(key.id())) + } } } } @@ -1638,9 +1491,10 @@ mod messages { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let id = self.id(); match self { - Self::RequestGet { .. } => write!(f, "RequestGet(id: {id})"), - Self::SeekNode { .. } => write!(f, "SeekNode(id: {id})"), - Self::ReturnGet { .. } => write!(f, "ReturnGet(id: {id})"), + Self::Request { key, htl, .. } => { + write!(f, "Get::Request(id: {id}, key: {key}, htl: {htl})") + } + Self::Response { key, .. } => write!(f, "Get::Response(id: {id}, key: {key})"), } } } @@ -1650,7 +1504,7 @@ mod messages { mod tests { use super::*; use crate::message::Transaction; - use crate::operations::test_utils::{make_contract_key, make_peer}; + use crate::operations::test_utils::make_contract_key; use std::collections::HashSet; fn make_get_op(state: Option, result: Option) -> GetOp { @@ -1766,51 +1620,14 @@ mod tests { } // Tests for GetMsg helper methods - #[test] - fn get_msg_target_addr_returns_socket_for_request_get() { - let target = make_peer(5000); - let msg = GetMsg::RequestGet { - id: Transaction::new::(), - target: target.clone(), - key: make_contract_key(1), - fetch_contract: false, - skip_list: HashSet::new(), - }; - assert_eq!( - msg.target_addr(), - target.socket_addr(), - "target_addr should return target's socket address for RequestGet" - ); - } - - #[test] - fn get_msg_target_addr_returns_socket_for_return_get() { - let target = make_peer(6000); - let msg = GetMsg::ReturnGet { - id: Transaction::new::(), - key: make_contract_key(1), - value: StoreResponse { - state: None, - contract: None, - }, - target: target.clone(), - skip_list: HashSet::new(), - }; - assert_eq!( - msg.target_addr(), - target.socket_addr(), - "target_addr should return target's socket address for ReturnGet" - ); - } - #[test] fn get_msg_id_returns_transaction() { let tx = Transaction::new::(); - let msg = GetMsg::RequestGet { + let msg = GetMsg::Request { id: tx, - target: make_peer(5000), key: make_contract_key(1), fetch_contract: false, + htl: 5, skip_list: HashSet::new(), }; assert_eq!(*msg.id(), tx, "id() should return the transaction ID"); @@ -1819,17 +1636,16 @@ mod tests { #[test] fn get_msg_display_formats_correctly() { let tx = Transaction::new::(); - let msg = GetMsg::SeekNode { + let msg = GetMsg::Request { id: tx, key: make_contract_key(1), fetch_contract: false, - target: make_peer(5000), htl: 5, skip_list: HashSet::new(), }; let display = format!("{}", msg); assert!( - display.contains("SeekNode"), + display.contains("Request"), "Display should contain message type name" ); } diff --git a/crates/core/src/operations/mod.rs b/crates/core/src/operations/mod.rs index d55981325..87b7b3795 100644 --- a/crates/core/src/operations/mod.rs +++ b/crates/core/src/operations/mod.rs @@ -53,9 +53,10 @@ where pub(crate) struct OperationResult { /// Inhabited if there is a message to return to the other peer. pub return_msg: Option, - /// Where to send the return message. Required if return_msg is Some. - /// This replaces the old pattern of embedding target in the message itself. - pub target_addr: Option, + /// The next hop to send the message to. Required if return_msg is Some. + /// For responses, this is upstream_addr (back toward originator). + /// For forwarded requests, this is the next peer toward the contract location. + pub next_hop: Option, /// None if the operation has been completed. pub state: Option, } @@ -117,7 +118,7 @@ where } Ok(OperationResult { return_msg: None, - target_addr: _, + next_hop: _, state: Some(final_state), }) if final_state.finalized() => { if op_manager.failed_parents().remove(&tx_id).is_some() { @@ -151,14 +152,14 @@ where } Ok(OperationResult { return_msg: Some(msg), - target_addr, + next_hop, state: Some(updated_state), }) => { if updated_state.finalized() { let id = *msg.id(); tracing::debug!(%id, "operation finalized with outgoing message"); op_manager.completed(id); - if let Some(target) = target_addr { + if let Some(target) = next_hop { tracing::debug!(%id, ?target, "sending final message to target"); network_bridge.send(target, msg).await?; } @@ -166,10 +167,13 @@ where } else { let id = *msg.id(); tracing::debug!(%id, "operation in progress"); - if let Some(target) = target_addr { + if let Some(target) = next_hop { tracing::debug!(%id, ?target, "sending updated op state"); - network_bridge.send(target, msg).await?; + // IMPORTANT: Push state BEFORE sending message to avoid race condition. + // If we send first, a fast response might arrive before the state is saved, + // causing load_or_init to fail to find the operation. op_manager.push(id, updated_state).await?; + network_bridge.send(target, msg).await?; } else { tracing::debug!(%id, "queueing op state for local processing"); debug_assert!( @@ -189,7 +193,7 @@ where Ok(OperationResult { return_msg: None, - target_addr: _, + next_hop: _, state: Some(updated_state), }) => { let id = *updated_state.id(); @@ -197,19 +201,19 @@ where } Ok(OperationResult { return_msg: Some(msg), - target_addr, + next_hop, state: None, }) => { op_manager.completed(tx_id); - if let Some(target) = target_addr { + if let Some(target) = next_hop { tracing::debug!(%tx_id, ?target, "sending back message to target"); network_bridge.send(target, msg).await?; } } Ok(OperationResult { return_msg: None, - target_addr: _, + next_hop: _, state: None, }) => { op_manager.completed(tx_id); diff --git a/crates/core/src/operations/put.rs b/crates/core/src/operations/put.rs index 89b273a5e..fd6805181 100644 --- a/crates/core/src/operations/put.rs +++ b/crates/core/src/operations/put.rs @@ -19,7 +19,7 @@ use crate::{ contract::ContractHandlerEvent, message::{InnerMessage, NetMessage, Transaction}, node::{NetworkBridge, OpManager}, - ring::{Location, PeerKeyLocation}, + ring::Location, }; pub(crate) struct PutOp { @@ -76,6 +76,15 @@ impl PutOp { .into()) } } + + /// Get the next hop address if this operation is in a state that needs to send + /// an outbound message to a downstream peer. + pub(crate) fn get_next_hop_addr(&self) -> Option { + match &self.state { + Some(PutState::AwaitingResponse { next_hop, .. }) => *next_hop, + _ => None, + } + } } impl IsOperationCompleted for PutOp { @@ -124,16 +133,18 @@ impl Operation for PutOp { Err(OpError::OpNotPresent(tx)) } Ok(None) => { - // new request to put a new value for a contract, initialize the machine + // New incoming request - we're a forwarder or final node. + // We don't need persistent state, just track upstream_addr for response routing. tracing::debug!( tx = %tx, - "PutOp::load_or_init: No existing operation found, initializing new ReceivedRequest" + source = ?source_addr, + "PutOp::load_or_init: New incoming request" ); Ok(OpInitialization { op: Self { - state: Some(PutState::ReceivedRequest), + state: None, // No state needed for forwarding nodes id: tx, - upstream_addr: source_addr, // Connection-based routing: store who sent us this request + upstream_addr: source_addr, // Remember who to send response to }, source_addr, }) @@ -155,957 +166,294 @@ impl Operation for PutOp { fn process_message<'a, NB: NetworkBridge>( self, - conn_manager: &'a mut NB, + _conn_manager: &'a mut NB, op_manager: &'a OpManager, input: &'a Self::Message, - source_addr: Option, + _source_addr: Option, ) -> Pin> + Send + 'a>> { Box::pin(async move { - // Look up sender's PeerKeyLocation from source address for logging/routing - // This replaces the sender field that was previously embedded in messages - let sender_from_addr = source_addr.and_then(|addr| { - op_manager - .ring - .connection_manager - .get_peer_location_by_addr(addr) - }); - - let return_msg; - let new_state; + let id = self.id; + let upstream_addr = self.upstream_addr; + let is_originator = upstream_addr.is_none(); + + // Extract subscribe flag from state (only relevant for originator) + let subscribe = match &self.state { + Some(PutState::PrepareRequest { subscribe, .. }) => *subscribe, + Some(PutState::AwaitingResponse { subscribe, .. }) => *subscribe, + _ => false, + }; match input { - PutMsg::RequestPut { - id, - origin, + PutMsg::Request { + id: _msg_id, contract, related_contracts, value, htl, - target: _, + skip_list, } => { - // Fill in origin's external address from transport layer if unknown. - // This is the key step where the first recipient determines the - // origin's external address from the actual packet source address. - let mut origin = origin.clone(); - if origin.peer_addr.is_unknown() { - let addr = source_addr - .expect("RequestPut with unknown origin address requires source_addr"); - origin.set_addr(addr); - tracing::debug!( - tx = %id, - origin_addr = %addr, - "put: filled RequestPut origin address from source_addr" - ); - } - - // Get the contract key and own location let key = contract.key(); - let own_location = op_manager.ring.connection_manager.own_location(); - // Use origin (from message) instead of sender_from_addr (from connection lookup). - // The origin has the correct pub_key and its address is filled from source_addr. - // Connection lookup can return wrong identity due to race condition where - // transport connection arrives before ExpectPeerConnection is processed. - let prev_sender = origin.clone(); + let htl = *htl; tracing::info!( - "Requesting put for contract {} from {} to {}", - key, - prev_sender, - own_location + tx = %id, + %key, + htl, + is_originator, + subscribe, + "Processing PUT Request" ); - let subscribe = match &self.state { - Some(PutState::PrepareRequest { subscribe, .. }) => *subscribe, - Some(PutState::AwaitingResponse { subscribe, .. }) => *subscribe, - _ => false, - }; + // Check if we're already subscribed to this contract BEFORE storing + let was_seeding = op_manager.ring.is_seeding_contract(&key); - // Always cache contracts we encounter - LRU will handle eviction - let should_seed = true; + // Step 1: Store contract locally (all nodes cache) + let merged_value = put_contract( + op_manager, + key, + value.clone(), + related_contracts.clone(), + contract, + ) + .await?; - let modified_value = if should_seed { - // Cache locally when initiating a PUT. This ensures: - // 1. Nodes with no connections can still cache contracts - // 2. Nodes that are the optimal location cache locally - // 3. Initiators always have the data they're putting (per Nacho's requirement) - // 4. States are properly merged (Freenet states are commutative monoids) - let is_already_seeding = op_manager.ring.is_seeding_contract(&key); + // Mark as seeding if not already + if !was_seeding { + op_manager.ring.seed_contract(key, value.size() as u64); + super::announce_contract_cached(op_manager, &key).await; + } + // If we were already subscribed and the merged value differs from input, + // trigger an Update to propagate the change to other subscribers + let state_changed = merged_value.as_ref() != value.as_ref(); + if was_seeding && state_changed { tracing::debug!( tx = %id, %key, - peer = %prev_sender, - is_already_seeding, - "Processing local PUT in initiating node" + "PUT on subscribed contract resulted in state change, triggering Update" ); + start_update_after_put(op_manager, id, key, merged_value.clone()).await; + } - // DEBUG: Log contract key/hash details before put_contract - tracing::debug!( - "DEBUG PUT: Before put_contract - key={}, key.code_hash={:?}, contract.key={}, contract.key.code_hash={:?}, code_len={}", - key, - key.code_hash(), - contract.key(), - contract.key().code_hash(), - contract.data().len() - ); + // Step 2: Determine if we should forward or respond + // Build skip list: include sender (upstream) and already-tried peers + let mut new_skip_list = skip_list.clone(); + if let Some(addr) = upstream_addr { + new_skip_list.insert(addr); + } + // Add our own address to skip list + if let Some(own_addr) = op_manager.ring.connection_manager.get_own_addr() { + new_skip_list.insert(own_addr); + } - // Always call put_contract to ensure proper state merging - // Since Freenet states are commutative monoids, merging is always safe - // and necessary to maintain consistency - let result = put_contract( - op_manager, - key, - value.clone(), - related_contracts.clone(), - contract, - ) - .await?; - - // Mark as seeded locally if not already - if !is_already_seeding { - op_manager.ring.seed_contract(key, value.size() as u64); - super::announce_contract_cached(op_manager, &key).await; - tracing::debug!( - tx = %id, - %key, - peer = %prev_sender, - "Marked contract as seeding locally" - ); - } + // Find next hop toward contract location + let next_hop = if htl > 0 { + op_manager + .ring + .closest_potentially_caching(&key, &new_skip_list) + } else { + None + }; - tracing::debug!( - tx = %id, - %key, - peer = %prev_sender, - was_already_seeding = is_already_seeding, - "Successfully processed contract locally with merge" - ); + if let Some(next_peer) = next_hop { + // Forward to next hop + let next_addr = + next_peer.socket_addr().expect("next hop must have address"); - result - } else { tracing::debug!( tx = %id, %key, - peer = %prev_sender, - "Not initiator, skipping local caching" + next = %next_addr, + htl = htl - 1, + "Forwarding PUT to next hop" ); - value.clone() - }; - - // Determine next forwarding target - find peers closer to the contract location - // Don't reuse the target from RequestPut as that's US (the current processing peer) - // Skip list now uses SocketAddr instead of PeerId - let skip: Vec<_> = prev_sender.socket_addr().into_iter().collect(); - let next_target = op_manager.ring.closest_potentially_caching(&key, &skip); - tracing::info!( - tx = %id, - %key, - next_target = ?next_target, - skip = ?skip, - "PUT seek evaluating next forwarding target" - ); - - if let Some(forward_target) = next_target { - // Create a SeekNode message to forward to the next hop - return_msg = Some(PutMsg::SeekNode { - id: *id, - origin: origin.clone(), - target: forward_target, - value: modified_value.clone(), + let forward_msg = PutMsg::Request { + id, contract: contract.clone(), related_contracts: related_contracts.clone(), - htl: *htl, - }); - - // When we're the origin node we already seeded the contract locally. - // Treat downstream SuccessfulPut messages as best-effort so River is unblocked. - if origin.pub_key() == own_location.pub_key() { - tracing::debug!( - tx = %id, - %key, - "Origin node finishing PUT without waiting for SuccessfulPut ack" - ); + value: merged_value, + htl: htl.saturating_sub(1), + skip_list: new_skip_list, + }; - if subscribe { - if !op_manager.failed_parents().contains(id) { - let child_tx = - super::start_subscription_request(op_manager, *id, key); - tracing::debug!( - tx = %id, - %child_tx, - "started subscription as child operation" - ); - } else { - tracing::warn!( - tx = %id, - "not starting subscription for failed parent operation" - ); - } - } + // Transition to AwaitingResponse, preserving subscribe flag for originator + // Store next_hop so handle_notification_msg can route the message + let new_state = Some(PutState::AwaitingResponse { + subscribe, + next_hop: Some(next_addr), + }); - new_state = Some(PutState::Finished { key }); - } else { - // Transition to AwaitingResponse state to handle future SuccessfulPut messages - new_state = Some(PutState::AwaitingResponse { - key, - upstream: Some(prev_sender.clone()), - contract: contract.clone(), - state: modified_value, - subscribe, - origin: origin.clone(), - }); - } + Ok(OperationResult { + return_msg: Some(NetMessage::from(forward_msg)), + next_hop: Some(next_addr), + state: Some(OpEnum::Put(PutOp { + id, + state: new_state, + upstream_addr, + })), + }) } else { - // No other peers to forward to - we're the final destination - tracing::warn!( + // No next hop - we're the final destination (or htl exhausted) + tracing::info!( tx = %id, %key, - skip = ?skip, - "No peers to forward to after local processing - completing PUT locally" + "PUT complete at this node, sending response" ); - // Send SuccessfulPut back to the sender (upstream node) - return_msg = Some(PutMsg::SuccessfulPut { - id: *id, - target: prev_sender.clone(), - key, - origin: origin.clone(), - }); + if is_originator { + // We're both originator and final destination + // Start subscription if requested + if subscribe { + start_subscription_after_put(op_manager, id, key).await; + } - // Mark operation as finished - new_state = Some(PutState::Finished { key }); - } - } - PutMsg::SeekNode { - id, - value, - contract, - related_contracts, - htl, - target: _, - origin, - } => { - // Fill in origin's external address from transport layer if unknown. - // This is the key step where the recipient determines the - // origin's external address from the actual packet source address. - let mut origin = origin.clone(); - if origin.peer_addr.is_unknown() { - if let Some(addr) = source_addr { - origin.set_addr(addr); - tracing::debug!( - tx = %id, - origin_addr = %addr, - "put: filled SeekNode origin address from source_addr" - ); + Ok(OperationResult { + return_msg: None, + next_hop: None, + state: Some(OpEnum::Put(PutOp { + id, + state: Some(PutState::Finished { key }), + upstream_addr: None, + })), + }) + } else { + // Send response back to upstream + let response = PutMsg::Response { id, key }; + let upstream = + upstream_addr.expect("non-originator must have upstream"); + + Ok(OperationResult { + return_msg: Some(NetMessage::from(response)), + next_hop: Some(upstream), + state: None, // Operation complete for this node + }) } } + } - // Use origin (from message) instead of sender_from_addr (from connection lookup). - // The origin has the correct pub_key and its address is filled from source_addr. - // Connection lookup can fail or return wrong identity due to race conditions. - let sender = origin.clone(); - // Get the contract key and check if we should handle it - let key = contract.key(); - let is_subscribed_contract = op_manager.ring.is_seeding_contract(&key); - // Always cache contracts - LRU handles eviction - let should_handle_locally = !is_subscribed_contract; - - tracing::debug!( + PutMsg::Response { id: _msg_id, key } => { + tracing::info!( tx = %id, %key, - target = %op_manager.ring.connection_manager.own_location(), - sender = %sender, - "Putting contract at target peer", + is_originator, + subscribe, + "Processing PUT Response" ); - // Determine if this is the last hop or if we should forward - let last_hop = if let Some(new_htl) = htl.checked_sub(1) { - // Forward changes to nodes closer to the contract location - let skip_list = sender.socket_addr().into_iter().collect::>(); - forward_put( - op_manager, - conn_manager, - contract, - value.clone(), - *id, - new_htl, - skip_list, - origin.clone(), - ) - .await - } else { - // Last hop, no more forwarding - true - }; - - // Handle local storage and subscription if needed - if should_handle_locally { - // Store contract locally - tracing::debug!( + if is_originator { + // We're the originator - operation complete! + tracing::info!( tx = %id, %key, - peer = %op_manager.ring.connection_manager.own_location(), - "Caching contract locally as it's not already seeded" - ); - - tracing::debug!(tx = %id, "Attempting contract value put"); - // We don't need to capture the return value here since the value - // has already been processed at the initiating node - let _ = put_contract( - op_manager, - key, - value.clone(), - related_contracts.clone(), - contract, - ) - .await?; - - let own_location = op_manager.ring.connection_manager.own_location(); - tracing::debug!( - tx = %id, - "Successfully put value for contract {} @ {:?}", - key, - own_location.location() + elapsed_ms = id.elapsed().as_millis(), + "PUT operation completed successfully" ); - // Start subscription - // Note: skip_list is no longer used here as subscriptions handle their own routing - - let child_tx = super::start_subscription_request(op_manager, *id, key); - tracing::debug!(tx = %id, %child_tx, "started subscription as child operation"); - op_manager.ring.seed_contract(key, value.size() as u64); - super::announce_contract_cached(op_manager, &key).await; - - true - } else { - false - }; - - // Broadcast changes to subscribers - let broadcast_to = op_manager.get_broadcast_targets(&key, &sender); - match try_to_broadcast( - *id, - last_hop, - op_manager, - self.state, - origin.clone(), - (broadcast_to, sender.clone()), - key, - (contract.clone(), value.clone()), - self.upstream_addr, - ) - .await - { - Ok((state, msg)) => { - new_state = state; - return_msg = msg; + // Start subscription if requested + if subscribe { + start_subscription_after_put(op_manager, id, *key).await; } - Err(err) => return Err(err), - } - } - PutMsg::BroadcastTo { - id, - key, - new_value, - contract, - origin, - .. - } => { - // Get sender from connection-based routing - let sender = sender_from_addr - .clone() - .expect("BroadcastTo requires source_addr"); - // Get own location - let target = op_manager.ring.connection_manager.own_location(); - - // Update the contract locally - tracing::debug!(tx = %id, %key, "Attempting contract value update"); - let updated_value = put_contract( - op_manager, - *key, - new_value.clone(), - RelatedContracts::default(), - contract, - ) - .await?; - tracing::debug!(tx = %id, %key, "Contract successfully updated"); - - // Broadcast changes to subscribers - let broadcast_to = op_manager.get_broadcast_targets(key, &sender); - tracing::debug!( - tx = %id, - %key, - location = ?target.location(), - "Successfully updated contract value" - ); - // Try to broadcast the changes - match try_to_broadcast( - *id, - false, - op_manager, - self.state, - origin.clone(), - (broadcast_to, sender.clone()), - *key, - (contract.clone(), updated_value), - self.upstream_addr, - ) - .await - { - Ok((state, msg)) => { - new_state = state; - return_msg = msg; - } - Err(err) => return Err(err), - } - } - PutMsg::Broadcasting { - id, - broadcasted_to, - broadcast_to, - key, - new_value, - contract, - upstream, - origin, - .. - } => { - // Get own location and initialize counter - let sender = op_manager.ring.connection_manager.own_location(); - let mut broadcasted_to = *broadcasted_to; - - if upstream.pub_key() == sender.pub_key() { - // Originator reached the subscription tree. This path should be filtered - // out by the deduplication layer, so treat it as a warning if it happens - // to help surface potential bugs. - tracing::warn!( - tx = %id, - %key, - "PUT originator re-entered broadcast loop; dedup should have completed" - ); - new_state = Some(PutState::Finished { key: *key }); + Ok(OperationResult { + return_msg: None, + next_hop: None, + state: Some(OpEnum::Put(PutOp { + id, + state: Some(PutState::Finished { key: *key }), + upstream_addr: None, + })), + }) } else { - // Notify the upstream hop right away so the request - // path does not wait for the broadcast to finish. - let ack = PutMsg::SuccessfulPut { - id: *id, - target: upstream.clone(), - key: *key, - origin: origin.clone(), - }; + // Forward response to our upstream + let upstream = upstream_addr.expect("non-originator must have upstream"); - tracing::trace!( + tracing::debug!( tx = %id, %key, upstream = %upstream, - "Forwarding SuccessfulPut upstream before broadcast" - ); - - conn_manager - .send( - upstream.socket_addr().expect( - "upstream address must be known for Broadcasting message", - ), - NetMessage::from(ack), - ) - .await?; - new_state = None; - } - - // Broadcast to all peers in parallel - let mut broadcasting = Vec::with_capacity(broadcast_to.len()); - for peer in broadcast_to.iter() { - let msg = PutMsg::BroadcastTo { - id: *id, - key: *key, - new_value: new_value.clone(), - origin: origin.clone(), - contract: contract.clone(), - target: peer.clone(), - }; - if let Some(addr) = peer.socket_addr() { - let f = conn_manager.send(addr, msg.into()); - broadcasting.push(f); - } else { - tracing::warn!(tx = %id, %key, "Skipping broadcast to peer with unknown address"); - } - } - - // Collect errors from broadcasting - let error_futures = futures::future::join_all(broadcasting) - .await - .into_iter() - .enumerate() - .filter_map(|(p, err)| { - if let Err(err) = err { - Some((p, err)) - } else { - None - } - }); - - // Handle failed broadcasts - let mut incorrect_results = 0; - for (peer_num, err) in error_futures { - // Remove the failed peers in reverse order - let peer = broadcast_to.get(peer_num).unwrap(); - tracing::warn!( - "failed broadcasting put change to {} with error {}; dropping connection", - peer, - err - ); - // todo: review this, maybe we should just dropping this subscription - if let Some(addr) = peer.socket_addr() { - conn_manager.drop_connection(addr).await?; - } - incorrect_results += 1; - } - - // Update broadcast count and log success - broadcasted_to += broadcast_to.len() - incorrect_results; - tracing::debug!( - "Successfully broadcasted put into contract {key} to {broadcasted_to} peers" - ); - - return_msg = None; - } - PutMsg::SuccessfulPut { id, .. } => { - tracing::debug!( - tx = %id, - current_state = ?self.state, - "PutOp::process_message: handling SuccessfulPut" - ); - match self.state { - Some(PutState::AwaitingResponse { - key, - upstream, - contract, - state, - subscribe, - origin: state_origin, - }) => { - tracing::debug!( - tx = %id, - %key, - subscribe = subscribe, - "Processing LocalPut with subscribe flag" - ); - - // Check if already stored before any operations - let is_seeding_contract = op_manager.ring.is_seeding_contract(&key); - - // Only store the contract locally if not already seeded - if !is_seeding_contract { - tracing::debug!( - tx = %id, - %key, - peer = %op_manager.ring.connection_manager.own_location(), - "Storing contract locally after successful put" - ); - - // Store the contract locally - put_contract( - op_manager, - key, - state.clone(), - RelatedContracts::default(), - &contract.clone(), - ) - .await?; - - // Always seed the contract locally after a successful put - tracing::debug!( - tx = %id, - %key, - peer = %op_manager.ring.connection_manager.own_location(), - "Adding contract to local seed list" - ); - op_manager.ring.seed_contract(key, state.size() as u64); - super::announce_contract_cached(op_manager, &key).await; - } else { - tracing::debug!( - tx = %id, - %key, - peer = %op_manager.ring.connection_manager.own_location(), - "Contract already seeded locally, skipping duplicate caching" - ); - } - - tracing::info!( - tx = %id, - %key, - this_peer = %op_manager.ring.connection_manager.own_location(), - "Peer completed contract value put", - ); - - new_state = Some(PutState::Finished { key }); - - if subscribe { - // Check if this parent has already failed due to a previous child failure - if !op_manager.failed_parents().contains(id) { - tracing::debug!( - tx = %id, - %key, - "starting child subscription for PUT operation" - ); - let child_tx = - super::start_subscription_request(op_manager, *id, key); - tracing::debug!(tx = %id, %child_tx, "started subscription as child operation"); - } else { - tracing::warn!( - tx = %id, - "not starting subscription for failed parent operation" - ); - } - } - - // Forward success message upstream if needed - if let Some(upstream_peer) = upstream.clone() { - tracing::trace!( - tx = %id, - %key, - upstream = %upstream_peer, - "PutOp::process_message: Forwarding SuccessfulPut upstream" - ); - return_msg = Some(PutMsg::SuccessfulPut { - id: *id, - target: upstream_peer, - key, - origin: state_origin.clone(), - }); - } else { - tracing::trace!( - tx = %id, - %key, - "PutOp::process_message: SuccessfulPut originated locally; no upstream" - ); - return_msg = None; - } - } - Some(PutState::Finished { .. }) => { - // Operation already completed - this is a duplicate SuccessfulPut message - // This can happen when multiple peers send success confirmations - tracing::debug!( - tx = %id, - "Received duplicate SuccessfulPut for already completed operation, ignoring" - ); - new_state = None; // Mark for completion - return_msg = None; - } - _ => return Err(OpError::invalid_transition(self.id)), - }; - } - PutMsg::PutForward { - id, - contract, - new_value, - htl, - skip_list, - origin, - .. - } => { - // Get sender from connection-based routing - let sender = sender_from_addr - .clone() - .expect("PutForward requires source_addr"); - let max_htl = op_manager.ring.max_hops_to_live.max(1); - let htl_value = (*htl).min(max_htl); - if htl_value == 0 { - tracing::warn!( - tx = %id, - %contract, - sender = %sender, - "Discarding PutForward with zero HTL" + "Forwarding PUT Response to upstream" ); - return Ok(OperationResult { - return_msg: None, - target_addr: None, - state: None, - }); - } - // Get contract key and own location - let key = contract.key(); - let peer_loc = op_manager.ring.connection_manager.own_location(); - let is_seeding_contract = op_manager.ring.is_seeding_contract(&key); - // Always cache contracts - LRU handles eviction - let should_handle_locally = !is_seeding_contract; - tracing::debug!( - tx = %id, - %key, - this_peer = %peer_loc, - "Forwarding changes, trying to put the contract" - ); + let response = PutMsg::Response { id, key: *key }; - // Put the contract locally if needed - let already_put = if should_handle_locally { - tracing::debug!(tx = %id, %key, "Seeding contract locally"); - put_contract( - op_manager, - key, - new_value.clone(), - RelatedContracts::default(), - contract, - ) - .await?; - true - } else { - false - }; - - // Determine if this is the last hop and handle forwarding - let last_hop = if let Some(new_htl) = htl_value.checked_sub(1) { - // Create updated skip list - let mut new_skip_list = skip_list.clone(); - if let Some(addr) = sender.socket_addr() { - new_skip_list.insert(addr); - } - - // Forward to closer peers - let put_here = forward_put( - op_manager, - conn_manager, - contract, - new_value.clone(), - *id, - new_htl, - new_skip_list.clone(), - origin.clone(), - ) - .await; - - put_here - } else { - // Last hop, no more forwarding - true - }; - - // Handle subscription and local storage if this is the last hop - if last_hop && should_handle_locally { - // Put the contract if not already done - if !already_put { - put_contract( - op_manager, - key, - new_value.clone(), - RelatedContracts::default(), - contract, - ) - .await?; - } - - // Start subscription and record cache access - let child_tx = super::start_subscription_request(op_manager, *id, key); - tracing::debug!(tx = %id, %child_tx, "started subscription as child operation"); - let _evicted = op_manager.ring.seed_contract(key, new_value.size() as u64); - super::announce_contract_cached(op_manager, &key).await; - // Note: Evicted contracts are handled by SeedingManager (subscribers cleaned up internally) - } else if last_hop && !already_put { - // Last hop but not handling locally, still need to put - put_contract( - op_manager, - key, - new_value.clone(), - RelatedContracts::default(), - contract, - ) - .await?; - } - - // Broadcast changes to subscribers - let broadcast_to = op_manager.get_broadcast_targets(&key, &sender); - match try_to_broadcast( - *id, - last_hop, - op_manager, - self.state, - origin.clone(), - (broadcast_to, sender.clone()), - key, - (contract.clone(), new_value.clone()), - self.upstream_addr, - ) - .await - { - Ok((state, msg)) => { - new_state = state; - return_msg = msg; - } - Err(err) => return Err(err), + Ok(OperationResult { + return_msg: Some(NetMessage::from(response)), + next_hop: Some(upstream), + state: None, // Operation complete for this node + }) } } - _ => return Err(OpError::UnexpectedOpState), } - - build_op_result(self.id, new_state, return_msg, self.upstream_addr) }) } } -impl OpManager { - fn get_broadcast_targets( - &self, - key: &ContractKey, - sender: &PeerKeyLocation, - ) -> Vec { - let subscribers = self - .ring - .subscribers_of(key) - .map(|subs| { - subs.value() - .iter() - .filter(|pk| pk.pub_key() != sender.pub_key()) - .cloned() - .collect::>() - }) - .unwrap_or_default(); - subscribers +/// Helper to start subscription after PUT completes (only for originator) +async fn start_subscription_after_put( + op_manager: &OpManager, + parent_tx: Transaction, + key: ContractKey, +) { + // Note: This failed_parents check may be unnecessary since we only spawn the subscription + // at PUT completion, so there's no earlier child operation that could have failed. + // Keeping it as defensive check in case of race conditions not currently understood. + if !op_manager.failed_parents().contains(&parent_tx) { + let child_tx = super::start_subscription_request(op_manager, parent_tx, key); + tracing::debug!( + tx = %parent_tx, + %child_tx, + %key, + "Started subscription as child operation after PUT" + ); + } else { + tracing::warn!( + tx = %parent_tx, + %key, + "Not starting subscription for failed parent PUT operation" + ); } } -fn build_op_result( - id: Transaction, - state: Option, - msg: Option, - upstream_addr: Option, -) -> Result { - // For routing, prefer msg.target_addr() (the explicit message destination) over upstream_addr. - // The message's target field contains the intended recipient (e.g., for SuccessfulPut, the - // upstream node that should receive the response). upstream_addr is the sender of the - // current message we're processing, which is NOT where we want to send responses. - // Only fall back to upstream_addr if msg.target_addr() is None (e.g., for NAT scenarios - // where the target's address is unknown and we need to route back through the sender). - let target_addr = msg.as_ref().and_then(|m| m.target_addr()).or(upstream_addr); - - let output_op = state.map(|op| PutOp { - id, - state: Some(op), - upstream_addr, - }); - Ok(OperationResult { - return_msg: msg.map(NetMessage::from), - target_addr, - state: output_op.map(OpEnum::Put), - }) -} - -#[allow(clippy::too_many_arguments)] -async fn try_to_broadcast( - id: Transaction, - last_hop: bool, +/// Helper to start an Update operation when a PUT on a subscribed contract results in state change. +/// This propagates the merged state to other subscribers. +async fn start_update_after_put( op_manager: &OpManager, - state: Option, - origin: PeerKeyLocation, - (broadcast_to, upstream): (Vec, PeerKeyLocation), + parent_tx: Transaction, key: ContractKey, - (contract, new_value): (ContractContainer, WrappedState), - upstream_addr: Option, -) -> Result<(Option, Option), OpError> { - let new_state; - let return_msg; + new_state: WrappedState, +) { + use super::update; - let subscribe = match &state { - Some(PutState::AwaitingResponse { subscribe, .. }) => *subscribe, - _ => false, - }; + let child_tx = Transaction::new_child_of::(&parent_tx); + op_manager.expect_and_register_sub_operation(parent_tx, child_tx); - let preserved_upstream = match &state { - Some(PutState::AwaitingResponse { - upstream: Some(existing), - .. - }) => Some(existing.clone()), - _ => None, - }; + tracing::debug!( + tx = %parent_tx, + %child_tx, + %key, + "Starting Update as child operation after PUT changed subscribed contract state" + ); - match state { - // Handle initiating node that's also the target (single node or targeting self) - Some(PutState::AwaitingResponse { - upstream: None, - subscribe, - .. - }) if broadcast_to.is_empty() && last_hop => { - // We're the initiating node and the target - operation complete - tracing::debug!( - "PUT operation complete - initiating node is also target (broadcast_to empty, last hop)" - ); + let op_manager_cloned = op_manager.clone(); - // NOTE: We do NOT start a network subscription here when we're the target. - // Client subscriptions are handled independently through the WebSocket API - // and contract executor, not through the ring operations layer. - // See: https://github.com/freenet/freenet-core/issues/1782 - if subscribe { - tracing::debug!( - "Subscription requested but not starting network subscription (we are the target). \ - Client subscriptions are handled through WebSocket/contract executor layer" - ); - } + tokio::spawn(async move { + tokio::task::yield_now().await; - new_state = Some(PutState::Finished { key }); - return_msg = None; - } - Some( - PutState::ReceivedRequest - | PutState::BroadcastOngoing - | PutState::AwaitingResponse { .. }, - ) => { - if broadcast_to.is_empty() && !last_hop { - // broadcast complete - tracing::debug!( - "Empty broadcast list while updating value for contract {}", - key - ); - // means the whole tx finished so can return early - let upstream_for_completion = preserved_upstream - .clone() - .or_else(|| Some(upstream.clone())); - new_state = Some(PutState::AwaitingResponse { - key, - upstream: upstream_for_completion, - contract: contract.clone(), // No longer optional - state: new_value.clone(), - subscribe, - origin: origin.clone(), - }); - return_msg = None; - } else if !broadcast_to.is_empty() { - tracing::debug!("Callback to start broadcasting to other nodes"); - new_state = Some(PutState::BroadcastOngoing); - return_msg = Some(PutMsg::Broadcasting { - id, - new_value, - broadcasted_to: 0, - broadcast_to, - key, - contract, - upstream, - origin: origin.clone(), - }); - - let op = PutOp { - id, - state: new_state, - upstream_addr, - }; - op_manager - .notify_op_change(NetMessage::from(return_msg.unwrap()), OpEnum::Put(op)) - .await?; - return Err(OpError::StatePushed); - } else { - new_state = None; - return_msg = Some(PutMsg::SuccessfulPut { - id, - target: upstream, - key, - origin, - }); + let update_op = + update::start_op_with_id(key, new_state, RelatedContracts::default(), child_tx); + + match update::request_update(&op_manager_cloned, update_op).await { + Ok(_) => { + tracing::debug!(%child_tx, %parent_tx, %key, "child Update completed"); + } + Err(error) => { + tracing::error!(%parent_tx, %child_tx, %key, %error, "child Update failed"); + // Note: We don't propagate this failure to the parent PUT since the PUT itself + // succeeded - the Update is best-effort propagation to subscribers } } - _ => return Err(OpError::invalid_transition(id)), - }; - - Ok((new_state, return_msg)) + }); } pub(crate) fn start_op( @@ -1120,7 +468,6 @@ pub(crate) fn start_op( tracing::debug!(%contract_location, %key, "Requesting put"); let id = Transaction::new::(); - // let payload_size = contract.data().len(); let state = Some(PutState::PrepareRequest { contract, related_contracts, @@ -1149,7 +496,6 @@ pub(crate) fn start_op_with_id( let contract_location = Location::from(&key); tracing::debug!(%contract_location, %key, tx = %id, "Requesting put with existing transaction ID"); - // let payload_size = contract.data().len(); let state = Some(PutState::PrepareRequest { contract, related_contracts, @@ -1165,38 +511,37 @@ pub(crate) fn start_op_with_id( } } -#[derive(Debug)] -#[allow(clippy::large_enum_variant)] +/// State machine for PUT operations. +/// +/// State transitions: +/// - Originator: PrepareRequest → AwaitingResponse → Finished +/// - Forwarder: (receives Request) → AwaitingResponse → (receives Response) → done +/// - Final node: (receives Request) → stores contract → sends Response → done +#[derive(Debug, Clone)] pub enum PutState { - ReceivedRequest, - /// Preparing request for put op. + /// Local originator preparing to send initial request. PrepareRequest { contract: ContractContainer, related_contracts: RelatedContracts<'static>, value: WrappedState, htl: usize, + /// If true, start a subscription after PUT completes subscribe: bool, }, - /// Awaiting response from petition. + /// Waiting for response from downstream node. AwaitingResponse { - key: ContractKey, - upstream: Option, - contract: ContractContainer, - state: WrappedState, + /// If true, start a subscription after PUT completes (originator only) subscribe: bool, - origin: PeerKeyLocation, - }, - /// Broadcasting changes to subscribers. - BroadcastOngoing, - /// Operation completed. - Finished { - key: ContractKey, + /// Next hop address for routing the outbound message + next_hop: Option, }, + /// Operation completed successfully. + Finished { key: ContractKey }, } /// Request to insert/update a value into a contract. -pub(crate) async fn request_put(op_manager: &OpManager, mut put_op: PutOp) -> Result<(), OpError> { - // Process PrepareRequest state and transition to next state +/// Called when a client initiates a PUT operation. +pub(crate) async fn request_put(op_manager: &OpManager, put_op: PutOp) -> Result<(), OpError> { let (id, contract, value, related_contracts, htl, subscribe) = match &put_op.state { Some(PutState::PrepareRequest { contract, @@ -1213,183 +558,50 @@ pub(crate) async fn request_put(op_manager: &OpManager, mut put_op: PutOp) -> Re *subscribe, ), _ => { - tracing::error!(tx = %put_op.id, op_state = ?put_op.state, "request_put called with unexpected state, expected PrepareRequest"); + tracing::error!( + tx = %put_op.id, + state = ?put_op.state, + "request_put called with unexpected state" + ); return Err(OpError::UnexpectedOpState); } }; let key = contract.key(); - let own_location = op_manager.ring.connection_manager.own_location(); - - // Find the optimal target for this contract - let skip_self: Vec<_> = own_location.socket_addr().into_iter().collect(); - let target = op_manager - .ring - .closest_potentially_caching(&key, &skip_self); - - tracing::debug!( - tx = %id, - %key, - target_found = target.is_some(), - target_peer = ?target.as_ref().map(|t| t.to_string()), - "Determined PUT routing target" - ); - - // No other peers found - handle locally - if target.is_none() { - tracing::debug!(tx = %id, %key, "No other peers available, handling put operation locally"); - - // Store the contract locally - let updated_value = - put_contract(op_manager, key, value, related_contracts.clone(), &contract).await?; - - // Always seed the contract locally after a successful put - tracing::debug!( - tx = %id, - %key, - peer = %op_manager.ring.connection_manager.own_location(), - "Adding contract to local seed list" - ); - op_manager - .ring - .seed_contract(key, updated_value.size() as u64); - super::announce_contract_cached(op_manager, &key).await; - - // Determine which peers need to be notified and broadcast the update - let broadcast_to = op_manager.get_broadcast_targets(&key, &own_location); - - if broadcast_to.is_empty() { - // No peers to broadcast to - operation complete - tracing::debug!(tx = %id, %key, "No broadcast targets, completing operation"); - - // Set up state for SuccessfulPut message handling - put_op.state = Some(PutState::AwaitingResponse { - key, - upstream: None, - contract: contract.clone(), - state: updated_value.clone(), - subscribe, - origin: own_location.clone(), - }); - - // Create a SuccessfulPut message to trigger the completion handling - let success_msg = PutMsg::SuccessfulPut { - id, - target: own_location.clone(), - key, - origin: own_location.clone(), - }; - - // Use notify_op_change to trigger the completion handling - op_manager - .notify_op_change(NetMessage::from(success_msg), OpEnum::Put(put_op)) - .await?; - return Ok(()); - } else { - // Broadcast to subscribers - let sender = own_location.clone(); - let broadcast_state = Some(PutState::ReceivedRequest); - - let (new_state, return_msg) = try_to_broadcast( - id, - false, - op_manager, - broadcast_state, - own_location.clone(), - (broadcast_to, sender), - key, - (contract.clone(), updated_value), - put_op.upstream_addr, - ) - .await?; - - put_op.state = new_state; - - if let Some(msg) = return_msg { - op_manager - .notify_op_change(NetMessage::from(msg), OpEnum::Put(put_op)) - .await?; - } else { - // Complete the operation locally if no further messages needed - put_op.state = Some(PutState::Finished { key }); - } - } + tracing::info!(tx = %id, %key, htl, subscribe, "Starting PUT operation"); - return Ok(()); + // Build initial skip list with our own address + let mut skip_list = HashSet::new(); + if let Some(own_addr) = op_manager.ring.connection_manager.get_own_addr() { + skip_list.insert(own_addr); } - // At least one peer found - cache locally first, then forward to network - let target_peer = target.unwrap(); - - tracing::debug!( - tx = %id, - %key, - target_peer = %target_peer, - target_location = ?target_peer.location(), - "Caching state locally before forwarding PUT to target peer" - ); - - // Cache the contract state locally before forwarding - // This ensures the publishing node has immediate access to the new state - let updated_value = put_contract( - op_manager, - key, - value.clone(), - related_contracts.clone(), - &contract, - ) - .await - .map_err(|e| { - tracing::error!( - tx = %id, - %key, - error = %e, - "Failed to cache state locally before forwarding PUT" - ); - e - })?; - - tracing::debug!( - tx = %id, - %key, - "Local cache updated, now forwarding PUT to target peer" - ); - - put_op.state = Some(PutState::AwaitingResponse { - key, - upstream: None, - contract: contract.clone(), - state: updated_value.clone(), - subscribe, - origin: own_location.clone(), - }); - - // Create RequestPut message and forward to target peer - // Use PeerAddr::Unknown for origin - the sender doesn't know their own - // external address (especially behind NAT). The first recipient will - // fill this in from the packet source address. - let origin_for_msg = PeerKeyLocation::with_unknown_addr(own_location.pub_key().clone()); - let msg = PutMsg::RequestPut { + // Create the request message + let msg = PutMsg::Request { id, - origin: origin_for_msg, contract, related_contracts, - value: updated_value, + value, htl, - target: target_peer, + skip_list, }; - tracing::debug!( - tx = %id, - %key, - "Calling notify_op_change to send PUT message to network" - ); + // Transition to AwaitingResponse and send the message + // Note: upstream_addr is None because we're the originator + // next_hop is None initially - we process locally first then determine routing + let new_op = PutOp { + id, + state: Some(PutState::AwaitingResponse { + subscribe, + next_hop: None, + }), + upstream_addr: None, + }; - // Use notify_op_change to trigger the operation processing - // This will cause the operation to be processed through process_message for network propagation + // Send through the operation processing pipeline op_manager - .notify_op_change(NetMessage::from(msg), OpEnum::Put(put_op)) + .notify_op_change(NetMessage::from(msg), OpEnum::Put(new_op)) .await?; Ok(()) @@ -1431,296 +643,79 @@ async fn put_contract( } } -/// Forwards the put request to a peer which is closer to the assigned contract location if possible. -/// If is not possible to forward the request, then this peer is the final target and should store the contract. -/// It returns whether this peer should be storing the contract or not. -/// -/// This operation is "fire and forget" and the node does not keep track if is successful or not. -#[allow(clippy::too_many_arguments)] -async fn forward_put( - op_manager: &OpManager, - conn_manager: &CB, - contract: &ContractContainer, - new_value: WrappedState, - id: Transaction, - htl: usize, - skip_list: HashSet, - origin: PeerKeyLocation, -) -> bool -where - CB: NetworkBridge, -{ - let key = contract.key(); - let contract_loc = Location::from(&key); - let max_htl = op_manager.ring.max_hops_to_live.max(1); - let capped_htl = htl.min(max_htl); - if capped_htl == 0 { - tracing::warn!( - tx = %id, - %key, - skip = ?skip_list, - "Discarding PutForward with zero HTL after sanitization" - ); - return true; - } - let target_peer = op_manager - .ring - .closest_potentially_caching(&key, &skip_list); - let own_pkloc = op_manager.ring.connection_manager.own_location(); - let Some(own_loc) = own_pkloc.location() else { - tracing::warn!( - tx = %id, - %key, - skip = ?skip_list, - "Not forwarding PUT – own ring location not assigned yet; caching locally" - ); - return true; - }; - - tracing::info!( - tx = %id, - %key, - contract_location = %contract_loc.0, - own_location = %own_loc.0, - skip_list = ?skip_list, - "Evaluating PUT forwarding decision" - ); - - if let Some(peer) = target_peer { - let other_loc = peer.location().expect("target peer must have location"); - let other_distance = contract_loc.distance(other_loc); - let self_distance = contract_loc.distance(own_loc); - - tracing::info!( - tx = %id, - %key, - target_peer = %peer, - target_location = %other_loc.0, - target_distance = ?other_distance, - self_distance = ?self_distance, - skip_list = ?skip_list, - "Found potential forward target" - ); - - if peer.pub_key() == own_pkloc.pub_key() { - tracing::info!( - tx = %id, - %key, - skip_list = ?skip_list, - "Not forwarding - candidate peer resolves to self" - ); - return true; - } - - if htl == 0 { - tracing::info!( - tx = %id, - %key, - target_peer = %peer, - "HTL exhausted - storing locally" - ); - return true; - } - - let mut updated_skip_list = skip_list.clone(); - if let Some(addr) = own_pkloc.socket_addr() { - updated_skip_list.insert(addr); - } - - if other_distance < self_distance { - tracing::info!( - tx = %id, - %key, - from_peer = %own_pkloc, - to_peer = %peer, - contract_location = %contract_loc.0, - from_location = %own_loc.0, - to_location = %other_loc.0, - skip_list = ?updated_skip_list, - "Forwarding PUT to closer peer" - ); - } else { - tracing::info!( - tx = %id, - %key, - from_peer = %own_pkloc, - to_peer = %peer, - contract_location = %contract_loc.0, - from_location = %own_loc.0, - to_location = %other_loc.0, - skip_list = ?updated_skip_list, - "Forwarding PUT to peer despite non-improving distance (avoiding local minimum)" - ); - } - - let _ = conn_manager - .send( - peer.socket_addr() - .expect("target peer must have known address for forwarding"), - (PutMsg::PutForward { - id, - target: peer.clone(), - origin, - contract: contract.clone(), - new_value: new_value.clone(), - htl: capped_htl, - skip_list: updated_skip_list, - }) - .into(), - ) - .await; - return false; - } else { - tracing::info!( - tx = %id, - %key, - skip_list = ?skip_list, - "No peers available for forwarding - caching locally" - ); - } - true -} - mod messages { - use std::{borrow::Borrow, fmt::Display}; - - use super::*; + use std::{collections::HashSet, fmt::Display}; + use freenet_stdlib::prelude::*; use serde::{Deserialize, Serialize}; + use crate::message::{InnerMessage, Transaction}; + use crate::ring::Location; + + /// PUT operation messages. + /// + /// The PUT operation stores a contract and its initial state in the network. + /// It uses hop-by-hop routing: each node forwards toward the contract location + /// and remembers where the request came from to route the response back. + /// + /// If a PUT reaches a node that is already subscribed to the contract and the + /// merged state differs from the input, an Update operation is triggered to + /// propagate the change to other subscribers. #[derive(Debug, Serialize, Deserialize, Clone)] pub(crate) enum PutMsg { - /// Internal node instruction to find a route to the target node. - RequestPut { + /// Request to store a contract. Forwarded hop-by-hop toward contract location. + /// Each receiving node: + /// 1. Stores the contract locally (caching) + /// 2. Forwards to the next hop closer to contract location + /// 3. Remembers upstream_addr to route the response back + Request { id: Transaction, - origin: PeerKeyLocation, contract: ContractContainer, #[serde(deserialize_with = "RelatedContracts::deser_related_contracts")] related_contracts: RelatedContracts<'static>, value: WrappedState, - /// max hops to live - htl: usize, - target: PeerKeyLocation, - }, - /// Internal node instruction to await the result of a put. - AwaitPut { id: Transaction }, - /// Forward a contract and it's latest value to an other node - PutForward { - id: Transaction, - target: PeerKeyLocation, - origin: PeerKeyLocation, - contract: ContractContainer, - new_value: WrappedState, - /// current htl, reduced by one at each hop + /// Hops to live - decremented at each hop, request fails if reaches 0 htl: usize, + /// Addresses to skip when selecting next hop (prevents loops) skip_list: HashSet, }, - /// Value successfully inserted/updated. - SuccessfulPut { - id: Transaction, - target: PeerKeyLocation, - key: ContractKey, - origin: PeerKeyLocation, - }, - /// Target the node which is closest to the key - SeekNode { - id: Transaction, - target: PeerKeyLocation, - origin: PeerKeyLocation, - value: WrappedState, - contract: ContractContainer, - #[serde(deserialize_with = "RelatedContracts::deser_related_contracts")] - related_contracts: RelatedContracts<'static>, - /// max hops to live - htl: usize, - }, - /// Internal node instruction that a change (either a first time insert or an update). - Broadcasting { - id: Transaction, - broadcasted_to: usize, - broadcast_to: Vec, - key: ContractKey, - new_value: WrappedState, - contract: ContractContainer, - upstream: PeerKeyLocation, - origin: PeerKeyLocation, - }, - /// Broadcasting a change to a peer, which then will relay the changes to other peers. - BroadcastTo { - id: Transaction, - origin: PeerKeyLocation, - key: ContractKey, - new_value: WrappedState, - contract: ContractContainer, - target: PeerKeyLocation, - }, + /// Response indicating the PUT completed. Routed hop-by-hop back to originator + /// using each node's stored upstream_addr. + Response { id: Transaction, key: ContractKey }, } impl InnerMessage for PutMsg { fn id(&self) -> &Transaction { match self { - Self::SeekNode { id, .. } => id, - Self::RequestPut { id, .. } => id, - Self::Broadcasting { id, .. } => id, - Self::SuccessfulPut { id, .. } => id, - Self::PutForward { id, .. } => id, - Self::AwaitPut { id } => id, - Self::BroadcastTo { id, .. } => id, - } - } - - fn target(&self) -> Option> { - match self { - Self::SeekNode { target, .. } => Some(target), - Self::RequestPut { target, .. } => Some(target), - Self::SuccessfulPut { target, .. } => Some(target), - Self::PutForward { target, .. } => Some(target), - Self::BroadcastTo { target, .. } => Some(target), - _ => None, + Self::Request { id, .. } | Self::Response { id, .. } => id, } } fn requested_location(&self) -> Option { match self { - Self::SeekNode { contract, .. } => Some(Location::from(contract.id())), - Self::RequestPut { contract, .. } => Some(Location::from(contract.id())), - Self::Broadcasting { key, .. } => Some(Location::from(key.id())), - Self::PutForward { contract, .. } => Some(Location::from(contract.id())), - Self::BroadcastTo { key, .. } => Some(Location::from(key.id())), - _ => None, - } - } - } - - impl PutMsg { - // sender() method removed - use connection-based routing via source_addr instead - - /// Returns the socket address of the target peer for routing. - /// Used by OperationResult to determine where to send the message. - pub fn target_addr(&self) -> Option { - match self { - Self::SeekNode { target, .. } - | Self::RequestPut { target, .. } - | Self::SuccessfulPut { target, .. } - | Self::PutForward { target, .. } - | Self::BroadcastTo { target, .. } => target.socket_addr(), - // AwaitPut and Broadcasting are internal messages, no network target - Self::AwaitPut { .. } | Self::Broadcasting { .. } => None, + Self::Request { contract, .. } => Some(Location::from(contract.id())), + Self::Response { key, .. } => Some(Location::from(key.id())), } } } impl Display for PutMsg { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let id = self.id(); match self { - Self::SeekNode { .. } => write!(f, "SeekNode(id: {id})"), - Self::RequestPut { .. } => write!(f, "RequestPut(id: {id})"), - Self::Broadcasting { .. } => write!(f, "Broadcasting(id: {id})"), - Self::SuccessfulPut { .. } => write!(f, "SuccessfulPut(id: {id})"), - Self::PutForward { .. } => write!(f, "PutForward(id: {id})"), - Self::AwaitPut { .. } => write!(f, "AwaitPut(id: {id})"), - Self::BroadcastTo { .. } => write!(f, "BroadcastTo(id: {id})"), + Self::Request { + id, contract, htl, .. + } => { + write!( + f, + "PutRequest(id: {}, key: {}, htl: {})", + id, + contract.key(), + htl + ) + } + Self::Response { id, key } => { + write!(f, "PutResponse(id: {}, key: {})", id, key) + } } } } @@ -1730,7 +725,7 @@ mod messages { mod tests { use super::*; use crate::message::Transaction; - use crate::operations::test_utils::{make_contract_key, make_peer}; + use crate::operations::test_utils::make_contract_key; fn make_put_op(state: Option) -> PutOp { PutOp { @@ -1762,20 +757,14 @@ mod tests { } #[test] - fn put_op_not_finalized_when_received_request() { - let op = make_put_op(Some(PutState::ReceivedRequest)); - assert!( - !op.finalized(), - "PutOp should not be finalized in ReceivedRequest state" - ); - } - - #[test] - fn put_op_not_finalized_when_broadcast_ongoing() { - let op = make_put_op(Some(PutState::BroadcastOngoing)); + fn put_op_not_finalized_when_awaiting_response() { + let op = make_put_op(Some(PutState::AwaitingResponse { + subscribe: false, + next_hop: None, + })); assert!( !op.finalized(), - "PutOp should not be finalized in BroadcastOngoing state" + "PutOp should not be finalized in AwaitingResponse state" ); } @@ -1802,7 +791,10 @@ mod tests { #[test] fn put_op_to_host_result_error_when_not_finished() { - let op = make_put_op(Some(PutState::ReceivedRequest)); + let op = make_put_op(Some(PutState::AwaitingResponse { + subscribe: false, + next_hop: None, + })); let result = op.to_host_result(); assert!( result.is_err(), @@ -1834,56 +826,37 @@ mod tests { #[test] fn put_op_is_not_completed_when_in_progress() { - let op = make_put_op(Some(PutState::ReceivedRequest)); + let op = make_put_op(Some(PutState::AwaitingResponse { + subscribe: false, + next_hop: None, + })); assert!( !op.is_completed(), - "is_completed should return false for ReceivedRequest state" + "is_completed should return false for AwaitingResponse state" ); } // Tests for PutMsg helper methods - #[test] - fn put_msg_target_addr_returns_socket_for_successful_put() { - let target = make_peer(5000); - let origin = make_peer(6000); - let msg = PutMsg::SuccessfulPut { - id: Transaction::new::(), - target: target.clone(), - key: make_contract_key(1), - origin, - }; - assert_eq!( - msg.target_addr(), - target.socket_addr(), - "target_addr should return target's socket address for SuccessfulPut" - ); - } - - #[test] - fn put_msg_target_addr_returns_none_for_await_put() { - let msg = PutMsg::AwaitPut { - id: Transaction::new::(), - }; - assert!( - msg.target_addr().is_none(), - "target_addr should return None for AwaitPut message" - ); - } - #[test] fn put_msg_id_returns_transaction() { let tx = Transaction::new::(); - let msg = PutMsg::AwaitPut { id: tx }; + let msg = PutMsg::Response { + id: tx, + key: make_contract_key(1), + }; assert_eq!(*msg.id(), tx, "id() should return the transaction ID"); } #[test] fn put_msg_display_formats_correctly() { let tx = Transaction::new::(); - let msg = PutMsg::AwaitPut { id: tx }; + let msg = PutMsg::Response { + id: tx, + key: make_contract_key(1), + }; let display = format!("{}", msg); assert!( - display.contains("AwaitPut"), + display.contains("PutResponse"), "Display should contain message type name" ); } diff --git a/crates/core/src/operations/subscribe.rs b/crates/core/src/operations/subscribe.rs index 474d96e94..b1819c23b 100644 --- a/crates/core/src/operations/subscribe.rs +++ b/crates/core/src/operations/subscribe.rs @@ -7,10 +7,9 @@ use super::{get, OpEnum, OpError, OpInitialization, OpOutcome, Operation, Operat use crate::node::IsOperationCompleted; use crate::{ client_events::HostResult, - contract::{ContractHandlerEvent, StoreResponse}, message::{InnerMessage, NetMessage, Transaction}, node::{NetworkBridge, OpManager}, - ring::{Location, PeerKeyLocation, RingError}, + ring::{Location, RingError}, }; use freenet_stdlib::{ client_api::{ContractResponse, ErrorKind, HostResponse}, @@ -19,9 +18,6 @@ use freenet_stdlib::{ use serde::{Deserialize, Serialize}; use tokio::time::{sleep, Duration}; -const MAX_RETRIES: usize = 10; -const LOCAL_FETCH_TIMEOUT_MS: u64 = 1_500; -const LOCAL_FETCH_POLL_INTERVAL_MS: u64 = 25; /// Timeout for waiting on contract storage notification. /// Used when a subscription arrives before the contract has been propagated via PUT. const CONTRACT_WAIT_TIMEOUT_MS: u64 = 2_000; @@ -63,93 +59,34 @@ async fn wait_for_contract_with_timeout( super::has_contract(op_manager, key).await } -fn subscribers_snapshot(op_manager: &OpManager, key: &ContractKey) -> Vec { - op_manager - .ring - .subscribers_of(key) - .map(|subs| { - subs.iter() - .filter_map(|loc| loc.socket_addr()) - .map(|addr| format!("{:.8}", addr)) - .collect::>() - }) - .unwrap_or_default() -} - -/// Poll local storage for a short period until the fetched contract becomes available. -async fn wait_for_local_contract( - op_manager: &OpManager, - key: ContractKey, -) -> Result { - let mut elapsed = 0; - while elapsed < LOCAL_FETCH_TIMEOUT_MS { - if super::has_contract(op_manager, key).await? { - return Ok(true); - } - sleep(Duration::from_millis(LOCAL_FETCH_POLL_INTERVAL_MS)).await; - elapsed += LOCAL_FETCH_POLL_INTERVAL_MS; - } - Ok(false) -} - async fn fetch_contract_if_missing( op_manager: &OpManager, key: ContractKey, ) -> Result<(), OpError> { - if has_contract_with_code(op_manager, key).await? { + if super::has_contract(op_manager, key).await? { return Ok(()); } + // Start a GET operation to fetch the contract let get_op = get::start_op(key, true, false); get::request_get(op_manager, get_op, HashSet::new()).await?; - if wait_for_local_contract(op_manager, key).await? - && has_contract_with_code(op_manager, key).await? - { - Ok(()) - } else { - Err(RingError::NoCachingPeers(key).into()) - } -} + // Wait for contract to arrive + wait_for_contract_with_timeout(op_manager, key, CONTRACT_WAIT_TIMEOUT_MS).await?; -async fn has_contract_with_code(op_manager: &OpManager, key: ContractKey) -> Result { - match op_manager - .notify_contract_handler(ContractHandlerEvent::GetQuery { - key, - return_contract_code: true, - }) - .await? - { - ContractHandlerEvent::GetResponse { - response: - Ok(StoreResponse { - state: Some(_), - contract: Some(_), - }), - .. - } => Ok(true), - _ => Ok(false), - } + Ok(()) } #[derive(Debug)] enum SubscribeState { /// Prepare the request to subscribe. - PrepareRequest { - id: Transaction, - key: ContractKey, - }, - /// Received a request to subscribe to this network. - ReceivedRequest, - /// Awaitinh response from petition. + PrepareRequest { id: Transaction, key: ContractKey }, + /// Awaiting response from downstream peer. AwaitingResponse { - skip_list: HashSet, - retries: usize, - upstream_subscriber: Option, - current_hop: usize, - }, - Completed { - key: ContractKey, + /// The target we're sending to (for hop-by-hop routing) + next_hop: Option, }, + /// Subscription completed. + Completed { key: ContractKey }, } pub(crate) struct SubscribeResult {} @@ -191,129 +128,70 @@ pub(crate) async fn request_subscribe( op_manager: &OpManager, sub_op: SubscribeOp, ) -> Result<(), OpError> { - if let Some(SubscribeState::PrepareRequest { id, key }) = &sub_op.state { - let own_loc = op_manager.ring.connection_manager.own_location(); - let local_has_contract = super::has_contract(op_manager, *key).await?; - - let own_addr = own_loc - .socket_addr() - .expect("own location must have socket address"); - tracing::debug!( - tx = %id, - %key, - subscriber_peer = %own_addr, - local_has_contract, - "subscribe: request_subscribe invoked" - ); - - let mut skip_list: HashSet = HashSet::new(); - skip_list.insert(own_addr); - - // Use k_closest_potentially_caching to try multiple candidates - // Try up to 3 candidates - let candidates = op_manager - .ring - .k_closest_potentially_caching(key, &skip_list, 3); - - if tracing::enabled!(tracing::Level::INFO) { - let skip_display: Vec = skip_list - .iter() - .map(|addr| format!("{:.8}", addr)) - .collect(); - let candidate_display: Vec = candidates - .iter() - .filter_map(|cand| cand.socket_addr()) - .map(|addr| format!("{:.8}", addr)) - .collect(); - tracing::info!( - tx = %id, - %key, - skip = ?skip_display, - candidates = ?candidate_display, - "subscribe: k_closest_potentially_caching results" - ); - } - - let target = match candidates.first() { - Some(peer) => peer.clone(), - None => { - // No remote peers available - rely on local contract if present. - tracing::debug!( - %key, - "No remote peers available for subscription, checking locally" - ); - - if local_has_contract { - tracing::info!( - %key, - tx = %id, - "No remote peers, fulfilling subscription locally" - ); - return complete_local_subscription(op_manager, *id, *key).await; - } else { - let connection_count = op_manager.ring.connection_manager.num_connections(); - let subscribers = op_manager - .ring - .subscribers_of(key) - .map(|subs| { - subs.value() - .iter() - .filter_map(|loc| loc.socket_addr()) - .map(|addr| format!("{:.8}", addr)) - .collect::>() - }) - .unwrap_or_default(); - tracing::warn!( - %key, - tx = %id, - connection_count, - subscribers = ?subscribers, - "Contract not available locally and no remote peers" - ); - return Err(RingError::NoCachingPeers(*key).into()); - } - } - }; - - // Forward to remote peer - let new_state = Some(SubscribeState::AwaitingResponse { - skip_list, - retries: 0, - current_hop: op_manager.ring.max_hops_to_live, - upstream_subscriber: None, - }); - let target_addr = target - .socket_addr() - .expect("target must have socket address"); - tracing::debug!( - tx = %id, - %key, - target_peer = %target_addr, - target_location = ?target.location(), - "subscribe: forwarding RequestSub to target peer" - ); - // Create subscriber with PeerAddr::Unknown - the subscriber doesn't know their own - // external address (especially behind NAT). The first recipient (gateway) - // will fill this in from the packet source address. - let subscriber = PeerKeyLocation::with_unknown_addr(own_loc.pub_key().clone()); - let msg = SubscribeMsg::RequestSub { - id: *id, - key: *key, - target, - subscriber, - }; - let op = SubscribeOp { - id: *id, - state: new_state, - upstream_addr: sub_op.upstream_addr, - }; - op_manager - .notify_op_change(NetMessage::from(msg), OpEnum::Subscribe(op)) - .await?; - } else { + let Some(SubscribeState::PrepareRequest { id, key }) = &sub_op.state else { return Err(OpError::UnexpectedOpState); + }; + + let own_loc = op_manager.ring.connection_manager.own_location(); + let own_addr = own_loc + .socket_addr() + .expect("own location must have socket address"); + + tracing::debug!(tx = %id, %key, "subscribe: request_subscribe invoked"); + + // Check if we already have the contract locally + let has_contract_locally = super::has_contract(op_manager, *key).await?; + + // Find a peer to forward the request to (needed even if we have contract locally) + let mut skip_list: HashSet = HashSet::new(); + skip_list.insert(own_addr); + + let candidates = op_manager + .ring + .k_closest_potentially_caching(key, &skip_list, 3); + + // If we have the contract locally but no remote peers, complete locally only + if has_contract_locally && candidates.is_empty() { + tracing::info!(tx = %id, %key, "Contract available locally, no remote peers, completing subscription locally"); + return complete_local_subscription(op_manager, *id, *key).await; } + let Some(target) = candidates.first() else { + tracing::warn!(tx = %id, %key, "No remote peers available for subscription"); + return Err(RingError::NoCachingPeers(*key).into()); + }; + + let target_addr = target + .socket_addr() + .expect("target must have socket address"); + skip_list.insert(target_addr); + + tracing::debug!( + tx = %id, + %key, + target_peer = %target_addr, + "subscribe: forwarding Request to target peer" + ); + + let msg = SubscribeMsg::Request { + id: *id, + key: *key, + htl: op_manager.ring.max_hops_to_live, + skip_list, + }; + + let op = SubscribeOp { + id: *id, + state: Some(SubscribeState::AwaitingResponse { + next_hop: Some(target_addr), + }), + upstream_addr: None, // We're the originator + }; + + op_manager + .notify_op_change(NetMessage::from(msg), OpEnum::Subscribe(op)) + .await?; + Ok(()) } @@ -387,6 +265,15 @@ impl SubscribeOp { .into()) } } + + /// Get the next hop address if this operation is in a state that needs to send + /// an outbound message. Used for hop-by-hop routing. + pub(crate) fn get_next_hop_addr(&self) -> Option { + match &self.state { + Some(SubscribeState::AwaitingResponse { next_hop }) => *next_hop, + _ => None, + } + } } impl Operation for SubscribeOp { @@ -402,7 +289,7 @@ impl Operation for SubscribeOp { match op_manager.pop(msg.id()) { Ok(Some(OpEnum::Subscribe(subscribe_op))) => { - // was an existing operation, the other peer messaged back + // Existing operation - response from downstream peer Ok(OpInitialization { op: subscribe_op, source_addr, @@ -413,12 +300,14 @@ impl Operation for SubscribeOp { Err(OpError::OpNotPresent(id)) } Ok(None) => { - // new request to subscribe to a contract, initialize the machine + // New request from another peer - we're an intermediate/terminal node Ok(OpInitialization { op: Self { - state: Some(SubscribeState::ReceivedRequest), id, - upstream_addr: source_addr, // Connection-based routing: store who sent us this request + state: Some(SubscribeState::AwaitingResponse { + next_hop: None, // Will be determined during processing + }), + upstream_addr: source_addr, // Store who sent us this request }, source_addr, }) @@ -436,760 +325,229 @@ impl Operation for SubscribeOp { _conn_manager: &'a mut NB, op_manager: &'a OpManager, input: &'a Self::Message, - source_addr: Option, + _source_addr: Option, ) -> Pin> + Send + 'a>> { Box::pin(async move { - // Look up sender's PeerKeyLocation from source address for logging/routing - // This replaces the sender field that was previously embedded in messages - let sender_from_addr = source_addr.and_then(|addr| { - op_manager - .ring - .connection_manager - .get_peer_location_by_addr(addr) - }); - - let return_msg; - let new_state; + let id = self.id; match input { - SubscribeMsg::RequestSub { + SubscribeMsg::Request { id, key, - target: _, - subscriber, + htl, + skip_list, } => { - // Fill in subscriber's external address from transport layer if unknown. - // This is the key step where the first recipient (gateway) determines the - // subscriber's external address from the actual packet source address. - // IMPORTANT: Must fill address BEFORE any .peer() calls to avoid panic. - let mut subscriber = subscriber.clone(); - - if subscriber.peer_addr.is_unknown() { - if let Some(addr) = source_addr { - subscriber.set_addr(addr); - tracing::debug!( - tx = %id, - %key, - subscriber_addr = %addr, - "subscribe: filled subscriber address from source_addr" - ); - } - } - - let subscriber_addr = subscriber - .socket_addr() - .expect("subscriber must have socket address after filling"); tracing::debug!( tx = %id, %key, - subscriber = %subscriber_addr, - source_addr = ?source_addr, - "subscribe: processing RequestSub" + htl, + upstream_addr = ?self.upstream_addr, + "subscribe: processing Request" ); - let own_loc = op_manager.ring.connection_manager.own_location(); - - if !matches!( - self.state, - Some(SubscribeState::AwaitingResponse { .. }) - | Some(SubscribeState::ReceivedRequest) - ) { - tracing::warn!( - tx = %id, - %key, - state = ?self.state, - "subscribe: RequestSub received in unexpected state" - ); - return Err(OpError::invalid_transition(self.id)); - } + // Check if we have the contract if super::has_contract(op_manager, *key).await? { - let before_direct = subscribers_snapshot(op_manager, key); - tracing::info!( - tx = %id, - %key, - subscriber = %subscriber_addr, - subscribers_before = ?before_direct, - "subscribe: handling RequestSub locally (contract available)" - ); - - // Local registration - no upstream NAT address - if op_manager - .ring - .add_subscriber(key, subscriber.clone(), None) - .is_err() - { - tracing::warn!( - tx = %id, - %key, - subscriber = %subscriber_addr, - subscribers_before = ?before_direct, - "subscribe: direct registration failed (max subscribers reached)" - ); - let return_msg = SubscribeMsg::ReturnSub { - id: *id, - key: *key, - target: subscriber.clone(), - subscribed: false, - }; - return Ok(OperationResult { - target_addr: return_msg.target_addr(), - return_msg: Some(NetMessage::from(return_msg)), - state: None, - }); - } - - let after_direct = subscribers_snapshot(op_manager, key); - tracing::info!( - tx = %id, - %key, - subscriber = %subscriber_addr, - subscribers_after = ?after_direct, - "subscribe: registered direct subscriber (RequestSub)" - ); - - let own_addr = own_loc - .socket_addr() - .expect("own location must have socket address"); - if subscriber_addr == own_addr { - tracing::debug!( - tx = %id, - %key, - "RequestSub originated locally; sending LocalSubscribeComplete" - ); - if let Err(err) = op_manager - .notify_node_event( - crate::message::NodeEvent::LocalSubscribeComplete { - tx: *id, - key: *key, - subscribed: true, - }, - ) - .await + // We have the contract - register upstream as subscriber and respond + if let Some(upstream_addr) = self.upstream_addr { + // Register the upstream peer as subscriber + if let Some(upstream_peer) = op_manager + .ring + .connection_manager + .get_peer_location_by_addr(upstream_addr) { - tracing::error!( - tx = %id, - %key, - error = %err, - "Failed to send LocalSubscribeComplete event for RequestSub" + let _ = op_manager.ring.add_subscriber( + key, + upstream_peer, + Some(upstream_addr.into()), ); - return Err(err); } - return build_op_result(self.id, None, None, self.upstream_addr); + tracing::info!(tx = %id, %key, "Subscription fulfilled, sending Response"); + return Ok(OperationResult { + return_msg: Some(NetMessage::from(SubscribeMsg::Response { + id: *id, + key: *key, + subscribed: true, + })), + next_hop: Some(upstream_addr), + state: None, + }); + } else { + // We're the originator and have the contract locally + complete_local_subscription(op_manager, *id, *key).await?; + return Ok(OperationResult { + return_msg: None, + next_hop: None, + state: None, + }); } - - let return_msg = SubscribeMsg::ReturnSub { - id: *id, - key: *key, - target: subscriber.clone(), - subscribed: true, - }; - - return build_op_result( - self.id, - None, - Some(return_msg), - self.upstream_addr, - ); } - // Contract not found locally. Wait briefly in case a PUT is in flight. - tracing::debug!( - tx = %id, - %key, - "subscribe: contract not found, waiting for possible in-flight PUT" - ); - - // Wait for contract with timeout (handles race conditions internally) + // Contract not found - wait briefly for in-flight PUT if wait_for_contract_with_timeout(op_manager, *key, CONTRACT_WAIT_TIMEOUT_MS) .await? { - tracing::info!( - tx = %id, - %key, - "subscribe: contract arrived, handling locally" - ); - - // Contract exists, register subscription - if op_manager - .ring - .add_subscriber(key, subscriber.clone(), None) - .is_err() - { - let return_msg = SubscribeMsg::ReturnSub { - id: *id, - key: *key, - target: subscriber.clone(), - subscribed: false, - }; + // Contract arrived - handle same as above + if let Some(upstream_addr) = self.upstream_addr { + if let Some(upstream_peer) = op_manager + .ring + .connection_manager + .get_peer_location_by_addr(upstream_addr) + { + let _ = op_manager.ring.add_subscriber( + key, + upstream_peer, + Some(upstream_addr.into()), + ); + } + return Ok(OperationResult { - target_addr: return_msg.target_addr(), - return_msg: Some(NetMessage::from(return_msg)), + return_msg: Some(NetMessage::from(SubscribeMsg::Response { + id: *id, + key: *key, + subscribed: true, + })), + next_hop: Some(upstream_addr), + state: None, + }); + } else { + complete_local_subscription(op_manager, *id, *key).await?; + return Ok(OperationResult { + return_msg: None, + next_hop: None, state: None, }); } - - let return_msg = SubscribeMsg::ReturnSub { - id: *id, - key: *key, - target: subscriber.clone(), - subscribed: true, - }; - return build_op_result( - self.id, - None, - Some(return_msg), - self.upstream_addr, - ); } - // Contract still not found after waiting, try to forward - tracing::debug!( - tx = %id, - %key, - "subscribe: contract not found after waiting, attempting to forward" - ); - - let own_addr = own_loc - .socket_addr() - .expect("own location must have socket address"); - let mut skip = HashSet::new(); - skip.insert(subscriber_addr); - skip.insert(own_addr); - - let forward_target = op_manager - .ring - .k_closest_potentially_caching(key, &skip, 3) - .into_iter() - .find(|candidate| { - candidate - .socket_addr() - .map(|addr| addr != own_addr) - .unwrap_or(false) - }); - - // If no forward target available, send ReturnSub(subscribed: false) back - // This allows the subscriber to complete locally if they have the contract - let forward_target = match forward_target { - Some(target) => target, - None => { - tracing::warn!( - tx = %id, - %key, - "subscribe: no forward target available, returning unsubscribed" - ); - let return_msg = SubscribeMsg::ReturnSub { - id: *id, - key: *key, - target: subscriber.clone(), - subscribed: false, - }; + // Contract still not found - try to forward + if *htl == 0 { + tracing::warn!(tx = %id, %key, "Subscribe request exhausted HTL"); + if let Some(upstream_addr) = self.upstream_addr { return Ok(OperationResult { - target_addr: return_msg.target_addr(), - return_msg: Some(NetMessage::from(return_msg)), + return_msg: Some(NetMessage::from(SubscribeMsg::Response { + id: *id, + key: *key, + subscribed: false, + })), + next_hop: Some(upstream_addr), state: None, }); } - }; - - let forward_target_addr = forward_target - .socket_addr() - .expect("forward target must have socket address"); - skip.insert(forward_target_addr); - - new_state = self.state; - return_msg = Some(SubscribeMsg::SeekNode { - id: *id, - key: *key, - target: forward_target, - subscriber: subscriber.clone(), - skip_list: skip.clone(), - htl: op_manager.ring.max_hops_to_live.max(1), - retries: 0, - }); - } - SubscribeMsg::SeekNode { - key, - id, - subscriber, - target, - skip_list, - htl, - retries, - } => { - // Fill in subscriber's external address from transport layer if unknown. - // This is the key step where the recipient determines the subscriber's - // external address from the actual packet source address. - let mut subscriber = subscriber.clone(); - if subscriber.peer_addr.is_unknown() { - if let Some(addr) = source_addr { - subscriber.set_addr(addr); - tracing::debug!( - tx = %id, - %key, - subscriber_addr = %addr, - "subscribe: filled SeekNode subscriber address from source_addr" - ); - } + return Err(RingError::NoCachingPeers(*key).into()); } - let ring_max_htl = op_manager.ring.max_hops_to_live.max(1); - let htl = (*htl).min(ring_max_htl); - let this_peer = op_manager.ring.connection_manager.own_location(); - let return_not_subbed = || -> OperationResult { - let return_msg = SubscribeMsg::ReturnSub { - key: *key, - id: *id, - subscribed: false, - target: subscriber.clone(), - }; - OperationResult { - target_addr: return_msg.target_addr(), - return_msg: Some(NetMessage::from(return_msg)), - state: None, - } - }; - - if htl == 0 { - let subscriber_addr = subscriber - .socket_addr() - .expect("subscriber must have socket address"); - tracing::warn!( - tx = %id, - %key, - subscriber = %subscriber_addr, - "Dropping Subscribe SeekNode with zero HTL" - ); - return Ok(return_not_subbed()); + // Find next hop + let own_addr = op_manager + .ring + .connection_manager + .own_location() + .socket_addr() + .expect("own address"); + let mut new_skip_list = skip_list.clone(); + new_skip_list.insert(own_addr); + if let Some(upstream) = self.upstream_addr { + new_skip_list.insert(upstream); } - if !super::has_contract(op_manager, *key).await? { - tracing::debug!(tx = %id, %key, "Contract not found, trying other peer"); - - // Use k_closest_potentially_caching to try multiple candidates - let candidates = op_manager + let candidates = + op_manager .ring - .k_closest_potentially_caching(key, skip_list, 3); - if candidates.is_empty() { - let connection_count = - op_manager.ring.connection_manager.num_connections(); - tracing::warn!( - tx = %id, - %key, - skip = ?skip_list, - connection_count, - "No remote peer available for forwarding" - ); - tracing::info!( - tx = %id, - %key, - "Attempting to fetch contract locally before aborting subscribe" - ); - - let get_op = get::start_op(*key, true, false); - if let Err(fetch_err) = - get::request_get(op_manager, get_op, HashSet::new()).await - { - tracing::warn!( - tx = %id, - %key, - error = %fetch_err, - "Failed to fetch contract locally while handling subscribe" - ); - return Ok(return_not_subbed()); - } - - if wait_for_local_contract(op_manager, *key).await? { - tracing::info!( - tx = %id, - %key, - "Fetched contract locally while handling subscribe" - ); - } else { - tracing::warn!( - tx = %id, - %key, - "Contract still unavailable locally after fetch attempt" - ); - return Ok(return_not_subbed()); - } - } else { - let Some(new_target) = candidates.first() else { - return Ok(return_not_subbed()); - }; - let new_target = new_target.clone(); - let new_htl = htl.saturating_sub(1); - - if new_htl == 0 { - tracing::debug!(tx = %id, %key, "Max number of hops reached while trying to get contract"); - return Ok(return_not_subbed()); - } - - let mut new_skip_list = skip_list.clone(); - if let Some(target_addr) = target.socket_addr() { - new_skip_list.insert(target_addr); - } + .k_closest_potentially_caching(key, &new_skip_list, 3); - let new_target_addr = new_target - .socket_addr() - .expect("new target must have socket address"); - let subscriber_addr = subscriber - .socket_addr() - .expect("subscriber must have socket address"); - tracing::info!( - tx = %id, - %key, - new_target = %new_target_addr, - upstream = %subscriber_addr, - "Forward request to peer" - ); - tracing::debug!( - tx = %id, - %key, - candidates = ?candidates, - skip = ?new_skip_list, - "Forwarding seek to next candidate" - ); - // Retry seek node when the contract to subscribe has not been found in this node - return build_op_result( - *id, - Some(SubscribeState::AwaitingResponse { - skip_list: new_skip_list.clone(), - retries: *retries, - current_hop: new_htl, - upstream_subscriber: Some(subscriber.clone()), - }), - // Use PeerAddr::Unknown - the subscriber doesn't know their own - // external address (especially behind NAT). The recipient will - // fill this in from the packet source address. - (SubscribeMsg::SeekNode { + let Some(next_hop) = candidates.first() else { + // No forward target + if let Some(upstream_addr) = self.upstream_addr { + return Ok(OperationResult { + return_msg: Some(NetMessage::from(SubscribeMsg::Response { id: *id, key: *key, - subscriber: PeerKeyLocation::with_unknown_addr( - this_peer.pub_key().clone(), - ), - target: new_target, - skip_list: new_skip_list, - htl: new_htl, - retries: *retries, - }) - .into(), - self.upstream_addr, - ); + subscribed: false, + })), + next_hop: Some(upstream_addr), + state: None, + }); } - // After fetch attempt we should now have the contract locally. - } + return Err(RingError::NoCachingPeers(*key).into()); + }; - let before_direct = subscribers_snapshot(op_manager, key); - let subscriber_addr = subscriber - .socket_addr() - .expect("subscriber must have socket address"); - tracing::info!( - tx = %id, - %key, - subscriber = %subscriber_addr, - subscribers_before = ?before_direct, - "subscribe: attempting to register direct subscriber" - ); - // Local registration - no upstream NAT address - if op_manager - .ring - .add_subscriber(key, subscriber.clone(), None) - .is_err() - { - tracing::warn!( - tx = %id, - %key, - subscriber = %subscriber_addr, - subscribers_before = ?before_direct, - "subscribe: direct registration failed (max subscribers reached)" - ); - // max number of subscribers for this contract reached - return Ok(return_not_subbed()); - } - let after_direct = subscribers_snapshot(op_manager, key); - tracing::info!( - tx = %id, - %key, - subscriber = %subscriber_addr, - subscribers_after = ?after_direct, - "subscribe: registered direct subscriber" - ); + let next_addr = next_hop.socket_addr().expect("next hop address"); + new_skip_list.insert(next_addr); - match self.state { - Some(SubscribeState::ReceivedRequest) => { - tracing::info!( - tx = %id, - %key, - subscriber = %subscriber_addr, - "Peer successfully subscribed to contract", - ); - new_state = None; - return_msg = Some(SubscribeMsg::ReturnSub { - target: subscriber.clone(), - id: *id, - key: *key, - subscribed: true, - }); - } - _ => return Err(OpError::invalid_transition(self.id)), - } + tracing::debug!(tx = %id, %key, next = %next_addr, "Forwarding subscribe request"); + + Ok(OperationResult { + return_msg: Some(NetMessage::from(SubscribeMsg::Request { + id: *id, + key: *key, + htl: htl.saturating_sub(1), + skip_list: new_skip_list, + })), + next_hop: Some(next_addr), + state: Some(OpEnum::Subscribe(SubscribeOp { + id: *id, + state: Some(SubscribeState::AwaitingResponse { + next_hop: None, // Already routing via next_hop in OperationResult + }), + upstream_addr: self.upstream_addr, + })), + }) } - SubscribeMsg::ReturnSub { - subscribed: false, + + SubscribeMsg::Response { + id: msg_id, key, - target: _, - id, + subscribed, } => { - // Get sender from connection-based routing for skip list and logging - let sender = sender_from_addr - .clone() - .expect("ReturnSub requires source_addr"); - let sender_addr = sender - .socket_addr() - .expect("sender must have socket address"); - tracing::warn!( - tx = %id, + tracing::debug!( + tx = %msg_id, %key, - potential_provider = %sender_addr, - "Contract not found at potential subscription provider", + subscribed, + upstream_addr = ?self.upstream_addr, + "subscribe: processing Response" ); - // will error out in case it has reached max number of retries - match self.state { - Some(SubscribeState::AwaitingResponse { - mut skip_list, - retries, - upstream_subscriber, - current_hop, - }) => { - if retries < MAX_RETRIES { - skip_list.insert(sender_addr); - // Use k_closest_potentially_caching to try multiple candidates - let candidates = op_manager - .ring - .k_closest_potentially_caching(key, &skip_list, 3); - if let Some(target) = candidates.first() { - // Use PeerAddr::Unknown - the subscriber doesn't know their own - // external address (especially behind NAT). The recipient will - // fill this in from the packet source address. - let own_loc = op_manager.ring.connection_manager.own_location(); - let subscriber = PeerKeyLocation::with_unknown_addr( - own_loc.pub_key().clone(), - ); - return_msg = Some(SubscribeMsg::SeekNode { - id: *id, - key: *key, - subscriber, - target: target.clone(), - skip_list: skip_list.clone(), - htl: current_hop, - retries: retries + 1, - }); - } else { - // No more candidates - try to complete locally as fallback - if super::has_contract(op_manager, *key).await? { - tracing::info!( - tx = %id, - %key, - "No remote peers, completing subscription locally as fallback" - ); - complete_local_subscription(op_manager, *id, *key).await?; - return Ok(OperationResult { - return_msg: None, - target_addr: None, - state: None, - }); - } - return Err(RingError::NoCachingPeers(*key).into()); - } - new_state = Some(SubscribeState::AwaitingResponse { - skip_list, - retries: retries + 1, - upstream_subscriber, - current_hop, - }); - } else { - return Err(OpError::MaxRetriesExceeded( - *id, - id.transaction_type(), - )); - } - } - _ => return Err(OpError::invalid_transition(self.id)), - } - } - SubscribeMsg::ReturnSub { - subscribed: true, - key, - id, - target, - } => match self.state { - Some(SubscribeState::AwaitingResponse { - upstream_subscriber, - .. - }) => { - // Get sender from connection-based routing for logging - let sender = sender_from_addr - .clone() - .expect("ReturnSub requires source_addr"); - fetch_contract_if_missing(op_manager, *key).await?; - let target_addr = target - .socket_addr() - .expect("target must have socket address"); - let sender_addr = sender - .socket_addr() - .expect("sender must have socket address"); - tracing::info!( - tx = %id, - %key, - this_peer = %target_addr, - provider = %sender_addr, - "Subscribed to contract" - ); - tracing::info!( - tx = %id, - %key, - upstream = upstream_subscriber - .as_ref() - .and_then(|loc| loc.socket_addr()) - .map(|addr| format!("{:.8}", addr)) - .unwrap_or_else(|| "".into()), - "Handling ReturnSub (subscribed=true)" - ); - if let Some(upstream_subscriber) = upstream_subscriber.as_ref() { - let before_upstream = subscribers_snapshot(op_manager, key); - let upstream_addr = upstream_subscriber - .socket_addr() - .expect("upstream subscriber must have socket address"); - tracing::info!( - tx = %id, - %key, - upstream = %upstream_addr, - subscribers_before = ?before_upstream, - "subscribe: attempting to register upstream link" - ); - // Local registration - no upstream NAT address - if op_manager - .ring - .add_subscriber(key, upstream_subscriber.clone(), None) - .is_err() - { - tracing::warn!( - tx = %id, - %key, - upstream = %upstream_addr, - subscribers_before = ?before_upstream, - "subscribe: upstream registration failed (max subscribers reached)" - ); - } else { - let after_upstream = subscribers_snapshot(op_manager, key); - tracing::info!( - tx = %id, - %key, - upstream = %upstream_addr, - subscribers_after = ?after_upstream, - "subscribe: registered upstream link" - ); - } - } + if *subscribed { + // Fetch contract if we don't have it + fetch_contract_if_missing(op_manager, *key).await?; + } - let before_provider = subscribers_snapshot(op_manager, key); - tracing::info!( - tx = %id, - %key, - provider = %sender_addr, - subscribers_before = ?before_provider, - "subscribe: registering provider/subscription source" - ); - // Local registration - no upstream NAT address - if op_manager - .ring - .add_subscriber(key, sender.clone(), None) - .is_err() - { - // concurrently it reached max number of subscribers for this contract - tracing::debug!( - tx = %id, - %key, - "Max number of subscribers reached for contract" - ); - return Err(OpError::UnexpectedOpState); - } - let after_provider = subscribers_snapshot(op_manager, key); - tracing::info!( - tx = %id, - %key, - provider = %sender_addr, - subscribers_after = ?after_provider, - "subscribe: registered provider/subscription source" - ); - - new_state = Some(SubscribeState::Completed { key: *key }); - if let Some(upstream_subscriber) = upstream_subscriber { - let upstream_addr = upstream_subscriber - .socket_addr() - .expect("upstream subscriber must have socket address"); - tracing::debug!( - tx = %id, - %key, - upstream_subscriber = %upstream_addr, - "Forwarding subscription to upstream subscriber" - ); - return_msg = Some(SubscribeMsg::ReturnSub { - id: *id, + // Forward response upstream or complete + if let Some(upstream_addr) = self.upstream_addr { + // We're an intermediate node - forward response upstream + tracing::debug!(tx = %msg_id, %key, upstream = %upstream_addr, "Forwarding response upstream"); + Ok(OperationResult { + return_msg: Some(NetMessage::from(SubscribeMsg::Response { + id: *msg_id, key: *key, - target: upstream_subscriber, - subscribed: true, - }); - } else { - tracing::debug!( - tx = %id, - %key, - "No upstream subscriber, subscription completed" - ); - return_msg = None; - } - } - _other => { - return Err(OpError::invalid_transition(self.id)); + subscribed: *subscribed, + })), + next_hop: Some(upstream_addr), + state: Some(OpEnum::Subscribe(SubscribeOp { + id, + state: Some(SubscribeState::Completed { key: *key }), + upstream_addr: None, + })), + }) + } else { + // We're the originator - complete the operation + tracing::info!(tx = %msg_id, %key, subscribed, "Subscribe completed (originator)"); + Ok(OperationResult { + return_msg: None, + next_hop: None, + state: Some(OpEnum::Subscribe(SubscribeOp { + id, + state: Some(SubscribeState::Completed { key: *key }), + upstream_addr: None, + })), + }) } - }, - _ => return Err(OpError::UnexpectedOpState), + } } - - build_op_result(self.id, new_state, return_msg, self.upstream_addr) }) } } -fn build_op_result( - id: Transaction, - state: Option, - msg: Option, - upstream_addr: Option, -) -> Result { - // For response messages (ReturnSub), use upstream_addr directly for routing. - // This is more reliable than extracting from the message's target field, which - // may have been looked up from connection_manager (subject to race conditions). - // For forward messages (SeekNode, RequestSub, FetchRouting), use the message's target. - let target_addr = match &msg { - Some(SubscribeMsg::ReturnSub { .. }) => upstream_addr, - _ => msg.as_ref().and_then(|m| m.target_addr()), - }; - - let output_op = state.map(|state| SubscribeOp { - id, - state: Some(state), - upstream_addr, - }); - Ok(OperationResult { - return_msg: msg.map(NetMessage::from), - target_addr, - state: output_op.map(OpEnum::Subscribe), - }) -} - impl IsOperationCompleted for SubscribeOp { fn is_completed(&self) -> bool { matches!(self.state, Some(SubscribeState::Completed { .. })) @@ -1200,35 +558,29 @@ impl IsOperationCompleted for SubscribeOp { mod tests; mod messages { - use std::{borrow::Borrow, fmt::Display}; + use std::fmt::Display; use super::*; + /// Subscribe operation messages. + /// + /// Uses hop-by-hop routing: each node stores `upstream_addr` from the transport layer + /// to route responses back. No `PeerKeyLocation` is embedded in wire messages. #[derive(Debug, Serialize, Deserialize, Clone)] pub(crate) enum SubscribeMsg { - FetchRouting { - id: Transaction, - target: PeerKeyLocation, - }, - RequestSub { + /// Request to subscribe to a contract. Forwarded hop-by-hop toward contract location. + Request { id: Transaction, key: ContractKey, - target: PeerKeyLocation, - subscriber: PeerKeyLocation, - }, - SeekNode { - id: Transaction, - key: ContractKey, - target: PeerKeyLocation, - subscriber: PeerKeyLocation, - skip_list: HashSet, + /// Hops to live - decremented at each hop htl: usize, - retries: usize, + /// Addresses to skip when selecting next hop (prevents loops) + skip_list: HashSet, }, - ReturnSub { + /// Response indicating subscription result. Routed hop-by-hop back to originator. + Response { id: Transaction, key: ContractKey, - target: PeerKeyLocation, subscribed: bool, }, } @@ -1236,43 +588,15 @@ mod messages { impl InnerMessage for SubscribeMsg { fn id(&self) -> &Transaction { match self { - Self::SeekNode { id, .. } => id, - Self::FetchRouting { id, .. } => id, - Self::RequestSub { id, .. } => id, - Self::ReturnSub { id, .. } => id, - } - } - - fn target(&self) -> Option> { - match self { - Self::RequestSub { target, .. } => Some(target), - Self::SeekNode { target, .. } => Some(target), - Self::ReturnSub { target, .. } => Some(target), - _ => None, + Self::Request { id, .. } | Self::Response { id, .. } => id, } } fn requested_location(&self) -> Option { match self { - Self::SeekNode { key, .. } => Some(Location::from(key.id())), - Self::RequestSub { key, .. } => Some(Location::from(key.id())), - Self::ReturnSub { key, .. } => Some(Location::from(key.id())), - _ => None, - } - } - } - - impl SubscribeMsg { - // sender() method removed - use connection-based routing via source_addr instead - - /// Returns the socket address of the target peer for routing. - /// Used by OperationResult to determine where to send the message. - pub fn target_addr(&self) -> Option { - match self { - Self::FetchRouting { target, .. } - | Self::RequestSub { target, .. } - | Self::SeekNode { target, .. } - | Self::ReturnSub { target, .. } => target.socket_addr(), + Self::Request { key, .. } | Self::Response { key, .. } => { + Some(Location::from(key.id())) + } } } } @@ -1281,10 +605,15 @@ mod messages { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let id = self.id(); match self { - Self::SeekNode { .. } => write!(f, "SeekNode(id: {id})"), - Self::FetchRouting { .. } => write!(f, "FetchRouting(id: {id})"), - Self::RequestSub { .. } => write!(f, "RequestSub(id: {id})"), - Self::ReturnSub { .. } => write!(f, "ReturnSub(id: {id})"), + Self::Request { key, .. } => write!(f, "Subscribe::Request(id: {id}, key: {key})"), + Self::Response { + key, subscribed, .. + } => { + write!( + f, + "Subscribe::Response(id: {id}, key: {key}, subscribed: {subscribed})" + ) + } } } } diff --git a/crates/core/src/operations/subscribe/tests.rs b/crates/core/src/operations/subscribe/tests.rs index 10d21b913..d6625bf78 100644 --- a/crates/core/src/operations/subscribe/tests.rs +++ b/crates/core/src/operations/subscribe/tests.rs @@ -448,38 +448,18 @@ async fn test_subscription_validates_k_closest_usage() { ); } - // Test 3: Validate subscription state transitions maintain skip list correctly + // Test 3: Validate simplified AwaitingResponse state { - // Create AwaitingResponse state with skip list (of socket addresses) - use std::net::{IpAddr, Ipv4Addr}; - let failed_addr1 = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8001); - let failed_addr2 = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8002); - let mut skip_list: HashSet = HashSet::new(); - skip_list.insert(failed_addr1); - skip_list.insert(failed_addr2); - let op = SubscribeOp { id: transaction_id, - state: Some(SubscribeState::AwaitingResponse { - skip_list: skip_list.clone(), - retries: 2, - upstream_subscriber: None, - current_hop: 5, - }), + state: Some(SubscribeState::AwaitingResponse { next_hop: None }), upstream_addr: None, }; - // Verify skip list is maintained in state - if let Some(SubscribeState::AwaitingResponse { - skip_list: list, - retries, - .. - }) = &op.state - { - assert_eq!(list.len(), 2, "Skip list maintains failed peers"); - assert!(list.contains(&failed_addr1)); - assert!(list.contains(&failed_addr2)); - assert_eq!(*retries, 2, "Retry count is tracked"); - } + // State is simplified - skip list is now in the Request message, not state + assert!(matches!( + op.state, + Some(SubscribeState::AwaitingResponse { .. }) + )); } } diff --git a/crates/core/src/operations/update.rs b/crates/core/src/operations/update.rs index f437afeef..b483013d1 100644 --- a/crates/core/src/operations/update.rs +++ b/crates/core/src/operations/update.rs @@ -33,6 +33,15 @@ impl UpdateOp { matches!(self.state, None | Some(UpdateState::Finished { .. })) } + /// Get the next hop address for hop-by-hop routing. + /// For UPDATE, this extracts the socket address from the stats.target field. + pub fn get_next_hop_addr(&self) -> Option { + self.stats + .as_ref() + .and_then(|s| s.target.as_ref()) + .and_then(|t| t.socket_addr()) + } + pub(super) fn to_host_result(&self) -> HostResult { if let Some(UpdateState::Finished { key, summary }) = &self.state { tracing::debug!( @@ -150,12 +159,13 @@ impl Operation for UpdateOp { let return_msg; let new_state; let stats = self.stats; + // Track the next hop when forwarding RequestUpdate to another peer + let mut forward_hop: Option = None; match input { UpdateMsg::RequestUpdate { id, key, - target, related_contracts, value, } => { @@ -164,44 +174,26 @@ impl Operation for UpdateOp { .clone() .expect("RequestUpdate requires source_addr"); let self_location = op_manager.ring.connection_manager.own_location(); - let executing_addr = self_location.socket_addr(); - let request_sender_addr = request_sender.socket_addr(); - let target_addr_opt = target.socket_addr(); tracing::debug!( tx = %id, %key, executing_peer = ?executing_addr, - request_sender = ?request_sender_addr, - target_peer = ?target_addr_opt, - "UPDATE RequestUpdate: executing_peer received request from sender targeting peer" + request_sender = ?request_sender.socket_addr(), + "UPDATE RequestUpdate: processing request" ); - // If target is not us, this message is meant for another peer - // This can happen when the initiator processes its own message before sending to network - if target_addr_opt != executing_addr { - tracing::debug!( - tx = %id, - %key, - our_peer = ?executing_addr, - target_peer = ?target_addr_opt, - "RequestUpdate target is not us - message will be routed to target" - ); - // Keep current state, message will be sent to target peer via network - return_msg = None; - new_state = self.state; + // With hop-by-hop routing, if we receive the message, we process it. + // Determine if this is a local request (from our own client) or remote request + let is_local_request = + matches!(&self.state, Some(UpdateState::PrepareRequest { .. })); + let upstream = if is_local_request { + None // No upstream - we are the initiator } else { - // Target is us - process the request - // Determine if this is a local request (from our own client) or remote request - let is_local_request = - matches!(&self.state, Some(UpdateState::PrepareRequest { .. })); - let upstream = if is_local_request { - None // No upstream - we are the initiator - } else { - Some(request_sender.clone()) // Upstream is the peer that sent us this request - }; - + Some(request_sender.clone()) // Upstream is the peer that sent us this request + }; + { // First check if we have the contract locally let has_contract = match op_manager .notify_contract_handler(ContractHandlerEvent::GetQuery { @@ -333,15 +325,16 @@ impl Operation for UpdateOp { "Forwarding UPDATE to peer that might have contract" ); - // Create a SeekNode message to forward to the next hop - return_msg = Some(UpdateMsg::SeekNode { + // Forward RequestUpdate to the next hop + return_msg = Some(UpdateMsg::RequestUpdate { id: *id, - target: forward_target, - value: value.clone(), key: *key, related_contracts: related_contracts.clone(), + value: value.clone(), }); new_state = None; + // Track where to forward this message + forward_hop = Some(forward_addr); } else { // No peers available and we don't have the contract - capture context let subscribers = op_manager @@ -378,182 +371,7 @@ impl Operation for UpdateOp { } } } - UpdateMsg::SeekNode { - id, - value, - key, - related_contracts, - target: _, - } => { - // Get sender from connection-based routing - let sender = sender_from_addr - .clone() - .expect("SeekNode requires source_addr"); - // Check if we have the contract locally - let has_contract = match op_manager - .notify_contract_handler(ContractHandlerEvent::GetQuery { - key: *key, - return_contract_code: false, - }) - .await - { - Ok(ContractHandlerEvent::GetResponse { - response: Ok(StoreResponse { state: Some(_), .. }), - .. - }) => { - tracing::debug!(tx = %id, %key, "Contract exists locally for SeekNode UPDATE"); - true - } - _ => { - tracing::debug!(tx = %id, %key, "Contract not found locally for SeekNode UPDATE"); - false - } - }; - - if has_contract { - tracing::debug!("Contract found locally - handling UPDATE"); - let UpdateExecution { - value: updated_value, - summary: _summary, // summary not used here yet - changed, - } = update_contract( - op_manager, - *key, - value.clone(), - related_contracts.clone(), - ) - .await?; - let self_location = op_manager.ring.connection_manager.own_location(); - tracing::debug!( - tx = %id, - "Successfully updated a value for contract {} @ {:?} - update", - key, - self_location.location() - ); - - if !changed { - tracing::debug!( - tx = %id, - %key, - "SeekNode update produced no change, stopping propagation" - ); - new_state = None; - return_msg = None; - } else { - // Get broadcast targets - let sender_addr = sender - .socket_addr() - .expect("sender must have socket address for broadcast"); - let broadcast_to = - op_manager.get_broadcast_targets_update(key, &sender_addr); - - // If no peers to broadcast to, nothing else to do - if broadcast_to.is_empty() { - tracing::debug!( - tx = %id, - %key, - "No broadcast targets for SeekNode - completing locally" - ); - new_state = None; - return_msg = None; - } else { - // Have peers to broadcast to - use try_to_broadcast - match try_to_broadcast( - *id, - true, - op_manager, - self.state, - (broadcast_to, sender.clone()), - *key, - updated_value.clone(), - false, - ) - .await - { - Ok((state, msg)) => { - new_state = state; - return_msg = msg; - } - Err(err) => return Err(err), - } - } - } - } else { - // Contract not found - forward to another peer - let self_location = op_manager.ring.connection_manager.own_location(); - let self_addr = self_location - .socket_addr() - .expect("self location must have socket address"); - let sender_addr = sender - .socket_addr() - .expect("sender must have socket address"); - let skip_list = vec![sender_addr, self_addr]; - - let next_target = op_manager - .ring - .closest_potentially_caching(key, skip_list.as_slice()); - - if let Some(forward_target) = next_target { - let forward_addr = forward_target - .socket_addr() - .expect("forward target must have socket address"); - tracing::debug!( - tx = %id, - %key, - next_peer = %forward_addr, - "Contract not found - forwarding SeekNode to next peer" - ); - - // Forward SeekNode to the next peer - return_msg = Some(UpdateMsg::SeekNode { - id: *id, - target: forward_target, - value: value.clone(), - key: *key, - related_contracts: related_contracts.clone(), - }); - new_state = None; - } else { - // No more peers to try - capture context for diagnostics - let subscribers = op_manager - .ring - .subscribers_of(key) - .map(|subs| { - subs.value() - .iter() - .filter_map(|loc| loc.socket_addr()) - .map(|addr| format!("{:.8}", addr)) - .collect::>() - }) - .unwrap_or_default(); - let candidates = op_manager - .ring - .k_closest_potentially_caching(key, skip_list.as_slice(), 5) - .into_iter() - .filter_map(|loc| loc.socket_addr()) - .map(|addr| format!("{:.8}", addr)) - .collect::>(); - let connection_count = - op_manager.ring.connection_manager.num_connections(); - tracing::error!( - tx = %id, - %key, - subscribers = ?subscribers, - candidates = ?candidates, - connection_count, - sender = ?sender_addr, - "Cannot handle UPDATE SeekNode: contract not found and no peers to forward to" - ); - return Err(OpError::RingError(RingError::NoCachingPeers(*key))); - } - } - } - UpdateMsg::BroadcastTo { - id, - key, - new_value, - target: _, - } => { + UpdateMsg::BroadcastTo { id, key, new_value } => { // Get sender from connection-based routing let sender = sender_from_addr .clone() @@ -620,7 +438,6 @@ impl Operation for UpdateOp { broadcasted_to, key, new_value, - upstream: _upstream, .. } => { let mut broadcasted_to = *broadcasted_to; @@ -632,7 +449,6 @@ impl Operation for UpdateOp { id: *id, key: *key, new_value: new_value.clone(), - target: peer.clone(), }; let peer_addr = peer .socket_addr() @@ -678,10 +494,16 @@ impl Operation for UpdateOp { new_state = None; return_msg = None; } - _ => return Err(OpError::UnexpectedOpState), } - build_op_result(self.id, new_state, return_msg, stats, self.upstream_addr) + build_op_result( + self.id, + new_state, + return_msg, + stats, + self.upstream_addr, + forward_hop, + ) }) } } @@ -692,7 +514,7 @@ async fn try_to_broadcast( last_hop: bool, _op_manager: &OpManager, state: Option, - (broadcast_to, upstream): (Vec, PeerKeyLocation), + (broadcast_to, _upstream): (Vec, PeerKeyLocation), key: ContractKey, new_value: WrappedState, is_from_a_broadcasted_to_peer: bool, @@ -727,7 +549,6 @@ async fn try_to_broadcast( broadcasted_to: 0, broadcast_to, key, - upstream, }); } else { new_state = None; @@ -761,6 +582,7 @@ impl OpManager { let allow_self = self_addr.as_ref().map(|me| me == sender).unwrap_or(false); // Collect explicit subscribers (downstream interest) + // Only include subscribers we're currently connected to let subscribers: HashSet = self .ring .subscribers_of(key) @@ -769,6 +591,17 @@ impl OpManager { .iter() // Filter out the sender to avoid sending the update back to where it came from .filter(|pk| pk.socket_addr().as_ref() != Some(sender)) + // Only include peers we're actually connected to + .filter(|pk| { + pk.socket_addr() + .map(|addr| { + self.ring + .connection_manager + .get_peer_by_addr(addr) + .is_some() + }) + .unwrap_or(false) + }) .cloned() .collect::>() }) @@ -856,9 +689,13 @@ fn build_op_result( return_msg: Option, stats: Option, upstream_addr: Option, + forward_hop: Option, ) -> Result { - // Extract target address from the message for routing - let target_addr = return_msg.as_ref().and_then(|m| m.target_addr()); + // With hop-by-hop routing: + // - forward_hop is set when forwarding RequestUpdate to the next peer + // - Broadcasting messages have next_hop = None (they're processed locally) + // - BroadcastTo uses explicit addresses via conn_manager.send() + let next_hop = forward_hop; let output_op = state.map(|op| UpdateOp { id, @@ -869,7 +706,7 @@ fn build_op_result( let state = output_op.map(OpEnum::Update); Ok(OperationResult { return_msg: return_msg.map(NetMessage::from), - target_addr, + next_hop, state, }) } @@ -1247,26 +1084,52 @@ pub(crate) async fn request_update( stats.target = Some(target.clone()); } - deliver_update_result(op_manager, id, key, summary.clone()).await?; let msg = UpdateMsg::RequestUpdate { id, key, related_contracts, - target, value: updated_value, // Send the updated value, not the original }; + // Create operation state with target for hop-by-hop routing. + // This allows peek_next_hop_addr to find the next hop address when + // handle_notification_msg routes the outbound message. + let op_state = UpdateOp { + id, + state: Some(UpdateState::ReceivedRequest), + stats: Some(UpdateStats { + target: Some(target), + }), + upstream_addr: None, // We're the originator + }; + + // Use notify_op_change to: + // 1. Register the operation state (so peek_next_hop_addr can find the next hop) + // 2. Send the message via the event loop (which routes via network bridge) op_manager - .to_event_listener - .notifications_sender() - .send(Either::Left(NetMessage::from(msg))) + .notify_op_change(NetMessage::from(msg), OpEnum::Update(op_state)) + .await?; + + // Deliver the UPDATE result to the client (fire-and-forget semantics). + // NOTE: We do NOT call op_manager.completed() here because the operation + // needs to remain in the state map until peek_next_hop_addr can route it. + // The operation will be marked complete later when the message is processed. + let op = UpdateOp { + id, + state: Some(UpdateState::Finished { + key, + summary: summary.clone(), + }), + stats: None, + upstream_addr: None, + }; + let host_result = op.to_host_result(); + op_manager + .result_router_tx + .send((id, host_result)) .await .map_err(|error| { - tracing::error!( - tx = %id, - %error, - "Failed to enqueue UPDATE RequestUpdate message" - ); + tracing::error!(tx = %id, %error, "Failed to send UPDATE result to result router"); OpError::NotificationError })?; @@ -1332,7 +1195,7 @@ impl IsOperationCompleted for UpdateOp { } mod messages { - use std::{borrow::Borrow, fmt::Display}; + use std::fmt::Display; use freenet_stdlib::prelude::{ContractKey, RelatedContracts, WrappedState}; use serde::{Deserialize, Serialize}; @@ -1343,88 +1206,49 @@ mod messages { }; #[derive(Debug, Serialize, Deserialize, Clone)] + /// Update operation messages. + /// + /// Uses hop-by-hop routing for request forwarding. Broadcasting to subscribers + /// uses explicit addresses since there are multiple targets. pub(crate) enum UpdateMsg { + /// Request to update a contract state. Forwarded hop-by-hop toward contract location. RequestUpdate { id: Transaction, key: ContractKey, - target: PeerKeyLocation, #[serde(deserialize_with = "RelatedContracts::deser_related_contracts")] related_contracts: RelatedContracts<'static>, value: WrappedState, }, - AwaitUpdate { - id: Transaction, - }, - SeekNode { - id: Transaction, - target: PeerKeyLocation, - value: WrappedState, - key: ContractKey, - #[serde(deserialize_with = "RelatedContracts::deser_related_contracts")] - related_contracts: RelatedContracts<'static>, - }, - /// Internal node instruction that a change (either a first time insert or an update). + /// Internal node instruction to track broadcasting progress. Broadcasting { id: Transaction, broadcasted_to: usize, broadcast_to: Vec, key: ContractKey, new_value: WrappedState, - //contract: ContractContainer, - upstream: PeerKeyLocation, }, - /// Broadcasting a change to a peer, which then will relay the changes to other peers. + /// Broadcasting a change to a specific subscriber. BroadcastTo { id: Transaction, key: ContractKey, new_value: WrappedState, - target: PeerKeyLocation, }, } impl InnerMessage for UpdateMsg { fn id(&self) -> &Transaction { match self { - UpdateMsg::RequestUpdate { id, .. } => id, - UpdateMsg::AwaitUpdate { id, .. } => id, - UpdateMsg::SeekNode { id, .. } => id, - UpdateMsg::Broadcasting { id, .. } => id, - UpdateMsg::BroadcastTo { id, .. } => id, - } - } - - fn target(&self) -> Option> { - match self { - UpdateMsg::RequestUpdate { target, .. } => Some(target), - UpdateMsg::SeekNode { target, .. } => Some(target), - UpdateMsg::BroadcastTo { target, .. } => Some(target), - _ => None, + UpdateMsg::RequestUpdate { id, .. } + | UpdateMsg::Broadcasting { id, .. } + | UpdateMsg::BroadcastTo { id, .. } => id, } } fn requested_location(&self) -> Option { match self { - UpdateMsg::RequestUpdate { key, .. } => Some(Location::from(key.id())), - UpdateMsg::SeekNode { key, .. } => Some(Location::from(key.id())), - UpdateMsg::Broadcasting { key, .. } => Some(Location::from(key.id())), - UpdateMsg::BroadcastTo { key, .. } => Some(Location::from(key.id())), - _ => None, - } - } - } - - impl UpdateMsg { - // sender() method removed - use connection-based routing via source_addr instead - - /// Returns the socket address of the target peer for routing. - /// Used by OperationResult to determine where to send the message. - pub fn target_addr(&self) -> Option { - match self { - Self::RequestUpdate { target, .. } - | Self::SeekNode { target, .. } - | Self::BroadcastTo { target, .. } => target.socket_addr(), - // AwaitUpdate and Broadcasting are internal messages, no network target - Self::AwaitUpdate { .. } | Self::Broadcasting { .. } => None, + UpdateMsg::RequestUpdate { key, .. } + | UpdateMsg::Broadcasting { key, .. } + | UpdateMsg::BroadcastTo { key, .. } => Some(Location::from(key.id())), } } } @@ -1433,8 +1257,6 @@ mod messages { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { UpdateMsg::RequestUpdate { id, .. } => write!(f, "RequestUpdate(id: {id})"), - UpdateMsg::AwaitUpdate { id } => write!(f, "AwaitUpdate(id: {id})"), - UpdateMsg::SeekNode { id, .. } => write!(f, "SeekNode(id: {id})"), UpdateMsg::Broadcasting { id, .. } => write!(f, "Broadcasting(id: {id})"), UpdateMsg::BroadcastTo { id, .. } => write!(f, "BroadcastTo(id: {id})"), } diff --git a/crates/core/src/tracing/mod.rs b/crates/core/src/tracing/mod.rs index 904ed0038..848fba84f 100644 --- a/crates/core/src/tracing/mod.rs +++ b/crates/core/src/tracing/mod.rs @@ -182,12 +182,15 @@ impl<'a> NetEventLog<'a> { let peer_id = PeerId::new(own_addr, own_loc.pub_key().clone()); let kind = match msg { NetMessage::V1(NetMessageV1::Connect(connect::ConnectMsg::Response { - target, .. + payload, + .. })) => { + // With hop-by-hop routing, we (the joiner) are the target. + // The acceptor is in the payload. let this_peer = ring.connection_manager.own_location(); EventKind::Connect(ConnectEvent::Connected { this: this_peer, - connected: target.clone(), + connected: payload.acceptor.clone(), }) } _ => EventKind::Ignored, @@ -204,134 +207,88 @@ impl<'a> NetEventLog<'a> { op_manager: &'a OpManager, ) -> Either> { let kind = match msg { - NetMessageV1::Connect(connect::ConnectMsg::Response { - target, payload, .. - }) => { + NetMessageV1::Connect(connect::ConnectMsg::Response { payload, .. }) => { let acceptor = payload.acceptor.clone(); - // Skip event if addresses are unknown (e.g., gateway behind NAT without public address) - let (Some(acceptor_addr), Some(target_addr)) = - (acceptor.socket_addr(), target.socket_addr()) + // With hop-by-hop routing, the target (joiner) is determined from op_manager + let this_peer = op_manager.ring.connection_manager.own_location(); + // Skip event if addresses are unknown + let (Some(acceptor_addr), Some(this_addr)) = + (acceptor.socket_addr(), this_peer.socket_addr()) else { return Either::Right(vec![]); }; - let acceptor_peer = PeerId::new(acceptor_addr, acceptor.pub_key().clone()); - let target_peer = PeerId::new(target_addr, target.pub_key().clone()); + let acceptor_peer_id = PeerId::new(acceptor_addr, acceptor.pub_key().clone()); + let this_peer_id = PeerId::new(this_addr, this_peer.pub_key().clone()); let events = vec![ NetEventLog { tx: msg.id(), - peer_id: acceptor_peer.clone(), + peer_id: acceptor_peer_id.clone(), kind: EventKind::Connect(ConnectEvent::Connected { this: acceptor.clone(), - connected: target.clone(), + connected: this_peer.clone(), }), }, NetEventLog { tx: msg.id(), - peer_id: target_peer, + peer_id: this_peer_id, kind: EventKind::Connect(ConnectEvent::Connected { - this: target.clone(), + this: this_peer, connected: acceptor, }), }, ]; return Either::Right(events); } - NetMessageV1::Put(PutMsg::RequestPut { - contract, - target, - id, - .. - }) => { + NetMessageV1::Put(PutMsg::Request { contract, id, .. }) => { let this_peer = &op_manager.ring.connection_manager.own_location(); let key = contract.key(); EventKind::Put(PutEvent::Request { requester: this_peer.clone(), - target: target.clone(), + target: this_peer.clone(), // No embedded target - use own location key, id: *id, timestamp: chrono::Utc::now().timestamp() as u64, }) } - NetMessageV1::Put(PutMsg::SuccessfulPut { - id, - target, - key, - origin, - }) => EventKind::Put(PutEvent::PutSuccess { - id: *id, - requester: origin.clone(), - target: target.clone(), - key: *key, - timestamp: chrono::Utc::now().timestamp() as u64, - }), - NetMessageV1::Put(PutMsg::Broadcasting { - new_value, - broadcast_to, - broadcasted_to, // broadcasted_to n peers - key, - id, - upstream, - origin, - .. - }) => EventKind::Put(PutEvent::BroadcastEmitted { - id: *id, - upstream: upstream.clone(), - broadcast_to: broadcast_to.clone(), - broadcasted_to: *broadcasted_to, - key: *key, - value: new_value.clone(), - sender: origin.clone(), - timestamp: chrono::Utc::now().timestamp() as u64, - }), - NetMessageV1::Put(PutMsg::BroadcastTo { - origin, - new_value, - key, - target, - id, - .. - }) => EventKind::Put(PutEvent::BroadcastReceived { - id: *id, - requester: origin.clone(), - key: *key, - value: new_value.clone(), - target: target.clone(), - timestamp: chrono::Utc::now().timestamp() as u64, - }), - NetMessageV1::Get(GetMsg::ReturnGet { + NetMessageV1::Put(PutMsg::Response { id, key }) => { + let this_peer = &op_manager.ring.connection_manager.own_location(); + EventKind::Put(PutEvent::PutSuccess { + id: *id, + requester: this_peer.clone(), + target: this_peer.clone(), + key: *key, + timestamp: chrono::Utc::now().timestamp() as u64, + }) + } + NetMessageV1::Get(GetMsg::Response { id, key, value: StoreResponse { state: Some(_), .. }, - target, - .. }) => EventKind::Get { id: *id, key: *key, timestamp: chrono::Utc::now().timestamp() as u64, - requester: target.clone(), - // Note: sender no longer embedded in message - use connection-based routing - target: target.clone(), // Placeholder - actual sender from source_addr + // Note: target no longer embedded in message - use connection-based routing + requester: op_manager.ring.connection_manager.own_location(), + target: op_manager.ring.connection_manager.own_location(), }, - NetMessageV1::Subscribe(SubscribeMsg::ReturnSub { + NetMessageV1::Subscribe(SubscribeMsg::Response { id, subscribed: true, key, - target, }) => EventKind::Subscribed { id: *id, key: *key, - // Note: sender no longer embedded in message - use connection-based routing - at: target.clone(), // Placeholder - actual sender from source_addr + // Note: target no longer embedded in message - use connection-based routing + at: op_manager.ring.connection_manager.own_location(), timestamp: chrono::Utc::now().timestamp() as u64, - requester: target.clone(), + requester: op_manager.ring.connection_manager.own_location(), }, - NetMessageV1::Update(UpdateMsg::RequestUpdate { - key, target, id, .. - }) => { - let this_peer = &op_manager.ring.connection_manager.own_location(); + NetMessageV1::Update(UpdateMsg::RequestUpdate { key, id, .. }) => { + let this_peer = op_manager.ring.connection_manager.own_location(); EventKind::Update(UpdateEvent::Request { requester: this_peer.clone(), - target: target.clone(), + target: this_peer, // With hop-by-hop routing, we are the target key: *key, id: *id, timestamp: chrono::Utc::now().timestamp() as u64, @@ -340,35 +297,33 @@ impl<'a> NetEventLog<'a> { NetMessageV1::Update(UpdateMsg::Broadcasting { new_value, broadcast_to, - broadcasted_to, // broadcasted_to n peers + broadcasted_to, key, id, - upstream, - }) => EventKind::Update(UpdateEvent::BroadcastEmitted { - id: *id, - upstream: upstream.clone(), - broadcast_to: broadcast_to.clone(), - broadcasted_to: *broadcasted_to, - key: *key, - value: new_value.clone(), - // Note: sender no longer embedded in message - use connection-based routing - sender: upstream.clone(), // Placeholder - actual sender from source_addr - timestamp: chrono::Utc::now().timestamp() as u64, - }), - NetMessageV1::Update(UpdateMsg::BroadcastTo { - new_value, - key, - target, - id, - }) => EventKind::Update(UpdateEvent::BroadcastReceived { - id: *id, - requester: target.clone(), - key: *key, - value: new_value.clone(), - // Note: sender no longer embedded in message - use connection-based routing - target: target.clone(), // Placeholder - actual sender from source_addr - timestamp: chrono::Utc::now().timestamp() as u64, - }), + }) => { + let this_peer = op_manager.ring.connection_manager.own_location(); + EventKind::Update(UpdateEvent::BroadcastEmitted { + id: *id, + upstream: this_peer.clone(), // We are the broadcaster + broadcast_to: broadcast_to.clone(), + broadcasted_to: *broadcasted_to, + key: *key, + value: new_value.clone(), + sender: this_peer, // We are the sender + timestamp: chrono::Utc::now().timestamp() as u64, + }) + } + NetMessageV1::Update(UpdateMsg::BroadcastTo { new_value, key, id }) => { + let this_peer = op_manager.ring.connection_manager.own_location(); + EventKind::Update(UpdateEvent::BroadcastReceived { + id: *id, + requester: this_peer.clone(), + key: *key, + value: new_value.clone(), + target: this_peer, // We are the target + timestamp: chrono::Utc::now().timestamp() as u64, + }) + } _ => EventKind::Ignored, }; let own_loc = op_manager.ring.connection_manager.own_location(); diff --git a/crates/core/src/wasm_runtime/delegate_store.rs b/crates/core/src/wasm_runtime/delegate_store.rs index 239f008e9..27bdcc8dc 100644 --- a/crates/core/src/wasm_runtime/delegate_store.rs +++ b/crates/core/src/wasm_runtime/delegate_store.rs @@ -184,7 +184,7 @@ mod test { let temp_dir = tempfile::tempdir()?; let cdelegate_dir = temp_dir.path().join("delegates-store-test"); std::fs::create_dir_all(&cdelegate_dir)?; - let mut store = DelegateStore::new(cdelegate_dir, 10_000)?; + let mut store = DelegateStore::new(cdelegate_dir.clone(), 10_000)?; let delegate = { let delegate = Delegate::from((&vec![0, 1, 2].into(), &vec![].into())); DelegateContainer::Wasm(DelegateWasmAPIVersion::V1(delegate)) @@ -192,6 +192,8 @@ mod test { store.store_delegate(delegate.clone())?; let f = store.fetch_delegate(delegate.key(), &vec![].into()); assert!(f.is_some()); + // Clean up after test + let _ = std::fs::remove_dir_all(&cdelegate_dir); Ok(()) } } diff --git a/crates/core/src/wasm_runtime/secrets_store.rs b/crates/core/src/wasm_runtime/secrets_store.rs index c66dad38a..a1b9a6c35 100644 --- a/crates/core/src/wasm_runtime/secrets_store.rs +++ b/crates/core/src/wasm_runtime/secrets_store.rs @@ -254,7 +254,7 @@ mod test { let secrets_dir = temp_dir.path().join("secrets-store-test"); std::fs::create_dir_all(&secrets_dir)?; - let mut store = SecretsStore::new(secrets_dir, Default::default())?; + let mut store = SecretsStore::new(secrets_dir.clone(), Default::default())?; let delegate = Delegate::from((&vec![0, 1, 2].into(), &vec![].into())); @@ -268,6 +268,8 @@ mod test { let f = store.get_secret(delegate.key(), &secret_id); assert!(f.is_ok()); + // Clean up after test + let _ = std::fs::remove_dir_all(&secrets_dir); Ok(()) } } diff --git a/crates/core/tests/connectivity.rs b/crates/core/tests/connectivity.rs index 7b2d6d61b..a93ccd1e0 100644 --- a/crates/core/tests/connectivity.rs +++ b/crates/core/tests/connectivity.rs @@ -324,6 +324,13 @@ async fn test_three_node_network_connectivity(ctx: &mut TestContext) -> TestResu let mut last_snapshot = (String::new(), String::new(), String::new()); for attempt in 1..=MAX_RETRIES { + // Use println! for first 5 attempts to ensure visibility in CI stdout + if attempt <= 5 { + println!( + "Attempt {}/{}: Querying all nodes for connected peers...", + attempt, MAX_RETRIES + ); + } tracing::info!( "Attempt {}/{}: Querying all nodes for connected peers...", attempt, @@ -361,6 +368,15 @@ async fn test_three_node_network_connectivity(ctx: &mut TestContext) -> TestResu Err(e) => bail!("Error receiving peer2 response: {}", e), }; + // Use println! for first 5 attempts to ensure visibility in CI stdout + if attempt <= 5 { + println!( + " - Gateway has {} connections, Peer1 has {}, Peer2 has {}", + gw_peers.len(), + peer1_peers.len(), + peer2_peers.len() + ); + } tracing::info!(" - Gateway has {} connections", gw_peers.len()); tracing::info!(" - Peer1 has {} connections", peer1_peers.len()); tracing::info!(" - Peer2 has {} connections", peer2_peers.len()); diff --git a/crates/core/tests/operations.rs b/crates/core/tests/operations.rs index d43463420..f77c14cd3 100644 --- a/crates/core/tests/operations.rs +++ b/crates/core/tests/operations.rs @@ -1526,6 +1526,14 @@ async fn test_put_with_subscribe_flag(ctx: &mut TestContext) -> TestResult { let mut client_api2 = WebApi::start(stream2); // First client puts contract with initial state and auto-subscribes + let put_start = std::time::Instant::now(); + tracing::info!( + contract = %contract_key, + client = 1, + subscribe = true, + phase = "put_request", + "Sending PUT request" + ); make_put( &mut client_api1, wrapped_state.clone(), @@ -1542,6 +1550,13 @@ async fn test_put_with_subscribe_flag(ctx: &mut TestContext) -> TestResult { match resp { Ok(Ok(HostResponse::ContractResponse(ContractResponse::PutResponse { key }))) => { assert_eq!(key, contract_key, "Contract key mismatch in PUT response"); + tracing::info!( + contract = %contract_key, + client = 1, + elapsed_ms = put_start.elapsed().as_millis(), + phase = "put_response", + "PUT response received" + ); put_response_received = true; } Ok(Ok(HostResponse::ContractResponse(ContractResponse::SubscribeResponse { @@ -1555,29 +1570,54 @@ async fn test_put_with_subscribe_flag(ctx: &mut TestContext) -> TestResult { } Ok(Ok(other)) => { tracing::debug!( - "Client 1: Received non-PUT response while waiting for PUT: {:?}", - other + contract = %contract_key, + client = 1, + elapsed_ms = start.elapsed().as_millis(), + response = ?other, + "Received unexpected response while waiting for PUT" ); - // Continue waiting - might receive other messages before PUT response } Ok(Err(e)) => { - tracing::error!("Client 1: Error receiving put response: {}", e); + tracing::error!( + contract = %contract_key, + client = 1, + elapsed_ms = start.elapsed().as_millis(), + error = %e, + phase = "put_error", + "WebSocket error receiving PUT response" + ); bail!("WebSocket error while waiting for PUT response: {}", e); } Err(_) => { - // Timeout on recv - continue looping with outer timeout check tracing::debug!( - "Client 1: No message received in 5s, continuing to wait for PUT response" + contract = %contract_key, + client = 1, + elapsed_ms = start.elapsed().as_millis(), + "Waiting for PUT response (no message in 5s)" ); } } } if !put_response_received { + tracing::error!( + contract = %contract_key, + client = 1, + elapsed_ms = start.elapsed().as_millis(), + phase = "put_timeout", + "PUT response timeout after 30 seconds" + ); bail!("Client 1: Did not receive PUT response within 30 seconds"); } // Second client gets the contract (without subscribing) + let get_start = std::time::Instant::now(); + tracing::info!( + contract = %contract_key, + client = 2, + phase = "get_request", + "Sending GET request" + ); make_get(&mut client_api2, contract_key, true, false).await?; // Wait for get response on second client @@ -1592,29 +1632,54 @@ async fn test_put_with_subscribe_flag(ctx: &mut TestContext) -> TestResult { state: _, }))) => { assert_eq!(key, contract_key, "Contract key mismatch in GET response"); + tracing::info!( + contract = %contract_key, + client = 2, + elapsed_ms = get_start.elapsed().as_millis(), + phase = "get_response", + "GET response received" + ); get_response_received = true; } Ok(Ok(other)) => { tracing::debug!( - "Client 2: Received non-GET response while waiting for GET: {:?}", - other + contract = %contract_key, + client = 2, + elapsed_ms = start.elapsed().as_millis(), + response = ?other, + "Received unexpected response while waiting for GET" ); - // Continue waiting - might receive other messages before GET response } Ok(Err(e)) => { - tracing::error!("Client 2: Error receiving get response: {}", e); + tracing::error!( + contract = %contract_key, + client = 2, + elapsed_ms = start.elapsed().as_millis(), + error = %e, + phase = "get_error", + "WebSocket error receiving GET response" + ); bail!("WebSocket error while waiting for GET response: {}", e); } Err(_) => { - // Timeout on recv - continue looping with outer timeout check tracing::debug!( - "Client 2: No message received in 5s, continuing to wait for GET response" + contract = %contract_key, + client = 2, + elapsed_ms = start.elapsed().as_millis(), + "Waiting for GET response (no message in 5s)" ); } } } if !get_response_received { + tracing::error!( + contract = %contract_key, + client = 2, + elapsed_ms = start.elapsed().as_millis(), + phase = "get_timeout", + "GET response timeout after 30 seconds" + ); bail!("Client 2: Did not receive GET response within 30 seconds"); } @@ -1639,7 +1704,13 @@ async fn test_put_with_subscribe_flag(ctx: &mut TestContext) -> TestResult { let updated_state = WrappedState::from(updated_bytes); // Second client updates the contract - tracing::info!("Client 2: Updating contract to trigger notification"); + let update_start = std::time::Instant::now(); + tracing::info!( + contract = %contract_key, + client = 2, + phase = "update_request", + "Sending UPDATE request to trigger notification" + ); make_update(&mut client_api2, contract_key, updated_state.clone()).await?; // Wait for update response @@ -1656,29 +1727,54 @@ async fn test_put_with_subscribe_flag(ctx: &mut TestContext) -> TestResult { key, contract_key, "Contract key mismatch in UPDATE response" ); + tracing::info!( + contract = %contract_key, + client = 2, + elapsed_ms = update_start.elapsed().as_millis(), + phase = "update_response", + "UPDATE response received" + ); update_response_received = true; } Ok(Ok(other)) => { tracing::debug!( - "Client 2: Received non-UPDATE response while waiting for UPDATE: {:?}", - other + contract = %contract_key, + client = 2, + elapsed_ms = start.elapsed().as_millis(), + response = ?other, + "Received unexpected response while waiting for UPDATE" ); - // Continue waiting - might receive other messages before UPDATE response } Ok(Err(e)) => { - tracing::error!("Client 2: Error receiving update response: {}", e); + tracing::error!( + contract = %contract_key, + client = 2, + elapsed_ms = start.elapsed().as_millis(), + error = %e, + phase = "update_error", + "WebSocket error receiving UPDATE response" + ); bail!("WebSocket error while waiting for UPDATE response: {}", e); } Err(_) => { - // Timeout on recv - continue looping with outer timeout check tracing::debug!( - "Client 2: No message received in 5s, continuing to wait for UPDATE response" + contract = %contract_key, + client = 2, + elapsed_ms = start.elapsed().as_millis(), + "Waiting for UPDATE response (no message in 5s)" ); } } } if !update_response_received { + tracing::error!( + contract = %contract_key, + client = 2, + elapsed_ms = start.elapsed().as_millis(), + phase = "update_timeout", + "UPDATE response timeout after 30 seconds" + ); bail!("Client 2: Did not receive UPDATE response within 30 seconds"); } @@ -1693,10 +1789,16 @@ async fn test_put_with_subscribe_flag(ctx: &mut TestContext) -> TestResult { // Wait for update notification on client 1 (should be auto-subscribed from PUT) let mut client1_received_notification = false; + let notification_start = std::time::Instant::now(); + tracing::info!( + contract = %contract_key, + client = 1, + phase = "notification_wait", + "Waiting for UPDATE notification (auto-subscribed via PUT)" + ); // Try for up to 30 seconds to receive the notification - let start_time = std::time::Instant::now(); - while start_time.elapsed() < Duration::from_secs(30) && !client1_received_notification { + while notification_start.elapsed() < Duration::from_secs(30) && !client1_received_notification { let resp = tokio::time::timeout(Duration::from_secs(1), client_api1.recv()).await; match resp { Ok(Ok(HostResponse::ContractResponse(ContractResponse::UpdateNotification { @@ -1737,21 +1839,44 @@ async fn test_put_with_subscribe_flag(ctx: &mut TestContext) -> TestResult { "Task priority should match" ); - tracing::info!("Client 1: Successfully verified update content"); + tracing::info!( + contract = %contract_key, + client = 1, + elapsed_ms = notification_start.elapsed().as_millis(), + phase = "notification_received", + "UPDATE notification received and verified" + ); } _ => { - tracing::warn!("Client 1: Received unexpected update type: {:?}", update); + tracing::warn!( + contract = %contract_key, + client = 1, + update_type = ?update, + "Received unexpected update type in notification" + ); } } client1_received_notification = true; break; } Ok(Ok(other)) => { - tracing::debug!("Client 1: Received non-notification response while waiting for update notification: {:?}", other); - // Continue waiting - might receive other messages before notification + tracing::debug!( + contract = %contract_key, + client = 1, + elapsed_ms = notification_start.elapsed().as_millis(), + response = ?other, + "Received unexpected response while waiting for notification" + ); } Ok(Err(e)) => { - tracing::error!("Client 1: Error receiving update notification: {}", e); + tracing::error!( + contract = %contract_key, + client = 1, + elapsed_ms = notification_start.elapsed().as_millis(), + error = %e, + phase = "notification_error", + "WebSocket error receiving notification" + ); bail!( "WebSocket error while waiting for update notification: {}", e @@ -1759,7 +1884,6 @@ async fn test_put_with_subscribe_flag(ctx: &mut TestContext) -> TestResult { } Err(_) => { // Timeout on recv - this is expected, just continue looping - tracing::debug!("Client 1: No message received in 1s, continuing to wait for update notification"); } } @@ -1768,11 +1892,25 @@ async fn test_put_with_subscribe_flag(ctx: &mut TestContext) -> TestResult { } // Assert that client 1 received the notification (proving auto-subscribe worked) + if !client1_received_notification { + tracing::error!( + contract = %contract_key, + client = 1, + elapsed_ms = notification_start.elapsed().as_millis(), + phase = "notification_timeout", + "UPDATE notification not received within 30 seconds - auto-subscribe via PUT may have failed" + ); + } assert!( client1_received_notification, "Client 1 did not receive update notification within timeout period (auto-subscribe via PUT failed)" ); + tracing::info!( + contract = %contract_key, + phase = "test_complete", + "test_put_with_subscribe_flag completed successfully" + ); Ok(()) } diff --git a/scripts/ci-failure-catcher.sh b/scripts/ci-failure-catcher.sh new file mode 100755 index 000000000..6591c80b1 --- /dev/null +++ b/scripts/ci-failure-catcher.sh @@ -0,0 +1,81 @@ +#!/bin/bash +# CI Failure Catcher - Re-runs CI until failure and saves logs +# Usage: ./scripts/ci-failure-catcher.sh [max_attempts] + +set -e + +REPO="freenet/freenet-core" +BRANCH="fix/hop-by-hop-routing" +MAX_ATTEMPTS="${1:-20}" +LOG_DIR="/tmp/ci-failure-logs" +ATTEMPT=0 + +mkdir -p "$LOG_DIR" + +echo "=== CI Failure Catcher ===" +echo "Branch: $BRANCH" +echo "Max attempts: $MAX_ATTEMPTS" +echo "Logs will be saved to: $LOG_DIR" +echo "" + +while [ $ATTEMPT -lt $MAX_ATTEMPTS ]; do + ATTEMPT=$((ATTEMPT + 1)) + echo "--- Attempt $ATTEMPT/$MAX_ATTEMPTS ---" + + # Trigger a new CI run by re-running the latest workflow + LATEST_RUN=$(gh run list -R "$REPO" --branch "$BRANCH" --limit 1 --json databaseId -q '.[0].databaseId') + + echo "Re-running workflow $LATEST_RUN..." + gh run rerun "$LATEST_RUN" -R "$REPO" --failed 2>/dev/null || gh run rerun "$LATEST_RUN" -R "$REPO" + + # Wait for the new run to start + sleep 5 + + # Get the new run ID + NEW_RUN=$(gh run list -R "$REPO" --branch "$BRANCH" --limit 1 --json databaseId -q '.[0].databaseId') + echo "Watching run $NEW_RUN..." + + # Wait for completion (suppress output, just wait) + if gh run watch "$NEW_RUN" -R "$REPO" --exit-status >/dev/null 2>&1; then + echo "✓ Run $NEW_RUN passed" + else + echo "✗ Run $NEW_RUN FAILED!" + echo "" + echo "=== FAILURE DETECTED ===" + echo "Saving logs to $LOG_DIR/run-$NEW_RUN/" + + mkdir -p "$LOG_DIR/run-$NEW_RUN" + + # Download all logs + gh run view "$NEW_RUN" -R "$REPO" --log > "$LOG_DIR/run-$NEW_RUN/full.log" 2>&1 || true + + # Get failed jobs + gh run view "$NEW_RUN" -R "$REPO" --json jobs -q '.jobs[] | select(.conclusion == "failure") | .name' > "$LOG_DIR/run-$NEW_RUN/failed-jobs.txt" + + # Save summary + gh run view "$NEW_RUN" -R "$REPO" > "$LOG_DIR/run-$NEW_RUN/summary.txt" 2>&1 || true + + # Extract test failure info + echo "Extracting test failure details..." + grep -A 50 "FAILED\|panicked\|error\[" "$LOG_DIR/run-$NEW_RUN/full.log" > "$LOG_DIR/run-$NEW_RUN/failures.txt" 2>/dev/null || true + + # Extract connect operation logs + grep -i "connect.*target_connections\|register_acceptance\|Transaction timed out\|Connect operation" "$LOG_DIR/run-$NEW_RUN/full.log" > "$LOG_DIR/run-$NEW_RUN/connect-ops.txt" 2>/dev/null || true + + echo "" + echo "Logs saved! Key files:" + echo " - $LOG_DIR/run-$NEW_RUN/full.log (complete logs)" + echo " - $LOG_DIR/run-$NEW_RUN/failures.txt (failure excerpts)" + echo " - $LOG_DIR/run-$NEW_RUN/connect-ops.txt (connect operation logs)" + echo "" + echo "Run ID: $NEW_RUN" + echo "View online: https://github.com/$REPO/actions/runs/$NEW_RUN" + + exit 0 + fi + + echo "" +done + +echo "=== No failures after $MAX_ATTEMPTS attempts ===" +exit 1 diff --git a/wiki b/wiki deleted file mode 160000 index 4c50f30da..000000000 --- a/wiki +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 4c50f30da7030f0007fea157673fc02e4fd23840