diff --git a/crates/agent/src/nvue.rs b/crates/agent/src/nvue.rs index 6eff260e2b..7b902f44c3 100644 --- a/crates/agent/src/nvue.rs +++ b/crates/agent/src/nvue.rs @@ -41,6 +41,18 @@ pub fn template_for(vtype: VpcVirtualizationType) -> eyre::Result<&'static str> VpcVirtualizationType::EthernetVirtualizer | VpcVirtualizationType::EthernetVirtualizerWithNvue => Ok(TMPL_ETV_WITH_NVUE), VpcVirtualizationType::Fnn => Ok(TMPL_FNN), + // Flat VPCs attach instances via a plain NIC (the host's + // primary fabric interface is not a DPU), so there's no NVUE + // template to render for them -- this function is the DPU + // agent's template selector, and Flat instances don't run + // through a DPU agent at all. + // + // (NICo today doesn't model mixed-mode hosts that have a NIC + // primary plus secondary DPUs used for VFs; if that ever + // becomes a target, the dispatch here would need rethinking.) + VpcVirtualizationType::Flat => { + Err(eyre::eyre!("Flat VPC virtualization type not supported",)) + } } } diff --git a/crates/api-db/migrations/20260518165405_flat_network_virtualization_type.sql b/crates/api-db/migrations/20260518165405_flat_network_virtualization_type.sql new file mode 100644 index 0000000000..4fb31ee03f --- /dev/null +++ b/crates/api-db/migrations/20260518165405_flat_network_virtualization_type.sql @@ -0,0 +1,14 @@ +--- +--- 20260518165405_flat_network_virtualization_type.sql +--- +--- Adds a third network virtualization type, `flat`, for VPCs whose tenant +--- instances live directly on the underlay (no DPU, or DPU in NIC mode) and +--- whose interfaces sit on `HostInband` network segments rather than a +--- NICo-managed overlay. Flat VPCs are still real tenant VPCs -- they +--- have a VNI, support NSGs (as descriptive metadata for pluggable SDN +--- hooks), and can peer with ETV/FNN VPCs -- but NICo doesn't drive +--- their data plane. Routing and ACL enforcement between Flat VPCs and +--- other VPCs is the network operator's responsibility. 
+--- + +ALTER TYPE network_virtualization_type_t ADD VALUE 'flat'; diff --git a/crates/api-integration-tests/tests/lib.rs b/crates/api-integration-tests/tests/lib.rs index 77aa869868..26b2ed668d 100644 --- a/crates/api-integration-tests/tests/lib.rs +++ b/crates/api-integration-tests/tests/lib.rs @@ -111,7 +111,11 @@ async fn test_integration() -> eyre::Result<()> { let domain_id = domain::create(carbide_api_addrs, "tenant-1.local").await?; let managed_segment_id = subnet::create(carbide_api_addrs, &tenant1_vpc, &domain_id, 10, false).await?; - subnet::create(carbide_api_addrs, &tenant1_vpc, &domain_id, 11, true).await?; + + // HostInband segments must live in a Flat VPC -- those VPC types are + // mutually bound. Create one for the HostInband fixture. + let flat_vpc = vpc::create_flat(carbide_api_addrs, tenant_org_id).await?; + subnet::create(carbide_api_addrs, &flat_vpc, &domain_id, 11, true).await?; // Create FNN VPC + VPC prefixes (IPv4 + IPv6) for dual-stack L3 linknet testing. let fnn_vpc = vpc::create_fnn(carbide_api_addrs, tenant_org_id).await?; diff --git a/crates/api-model/src/vpc/capability.rs b/crates/api-model/src/vpc/capability.rs new file mode 100644 index 0000000000..7612c66519 --- /dev/null +++ b/crates/api-model/src/vpc/capability.rs @@ -0,0 +1,970 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//! Profile-driven capability policy for `VpcVirtualizationType`. +//! +//! Each VPC virtualization type maps to a single `VpcCapabilities` +//! profile that declares its policy answers as data, e.g. +//! - Which host fabric interface it attaches to. +//! - Which segment types it accepts. +//! - Whether it honors routing profiles. +//! - Whether it supports IPv6. +//! - Which VPC types it peers with. +//! - Etc... +//! +//! The methods on the `VpcVirtualizationTypeCapabilities` extension +//! trait just read from that profile. +//! +//! The idea is that adding a new VPC virtualization type means: +//! (1) Add the enum variant in the `network` crate. +//! (2) Declare a `VpcCapabilities` constant here. +//! (3) Add the arm in `VpcVirtualizationTypeCapabilities::capabilities`. +//! +//! ...and then hopefully you have limited code changes you need to +//! make (e.g. less match arms, less conditional branching, etc etc). +//! +//! Technically most of this is serde-serializable, so we could maybe +//! even some day drive it from config files. +//! +//! This also introduces a DataPlaneKind enum, which gives us an +//! additional level of flexibility in how we define the capabilities +//! of a VPC virtualization type, with the idea being we should be +//! able to express how different data plane types wire into our +//! business logic, letting us derive a certain collection of +//! capabilities based on this kind. It's also intended to make it so +//! certain mutually exclusive configs can't cause a misconfiguration. + +use std::fmt; + +use carbide_network::virtualization::VpcVirtualizationType; + +use crate::network_segment::{NetworkSegmentType, NewNetworkSegment}; + +/// Which host-side fabric interface kind a VPC virtualization type +/// attaches to. 
Used at instance-allocation time to decide which hosts +/// are eligible for which VPCs: the host's primary fabric interface +/// kind must match the VPC's declared `fabric_interface_type`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum FabricInterfaceType { + /// A DPU-managed fabric attachment. The host's primary data path + /// is its DPU, and NICo drives the overlay (VRFs, EVPN, routing + /// profiles) via the DPU agent. + Dpu, + + /// A plain NIC fabric attachment. The host's data NIC sits + /// directly on the operator's segment (`HostInband`); NICo + /// does not mediate the data plane. + Nic, +} + +impl fmt::Display for FabricInterfaceType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Dpu => write!(f, "dpu"), + Self::Nic => write!(f, "nic"), + } + } +} + +/// Which kind of data plane a VPC virtualization type runs. This is the +/// knob that drives most of the correlated per-type capabilities (fabric +/// interface, routing profile support, SVI allocation, VNI exchange, etc.), +/// rather than declaring those as independent bools per variant (which can +/// be combined nonsensically); we encode them as a function of the kind of +/// data plane for the VPC virtualization type. +/// +/// Adding a new VPC virtualization type usually means picking the +/// closest `DataPlaneKind` and letting all the derived capabilities +/// fall out. If a future type genuinely needs a new layout (e.g. a +/// hybrid that doesn't fit any current kind), add a new variant here +/// and a single match arm per derived accessor below. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DataPlaneKind { + /// DPU-managed L2 overlay. The DPU stretches an L2 broadcast + /// domain across hosts via overlay tunneling. Example: ETV. + DpuOverlayL2, + + /// DPU-managed L3 overlay. The DPU runs per-VPC L3 routing, + /// applying routing profiles (route-targets, leak rules) and + /// importing peer VNIs via EVPN. Example: FNN. 
+ DpuOverlayL3, + + /// No NICo-managed data plane. The operator's network fabric + /// owns reachability; NICo only persists VPC bookkeeping and + /// exposes optional metadata (e.g. the VNI) for SDN integrations + /// to consume. Example: Flat. + OperatorManaged, +} + +impl DataPlaneKind { + /// Host fabric interface kind required by VPCs of this data plane. + pub const fn fabric_interface_type(self) -> FabricInterfaceType { + match self { + Self::DpuOverlayL2 | Self::DpuOverlayL3 => FabricInterfaceType::Dpu, + Self::OperatorManaged => FabricInterfaceType::Nic, + } + } + + /// Whether the `routing_profile_type` field is accepted on the VPC + /// create API. DPU-overlay types both accept it (for tenant + /// access-tier authorization and VNI pool selection via the + /// profile's `internal` flag), even though only L3 actually + /// applies the profile to its overlay. Operator-managed VPCs have + /// no routing layer at all and reject the field. + pub const fn supports_routing_profile_type(self) -> bool { + matches!(self, Self::DpuOverlayL2 | Self::DpuOverlayL3) + } + + /// Whether the looked-up routing profile's policy (route-target + /// imports/exports, leak rules, etc.) is applied to the DPU's + /// VRF configuration. L3-overlay only -- L2 accepts the field + /// for cross-cutting concerns but doesn't apply the profile. + pub const fn supports_applying_routing_profile(self) -> bool { + matches!(self, Self::DpuOverlayL3) + } + + /// Whether this data plane is *capable* of allocating an SVI IP + /// for segments. This is a precondition, not a guarantee per + /// segment: only stretched-L2 tenant segments get an SVI IP -- + /// non-stretched segments (e.g. tenant /31 link segments) don't + /// get one even on data planes where this returns `true`. The + /// per-segment combination lives in + /// [`VpcVirtualizationTypeCapabilities::allocates_svi_for`]. + /// + /// L3-overlay only today (see + /// `carbide_network::virtualization::get_svi_ip`). 
+ pub const fn allocates_svi_ip(self) -> bool { + matches!(self, Self::DpuOverlayL3) + } + + /// Whether the DPU agent imports peer VPCs' VNIs into the local + /// VRF for EVPN-style route exchange. L3-overlay only. + pub const fn imports_peer_vnis_into_overlay(self) -> bool { + matches!(self, Self::DpuOverlayL3) + } + + /// Whether this data plane's VNI should be exposed to peers in + /// their `vpc_peer_vnis` lists. L3-overlay (peers actively use + /// the VNI for EVPN imports) and operator-managed (operator's + /// SDN may use it for switch-side VTEPs/ACLs/etc.). L2-overlay + /// has a VNI but doesn't surface it to peers. + pub const fn vni_advertised_to_peers(self) -> bool { + matches!(self, Self::DpuOverlayL3 | Self::OperatorManaged) + } +} + +/// Per-variant policy profile for a `VpcVirtualizationType`. The +/// `data_plane` field is the source of truth for the correlated +/// capabilities (fabric interface, routing profile support, SVI, VNI +/// exchange) -- derived accessors live on [`DataPlaneKind`]. The +/// remaining fields capture per-variant policy that varies +/// independently of kind (segment-type whitelist, address-family +/// support, peering relation). +pub struct VpcCapabilities { + /// What kind of data plane this VPC type runs. Drives the + /// correlated capabilities (see [`DataPlaneKind`]). + pub data_plane: DataPlaneKind, + + /// Which `NetworkSegmentType`s may be bound to a VPC of this + /// kind. The handler enforces this both ways: a VPC of this + /// type rejects segments whose type is absent from this list, + /// and a segment whose type appears here only in `Flat`'s profile + /// (e.g. `HostInband`) is correspondingly rejected from other + /// VPC types. + pub allowed_segment_types: &'static [NetworkSegmentType], + + /// Whether this type supports IPv4 prefixes (either on VPC + /// prefixes or on network segments contained in the VPC). 
All + /// current VPC types support v4; declared here for symmetry with + /// `supports_ipv6_prefix` so a future v6-only type can be + /// expressed without an implicit "v4 is always fine" assumption. + pub supports_ipv4_prefix: bool, + + /// Whether this type supports IPv6 prefixes (either on VPC + /// prefixes or on network segments contained in the VPC). Varies + /// independently of data plane kind today: ETV (L2 overlay) + /// doesn't support v6 because that overlay generation was never + /// wired for it; a future L2 overlay type with v6 support would + /// set this true with the same `DpuOverlayL2` kind. + pub supports_ipv6_prefix: bool, + + /// Which other VPC virtualization types this one can be peered + /// with under the site's `Exclusive` peering policy. Must be + /// maintained symmetrically -- if A lists B, B should list A. + /// The `peering_relation_is_symmetric` test in this module + /// enforces that at test time. + pub peers_with: &'static [VpcVirtualizationType], +} + +/// Every variant NICo handles. Iteration target for capability-driven +/// filters (e.g. "give me all variants that exchange VNI for peering"). +/// If a new variant is added to `VpcVirtualizationType`, add it here +/// too. +/// +/// ...at least for now. Maybe we can change how we structure this. 
pub const ALL_VPC_VIRTUALIZATION_TYPES: &[VpcVirtualizationType] = &[
    VpcVirtualizationType::EthernetVirtualizer,
    VpcVirtualizationType::EthernetVirtualizerWithNvue,
    VpcVirtualizationType::Fnn,
    VpcVirtualizationType::Flat,
];

// Profile returned by `capabilities()` for both ETV variants
// (`EthernetVirtualizer` and `EthernetVirtualizerWithNvue`): DPU L2
// overlay, Tenant/Admin/Underlay segments, IPv4 only. Peers with the
// two ETV variants and Flat; FNN is deliberately absent.
const ETV_CAPABILITIES: VpcCapabilities = VpcCapabilities {
    data_plane: DataPlaneKind::DpuOverlayL2,
    allowed_segment_types: &[
        NetworkSegmentType::Tenant,
        NetworkSegmentType::Admin,
        NetworkSegmentType::Underlay,
    ],
    supports_ipv4_prefix: true,
    supports_ipv6_prefix: false,
    // Any change here needs the reciprocal entry in the peer's profile.
    peers_with: &[
        VpcVirtualizationType::EthernetVirtualizer,
        VpcVirtualizationType::EthernetVirtualizerWithNvue,
        VpcVirtualizationType::Flat,
    ],
};

// Profile for `Fnn`: DPU L3 overlay, same segment types as ETV, but
// dual-stack (IPv4 and IPv6). Peers with other FNN VPCs and Flat.
const FNN_CAPABILITIES: VpcCapabilities = VpcCapabilities {
    data_plane: DataPlaneKind::DpuOverlayL3,
    allowed_segment_types: &[
        NetworkSegmentType::Tenant,
        NetworkSegmentType::Admin,
        NetworkSegmentType::Underlay,
    ],
    supports_ipv4_prefix: true,
    supports_ipv6_prefix: true,
    // Any change here needs the reciprocal entry in the peer's profile.
    peers_with: &[VpcVirtualizationType::Fnn, VpcVirtualizationType::Flat],
};

// Profile for `Flat`: operator-managed data plane, `HostInband`
// segments only, dual-stack. Lists every variant in `peers_with`, so
// each other profile must list `Flat` in return.
const FLAT_CAPABILITIES: VpcCapabilities = VpcCapabilities {
    data_plane: DataPlaneKind::OperatorManaged,
    allowed_segment_types: &[NetworkSegmentType::HostInband],
    supports_ipv4_prefix: true,
    supports_ipv6_prefix: true,
    peers_with: &[
        VpcVirtualizationType::EthernetVirtualizer,
        VpcVirtualizationType::EthernetVirtualizerWithNvue,
        VpcVirtualizationType::Fnn,
        VpcVirtualizationType::Flat,
    ],
};

/// Why a VPC capability check failed. Each variant carries the parties
/// involved so the message can be formatted without the caller knowing
/// the wording, and so the variant itself is matchable in tests.
+#[derive(Debug, thiserror::Error)] +pub enum VpcCapabilityError { + #[error("{vpc_type} VPCs do not support {segment_type} network segments")] + UnsupportedSegmentType { + vpc_type: VpcVirtualizationType, + segment_type: NetworkSegmentType, + }, + + #[error("{vpc_type} VPCs do not support IPv4 network prefixes")] + Ipv4Unsupported { vpc_type: VpcVirtualizationType }, + + #[error("{vpc_type} VPCs do not support IPv6 network prefixes")] + Ipv6Unsupported { vpc_type: VpcVirtualizationType }, + + #[error( + "{vpc_type} VPCs do not accept a `routing_profile_type` field; this VPC type has no NICo-managed routing layer" + )] + RoutingProfileTypeUnsupported { vpc_type: VpcVirtualizationType }, + + #[error("{a} and {b} VPCs cannot be peered")] + PeeringIncompatible { + a: VpcVirtualizationType, + b: VpcVirtualizationType, + }, +} + +/// Extension trait that exposes [`VpcCapabilities`] policy on +/// [`VpcVirtualizationType`]. Only the [`Self::capabilities`] method +/// is variant-specific; every other method reads from the profile. +pub trait VpcVirtualizationTypeCapabilities { + /// The policy profile for this variant. + fn capabilities(self) -> &'static VpcCapabilities; + + /// Which host fabric interface kind a VPC of this type attaches + /// to. Instance allocation rejects hosts whose primary fabric + /// interface does not match. + fn fabric_interface_type(self) -> FabricInterfaceType; + + /// Whether a segment of `segment_type` is allowed in a VPC of + /// this type. + fn supports_segment_type(self, segment_type: NetworkSegmentType) -> bool; + + /// Whether a given segment is allowed in a VPC of this type. + /// Composite of [`Self::supports_segment_type`] and the + /// per-address-family prefix checks ([`Self::supports_ipv4_prefix`] + /// and [`Self::supports_ipv6_prefix`]) for any prefixes the segment + /// carries. + fn supports_segment(self, segment: &NewNetworkSegment) -> bool; + + /// Whether this type can have IPv4 network prefixes. 
+ fn supports_ipv4_prefix(self) -> bool; + + /// Whether this type can have IPv6 network prefixes. + fn supports_ipv6_prefix(self) -> bool; + + /// Whether the `routing_profile_type` field is accepted on the VPC + /// create API for this type. See + /// [`VpcCapabilities::supports_routing_profile_type`]. + fn supports_routing_profile_type(self) -> bool; + + /// Whether the looked-up routing profile is applied to the DPU's + /// VRF configuration. See + /// [`VpcCapabilities::supports_applying_routing_profile`]. + fn supports_applying_routing_profile(self) -> bool; + + /// Whether this type is *capable* of allocating an SVI IP for its + /// segments (precondition, not guarantee per segment). See + /// [`DataPlaneKind::allocates_svi_ip`]. + fn allocates_svi_ip(self) -> bool; + + /// Whether a SVI IP should be allocated for this specific segment. + /// Combines the data plane's capability with the segment's + /// `can_stretch` opt-in -- a SVI is only allocated on stretched-L2 + /// segments in a SVI-capable VPC type. Tenant /31 link segments + /// (`can_stretch = false`) don't get one even on FNN. + fn allocates_svi_for(self, segment: &NewNetworkSegment) -> bool; + + /// Whether this type's DPU agent imports peer VPCs' VNIs into the + /// local VRF for EVPN-style route exchange. See + /// [`VpcCapabilities::imports_peer_vnis_into_overlay`]. + fn imports_peer_vnis_into_overlay(self) -> bool; + + /// Whether this type's VNI should be exposed to peers in their + /// `vpc_peer_vnis` lists. See + /// [`VpcCapabilities::vni_advertised_to_peers`]. + fn vni_advertised_to_peers(self) -> bool; + + /// Whether two VPC virtualization types can be peered under the + /// `Exclusive` peering policy. + fn can_peer_with(self, other: Self) -> bool; + + /// `ensure_*` variants of the above; return a structured error + /// suitable for `?` propagation when a check fails. 
+ fn ensure_supports_segment_type( + self, + segment_type: NetworkSegmentType, + ) -> Result<(), VpcCapabilityError>; + /// Validates segment-type compatibility plus per-address-family + /// support for any prefixes the segment carries. + fn ensure_supports_segment(self, segment: &NewNetworkSegment) + -> Result<(), VpcCapabilityError>; + fn ensure_supports_ipv4_prefix(self) -> Result<(), VpcCapabilityError>; + fn ensure_supports_ipv6_prefix(self) -> Result<(), VpcCapabilityError>; + fn ensure_supports_routing_profile_type(self) -> Result<(), VpcCapabilityError>; + fn ensure_can_peer_with(self, other: VpcVirtualizationType) -> Result<(), VpcCapabilityError>; +} + +impl VpcVirtualizationTypeCapabilities for VpcVirtualizationType { + fn capabilities(self) -> &'static VpcCapabilities { + match self { + Self::EthernetVirtualizer | Self::EthernetVirtualizerWithNvue => &ETV_CAPABILITIES, + Self::Fnn => &FNN_CAPABILITIES, + Self::Flat => &FLAT_CAPABILITIES, + } + } + + fn fabric_interface_type(self) -> FabricInterfaceType { + self.capabilities().data_plane.fabric_interface_type() + } + + fn supports_segment_type(self, segment_type: NetworkSegmentType) -> bool { + self.capabilities() + .allowed_segment_types + .contains(&segment_type) + } + + fn supports_segment(self, segment: &NewNetworkSegment) -> bool { + if !self.supports_segment_type(segment.segment_type) { + return false; + } + let has_ipv4_prefix = segment.prefixes.iter().any(|p| p.prefix.is_ipv4()); + let has_ipv6_prefix = segment.prefixes.iter().any(|p| p.prefix.is_ipv6()); + (!has_ipv4_prefix || self.supports_ipv4_prefix()) + && (!has_ipv6_prefix || self.supports_ipv6_prefix()) + } + + fn supports_ipv4_prefix(self) -> bool { + self.capabilities().supports_ipv4_prefix + } + + fn supports_ipv6_prefix(self) -> bool { + self.capabilities().supports_ipv6_prefix + } + + fn supports_routing_profile_type(self) -> bool { + self.capabilities() + .data_plane + .supports_routing_profile_type() + } + + fn 
supports_applying_routing_profile(self) -> bool { + self.capabilities() + .data_plane + .supports_applying_routing_profile() + } + + fn allocates_svi_ip(self) -> bool { + self.capabilities().data_plane.allocates_svi_ip() + } + + fn allocates_svi_for(self, segment: &NewNetworkSegment) -> bool { + segment.can_stretch.unwrap_or(true) && self.allocates_svi_ip() + } + + fn imports_peer_vnis_into_overlay(self) -> bool { + self.capabilities() + .data_plane + .imports_peer_vnis_into_overlay() + } + + fn vni_advertised_to_peers(self) -> bool { + self.capabilities().data_plane.vni_advertised_to_peers() + } + + fn can_peer_with(self, other: Self) -> bool { + self.capabilities().peers_with.contains(&other) + } + + fn ensure_supports_segment_type( + self, + segment_type: NetworkSegmentType, + ) -> Result<(), VpcCapabilityError> { + if self.supports_segment_type(segment_type) { + Ok(()) + } else { + Err(VpcCapabilityError::UnsupportedSegmentType { + vpc_type: self, + segment_type, + }) + } + } + + fn ensure_supports_segment( + self, + segment: &NewNetworkSegment, + ) -> Result<(), VpcCapabilityError> { + self.ensure_supports_segment_type(segment.segment_type)?; + if segment.prefixes.iter().any(|p| p.prefix.is_ipv4()) { + self.ensure_supports_ipv4_prefix()?; + } + if segment.prefixes.iter().any(|p| p.prefix.is_ipv6()) { + self.ensure_supports_ipv6_prefix()?; + } + Ok(()) + } + + fn ensure_supports_ipv4_prefix(self) -> Result<(), VpcCapabilityError> { + if self.supports_ipv4_prefix() { + Ok(()) + } else { + Err(VpcCapabilityError::Ipv4Unsupported { vpc_type: self }) + } + } + + fn ensure_supports_ipv6_prefix(self) -> Result<(), VpcCapabilityError> { + if self.supports_ipv6_prefix() { + Ok(()) + } else { + Err(VpcCapabilityError::Ipv6Unsupported { vpc_type: self }) + } + } + + fn ensure_supports_routing_profile_type(self) -> Result<(), VpcCapabilityError> { + if self.supports_routing_profile_type() { + Ok(()) + } else { + Err(VpcCapabilityError::RoutingProfileTypeUnsupported { 
vpc_type: self }) + } + } + + fn ensure_can_peer_with(self, other: VpcVirtualizationType) -> Result<(), VpcCapabilityError> { + if self.can_peer_with(other) { + Ok(()) + } else { + Err(VpcCapabilityError::PeeringIncompatible { a: self, b: other }) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn data_plane_maps_to_expected_variants() { + assert_eq!( + VpcVirtualizationType::EthernetVirtualizer + .capabilities() + .data_plane, + DataPlaneKind::DpuOverlayL2, + ); + assert_eq!( + VpcVirtualizationType::EthernetVirtualizerWithNvue + .capabilities() + .data_plane, + DataPlaneKind::DpuOverlayL2, + ); + assert_eq!( + VpcVirtualizationType::Fnn.capabilities().data_plane, + DataPlaneKind::DpuOverlayL3, + ); + assert_eq!( + VpcVirtualizationType::Flat.capabilities().data_plane, + DataPlaneKind::OperatorManaged, + ); + } + + #[test] + fn data_plane_derived_capabilities() { + // Spot-check that the derived accessors agree with the per-kind + // semantics documented on `DataPlaneKind`. If a variant ever + // diverges from its kind's defaults, the trait impl would need + // a per-variant override -- and these assertions would be the + // first to fire. 
+ assert_eq!( + DataPlaneKind::DpuOverlayL3.fabric_interface_type(), + FabricInterfaceType::Dpu + ); + assert_eq!( + DataPlaneKind::OperatorManaged.fabric_interface_type(), + FabricInterfaceType::Nic + ); + assert!(DataPlaneKind::DpuOverlayL3.supports_applying_routing_profile()); + assert!(!DataPlaneKind::DpuOverlayL2.supports_applying_routing_profile()); + assert!(!DataPlaneKind::OperatorManaged.supports_applying_routing_profile()); + assert!(DataPlaneKind::DpuOverlayL3.imports_peer_vnis_into_overlay()); + assert!(!DataPlaneKind::OperatorManaged.imports_peer_vnis_into_overlay()); + assert!(DataPlaneKind::DpuOverlayL3.vni_advertised_to_peers()); + assert!(DataPlaneKind::OperatorManaged.vni_advertised_to_peers()); + assert!(!DataPlaneKind::DpuOverlayL2.vni_advertised_to_peers()); + } + + #[test] + fn fabric_interface_type_matches_intuition() { + assert_eq!( + VpcVirtualizationType::EthernetVirtualizer.fabric_interface_type(), + FabricInterfaceType::Dpu + ); + assert_eq!( + VpcVirtualizationType::Fnn.fabric_interface_type(), + FabricInterfaceType::Dpu + ); + assert_eq!( + VpcVirtualizationType::Flat.fabric_interface_type(), + FabricInterfaceType::Nic + ); + } + + #[test] + fn flat_only_supports_host_inband_segments() { + let flat = VpcVirtualizationType::Flat; + assert!(flat.supports_segment_type(NetworkSegmentType::HostInband)); + assert!(!flat.supports_segment_type(NetworkSegmentType::Tenant)); + assert!(!flat.supports_segment_type(NetworkSegmentType::Admin)); + assert!(!flat.supports_segment_type(NetworkSegmentType::Underlay)); + } + + #[test] + fn host_inband_segments_only_supported_on_flat() { + for vt in [ + VpcVirtualizationType::EthernetVirtualizer, + VpcVirtualizationType::EthernetVirtualizerWithNvue, + VpcVirtualizationType::Fnn, + ] { + assert!(!vt.supports_segment_type(NetworkSegmentType::HostInband)); + assert!(matches!( + vt.ensure_supports_segment_type(NetworkSegmentType::HostInband), + Err(VpcCapabilityError::UnsupportedSegmentType { .. 
}) + )); + } + } + + #[test] + fn ipv6_supported_on_fnn_and_flat_only() { + assert!(VpcVirtualizationType::Fnn.supports_ipv6_prefix()); + assert!(VpcVirtualizationType::Flat.supports_ipv6_prefix()); + assert!(!VpcVirtualizationType::EthernetVirtualizer.supports_ipv6_prefix()); + assert!(matches!( + VpcVirtualizationType::EthernetVirtualizer.ensure_supports_ipv6_prefix(), + Err(VpcCapabilityError::Ipv6Unsupported { .. }) + )); + } + + #[test] + fn routing_profile_type_accepted_on_etv_and_fnn_not_flat() { + assert!(VpcVirtualizationType::EthernetVirtualizer.supports_routing_profile_type()); + assert!(VpcVirtualizationType::Fnn.supports_routing_profile_type()); + assert!(!VpcVirtualizationType::Flat.supports_routing_profile_type()); + assert!(matches!( + VpcVirtualizationType::Flat.ensure_supports_routing_profile_type(), + Err(VpcCapabilityError::RoutingProfileTypeUnsupported { .. }) + )); + } + + #[test] + fn applying_routing_profile_is_fnn_only() { + // ETV accepts the routing_profile_type field but doesn't apply + // the profile's policy to its overlay -- that's FNN-only. 
+ assert!(VpcVirtualizationType::Fnn.supports_applying_routing_profile()); + assert!(!VpcVirtualizationType::EthernetVirtualizer.supports_applying_routing_profile()); + assert!(!VpcVirtualizationType::Flat.supports_applying_routing_profile()); + } + + #[test] + fn svi_ip_allocation_is_fnn_only() { + assert!(VpcVirtualizationType::Fnn.allocates_svi_ip()); + assert!(!VpcVirtualizationType::EthernetVirtualizer.allocates_svi_ip()); + assert!(!VpcVirtualizationType::Flat.allocates_svi_ip()); + } + + #[test] + fn only_fnn_imports_peer_vnis_into_overlay() { + assert!(VpcVirtualizationType::Fnn.imports_peer_vnis_into_overlay()); + assert!(!VpcVirtualizationType::EthernetVirtualizer.imports_peer_vnis_into_overlay()); + assert!(!VpcVirtualizationType::Flat.imports_peer_vnis_into_overlay()); + } + + #[test] + fn fnn_and_flat_advertise_vni_to_peers_etv_does_not() { + // FNN advertises its VNI because peers (other FNN VPCs) actively + // use it for EVPN route imports. Flat advertises because + // operator-side pluggable SDN integrations may consume the VNI + // (switch-side VTEPs, ACLs, etc.). ETV does not advertise its + // VNI -- legacy and not surfaced to peers. + assert!(VpcVirtualizationType::Fnn.vni_advertised_to_peers()); + assert!(VpcVirtualizationType::Flat.vni_advertised_to_peers()); + assert!(!VpcVirtualizationType::EthernetVirtualizer.vni_advertised_to_peers()); + assert!(!VpcVirtualizationType::EthernetVirtualizerWithNvue.vni_advertised_to_peers()); + } + + #[test] + fn flat_peers_with_everything() { + for vt in ALL_VPC_VIRTUALIZATION_TYPES { + assert!(VpcVirtualizationType::Flat.can_peer_with(*vt)); + } + } + + #[test] + fn etv_cannot_peer_with_fnn() { + assert!( + !VpcVirtualizationType::EthernetVirtualizer.can_peer_with(VpcVirtualizationType::Fnn) + ); + assert!(matches!( + VpcVirtualizationType::EthernetVirtualizer + .ensure_can_peer_with(VpcVirtualizationType::Fnn), + Err(VpcCapabilityError::PeeringIncompatible { .. 
}) + )); + } + + #[test] + fn etv_nvue_treated_as_etv_for_peering() { + assert!( + VpcVirtualizationType::EthernetVirtualizer + .can_peer_with(VpcVirtualizationType::EthernetVirtualizerWithNvue) + ); + } + + /// Guards against forgetting to declare a reciprocal entry in + /// another variant's `peers_with` slice. If A says it peers with + /// B, B must say it peers with A. + #[test] + fn peering_relation_is_symmetric() { + for a in ALL_VPC_VIRTUALIZATION_TYPES { + for b in ALL_VPC_VIRTUALIZATION_TYPES { + assert_eq!( + a.can_peer_with(*b), + b.can_peer_with(*a), + "peering relation is asymmetric between {a} and {b}; \ + check the `peers_with` slices in their profiles", + ); + } + } + } + + /// Single-source-of-truth matrix asserting every capability for + /// every variant. The per-capability tests above check one + /// capability across variants; this test checks every capability + /// per variant, so any single value changing produces exactly one + /// failing assertion with a clear "which variant, which capability" + /// message. + /// + /// If a new VPC virtualization type is added, append a row here + /// and the compiler-/test-driven coverage stays exhaustive. 
+ #[test] + fn capability_matrix_per_variant() { + struct Expected { + data_plane: DataPlaneKind, + fabric_interface_type: FabricInterfaceType, + supports_ipv4_prefix: bool, + supports_ipv6_prefix: bool, + supports_routing_profile_type: bool, + supports_applying_routing_profile: bool, + allocates_svi_ip: bool, + imports_peer_vnis_into_overlay: bool, + vni_advertised_to_peers: bool, + allowed_segment_types: &'static [NetworkSegmentType], + peers_with: &'static [VpcVirtualizationType], + } + + let cases: &[(VpcVirtualizationType, Expected)] = &[ + ( + VpcVirtualizationType::EthernetVirtualizer, + Expected { + data_plane: DataPlaneKind::DpuOverlayL2, + fabric_interface_type: FabricInterfaceType::Dpu, + supports_ipv4_prefix: true, + supports_ipv6_prefix: false, + supports_routing_profile_type: true, + supports_applying_routing_profile: false, + allocates_svi_ip: false, + imports_peer_vnis_into_overlay: false, + vni_advertised_to_peers: false, + allowed_segment_types: &[ + NetworkSegmentType::Tenant, + NetworkSegmentType::Admin, + NetworkSegmentType::Underlay, + ], + peers_with: &[ + VpcVirtualizationType::EthernetVirtualizer, + VpcVirtualizationType::EthernetVirtualizerWithNvue, + VpcVirtualizationType::Flat, + ], + }, + ), + ( + VpcVirtualizationType::EthernetVirtualizerWithNvue, + Expected { + // Deprecated -- treated identically to ETV. 
+ data_plane: DataPlaneKind::DpuOverlayL2, + fabric_interface_type: FabricInterfaceType::Dpu, + supports_ipv4_prefix: true, + supports_ipv6_prefix: false, + supports_routing_profile_type: true, + supports_applying_routing_profile: false, + allocates_svi_ip: false, + imports_peer_vnis_into_overlay: false, + vni_advertised_to_peers: false, + allowed_segment_types: &[ + NetworkSegmentType::Tenant, + NetworkSegmentType::Admin, + NetworkSegmentType::Underlay, + ], + peers_with: &[ + VpcVirtualizationType::EthernetVirtualizer, + VpcVirtualizationType::EthernetVirtualizerWithNvue, + VpcVirtualizationType::Flat, + ], + }, + ), + ( + VpcVirtualizationType::Fnn, + Expected { + data_plane: DataPlaneKind::DpuOverlayL3, + fabric_interface_type: FabricInterfaceType::Dpu, + supports_ipv4_prefix: true, + supports_ipv6_prefix: true, + supports_routing_profile_type: true, + supports_applying_routing_profile: true, + allocates_svi_ip: true, + imports_peer_vnis_into_overlay: true, + vni_advertised_to_peers: true, + allowed_segment_types: &[ + NetworkSegmentType::Tenant, + NetworkSegmentType::Admin, + NetworkSegmentType::Underlay, + ], + peers_with: &[VpcVirtualizationType::Fnn, VpcVirtualizationType::Flat], + }, + ), + ( + VpcVirtualizationType::Flat, + Expected { + data_plane: DataPlaneKind::OperatorManaged, + fabric_interface_type: FabricInterfaceType::Nic, + supports_ipv4_prefix: true, + supports_ipv6_prefix: true, + supports_routing_profile_type: false, + supports_applying_routing_profile: false, + allocates_svi_ip: false, + imports_peer_vnis_into_overlay: false, + vni_advertised_to_peers: true, + allowed_segment_types: &[NetworkSegmentType::HostInband], + peers_with: &[ + VpcVirtualizationType::EthernetVirtualizer, + VpcVirtualizationType::EthernetVirtualizerWithNvue, + VpcVirtualizationType::Fnn, + VpcVirtualizationType::Flat, + ], + }, + ), + ]; + + // Belt-and-suspenders: ensure the matrix covers every variant + // that exists in `ALL_VPC_VIRTUALIZATION_TYPES`. 
If a new
+        // variant is added there but not here, this fires.
+        assert_eq!(
+            cases.len(),
+            ALL_VPC_VIRTUALIZATION_TYPES.len(),
+            "capability_matrix_per_variant is missing a row -- ensure every variant in \
+             ALL_VPC_VIRTUALIZATION_TYPES is represented here",
+        );
+
+        for (vt, expected) in cases {
+            let caps = vt.capabilities();
+            assert_eq!(caps.data_plane, expected.data_plane, "data_plane for {vt}");
+            assert_eq!(
+                vt.fabric_interface_type(),
+                expected.fabric_interface_type,
+                "fabric_interface_type for {vt}",
+            );
+            assert_eq!(
+                caps.supports_ipv4_prefix, expected.supports_ipv4_prefix,
+                "supports_ipv4_prefix for {vt}",
+            );
+            assert_eq!(
+                caps.supports_ipv6_prefix, expected.supports_ipv6_prefix,
+                "supports_ipv6_prefix for {vt}",
+            );
+            assert_eq!(
+                vt.supports_routing_profile_type(),
+                expected.supports_routing_profile_type,
+                "supports_routing_profile_type for {vt}",
+            );
+            assert_eq!(
+                vt.supports_applying_routing_profile(),
+                expected.supports_applying_routing_profile,
+                "supports_applying_routing_profile for {vt}",
+            );
+            assert_eq!(
+                vt.allocates_svi_ip(),
+                expected.allocates_svi_ip,
+                "allocates_svi_ip for {vt}",
+            );
+            assert_eq!(
+                vt.imports_peer_vnis_into_overlay(),
+                expected.imports_peer_vnis_into_overlay,
+                "imports_peer_vnis_into_overlay for {vt}",
+            );
+            assert_eq!(
+                vt.vni_advertised_to_peers(),
+                expected.vni_advertised_to_peers,
+                "vni_advertised_to_peers for {vt}",
+            );
+            assert_eq!(
+                caps.allowed_segment_types, expected.allowed_segment_types,
+                "allowed_segment_types for {vt}",
+            );
+            assert_eq!(caps.peers_with, expected.peers_with, "peers_with for {vt}",);
+        }
+    }
+
+    fn segment_with(
+        segment_type: NetworkSegmentType,
+        prefixes: Vec<&str>,
+        can_stretch: Option<bool>,
+    ) -> NewNetworkSegment {
+        use crate::network_prefix::NewNetworkPrefix;
+        use crate::network_segment::AllocationStrategy;
+
+        NewNetworkSegment {
+            id: uuid::Uuid::new_v4().into(),
+            name: "test-segment".to_string(),
+            subdomain_id: None,
+            vpc_id: None,
+            mtu:
1500, + prefixes: prefixes + .into_iter() + .map(|p| NewNetworkPrefix { + prefix: p.parse().unwrap(), + gateway: None, + num_reserved: 0, + }) + .collect(), + vlan_id: None, + vni: None, + segment_type, + can_stretch, + allocation_strategy: AllocationStrategy::Dynamic, + } + } + + #[test] + fn ensure_supports_segment_passes_for_compatible_segment_v4_only() { + let segment = segment_with(NetworkSegmentType::Tenant, vec!["192.0.2.0/24"], None); + VpcVirtualizationType::Fnn + .ensure_supports_segment(&segment) + .expect("FNN + Tenant + IPv4 is the standard happy path"); + } + + #[test] + fn ensure_supports_segment_rejects_unsupported_segment_type() { + let segment = segment_with(NetworkSegmentType::Tenant, vec!["192.0.2.0/24"], None); + let err = VpcVirtualizationType::Flat + .ensure_supports_segment(&segment) + .expect_err("Flat doesn't accept Tenant segments"); + assert!(matches!( + err, + VpcCapabilityError::UnsupportedSegmentType { .. } + )); + } + + #[test] + fn ensure_supports_segment_rejects_ipv6_on_etv() { + let segment = segment_with( + NetworkSegmentType::Tenant, + vec!["192.0.2.0/24", "2001:db8::/64"], + None, + ); + let err = VpcVirtualizationType::EthernetVirtualizer + .ensure_supports_segment(&segment) + .expect_err("ETV doesn't accept IPv6 prefixes even if segment-type matches"); + assert!(matches!(err, VpcCapabilityError::Ipv6Unsupported { .. 
})); + } + + #[test] + fn ensure_supports_segment_allows_ipv6_on_flat_with_host_inband() { + let segment = segment_with( + NetworkSegmentType::HostInband, + vec!["192.0.2.0/24", "2001:db8::/64"], + None, + ); + VpcVirtualizationType::Flat + .ensure_supports_segment(&segment) + .expect("Flat + HostInband + IPv6 is a supported combination"); + } + + #[test] + fn allocates_svi_for_only_when_fnn_and_can_stretch() { + let stretchable = segment_with(NetworkSegmentType::Tenant, vec!["192.0.2.0/24"], None); + let unstretchable = segment_with( + NetworkSegmentType::Tenant, + vec!["192.0.2.0/24"], + Some(false), + ); + + assert!(VpcVirtualizationType::Fnn.allocates_svi_for(&stretchable)); + assert!(!VpcVirtualizationType::Fnn.allocates_svi_for(&unstretchable)); + assert!(!VpcVirtualizationType::EthernetVirtualizer.allocates_svi_for(&stretchable)); + assert!(!VpcVirtualizationType::Flat.allocates_svi_for(&stretchable)); + } +} diff --git a/crates/api-model/src/vpc.rs b/crates/api-model/src/vpc/mod.rs similarity index 96% rename from crates/api-model/src/vpc.rs rename to crates/api-model/src/vpc/mod.rs index 93c0733ada..ae45a2eab8 100644 --- a/crates/api-model/src/vpc.rs +++ b/crates/api-model/src/vpc/mod.rs @@ -14,9 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +pub mod capability; + use std::collections::HashMap; use std::net::IpAddr; +pub use capability::{ + ALL_VPC_VIRTUALIZATION_TYPES, DataPlaneKind, FabricInterfaceType, VpcCapabilities, + VpcCapabilityError, VpcVirtualizationTypeCapabilities, +}; use carbide_network::virtualization::VpcVirtualizationType; use carbide_uuid::machine::MachineId; use carbide_uuid::network_security_group::NetworkSecurityGroupId; diff --git a/crates/api-test-helper/src/vpc.rs b/crates/api-test-helper/src/vpc.rs index 96dec4e081..1235f3fb11 100644 --- a/crates/api-test-helper/src/vpc.rs +++ b/crates/api-test-helper/src/vpc.rs @@ -48,3 +48,21 @@ pub async fn create_fnn( tracing::info!("FNN VPC created with ID {vpc_id}"); Ok(vpc_id) } + +pub async fn create_flat( + carbide_api_addrs: &[SocketAddr], + tenant_org_id: &str, +) -> eyre::Result { + tracing::info!("Creating Flat VPC"); + + // Flat VPCs reject `routing_profile_type` -- there's no NICo-managed + // data plane to apply a routing profile to. + let data = serde_json::json!({ + "metadata": { "name": "tenant_vpc_flat" }, + "tenantOrganizationId": tenant_org_id, + "network_virtualization_type": 6, // FLAT + }); + let vpc_id = grpcurl_id(carbide_api_addrs, "CreateVpc", &data.to_string()).await?; + tracing::info!("Flat VPC created with ID {vpc_id}"); + Ok(vpc_id) +} diff --git a/crates/api/src/errors.rs b/crates/api/src/errors.rs index 6e04de0d2b..4cd1c9c106 100644 --- a/crates/api/src/errors.rs +++ b/crates/api/src/errors.rs @@ -31,6 +31,7 @@ use model::errors::ModelError; use model::hardware_info::HardwareInfoError; use model::network_devices::LldpError; use model::tenant::TenantError; +use model::vpc::VpcCapabilityError; use model::{ConfigValidationError, resource_pool}; use tonic::Status; @@ -82,6 +83,9 @@ pub enum CarbideError { #[error("Argument is invalid: {0}")] InvalidArgument(String), + #[error("Argument is invalid: {0}")] + VpcCapability(#[from] VpcCapabilityError), + #[error(transparent)] AddressAlreadyInUse(#[from] 
AddressAlreadyInUseError), @@ -381,6 +385,7 @@ impl From for tonic::Status { match &from { e @ CarbideError::Internal { .. } => Status::internal(e.to_string()), CarbideError::InvalidArgument(msg) => Status::invalid_argument(msg), + error @ CarbideError::VpcCapability(_) => Status::invalid_argument(error.to_string()), CarbideError::InvalidConfiguration(e) => Status::invalid_argument(e.to_string()), CarbideError::RpcDataConversionError(e) => Status::invalid_argument(e.to_string()), e @ CarbideError::DhcpError(_) => Status::resource_exhausted(e.to_string()), diff --git a/crates/api/src/ethernet_virtualization.rs b/crates/api/src/ethernet_virtualization.rs index 356ace67cf..10cfafec3f 100644 --- a/crates/api/src/ethernet_virtualization.rs +++ b/crates/api/src/ethernet_virtualization.rs @@ -21,6 +21,7 @@ use carbide_network::virtualization::{VpcVirtualizationType, get_svi_ip}; use carbide_uuid::instance::InstanceId; use carbide_uuid::machine::{MachineId, MachineInterfaceId}; use carbide_uuid::network::NetworkSegmentId; +use carbide_uuid::vpc::VpcId; use db::vpc::{self}; use db::vpc_peering::get_prefixes_by_vpcs; use db::{self, ObjectColumnFilter, network_security_group}; @@ -33,6 +34,7 @@ use model::network_security_group::{ }; use model::network_segment::NetworkSegment; use model::resource_pool::common::CommonPools; +use model::vpc::{ALL_VPC_VIRTUALIZATION_TYPES, VpcVirtualizationTypeCapabilities}; use sqlx::PgConnection; use crate::CarbideError; @@ -485,41 +487,49 @@ pub async fn tenant_network( if let Some(policy) = vpc_peering_policy_on_existing && let Some(vpc_id) = segment.config.vpc_id { - match policy { + // The peer-ID universe depends on the site policy. Under + // `Exclusive`, the per-type capability layer dictates which + // peer types are compatible (e.g. an FNN VPC can have Flat + // peers via Flat's `peers_with` listing). Under `Mixed`, the + // operator opts out of capability enforcement and we accept + // any peering record. 
`None` disables peering entirely.
+        let vpc_peer_ids: Vec<VpcId> = match policy {
             VpcPeeringPolicy::Exclusive => {
-                // Under exclusive policy, VPC only allowed to peer with VPC of same network virtualization type.
-                let allowed_network_virtualization_types = vec![network_virtualization_type];
-                let vpc_peers = db::vpc_peering::get_vpc_peer_vnis(
-                    txn,
-                    vpc_id,
-                    allowed_network_virtualization_types,
-                )
-                .await?;
-
-                let vpc_peer_ids = vpc_peers.iter().map(|(vpc_id, _)| *vpc_id).collect();
-                vpc_peer_prefixes = get_prefixes_by_vpcs(txn, &vpc_peer_ids).await?;
-                if network_virtualization_type == VpcVirtualizationType::Fnn {
-                    vpc_peer_vnis = vpc_peers.iter().map(|(_, vni)| *vni as u32).collect();
-                }
-            }
-            VpcPeeringPolicy::Mixed => {
-                // Any combination of VPC peering allowed
-                let vpc_peer_ids = db::vpc_peering::get_vpc_peer_ids(txn, vpc_id).await?;
-                vpc_peer_prefixes = get_prefixes_by_vpcs(txn, &vpc_peer_ids).await?;
-                if network_virtualization_type == VpcVirtualizationType::Fnn {
-                    // Get vnis of all FNN peers for route import
-                    vpc_peer_vnis = db::vpc_peering::get_vpc_peer_vnis(
-                        txn,
-                        vpc_id,
-                        vec![VpcVirtualizationType::Fnn],
-                    )
+                let allowed_peer_types = network_virtualization_type
+                    .capabilities()
+                    .peers_with
+                    .to_vec();
+                db::vpc_peering::get_vpc_peer_vnis(txn, vpc_id, allowed_peer_types)
                     .await?
-                    .iter()
-                    .map(|(_, vni)| *vni as u32)
-                    .collect();
-                }
+                    .into_iter()
+                    .map(|(id, _)| id)
+                    .collect()
             }
-            VpcPeeringPolicy::None => {}
+            VpcPeeringPolicy::Mixed => db::vpc_peering::get_vpc_peer_ids(txn, vpc_id).await?,
+            VpcPeeringPolicy::None => vec![],
+        };
+
+        vpc_peer_prefixes = get_prefixes_by_vpcs(txn, &vpc_peer_ids).await?;
+
+        // VNI-based peer route imports are independent of peering
+        // policy: they're a per-type question on both sides.
+        // - Self: does this VPC's DPU plumb peer VNIs into its VRF?
+        //   (`imports_peer_vnis_into_overlay`, FNN-only today.)
+        // - Peer: should this peer's VNI be exposed for the self-side
+        //   to pick up?
(`vni_advertised_to_peers`, FNN + Flat today -- + // Flat advertises its VNI so pluggable SDN integrations on + // the network operator's fabric can use it.) + if network_virtualization_type.imports_peer_vnis_into_overlay() { + let vni_peer_types: Vec<_> = ALL_VPC_VIRTUALIZATION_TYPES + .iter() + .copied() + .filter(|t| t.vni_advertised_to_peers()) + .collect(); + vpc_peer_vnis = db::vpc_peering::get_vpc_peer_vnis(txn, vpc_id, vni_peer_types) + .await? + .iter() + .map(|(_, vni)| *vni as u32) + .collect(); } } // Keep API responses deterministic so downstream config rendering diff --git a/crates/api/src/handlers/network_segment.rs b/crates/api/src/handlers/network_segment.rs index 8decd62dec..185bea2b8a 100644 --- a/crates/api/src/handlers/network_segment.rs +++ b/crates/api/src/handlers/network_segment.rs @@ -15,13 +15,13 @@ * limitations under the License. */ use ::rpc::forge as rpc; -use carbide_network::virtualization::VpcVirtualizationType; use db::resource_pool::ResourcePoolDatabaseError; use db::{AnnotatedSqlxError, DatabaseError, ObjectColumnFilter, network_segment}; use model::network_segment::{ NetworkSegment, NetworkSegmentControllerState, NetworkSegmentSearchConfig, NetworkSegmentType, NewNetworkSegment, }; +use model::vpc::VpcVirtualizationTypeCapabilities; use sqlx::{PgConnection, PgTransaction}; use tonic::{Request, Response, Status}; @@ -138,25 +138,15 @@ pub(crate) async fn create( .first() .ok_or_else(|| CarbideError::internal(format!("VPC ID: {vpc_id} not found.")))?; - // IPv6 network segments are only supported for FNN VPCs. 
- if vpc.network_virtualization_type != VpcVirtualizationType::Fnn { - let has_ipv6_prefix = new_network_segment - .prefixes - .iter() - .any(|np| np.prefix.is_ipv6()); - if has_ipv6_prefix { - return Err(CarbideError::InvalidArgument( - "IPv6 network segments are only supported for FNN VPCs".to_string(), - ) - .into()); - } - } + let virtualization_type = vpc.network_virtualization_type; - if new_network_segment.can_stretch.unwrap_or(true) { - vpc.network_virtualization_type == VpcVirtualizationType::Fnn - } else { - false - } + // Segment compatibility (segment-type binding + IPv6 support) + // and SVI allocation are both expressed as capability checks + // on the VPC's virtualization type; see `model::vpc::capability`. + virtualization_type + .ensure_supports_segment(&new_network_segment) + .map_err(CarbideError::from)?; + virtualization_type.allocates_svi_for(&new_network_segment) } else { false }; diff --git a/crates/api/src/handlers/vpc.rs b/crates/api/src/handlers/vpc.rs index d7e259dbb1..a006109f18 100644 --- a/crates/api/src/handlers/vpc.rs +++ b/crates/api/src/handlers/vpc.rs @@ -16,19 +16,24 @@ */ use ::rpc::errors::RpcDataConversionError; use ::rpc::forge as rpc; +use ::rpc::network::vpc_virtualization_type_try_from_rpc; +use carbide_network::virtualization::{DEFAULT_NETWORK_VIRTUALIZATION_TYPE, VpcVirtualizationType}; use carbide_uuid::network_security_group::NetworkSecurityGroupId; use carbide_uuid::vpc::VpcId; use db::resource_pool::ResourcePoolDatabaseError; use db::vpc::{self}; use db::{self, ObjectColumnFilter, network_security_group}; use model::resource_pool; -use model::tenant::InvalidTenantOrg; -use model::vpc::{NewVpc, UpdateVpc, UpdateVpcVirtualization, VpcStatus}; +use model::tenant::{InvalidTenantOrg, Tenant}; +use model::vpc::{ + NewVpc, UpdateVpc, UpdateVpcVirtualization, VpcStatus, VpcVirtualizationTypeCapabilities, +}; use sqlx::PgConnection; use tonic::{Request, Response, Status}; use crate::CarbideError; use crate::api::{Api, 
log_request_data}; +use crate::cfg::file::FnnConfig; pub(crate) async fn create( api: &Api, @@ -49,8 +54,8 @@ pub(crate) async fn create( // A lot of tests seem to still allow tenant IDs for tenants that don't // exist. We should audit and see if there are still sites with missing tenants - // if we expect Carbide-core to have knowledge of tenants. Otherwise, this would just go away - // when we _remove_ any expectation of tenant knowledge from Carbide-core, and the details we + // if we expect NICo-core to have knowledge of tenants. Otherwise, this would just go away + // when we _remove_ any expectation of tenant knowledge from NICo-core, and the details we // need from tenant would just come in from the VPC creation request. if tenant.is_none() { tracing::warn!( @@ -93,100 +98,31 @@ pub(crate) async fn create( } } - let (requested_profile_type, internal) = match ( - vpc_creation_request.routing_profile_type.as_ref(), - tenant - .as_ref() - .and_then(|t| t.routing_profile_type.as_ref()), - ) { - // No VPC routing profile requested, and no tenant profile. Nothing to do. - // If FNN disabled, assume internal. Otherwise, external must be assumed. - // This is really handling any odd edge case where VPCs were created - // without a tenant. - (None, None) => (None, api.runtime_config.fnn.is_none()), - - // VPC profile requested, but no tenant or tenant routing profile - // Can't validate anything, so reject. - (Some(_), None) => { - return Err(CarbideError::FailedPrecondition(format!( - "VPC routing-profile type requested but no tenant or routing profile-type found for organization id `{}`", - vpc_creation_request.tenant_organization_id.clone() - )) - .into()); - } - - // Tenant routing profile found. - // Check if routing profile was requested and do some validation if so, - // and default to the tenant profile if not. 
- (requested_profile_type, Some(tenant_profile_type)) => { - match (api.runtime_config.fnn.as_ref(), requested_profile_type) { - // If FNN disabled and profile requested, throw error. - (None, Some(_)) => { - return Err(CarbideError::FailedPrecondition( - "FNN configuration required to request routing-profile for VPCs" - .to_string(), - ) - .into()); - } - - // If FNN disabled and no profile requested, return tenant profile type and internal==true. - // This maintains the legacy/pre-FNN behavior. - (None, None) => (Some(tenant_profile_type.to_owned()), true), - - // If FNN enabled and no profile requested, pull tenant profile and return tenant profile type, and tenant profile .internal value - (Some(_), None) => { - // Pull the tenant profile - let tenant_profile = api - .runtime_config - .fnn - .as_ref() - .and_then(|f| f.routing_profiles.get(tenant_profile_type)) - .ok_or_else(|| CarbideError::NotFoundError { - kind: "routing_profile", - id: tenant_profile_type.to_owned(), - })?; - ( - Some(tenant_profile_type.to_owned()), - tenant_profile.internal, - ) - } - - // If FNN enabled and profile requested, pull tenant and requested profile, check access tiers, and return requested profile type and requested profile .internal value - (Some(_), Some(profile_type)) => { - // Pull the requested profile - let routing_profile = api - .runtime_config - .fnn - .as_ref() - .and_then(|f| f.routing_profiles.get(profile_type)) - .ok_or_else(|| CarbideError::NotFoundError { - kind: "routing_profile", - id: profile_type.to_owned(), - })?; - - // Pull the tenant profile - let tenant_profile = api - .runtime_config - .fnn - .as_ref() - .and_then(|f| f.routing_profiles.get(tenant_profile_type)) - .ok_or_else(|| CarbideError::NotFoundError { - kind: "routing_profile", - id: tenant_profile_type.to_owned(), - })?; + // Resolve the virtualization type up front. 
Flat VPCs short-circuit + // most of the FNN-flavored routing-profile validation below: Flat doesn't + // have a NICo-managed data plane, so routing-profile semantics don't + // apply. We still allocate a VNI and persist the VPC like any other type. + let requested_virtualization_type = match vpc_creation_request.network_virtualization_type { + None => DEFAULT_NETWORK_VIRTUALIZATION_TYPE, + Some(v) => vpc_virtualization_type_try_from_rpc(v).map_err(CarbideError::from)?, + }; - // Higher tier value means more restrictions, narrower access. - // Lower tier value means less restrictions / broader access. - // A tenant with narrower access should not be able to create a VPC with broader access. - if routing_profile.access_tier < tenant_profile.access_tier { - return Err(CarbideError::FailedPrecondition("requested VPC routing-profile access tier is broader than associated tenant routing-profile access tier".to_string()).into()); - } + if vpc_creation_request.routing_profile_type.is_some() { + requested_virtualization_type + .ensure_supports_routing_profile_type() + .map_err(CarbideError::from)?; + } - (Some(profile_type.to_owned()), routing_profile.internal) - } - } - } - }; + let ResolvedVpcRouting { + profile_type: requested_profile_type, + internal, + } = resolve_vpc_routing( + requested_virtualization_type, + vpc_creation_request.routing_profile_type.as_deref(), + tenant.as_ref(), + api.runtime_config.fnn.as_ref(), + &vpc_creation_request.tenant_organization_id, + )?; let mut new_vpc = NewVpc::try_from(request.into_inner())?; @@ -508,3 +444,371 @@ async fn allocate_vpc_vni( } } } + +/// Resolution of routing-related state for a VPC at create time. The +/// `internal` flag isn't strictly part of the routing profile, but it +/// gets decided together with `profile_type` from the same inputs +/// (request + tenant + site FNN config), so we return both as one +/// value. 
+#[derive(Debug)]
+pub(crate) struct ResolvedVpcRouting {
+    /// The routing-profile-type name to persist on the VPC. `None`
+    /// for VPC types without a NICo-managed data plane, or when
+    /// neither the request nor the tenant supplies one.
+    pub profile_type: Option<String>,
+
+    /// Whether the VPC is "internal" -- drives VNI pool selection
+    /// (`vpc-vni` internal pool vs `external-vpc-vni` external pool)
+    /// and a couple of downstream behaviors.
+    pub internal: bool,
+}
+
+impl Default for ResolvedVpcRouting {
+    /// Default resolution for VPC types that don't accept a
+    /// `routing_profile_type` field (Flat today). `profile_type` is
+    /// `None` because there's nothing to resolve. `internal` carries
+    /// the default value the VNI allocator should pool from -- it IS
+    /// part of the routing-profile concept (every profile has an
+    /// `internal: bool`), but in the no-profile case we pick a
+    /// conservative default since the field still has to flow
+    /// downstream to the VNI pool selector.
+    ///
+    /// TODO(chet): Consider switching callers to
+    /// `Option<ResolvedVpcRouting>` so the no-profile case doesn't
+    /// silently masquerade as "internal."
+    fn default() -> Self {
+        Self {
+            profile_type: None,
+            internal: true,
+        }
+    }
+}
+
+/// Resolves the routing-profile and `internal` flag for a VPC create
+/// request from (1) the VPC's virtualization type's capabilities,
+/// (2) the request's `routing_profile_type`, (3) the tenant's
+/// `routing_profile_type`, and (4) the site's FNN config. Surfaces
+/// any contradictions as [`CarbideError`].
+///
+/// This exists as a function so that resolution rules can be
+/// more easily unit-tested directly, vs. as part of a wider
+/// flow.
+pub(crate) fn resolve_vpc_routing(
+    virt_type: VpcVirtualizationType,
+    requested_profile_type: Option<&str>,
+    tenant: Option<&Tenant>,
+    fnn_config: Option<&FnnConfig>,
+    organization_id: &str,
+) -> Result<ResolvedVpcRouting, CarbideError> {
+    // VPC types that don't accept a `routing_profile_type` in the
+    // create request short-circuit to a default. ETV+FNN both accept
+    // the field (for tenant access-tier authorization and VNI pool
+    // selection) and run the resolution below; Flat declines and lands
+    // here. See `ResolvedVpcRouting::default` for what the default
+    // carries and why.
+    if !virt_type.supports_routing_profile_type() {
+        return Ok(ResolvedVpcRouting::default());
+    }
+
+    let tenant_profile_type = tenant.and_then(|t| t.routing_profile_type.as_deref());
+
+    match (requested_profile_type, tenant_profile_type) {
+        // No VPC routing profile requested and no tenant profile.
+        // Falling back to a default. With FNN disabled, assume
+        // internal (legacy/pre-FNN behavior); with FNN enabled,
+        // external must be assumed.
+        (None, None) => Ok(ResolvedVpcRouting {
+            profile_type: None,
+            internal: fnn_config.is_none(),
+        }),
+
+        // Request asks for a routing profile but no tenant context
+        // exists to validate it against -- reject.
+        (Some(_), None) => Err(CarbideError::FailedPrecondition(format!(
+            "VPC routing-profile type requested but no tenant or routing profile-type found for organization id `{organization_id}`"
+        ))),
+
+        // Tenant has a routing profile; resolve the request against it.
+        (request_profile_type, Some(tenant_profile_type)) => {
+            match (fnn_config, request_profile_type) {
+                // FNN disabled but the request named a profile -- reject.
+                (None, Some(_)) => Err(CarbideError::FailedPrecondition(
+                    "FNN configuration required to request routing-profile for VPCs".to_string(),
+                )),
+
+                // FNN disabled with no explicit request: inherit the
+                // tenant's profile name; force `internal=true` (legacy
+                // pre-FNN behavior).
+ (None, None) => Ok(ResolvedVpcRouting { + profile_type: Some(tenant_profile_type.to_owned()), + internal: true, + }), + + // FNN enabled with no explicit request: inherit the + // tenant's profile name and its `internal` flag. + (Some(fnn), None) => { + let tenant_profile = + fnn.routing_profiles + .get(tenant_profile_type) + .ok_or_else(|| CarbideError::NotFoundError { + kind: "routing_profile", + id: tenant_profile_type.to_owned(), + })?; + Ok(ResolvedVpcRouting { + profile_type: Some(tenant_profile_type.to_owned()), + internal: tenant_profile.internal, + }) + } + + // FNN enabled and the request named a profile: use the + // request's profile, but check that its access tier + // isn't broader than the tenant's. Higher tier value = + // more restricted; lower = broader. + (Some(fnn), Some(profile_type)) => { + let routing_profile = + fnn.routing_profiles.get(profile_type).ok_or_else(|| { + CarbideError::NotFoundError { + kind: "routing_profile", + id: profile_type.to_owned(), + } + })?; + let tenant_profile = + fnn.routing_profiles + .get(tenant_profile_type) + .ok_or_else(|| CarbideError::NotFoundError { + kind: "routing_profile", + id: tenant_profile_type.to_owned(), + })?; + if routing_profile.access_tier < tenant_profile.access_tier { + return Err(CarbideError::FailedPrecondition( + "requested VPC routing-profile access tier is broader than associated tenant routing-profile access tier" + .to_string(), + )); + } + Ok(ResolvedVpcRouting { + profile_type: Some(profile_type.to_owned()), + internal: routing_profile.internal, + }) + } + } + } + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use config_version::ConfigVersion; + use model::metadata::Metadata; + + use super::*; + use crate::cfg::file::FnnRoutingProfileConfig; + + fn tenant_with_profile(profile: Option<&str>) -> Tenant { + Tenant { + organization_id: "test-org".parse().unwrap(), + routing_profile_type: profile.map(|s| s.to_string()), + metadata: 
Metadata::new_with_default_name(),
+            version: ConfigVersion::initial(),
+        }
+    }
+
+    fn fnn_with_profiles(profiles: &[(&str, FnnRoutingProfileConfig)]) -> FnnConfig {
+        FnnConfig {
+            admin_vpc: None,
+            common_internal_route_target: None,
+            additional_route_target_imports: vec![],
+            routing_profiles: profiles
+                .iter()
+                .map(|(name, profile)| ((*name).to_string(), profile.clone()))
+                .collect::<HashMap<_, _>>(),
+            use_vpc_vrf_loopback: false,
+        }
+    }
+
+    fn profile(internal: bool, access_tier: u32) -> FnnRoutingProfileConfig {
+        FnnRoutingProfileConfig {
+            internal,
+            access_tier,
+            ..Default::default()
+        }
+    }
+
+    #[test]
+    fn flat_short_circuits_regardless_of_inputs() {
+        let resolved = resolve_vpc_routing(
+            VpcVirtualizationType::Flat,
+            Some("EXTERNAL"),
+            Some(&tenant_with_profile(Some("INTERNAL"))),
+            Some(&fnn_with_profiles(&[
+                ("EXTERNAL", profile(false, 2)),
+                ("INTERNAL", profile(true, 1)),
+            ])),
+            "test-org",
+        )
+        .expect("Flat must short-circuit cleanly");
+        assert_eq!(resolved.profile_type, None);
+        assert!(resolved.internal);
+    }
+
+    #[test]
+    fn no_request_no_tenant_no_fnn_defaults_to_internal() {
+        let resolved =
+            resolve_vpc_routing(VpcVirtualizationType::Fnn, None, None, None, "test-org")
+                .expect("no-request no-tenant no-fnn is the legacy pre-FNN default");
+        assert_eq!(resolved.profile_type, None);
+        assert!(
+            resolved.internal,
+            "FNN disabled means we default to internal"
+        );
+    }
+
+    #[test]
+    fn no_request_no_tenant_with_fnn_defaults_to_external() {
+        let fnn = fnn_with_profiles(&[]);
+        let resolved = resolve_vpc_routing(
+            VpcVirtualizationType::Fnn,
+            None,
+            None,
+            Some(&fnn),
+            "test-org",
+        )
+        .expect("no-request no-tenant with-fnn must succeed");
+        assert_eq!(resolved.profile_type, None);
+        assert!(
+            !resolved.internal,
+            "FNN enabled means we default to external"
+        );
+    }
+
+    #[test]
+    fn request_but_no_tenant_is_rejected() {
+        let err = resolve_vpc_routing(
+            VpcVirtualizationType::Fnn,
+            Some("EXTERNAL"),
+            None,
+            None,
+            "test-org",
+
) + .expect_err("request without tenant must be rejected"); + assert!(matches!(err, CarbideError::FailedPrecondition(_))); + } + + #[test] + fn fnn_disabled_with_request_is_rejected() { + let tenant = tenant_with_profile(Some("INTERNAL")); + let err = resolve_vpc_routing( + VpcVirtualizationType::Fnn, + Some("EXTERNAL"), + Some(&tenant), + None, + "test-org", + ) + .expect_err("FNN-disabled + explicit request must be rejected"); + assert!(matches!(err, CarbideError::FailedPrecondition(_))); + } + + #[test] + fn fnn_disabled_no_request_inherits_tenant_profile() { + let tenant = tenant_with_profile(Some("INTERNAL")); + let resolved = resolve_vpc_routing( + VpcVirtualizationType::Fnn, + None, + Some(&tenant), + None, + "test-org", + ) + .expect("FNN-disabled + tenant profile must inherit"); + assert_eq!(resolved.profile_type.as_deref(), Some("INTERNAL")); + assert!( + resolved.internal, + "legacy pre-FNN behavior forces internal=true" + ); + } + + #[test] + fn fnn_enabled_no_request_inherits_tenant_profile_internal_flag() { + let tenant = tenant_with_profile(Some("EXTERNAL")); + let fnn = fnn_with_profiles(&[("EXTERNAL", profile(false, 2))]); + let resolved = resolve_vpc_routing( + VpcVirtualizationType::Fnn, + None, + Some(&tenant), + Some(&fnn), + "test-org", + ) + .expect("FNN-enabled + tenant profile must inherit name + internal flag"); + assert_eq!(resolved.profile_type.as_deref(), Some("EXTERNAL")); + assert!(!resolved.internal); + } + + #[test] + fn fnn_enabled_request_overrides_when_access_tier_permits() { + // tenant tier 0 (broad); request tier 2 (narrower) -- allowed + let tenant = tenant_with_profile(Some("ADMIN")); + let fnn = + fnn_with_profiles(&[("ADMIN", profile(true, 0)), ("EXTERNAL", profile(false, 2))]); + let resolved = resolve_vpc_routing( + VpcVirtualizationType::Fnn, + Some("EXTERNAL"), + Some(&tenant), + Some(&fnn), + "test-org", + ) + .expect("narrower request than tenant access tier must succeed"); + 
assert_eq!(resolved.profile_type.as_deref(), Some("EXTERNAL")); + assert!(!resolved.internal); + } + + #[test] + fn fnn_enabled_request_broader_than_tenant_is_rejected() { + // tenant tier 2 (narrow); request tier 0 (broader) -- rejected + let tenant = tenant_with_profile(Some("EXTERNAL")); + let fnn = + fnn_with_profiles(&[("EXTERNAL", profile(false, 2)), ("ADMIN", profile(true, 0))]); + let err = resolve_vpc_routing( + VpcVirtualizationType::Fnn, + Some("ADMIN"), + Some(&tenant), + Some(&fnn), + "test-org", + ) + .expect_err("broader request than tenant access tier must be rejected"); + assert!(matches!(err, CarbideError::FailedPrecondition(_))); + } + + #[test] + fn unknown_requested_profile_yields_not_found() { + let tenant = tenant_with_profile(Some("EXTERNAL")); + let fnn = fnn_with_profiles(&[("EXTERNAL", profile(false, 2))]); + let err = resolve_vpc_routing( + VpcVirtualizationType::Fnn, + Some("DOES_NOT_EXIST"), + Some(&tenant), + Some(&fnn), + "test-org", + ) + .expect_err("request naming an undefined routing profile must error"); + assert!( + matches!(err, CarbideError::NotFoundError { kind, .. } if kind == "routing_profile") + ); + } + + #[test] + fn unknown_tenant_profile_yields_not_found() { + let tenant = tenant_with_profile(Some("UNDEFINED")); + let fnn = fnn_with_profiles(&[("EXTERNAL", profile(false, 2))]); + let err = resolve_vpc_routing( + VpcVirtualizationType::Fnn, + None, + Some(&tenant), + Some(&fnn), + "test-org", + ) + .expect_err("tenant naming an undefined routing profile must error"); + assert!( + matches!(err, CarbideError::NotFoundError { kind, .. 
} if kind == "routing_profile") + ); + } +} diff --git a/crates/api/src/handlers/vpc_peering.rs b/crates/api/src/handlers/vpc_peering.rs index a88ecfaa3b..36fbb9f35f 100644 --- a/crates/api/src/handlers/vpc_peering.rs +++ b/crates/api/src/handlers/vpc_peering.rs @@ -17,8 +17,8 @@ use ::db::{ObjectColumnFilter, vpc, vpc_peering as db}; use ::rpc::forge as rpc; -use carbide_network::virtualization::VpcVirtualizationType; use carbide_uuid::vpc_peering::VpcPeeringId; +use model::vpc::VpcVirtualizationTypeCapabilities; use tonic::{Request, Response, Status}; use uuid::Uuid; @@ -71,16 +71,13 @@ pub async fn create( kind: "VPC", id: peer_vpc_id.to_string(), })?; - // Peering not allowed between Fnn <-> ETV/ETV_NVUE. - // ETV and ETV_NVUE are treated as equivalent since NVUE is always enabled. - if vpc1.network_virtualization_type != vpc2.network_virtualization_type - && (vpc1.network_virtualization_type == VpcVirtualizationType::Fnn - || vpc2.network_virtualization_type == VpcVirtualizationType::Fnn) - { - return Err(CarbideError::internal( - "VPC peering between VPCs of different network virtualization type not allowed.".to_string(), - ).into()); - } + + // Make sure the VPCs are allowed to peer based on their + // virtualization types. Their capabilities will determine + // if they are allowed or not. 
+ vpc1.network_virtualization_type + .ensure_can_peer_with(vpc2.network_virtualization_type) + .map_err(CarbideError::from)?; } Some(VpcPeeringPolicy::Mixed) => { // Any combination of network virtualization types allowed diff --git a/crates/api/src/handlers/vpc_prefix.rs b/crates/api/src/handlers/vpc_prefix.rs index 6c6c84c4f6..5caed0fbc9 100644 --- a/crates/api/src/handlers/vpc_prefix.rs +++ b/crates/api/src/handlers/vpc_prefix.rs @@ -18,9 +18,9 @@ use ::db::{ObjectColumnFilter, vpc_prefix as db}; use ::rpc::forge as rpc; use ::rpc::forge::PrefixMatchType; -use carbide_network::virtualization::VpcVirtualizationType; use ipnetwork::IpNetwork; use model::network_prefix::NetworkPrefix; +use model::vpc::VpcVirtualizationTypeCapabilities; use model::vpc_prefix; use tonic::{Request, Response, Status}; @@ -73,14 +73,10 @@ pub async fn create( id: new_prefix.vpc_id.to_string(), })?; - // IPv6 VPC prefixes are only supported for FNN VPCs. - if new_prefix.config.prefix.is_ipv6() - && vpc.network_virtualization_type != VpcVirtualizationType::Fnn - { - return Err(CarbideError::InvalidArgument( - "IPv6 VPC prefixes are only supported for FNN VPCs".to_string(), - ) - .into()); + if new_prefix.config.prefix.is_ipv6() { + vpc.network_virtualization_type + .ensure_supports_ipv6_prefix() + .map_err(CarbideError::from)?; } let expected_vpc_version = vpc.version; diff --git a/crates/api/src/instance/mod.rs b/crates/api/src/instance/mod.rs index 639ef4d734..6886cd734a 100644 --- a/crates/api/src/instance/mod.rs +++ b/crates/api/src/instance/mod.rs @@ -48,6 +48,7 @@ use model::metadata::Metadata; use model::network_segment::NetworkSegmentType; use model::os::OperatingSystemVariant; use model::tenant::TenantOrganizationId; +use model::vpc::{FabricInterfaceType, VpcVirtualizationTypeCapabilities}; use model::vpc_prefix::VpcPrefix; use sqlx::PgConnection; @@ -902,6 +903,38 @@ pub async fn batch_allocate_instances( } } + // Each of the host's HostInband segments must be bound to a + // 
VPC whose fabric interface type matches a zero-DPU host's + // (i.e. `Nic`). HostInband segments are allowed to exist + // without a VPC at segment-create time (so operators can + // create them up front for DHCP routing during site-explorer + // ingestion); we require the binding here, when a tenant + // intent actually shows up to allocate an instance. + for segment_id in &allowed_segment_ids { + let vpc = db::vpc::find_by_segment(&mut txn, *segment_id) + .await + .map_err(|e| { + if e.is_not_found() { + CarbideError::FailedPrecondition(format!( + "zero-DPU host {} has HostInband segment {} that is not bound to a Flat VPC; instance allocation requires the segment to be in a Flat VPC", + mh_snapshot.host_snapshot.id, segment_id, + )) + } else { + CarbideError::from(e) + } + })?; + let vpc_iface = vpc.network_virtualization_type.fabric_interface_type(); + if vpc_iface != FabricInterfaceType::Nic { + return Err(CarbideError::FailedPrecondition(format!( + "zero-DPU host {} has HostInband segment {} bound to VPC {} ({}); zero-DPU hosts can only allocate into VPCs whose fabric_interface_type is `nic` (got `{vpc_iface}`)", + mh_snapshot.host_snapshot.id, + segment_id, + vpc.id, + vpc.network_virtualization_type, + ))); + } + } + // Extension services run on DPU agents; a zero-DPU host has no // place to schedule them. We need to check, otherwise the status // would just report "Unknown" forever. @@ -911,11 +944,40 @@ pub async fn batch_allocate_instances( mh_snapshot.host_snapshot.id, ))); } - } else if request.config.network.auto { - return Err(CarbideError::InvalidArgument(format!( - "host {} has DPUs; `InstanceNetworkConfig.auto` is only valid on zero-DPU hosts", - mh_snapshot.host_snapshot.id, - ))); + } else { + // `auto` is only valid on zero-DPU hosts; DPU-managed hosts must + // list their interfaces explicitly. 
+ if request.config.network.auto { + return Err(CarbideError::InvalidArgument(format!( + "host {} has DPUs; `InstanceNetworkConfig.auto` is only valid on zero-DPU hosts", + mh_snapshot.host_snapshot.id, + ))); + } + + // DPU-managed hosts must only allocate into VPCs whose + // fabric interface type matches (i.e. `Dpu`). The segment- + // binding rule already prevents `HostInband` segments from + // living in a Dpu-fabric VPC, but reject explicitly here so + // a DPU instance referencing a `HostInband` segment (which + // would be in a Nic-fabric VPC) fails with a clear message + // rather than getting stuck somewhere downstream. + for iface in &request.config.network.interfaces { + if let Some(ns_id) = iface.network_segment_id { + let vpc = db::vpc::find_by_segment(&mut txn, ns_id) + .await + .map_err(CarbideError::from)?; + let vpc_iface = vpc.network_virtualization_type.fabric_interface_type(); + if vpc_iface != FabricInterfaceType::Dpu { + return Err(CarbideError::FailedPrecondition(format!( + "DPU-managed host {} cannot allocate an instance into VPC {} ({}, via segment {}); DPU hosts can only allocate into VPCs whose fabric_interface_type is `dpu` (got `{vpc_iface}`)", + mh_snapshot.host_snapshot.id, + vpc.id, + vpc.network_virtualization_type, + ns_id, + ))); + } + } + } } processed_requests.push((request, mh_snapshot)); diff --git a/crates/api/src/tests/common/api_fixtures/network_segment.rs b/crates/api/src/tests/common/api_fixtures/network_segment.rs index 7adc6e0cee..76aefb3826 100644 --- a/crates/api/src/tests/common/api_fixtures/network_segment.rs +++ b/crates/api/src/tests/common/api_fixtures/network_segment.rs @@ -139,6 +139,14 @@ pub async fn create_host_inband_network_segment( .to_string(); let gateway = FIXTURE_HOST_INBAND_NETWORK_SEGMENT_GATEWAY.ip().to_string(); + // HostInband segments must live in Flat VPCs. 
If the caller did not + // supply a VPC, create a Flat VPC here so the fixture mirrors the + // production binding rather than landing in the default ETV VPC. + let vpc_id = match vpc_id { + Some(id) => Some(id), + None => Some(create_default_flat_vpc(api, "FIXTURE_HOST_INBAND_FLAT").await), + }; + create_network_segment( api, "HOST_INBAND", @@ -151,6 +159,26 @@ pub async fn create_host_inband_network_segment( .await } +/// Creates a Flat VPC for the default test tenant and returns its id. Used as +/// the implicit parent VPC for HostInband segment fixtures. +pub async fn create_default_flat_vpc(api: &Api, name: &str) -> VpcId { + let request = crate::tests::common::rpc_builder::VpcCreationRequest::builder( + "2829bbe3-c169-4cd9-8b2a-19a8b1618a93", + ) + .metadata(rpc::forge::Metadata { + name: name.to_string(), + ..Default::default() + }) + .network_virtualization_type(rpc::forge::VpcVirtualizationType::Flat as i32) + .tonic_request(); + let vpc = api + .create_vpc(request) + .await + .expect("Unable to create Flat VPC fixture") + .into_inner(); + vpc.id.expect("Created Flat VPC must have an id") +} + pub async fn create_tenant_network_segment( api: &Api, vpc_id: Option, diff --git a/crates/api/src/tests/common/api_fixtures/vpc.rs b/crates/api/src/tests/common/api_fixtures/vpc.rs index 7c68bc518f..bff25439d4 100644 --- a/crates/api/src/tests/common/api_fixtures/vpc.rs +++ b/crates/api/src/tests/common/api_fixtures/vpc.rs @@ -51,3 +51,30 @@ pub async fn create_vpc( (vpc_id, vpc) } + +/// Creates a Flat VPC for the given (or default) tenant. 
+pub async fn create_flat_vpc( + env: &TestEnv, + name: String, + tenant_org_id: Option, +) -> (VpcId, rpc::Vpc) { + let tenant_config = default_tenant_config(); + let vpc_id = VpcId::new(); + let request = + VpcCreationRequest::builder(tenant_org_id.unwrap_or(tenant_config.tenant_organization_id)) + .id(vpc_id) + .network_virtualization_type(rpc::VpcVirtualizationType::Flat as i32) + .metadata(rpc::Metadata { + name, + ..Default::default() + }) + .tonic_request(); + + let vpc = env + .api + .create_vpc(request) + .await + .expect("create_flat_vpc should succeed") + .into_inner(); + (vpc_id, vpc) +} diff --git a/crates/api/src/tests/instance_allocate.rs b/crates/api/src/tests/instance_allocate.rs index 99696eb97a..13615def3c 100644 --- a/crates/api/src/tests/instance_allocate.rs +++ b/crates/api/src/tests/instance_allocate.rs @@ -126,6 +126,16 @@ async fn create_test_env_for_instance_allocation( .unwrap() .into_inner(); + // HostInband segments now require Flat VPCs. Create two so that the + // "different VPCs" test variant can put each HostInband segment in a + // distinct Flat VPC. + let flat_vpc_1_id = + common::api_fixtures::network_segment::create_default_flat_vpc(&env.api, "test flat vpc 1") + .await; + let flat_vpc_2_id = + common::api_fixtures::network_segment::create_default_flat_vpc(&env.api, "test flat vpc 2") + .await; + create_underlay_network_segment(&env.api).await; create_admin_network_segment(&env.api).await; @@ -147,8 +157,9 @@ async fn create_test_env_for_instance_allocation( ) .await; - create_host_inband_network_segment(&env.api, vpc_1.id).await; - // Make sure second host_inband network segment has the same VPC ID + create_host_inband_network_segment(&env.api, Some(flat_vpc_1_id)).await; + // Second HostInband segment lives in the same Flat VPC, or a different + // Flat VPC if the test wants to assert allocation rejection. 
create_network_segment( &env.api, "HOST_INBAND_2", @@ -161,12 +172,11 @@ async fn create_test_env_for_instance_allocation( .ip() .to_string(), forge::NetworkSegmentType::HostInband, - // One test asserts that allocation should fail if each segment is in a different VPC - if options.host_inband_segments_in_different_vpcs { - vpc_2.id + Some(if options.host_inband_segments_in_different_vpcs { + flat_vpc_2_id } else { - vpc_1.id - }, + flat_vpc_1_id + }), true, ) .await; @@ -705,7 +715,16 @@ async fn test_reject_single_dpu_instance_allocation_host_inband_network_config( .await; match result { - Err(e) if e.code() == tonic::Code::InvalidArgument => {} + // The rejection can come from two distinct gates: + // - InvalidArgument from segment-type rules + // - FailedPrecondition from the DPU-host-vs-Flat-VPC gate + // Both are valid rejections of "DPU host instance referencing a + // HostInband segment"; the test only cares that allocation fails. + Err(e) + if matches!( + e.code(), + tonic::Code::InvalidArgument | tonic::Code::FailedPrecondition + ) => {} _ => panic!( "Creating an instance on a dpu host while specifying a host_inband network segment should throw an error, got {result:?}" ), diff --git a/crates/api/src/tests/network_segment.rs b/crates/api/src/tests/network_segment.rs index 31759731c5..4a1edcf5a1 100644 --- a/crates/api/src/tests/network_segment.rs +++ b/crates/api/src/tests/network_segment.rs @@ -1434,3 +1434,147 @@ async fn test_find_state_histories_unknown_segment_returns_no_records(pool: sqlx "unknown segment must yield no history records, got: {records:?}" ); } + +#[crate::sqlx_test] +async fn flat_vpc_accepts_host_inband_segment( + pool: sqlx::PgPool, +) -> Result<(), Box> { + // The canonical Flat pairing: a Flat VPC with a HostInband segment. 
+ let env = + create_test_env_with_overrides(pool.clone(), TestEnvOverrides::no_network_segments()).await; + + let (_vpc_id, vpc) = common::api_fixtures::vpc::create_flat_vpc( + &env, + "flat".to_string(), + Some("2829bbe3-c169-4cd9-8b2a-19a8b1618a93".to_string()), + ) + .await; + + // Use a fixture-tenant gateway since it's guaranteed to be in TEST_SITE_PREFIXES; + // the segment type (HostInband) is what's being tested here, not the prefix. + let gw = FIXTURE_TENANT_NETWORK_SEGMENT_GATEWAYS[0]; + let request = rpc::forge::NetworkSegmentCreationRequest { + id: None, + mtu: Some(1500), + name: "FLAT_HOST_INBAND".to_string(), + prefixes: vec![rpc::forge::NetworkPrefix { + id: None, + prefix: gw.network().to_string() + "/24", + gateway: Some(gw.ip().to_string()), + reserve_first: 3, + free_ip_count: 0, + svi_ip: None, + }], + subdomain_id: None, + vpc_id: vpc.id, + segment_type: rpc::forge::NetworkSegmentType::HostInband as i32, + }; + + env.api + .create_network_segment(Request::new(request)) + .await + .expect("Flat VPC + HostInband segment is the canonical pairing"); + + Ok(()) +} + +#[crate::sqlx_test] +async fn flat_vpc_rejects_tenant_segment( + pool: sqlx::PgPool, +) -> Result<(), Box> { + // Flat VPCs are HostInband-only -- attempting to put a Tenant overlay + // segment in a Flat VPC should be rejected at create time. 
+ let env = + create_test_env_with_overrides(pool.clone(), TestEnvOverrides::no_network_segments()).await; + + let (_vpc_id, vpc) = common::api_fixtures::vpc::create_flat_vpc( + &env, + "flat".to_string(), + Some("2829bbe3-c169-4cd9-8b2a-19a8b1618a93".to_string()), + ) + .await; + + let request = rpc::forge::NetworkSegmentCreationRequest { + id: None, + mtu: Some(1500), + name: "FLAT_TENANT_REJECTED".to_string(), + prefixes: vec![rpc::forge::NetworkPrefix { + id: None, + prefix: FIXTURE_TENANT_NETWORK_SEGMENT_GATEWAYS[0].to_string(), + gateway: Some(FIXTURE_TENANT_NETWORK_SEGMENT_GATEWAYS[0].ip().to_string()), + reserve_first: 3, + free_ip_count: 0, + svi_ip: None, + }], + subdomain_id: None, + vpc_id: vpc.id, + segment_type: rpc::forge::NetworkSegmentType::Tenant as i32, + }; + + let err = env + .api + .create_network_segment(Request::new(request)) + .await + .expect_err("Flat VPC + Tenant segment must be rejected"); + + assert_eq!(err.code(), tonic::Code::InvalidArgument, "got: {err}"); + assert!( + err.message().contains("flat") && err.message().contains("tenant"), + "error should mention flat VPC rejecting tenant segment, got: {}", + err.message() + ); + + Ok(()) +} + +#[crate::sqlx_test] +async fn etv_vpc_rejects_host_inband_segment( + pool: sqlx::PgPool, +) -> Result<(), Box> { + // HostInband segments must live in Flat VPCs. An ETV VPC accepting a + // HostInband segment would violate the Flat<->HostInband binding. + let env = + create_test_env_with_overrides(pool.clone(), TestEnvOverrides::no_network_segments()).await; + + // Default `create_vpc` produces an ETV VPC (no virt type set => default). 
+ let (_vpc_id, vpc) = common::api_fixtures::vpc::create_vpc( + &env, + "etv".to_string(), + Some("2829bbe3-c169-4cd9-8b2a-19a8b1618a93".to_string()), + None, + ) + .await; + + let gw = FIXTURE_TENANT_NETWORK_SEGMENT_GATEWAYS[0]; + let request = rpc::forge::NetworkSegmentCreationRequest { + id: None, + mtu: Some(1500), + name: "ETV_HOST_INBAND_REJECTED".to_string(), + prefixes: vec![rpc::forge::NetworkPrefix { + id: None, + prefix: gw.network().to_string() + "/24", + gateway: Some(gw.ip().to_string()), + reserve_first: 3, + free_ip_count: 0, + svi_ip: None, + }], + subdomain_id: None, + vpc_id: vpc.id, + segment_type: rpc::forge::NetworkSegmentType::HostInband as i32, + }; + + let err = env + .api + .create_network_segment(Request::new(request)) + .await + .expect_err("HostInband segment must be rejected on non-Flat VPCs"); + + assert_eq!(err.code(), tonic::Code::InvalidArgument, "got: {err}"); + assert!( + err.message().contains("etv") && err.message().contains("host_inband"), + "error should mention etv VPC rejecting host_inband segment, got: {}", + err.message() + ); + + Ok(()) +} diff --git a/crates/api/src/tests/set_primary_dpu.rs b/crates/api/src/tests/set_primary_dpu.rs index 939f55e2f4..49ac48541d 100644 --- a/crates/api/src/tests/set_primary_dpu.rs +++ b/crates/api/src/tests/set_primary_dpu.rs @@ -17,7 +17,6 @@ use carbide_uuid::machine::{MachineId, MachineIdSource, MachineType}; use ipnetwork::IpNetwork; -use model::metadata::Metadata; use rpc::forge; use rpc::forge::forge_server::Forge; @@ -28,7 +27,6 @@ use crate::tests::common::api_fixtures::network_segment::{ FIXTURE_UNDERLAY_NETWORK_SEGMENT_GATEWAY, create_admin_network_segment, create_host_inband_network_segment, create_underlay_network_segment, }; -use crate::tests::common::rpc_builder::VpcCreationRequest; // On a zero-DPU host the handler's interface scan never finds a row with a // matching `attached_dpu_machine_id`, which previously bubbled up as a @@ -67,18 +65,16 @@ async fn 
test_set_primary_dpu_rejects_zero_dpu_host( }, ) .await; - let vpc = env - .api - .create_vpc( - VpcCreationRequest::builder("2829bbe3-c169-4cd9-8b2a-19a8b1618a93") - .metadata(Metadata::new_with_default_name()) - .tonic_request(), - ) - .await? - .into_inner(); + // HostInband segments must live in a Flat VPC. The test doesn't otherwise + // need a non-Flat VPC, so create only a Flat one for the segment. + let flat_vpc_id = api_fixtures::network_segment::create_default_flat_vpc( + &env.api, + "set-primary-dpu flat vpc", + ) + .await; create_underlay_network_segment(&env.api).await; create_admin_network_segment(&env.api).await; - create_host_inband_network_segment(&env.api, vpc.id).await; + create_host_inband_network_segment(&env.api, Some(flat_vpc_id)).await; env.run_network_segment_controller_iteration().await; env.run_network_segment_controller_iteration().await; diff --git a/crates/api/src/tests/vpc.rs b/crates/api/src/tests/vpc.rs index d6b6c42193..40bc0f128e 100644 --- a/crates/api/src/tests/vpc.rs +++ b/crates/api/src/tests/vpc.rs @@ -1062,3 +1062,104 @@ async fn test_increment_vpc_version_detects_concurrent_writes( Ok(()) } + +#[crate::sqlx_test] +async fn create_flat_vpc_succeeds_without_routing_profile( + pool: sqlx::PgPool, +) -> Result<(), Box> { + // Flat VPCs are for zero-DPU hosts and don't have a Carbide-managed + // routing layer. The create handler should skip the FNN-flavored + // routing-profile validation entirely and still allocate a VNI. + let env = create_test_env(pool).await; + + let tenant = env + .api + .create_tenant(tonic::Request::new(rpc::forge::CreateTenantRequest { + organization_id: "flat-tenant".to_string(), + routing_profile_type: None, + metadata: Some(rpc::forge::Metadata { + name: "flat-tenant".to_string(), + description: "".to_string(), + labels: vec![], + }), + })) + .await? 
+ .into_inner() + .tenant + .unwrap(); + + let vpc = env + .api + .create_vpc( + VpcCreationRequest::builder(tenant.organization_id.clone()) + .network_virtualization_type(rpc::forge::VpcVirtualizationType::Flat as i32) + .metadata(rpc::forge::Metadata { + name: "flat".to_string(), + ..Default::default() + }) + .tonic_request(), + ) + .await? + .into_inner(); + + assert_eq!( + vpc.network_virtualization_type, + Some(rpc::forge::VpcVirtualizationType::Flat as i32), + ); + assert!(vpc.routing_profile_type.is_none()); + assert!( + vpc.status.as_ref().and_then(|s| s.vni).is_some(), + "Flat VPCs still allocate a VNI for pluggable SDN hooks (e.g. switch-side VTEPs)", + ); + + Ok(()) +} + +#[crate::sqlx_test] +async fn create_flat_vpc_rejects_routing_profile_type( + pool: sqlx::PgPool, +) -> Result<(), Box> { + // Routing profile types are FNN-specific. Sending one on a Flat VPC + // create is contradictory and should be rejected up front. + let env = create_test_env(pool).await; + + let tenant = env + .api + .create_tenant(tonic::Request::new(rpc::forge::CreateTenantRequest { + organization_id: "flat-tenant".to_string(), + routing_profile_type: None, + metadata: Some(rpc::forge::Metadata { + name: "flat-tenant".to_string(), + description: "".to_string(), + labels: vec![], + }), + })) + .await? 
+ .into_inner() + .tenant + .unwrap(); + + let err = env + .api + .create_vpc( + VpcCreationRequest::builder(tenant.organization_id) + .network_virtualization_type(rpc::forge::VpcVirtualizationType::Flat as i32) + .routing_profile_type("EXTERNAL".to_string()) + .metadata(rpc::forge::Metadata { + name: "flat".to_string(), + ..Default::default() + }) + .tonic_request(), + ) + .await + .expect_err("Flat VPC + routing_profile_type must be rejected"); + + assert_eq!(err.code(), tonic::Code::InvalidArgument, "got: {err}"); + assert!( + err.message().contains("flat") && err.message().contains("routing_profile_type"), + "error should mention flat VPC and the routing_profile_type field, got: {}", + err.message() + ); + + Ok(()) +} diff --git a/crates/api/src/tests/vpc_peering.rs b/crates/api/src/tests/vpc_peering.rs index e7fd2a6ae3..f0cc3d960c 100644 --- a/crates/api/src/tests/vpc_peering.rs +++ b/crates/api/src/tests/vpc_peering.rs @@ -30,7 +30,7 @@ use uuid::Uuid; use super::common::api_fixtures::{self, TestEnv}; use crate::tests::common::api_fixtures::network_segment::{ - FIXTURE_TENANT_NETWORK_SEGMENT_GATEWAYS, create_tenant_network_segment, + FIXTURE_TENANT_NETWORK_SEGMENT_GATEWAYS, create_network_segment, create_tenant_network_segment, }; use crate::tests::common::api_fixtures::{create_managed_host, create_test_env}; use crate::tests::common::rpc_builder::VpcCreationRequest; @@ -620,3 +620,252 @@ async fn test_vpc_peering_network_config_ordered_peerings( Ok(()) } + +#[crate::sqlx_test] +async fn flat_vpc_can_peer_with_etv_under_exclusive_policy( + pool: sqlx::PgPool, +) -> Result<(), Box> { + // Flat VPCs short-circuit the ETV<->FNN exclusion under Exclusive policy + // because Flat VPCs do not own a Carbide-managed data plane. 
+ let env = api_fixtures::create_test_env(pool).await; + + let (_, etv_vpc) = api_fixtures::vpc::create_vpc( + &env, + "etv".to_string(), + Some("2829bbe3-c169-4cd9-8b2a-19a8b1618a93".to_string()), + None, + ) + .await; + let (_, flat_vpc) = api_fixtures::vpc::create_flat_vpc( + &env, + "flat".to_string(), + Some("2829bbe3-c169-4cd9-8b2a-19a8b1618a93".to_string()), + ) + .await; + + env.api + .create_vpc_peering(Request::new(VpcPeeringCreationRequest { + vpc_id: etv_vpc.id, + peer_vpc_id: flat_vpc.id, + id: None, + })) + .await + .expect("Flat <-> ETV peering must be allowed under Exclusive policy"); + + Ok(()) +} + +#[crate::sqlx_test] +async fn flat_vpc_can_peer_with_fnn_under_exclusive_policy( + pool: sqlx::PgPool, +) -> Result<(), Box> { + // Same short-circuit as the ETV case, but on the FNN side: Flat VPCs are + // peer-policy-neutral. + let env = api_fixtures::create_test_env(pool).await; + + let fnn_vpc = env + .api + .create_vpc( + VpcCreationRequest::builder("2829bbe3-c169-4cd9-8b2a-19a8b1618a93") + .metadata(Metadata { + name: "fnn".to_string(), + ..Default::default() + }) + .network_virtualization_type(VpcVirtualizationType::Fnn) + .tonic_request(), + ) + .await? + .into_inner(); + let (_, flat_vpc) = api_fixtures::vpc::create_flat_vpc( + &env, + "flat".to_string(), + Some("2829bbe3-c169-4cd9-8b2a-19a8b1618a93".to_string()), + ) + .await; + + env.api + .create_vpc_peering(Request::new(VpcPeeringCreationRequest { + vpc_id: fnn_vpc.id, + peer_vpc_id: flat_vpc.id, + id: None, + })) + .await + .expect("Flat <-> FNN peering must be allowed under Exclusive policy"); + + Ok(()) +} + +#[crate::sqlx_test] +async fn flat_vpc_can_peer_with_flat_under_exclusive_policy( + pool: sqlx::PgPool, +) -> Result<(), Box> { + // Flat <-> Flat is structurally identical: no overlay state to mediate. 
+ let env = api_fixtures::create_test_env(pool).await; + + let (_, flat_a) = api_fixtures::vpc::create_flat_vpc( + &env, + "flat-a".to_string(), + Some("2829bbe3-c169-4cd9-8b2a-19a8b1618a93".to_string()), + ) + .await; + let (_, flat_b) = api_fixtures::vpc::create_flat_vpc( + &env, + "flat-b".to_string(), + Some("2829bbe3-c169-4cd9-8b2a-19a8b1618a93".to_string()), + ) + .await; + + env.api + .create_vpc_peering(Request::new(VpcPeeringCreationRequest { + vpc_id: flat_a.id, + peer_vpc_id: flat_b.id, + id: None, + })) + .await + .expect("Flat <-> Flat peering must be allowed under Exclusive policy"); + + Ok(()) +} + +/// Coverage for the capability-driven peer-filter in `tenant_network` +/// with an FNN VPC peered to a Flat VPC: +/// +/// - Flat VPC's HostInband segment prefix appears in the FNN +/// instance's `vpc_peer_prefixes`. +/// - Flat VPC's VNI appears in the FNN instance's `vpc_peer_vnis` -- +/// Flat advertises its VNI for peer consumption (pluggable SDN +/// integrations on the operator's fabric may use it), even though +/// Flat itself doesn't run an overlay. The FNN DPU imports it on +/// the self side via `imports_peer_vnis_into_overlay`. +#[crate::sqlx_test] +async fn test_fnn_vpc_with_flat_peer_exchanges_prefixes_and_vnis( + pool: sqlx::PgPool, +) -> Result<(), Box> { + let env = api_fixtures::create_test_env(pool).await; + + // FNN VPC + Tenant segment (the side the instance allocates on). + let fnn_vpc = env + .api + .create_vpc( + VpcCreationRequest::builder("2829bbe3-c169-4cd9-8b2a-19a8b1618a93") + .metadata(Metadata { + name: "test fnn vpc".to_string(), + ..Default::default() + }) + .network_virtualization_type(VpcVirtualizationType::Fnn) + .tonic_request(), + ) + .await? 
+ .into_inner(); + let fnn_vpc_id = fnn_vpc.id.expect("FNN VPC must have id"); + let fnn_segment_id = create_tenant_network_segment( + &env.api, + Some(fnn_vpc_id), + FIXTURE_TENANT_NETWORK_SEGMENT_GATEWAYS[2], + "FNN_TENANT", + true, + ) + .await; + + // Flat VPC + HostInband segment (the peer side). + let (flat_vpc_id, _) = api_fixtures::vpc::create_flat_vpc( + &env, + "test flat vpc".to_string(), + Some("2829bbe3-c169-4cd9-8b2a-19a8b1618a93".to_string()), + ) + .await; + // Use a different fixture-tenant gateway than the FNN side so the + // peer-prefix assertion is unambiguous. + let flat_gateway = FIXTURE_TENANT_NETWORK_SEGMENT_GATEWAYS[3]; + let flat_prefix = format!("{}/{}", flat_gateway.network(), flat_gateway.prefix()); + let _flat_segment_id = create_network_segment( + &env.api, + "FLAT_HOST_INBAND", + &flat_prefix, + &flat_gateway.ip().to_string(), + rpc::forge::NetworkSegmentType::HostInband, + Some(flat_vpc_id), + true, + ) + .await; + + env.run_network_segment_controller_iteration().await; + env.run_network_segment_controller_iteration().await; + + // Peer the VPCs and allocate an instance in the FNN VPC. + let mh = create_managed_host(&env).await; + env.api + .create_vpc_peering(Request::new(VpcPeeringCreationRequest { + vpc_id: Some(fnn_vpc_id), + peer_vpc_id: Some(flat_vpc_id), + id: None, + })) + .await?; + + let instance_network = rpc::InstanceNetworkConfig { + interfaces: vec![rpc::InstanceInterfaceConfig { + function_type: rpc::InterfaceFunctionType::Physical as i32, + network_segment_id: Some(fnn_segment_id), + network_details: None, + device: None, + device_instance: 0, + virtual_function_id: None, + ip_address: None, + ipv6_interface_config: None, + }], + auto: false, + }; + mh.instance_builer(&env) + .network(instance_network) + .build() + .await; + + // Pull the Flat VPC's VNI so we can assert it shows up. 
+ let flat_vpc = env + .api + .find_vpcs_by_ids(Request::new(rpc::forge::VpcsByIdsRequest { + vpc_ids: vec![flat_vpc_id], + })) + .await? + .into_inner(); + let flat_vni = flat_vpc.vpcs[0] + .status + .as_ref() + .and_then(|s| s.vni) + .expect("Flat VPC must have a VNI allocated") as u32; + + let response = env + .api + .get_managed_host_network_config(Request::new(ManagedHostNetworkConfigRequest { + dpu_machine_id: Some(mh.dpu().id), + })) + .await? + .into_inner(); + + assert_eq!(response.tenant_interfaces.len(), 1); + let iface = &response.tenant_interfaces[0]; + + // The Flat VPC's HostInband prefix shows up. + assert_eq!( + iface.vpc_peer_prefixes.len(), + 1, + "FNN instance's vpc_peer_prefixes should include the Flat VPC's prefix; got {:?}", + iface.vpc_peer_prefixes, + ); + assert!( + iface.vpc_peer_prefixes.contains(&flat_prefix), + "expected Flat VPC's prefix {flat_prefix} in vpc_peer_prefixes, got {:?}", + iface.vpc_peer_prefixes, + ); + + // The Flat VPC's VNI shows up too -- Flat advertises its VNI for + // pluggable SDN integrations on the network operator's fabric. 
+ assert_eq!( + iface.vpc_peer_vnis, + vec![flat_vni], + "FNN instance's vpc_peer_vnis should contain the Flat VPC's VNI ({flat_vni}); got {:?}", + iface.vpc_peer_vnis, + ); + + Ok(()) +} diff --git a/crates/api/src/tests/vpc_prefix.rs b/crates/api/src/tests/vpc_prefix.rs index 5d338ee4fd..75f191597d 100644 --- a/crates/api/src/tests/vpc_prefix.rs +++ b/crates/api/src/tests/vpc_prefix.rs @@ -23,7 +23,10 @@ use rpc::forge::{ use sqlx::PgPool; use tonic::Request; -use crate::tests::common::api_fixtures::{create_test_env, get_vpc_fixture_id}; +use crate::tests::common::api_fixtures::{ + self, TEST_SITE_PREFIXES, TestEnvOverrides, create_test_env, create_test_env_with_overrides, + get_vpc_fixture_id, +}; #[crate::sqlx_test] async fn test_create_and_delete_vpc_prefix_deprecated_fields( @@ -369,3 +372,79 @@ async fn test_vpc_prefix_search(pool: PgPool) -> Result<(), Box Result<(), Box> { + // Flat VPCs should accept IPv4 prefixes just like every other VPC type. + let env = create_test_env(pool).await; + let (_, vpc) = api_fixtures::vpc::create_flat_vpc( + &env, + "flat".to_string(), + Some("2829bbe3-c169-4cd9-8b2a-19a8b1618a93".to_string()), + ) + .await; + + let request = Request::new(VpcPrefixCreationRequest { + id: None, + prefix: String::new(), + vpc_id: vpc.id, + config: Some(rpc::forge::VpcPrefixConfig { + prefix: "192.0.2.0/25".to_string(), + }), + metadata: Some(Metadata { + name: "flat-v4".to_string(), + ..Default::default() + }), + }); + + env.api + .create_vpc_prefix(request) + .await + .expect("Flat VPC should accept IPv4 prefix"); + + Ok(()) +} + +#[crate::sqlx_test] +async fn flat_vpc_accepts_ipv6_prefix(pool: PgPool) -> Result<(), Box> { + // Flat VPCs are allowed IPv6 prefixes alongside FNN -- ETV is the only + // type that rejects IPv6 prefixes. Extend the site fabric prefixes with an + // IPv6 range since the default test fabric is IPv4-only. 
+ let mut site_prefixes = TEST_SITE_PREFIXES.clone(); + site_prefixes.push("2001:db8::/32".parse().unwrap()); + let env = create_test_env_with_overrides( + pool, + TestEnvOverrides { + site_prefixes: Some(site_prefixes), + create_network_segments: Some(false), + ..Default::default() + }, + ) + .await; + let (_, vpc) = api_fixtures::vpc::create_flat_vpc( + &env, + "flat".to_string(), + Some("2829bbe3-c169-4cd9-8b2a-19a8b1618a93".to_string()), + ) + .await; + + let request = Request::new(VpcPrefixCreationRequest { + id: None, + prefix: String::new(), + vpc_id: vpc.id, + config: Some(rpc::forge::VpcPrefixConfig { + prefix: "2001:db8::/64".to_string(), + }), + metadata: Some(Metadata { + name: "flat-v6".to_string(), + ..Default::default() + }), + }); + + env.api + .create_vpc_prefix(request) + .await + .expect("Flat VPC should accept IPv6 prefix"); + + Ok(()) +} diff --git a/crates/network/src/virtualization.rs b/crates/network/src/virtualization.rs index c056c1fc1a..e09959e355 100644 --- a/crates/network/src/virtualization.rs +++ b/crates/network/src/virtualization.rs @@ -22,7 +22,7 @@ use std::str::FromStr; use ipnetwork::IpNetwork; /// DEFAULT_NETWORK_VIRTUALIZATION_TYPE is what to default to if the Cloud API -/// doesn't send it to Carbide (which it never does), or if the Carbide API +/// doesn't send it to NICo (which it never does), or if the NICo API /// doesn't send it to the DPU agent. pub const DEFAULT_NETWORK_VIRTUALIZATION_TYPE: VpcVirtualizationType = VpcVirtualizationType::EthernetVirtualizer; @@ -41,10 +41,28 @@ pub const DEFAULT_NETWORK_VIRTUALIZATION_TYPE: VpcVirtualizationType = pub enum VpcVirtualizationType { #[default] EthernetVirtualizer, + /// Deprecated: equivalent to `EthernetVirtualizer` for all live behavior and + /// kept only for backward compatibility; stored `etv` and `etv_nvue` values both + /// decode to `EthernetVirtualizer`. Treat the two variants alike in match arms. 
EthernetVirtualizerWithNvue, Fnn, + /// `Flat` is for VPCs whose tenant instances live directly on the + /// underlay (zero-DPU hosts, or hosts with their DPU in NIC mode) and + /// whose interfaces are bound to `HostInband` network segments rather + /// than a NICo-managed overlay. Flat VPCs are still real tenant + /// VPCs with a VNI and NSGs, but NICo doesn't drive their data + /// plane -- routing and ACL enforcement between Flat VPCs and other + /// VPCs is the network operator's responsibility. + Flat, } +// Per-variant policy ("how does this type behave with respect to segments, +// peering, routing profiles, IPv6, host fabric interfaces") is declared +// as data in `carbide_api_model::vpc::capability` and consulted via the +// `VpcVirtualizationTypeCapabilities` extension trait. There are no +// inherent methods here; adding a new variant means filling in one +// `VpcCapabilities` literal in that module, not editing handler logic. + // Manual sqlx impls so that legacy DB value 'etv' decodes as EthernetVirtualizerWithNvue. 
#[cfg(feature = "sqlx")] const PG_TYPE_NAME: &str = "network_virtualization_type_t"; @@ -65,6 +83,7 @@ impl sqlx::Encode<'_, sqlx::Postgres> for VpcVirtualizationType { let s = match self { Self::EthernetVirtualizer | Self::EthernetVirtualizerWithNvue => "etv", Self::Fnn => "fnn", + Self::Flat => "flat", }; <&str as sqlx::Encode>::encode(s, buf) } @@ -84,6 +103,7 @@ impl sqlx::Decode<'_, sqlx::Postgres> for VpcVirtualizationType { match s { "etv" | "etv_nvue" => Ok(Self::EthernetVirtualizer), "fnn" => Ok(Self::Fnn), + "flat" => Ok(Self::Flat), other => { Err(format!("invalid value {:?} for enum VpcVirtualizationType", other).into()) } @@ -124,6 +144,11 @@ mod sqlx_tests { fn encode_fnn_writes_fnn() { assert_eq!(encode_to_string(VpcVirtualizationType::Fnn), "fnn"); } + + #[test] + fn encode_flat_writes_flat() { + assert_eq!(encode_to_string(VpcVirtualizationType::Flat), "flat"); + } } impl fmt::Display for VpcVirtualizationType { @@ -131,6 +156,7 @@ impl fmt::Display for VpcVirtualizationType { match self { Self::EthernetVirtualizer | Self::EthernetVirtualizerWithNvue => write!(f, "etv"), Self::Fnn => write!(f, "fnn"), + Self::Flat => write!(f, "flat"), } } } @@ -150,6 +176,7 @@ impl FromStr for VpcVirtualizationType { match s { "etv" | "etv_nvue" => Ok(Self::EthernetVirtualizer), "fnn" => Ok(Self::Fnn), + "flat" => Ok(Self::Flat), x => Err(eyre::eyre!(format!("Unknown virt type {}", x))), } } @@ -161,7 +188,7 @@ impl FromStr for VpcVirtualizationType { /// for the purpose of FNN /30 allocations (where the host IP /// ends up being the 4th IP -- aka the second IP of the second /// /31 allocation in the /30), and will probably change with -/// a wider refactor + intro of Carbide IP Prefix Management. +/// a wider refactor + intro of NICo IP Prefix Management. 
pub fn get_host_ip(network: &IpNetwork) -> eyre::Result { match network.prefix() { // Single-host allocation: IPv4 /32 or IPv6 /128 @@ -239,6 +266,19 @@ mod tests { ); } + #[test] + fn from_str_flat_maps_to_flat() { + assert_eq!( + "flat".parse::().unwrap(), + VpcVirtualizationType::Flat + ); + } + + #[test] + fn display_flat_shows_flat() { + assert_eq!(VpcVirtualizationType::Flat.to_string(), "flat"); + } + #[test] fn test_get_host_ip_ipv4_slash32() { let network = IpNetwork::new("10.0.0.5".parse().unwrap(), 32).unwrap(); diff --git a/crates/rpc/proto/forge.proto b/crates/rpc/proto/forge.proto index 135f90eab8..e233dca9f9 100644 --- a/crates/rpc/proto/forge.proto +++ b/crates/rpc/proto/forge.proto @@ -1549,6 +1549,14 @@ enum VpcVirtualizationType { FNN_CLASSIC = 3; FNN_L3 = 4; FNN = 5; + // FLAT is for VPCs whose tenant instances live directly on the underlay + // (zero-DPU hosts, or hosts with their DPU in NIC mode). Their interfaces + // are bound to `HostInband` network segments rather than a NICo-managed + // overlay. Flat VPCs are still real tenant VPCs with a VNI and NSGs, but + // NICo doesn't drive their data plane -- routing and ACL enforcement + // between Flat VPCs and other VPCs is the network operator's + // responsibility. + FLAT = 6; } message VpcUpdateRequest { diff --git a/crates/rpc/src/network.rs b/crates/rpc/src/network.rs index 928ed3ed4a..bd44a39925 100644 --- a/crates/rpc/src/network.rs +++ b/crates/rpc/src/network.rs @@ -27,6 +27,7 @@ impl From for VpcVirtualizationType { #[allow(deprecated)] rpc::VpcVirtualizationType::EthernetVirtualizerWithNvue => Self::EthernetVirtualizer, rpc::VpcVirtualizationType::Fnn => Self::Fnn, + rpc::VpcVirtualizationType::Flat => Self::Flat, // Following are deprecated. 
rpc::VpcVirtualizationType::FnnClassic => Self::Fnn, rpc::VpcVirtualizationType::FnnL3 => Self::Fnn, @@ -42,6 +43,7 @@ impl From for rpc::VpcVirtualizationType { rpc::VpcVirtualizationType::EthernetVirtualizer } VpcVirtualizationType::Fnn => rpc::VpcVirtualizationType::Fnn, + VpcVirtualizationType::Flat => rpc::VpcVirtualizationType::Flat, } } } @@ -60,6 +62,7 @@ pub fn vpc_virtualization_type_try_from_rpc( VpcVirtualizationType::EthernetVirtualizer } x if x == rpc::VpcVirtualizationType::Fnn as i32 => VpcVirtualizationType::Fnn, + x if x == rpc::VpcVirtualizationType::Flat as i32 => VpcVirtualizationType::Flat, _ => { return Err(RpcDataConversionError::InvalidVpcVirtualizationType(value)); } @@ -100,4 +103,17 @@ mod test { let vtype = vpc_virtualization_type_try_from_rpc(0).unwrap(); assert_eq!(vtype, VpcVirtualizationType::EthernetVirtualizer); } + + #[test] + fn flat_round_trips() { + let rpc_vtype: rpc::VpcVirtualizationType = VpcVirtualizationType::Flat.into(); + assert_eq!(rpc_vtype, rpc::VpcVirtualizationType::Flat); + + let vtype: VpcVirtualizationType = rpc::VpcVirtualizationType::Flat.into(); + assert_eq!(vtype, VpcVirtualizationType::Flat); + + let vtype = + vpc_virtualization_type_try_from_rpc(rpc::VpcVirtualizationType::Flat as i32).unwrap(); + assert_eq!(vtype, VpcVirtualizationType::Flat); + } } diff --git a/docs/manuals/metrics/core_metrics.md b/docs/manuals/metrics/core_metrics.md index ec065fbb9a..d52e81f838 100644 --- a/docs/manuals/metrics/core_metrics.md +++ b/docs/manuals/metrics/core_metrics.md @@ -1,6 +1,6 @@ -# NVIDIA Infra Controller (NICo) Core Metrics +# NCX Infra Controller (NICo) core metrics -This file contains a list of metrics exported by NVIDIA Infra Controller (NICo). The list is auto-generated from an integration test (`test_integration`). Metrics for workflows which are not exercised by the test are missing. +This file contains a list of metrics exported by NCX Infra Controller (NICo). 
The list is auto-generated from an integration test (`test_integration`). Metrics for workflows which are not exercised by the test are missing. @@ -8,10 +8,10 @@ This file contains a list of metrics exported by NVIDIA Infra Controller (NICo). - + - + @@ -22,7 +22,7 @@ This file contains a list of metrics exported by NVIDIA Infra Controller (NICo). - + @@ -33,14 +33,14 @@ This file contains a list of metrics exported by NVIDIA Infra Controller (NICo). - - - - + + + + - - - + + + @@ -90,6 +90,8 @@ This file contains a list of metrics exported by NVIDIA Infra Controller (NICo). + + @@ -97,6 +99,8 @@ This file contains a list of metrics exported by NVIDIA Infra Controller (NICo). + + @@ -115,9 +119,11 @@ This file contains a list of metrics exported by NVIDIA Infra Controller (NICo). + + -
NameTypeDescription
carbide_api_db_queries_totalcounterThe amount of database queries that occurred inside a span
carbide_api_db_span_query_time_millisecondshistogramTotal time the request spent inside a span on database transactions
carbide_api_grpc_server_duration_millisecondshistogramProcessing time for a request on the carbide API server
carbide_api_readygaugeWhether the NICo API is running
carbide_api_readygaugeWhether the Forge Site Controller API is running
carbide_api_tls_connection_attempted_totalcounterThe amount of tls connections that were attempted
carbide_api_tls_connection_success_totalcounterThe amount of tls connections that were successful
carbide_api_tracing_spans_opengaugeWhether the NICo API is running
carbide_api_tracing_spans_opengaugeWhether the Forge Site Controller API is running
carbide_api_vault_request_duration_millisecondshistogramthe duration of outbound vault requests, in milliseconds
carbide_api_vault_requests_attempted_totalcounterThe amount of tls connections that were attempted
carbide_api_vault_requests_failed_totalcounterThe amount of tcp connections that were failures
carbide_concurrent_machine_updates_availablegaugeThe number of machines in the system that we will update concurrently.
carbide_db_pool_idle_connsgaugeThe amount of idle connections in the carbide database pool
carbide_db_pool_total_connsgaugeThe amount of total (active + idle) connections in the carbide database pool
carbide_dpu_agent_version_countgaugeThe amount of DPU agents which have reported a certain version.
carbide_dpu_agent_version_countgaugeThe amount of Forge DPU agents which have reported a certain version.
carbide_dpu_firmware_version_countgaugeThe amount of DPUs which have reported a certain firmware version.
carbide_dpus_healthy_countgaugeThe total number of DPUs in the system that have reported healthy in the last report. Healthy does not imply up - the report from the DPU might be outdated.
carbide_dpus_up_countgaugeThe total number of DPUs in the system that are up. Up means we have received a health report less than 5 minutes ago.
carbide_endpoint_exploration_machines_explored_overall_countgaugeThe total number of machines explored by machine type
carbide_endpoint_exploration_success_countgaugeThe amount of endpoint explorations that have been successful
carbide_endpoint_explorations_countgaugeThe amount of endpoint explorations that have been attempted
carbide_gpus_in_use_countgaugeThe total number of GPUs that are actively used by tenants in instances in the NICo deployment
carbide_gpus_total_countgaugeThe total number of GPUs available in the NICo deployment
carbide_gpus_usable_countgaugeThe remaining number of GPUs in the NICo deployment which are available for immediate instance creation
carbide_hosts_by_sku_countgaugeThe amount of hosts by SKU and device type ('unknown' for hosts without SKU)
carbide_gpus_in_use_countgaugeThe total number of GPUs that are actively used by tenants in instances in the Forge site
carbide_gpus_total_countgaugeThe total number of GPUs available in the Forge site
carbide_gpus_usable_countgaugeThe remaining number of GPUs in the Forge site which are available for immediate instance creation
carbide_hosts_by_sku_countgaugeThe amount of hosts by SKU and device type ('unknown' for hosts without SKU)
carbide_hosts_health_overrides_countgaugeThe amount of health overrides that are configured in the site
carbide_hosts_health_status_countgaugeThe total number of Managed Hosts in the system that have reported either a healthy or not healthy status - based on the presence of health probe alerts
carbide_hosts_in_use_countgaugeThe total number of hosts that are actively used by tenants as instances in the NICo deployment
carbide_hosts_usable_countgaugeThe remaining number of hosts in the NICo deployment which are available for immediate instance creation
carbide_hosts_health_status_countgaugeThe total number of objects in the system that have reported either a healthy or not healthy status - based on the presence of health probe alerts
carbide_hosts_in_use_countgaugeThe total number of hosts that are actively used by tenants as instances in the Forge site
carbide_hosts_usable_countgaugeThe remaining number of hosts in the Forge site which are available for immediate instance creation
carbide_hosts_with_bios_password_setgaugeThe total number of Hosts in the system that have their BIOS password set.
carbide_ib_partitions_enqueuer_iteration_latency_millisecondshistogramThe overall time it took to enqueue state handling tasks for all carbide_ib_partitions in the system
carbide_ib_partitions_iteration_latency_millisecondshistogramThe elapsed time in the last state processor iteration to handle objects of type carbide_ib_partitions
carbide_pending_dpu_nic_firmware_update_countgaugeThe number of machines in the system that need a firmware update.
carbide_pending_host_firmware_update_countgaugeThe number of host machines in the system that need a firmware update.
carbide_power_shelves_enqueuer_iteration_latency_millisecondshistogramThe overall time it took to enqueue state handling tasks for all carbide_power_shelves in the system
carbide_power_shelves_health_overrides_countgaugeThe amount of health overrides that are configured in the site
carbide_power_shelves_health_status_countgaugeThe total number of objects in the system that have reported either a healthy or not healthy status - based on the presence of health probe alerts
carbide_power_shelves_iteration_latency_millisecondshistogramThe elapsed time in the last state processor iteration to handle objects of type carbide_power_shelves
carbide_power_shelves_object_tasks_enqueued_totalcounterThe amount of types that object handling tasks that have been freshly enqueued for objects of type carbide_power_shelves
carbide_power_shelves_totalgaugeThe total number of carbide_power_shelves in the system
carbide_preingestion_waiting_downloadgaugeThe amount of machines that are waiting for firmware downloads on other machines to complete before doing their own
carbide_preingestion_waiting_installationgaugeThe amount of machines which have had firmware uploaded to them and are currently in the process of installing that firmware
carbide_racks_enqueuer_iteration_latency_millisecondshistogramThe overall time it took to enqueue state handling tasks for all carbide_racks in the system
carbide_racks_health_overrides_countgaugeThe amount of health overrides that are configured in the site
carbide_racks_health_status_countgaugeThe total number of objects in the system that have reported either a healthy or not healthy status - based on the presence of health probe alerts
carbide_racks_iteration_latency_millisecondshistogramThe elapsed time in the last state processor iteration to handle objects of type carbide_racks
carbide_racks_object_tasks_enqueued_totalcounterThe amount of types that object handling tasks that have been freshly enqueued for objects of type carbide_racks
carbide_racks_totalgaugeThe total number of carbide_racks in the system
carbide_site_explorer_enabledgaugeWhether site-explorer is enabled (1) or paused (0)
carbide_site_explorer_iteration_latency_millisecondshistogramThe time it took to perform one site explorer iteration
carbide_switches_enqueuer_iteration_latency_millisecondshistogramThe overall time it took to enqueue state handling tasks for all carbide_switches in the system
carbide_switches_health_overrides_countgaugeThe amount of health overrides that are configured in the site
carbide_switches_health_status_countgaugeThe total number of objects in the system that have reported either a healthy or not healthy status - based on the presence of health probe alerts
carbide_switches_iteration_latency_millisecondshistogramThe elapsed time in the last state processor iteration to handle objects of type carbide_switches
carbide_switches_object_tasks_enqueued_totalcounterThe amount of types that object handling tasks that have been freshly enqueued for objects of type carbide_switches
carbide_switches_totalgaugeThe total number of carbide_switches in the system
carbide_total_ips_countgaugeThe total number of ips in the site
carbide_unavailable_dpu_nic_firmware_update_countgaugeThe number of machines in the system that need a firmware update but are unavailable for update.
+