From d02982bb2b6f65b25794a662750e3a7ea667763c Mon Sep 17 00:00:00 2001 From: Srinivasa Murthy Date: Tue, 19 May 2026 00:17:29 +0000 Subject: [PATCH 1/3] feat: Changes to svpc/dpa to allow Spectrum X partitioning and allow flexible tenant configuration Signed-off-by: Srinivasa Murthy --- Cargo.toml | 4 +- crates/admin-cli/Cargo.toml | 4 +- crates/admin-cli/src/cfg/cli_options.rs | 11 +- crates/admin-cli/src/dpa/ensure/args.rs | 3 + .../admin-cli/src/instance/allocate/args.rs | 9 +- crates/admin-cli/src/instance/mod.rs | 3 + .../src/instance/update_spx_config/args.rs | 33 + .../src/instance/update_spx_config/cmd.rs | 58 ++ .../src/instance/update_spx_config/mod.rs | 43 + crates/admin-cli/src/main.rs | 2 + crates/admin-cli/src/rpc.rs | 59 +- .../src/spx_partition}/mod.rs | 18 +- .../admin-cli/src/spx_partition/show/args.rs | 34 + .../admin-cli/src/spx_partition/show/cmd.rs | 162 ++++ .../src/spx_partition/show/mod.rs} | 24 +- crates/admin-cli/src/spx_partition/tests.rs | 87 ++ crates/agent/src/tests/full.rs | 2 + .../20260413193743_spx_partitions.sql | 32 + crates/api-db/src/dpa_interface.rs | 40 +- crates/api-db/src/instance.rs | 95 +- crates/api-db/src/lib.rs | 1 + crates/api-db/src/machine.rs | 23 + crates/api-db/src/resource_pool.rs | 9 +- crates/api-db/src/spx_partition.rs | 190 ++++ crates/api-model/src/dpa_interface/mod.rs | 72 +- crates/api-model/src/dpa_interface/slas.rs | 2 - crates/api-model/src/instance/config.rs | 5 + crates/api-model/src/instance/config/spx.rs | 126 +++ crates/api-model/src/instance/mod.rs | 1 + crates/api-model/src/instance/snapshot.rs | 21 + crates/api-model/src/instance/status.rs | 4 + crates/api-model/src/instance/status/spx.rs | 163 ++++ crates/api-model/src/lib.rs | 1 + crates/api-model/src/machine/json.rs | 3 + crates/api-model/src/machine/mod.rs | 5 + crates/api-model/src/machine/spx.rs | 129 +++ crates/api-model/src/resource_pool/common.rs | 4 + crates/api-model/src/resource_pool/mod.rs | 4 +- 
.../api-model/src/rpc_conv/dpa_interface.rs | 10 +- .../api-model/src/rpc_conv/instance/config.rs | 15 + .../src/rpc_conv/instance/snapshot.rs | 4 + .../api-model/src/rpc_conv/instance/status.rs | 11 + crates/api-model/src/rpc_conv/machine/mod.rs | 2 + crates/api-model/src/spx_partition.rs | 169 ++++ crates/api/Cargo.toml | 60 +- crates/api/src/api.rs | 38 +- crates/api/src/auth/internal_rbac_rules.rs | 4 + crates/api/src/cfg/file.rs | 22 +- .../src/cfg/test_data/initial_objects.toml | 1 - crates/api/src/dpa/handler.rs | 183 +++- crates/api/src/dpa_monitor/metrics.rs | 146 +++ crates/api/src/dpa_monitor/mod.rs | 915 ++++++++++++++++++ crates/api/src/handlers/dpa.rs | 62 +- crates/api/src/handlers/health.rs | 1 + crates/api/src/handlers/instance.rs | 66 +- crates/api/src/handlers/mod.rs | 1 + crates/api/src/handlers/spx_partition.rs | 314 ++++++ crates/api/src/instance/mod.rs | 216 +++++ crates/api/src/lib.rs | 1 + crates/api/src/setup.rs | 66 +- .../src/state_controller/common_services.rs | 3 - .../state_controller/dpa_interface/handler.rs | 607 ------------ .../src/state_controller/dpa_interface/io.rs | 164 ---- .../state_controller/dpa_interface/metrics.rs | 60 -- .../src/state_controller/machine/handler.rs | 84 +- crates/api/src/state_controller/mod.rs | 1 - crates/api/src/tests/client_resolution.rs | 1 + .../src/tests/common/api_fixtures/instance.rs | 3 + .../api/src/tests/common/api_fixtures/mod.rs | 16 +- crates/api/src/tests/common/rpc_builder.rs | 1 + crates/api/src/tests/compute_allocation.rs | 1 + crates/api/src/tests/dpa_interfaces.rs | 3 + crates/api/src/tests/dpu_reprovisioning.rs | 9 + .../api/src/tests/host_bmc_firmware_test.rs | 13 + crates/api/src/tests/instance.rs | 28 + crates/api/src/tests/instance_allocate.rs | 13 + .../api/src/tests/instance_batch_allocate.rs | 2 + .../api/src/tests/instance_config_update.rs | 13 + crates/api/src/tests/instance_find.rs | 1 + .../api/src/tests/instance_ipxe_behaviors.rs | 1 + crates/api/src/tests/instance_os.rs 
| 8 + crates/api/src/tests/instance_type.rs | 3 + crates/api/src/tests/machine_states.rs | 2 + crates/api/src/tests/maintenance.rs | 2 + .../api/src/tests/network_security_group.rs | 5 + .../tests/power_shelf_state_controller/mod.rs | 1 - .../src/tests/switch_state_controller/mod.rs | 1 - crates/api/src/web/mod.rs | 3 + crates/api/src/web/spx_partition.rs | 124 +++ crates/api/templates/base.html | 1 + crates/api/templates/dpa_detail.html | 1 - crates/api/templates/spx_partition_show.html | 29 + crates/bmc-explorer/Cargo.toml | 46 +- crates/bmc-mock/Cargo.toml | 2 +- crates/bmc-proxy/Cargo.toml | 31 +- crates/dpf/Cargo.toml | 12 +- crates/dpu-fmds-shared/Cargo.toml | 4 +- crates/fmds/Cargo.toml | 4 +- crates/ipmi/Cargo.toml | 1 - crates/libmlx/Cargo.toml | 5 + crates/libmlx/src/device/discovery.rs | 32 + crates/libmlx/src/lockdown/lockdown.rs | 14 + crates/machine-a-tron/src/api_client.rs | 1 + crates/metrics-endpoint/Cargo.toml | 12 +- crates/metrics-utils/Cargo.toml | 4 +- crates/redfish/Cargo.toml | 4 +- crates/rpc-utils/Cargo.toml | 2 +- crates/rpc/Cargo.toml | 20 +- crates/rpc/build.rs | 21 + crates/rpc/proto/common.proto | 5 + crates/rpc/proto/dpa_rpc.proto | 16 +- crates/rpc/proto/forge.proto | 98 +- crates/rpc/src/lib.rs | 9 + crates/rvs/src/client/io.rs | 1 + crates/scout/Cargo.toml | 5 + crates/scout/src/mlx_device.rs | 17 + crates/site-explorer/Cargo.toml | 2 +- crates/ssh-console/Cargo.toml | 8 +- crates/state-controller/Cargo.toml | 8 +- crates/utils/Cargo.toml | 2 +- crates/uuid/src/lib.rs | 1 + crates/uuid/src/spx/mod.rs | 46 + .../api/config-files/nico-api-config.toml | 2 +- .../nico-api/files/carbide-api-config.toml | 2 +- .../files/carbide-bmc-proxy.toml | 5 +- pxe/Makefile.toml | 4 +- 126 files changed, 4229 insertions(+), 1214 deletions(-) create mode 100644 crates/admin-cli/src/instance/update_spx_config/args.rs create mode 100644 crates/admin-cli/src/instance/update_spx_config/cmd.rs create mode 100644 
crates/admin-cli/src/instance/update_spx_config/mod.rs rename crates/{api/src/state_controller/dpa_interface => admin-cli/src/spx_partition}/mod.rs (75%) create mode 100644 crates/admin-cli/src/spx_partition/show/args.rs create mode 100644 crates/admin-cli/src/spx_partition/show/cmd.rs rename crates/{api/src/state_controller/dpa_interface/context.rs => admin-cli/src/spx_partition/show/mod.rs} (61%) create mode 100644 crates/admin-cli/src/spx_partition/tests.rs create mode 100644 crates/api-db/migrations/20260413193743_spx_partitions.sql create mode 100644 crates/api-db/src/spx_partition.rs create mode 100644 crates/api-model/src/instance/config/spx.rs create mode 100644 crates/api-model/src/instance/status/spx.rs create mode 100644 crates/api-model/src/machine/spx.rs create mode 100644 crates/api-model/src/spx_partition.rs create mode 100644 crates/api/src/dpa_monitor/metrics.rs create mode 100644 crates/api/src/dpa_monitor/mod.rs create mode 100644 crates/api/src/handlers/spx_partition.rs delete mode 100644 crates/api/src/state_controller/dpa_interface/handler.rs delete mode 100644 crates/api/src/state_controller/dpa_interface/io.rs delete mode 100644 crates/api/src/state_controller/dpa_interface/metrics.rs create mode 100644 crates/api/src/web/spx_partition.rs create mode 100644 crates/api/templates/spx_partition_show.html create mode 100644 crates/uuid/src/spx/mod.rs diff --git a/Cargo.toml b/Cargo.toml index ab65a3e15a..c8a4021b68 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -261,8 +261,8 @@ tower-test = "0.4" [profile.release] debug = "line-tables-only" -debug-assertions = true # Add some extra assurance during development -overflow-checks = true # Add some extra assurance during development +debug-assertions = true # Add some extra assurance during development +overflow-checks = true # Add some extra assurance during development [workspace.lints.clippy] cloned_instead_of_copied = "warn" diff --git a/crates/admin-cli/Cargo.toml b/crates/admin-cli/Cargo.toml 
index 85c67dd883..d95278c705 100644 --- a/crates/admin-cli/Cargo.toml +++ b/crates/admin-cli/Cargo.toml @@ -67,8 +67,8 @@ toml = { workspace = true } tonic = { workspace = true } tracing = { workspace = true } tracing-subscriber = { features = [ - "env-filter", - "local-time", + "env-filter", + "local-time", ], workspace = true } urlencoding = { workspace = true } uuid = { workspace = true } diff --git a/crates/admin-cli/src/cfg/cli_options.rs b/crates/admin-cli/src/cfg/cli_options.rs index 98697c73c5..a24646397d 100644 --- a/crates/admin-cli/src/cfg/cli_options.rs +++ b/crates/admin-cli/src/cfg/cli_options.rs @@ -25,8 +25,8 @@ use crate::{ machine_interfaces, machine_validation, managed_host, managed_switch, mlx, network_devices, network_security_group, network_segment, nvl_logical_partition, nvl_partition, operating_system, os_image, ping, power_shelf, rack, rack_firmware, redfish, resource_pool, - rms, route_server, scout_stream, set, site_explorer, sku, ssh, switch, tenant, tenant_keyset, - tpm_ca, trim_table, version, vpc, vpc_peering, vpc_prefix, + rms, route_server, scout_stream, set, site_explorer, sku, spx_partition, ssh, switch, tenant, + tenant_keyset, tpm_ca, trim_table, version, vpc, vpc_peering, vpc_prefix, }; #[derive(Parser, Debug)] @@ -338,6 +338,13 @@ pub enum CliCommand { )] NvlPartition(nvl_partition::Cmd), + #[clap( + about = "SPX Partition related handling", + subcommand, + visible_alias = "spx" + )] + SpxPartition(spx_partition::Cmd), + #[clap( about = "Logical partition related handling", subcommand, diff --git a/crates/admin-cli/src/dpa/ensure/args.rs b/crates/admin-cli/src/dpa/ensure/args.rs index 439f1b348c..a5179856d9 100644 --- a/crates/admin-cli/src/dpa/ensure/args.rs +++ b/crates/admin-cli/src/dpa/ensure/args.rs @@ -28,6 +28,8 @@ pub struct Args { pub device_type: String, #[clap(help = "PCI name (e.g. 5e:00.0)")] pub pci_name: String, + #[clap(help = "Device description (e.g. 
NVIDIA BlueField-3 B3140L E-Series FHHL SuperNIC)")] + pub device_description: Option, } impl From for ::rpc::forge::DpaInterfaceCreationRequest { @@ -37,6 +39,7 @@ impl From for ::rpc::forge::DpaInterfaceCreationRequest { mac_addr: args.mac_addr, device_type: args.device_type, pci_name: args.pci_name, + device_description: args.device_description, } } } diff --git a/crates/admin-cli/src/instance/allocate/args.rs b/crates/admin-cli/src/instance/allocate/args.rs index f92a58e56d..95cc83108a 100644 --- a/crates/admin-cli/src/instance/allocate/args.rs +++ b/crates/admin-cli/src/instance/allocate/args.rs @@ -18,7 +18,7 @@ use carbide_uuid::machine::MachineId; use carbide_uuid::vpc::VpcPrefixId; use clap::{ArgGroup, Parser}; -use rpc::forge::InstanceOperatingSystemConfig; +use rpc::forge::{InstanceOperatingSystemConfig, InstanceSpxConfig}; #[derive(Parser, Debug)] #[clap(group(ArgGroup::new("selector").required(true).args(&["subnet", "vpc_prefix_id"])))] @@ -58,6 +58,13 @@ pub struct Args { #[clap(long, help = "OS definition in JSON format", value_name = "OS_JSON")] pub os: Option, + #[clap( + long, + help = "SPX configuration in JSON format", + value_name = "SPX_JSON" + )] + pub spxconfig: Option, + #[clap(long, help = "The subnet to assign to a VF")] pub vf_subnet: Vec, diff --git a/crates/admin-cli/src/instance/mod.rs b/crates/admin-cli/src/instance/mod.rs index 37757ae38f..3acbfcf291 100644 --- a/crates/admin-cli/src/instance/mod.rs +++ b/crates/admin-cli/src/instance/mod.rs @@ -23,6 +23,7 @@ mod show; mod update_ib_config; mod update_nvlink_config; mod update_os; +mod update_spx_config; // Cross-module re-exports for jump module // Cross-module re-export for rpc module @@ -53,4 +54,6 @@ pub enum Cmd { UpdateIbConfig(update_ib_config::Args), #[clap(about = "Update instance NVLink configuration")] UpdateNvLinkConfig(update_nvlink_config::Args), + #[clap(about = "Update instance SPX configuration")] + UpdateSpxConfig(update_spx_config::Args), } diff --git 
a/crates/admin-cli/src/instance/update_spx_config/args.rs b/crates/admin-cli/src/instance/update_spx_config/args.rs new file mode 100644 index 0000000000..b36603b078 --- /dev/null +++ b/crates/admin-cli/src/instance/update_spx_config/args.rs @@ -0,0 +1,33 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +use carbide_uuid::instance::InstanceId; +use clap::Parser; +use rpc::forge::InstanceSpxConfig; + +#[derive(Parser, Debug)] +pub struct Args { + #[clap(short, long, required(true))] + pub instance: InstanceId, + #[clap( + long, + required(true), + help = "SPX configuration in JSON format", + value_name = "SPX_JSON" + )] + pub config: InstanceSpxConfig, +} diff --git a/crates/admin-cli/src/instance/update_spx_config/cmd.rs b/crates/admin-cli/src/instance/update_spx_config/cmd.rs new file mode 100644 index 0000000000..93e976be1b --- /dev/null +++ b/crates/admin-cli/src/instance/update_spx_config/cmd.rs @@ -0,0 +1,58 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +use ::rpc::admin_cli::{CarbideCliError, CarbideCliResult}; + +use super::args::Args; +use crate::instance::common::GlobalOptions; +use crate::rpc::ApiClient; + +pub async fn update_spx_config( + api_client: &ApiClient, + update_request: Args, + opts: GlobalOptions<'_>, +) -> CarbideCliResult<()> { + if opts.cloud_unsafe_op.is_none() { + return Err(CarbideCliError::GenericError( + "Operation not allowed due to potential inconsistencies with cloud database." + .to_owned(), + )); + } + + match api_client + .update_instance_config_with( + update_request.instance, + |config| { + config.spxconfig = Some(update_request.config); + }, + |_metadata| {}, + opts.cloud_unsafe_op, + ) + .await + { + Ok(i) => { + tracing::info!( + "update-spx-config was successful. Updated instance: {:?}", + i + ); + } + Err(e) => { + tracing::info!("update-spx-config failed with {} ", e); + } + }; + Ok(()) +} diff --git a/crates/admin-cli/src/instance/update_spx_config/mod.rs b/crates/admin-cli/src/instance/update_spx_config/mod.rs new file mode 100644 index 0000000000..315b79aa20 --- /dev/null +++ b/crates/admin-cli/src/instance/update_spx_config/mod.rs @@ -0,0 +1,43 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +pub mod args; +pub mod cmd; + +use ::rpc::admin_cli::CarbideCliResult; +pub use args::Args; + +use super::common::GlobalOptions; +use crate::cfg::run::Run; +use crate::cfg::runtime::RuntimeContext; + +impl Run for Args { + async fn run(self, ctx: &mut RuntimeContext) -> CarbideCliResult<()> { + let opts = GlobalOptions { + format: ctx.config.format, + page_size: ctx.config.page_size, + sort_by: &ctx.config.sort_by, + cloud_unsafe_op: if ctx.config.cloud_unsafe_op_enabled { + Some("enabled".to_string()) + } else { + None + }, + }; + cmd::update_spx_config(&ctx.api_client, self, opts).await?; + Ok(()) + } +} diff --git a/crates/admin-cli/src/main.rs b/crates/admin-cli/src/main.rs index 4c8ad32e16..1ccfd031ce 100644 --- a/crates/admin-cli/src/main.rs +++ b/crates/admin-cli/src/main.rs @@ -94,6 +94,7 @@ mod scout_stream; mod set; mod site_explorer; mod sku; +mod spx_partition; mod ssh; mod switch; mod tenant; @@ -240,6 +241,7 @@ async fn main() -> color_eyre::Result<()> { CliCommand::NetworkSecurityGroup(cmd) => cmd.dispatch(ctx).await?, CliCommand::NetworkSegment(cmd) => cmd.dispatch(ctx).await?, CliCommand::NvlPartition(cmd) => cmd.dispatch(ctx).await?, + CliCommand::SpxPartition(cmd) => cmd.dispatch(ctx).await?, CliCommand::IpxeTemplate(cmd) => cmd.dispatch(ctx).await?, CliCommand::OsImage(cmd) => cmd.dispatch(ctx).await?, CliCommand::OperatingSystem(cmd) => cmd.dispatch(ctx).await?, diff --git a/crates/admin-cli/src/rpc.rs b/crates/admin-cli/src/rpc.rs index a275dfd269..ff4a5f7d1b 100644 --- a/crates/admin-cli/src/rpc.rs +++ 
b/crates/admin-cli/src/rpc.rs @@ -26,8 +26,9 @@ use ::rpc::forge::{ IdentifySerialRequest, MachineHardwareInfo, MachineHardwareInfoUpdateType, ModifyDpfStateRequest, NetworkPrefix, NetworkSecurityGroupAttributes, NetworkSegmentCreationRequest, NetworkSegmentType, Remediation, RemediationIdList, - RemediationList, UpdateMachineHardwareInfoRequest, UpdateNetworkSecurityGroupRequest, - VpcCreationRequest, VpcSearchFilter, VpcVirtualizationType, VpcsByIdsRequest, + RemediationList, SpxPartitionSearchFilter, UpdateMachineHardwareInfoRequest, + UpdateNetworkSecurityGroupRequest, VpcCreationRequest, VpcSearchFilter, VpcVirtualizationType, + VpcsByIdsRequest, }; use ::rpc::forge_api_client::ForgeApiClient; use ::rpc::{Machine, NetworkSegment}; @@ -41,6 +42,7 @@ use carbide_uuid::network::NetworkSegmentId; use carbide_uuid::nvlink::{NvLinkLogicalPartitionId, NvLinkPartitionId}; use carbide_uuid::power_shelf::PowerShelfId; use carbide_uuid::rack::RackId; +use carbide_uuid::spx::SpxPartitionId; use carbide_uuid::switch::SwitchId; use carbide_uuid::vpc::VpcId; use mac_address::MacAddress; @@ -1035,6 +1037,33 @@ impl ApiClient { Ok(all_list) } + pub async fn get_all_spx_partitions( + &self, + tenant_org_id: Option, + name: Option, + page_size: usize, + ) -> CarbideCliResult { + let all_ids = self.get_spx_partition_ids(tenant_org_id, name).await?; + let mut all_list = rpc::SpxPartitionList { + spx_partitions: Vec::with_capacity(all_ids.spx_partition_ids.len()), + }; + + for ids in all_ids.spx_partition_ids.chunks(page_size) { + let list = self.get_spx_partitions_by_ids(ids).await?; + all_list.spx_partitions.extend(list.spx_partitions); + } + + Ok(all_list) + } + + pub async fn get_one_spx_partition( + &self, + spx_partition_id: SpxPartitionId, + ) -> CarbideCliResult { + let partitions = self.get_spx_partitions_by_ids(&[spx_partition_id]).await?; + Ok(partitions) + } + pub async fn get_one_ib_partition( &self, ib_partition_id: IBPartitionId, @@ -1056,6 +1085,19 @@ impl ApiClient 
{ Ok(self.0.find_ib_partition_ids(request).await?) } + async fn get_spx_partition_ids( + &self, + tenant_org_id: Option, + name: Option, + ) -> CarbideCliResult { + let request = SpxPartitionSearchFilter { + tenant_org_id, + name, + label: None, + }; + Ok(self.0.find_spx_partition_ids(request).await?) + } + async fn get_ib_partitions_by_ids( &self, ids: &[IBPartitionId], @@ -1067,6 +1109,16 @@ impl ApiClient { Ok(self.0.find_ib_partitions_by_ids(request).await?) } + async fn get_spx_partitions_by_ids( + &self, + ids: &[SpxPartitionId], + ) -> CarbideCliResult { + let request = rpc::SpxPartitionsByIdsRequest { + spx_partition_ids: Vec::from(ids), + }; + Ok(self.0.find_spx_partitions_by_ids(request).await?) + } + pub async fn get_all_keysets( &self, tenant_org_id: Option, @@ -1267,7 +1319,7 @@ impl ApiClient { allocate_instance .tenant_org .as_deref() - .unwrap_or("Forge-simulation-tenant"), + .unwrap_or("devenv_test_org"), ) } else if !allocate_instance.vpc_prefix_id.is_empty() { let Some(discovery_info) = &machine.discovery_info else { @@ -1413,6 +1465,7 @@ impl ApiClient { infiniband: None, dpu_extension_services: None, nvlink: None, + spxconfig: allocate_instance.spxconfig.clone(), }; let mut labels = vec![ diff --git a/crates/api/src/state_controller/dpa_interface/mod.rs b/crates/admin-cli/src/spx_partition/mod.rs similarity index 75% rename from crates/api/src/state_controller/dpa_interface/mod.rs rename to crates/admin-cli/src/spx_partition/mod.rs index bfd3599e21..6a6ee92a46 100644 --- a/crates/api/src/state_controller/dpa_interface/mod.rs +++ b/crates/admin-cli/src/spx_partition/mod.rs @@ -15,9 +15,17 @@ * limitations under the License. */ -//! 
State Controller implementation for Dpa Interface +mod show; -pub mod context; -pub mod handler; -pub mod io; -pub mod metrics; +#[cfg(test)] +mod tests; + +use clap::Parser; + +use crate::cfg::dispatch::Dispatch; + +#[derive(Parser, Debug, Dispatch)] +pub enum Cmd { + #[clap(about = "Display SpectrumX Partition information")] + Show(show::Args), +} diff --git a/crates/admin-cli/src/spx_partition/show/args.rs b/crates/admin-cli/src/spx_partition/show/args.rs new file mode 100644 index 0000000000..e486e7f90b --- /dev/null +++ b/crates/admin-cli/src/spx_partition/show/args.rs @@ -0,0 +1,34 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +use carbide_uuid::spx::SpxPartitionId; +use clap::Parser; + +#[derive(Parser, Debug)] +pub struct Args { + #[clap( + default_value(None), + help = "The SPX Partition ID to query, leave empty for all (default)" + )] + pub id: Option, + + #[clap(short, long, help = "The Tenant Org ID to query")] + pub tenant_org_id: Option, + + #[clap(short, long, help = "The SPX Partition name to query")] + pub name: Option, +} diff --git a/crates/admin-cli/src/spx_partition/show/cmd.rs b/crates/admin-cli/src/spx_partition/show/cmd.rs new file mode 100644 index 0000000000..802edc5865 --- /dev/null +++ b/crates/admin-cli/src/spx_partition/show/cmd.rs @@ -0,0 +1,162 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +use std::fmt::Write; + +use ::rpc::admin_cli::{CarbideCliError, CarbideCliResult, OutputFormat}; +use ::rpc::forge as forgerpc; +use carbide_uuid::spx::SpxPartitionId; +use prettytable::{Table, row}; + +use super::args::Args; +use crate::rpc::ApiClient; + +pub async fn show( + args: Args, + output_format: OutputFormat, + api_client: &ApiClient, + page_size: usize, +) -> CarbideCliResult<()> { + let is_json = output_format == OutputFormat::Json; + if let Some(id) = args.id { + show_spx_partition_details(id, is_json, api_client).await?; + } else { + show_spx_partitions( + is_json, + api_client, + page_size, + args.tenant_org_id, + args.name, + ) + .await?; + } + Ok(()) +} + +async fn show_spx_partitions( + json: bool, + api_client: &ApiClient, + page_size: usize, + tenant_org_id: Option, + name: Option, +) -> CarbideCliResult<()> { + let all_spx_partitions = match api_client + .get_all_spx_partitions(tenant_org_id, name, page_size) + .await + { + Ok(all_spx_partition_ids) => all_spx_partition_ids, + Err(e) => return Err(e), + }; + if json { + println!("{}", serde_json::to_string_pretty(&all_spx_partitions)?); + } else { + convert_spx_partitions_to_nice_table(all_spx_partitions).printstd(); + } + Ok(()) +} + +async fn show_spx_partition_details( + id: SpxPartitionId, + json: bool, + api_client: &ApiClient, +) -> CarbideCliResult<()> { + let spx_partitions = match api_client.get_one_spx_partition(id).await { + Ok(instances) => instances, + Err(e) => return Err(e), + }; + + let Some(spx_partition) = spx_partitions.spx_partitions.into_iter().next() else { + return Err(CarbideCliError::GenericError( + "Unknown SPX Partition ID".to_string(), + )); + }; + + if json { + println!("{}", serde_json::to_string_pretty(&spx_partition)?); + } else { + println!( + "{}", + convert_spx_partition_to_nice_format(spx_partition).unwrap_or_else(|x| x.to_string()) + ); + } + Ok(()) +} + +fn convert_spx_partitions_to_nice_table(spx_partitions: forgerpc::SpxPartitionList) -> Box { + 
let mut table = Table::new(); + + table.set_titles(row!["Id", "Name", "TenantOrg", "Vni",]); + + for spx_partition in spx_partitions.spx_partitions { + let metadata = spx_partition.metadata.as_ref(); + + table.add_row(row![ + spx_partition.id.unwrap_or_default(), + metadata + .as_ref() + .map(|m| m.name.as_str()) + .unwrap_or_default(), + spx_partition.tenant_organization_id, + spx_partition.vni, + ]); + } + + table.into() +} + +fn convert_spx_partition_to_nice_format( + spx_partition: forgerpc::SpxPartition, +) -> CarbideCliResult { + let width = 25; + let mut lines = String::new(); + + let tenant_organization_id = spx_partition.tenant_organization_id; + let metadata = spx_partition.metadata; + let labels = crate::metadata::fmt_labels_as_kv_pairs(metadata.as_ref()); + + let id = spx_partition.id.map(|i| i.to_string()).unwrap_or_default(); + let labels = labels.join(", "); + + let vni = spx_partition.vni.to_string(); + + let data: Vec<(&str, &str)> = vec![ + ("ID", &id), + ( + "NAME", + metadata + .as_ref() + .map(|m| m.name.as_str()) + .unwrap_or_default(), + ), + ("TENANT ORG", &tenant_organization_id), + ("VNI", &vni), + ("LABELS", &labels), + ( + "DESCRIPTION", + metadata + .as_ref() + .map(|m| m.description.as_str()) + .unwrap_or_default(), + ), + ]; + + for (key, value) in data { + writeln!(&mut lines, "{key: CarbideCliResult<()> { + cmd::show( + self, + ctx.config.format, + &ctx.api_client, + ctx.config.page_size, + ) + .await + } } diff --git a/crates/admin-cli/src/spx_partition/tests.rs b/crates/admin-cli/src/spx_partition/tests.rs new file mode 100644 index 0000000000..1c23db0903 --- /dev/null +++ b/crates/admin-cli/src/spx_partition/tests.rs @@ -0,0 +1,87 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// The intent of the tests.rs file is to test the integrity of the +// command, including things like basic structure parsing, enum +// translations, and any external input validators that are +// configured. Specific "categories" are: +// +// Command Structure - Baseline debug_assert() of the entire command. +// Argument Parsing - Ensure required/optional arg combinations parse correctly. + +use clap::{CommandFactory, Parser}; + +use super::*; + +// verify_cmd_structure runs a baseline clap debug_assert() +// to do basic command configuration checking and validation, +// ensuring things like unique argument definitions, group +// configurations, argument references, etc. Things that would +// otherwise be missed until runtime. +#[test] +fn verify_cmd_structure() { + Cmd::command().debug_assert(); +} + +///////////////////////////////////////////////////////////////////////////// +// Argument Parsing +// +// This section contains tests specific to argument parsing, +// including testing required arguments, as well as optional +// flag-specific checking. + +// parse_show_no_args ensures show parses with no +// arguments (all partitions). +#[test] +fn parse_show_no_args() { + let cmd = Cmd::try_parse_from(["ib-partition", "show"]).expect("should parse show"); + + match cmd { + Cmd::Show(args) => { + assert!(args.id.is_none()); + assert!(args.tenant_org_id.is_none()); + assert!(args.name.is_none()); + } + } +} + +// parse_show_with_tenant ensures show parses with +// --tenant-org-id. 
+#[test] +fn parse_show_with_tenant() { + let cmd = Cmd::try_parse_from(["ib-partition", "show", "--tenant-org-id", "tenant-123"]) + .expect("should parse show with tenant"); + + match cmd { + Cmd::Show(args) => { + assert_eq!(args.tenant_org_id, Some("tenant-123".to_string())); + } + } +} + +// parse_show_with_name ensures show parses with --name. +#[test] +fn parse_show_with_name() { + let cmd = Cmd::try_parse_from(["ib-partition", "show", "--name", "my-partition"]) + .expect("should parse show with name"); + + match cmd { + Cmd::Show(args) => { + assert_eq!(args.name, Some("my-partition".to_string())); + } + } +} diff --git a/crates/agent/src/tests/full.rs b/crates/agent/src/tests/full.rs index 965fc53386..84657c77a8 100644 --- a/crates/agent/src/tests/full.rs +++ b/crates/agent/src/tests/full.rs @@ -782,6 +782,7 @@ async fn handle_netconf(AxumState(state): AxumState>>) -> impl network_security_group_id: None, dpu_extension_services: None, nvlink: None, + spxconfig: None, }), status: Some(rpc::InstanceStatus { @@ -815,6 +816,7 @@ async fn handle_netconf(AxumState(state): AxumState>>) -> impl }), configs_synced: rpc::SyncState::Synced.into(), update: None, + spx_status: None, }), network_config_version: "V1-T1748645613333257".to_string(), ib_config_version: "V1-T1748645613333260".to_string(), diff --git a/crates/api-db/migrations/20260413193743_spx_partitions.sql b/crates/api-db/migrations/20260413193743_spx_partitions.sql new file mode 100644 index 0000000000..9c543dd77a --- /dev/null +++ b/crates/api-db/migrations/20260413193743_spx_partitions.sql @@ -0,0 +1,32 @@ +-- Create spx_partitions; add spx_config/spx_config_version to instances, device_description to dpa_interfaces, spx_status_observation to machines; drop dpa_interfaces.network_status_observation +CREATE TABLE IF NOT EXISTS spx_partitions +( + id uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL, + + name VARCHAR(256) NOT NULL DEFAULT (''), + description VARCHAR(1024) NOT NULL DEFAULT (''), + tenant_organization_id VARCHAR(64) NOT NULL, + config_version VARCHAR(64) NOT NULL, + vni integer NULL UNIQUE, + + created TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated
TIMESTAMPTZ NOT NULL DEFAULT NOW(), + deleted TIMESTAMPTZ +); + +ALTER TABLE IF EXISTS instances + ADD COLUMN IF NOT EXISTS spx_config_version VARCHAR(64) NOT NULL DEFAULT ('V1-T1666644937952267'), + ADD COLUMN IF NOT EXISTS spx_config jsonb NOT NULL DEFAULT ('{"spx_attachments": []}') +; + +ALTER TABLE IF EXISTS dpa_interfaces + ADD COLUMN IF NOT EXISTS device_description VARCHAR(256) +; + +ALTER TABLE IF EXISTS dpa_interfaces + DROP COLUMN IF EXISTS network_status_observation; +; + +ALTER TABLE IF EXISTS machines + ADD COLUMN IF NOT EXISTS spx_status_observation jsonb NULL +; diff --git a/crates/api-db/src/dpa_interface.rs b/crates/api-db/src/dpa_interface.rs index 7fba44d361..b6eacca8ad 100644 --- a/crates/api-db/src/dpa_interface.rs +++ b/crates/api-db/src/dpa_interface.rs @@ -26,8 +26,7 @@ use eyre::eyre; use mac_address::MacAddress; use model::controller_outcome::PersistentStateHandlerOutcome; use model::dpa_interface::{ - DpaInterface, DpaInterfaceControllerState, DpaInterfaceNetworkConfig, - DpaInterfaceNetworkStatusObservation, NewDpaInterface, + DpaInterface, DpaInterfaceControllerState, DpaInterfaceNetworkConfig, NewDpaInterface, }; use model::machine::LoadSnapshotOptions; use sqlx::PgConnection; @@ -44,9 +43,10 @@ pub async fn persist( let network_config = DpaInterfaceNetworkConfig::default(); let state_version = ConfigVersion::initial(); let state = DpaInterfaceControllerState::Provisioning; + let description = value.device_description.unwrap_or_default(); - let query = "INSERT INTO dpa_interfaces (machine_id, mac_address, network_config_version, network_config, controller_state_version, controller_state, device_type, pci_name) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8) RETURNING row_to_json(dpa_interfaces.*)"; + let query = "INSERT INTO dpa_interfaces (machine_id, mac_address, network_config_version, network_config, controller_state_version, controller_state, device_type, pci_name, device_description) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) 
RETURNING row_to_json(dpa_interfaces.*)"; sqlx::query_as(query) .bind(value.machine_id.to_string()) @@ -57,6 +57,7 @@ pub async fn persist( .bind(sqlx::types::Json(&state)) .bind(value.device_type) .bind(value.pci_name) + .bind(description) .fetch_one(txn) .await .map_err(|e| DatabaseError::query(query, e)) @@ -74,9 +75,10 @@ pub async fn ensure( let network_config = DpaInterfaceNetworkConfig::default(); let state_version = ConfigVersion::initial(); let state = DpaInterfaceControllerState::Provisioning; + let description = value.device_description.unwrap_or_default(); - let insert_query = "INSERT INTO dpa_interfaces (machine_id, mac_address, network_config_version, network_config, controller_state_version, controller_state, device_type, pci_name) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8) ON CONFLICT (machine_id, mac_address) DO NOTHING RETURNING row_to_json(dpa_interfaces.*)"; + let insert_query = "INSERT INTO dpa_interfaces (machine_id, mac_address, network_config_version, network_config, controller_state_version, controller_state, device_type, pci_name, device_description) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) ON CONFLICT (machine_id, mac_address) DO NOTHING RETURNING row_to_json(dpa_interfaces.*)"; let result: Option = sqlx::query_as(insert_query) .bind(value.machine_id.to_string()) @@ -87,6 +89,7 @@ pub async fn ensure( .bind(sqlx::types::Json(&state)) .bind(value.device_type) .bind(value.pci_name) + .bind(description) .fetch_optional(&mut *txn) .await .map_err(|e| DatabaseError::query(insert_query, e))?; @@ -107,27 +110,6 @@ pub async fn ensure( .map_err(|e| DatabaseError::query(select_query, e)) } -pub async fn update_network_observation( - value: &DpaInterface, - txn: &mut PgConnection, - observation: &DpaInterfaceNetworkStatusObservation, -) -> Result { - let query = - "UPDATE dpa_interfaces SET network_status_observation = $1::json WHERE id = $2::uuid AND - ( - (network_status_observation->>'observed_at' IS NULL) - OR 
((network_status_observation->>'observed_at')::timestamp <= $3::timestamp) - ) RETURNING id"; - - sqlx::query_as(query) - .bind(sqlx::types::Json(&observation)) - .bind(value.id.to_string()) - .bind(observation.observed_at) - .fetch_one(&mut *txn) - .await - .map_err(|e| DatabaseError::query(query, e)) -} - // Update the last_hb_time field with the current timestamp for the given DPA interface // and return the DPA Interface ID pub async fn update_last_hb_time( @@ -583,6 +565,7 @@ mod test { machine_id: id, device_type: "Bluefield 3".to_string(), pci_name: "5e:00.0".to_string(), + device_description: None, }; let intf = crate::dpa_interface::persist(new_intf, &mut txn).await?; @@ -624,6 +607,7 @@ mod test { mac_address: MacAddress::from_str("00:11:22:33:44:55")?, device_type: "BlueField3".to_string(), pci_name: "01:00.0".to_string(), + device_description: None, }; // First call should insert a new interface. @@ -642,6 +626,7 @@ mod test { mac_address: MacAddress::from_str("00:11:22:33:44:55")?, device_type: "BlueField3".to_string(), pci_name: "01:00.0".to_string(), + device_description: None, }; let second = crate::dpa_interface::ensure(second_intf, &mut txn).await?; assert_eq!(second.id, first.id); @@ -680,6 +665,7 @@ mod test { mac_address: MacAddress::from_str("00:11:22:33:44:55")?, device_type: "BlueField3".to_string(), pci_name: pci_name.to_string(), + device_description: None, }; crate::dpa_interface::persist(new_intf, &mut txn).await?; diff --git a/crates/api-db/src/instance.rs b/crates/api-db/src/instance.rs index 8ff9676528..69f9b3f5c6 100644 --- a/crates/api-db/src/instance.rs +++ b/crates/api-db/src/instance.rs @@ -31,6 +31,7 @@ use model::instance::config::extension_services::InstanceExtensionServicesConfig use model::instance::config::infiniband::InstanceInfinibandConfig; use model::instance::config::network::{InstanceNetworkConfig, InstanceNetworkConfigUpdate}; use model::instance::config::nvlink::InstanceNvLinkConfig; +use 
model::instance::config::spx::InstanceSpxConfig; use model::instance::snapshot::{self, InstanceSnapshot, InstanceSnapshotPgJson}; use model::metadata::Metadata; use model::os::{InlineIpxe, OperatingSystem, OperatingSystemVariant}; @@ -626,6 +627,22 @@ pub async fn update_nvlink_config( .await } +/// Updates the desired spx configuration for an instance +pub async fn update_spx_config( + txn: &mut PgConnection, + instance_id: InstanceId, + expected_version: ConfigVersion, + new_state: &InstanceSpxConfig, + increment_version: bool, +) -> Result<(), DatabaseError> { + batch_update_spx_config( + txn, + &[(instance_id, expected_version, new_state)], + increment_version, + ) + .await +} + pub async fn trigger_update_network_config_request( instance_id: &InstanceId, current: &InstanceNetworkConfig, @@ -739,7 +756,9 @@ pub async fn batch_persist<'a>( extension_services_config, extension_services_config_version, nvlink_config, - nvlink_config_version + nvlink_config_version, + spx_config, + spx_config_version ) SELECT vals.id, vals.machine_id, vals.operating_system_id, vals.os_user_data, vals.os_ipxe_script, @@ -751,7 +770,7 @@ pub async fn batch_persist<'a>( vals.network_security_group_id, true, vals.instance_type_id, vals.extension_services_config::json, vals.extension_services_config_version, vals.nvlink_config::json, - vals.nvlink_config_version + vals.nvlink_config_version, vals.spx_config::json, vals.spx_config_version FROM (VALUES "; let mut qb = sqlx::QueryBuilder::new(query); @@ -834,6 +853,12 @@ pub async fn batch_persist<'a>( .push_bind_unseparated(serde_json::to_string(&value.config.nvlink).unwrap_or_default()); separated.push_unseparated(","); separated.push_bind_unseparated(value.nvlink_config_version); + separated.push_unseparated(","); + separated.push_bind_unseparated( + serde_json::to_string(&value.config.spxconfig).unwrap_or_default(), + ); + separated.push_unseparated(","); + separated.push_bind_unseparated(value.spx_config_version); 
separated.push_unseparated(")"); } @@ -842,7 +867,7 @@ pub async fn batch_persist<'a>( ib_config, ib_config_version, keyset_ids, os_phone_home_enabled, name, description, labels, config_version, hostname, network_security_group_id, instance_type_id, extension_services_config, extension_services_config_version, - nvlink_config, nvlink_config_version) + nvlink_config, nvlink_config_version, spx_config, spx_config_version) INNER JOIN machines m ON m.id = vals.machine_id AND (vals.instance_type_id IS NULL OR m.instance_type_id = vals.instance_type_id)"); @@ -1054,6 +1079,70 @@ pub async fn batch_update_nvlink_config( Ok(()) } +/// Batch update spx configs for multiple instances +/// Each update contains (instance_id, expected_version, config) +pub async fn batch_update_spx_config( + txn: &mut PgConnection, + updates: &[(InstanceId, ConfigVersion, &InstanceSpxConfig)], + increment_version: bool, +) -> Result<(), DatabaseError> { + if updates.is_empty() { + return Ok(()); + } + + let expected_count = updates.len() as u64; + + let mut qb = sqlx::QueryBuilder::new( + "UPDATE instances SET + spx_config_version = updates.new_version, + spx_config = updates.config::json + FROM (VALUES ", + ); + + let mut separated = qb.separated(", "); + for (instance_id, expected_version, config) in updates { + let new_version = if increment_version { + expected_version.increment() + } else { + *expected_version + }; + separated.push("("); + separated.push_bind_unseparated(*instance_id); + separated.push_unseparated("::uuid,"); + separated.push_bind_unseparated(*expected_version); + separated.push_unseparated(","); + separated.push_bind_unseparated(new_version); + separated.push_unseparated(","); + separated.push_bind_unseparated(serde_json::to_string(config).unwrap_or_default()); + separated.push_unseparated(")"); + } + + qb.push( + ") AS updates(id, expected_version, new_version, config) + WHERE instances.id = updates.id + AND instances.spx_config_version = updates.expected_version", + ); 
+ + let result = qb + .build() + .execute(txn) + .await + .map_err(|e| DatabaseError::new("batch_update_spx_config", e))?; + + // Verify all rows were updated (version check passed) + if result.rows_affected() != expected_count { + tracing::error!( + "batch_update_spx_config affected != expected: {:#?} != {expected_count}", + result.rows_affected() + ); + return Err(DatabaseError::FailedPrecondition( + "Spx config version mismatch during batch update".to_string(), + )); + } + + Ok(()) +} + pub async fn delete(instance_id: InstanceId, txn: &mut PgConnection) -> DatabaseResult<()> { instance_address::delete(&mut *txn, instance_id).await?; diff --git a/crates/api-db/src/lib.rs b/crates/api-db/src/lib.rs index 495a5c3c60..34398a068e 100644 --- a/crates/api-db/src/lib.rs +++ b/crates/api-db/src/lib.rs @@ -79,6 +79,7 @@ pub mod resource_pool; pub mod route_servers; pub mod site_exploration_report; pub mod sku; +pub mod spx_partition; pub mod state_history; pub mod switch; pub mod tenant; diff --git a/crates/api-db/src/machine.rs b/crates/api-db/src/machine.rs index 832df8afbd..ce15349f53 100644 --- a/crates/api-db/src/machine.rs +++ b/crates/api-db/src/machine.rs @@ -41,6 +41,7 @@ use model::machine::network::{ MachineNetworkStatusObservation, ManagedHostNetworkConfig, ManagedHostQuarantineState, }; use model::machine::nvlink::MachineNvLinkStatusObservation; +use model::machine::spx::MachineSpxStatusObservation; use model::machine::upgrade_policy::AgentUpgradePolicy; use model::machine::{ Dpf, DpuInfo, FailureDetails, HostProfile, Machine, MachineInterfaceSnapshot, @@ -858,6 +859,28 @@ pub async fn update_nvlink_status_observation( Ok(()) } +pub async fn update_spx_status_observation( + txn: &mut PgConnection, + machine_id: &MachineId, + observation: &MachineSpxStatusObservation, +) -> Result<(), DatabaseError> { + tracing::debug!( + "update_spx_status_observation: observation {:#?}", + observation + ); + let query = "UPDATE machines SET spx_status_observation = $1::json 
WHERE id = $2 AND + (spx_status_observation->>'observed_at' IS NULL OR spx_status_observation->>'observed_at' <= $3) RETURNING id"; + let _id: (MachineId,) = sqlx::query_as(query) + .bind(sqlx::types::Json(&observation)) + .bind(machine_id) + .bind(observation.observed_at.to_rfc3339()) + .fetch_one(txn) + .await + .map_err(|e| DatabaseError::query(query, e))?; + + Ok(()) +} + #[cfg(test)] async fn debug_failed_machine_status_update( txn: &mut PgConnection, diff --git a/crates/api-db/src/resource_pool.rs b/crates/api-db/src/resource_pool.rs index 179063fc36..0a5ffe1f88 100644 --- a/crates/api-db/src/resource_pool.rs +++ b/crates/api-db/src/resource_pool.rs @@ -23,8 +23,8 @@ use config_version::ConfigVersion; use ipnetwork::Ipv6Network; use model::resource_pool; use model::resource_pool::common::{ - CommonPools, EXTERNAL_VPC_VNI, EthernetPools, FNN_ASN, IbPools, LOOPBACK_IP, SECONDARY_VTEP_IP, - VLANID, VNI, VPC_DPU_LOOPBACK, VPC_VNI, + CommonPools, DPA_VNI, EXTERNAL_VPC_VNI, EthernetPools, FNN_ASN, IbPools, LOOPBACK_IP, + SECONDARY_VTEP_IP, VLANID, VNI, VPC_DPU_LOOPBACK, VPC_VNI, }; use model::resource_pool::define::{ResourcePoolDef, ResourcePoolType}; use model::resource_pool::{ @@ -937,6 +937,10 @@ pub async fn create_common_pools( Arc::new(ResourcePool::new(FNN_ASN.to_string(), ValueType::Integer)); optional_pool_names.push(pool_fnn_asn.name().to_string()); + let pool_dpa_vni: Arc> = + Arc::new(ResourcePool::new(DPA_VNI.to_string(), ValueType::Integer)); + pool_names.push(pool_dpa_vni.name().to_string()); + let pool_vpc_dpu_loopback_ip: Arc> = Arc::new(ResourcePool::new( VPC_DPU_LOOPBACK.to_string(), ValueType::Ipv4, @@ -1016,6 +1020,7 @@ pub async fn create_common_pools( pool_vni, pool_vpc_vni, pool_external_vpc_vni, + pool_dpa_vni, pool_fnn_asn, pool_vpc_dpu_loopback_ip, pool_secondary_vtep_ip, diff --git a/crates/api-db/src/spx_partition.rs b/crates/api-db/src/spx_partition.rs new file mode 100644 index 0000000000..7fb86036e0 --- /dev/null +++ 
b/crates/api-db/src/spx_partition.rs @@ -0,0 +1,190 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +use carbide_uuid::spx::SpxPartitionId; +use config_version::ConfigVersion; +use model::spx_partition::{NewSpxPartition, SpxPartition, SpxPartitionSnapshotPgJson}; +use sqlx::PgConnection; + +use crate::db_read::DbReader; +use crate::{ + ColumnInfo, DatabaseError, DatabaseResult, FilterableQueryBuilder, ObjectColumnFilter, +}; + +#[derive(Copy, Clone)] +pub struct IdColumn; +impl ColumnInfo<'_> for IdColumn { + type TableType = SpxPartition; + type ColumnType = SpxPartitionId; + + fn column_name(&self) -> &'static str { + "id" + } +} + +#[derive(Copy, Clone)] +pub struct VniColumn; +impl ColumnInfo<'_> for VniColumn { + type TableType = SpxPartition; + type ColumnType = i32; + + fn column_name(&self) -> &'static str { + "vni" + } +} + +pub async fn create( + value: &NewSpxPartition, + vni: i32, + txn: &mut PgConnection, +) -> Result { + let config_version = ConfigVersion::initial(); + + let query = "INSERT INTO spx_partitions ( + id, + name, + description, + tenant_organization_id, + vni, + config_version) + VALUES ($1, $2, $3, $4, $5, $6) + RETURNING row_to_json(spx_partitions.*)"; + + let partition: SpxPartitionSnapshotPgJson = sqlx::query_as(query) + .bind(value.id) + .bind(&value.name) + 
.bind(&value.description) + .bind(&value.tenant_organization_id) + .bind(vni) + .bind(config_version) + .fetch_one(txn) + .await + .map_err(|e| DatabaseError::new(query, e))?; + partition + .try_into() + .map_err(|e| DatabaseError::new(query, e)) +} + +pub async fn for_tenant( + txn: impl DbReader<'_>, + tenant_organization_id: String, +) -> Result, DatabaseError> { + let query = "SELECT row_to_json(p.*) FROM (SELECT * FROM spx_partitions WHERE tenant_organization_id=$1) p"; + let partitions: Vec = sqlx::query_as(query) + .bind(tenant_organization_id) + .fetch_all(txn) + .await + .map_err(|e| DatabaseError::new(query, e))?; + + partitions + .into_iter() + .map(|p| p.try_into()) + .collect::, sqlx::Error>>() + .map_err(|e| DatabaseError::new(query, e)) +} + +pub async fn find_ids( + txn: impl DbReader<'_>, + filter: model::spx_partition::SpxPartitionSearchFilter, +) -> Result, DatabaseError> { + let mut builder = sqlx::QueryBuilder::new("SELECT id FROM spx_partitions WHERE"); + let mut has_filter = false; + + if let Some(tenant_org_id) = &filter.tenant_org_id { + builder.push(" tenant_organization_id = "); + builder.push_bind(tenant_org_id); + has_filter = true; + } + if let Some(name) = &filter.name { + if has_filter { + builder.push(" AND name = "); + } else { + builder.push(" name = "); + } + builder.push_bind(name); + has_filter = true; + } + + if has_filter { + builder.push(" AND "); + } + + builder.push(" deleted IS NULL"); + + let query = builder.build_query_as(); + let ids: Vec = query + .fetch_all(txn) + .await + .map_err(|e| DatabaseError::new("spx_partition::find_ids", e))?; + + Ok(ids) +} + +pub async fn find_by<'a, C: ColumnInfo<'a, TableType = SpxPartition>, DB>( + conn: &mut DB, + filter: ObjectColumnFilter<'a, C>, +) -> Result, DatabaseError> +where + for<'db> &'db mut DB: DbReader<'db>, +{ + let mut query = FilterableQueryBuilder::new( + "SELECT row_to_json(p.*) FROM (SELECT * FROM spx_partitions) p", + ) + .filter(&filter); + + let partitions: Vec 
= query + .build_query_as() + .fetch_all(&mut *conn) + .await + .map_err(|e| DatabaseError::new(query.sql(), e))?; + + partitions + .into_iter() + .map(|p| p.try_into()) + .collect::, sqlx::Error>>() + .map_err(|e| DatabaseError::new(query.sql(), e)) +} + +pub async fn mark_as_deleted( + pid: SpxPartitionId, + txn: &mut PgConnection, +) -> DatabaseResult { + let query = "UPDATE spx_partitions SET updated=NOW(), deleted=NOW() WHERE id=$1 RETURNING row_to_json(spx_partitions.*)"; + let partition: SpxPartitionSnapshotPgJson = sqlx::query_as(query) + .bind(pid) + .fetch_one(txn) + .await + .map_err(|e| DatabaseError::new(query, e))?; + + partition + .try_into() + .map_err(|e| DatabaseError::new(query, e)) +} + +pub async fn final_delete( + partition_id: SpxPartitionId, + txn: &mut PgConnection, +) -> Result { + let query = "DELETE FROM spx_partitions WHERE id=$1::uuid RETURNING id"; + let partition: SpxPartitionId = sqlx::query_as(query) + .bind(partition_id) + .fetch_one(txn) + .await + .map_err(|e| DatabaseError::new(query, e))?; + + Ok(partition) +} diff --git a/crates/api-model/src/dpa_interface/mod.rs b/crates/api-model/src/dpa_interface/mod.rs index c54f15969e..63ea32c617 100644 --- a/crates/api-model/src/dpa_interface/mod.rs +++ b/crates/api-model/src/dpa_interface/mod.rs @@ -32,6 +32,8 @@ use sqlx::{FromRow, Row}; use crate::StateSla; use crate::controller_outcome::PersistentStateHandlerOutcome; +use crate::instance::snapshot::InstanceSnapshot; +use crate::machine::spx::MachineSpxStatusObservation; use crate::state_history::StateHistoryRecord; mod slas; @@ -61,12 +63,8 @@ pub enum DpaInterfaceControllerState { ApplyProfile, /// Lock the card Locking, - /// The VNI associated with the DPA interface is being set - WaitingForSetVNI, /// The Dpa Interface has been configured with a non-zero VNI Assigned, - /// The VNI associated with the DPA interface is being reset - WaitingForResetVNI, } impl Display for DpaInterfaceControllerState { @@ -106,18 +104,6 @@ pub enum 
DpaInterfaceQuarantineMode { BlockAllTraffic, } -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -pub struct DpaInterfaceNetworkStatusObservation { - pub observed_at: DateTime, - pub network_config_version: Option, -} - -impl Display for DpaInterfaceNetworkStatusObservation { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - std::fmt::Debug::fmt(self, f) - } -} - #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] pub enum DpaLockMode { Unlocked, @@ -182,13 +168,7 @@ pub fn state_sla(state: &DpaInterfaceControllerState, state_version: &ConfigVers DpaInterfaceControllerState::Unlocking => { StateSla::with_sla(slas::UNLOCKING, time_in_state) } - DpaInterfaceControllerState::WaitingForSetVNI => { - StateSla::with_sla(slas::WAITINGFORSETVNI, time_in_state) - } DpaInterfaceControllerState::Assigned => StateSla::no_sla(), - DpaInterfaceControllerState::WaitingForResetVNI => { - StateSla::with_sla(slas::WAITINGFORRESETVNI, time_in_state) - } } } @@ -216,7 +196,6 @@ pub struct DpaInterface { pub controller_state_outcome: Option, pub network_config: Versioned, - pub network_status_observation: Option, pub card_state: Option, @@ -238,6 +217,8 @@ pub struct DpaInterface { pub mlxconfig_profile: Option, pub history: Vec, + + pub device_description: Option, } #[derive(Clone, Debug)] @@ -246,6 +227,7 @@ pub struct NewDpaInterface { pub mac_address: MacAddress, pub device_type: String, pub pci_name: String, + pub device_description: Option, } impl NewDpaInterface { @@ -264,28 +246,48 @@ impl NewDpaInterface { mac_address: info.base_mac?, device_type: info.device_type.clone(), pci_name: info.pci_name.clone(), + device_description: info.device_description.clone(), }) } } impl DpaInterface { + pub fn use_admin_network(&self) -> bool { + self.network_config.use_admin_network.unwrap_or(true) + } + pub fn get_machine_id(&self) -> MachineId { self.machine_id } - pub fn managed_host_network_config_version_synced(&self) -> bool { - let 
dpa_expected_version = self.network_config.version; - let dpa_observation = self.network_status_observation.as_ref(); + // If the DPA machine is an instance, the config version sent to the card will + // be the spx_config_version of the instance. + // If the DPA machine is a managed host, the config version sent to the card will + // be the network_config.version of the DPA interface. + pub fn managed_host_network_config_version_synced( + &self, + instance: &Option, + spx_status_observation: &Option, + ) -> bool { + let mut dpa_expected_version = self.network_config.version; + + if let Some(instance) = instance { + dpa_expected_version = instance.spx_config_version; + } - let dpa_observed_version: ConfigVersion = match dpa_observation { - Some(network_status) => match network_status.network_config_version { - Some(version) => version, - None => return false, - }, - None => return false, + let Some(spx_status_observation) = spx_status_observation else { + return false; }; - dpa_expected_version == dpa_observed_version + for obs in spx_status_observation.spx_attachments.iter() { + if obs.mac_address == self.mac_address + && let Some(config_version) = obs.config_version + { + return config_version == dpa_expected_version; + } + } + + false } pub fn is_ready(&self) -> bool { @@ -316,7 +318,6 @@ pub struct DpaInterfaceSnapshotPgJson { pub controller_state_outcome: Option, pub network_config: DpaInterfaceNetworkConfig, pub network_config_version: String, - pub network_status_observation: Option, pub card_state: Option, pub pci_name: String, pub underlay_ip: Option, @@ -329,8 +330,9 @@ pub struct DpaInterfaceSnapshotPgJson { pub mlxconfig_profile: Option, #[serde(default)] pub history: Vec, + #[serde(default)] + pub device_description: Option, } - #[cfg(test)] mod tests { use std::str::FromStr; diff --git a/crates/api-model/src/dpa_interface/slas.rs b/crates/api-model/src/dpa_interface/slas.rs index 78ed25bcca..7064e4027b 100644 ---
a/crates/api-model/src/dpa_interface/slas.rs +++ b/crates/api-model/src/dpa_interface/slas.rs @@ -30,5 +30,3 @@ pub const LOCKING: Duration = Duration::from_secs(15 * 60); pub const APPLY_FIRMWARE: Duration = Duration::from_secs(30 * 60); pub const APPLY_PROFILE: Duration = Duration::from_secs(15 * 60); pub const UNLOCKING: Duration = Duration::from_secs(15 * 60); -pub const WAITINGFORSETVNI: Duration = Duration::from_secs(15 * 60); -pub const WAITINGFORRESETVNI: Duration = Duration::from_secs(15 * 60); diff --git a/crates/api-model/src/instance/config.rs b/crates/api-model/src/instance/config.rs index be46c43b0a..03a243d370 100644 --- a/crates/api-model/src/instance/config.rs +++ b/crates/api-model/src/instance/config.rs @@ -19,6 +19,7 @@ pub mod extension_services; pub mod infiniband; pub mod network; pub mod nvlink; +pub mod spx; pub mod tenant_config; use carbide_uuid::network_security_group::NetworkSecurityGroupId; @@ -29,6 +30,7 @@ use crate::instance::config::extension_services::InstanceExtensionServicesConfig use crate::instance::config::infiniband::InstanceInfinibandConfig; use crate::instance::config::network::InstanceNetworkConfig; use crate::instance::config::nvlink::InstanceNvLinkConfig; +use crate::instance::config::spx::InstanceSpxConfig; use crate::instance::config::tenant_config::TenantConfig; use crate::os::OperatingSystem; @@ -61,6 +63,9 @@ pub struct InstanceConfig { /// configure instance nvlink pub nvlink: InstanceNvLinkConfig, + + /// Configures instance spx + pub spxconfig: InstanceSpxConfig, } impl InstanceConfig { diff --git a/crates/api-model/src/instance/config/spx.rs b/crates/api-model/src/instance/config/spx.rs new file mode 100644 index 0000000000..b220802470 --- /dev/null +++ b/crates/api-model/src/instance/config/spx.rs @@ -0,0 +1,126 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//use std::collections::HashSet; + +use ::rpc::errors::RpcDataConversionError; +use carbide_uuid::spx::SpxPartitionId; +use rpc::forge as rpc; +use serde::{Deserialize, Serialize}; + +use crate::ConfigValidationError; + +#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +pub struct InstanceSpxConfig { + /// Configures how SpectrumX NICs are set up + pub spx_attachments: Vec, +} + +impl InstanceSpxConfig { + /// Validates the spx configuration + pub fn validate(&self) -> Result<(), ConfigValidationError> { + Ok(()) + } + + pub fn verify_update_allowed_to( + &self, + _new_config: &Self, + ) -> Result<(), ConfigValidationError> { + Ok(()) + } + + pub fn is_spx_config_update_requested(&self, new_config: &Self) -> bool { + self != new_config + } +} + +impl TryFrom for InstanceSpxConfig { + type Error = RpcDataConversionError; + + fn try_from(config: rpc::InstanceSpxConfig) -> Result { + let mut spx_attachments = Vec::with_capacity(config.spx_attachments.len()); + for attachment in config.spx_attachments.into_iter() { + let spx_partition_id = + attachment + .spx_partition_id + .ok_or(RpcDataConversionError::MissingArgument( + "InstanceSpxAttachment::spx_partition_id", + ))?; + spx_attachments.push(InstanceSpxAttachment { + device: attachment.device, + device_instance: attachment.device_instance, + spx_partition_id, + attachment_type: 
SpxAttachmentType::try_from(attachment.attachment_type)?, + virtual_function_id: attachment.virtual_function_id, + mac_address: None, + }); + } + Ok(Self { spx_attachments }) + } +} + +impl TryFrom for rpc::InstanceSpxConfig { + type Error = RpcDataConversionError; + + fn try_from(config: InstanceSpxConfig) -> Result { + let mut spx_attachments = Vec::with_capacity(config.spx_attachments.len()); + for attachment in config.spx_attachments.into_iter() { + spx_attachments.push(rpc::InstanceSpxAttachment { + device: attachment.device, + device_instance: attachment.device_instance, + spx_partition_id: Some(attachment.spx_partition_id), + attachment_type: attachment.attachment_type as i32, + virtual_function_id: attachment.virtual_function_id, + }); + } + Ok(rpc::InstanceSpxConfig { spx_attachments }) + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub enum SpxAttachmentType { + Physical = 0, + Virtual = 1, + Ovn = 2, +} + +impl TryFrom for SpxAttachmentType { + type Error = RpcDataConversionError; + + fn try_from(value: i32) -> Result { + match value { + 0 => Ok(SpxAttachmentType::Physical), + 1 => Ok(SpxAttachmentType::Virtual), + 2 => Ok(SpxAttachmentType::Ovn), + _ => Err(RpcDataConversionError::InvalidValue( + "SpxAttachmentType".to_string(), + value.to_string(), + )), + } + } +} + +/// The configuration that a customer desires for an instances SpectrumX NICs +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct InstanceSpxAttachment { + pub device: String, + pub device_instance: u32, + pub mac_address: Option, + pub spx_partition_id: SpxPartitionId, + pub attachment_type: SpxAttachmentType, + pub virtual_function_id: Option, +} diff --git a/crates/api-model/src/instance/mod.rs b/crates/api-model/src/instance/mod.rs index cd924d6d27..6345e1a152 100644 --- a/crates/api-model/src/instance/mod.rs +++ b/crates/api-model/src/instance/mod.rs @@ -52,6 +52,7 @@ pub struct NewInstance<'a> { pub ib_config_version: ConfigVersion, 
pub extension_services_config_version: ConfigVersion, pub nvlink_config_version: ConfigVersion, + pub spx_config_version: ConfigVersion, } pub struct DeleteInstance { diff --git a/crates/api-model/src/instance/snapshot.rs b/crates/api-model/src/instance/snapshot.rs index 07dc38a3cd..46315ba9bd 100644 --- a/crates/api-model/src/instance/snapshot.rs +++ b/crates/api-model/src/instance/snapshot.rs @@ -32,6 +32,7 @@ use crate::instance::config::InstanceConfig; use crate::instance::config::extension_services::InstanceExtensionServicesConfig; use crate::instance::config::infiniband::InstanceInfinibandConfig; use crate::instance::config::nvlink::InstanceNvLinkConfig; +use crate::instance::config::spx::InstanceSpxConfig; use crate::instance::config::tenant_config::TenantConfig; use crate::instance::status::InstanceStatusObservations; use crate::metadata::Metadata; @@ -80,6 +81,8 @@ pub struct InstanceSnapshot { pub nvlink_config_version: ConfigVersion, + pub spx_config_version: ConfigVersion, + /// Observed status of the instance pub observations: InstanceStatusObservations, @@ -124,6 +127,8 @@ pub struct InstanceSnapshotPgJson { storage_config_version: String, nvlink_config: InstanceNvLinkConfig, nvlink_config_version: String, + spx_config: InstanceSpxConfig, + spx_config_version: String, config_version: String, phone_home_last_contact: Option>, use_custom_pxe_on_boot: bool, @@ -184,6 +189,7 @@ pub fn from_pg_json_and_os( os, network: value.network_config, infiniband: value.ib_config, + spxconfig: value.spx_config, nvlink: value.nvlink_config, network_security_group_id: value.network_security_group_id, extension_services: value.extension_services_config, @@ -219,6 +225,12 @@ pub fn from_pg_json_and_os( source: Box::new(e), } })?, + spx_config_version: value.spx_config_version.parse().map_err(|e| { + sqlx::error::Error::ColumnDecode { + index: "spx_config_version".to_string(), + source: Box::new(e), + } + })?, storage_config_version: 
value.storage_config_version.parse().map_err(|e| { sqlx::error::Error::ColumnDecode { index: "storage_config_version".to_string(), @@ -295,6 +307,7 @@ impl TryFrom for InstanceSnapshot { nvlink: value.nvlink_config, network_security_group_id: value.network_security_group_id, extension_services: value.extension_services_config, + spxconfig: value.spx_config, }; Ok(InstanceSnapshot { @@ -327,6 +340,12 @@ impl TryFrom for InstanceSnapshot { source: Box::new(e), } })?, + spx_config_version: value.spx_config_version.parse().map_err(|e| { + sqlx::error::Error::ColumnDecode { + index: "spx_config_version".to_string(), + source: Box::new(e), + } + })?, storage_config_version: value.storage_config_version.parse().map_err(|e| { sqlx::error::Error::ColumnDecode { index: "storage_config_version".to_string(), @@ -385,6 +404,8 @@ mod tests { storage_config_version: version.clone(), nvlink_config: InstanceNvLinkConfig::default(), nvlink_config_version: version.clone(), + spx_config: InstanceSpxConfig::default(), + spx_config_version: version.clone(), config_version: version.clone(), phone_home_last_contact: None, use_custom_pxe_on_boot: false, diff --git a/crates/api-model/src/instance/status.rs b/crates/api-model/src/instance/status.rs index 5efb8a2370..737e1b2c69 100644 --- a/crates/api-model/src/instance/status.rs +++ b/crates/api-model/src/instance/status.rs @@ -26,6 +26,7 @@ pub mod extension_service; pub mod infiniband; pub mod network; pub mod nvlink; +pub mod spx; pub mod tenant; /// Instance status @@ -49,6 +50,9 @@ pub struct InstanceStatus { /// Status of nvlink subsystem of an instance pub nvlink: nvlink::InstanceNvLinkStatus, + /// Status of the SPX subsystem of an instance + pub spx_status: spx::InstanceSpxStatus, + /// Whether all configurations related to an instance are in-sync. /// This is a logical AND for the settings of all sub-configurations. 
/// At this time it equals `InstanceNetworkStatus::configs_synced`, diff --git a/crates/api-model/src/instance/status/spx.rs b/crates/api-model/src/instance/status/spx.rs new file mode 100644 index 0000000000..76217fd765 --- /dev/null +++ b/crates/api-model/src/instance/status/spx.rs @@ -0,0 +1,163 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +use std::collections::HashMap; + +use ::rpc::errors::RpcDataConversionError; +use carbide_uuid::spx::SpxPartitionId; +use config_version::Versioned; +use rpc::forge as rpc; +use serde::{Deserialize, Serialize}; + +use crate::instance::config::spx::{InstanceSpxConfig, SpxAttachmentType}; +use crate::instance::status::SyncState; +use crate::machine::spx::MachineSpxStatusObservation; + +#[derive(Clone, Debug)] +pub struct InstanceSpxStatus { + /// each entry here maps to the corresponding entry in the config Vec + pub spx_attachments: Vec, + /// similar to InstanceNetworkStatus + pub configs_synced: SyncState, +} + +impl TryFrom for rpc::InstanceSpxStatus { + type Error = RpcDataConversionError; + + fn try_from(status: InstanceSpxStatus) -> Result { + let mut spx_attachments: Vec = Vec::new(); + for attachment in status.spx_attachments.iter() { + let a = rpc::InstanceSpxAttachmentStatus::try_from(attachment.clone())?; + spx_attachments.push(a); + } + Ok(Self { + attachment_statuses: spx_attachments, + configs_synced: rpc::SyncState::try_from(status.configs_synced)? 
as i32, + }) + } +} + +impl InstanceSpxStatus { + pub fn from_config_and_observation( + config: Versioned<&InstanceSpxConfig>, + observations: Option<&MachineSpxStatusObservation>, + ) -> Self { + if config.spx_attachments.is_empty() { + return Self { + spx_attachments: Vec::new(), + configs_synced: SyncState::Synced, + }; + } + + let Some(observations) = observations else { + return Self::unsynchronized_for_config(&config); + }; + + let mut configs_synced = SyncState::Synced; + + let mut spx_attachments: Vec = + Vec::with_capacity(config.spx_attachments.len()); + let obs_by_mac_address: HashMap<_, _> = observations + .spx_attachments + .iter() + .map(|obs| (obs.mac_address.to_string(), obs)) + .collect(); + for cfg in &config.spx_attachments { + let mac_addr = cfg.mac_address.as_deref().unwrap_or_default(); + let status = match obs_by_mac_address.get(mac_addr) { + Some(obs) => { + if cfg.spx_partition_id != obs.partition_id.unwrap_or_default() { + configs_synced = SyncState::Pending; + } + InstanceSpxAttachmentStatus { + mac_address: mac_addr.to_string(), + virtual_function_id: cfg.virtual_function_id.unwrap_or_default(), + attachment_type: cfg.attachment_type.clone(), + spx_partition_id: cfg.spx_partition_id, + } + } + None => { + tracing::error!( + "could not find matching status spx attachment {:?}", + cfg.device_instance + ); + configs_synced = SyncState::Pending; + InstanceSpxAttachmentStatus { + mac_address: mac_addr.to_string(), + virtual_function_id: cfg.virtual_function_id.unwrap_or_default(), + attachment_type: cfg.attachment_type.clone(), + spx_partition_id: cfg.spx_partition_id, + } + } + }; + spx_attachments.push(status); + } + Self { + spx_attachments, + configs_synced, + } + } + + fn unsynchronized_for_config(config: &InstanceSpxConfig) -> Self { + Self { + spx_attachments: config + .spx_attachments + .iter() + .map(|cfg| InstanceSpxAttachmentStatus { + mac_address: cfg.mac_address.as_deref().unwrap_or_default().to_string(), + virtual_function_id: 
0, + attachment_type: SpxAttachmentType::Physical, + spx_partition_id: SpxPartitionId::default(), + }) + .collect(), + configs_synced: SyncState::Pending, + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct InstanceSpxAttachmentStatus { + pub mac_address: String, + pub virtual_function_id: u32, + pub attachment_type: SpxAttachmentType, + pub spx_partition_id: SpxPartitionId, +} + +impl TryFrom for rpc::InstanceSpxAttachmentStatus { + type Error = RpcDataConversionError; + fn try_from(status: InstanceSpxAttachmentStatus) -> Result { + Ok(Self { + mac_addr: Some(status.mac_address), + virtual_function_id: status.virtual_function_id, + attachment_type: status.attachment_type as i32, + spx_partition_id: Some(status.spx_partition_id), + ip_address: None, + }) + } +} + +impl TryFrom for InstanceSpxAttachmentStatus { + type Error = RpcDataConversionError; + fn try_from(status: rpc::InstanceSpxAttachmentStatus) -> Result { + Ok(Self { + mac_address: status.mac_addr.unwrap_or_default(), + virtual_function_id: status.virtual_function_id, + attachment_type: SpxAttachmentType::try_from(status.attachment_type)?, + spx_partition_id: status.spx_partition_id.unwrap_or_default(), + }) + } +} diff --git a/crates/api-model/src/lib.rs b/crates/api-model/src/lib.rs index dd3b923012..01bc780d96 100644 --- a/crates/api-model/src/lib.rs +++ b/crates/api-model/src/lib.rs @@ -88,6 +88,7 @@ pub mod route_server; pub mod rpc_conv; pub mod site_explorer; pub mod sku; +pub mod spx_partition; pub mod state_history; pub mod storage; pub mod switch; diff --git a/crates/api-model/src/machine/json.rs b/crates/api-model/src/machine/json.rs index bfd010c1c2..e0432044c0 100644 --- a/crates/api-model/src/machine/json.rs +++ b/crates/api-model/src/machine/json.rs @@ -32,6 +32,7 @@ use crate::machine::health_override::HealthReportSources; use crate::machine::infiniband::MachineInfinibandStatusObservation; use crate::machine::network::{MachineNetworkStatusObservation, 
ManagedHostNetworkConfig}; use crate::machine::nvlink::MachineNvLinkStatusObservation; +use crate::machine::spx::MachineSpxStatusObservation; use crate::machine::topology::MachineTopology; use crate::machine::{ Dpf, FailureDetails, HostProfile, HostReprovisionRequest, Machine, MachineInterfaceSnapshot, @@ -60,6 +61,7 @@ pub struct MachineSnapshotPgJson { pub network_status_observation: Option, pub infiniband_status_observation: Option, pub nvlink_status_observation: Option, + pub spx_status_observation: Option, pub controller_state_version: String, pub controller_state: ManagedHostState, pub last_discovery_time: Option>, @@ -173,6 +175,7 @@ impl TryFrom for Machine { network_status_observation: value.network_status_observation, infiniband_status_observation: value.infiniband_status_observation, nvlink_status_observation: value.nvlink_status_observation, + spx_status_observation: value.spx_status_observation, history, interfaces: value.interfaces, hardware_info, diff --git a/crates/api-model/src/machine/mod.rs b/crates/api-model/src/machine/mod.rs index f67ee46696..70516926f4 100644 --- a/crates/api-model/src/machine/mod.rs +++ b/crates/api-model/src/machine/mod.rs @@ -41,6 +41,7 @@ use strum_macros::EnumIter; use self::infiniband::MachineInfinibandStatusObservation; use self::network::{MachineNetworkStatusObservation, ManagedHostNetworkConfig}; use self::nvlink::MachineNvLinkStatusObservation; +use self::spx::MachineSpxStatusObservation; use super::StateSla; use super::bmc_info::BmcInfo; use super::hardware_info::MachineInventory; @@ -75,6 +76,7 @@ pub mod machine_id; pub mod machine_search_config; pub mod network; pub mod nvlink; +pub mod spx; pub mod topology; pub mod upgrade_policy; @@ -678,6 +680,9 @@ pub struct Machine { // The most recent status of the nvlink GPUs. pub nvlink_status_observation: Option, + // The most recent status of the SPX attachments. 
+ pub spx_status_observation: Option, + /// A list of [StateHistoryRecord]s that this machine has experienced pub history: Vec, diff --git a/crates/api-model/src/machine/spx.rs b/crates/api-model/src/machine/spx.rs new file mode 100644 index 0000000000..9e523c6934 --- /dev/null +++ b/crates/api-model/src/machine/spx.rs @@ -0,0 +1,129 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +use carbide_uuid::spx::SpxPartitionId; +use chrono::{DateTime, Utc}; +use config_version::ConfigVersion; +use mac_address::MacAddress; +use serde::{Deserialize, Serialize}; + +use crate::StatusValidationError; +use crate::instance::config::spx::{InstanceSpxConfig, SpxAttachmentType}; + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct MachineSpxStatusObservation { + /// Observed status for each configured interface + #[serde(default)] + pub spx_attachments: Vec, + pub observed_at: DateTime, +} + +impl MachineSpxStatusObservation { + pub fn validate(&self) -> Result<(), StatusValidationError> { + Ok(()) + } +} + +impl Default for MachineSpxStatusObservation { + fn default() -> Self { + Self { + spx_attachments: Vec::new(), + observed_at: Utc::now(), + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)] +pub struct MachineSpxAttachmentStatusObservation { + pub mac_address: MacAddress, + pub partition_id: Option, + pub config_version: Option, + pub attachment_type: Option, + pub virtual_function_id: Option, + pub observed_at: DateTime, +} + +impl From for rpc::forge::MachineSpxStatusObservation { + fn from(value: MachineSpxStatusObservation) -> Self { + rpc::forge::MachineSpxStatusObservation { + attachment_status: value + .spx_attachments + .into_iter() + .map(rpc::forge::MachineSpxAttachmentStatusObservation::from) + .collect(), + observed_at: Some(value.observed_at.into()), + } + } +} + +impl From + for rpc::forge::MachineSpxAttachmentStatusObservation +{ + fn from(value: MachineSpxAttachmentStatusObservation) -> Self { + rpc::forge::MachineSpxAttachmentStatusObservation { + mac_address: value.mac_address.to_string(), + partition_id: value.partition_id, + attachment_type: value.attachment_type.map(|at| at as i32), + virtual_function_id: value.virtual_function_id, + observed_at: Some(value.observed_at.into()), + } + } +} + +#[derive(Debug, Clone)] +pub struct SpxConfigNotSyncedReason(pub String); + +pub fn 
spx_config_synced( + observation: Option<&MachineSpxStatusObservation>, + config: Option<&InstanceSpxConfig>, +) -> Result<(), SpxConfigNotSyncedReason> { + let Some(config) = config.as_ref() else { + return Ok(()); + }; + if config.spx_attachments.is_empty() { + return Ok(()); + } + + let Some(observation) = observation.as_ref() else { + return Err(SpxConfigNotSyncedReason("Due to missing SPX status observation, it can't be verified whether the SPX config is applied".to_string())); + }; + + for conf_att in config.spx_attachments.iter() { + let Some(obs) = observation.spx_attachments.iter().find(|obs_att| { + conf_att.mac_address.as_deref().unwrap_or_default() == obs_att.mac_address.to_string() + }) else { + tracing::error!( + "could not find matching status instance {:?}", + conf_att.device_instance + ); + return Err(SpxConfigNotSyncedReason( + "No matching SPX status observation found for attachment in config".to_string(), + )); + }; + if obs.partition_id.is_none() { + return Err(SpxConfigNotSyncedReason( + "SPX partition ID not yet applied".to_string(), + )); + } + if conf_att.spx_partition_id != obs.partition_id.unwrap() { + return Err(SpxConfigNotSyncedReason( + "SPX partition ID mismatch between config and observation".to_string(), + )); + } + } + Ok(()) +} diff --git a/crates/api-model/src/resource_pool/common.rs b/crates/api-model/src/resource_pool/common.rs index a4f655ab77..043ecf473b 100644 --- a/crates/api-model/src/resource_pool/common.rs +++ b/crates/api-model/src/resource_pool/common.rs @@ -43,6 +43,9 @@ pub const FNN_ASN: &str = "fnn-asn"; /// Must match a pool defined in dev/resource_pools.toml pub const VPC_DPU_LOOPBACK: &str = "vpc-dpu-lo"; +// DPA VNI pool: VNI for the DPA +pub const DPA_VNI: &str = "dpa-vni"; + /// IPs used for creating a secondary overlay on /// a separate set of VTEPs. The initial use-case is /// VMAAS GENEVE VTEPs. 
@@ -72,6 +75,7 @@ pub struct EthernetPools { pub pool_vni: Arc>, pub pool_vpc_vni: Arc>, pub pool_external_vpc_vni: Arc>, + pub pool_dpa_vni: Arc>, pub pool_fnn_asn: Arc>, pub pool_vpc_dpu_loopback_ip: Arc>, pub pool_secondary_vtep_ip: Arc>, diff --git a/crates/api-model/src/resource_pool/mod.rs b/crates/api-model/src/resource_pool/mod.rs index df3c9571be..895ae0bf23 100644 --- a/crates/api-model/src/resource_pool/mod.rs +++ b/crates/api-model/src/resource_pool/mod.rs @@ -166,7 +166,7 @@ pub enum OwnerType { Vpc, /// owner_type for East West Networks - Dpa, + SpxPartition, } impl FromStr for OwnerType { @@ -191,7 +191,7 @@ impl fmt::Display for OwnerType { Self::NetworkSegment => write!(f, "network_segment"), Self::IBPartition => write!(f, "ib_partition"), Self::Vpc => write!(f, "vpc"), - Self::Dpa => write!(f, "dpa"), + Self::SpxPartition => write!(f, "spx_partition"), } } } diff --git a/crates/api-model/src/rpc_conv/dpa_interface.rs b/crates/api-model/src/rpc_conv/dpa_interface.rs index 0d12cad14a..0a6c8d37e9 100644 --- a/crates/api-model/src/rpc_conv/dpa_interface.rs +++ b/crates/api-model/src/rpc_conv/dpa_interface.rs @@ -40,6 +40,7 @@ impl TryFrom for NewDpaInterface { mac_address, device_type: value.device_type, pci_name: value.pci_name, + device_description: value.device_description, }) } } @@ -54,11 +55,6 @@ impl From for rpc::forge::DpaInterface { None => "None".to_string(), }; - let network_status_observation = match src.network_status_observation { - Some(nso) => nso.to_string(), - None => "None".to_string(), - }; - let cstate = match src.card_state { Some(cs) => cs.to_string(), None => "None".to_string(), @@ -96,13 +92,13 @@ impl From for rpc::forge::DpaInterface { network_config: network_config.to_string(), network_config_version: network_config_version.to_string(), controller_state_outcome: outcome, - network_status_observation, history, card_state: cstate, pci_name: src.pci_name, underlay_ip: underlay, overlay_ip: overlay, mlxconfig_profile: 
src.mlxconfig_profile, + device_description: src.device_description, } } } @@ -138,7 +134,6 @@ impl TryFrom for DpaInterface { } })?, }, - network_status_observation: value.network_status_observation, card_state: value.card_state, device_info: value.device_info, device_info_ts: value.device_info_ts, @@ -147,6 +142,7 @@ impl TryFrom for DpaInterface { pci_name: value.pci_name, underlay_ip: value.underlay_ip, overlay_ip: value.overlay_ip, + device_description: value.device_description, }) } } diff --git a/crates/api-model/src/rpc_conv/instance/config.rs b/crates/api-model/src/rpc_conv/instance/config.rs index 95b65e6c4d..9393a8c1a0 100644 --- a/crates/api-model/src/rpc_conv/instance/config.rs +++ b/crates/api-model/src/rpc_conv/instance/config.rs @@ -31,6 +31,7 @@ use crate::instance::config::extension_services::{ use crate::instance::config::infiniband::InstanceInfinibandConfig; use crate::instance::config::network::InstanceNetworkConfig; use crate::instance::config::nvlink::InstanceNvLinkConfig; +use crate::instance::config::spx::InstanceSpxConfig; use crate::instance::config::tenant_config::TenantConfig; use crate::os::OperatingSystem; @@ -74,6 +75,13 @@ impl TryFrom for InstanceConfig { .transpose()? .unwrap_or(InstanceNvLinkConfig::default()); + // Spx config is optional + let spxconfig = config + .spxconfig + .map(InstanceSpxConfig::try_from) + .transpose()? + .unwrap_or(InstanceSpxConfig::default()); + Ok(InstanceConfig { tenant, os, @@ -88,6 +96,7 @@ impl TryFrom for InstanceConfig { })?, extension_services, nvlink, + spxconfig, }) } } @@ -109,6 +118,11 @@ impl TryFrom for rpc::InstanceConfig { true => None, false => Some(nvlink), }; + let spxconfig = rpc::forge::InstanceSpxConfig::try_from(config.spxconfig)?; + let spxconfig = match spxconfig.spx_attachments.is_empty() { + true => None, + false => Some(spxconfig), + }; // We only show user active extension services, and track terminating services internally. 
let active_extension_services: Vec = config @@ -134,6 +148,7 @@ impl TryFrom for rpc::InstanceConfig { network_security_group_id: config.network_security_group_id.map(|i| i.to_string()), dpu_extension_services: extension_services, nvlink, + spxconfig, }) } } diff --git a/crates/api-model/src/rpc_conv/instance/snapshot.rs b/crates/api-model/src/rpc_conv/instance/snapshot.rs index 563e3774bd..26f5e9482d 100644 --- a/crates/api-model/src/rpc_conv/instance/snapshot.rs +++ b/crates/api-model/src/rpc_conv/instance/snapshot.rs @@ -25,6 +25,7 @@ use crate::instance::snapshot::InstanceSnapshot; use crate::instance::status::InstanceStatus; use crate::machine::infiniband::MachineInfinibandStatusObservation; use crate::machine::nvlink::MachineNvLinkStatusObservation; +use crate::machine::spx::MachineSpxStatusObservation; use crate::machine::{ManagedHostState, ReprovisionRequest}; use crate::rpc_conv::instance::status::instance_status_from_config_and_observation; @@ -37,6 +38,7 @@ pub fn instance_snapshot_derive_status( reprovision_request: Option, ib_status: Option<&MachineInfinibandStatusObservation>, nvlink_status: Option<&MachineNvLinkStatusObservation>, + spx_status: Option<&MachineSpxStatusObservation>, ) -> Result { instance_status_from_config_and_observation( dpu_id_to_device_map, @@ -48,12 +50,14 @@ pub fn instance_snapshot_derive_status( snapshot.extension_services_config_version, ), Versioned::new(&snapshot.config.nvlink, snapshot.nvlink_config_version), + Versioned::new(&snapshot.config.spxconfig, snapshot.spx_config_version), &snapshot.observations, managed_host_state, snapshot.deleted.is_some(), reprovision_request, ib_status, nvlink_status, + spx_status, snapshot.update_network_config_request.is_some(), ) } diff --git a/crates/api-model/src/rpc_conv/instance/status.rs b/crates/api-model/src/rpc_conv/instance/status.rs index fab4d209ae..a46d3a5aa8 100644 --- a/crates/api-model/src/rpc_conv/instance/status.rs +++ b/crates/api-model/src/rpc_conv/instance/status.rs 
@@ -26,9 +26,12 @@ use crate::instance::config::extension_services::InstanceExtensionServicesConfig use crate::instance::config::infiniband::InstanceInfinibandConfig; use crate::instance::config::network::InstanceNetworkConfig; use crate::instance::config::nvlink::InstanceNvLinkConfig; +use crate::instance::config::spx::InstanceSpxConfig; +use crate::instance::status::spx::InstanceSpxStatus; use crate::instance::status::{InstanceStatus, InstanceStatusObservations, SyncState}; use crate::machine::infiniband::MachineInfinibandStatusObservation; use crate::machine::nvlink::MachineNvLinkStatusObservation; +use crate::machine::spx::MachineSpxStatusObservation; use crate::machine::{ManagedHostState, ReprovisionRequest}; use crate::rpc_conv::instance::status::tenant::instance_status_tenant_state; @@ -48,6 +51,7 @@ impl TryFrom for rpc::InstanceStatus { infiniband: Some(status.infiniband.try_into()?), dpu_extension_services: Some(status.extension_services.try_into()?), nvlink: Some(status.nvlink.try_into()?), + spx_status: Some(status.spx_status.try_into()?), configs_synced: rpc::SyncState::try_from(status.configs_synced)? 
as i32, update: status.reprovision_request.map(|request| request.into()), }) @@ -70,12 +74,14 @@ pub fn instance_status_from_config_and_observation( ib_config: Versioned<&InstanceInfinibandConfig>, extension_services_config: Versioned<&InstanceExtensionServicesConfig>, nvlink_config: Versioned<&InstanceNvLinkConfig>, + spx_config: Versioned<&InstanceSpxConfig>, observations: &InstanceStatusObservations, machine_state: ManagedHostState, delete_requested: bool, reprovision_request: Option, ib_status: Option<&MachineInfinibandStatusObservation>, nvlink_status: Option<&MachineNvLinkStatusObservation>, + spx_status: Option<&MachineSpxStatusObservation>, is_network_config_request_pending: bool, ) -> Result { let mut instance_config_synced = SyncState::Synced; @@ -121,6 +127,8 @@ pub fn instance_status_from_config_and_observation( nvlink_status, ); + let spx_status = InstanceSpxStatus::from_config_and_observation(spx_config, spx_status); + let phone_home_last_contact = observations.phone_home_last_contact; // If additional configs are added, they need to be incorporated here @@ -129,6 +137,7 @@ pub fn instance_status_from_config_and_observation( infiniband.configs_synced, extension_services.configs_synced, nvlink.configs_synced, + spx_status.configs_synced, instance_config_synced, ) { ( @@ -137,6 +146,7 @@ pub fn instance_status_from_config_and_observation( SyncState::Synced, SyncState::Synced, SyncState::Synced, + SyncState::Synced, ) => SyncState::Synced, _ => SyncState::Pending, }; @@ -168,6 +178,7 @@ pub fn instance_status_from_config_and_observation( infiniband, extension_services, nvlink, + spx_status, configs_synced, reprovision_request, }) diff --git a/crates/api-model/src/rpc_conv/machine/mod.rs b/crates/api-model/src/rpc_conv/machine/mod.rs index a40bded8e9..f024055785 100644 --- a/crates/api-model/src/rpc_conv/machine/mod.rs +++ b/crates/api-model/src/rpc_conv/machine/mod.rs @@ -85,6 +85,7 @@ impl TryFrom for Option { .infiniband_status_observation .as_ref(), 
snapshot.host_snapshot.nvlink_status_observation.as_ref(), + snapshot.host_snapshot.spx_status_observation.as_ref(), )?; Ok(Some(rpc::Instance { @@ -283,6 +284,7 @@ impl From for rpc::forge::Machine { nvlink_status_observation: machine .nvlink_status_observation .map(|status| status.into()), + spx_status_observation: machine.spx_status_observation.map(|status| status.into()), placement_in_rack: Some(rpc::forge::PlacementInRack { slot_number: machine.slot_number, tray_index: machine.tray_index, diff --git a/crates/api-model/src/spx_partition.rs b/crates/api-model/src/spx_partition.rs new file mode 100644 index 0000000000..bdc4d70af5 --- /dev/null +++ b/crates/api-model/src/spx_partition.rs @@ -0,0 +1,169 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +use carbide_uuid::spx::SpxPartitionId; +use chrono::{DateTime, Utc}; +use config_version::ConfigVersion; +use rpc::errors::RpcDataConversionError; +use rpc::forge as rpc_forge; +use serde::{Deserialize, Serialize}; +use sqlx::postgres::PgRow; +use sqlx::{FromRow, Row}; + +use crate::tenant::TenantOrganizationId; + +#[derive(Clone, Debug, Default)] +pub struct SpxPartitionSearchFilter { + pub name: Option, + pub tenant_org_id: Option, +} + +impl From for SpxPartitionSearchFilter { + fn from(filter: rpc_forge::SpxPartitionSearchFilter) -> Self { + SpxPartitionSearchFilter { + name: filter.name, + tenant_org_id: filter.tenant_org_id, + } + } +} + +#[derive(Debug, Clone)] +pub struct NewSpxPartition { + pub id: SpxPartitionId, + pub name: String, + pub description: String, + pub tenant_organization_id: String, + pub vni: Option, +} + +impl TryFrom for NewSpxPartition { + type Error = RpcDataConversionError; + fn try_from(req: rpc_forge::SpxPartitionCreationRequest) -> Result { + if req.tenant_organization_id.is_empty() { + return Err(RpcDataConversionError::InvalidArgument( + "tenant_organization_id is required".to_string(), + )); + } + + let id = req.id.unwrap_or_else(|| uuid::Uuid::new_v4().into()); + + let (name, description) = req + .metadata + .map(|m| (m.name, m.description)) + .unwrap_or_default(); + + Ok(NewSpxPartition { + id, + name, + description, + tenant_organization_id: req.tenant_organization_id, + vni: req.vni.map(|v| v.try_into()).transpose().map_err( + |e: std::num::TryFromIntError| { + RpcDataConversionError::InvalidValue( + format!( + "`{}` cannot be converted to VNI", + req.vni.unwrap_or_default() + ), + e.to_string(), + ) + }, + )?, + }) + } +} + +impl TryFrom for rpc_forge::SpxPartition { + type Error = RpcDataConversionError; + fn try_from(src: SpxPartition) -> Result { + if src.vni.is_none() { + return Err(RpcDataConversionError::InvalidValue( + "VNI is required".to_string(), + "VNI is required".to_string(), + )); + } + let vni = 
src.vni.unwrap(); + Ok(rpc_forge::SpxPartition { + id: Some(src.id), + metadata: Some(rpc_forge::Metadata { + name: src.name, + description: src.description, + ..Default::default() + }), + tenant_organization_id: src.tenant_organization_id.to_string(), + vni: vni as u32, + }) + } +} + +#[derive(Debug, Clone)] +pub struct SpxPartition { + pub id: SpxPartitionId, + pub name: String, + pub description: String, + pub tenant_organization_id: TenantOrganizationId, + pub config_version: ConfigVersion, + pub vni: Option, + pub created: DateTime, + pub updated: DateTime, + pub deleted: Option>, +} + +/// Returns whether the SPX partition has been soft-deleted +pub fn is_marked_as_deleted(partition: &SpxPartition) -> bool { + partition.deleted.is_some() +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct SpxPartitionSnapshotPgJson { + pub id: SpxPartitionId, + pub name: String, + pub description: String, + pub tenant_organization_id: String, + pub config_version: ConfigVersion, + pub vni: Option, + pub created: DateTime, + pub updated: DateTime, + pub deleted: Option>, +} + +impl TryFrom for SpxPartition { + type Error = sqlx::Error; + fn try_from(value: SpxPartitionSnapshotPgJson) -> sqlx::Result { + let tenant_organization_id = + TenantOrganizationId::try_from(value.tenant_organization_id.clone()) + .map_err(|e| sqlx::Error::Decode(Box::new(e)))?; + + Ok(Self { + id: value.id, + name: value.name, + description: value.description, + tenant_organization_id, + config_version: value.config_version, + vni: value.vni, + created: value.created, + updated: value.updated, + deleted: value.deleted, + }) + } +} + +impl<'r> FromRow<'r, PgRow> for SpxPartitionSnapshotPgJson { + fn from_row(row: &'r PgRow) -> Result { + let json: serde_json::value::Value = row.try_get(0)?; + SpxPartitionSnapshotPgJson::deserialize(json).map_err(|err| sqlx::Error::Decode(err.into())) + } +} diff --git a/crates/api/Cargo.toml b/crates/api/Cargo.toml index f6f3218e11..368a664a52 100644 --- 
a/crates/api/Cargo.toml +++ b/crates/api/Cargo.toml @@ -81,7 +81,11 @@ askama_escape = { workspace = true } asn1-rs = { workspace = true } async-trait = { workspace = true } axum = { workspace = true, features = ["http2"] } -axum-extra = { workspace = true, features = ["cookie", "cookie-private", "typed-header"] } +axum-extra = { workspace = true, features = [ + "cookie", + "cookie-private", + "typed-header", +] } base64 = { workspace = true } byteorder = { workspace = true } bytes = { workspace = true } @@ -110,10 +114,10 @@ itertools = { workspace = true } jsonwebtoken = { features = ["rust_crypto"], workspace = true } k8s-openapi = { features = ["latest"], workspace = true } kube = { default-features = false, features = [ - "runtime", - "derive", - "client", - "rustls-tls", + "runtime", + "derive", + "client", + "rustls-tls", ], workspace = true } lazy_static = { workspace = true } libredfish = { workspace = true } @@ -126,13 +130,13 @@ opentelemetry = { workspace = true, features = ["logs"] } opentelemetry-otlp = { workspace = true, features = ["grpc-tonic"] } opentelemetry-prometheus.workspace = true opentelemetry-semantic-conventions = { features = [ - "semconv_experimental", + "semconv_experimental", ], workspace = true } opentelemetry_sdk = { workspace = true, features = [ - "logs", - "rt-tokio", - "spec_unstable_metrics_views", - "testing", + "logs", + "rt-tokio", + "spec_unstable_metrics_views", + "testing", ] } pkcs1 = { workspace = true } p256 = { workspace = true } @@ -141,8 +145,8 @@ prost-types = { workspace = true } rand = { workspace = true } regex = { workspace = true } reqwest = { default-features = false, features = [ - "rustls", - "stream", + "rustls", + "stream", ], workspace = true } rsa = { workspace = true } rumqttc = { workspace = true } @@ -154,15 +158,15 @@ serde_json = { workspace = true } serde_yaml = { workspace = true } sha2 = { workspace = true } sqlx = { workspace = true, features = [ - "runtime-tokio-rustls", - "mac_address", - 
"ipnetwork", - "uuid", - "migrate", - "postgres", - "chrono", - "macros", - "json", + "runtime-tokio-rustls", + "mac_address", + "ipnetwork", + "uuid", + "migrate", + "postgres", + "chrono", + "macros", + "json", ] } strum = { workspace = true } temp-dir = { workspace = true } @@ -178,16 +182,16 @@ tonic = { workspace = true } tonic-reflection = { workspace = true } tower = { workspace = true } tower-http = { features = [ - "add-extension", - "auth", - "normalize-path", + "add-extension", + "auth", + "normalize-path", ], workspace = true } tracing = { workspace = true } tracing-log = { workspace = true } tracing-opentelemetry = { workspace = true } tracing-subscriber = { features = [ - "env-filter", - "local-time", + "env-filter", + "local-time", ], workspace = true } tss-esapi = { optional = true, workspace = true } url = { features = ["serde"], workspace = true } @@ -214,7 +218,9 @@ rcgen = { workspace = true } carbide-macros = { path = "../macros" } carbide-sqlx-testing = { path = "../sqlx-testing", default-features = false } carbide-prost-builder = { path = "../prost-builder" } -carbide-nvlink-manager = { path = "../nvlink-manager", features = ["test-support"] } +carbide-nvlink-manager = { path = "../nvlink-manager", features = [ + "test-support", +] } carbide-redfish = { path = "../redfish", features = ["test-support"] } carbide-utils = { path = "../utils", features = ["test-support"] } state-controller = { path = "../state-controller", features = ["test-support"] } diff --git a/crates/api/src/api.rs b/crates/api/src/api.rs index 6eeed31538..e6b09d2ac7 100644 --- a/crates/api/src/api.rs +++ b/crates/api/src/api.rs @@ -261,6 +261,34 @@ impl Forge for Api { crate::handlers::vpc_peering::delete(self, request).await } + async fn create_spx_partition( + &self, + request: Request, + ) -> Result, Status> { + crate::handlers::spx_partition::create(self, request).await + } + + async fn delete_spx_partition( + &self, + request: Request, + ) -> Result, Status> { + 
crate::handlers::spx_partition::delete(self, request).await + } + + async fn find_spx_partition_ids( + &self, + request: Request, + ) -> Result, Status> { + crate::handlers::spx_partition::find_ids(self, request).await + } + + async fn find_spx_partitions_by_ids( + &self, + request: Request, + ) -> Result, Status> { + crate::handlers::spx_partition::find_by_ids(self, request).await + } + async fn find_ib_partition_ids( &self, request: Request, @@ -2740,16 +2768,6 @@ impl Forge for Api { crate::handlers::dpa::delete(self, request).await } - // set_dpa_network_observaction_status is for debugging purposes. - // In practice, the MQTT subscriber running in Carbide will update - // the observation status - async fn set_dpa_network_observation_status( - &self, - request: Request, - ) -> Result, Status> { - crate::handlers::dpa::set_dpa_network_observation_status(self, request).await - } - async fn create_bmc_user( &self, request: Request, diff --git a/crates/api/src/auth/internal_rbac_rules.rs b/crates/api/src/auth/internal_rbac_rules.rs index 57e2d2ab09..f0b97189f6 100644 --- a/crates/api/src/auth/internal_rbac_rules.rs +++ b/crates/api/src/auth/internal_rbac_rules.rs @@ -791,6 +791,10 @@ impl InternalRBACRules { x.perm("MlxAdminConfigSet", vec![ForgeAdminCLI]); x.perm("MlxAdminConfigSync", vec![ForgeAdminCLI]); x.perm("MlxAdminConfigCompare", vec![ForgeAdminCLI]); + x.perm("CreateSpxPartition", vec![ForgeAdminCLI, SiteAgent]); + x.perm("DeleteSpxPartition", vec![ForgeAdminCLI, SiteAgent]); + x.perm("FindSpxPartitionIds", vec![ForgeAdminCLI, SiteAgent]); + x.perm("FindSpxPartitionsByIds", vec![ForgeAdminCLI, SiteAgent]); x.perm("FindNVLinkPartitionIds", vec![ForgeAdminCLI, SiteAgent]); x.perm("FindNVLinkPartitionsByIds", vec![ForgeAdminCLI, SiteAgent]); x.perm("NVLinkPartitionsForTenant", vec![ForgeAdminCLI, SiteAgent]); diff --git a/crates/api/src/cfg/file.rs b/crates/api/src/cfg/file.rs index e2551a05a6..5e32cc8751 100644 --- a/crates/api/src/cfg/file.rs +++ 
b/crates/api/src/cfg/file.rs @@ -1199,7 +1199,7 @@ impl CarbideConfig { self.dpa_config.as_ref().map(|conf| conf.mqtt_broker_port) } - pub fn get_hb_interval(&self) -> Option { + pub fn get_hb_interval(&self) -> Option { self.dpa_config.as_ref().map(|conf| conf.hb_interval) } @@ -1596,8 +1596,12 @@ pub struct InitialObjectsConfig { } impl DpaConfig { - pub const fn default_hb_interval() -> chrono::Duration { - Duration::minutes(2) + pub const fn default_hb_interval() -> chrono::TimeDelta { + chrono::TimeDelta::minutes(2) + } + + pub const fn default_monitor_run_interval() -> std::time::Duration { + std::time::Duration::from_secs(60) } pub const fn default_subnet_ip() -> Ipv4Addr { @@ -1614,6 +1618,7 @@ impl Default for DpaConfig { subnet_ip: Self::default_subnet_ip(), subnet_mask: 0, hb_interval: Self::default_hb_interval(), + monitor_run_interval: Self::default_monitor_run_interval(), auth: MqttAuthConfig::default(), } } @@ -2560,6 +2565,14 @@ pub struct DpaConfig { )] pub hb_interval: chrono::TimeDelta, + /// The interval at which we run the DPA monitor. 
+ #[serde( + default = "DpaConfig::default_monitor_run_interval", + deserialize_with = "deserialize_duration", + serialize_with = "as_std_duration" + )] + pub monitor_run_interval: std::time::Duration, + #[serde(default)] pub auth: MqttAuthConfig, } @@ -3806,7 +3819,8 @@ mqtt_endpoint = "mqtt.forge" enabled: true, mqtt_endpoint: "mqtt.forge".to_string(), mqtt_broker_port: 1884, - hb_interval: Duration::minutes(2), + hb_interval: chrono::TimeDelta::minutes(2), + monitor_run_interval: std::time::Duration::from_secs(60), subnet_ip: Ipv4Addr::UNSPECIFIED, subnet_mask: 0_i32, auth: MqttAuthConfig::default(), diff --git a/crates/api/src/cfg/test_data/initial_objects.toml b/crates/api/src/cfg/test_data/initial_objects.toml index 4e7bf4bde1..6d73e95543 100644 --- a/crates/api/src/cfg/test_data/initial_objects.toml +++ b/crates/api/src/cfg/test_data/initial_objects.toml @@ -31,4 +31,3 @@ prefix = "172.99.0.0/26" gateway = "172.99.0.1" mtu = 1500 reserve_first = 5 - diff --git a/crates/api/src/dpa/handler.rs b/crates/api/src/dpa/handler.rs index 74e60ecde6..f27e10d631 100644 --- a/crates/api/src/dpa/handler.rs +++ b/crates/api/src/dpa/handler.rs @@ -15,21 +15,30 @@ * limitations under the License. */ +/* + * This file contains code that interacts with the SVPC agent on the DPA + * using MQTT = Code to send commands via MQTT, code that handles messages + * received from the DPA via MQTT and code to start the MQTT client. 
+ */ + use std::net::Ipv4Addr; use std::str::FromStr; use std::sync::Arc; use ::rpc::protos::dpa_rpc::{DpaMetadata, Pfvni, SetVni}; +use carbide_uuid::spx::NULL_SPX_PARTITION_ID; use config_version::ConfigVersion; +use db::ObjectColumnFilter; use mac_address::MacAddress; -use model::dpa_interface::DpaInterfaceNetworkStatusObservation; +use model::instance::config::spx::SpxAttachmentType; +use model::machine::machine_search_config::MachineSearchConfig; +use model::machine::spx::{MachineSpxAttachmentStatusObservation, MachineSpxStatusObservation}; use mqttea::client::{ClientOptions, MqtteaClient}; use mqttea::registry::traits::ProtobufRegistration; use rumqttc::QoS; use tokio::task::JoinSet; use tokio::time::{Duration, sleep}; use tokio_util::sync::CancellationToken; -use tracing::error; use crate::api::Api; @@ -40,18 +49,24 @@ pub struct DpaInfo { } // We just received a message from a DPA via the MQTT broker. Handle that message here. +// We figure out the DPA interface belonging to this message and update the observed +// status of the DPA in the machine's spx_status_observation field. 
async fn handle_dpa_message(services: Arc, message: SetVni, topic: String) { let tokens: Vec<&str> = topic.split("/").collect(); if tokens.len() < 3 { - error!("handle_dpa_message - unusable topic: {}", topic); + tracing::error!( + "handle_dpa_message: token len {} is unusable topic: {}", + tokens.len(), + topic + ); return; } let macaddr = match MacAddress::from_str(tokens[2]) { Ok(m) => m, Err(_e) => { - error!( - "handle_dpa_message - Unable to parse mac addr: {}", + tracing::error!( + "handle_dpa_message: Unable to parse mac addr: {}", tokens[2] ); return; @@ -59,8 +74,8 @@ async fn handle_dpa_message(services: Arc, message: SetVni, topic: String) }; if message.metadata.is_none() || message.pf_info.is_none() { - error!( - "handle_dpa_message - message metadata or pf_info is empty: {:#?}", + tracing::error!( + "handle_dpa_message: message metadata or pf_info is empty: {:#?}", message ); return; @@ -71,7 +86,7 @@ async fn handle_dpa_message(services: Arc, message: SetVni, topic: String) let mut txn = match services.database_connection.begin().await { Ok(t) => t, Err(e) => { - error!("handle_dpa_message - Unable to start txn: {:#?}", e); + tracing::error!("handle_dpa_message: Unable to start txn: {:#?}", e); return; } }; @@ -79,15 +94,17 @@ async fn handle_dpa_message(services: Arc, message: SetVni, topic: String) let mut dpa_ifs = match db::dpa_interface::find_by_mac_addr(txn.as_mut(), &macaddr).await { Ok(ifs) => ifs, Err(e) => { - error!("handle_dpa_message - Error from find_by_mac_addr {e}"); + tracing::error!( + "handle_dpa_message: Error for mac {macaddr} from find_by_mac_addr {:#?}", + e + ); return; } }; if dpa_ifs.len() != 1 { - error!( - "handle_dpa_message - invalid dpa_ifs len from find_by_mac_addr maddr: {} len: {}", - macaddr, + tracing::error!( + "handle_dpa_message: invalid dpa_ifs len from find_by_mac_addr maddr {macaddr} len {:#?}", dpa_ifs.len() ); return; @@ -99,33 +116,145 @@ async fn handle_dpa_message(services: Arc, message: SetVni, topic: 
String) let ncv = match ConfigVersion::from_str(&md.revision) { Ok(ncv) => ncv, Err(e) => { - error!( - "handle_dpa_message - Error parsing config version from DPA Ack msg {:#?} {e}", - message + tracing::error!( + "handle_dpa_message: Error parsing config version from DPA Ack msg {:#?} {:#?}", + message, + e ); ConfigVersion::invalid() } }; + // We checked that pf_info is not None above, so unwrap is safe. + // If vni is non-zero, then we are in a tenancy and the partition_id is not None. + // We need to get the partition_id corresponding to this vni from the database. + let vni = message.pf_info.as_ref().unwrap().vni; + + let mut spx_partition_id = NULL_SPX_PARTITION_ID; + + if vni != 0 { + let partition = match db::spx_partition::find_by( + txn.as_mut(), + ObjectColumnFilter::List(db::spx_partition::VniColumn, &[vni]), + ) + .await + { + Ok(p) => p, + Err(e) => { + tracing::error!( + "handle_dpa_message: Error for vni {vni} from find_by_vni {:#?}", + e + ); + return; + } + }; + + if partition.len() != 1 { + tracing::error!("handle_dpa_message: SPX partition with vni {vni} is not found"); + return; + } + + let spx_partition = &partition[0]; + spx_partition_id = spx_partition.id; + + tracing::debug!( + "handle_dpa_message: SPX partition with vni {vni} found: {:#?}", + spx_partition + ); + } else { + tracing::debug!( + "handle_dpa_message: received vni 0 in DPA message {:#?}", + message + ); + } + let dpa_if = dpa_ifs.remove(0); - let observation = DpaInterfaceNetworkStatusObservation { + let at_status = MachineSpxAttachmentStatusObservation { + mac_address: macaddr, + partition_id: Some(spx_partition_id), + attachment_type: Some(SpxAttachmentType::Physical), // Only Physical attachments are supported at the moment + virtual_function_id: None, + config_version: Some(ncv), observed_at: chrono::Utc::now(), - network_config_version: Some(ncv), }; - match db::dpa_interface::update_network_observation(&dpa_if, &mut txn, &observation).await { + // Get the machine
corresponding to the DPA interface. + // The machine entry needs to be obtained with FOR UPDATE to avoid race conditions. + let machine = match db::machine::find_one( + txn.as_mut(), + &dpa_if.machine_id, + MachineSearchConfig { + for_update: true, + ..Default::default() + }, + ) + .await + { + Ok(m) => m, + Err(e) => { + tracing::error!("handle_dpa_message: Error for machine {:#?}", e); + return; + } + }; + + if machine.is_none() { + tracing::error!( + "handle_dpa_message: Machine not found for DPA interface {:#?}", + dpa_if + ); + return; + } + + let machine = machine.unwrap(); + + let cur_spx_status_observations = machine.spx_status_observation.unwrap_or_default(); + let mut new_spx_status_observations = MachineSpxStatusObservation::default(); + + let mut add_new_observation = true; + + for obs in cur_spx_status_observations.spx_attachments.iter() { + if obs.mac_address != macaddr { + new_spx_status_observations + .spx_attachments + .push(obs.clone()); + } else if obs.observed_at < at_status.observed_at { + new_spx_status_observations + .spx_attachments + .push(at_status.clone()); + add_new_observation = false; + } + } + + if add_new_observation { + new_spx_status_observations + .spx_attachments + .push(at_status.clone()); + } + + match db::machine::update_spx_status_observation( + &mut txn, + &dpa_if.machine_id, + &new_spx_status_observations, + ) + .await + { Ok(_r) => { let res = txn.commit().await; if res.is_err() { - error!( - "handle_dpa_message - txn commit error for msg: {:#?} res: {:#?}", - message, res + tracing::error!( + "handle_dpa_message: txn commit error for msg {:#?} res {:#?}", + message, + res ); } } Err(e) => { - error!("handle_dpa_message - update_network_observation error: {e}"); + tracing::error!( + "handle_dpa_message: update_network_observation error for msg {:#?} {:#?}", + message, + e + ); } } } @@ -170,9 +299,11 @@ pub async fn send_dpa_command( println!("send_dpa_command revision: {revision} vni: {vni}"); } Err(e) => { - error!( - 
"send_dpa_command - error: {:#?} sending message: {:#?} to topic: {}", - e, svni, topic + tracing::error!( + "send_dpa_command: error: {:#?} sending message: {:#?} to topic: {}", + e, + svni, + topic ); return Err(eyre::eyre!("send_message error: {e}")); } @@ -234,7 +365,7 @@ pub async fn start_dpa_handler( }) .await { - println!("handle_dpa_message failed: {e}"); + tracing::error!("handle_dpa_message failed: {e}"); } } }) diff --git a/crates/api/src/dpa_monitor/metrics.rs b/crates/api/src/dpa_monitor/metrics.rs new file mode 100644 index 0000000000..10fc6f3174 --- /dev/null +++ b/crates/api/src/dpa_monitor/metrics.rs @@ -0,0 +1,146 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +use std::fmt; +use std::fmt::Display; +use std::time::Duration; + +use ::carbide_utils::metrics::SharedMetricsHolder; +use opentelemetry::metrics::{Counter, Histogram, Meter}; + +/// Metrics that are gathered in a single dpa monitor run +#[derive(Clone, Debug)] +pub struct DpaMonitorMetrics { + /// Start time of metrics gathering + pub recording_started_at: std::time::Instant, + pub num_machines_scanned: usize, + pub num_instances_scanned: usize, + pub num_dpa_interfaces_scanned: usize, + pub num_heartbeats_sent: usize, +} + +impl DpaMonitorMetrics { + pub fn new() -> Self { + Self { + recording_started_at: std::time::Instant::now(), + num_machines_scanned: 0, + num_instances_scanned: 0, + num_dpa_interfaces_scanned: 0, + num_heartbeats_sent: 0, + } + } +} + +impl Display for DpaMonitorMetrics { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{{ machines_scanned: {}, instances_scanned: {}, duration: {} }}", + self.num_machines_scanned, + self.num_instances_scanned, + self.recording_started_at.elapsed().as_millis(), + ) + } +} + +/// Stores Metric data shared between the dpa monitor and the OpenTelemetry background task +pub struct MetricHolder { + instruments: DpaMonitorInstruments, + last_iteration_metrics: SharedMetricsHolder, +} + +impl MetricHolder { + pub fn new(meter: Meter, hold_period: Duration) -> Self { + let last_iteration_metrics = SharedMetricsHolder::with_hold_period(hold_period); + let instruments = DpaMonitorInstruments::new(meter, last_iteration_metrics.clone()); + instruments.init_counters_and_histograms(); + Self { + instruments, + last_iteration_metrics, + } + } + + /// Updates the most recent metrics + pub fn update_metrics(&self, metrics: DpaMonitorMetrics) { + // Emit the last recent latency metrics + self.instruments.emit_counters_and_histograms(&metrics); + self.last_iteration_metrics.update(metrics); + } +} + +/// Instruments that are used by pub struct DpaMonitor +#[allow(dead_code)] +pub struct 
DpaMonitorInstruments { + pub iteration_latency: Histogram, + pub operations_latency: Histogram, + pub dpa_config_apply_latency: Histogram, + pub heartbeats_sent: Counter, +} + +impl DpaMonitorInstruments { + pub fn new(meter: Meter, shared_metrics: SharedMetricsHolder) -> Self { + let iteration_latency = meter + .f64_histogram("carbide_dpa_monitor_iteration_latency") + .with_description("Time consumed for one monitor iteration") + .with_unit("ms") + .build(); + let dpa_config_apply_latency = meter + .f64_histogram("carbide_dpa_monitor_dpa_config_apply_latency") + .with_description("Time since dpa config was requested for this instance") + .with_unit("ms") + .build(); + let operations_latency = meter + .f64_histogram("carbide_dpa_monitor_operations_latency") + .with_description("Time consumed for one operations") + .with_unit("ms") + .build(); + let heartbeats_sent = meter + .u64_counter("carbide_dpa_monitor_heartbeats_sent") + .with_description("The number of heartbeats sent to DPA interfaces") + .build(); + + meter + .u64_observable_gauge("carbide_dpa_monitor_interfaces_scanned_count") + .with_description("The number of DPA interfaces scanned in the last monitor iteration") + .with_callback(move |o| { + shared_metrics.if_available(|metrics, attrs| { + o.observe(metrics.num_dpa_interfaces_scanned as u64, attrs); + }) + }) + .build(); + + Self { + iteration_latency, + dpa_config_apply_latency, + operations_latency, + heartbeats_sent, + } + } + + fn init_counters_and_histograms(&self) { + self.heartbeats_sent.add(0, &[]); + } + + fn emit_counters_and_histograms(&self, metrics: &DpaMonitorMetrics) { + self.iteration_latency.record( + 1000.0 * metrics.recording_started_at.elapsed().as_secs_f64(), + &[], + ); + self.heartbeats_sent + .add(metrics.num_heartbeats_sent as u64, &[]); + } +} diff --git a/crates/api/src/dpa_monitor/mod.rs b/crates/api/src/dpa_monitor/mod.rs new file mode 100644 index 0000000000..a479045ce3 --- /dev/null +++ 
b/crates/api/src/dpa_monitor/mod.rs @@ -0,0 +1,915 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +use std::collections::HashMap; +use std::io; +use std::sync::Arc; +use std::time::Duration; + +use carbide_utils::periodic_timer::PeriodicTimer; +use carbide_uuid::machine::MachineId; +use carbide_uuid::spx::NULL_SPX_PARTITION_ID; +use chrono::TimeDelta; +use db::db_read::PgPoolReader; +use db::work_lock_manager::WorkLockManagerHandle; +use db::{self, ObjectColumnFilter, TransactionVending}; +use metrics::DpaMonitorMetrics; +use model::dpa_interface::DpaLockMode::{Locked, Unlocked}; +use model::dpa_interface::{DpaInterface, DpaInterfaceControllerState}; +use model::instance::snapshot::InstanceSnapshot; +use model::machine::machine_search_config::MachineSearchConfig; +use model::machine::{HostHealthConfig, LoadSnapshotOptions, Machine, ManagedHostStateSnapshot}; +use mqttea::client::MqtteaClient; +use sqlx::{PgConnection, PgPool, PgTransaction}; +use tokio::task::JoinSet; +use tokio_util::sync::CancellationToken; +use tracing::Instrument; + +use crate::cfg::file::DpaConfig; +use crate::dpa::handler::DpaInfo; +use crate::{CarbideError, CarbideResult}; + +mod metrics; + +pub struct DpaMonitor { + db_services: DbServices, + dpa_info: Option>, + config: DpaConfig, + host_health: HostHealthConfig, + metric_holder: 
Arc, + work_lock_manager_handle: WorkLockManagerHandle, +} + +pub struct DbServices { + db_pool: PgPool, +} + +// This carries the result of running the handler for a single dpa interface. +// If the dpa interface needs to move to a new state, the new state is returned. +// If we started a transaction in the handler, the transaction is returned. +pub struct HandlerResult { + new_state: Option, + txn: Option>, +} + +impl DpaMonitor { + const ITERATION_WORK_KEY: &'static str = "DpaMonitor::run_single_iteration"; + + pub fn new( + db_pool: PgPool, + _db_reader: PgPoolReader, + dpa_info: Option>, + _meter: opentelemetry::metrics::Meter, + config: DpaConfig, + host_health: HostHealthConfig, + work_lock_manager_handle: WorkLockManagerHandle, + ) -> Self { + let hold_period = config + .monitor_run_interval + .saturating_add(std::time::Duration::from_secs(60)); + + let metric_holder = Arc::new(metrics::MetricHolder::new(_meter, hold_period)); + + Self { + db_services: DbServices { db_pool }, + dpa_info, + config, + host_health, + work_lock_manager_handle, + metric_holder, + } + } + + pub fn start( + mut self, + join_set: &mut JoinSet<()>, + cancel_token: CancellationToken, + ) -> io::Result<()> { + join_set + .build_task() + .name("dpa-monitor") + .spawn(async move { self.run(cancel_token).await })?; + + Ok(()) + } + + pub async fn run(&mut self, cancel_token: CancellationToken) { + let timer = PeriodicTimer::new(self.config.monitor_run_interval); + loop { + let mut tick = timer.tick(); + match self.run_single_iteration().await { + Ok(num_changes) => { + if num_changes > 0 { + // Decrease the interval if changes have been made. + tick.set_interval(Duration::from_millis(1000)); + } + } + Err(e) => { + tracing::warn!("DpaMonitor error: {}", e); + } + } + + tokio::select!
{ + _ = tick.sleep() => {}, + _ = cancel_token.cancelled() => { + tracing::info!("DpaMonitor stop was requested"); + return; + } + } + } + } + + pub async fn run_single_iteration(&mut self) -> CarbideResult { + let mut metrics = DpaMonitorMetrics::new(); + let span_id: String = format!("{:#x}", u64::from_le_bytes(rand::random::<[u8; 8]>())); + let check_dpa_span = tracing::span!( + parent: None, + tracing::Level::INFO, + "dpa-monitor", + span_id, + ); + let result = self + .run_single_iteration_inner(&mut metrics) + .instrument(check_dpa_span.clone()) + .await; + check_dpa_span.record("metrics", metrics.to_string()); + self.metric_holder.update_metrics(metrics); + result + } + + async fn run_single_iteration_inner( + &mut self, + metrics: &mut DpaMonitorMetrics, + ) -> CarbideResult { + let _lock = match self + .work_lock_manager_handle + .try_acquire_lock(Self::ITERATION_WORK_KEY.into()) + .await + { + Ok(lock) => lock, + Err(e) => { + tracing::warn!( + "DpaMonitor failed to acquire work lock: Another instance of carbide running? {e}" + ); + return Ok(0); + } + }; + tracing::info!( + lock = Self::ITERATION_WORK_KEY, + "DpaMonitor acquired the lock", + ); + + let mut txn = self.db_services.db_pool.txn_begin().await?; + + let mut snapshots = match self.get_all_snapshots(&mut txn).await { + Ok(snapshots) => snapshots, + Err(e) => { + tracing::error!(error = %e, "run_single_iteration_inner: Failed to load ManagedHost snapshots in IbFabricMonitor"); + // Record the same error for all fabrics, so that the problem is at least visible on dashboards + return Err(e); + } + }; + + txn.commit().await?; + + for mh in snapshots.values_mut() { + metrics.num_machines_scanned += 1; + + // If the machine does not have any dpa interfaces, we can skip it. + if mh.dpa_interface_snapshots.is_empty() { + tracing::info!("run_single_iteration_inner: skipping, no dpa interfaces"); + continue; + } + + // If the machine is an instance, increment the number of instances scanned. 
+ if mh.instance.is_some() { + metrics.num_instances_scanned += 1; + } + + for idx in 0..mh.dpa_interface_snapshots.len() { + metrics.num_dpa_interfaces_scanned += 1; + + let controller_state = mh.dpa_interface_snapshots[idx].controller_state.clone(); + + // Look at this DPA interface and see if we need to transition it to a new state. + // This will return a new state if we need to transition to a new state, or None if we can stay in the current state. + // We build an array of dpa interfaces and new state. + // After examining all the dpa interfaces in all the machines, we will update the DB with the new states in another loop + let handler_result = self.handle_dpa_interface(mh, idx, metrics).await?; + + let new_state = handler_result.new_state; + let txn = handler_result.txn; + + if let Some(new_state) = new_state { + let new_version = controller_state.version.increment(); + + let mut txn = + match txn { + Some(t) => t, + None => self.db_services.db_pool.begin().await.map_err(|e| { + db::AnnotatedSqlxError::new("dpa_monitor begin txn", e) + })?, + }; + + db::dpa_interface::try_update_controller_state( + &mut txn, + mh.dpa_interface_snapshots[idx].id, + controller_state.version, + new_version, + &new_state, + ) + .await?; + + txn.commit() + .await + .map_err(|e| db::AnnotatedSqlxError::new("dpa_monitor commit txn", e))?; + } else if let Some(txn) = txn { + txn.commit() + .await + .map_err(|e| db::AnnotatedSqlxError::new("dpa_monitor commit txn", e))?; + } + } + } + + Ok(0) + } + + // This function will be called when the DPA object is in Assigned state. + // We need to make sure that the partitioning configuration of the NIC is in sync with + // the desired state. It's possible we are moving from Ready state to Assigned state. + // In this case, we need to send SetVNI command to move the NIC into the desired partition. 
+ // If we were already in Assigned state, and the user changed the SpxConfig using the + // UpdateInstanceConfig API, we need to send SetVNI command to move the NIC into the new partition. + // or remove the NIC from any partition. + // The desired state will be in instance.spx_config field. The observed state will be in the + // NIC's network_status_observation field. + #[allow(clippy::too_many_arguments)] + async fn reconcile_assigned_state<'a>( + &mut self, + dpa_interface: &mut DpaInterface, + machine: &Machine, + instance: &InstanceSnapshot, + client: Arc, + dpa_info: &Arc, + hb_interval: TimeDelta, + metrics: &mut DpaMonitorMetrics, + ) -> CarbideResult>> { + let db_services = &self.db_services; + + let this_mac = dpa_interface.mac_address; + + let spx_config = instance.config.spxconfig.clone(); + + let instance_version = instance.spx_config_version; + let nic_version = dpa_interface.network_config.version.to_string(); + + let mut need_creation = false; + let mut need_deletion = false; + let mut need_heartbeat = false; + + let mut vni = 0_u32; + + let mut this_nic_configured_attachments = spx_config + .spx_attachments + .iter() + .filter(|a| a.mac_address == Some(this_mac.to_string())) + .collect::>(); + + if this_nic_configured_attachments.len() > 1 { + tracing::error!( + "reconcile_assigned_state: this_nic_configured_attachments length is greater than 1" + ); + return Err(CarbideError::InvalidArgument( + "reconcile_assigned_state this_nic_configured_attachments length is greater than 1" + .to_string(), + )); + } + + let mut this_nic_observed_attachments = Vec::new(); + + let observed = machine.spx_status_observation.clone(); + if let Some(observed) = observed { + this_nic_observed_attachments = observed + .spx_attachments + .into_iter() + .filter(|a| a.mac_address == this_mac) + .collect::>(); + } + + if this_nic_observed_attachments.len() > 1 { + tracing::error!( + "reconcile_assigned_state this_nic_observed_attachments length is greater than 1" + ); + 
return Err(CarbideError::InvalidArgument( + "reconcile_assigned_state this_nic_observed_attachments length is greater than 1" + .to_string(), + )); + } + + let mut txn = + db_services.db_pool.begin().await.map_err(|e| { + db::AnnotatedSqlxError::new("reconcile_assigned_state begin txn", e) + })?; + + if this_nic_configured_attachments.is_empty() { + if !this_nic_observed_attachments.is_empty() { + need_deletion = true; + } + } else { + let partition_id = this_nic_configured_attachments.remove(0).spx_partition_id; + let partition = db::spx_partition::find_by( + txn.as_mut(), + ObjectColumnFilter::List(db::spx_partition::IdColumn, &[partition_id]), + ) + .await?; + if partition.len() != 1 { + tracing::error!( + "reconcile_assigned_state SPX partition {partition_id} is not found" + ); + return Err(CarbideError::InvalidArgument(format!( + "SPX partition {partition_id} is not found", + ))); + } + + vni = partition[0].vni.unwrap_or(0) as u32; + debug_assert_ne!(vni, 0, "VNI in SPX partition {partition_id} is 0"); + + if !this_nic_observed_attachments.is_empty() { + let observed_attachment = this_nic_observed_attachments.remove(0); + + if (observed_attachment.partition_id != Some(partition_id)) + || (observed_attachment.config_version != Some(instance_version)) + { + need_creation = true; + } else { + need_heartbeat = true; + } + } else { + need_creation = true; + } + } + + if !need_creation && !need_deletion && !need_heartbeat { + return Ok(None); + } + + debug_assert_eq!( + (need_creation as u8) + (need_deletion as u8) + (need_heartbeat as u8), + 1, + "reconcile_assigned_state: at most one of need_creation, need_deletion, need_heartbeat should be set" + ); + + tracing::debug!( + "[{}] reconcile_assigned_state: need_creation {need_creation}, need_deletion {need_deletion}, need_heartbeat {need_heartbeat}", + chrono::Utc::now() + ); + + if need_creation { + let txn = self + .send_set_vni_command( + dpa_interface, + client, + dpa_info, + vni, + false, + 
instance_version.to_string(), + ) + .await?; + return Ok(txn); + } else if need_deletion { + let txn = self + .send_set_vni_command(dpa_interface, client, dpa_info, 0_u32, false, nic_version) + .await?; + return Ok(txn); + } else if need_heartbeat { + let txn = self + .do_heartbeat(dpa_interface, client, dpa_info, hb_interval, vni, metrics) + .await?; + return Ok(txn); + } + + Ok(None) + } + + // This function will be called when the DPA object is in Ready state. + // We need to make sure that the partitioning configuration of the NIC is in sync with + // the desired state: the NIC must report the null partition at the NIC's current + // network config version. Otherwise a SetVNI with VNI 0 is sent; if it is already in + // sync, only a heartbeat is sent. + async fn reconcile_ready_state<'a>( + &mut self, + machine: &Machine, + dpa_interface: &mut DpaInterface, + client: Arc, + dpa_info: &Arc, + hb_interval: TimeDelta, + metrics: &mut DpaMonitorMetrics, + ) -> CarbideResult>> { + let nic_version = dpa_interface.network_config.version; + let nic_version_str = nic_version.to_string(); + + let mut need_deletion = false; + let mut need_heartbeat = false; + + let this_mac = dpa_interface.mac_address; + + let observed = machine.spx_status_observation.clone(); + + let mut this_nic_observed_attachments = Vec::new(); + + if let Some(observed) = observed { + this_nic_observed_attachments = observed + .spx_attachments + .into_iter() + .filter(|a| a.mac_address == this_mac) + .collect::>(); + } + + if this_nic_observed_attachments.len() > 1 { + tracing::error!( + "reconcile_ready_state this_nic_observed_attachments length is greater than 1" + ); + return Err(CarbideError::InvalidArgument( + "reconcile_ready_state this_nic_observed_attachments length is greater than 1" + .to_string(), + )); + } + + if this_nic_observed_attachments.is_empty() { + return Ok(None); + } + + // remove(0) already yields the owned attachment; no clone is needed. + let observed_attachment = this_nic_observed_attachments.remove(0); + + if (observed_attachment.partition_id != Some(NULL_SPX_PARTITION_ID)) + || (observed_attachment.config_version != Some(nic_version)) + { + need_deletion = true; + } else { + need_heartbeat = true; + } + +
tracing::debug!( + "[{}] reconcile_ready_state: need_deletion {need_deletion}, need_heartbeat {need_heartbeat}", + chrono::Utc::now() + ); + + if need_deletion { + let txn = self + .send_set_vni_command( + dpa_interface, + client, + dpa_info, + 0_u32, + false, + nic_version_str, + ) + .await?; + return Ok(txn); + } else if need_heartbeat { + let txn = self + .do_heartbeat(dpa_interface, client, dpa_info, hb_interval, 0_u32, metrics) + .await?; + return Ok(txn); + } + + Ok(None) + } + + // This should return a txn if we started one, an indication of whether state is changing, + // and if so, the new state. + // We should: + // 1. Go through the state transitions for the card. + // 2. Send heartbeats in Ready and Assigned states if necessary. + // 3. If the DPA is in ASSIGNED state, go through the attachments. + // 4. If we are not an instance, then, we need to do ResetVNI. + // 5. If we are an instance, then, we need to do SetVNI. + // 6. We need a way for machine statehandler to determine if config is done. + async fn handle_dpa_interface( + &mut self, + mh: &mut ManagedHostStateSnapshot, + idx: usize, + metrics: &mut DpaMonitorMetrics, + ) -> CarbideResult { + let dpa_interface = &mut mh.dpa_interface_snapshots[idx]; + + let hb_interval = self.config.hb_interval; + + // dpa_info is optional; report its absence as an error (same pattern as the + // mqtt_client lookups below) instead of panicking. + let dpa_info = self + .dpa_info + .clone() + .ok_or_else(|| eyre::eyre!("Missing dpa_info"))?; + + let host_use_admin_network = dpa_interface.use_admin_network(); + + let controller_state = dpa_interface.controller_state.value.clone(); + match controller_state { + DpaInterfaceControllerState::Provisioning => { + if host_use_admin_network { + return Ok(HandlerResult { + new_state: None, + txn: None, + }); + } + + let new_state = DpaInterfaceControllerState::Ready; + tracing::info!(state = ?new_state, "Dpa Interface state transition"); + Ok(HandlerResult { + new_state: Some(new_state), + txn: None, + }) + } + + DpaInterfaceControllerState::Ready => { + // We will stay in Ready state as long as use_admin_network is true.
+ // When an instance is created from this host, use_admin_network + // will be turned off. We then need to SetVNI, and wait for the + // SetVNI to take effect. + + let client = dpa_info + .mqtt_client + .clone() + .ok_or_else(|| eyre::eyre!("Missing mqtt_client"))?; + + if !host_use_admin_network { + // We are in the process of transitioning to an instance. + // So go through the unlock/apply firmware/lock sequence + let new_state = DpaInterfaceControllerState::Unlocking; + tracing::info!(state = ?new_state, "Dpa Interface state transition"); + + Ok(HandlerResult { + new_state: Some(new_state), + txn: None, + }) + } else { + // When we are in the Ready state, we need to make sure that there are no VNIs configured on the NICs. + // If an instance has just been released and we transition to Ready state, we need to reset the VNIs on the NICs to 0. + // The reconciliation routine will send SetVNI command with VNI being 0, as long as the observed state is different from the desired state. + // If the observed state is the same as the desired state, we can stay in the Assigned state and we + // will send heartbeat commands to keep the states in sync. + + let txn = self + .reconcile_ready_state( + &mh.host_snapshot, + dpa_interface, + client, + &dpa_info, + hb_interval, + metrics, + ) + .await?; + + Ok(HandlerResult { + new_state: None, + txn, + }) + } + } + + DpaInterfaceControllerState::Unlocking => { + // Once we reach Unlocking state, we would have replied to + // ForgeAgentControl requests from scout with a reply indicating + // that it should unlock the card. The scout does the action, and + // publishes an observation indicating the lock status. That causes + // us to update the card state in the DB. If card_state is none, that + // means this sequence has not yet taken place. So we just wait. 
+ if dpa_interface.card_state.is_none() { + tracing::info!("card_state none for dpa: {:#?}", dpa_interface.id); + return Ok(HandlerResult { + new_state: None, + txn: None, + }); + } + if let Some(ref mut cs) = dpa_interface.card_state + && cs.lockmode == Some(Unlocked) + { + let new_state = DpaInterfaceControllerState::ApplyFirmware; + tracing::info!(state = ?new_state, "Interface unlocked. Transitioning to next state"); + return Ok(HandlerResult { + new_state: Some(new_state), + txn: None, + }); + } + Ok(HandlerResult { + new_state: None, + txn: None, + }) + } + + DpaInterfaceControllerState::ApplyFirmware => { + // At this point, we're in the ApplyFirmware state, which means we + // have sent a firmware flash instruction to scout (via a configured + // FirmwareFlasherProfile). Now, we wait for an observation report + // from scout indicating firmware has been applied (or skipped if no + // config was available). + let Some(ref card_state) = dpa_interface.card_state else { + tracing::info!( + "no firmware report, because card_state none for dpa: {:#?}, waiting for retry", + dpa_interface.id + ); + return Ok(HandlerResult { + new_state: None, + txn: None, + }); + }; + if let Some(ref firmware_report) = card_state.firmware_report { + // Transition on to the next state if the flash succeeded and reset + // either wasn't requested (None) or succeeded (Some(true)). + // + // To explain this a bit better, if no reset was requested, then + // we'll get None back here. Since no reset was requested at all, + // then we can continue, so we just "default" to true, to let + // things continue. If a reset WAS requested, then we'll unwrap + // whatever the result was (either success/true, or failed/false). 
+ let reset_ok = firmware_report.reset.unwrap_or(true); + if firmware_report.flashed && reset_ok { + let new_state = DpaInterfaceControllerState::ApplyProfile; + tracing::info!( + state = ?new_state, + observed_version = firmware_report.observed_version.as_deref().unwrap_or("none"), + "firmware report received and successfully applied, transitioning" + ); + return Ok(HandlerResult { + new_state: Some(new_state), + txn: None, + }); + } + tracing::warn!( + flashed = firmware_report.flashed, + reset = ?firmware_report.reset, + observed_version = firmware_report.observed_version.as_deref().unwrap_or("none"), + "firmware report received but not successful, waiting for retry" + ); + } + + // ..if we get here, it's because the firmware_report in the CardState + // wasn't set yet. ...or it was, and this round wasn't successful, so we're + // just going to keep hanging out in this state until it is (letting the + // apply workflow happen again). + Ok(HandlerResult { + new_state: None, + txn: None, + }) + } + + DpaInterfaceControllerState::ApplyProfile => handle_apply_profile(dpa_interface), + + DpaInterfaceControllerState::Locking => { + let Some(ref cs) = dpa_interface.card_state else { + tracing::error!( + "Unexpected - card_state none for dpa: {:#?}", + dpa_interface.id + ); + return Ok(HandlerResult { + new_state: None, + txn: None, + }); + }; + if cs.lockmode == Some(Locked) { + let new_state = DpaInterfaceControllerState::Assigned; + tracing::info!(state = ?new_state, "Dpa Interface state transition"); + return Ok(HandlerResult { + new_state: Some(new_state), + txn: None, + }); + } + Ok(HandlerResult { + new_state: None, + txn: None, + }) + } + + DpaInterfaceControllerState::Assigned => { + // We will stay in the Assigned state as long as use_admin_network is off, which + // means we are in the tenant network. 
Once use_admin_network is turned on, we + // transition straight back to the Ready state; the Ready handler then reconciles the + // NIC and sends SetVNI with VNI 0 (there is no separate WaitingForResetVNI state). + + let client = dpa_info + .mqtt_client + .clone() + .ok_or_else(|| eyre::eyre!("Missing mqtt_client"))?; + + if host_use_admin_network { + let new_state = DpaInterfaceControllerState::Ready; + tracing::info!(state = ?new_state, "Dpa Interface state transition"); + Ok(HandlerResult { + new_state: Some(new_state), + txn: None, + }) + } else { + // When we are in the Assigned state, we need to make sure the NICs are configured + // with the correct VNI. We have to reconcile the desired state (as specified in the + // spx_config field of the instance) with the observed state of the NIC in the + // spx_status_observation field of the host Machine. Send SetVNI command to the NIC + // to set the VNI to the desired value if the observed state is different from the desired state. + // If the observed state is the same as the desired state, we can stay in the Assigned state and we + // will send heartbeat commands to keep the states in sync.
+ + let instance = mh.instance.as_ref().ok_or_else(|| { + tracing::error!("reconcile_assigned_state instance is missing"); + eyre::eyre!("reconcile_assigned_state instance is missing") + })?; + let txn = self + .reconcile_assigned_state( + dpa_interface, + &mh.host_snapshot, + instance, + client, + &dpa_info, + hb_interval, + metrics, + ) + .await?; + + Ok(HandlerResult { + new_state: None, + txn, + }) + } + } + } + } + + async fn get_all_snapshots( + &self, + txn: &mut PgConnection, + ) -> CarbideResult> { + let machine_ids = db::machine::find_machine_ids( + &mut *txn, + MachineSearchConfig { + include_predicted_host: true, + ..Default::default() + }, + ) + .await?; + + let mut res = db::managed_host::load_by_machine_ids( + txn, + &machine_ids, + LoadSnapshotOptions { + include_history: false, + include_instance_data: true, + host_health_config: self.host_health, + }, + ) + .await + .map_err(Into::::into)?; + + for mh in res.values_mut() { + let machine_id = mh.host_snapshot.id; + let dpa_snapshots = db::dpa_interface::find_by_machine_id(&mut *txn, machine_id) + .await + .map_err(Into::::into)?; + mh.dpa_interface_snapshots = dpa_snapshots; + } + + Ok(res) + } + + // Determine if we need to do a heartbeat or if we need to + // send a SetVni command because the DPA and Carbide are out of sync. + // If so, call send_set_vni_command to send the heart beat or set vni + async fn do_heartbeat<'a>( + &mut self, + state: &mut DpaInterface, + client: Arc, + dpa_info: &Arc, + hb_interval: TimeDelta, + vni: u32, + metrics: &mut DpaMonitorMetrics, + ) -> CarbideResult>> { + // We are in the Ready or Assigned state and we continue to be in the same state. 
+ // In this state, we will send SetVni command to the DPA if + // (1) the heartbeat interval has elapsed since the last heartbeat + // (2) The DPA sent us an ack and it looks like the DPA lost its config (due to a power cycle potentially) + // Heartbeat is identified by the revision being set to the sentinel value "NIL" + // When we send a heartbeat below, we update the last_hb_time for the interface entry. + + // XXX TODO XXX + // Verify with the FW team how the card behaves if it loses its config after a power cycle. + // If we send it a heartbeat with NIL as the revision, but with a valid VNI (since it's a part + // of a tenancy), will it echo back the VNI? Or does the reply always carry whatever VNI it is using? + // If it just echoes back the VNI, we have to send it a SetVni command with the VNI to use. + // XXX TODO XXX + + let Some(next_hb_time) = state.last_hb_time.checked_add_signed(hb_interval) else { + // checked_add_signed returns None if the addition overflows + return Ok(None); + }; + + if chrono::Utc::now() < next_hb_time { + return Ok(None); + } + + let txn = self + .send_set_vni_command(state, client, dpa_info, vni, true, "NIL".to_string()) + .await?; + + // send_set_vni_command only yields a txn when the heartbeat was actually sent; + // do not count attempts whose send failed. + if txn.is_some() { + metrics.num_heartbeats_sent += 1; + } + + Ok(txn) + } + + // Send a SetVni command to the DPA. The SetVni command could be a heart beat (identified by + // revision being "NIL"). The VNI to send is supplied by the caller in `vni`. When heart_beat + // is true and the send succeeds, last_hb_time is updated and the open txn is returned. + async fn send_set_vni_command<'a>( + &mut self, + state: &mut DpaInterface, + client: Arc, + dpa_info: &Arc, // dpa_info contains the subnet_ip and subnet_mask to use for the SetVni command + vni: u32, + heart_beat: bool, + revision_str: String, + ) -> CarbideResult>> { + let services = &self.db_services; + + // Send the command; a heartbeat is indicated by the revision string being "NIL".
+ match crate::dpa::handler::send_dpa_command( + client, + dpa_info, + state.mac_address.to_string(), + revision_str, + vni as i32, + ) + .await + { + Ok(()) => { + if heart_beat { + let mut txn = + services.db_pool.begin().await.map_err(|e| { + db::AnnotatedSqlxError::new("dpa_monitor hb begin txn", e) + })?; + let res = db::dpa_interface::update_last_hb_time(state, &mut txn).await; + if res.is_err() { + tracing::error!( + "Error updating last_hb_time for dpa id: {} res: {:#?}", + state.id, + res + ); + } + Ok(Some(txn)) + } else { + Ok(None) + } + } + Err(_e) => Ok(None), + } + } +} + +/// handle_apply_profile handles the ApplyProfile state for a +/// SuperNIC/DPA interface, which means we sent an mlxconfig +/// profile config down to scout (which takes care of resetting +/// mlxconfig parameters back to defaults, and then potentially +/// overlaying a profile of parameters over top of it). +/// +/// And just so it's clear, there are two "success" cases that +/// we check for here. +/// 1. A profile was configured and successfully synced — scout +/// reports a profile_name and profile_synced is true. +/// 2. NO profile was configured (indicating reset only) — scout +/// reports profile_name=None and profile_synced true. This is +/// successful because the reset itself succeeded and there was +/// nothing else to apply. +/// +/// In both cases, profile_synced=Some(true) is the signal that +/// the workflow completed successfully, and it's safe to transition +/// to the next state. 
+fn handle_apply_profile(state: &DpaInterface) -> CarbideResult { + let Some(ref cs) = state.card_state else { + tracing::info!( + "no profile report, because card_state none for dpa: {:#?}, waiting for retry", + state.id + ); + return Ok(HandlerResult { + new_state: None, + txn: None, + }); + }; + if cs.profile_synced == Some(true) { + let new_state = DpaInterfaceControllerState::Locking; + tracing::info!( + state = ?new_state, + profile = cs.profile.as_deref().unwrap_or("none"), + "profile applied successfully, transitioning" + ); + return Ok(HandlerResult { + new_state: Some(new_state), + txn: None, + }); + } + Ok(HandlerResult { + new_state: None, + txn: None, + }) +} diff --git a/crates/api/src/handlers/dpa.rs b/crates/api/src/handlers/dpa.rs index f8f2bc61ee..e7b16411a1 100644 --- a/crates/api/src/handlers/dpa.rs +++ b/crates/api/src/handlers/dpa.rs @@ -25,8 +25,7 @@ use eyre::eyre; use libmlx::device::report::MlxDeviceReport; use libmlx::profile::serialization::SerializableProfile; use model::dpa_interface::{ - CardState, DpaInterface, DpaInterfaceControllerState, DpaInterfaceNetworkStatusObservation, - DpaLockMode, NewDpaInterface, + CardState, DpaInterface, DpaInterfaceControllerState, DpaLockMode, NewDpaInterface, }; use rpc::forge_agent_control_response as fac; use rpc::forge_agent_control_response::MlxDeviceAction; @@ -182,47 +181,6 @@ pub(crate) async fn find_dpa_interfaces_by_ids( })) } -// XXX TODO XXX -// Remove before final commit -// XXX TODO XXX -pub(crate) async fn set_dpa_network_observation_status( - api: &Api, - request: Request<::rpc::forge::DpaNetworkObservationSetRequest>, -) -> Result, Status> { - log_request_data(&request); - - let req = request.into_inner(); - - let id = req.id.ok_or(CarbideError::InvalidArgument( - "at least one ID must be provided to find_dpa_interfaces_by_ids".to_string(), - ))?; - - // Prepare our txn to grab the dpa interfaces from the DB - let mut txn = api.txn_begin().await?; - - let dpa_ifs_int = 
db::dpa_interface::find_by_ids(&mut txn, &[id], false).await?; - - if dpa_ifs_int.len() != 1 { - return Err(CarbideError::InvalidArgument( - "ID could not be used to locate interface".to_string(), - ) - .into()); - } - - let dpa_if_int = dpa_ifs_int[0].clone(); - - let observation = DpaInterfaceNetworkStatusObservation { - observed_at: chrono::Utc::now(), - network_config_version: Some(dpa_if_int.network_config.version), - }; - - db::dpa_interface::update_network_observation(&dpa_if_int, &mut txn, &observation).await?; - - txn.commit().await?; - - Ok(Response::new(dpa_if_int.into())) -} - // Scout is asking us what it should do. We found the machine in DpaProvisioning state. // So look at each DPA interface and make it progress through the state machine. // If there is work to be done, return an MLX action with per-device commands. @@ -253,9 +211,7 @@ pub(crate) async fn process_scout_req( let dpa_cmd = match cstate { DpaInterfaceControllerState::Provisioning | DpaInterfaceControllerState::Ready - | DpaInterfaceControllerState::WaitingForSetVNI - | DpaInterfaceControllerState::Assigned - | DpaInterfaceControllerState::WaitingForResetVNI => continue, + | DpaInterfaceControllerState::Assigned => continue, // We are in the Assigned state, so we don't need to do anything DpaInterfaceControllerState::Unlocking => { build_unlock_command(api, sn, machine_id, pci_name).await? 
@@ -278,7 +234,7 @@ pub(crate) async fn process_scout_req( Ok(action) => device_actions.push(action), Err(e) => { // Would only happen if the op is an ApplyProfile command with invalid YAML - tracing::info!("process_scout_req Error encoding DpaCommand for dpa: {e}"); + tracing::error!("process_scout_req Error encoding DpaCommand for dpa: {e}"); } } } @@ -305,6 +261,7 @@ async fn build_unlock_command( })?; tracing::info!(%machine_id, %pci_name, "Unlocking DPA"); + Ok(DpaCommand { op: OpCode::Unlock { key }, }) @@ -525,6 +482,17 @@ async fn process_mlx_observation( ))); } + if rep.observations.is_empty() { + tracing::error!( + "process_mlx_observation no observations in report: {:#?}", + rep + ); + return Err(CarbideError::GenericErrorFromReport(eyre!( + "process_mlx_observation no observations in report: {:#?}", + rep + ))); + } + for obs in rep.observations { let Some(devinfo) = obs.device_info else { tracing::error!( diff --git a/crates/api/src/handlers/health.rs b/crates/api/src/handlers/health.rs index cdea0a4c9b..1b59e15c0c 100644 --- a/crates/api/src/handlers/health.rs +++ b/crates/api/src/handlers/health.rs @@ -158,6 +158,7 @@ pub async fn remove_machine_health_report( let rpc::RemoveMachineHealthReportRequest { machine_id, source } = request.into_inner(); let machine_id = convert_and_log_machine_id(machine_id.as_ref())?; + remove_by_source(&mut txn, machine_id, source).await?; txn.commit().await?; diff --git a/crates/api/src/handlers/instance.rs b/crates/api/src/handlers/instance.rs index 18e8a49201..5b190207e7 100644 --- a/crates/api/src/handlers/instance.rs +++ b/crates/api/src/handlers/instance.rs @@ -36,6 +36,7 @@ use model::instance::config::extension_services::InstanceExtensionServicesConfig use model::instance::config::infiniband::InstanceInfinibandConfig; use model::instance::config::network::{InstanceNetworkConfig, NetworkDetails}; use model::instance::config::nvlink::InstanceNvLinkConfig; +use model::instance::config::spx::InstanceSpxConfig; use 
model::instance::config::tenant_config::TenantConfig; use model::instance::snapshot::InstanceSnapshot; use model::machine::machine_search_config::MachineSearchConfig; @@ -52,7 +53,8 @@ use crate::api::{Api, log_machine_id, log_request_data, log_tenant_organization_ use crate::handlers::utils::convert_and_log_machine_id; use crate::instance::{ InstanceAllocationRequest, allocate_ib_port_guid, allocate_instance, allocate_network, - validate_ib_partition_ownership, validate_os_definition_usable, + allocate_spx_port_mac, validate_ib_partition_ownership, validate_os_definition_usable, + validate_spx_partition_ownership, }; use crate::{CarbideError, CarbideResult}; @@ -1103,6 +1105,8 @@ pub(crate) async fn update_instance_config( Some(config) => config.try_into().map_err(CarbideError::from)?, }; + tracing::debug!("SPX update_instance_config config: {:?}", config.spxconfig); + // Network validation is done only if network update is requested. config .validate( @@ -1292,6 +1296,13 @@ pub(crate) async fn update_instance_config( ); update_instance_nvlink_config(&mh_snapshot, &instance, &config.nvlink, &mut txn).await?; + tracing::debug!( + "Updating instance {} with Spx config {:?}", + instance.id, + config.spxconfig + ); + update_instance_spx_config(&mh_snapshot, &instance, &mut config.spxconfig, &mut txn).await?; + db::instance::update_config(&mut txn, instance.id, expected_version, config, metadata).await?; let mh_snapshot = db::managed_host::load_snapshot( @@ -1683,6 +1694,59 @@ pub async fn update_instance_nvlink_config( Ok(()) } +pub async fn update_instance_spx_config( + mh_snapshot: &ManagedHostStateSnapshot, + instance: &InstanceSnapshot, + spxcfg: &mut InstanceSpxConfig, + txn: &mut sqlx::Transaction<'_, sqlx::Postgres>, +) -> Result<(), CarbideError> { + let mid = instance.machine_id; + + let mut mh_snapshot = mh_snapshot.clone(); + + if !instance + .config + .spxconfig + .is_spx_config_update_requested(spxcfg) + { + tracing::debug!("SPX update_instance_spx_config
is_spx_config_update_requested is false"); + return Ok(()); + } + + if !matches!( + mh_snapshot.managed_state, + ManagedHostState::Assigned { + instance_state: InstanceState::Ready, + } + ) { + tracing::debug!("SPX update_instance_spx_config not Assigned"); + return Err(ConfigValidationError::InvalidState.into()); + } + + if instance.deleted.is_some() { + tracing::debug!("SPX update_instance_spx_config instance deleted"); + return Err(ConfigValidationError::InstanceDeletionIsRequested.into()); + } + + let dpa_interfaces = db::dpa_interface::find_by_machine_id(txn.as_mut(), mid).await?; + + mh_snapshot.dpa_interface_snapshots = dpa_interfaces; + + validate_spx_partition_ownership(txn, &instance.config.tenant.tenant_organization_id, spxcfg) + .await?; + + let spx_config_with_ports = allocate_spx_port_mac(spxcfg, &mh_snapshot)?; + *spxcfg = spx_config_with_ports; + + // Update config in db. + db::instance::update_spx_config(txn, instance.id, instance.spx_config_version, spxcfg, true) + .await?; + + tracing::debug!("SPX update_instance_spx_config updating config in db done"); + + Ok(()) +} + async fn unbind_all_instance_ib_ports( api: &Api, instance: &InstanceSnapshot, diff --git a/crates/api/src/handlers/mod.rs b/crates/api/src/handlers/mod.rs index dab6e1cd58..977f9a47af 100644 --- a/crates/api/src/handlers/mod.rs +++ b/crates/api/src/handlers/mod.rs @@ -73,6 +73,7 @@ pub mod route_server; pub mod scout_stream; pub mod site_explorer; pub mod sku; +pub mod spx_partition; pub mod switch; mod switch_artifacts; pub mod tenant; diff --git a/crates/api/src/handlers/spx_partition.rs b/crates/api/src/handlers/spx_partition.rs new file mode 100644 index 0000000000..c2b8f4c581 --- /dev/null +++ b/crates/api/src/handlers/spx_partition.rs @@ -0,0 +1,314 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +use ::rpc::forge as rpc; +use db::resource_pool::ResourcePoolDatabaseError; +use db::{ObjectColumnFilter, WithTransaction, spx_partition}; +use futures_util::FutureExt; +use model::resource_pool; +use model::spx_partition::NewSpxPartition; +use sqlx::PgConnection; +use tonic::{Request, Response, Status}; + +use crate::CarbideError; +use crate::api::{Api, log_request_data, log_tenant_organization_id}; + +async fn allocate_dpa_vni( + api: &Api, + txn: &mut PgConnection, + owner_id: &str, + requested_vni: Option, +) -> Result { + let source_pool = &api.common_pools.ethernet.pool_dpa_vni; + + match db::resource_pool::allocate( + source_pool, + txn, + resource_pool::OwnerType::SpxPartition, + owner_id, + requested_vni, + ) + .await + { + Ok(val) => Ok(val), + Err(ResourcePoolDatabaseError::ResourcePool(resource_pool::ResourcePoolError::Empty)) => { + tracing::error!( + owner_id, + pool = source_pool.name(), + "Pool exhausted, cannot allocate" + ); + Err(CarbideError::ResourceExhausted(format!( + "pool {}", + source_pool.name + ))) + } + Err(ResourcePoolDatabaseError::Database(e)) if requested_vni.is_some() => Err(match *e { + db::DatabaseError::FailedPrecondition(_s) => { + tracing::error!( + owner_id, + pool = source_pool.name(), + value = requested_vni, + "invalid pool value requested, cannot allocate" + ); + CarbideError::FailedPrecondition(format!( + "VNI `{}` 
cannot be requested or is already allocated", + requested_vni.unwrap_or_default() + )) + } + e => e.into(), + }), + Err(err) => { + tracing::error!(owner_id, error = %err, pool = source_pool.name, "Error allocating from resource pool"); + Err(err.into()) + } + } +} + +pub(crate) async fn create( + api: &Api, + request: Request, +) -> Result, Status> { + log_request_data(&request); + + let request_inner = request.into_inner(); + log_tenant_organization_id(&request_inner.tenant_organization_id); + + let req = NewSpxPartition::try_from(request_inner)?; + + let mut txn = api.txn_begin().await?; + + let vni = allocate_dpa_vni(api, &mut txn, &req.id.to_string(), req.vni).await?; + + let partition = db::spx_partition::create(&req, vni, &mut txn) + .await + .map_err(CarbideError::from)?; + let resp = rpc::SpxPartition::try_from(partition).map(Response::new)?; + txn.commit().await?; + Ok(resp) +} + +pub(crate) async fn delete( + api: &Api, + request: Request, +) -> Result, Status> { + log_request_data(&request); + + let id = request + .into_inner() + .id + .ok_or_else(|| CarbideError::MissingArgument("id"))?; + + let mut txn = api.txn_begin().await?; + + let resp = api + .with_txn(|txn| db::spx_partition::mark_as_deleted(id, txn).boxed()) + .await? 
+ .map_err(CarbideError::from)?; + + // NOTE(review): the partition is marked deleted through api.with_txn, while the VNI is + // released on the separately begun `txn`. If with_txn commits its own transaction, the + // two steps are not atomic and a failed release would leak the VNI -- confirm. + if let Some(vni) = resp.vni { + db::resource_pool::release(&api.common_pools.ethernet.pool_dpa_vni, &mut txn, vni) + .await + .map_err(CarbideError::from)?; + } + + txn.commit().await?; + + Ok(Response::new(rpc::SpxPartitionDeletionResult {})) +} + +/// Returns the IDs of the SPX partitions that match the request's search filter. +pub(crate) async fn find_ids( + api: &Api, + request: Request, +) -> Result, Status> { + log_request_data(&request); + + let rpc_filter = request.into_inner(); + if let Some(ref tenant_org_id) = rpc_filter.tenant_org_id { + log_tenant_organization_id(tenant_org_id); + } + + let filter: model::spx_partition::SpxPartitionSearchFilter = rpc_filter.into(); + let spx_partition_ids = db::spx_partition::find_ids(&api.database_connection, filter).await?; + + Ok(Response::new(rpc::SpxPartitionIdList { spx_partition_ids })) +} + +/// Returns the SPX partitions for the requested IDs. An empty ID list, or one longer +/// than `max_find_by_ids`, is rejected with an InvalidArgument error. +pub(crate) async fn find_by_ids( + api: &Api, + request: Request, +) -> Result, Status> { + log_request_data(&request); + + let rpc::SpxPartitionsByIdsRequest { + spx_partition_ids, .. + } = request.into_inner(); + + let max_find_by_ids = api.runtime_config.max_find_by_ids as usize; + if spx_partition_ids.len() > max_find_by_ids { + return Err(CarbideError::InvalidArgument(format!( + "no more than {max_find_by_ids} IDs can be accepted" + )) + .into()); + } else if spx_partition_ids.is_empty() { + return Err( + CarbideError::InvalidArgument("at least one ID must be provided".to_string()).into(), + ); + } + + let mut reader = api.db_reader(); + let partitions = db::spx_partition::find_by( + reader.as_mut(), + ObjectColumnFilter::List(spx_partition::IdColumn, &spx_partition_ids), + ) + .await + .map_err(CarbideError::from)?; + + let mut spx_partitions = Vec::with_capacity(partitions.len()); + for p in partitions { + spx_partitions.push(p.try_into()?); + } + + Ok(Response::new(rpc::SpxPartitionList { spx_partitions })) +} + +#[cfg(test)] +mod tests { + use ::rpc::forge as rpc; + use model::spx_partition::NewSpxPartition; + + #[test] + fn
test_create_spx_partition_valid_request() { + let request = rpc::SpxPartitionCreationRequest { + metadata: Some(rpc::Metadata { + name: "test-partition".to_string(), + description: "A test SPX partition".to_string(), + ..Default::default() + }), + id: None, + vni: Some(100), + tenant_organization_id: "tenant-org-123".to_string(), + }; + + let result = NewSpxPartition::try_from(request); + assert!(result.is_ok()); + + let partition = result.unwrap(); + assert_eq!(partition.name, "test-partition"); + assert_eq!(partition.description, "A test SPX partition"); + assert_eq!(partition.tenant_organization_id, "tenant-org-123"); + assert_eq!(partition.vni, Some(100)); + } + + #[test] + fn test_create_spx_partition_missing_tenant_organization_id() { + let request = rpc::SpxPartitionCreationRequest { + metadata: Some(rpc::Metadata { + name: "test-partition".to_string(), + description: "A test SPX partition".to_string(), + ..Default::default() + }), + id: None, + vni: Some(100), + tenant_organization_id: String::new(), + }; + + let result = NewSpxPartition::try_from(request); + assert!(result.is_err()); + + let err = result.unwrap_err(); + assert!(err.to_string().contains("tenant_organization_id")); + } + + #[test] + fn test_create_spx_partition_auto_generates_id() { + let request = rpc::SpxPartitionCreationRequest { + metadata: Some(rpc::Metadata { + name: "test-partition".to_string(), + description: "".to_string(), + ..Default::default() + }), + id: None, + vni: None, + tenant_organization_id: "tenant-org-123".to_string(), + }; + + let result = NewSpxPartition::try_from(request); + assert!(result.is_ok()); + + let partition = result.unwrap(); + assert_ne!(partition.id, carbide_uuid::spx::SpxPartitionId::nil()); + } + + #[test] + fn test_create_spx_partition_uses_provided_id() { + let provided_id: carbide_uuid::spx::SpxPartitionId = uuid::Uuid::new_v4().into(); + + let request = rpc::SpxPartitionCreationRequest { + metadata: Some(rpc::Metadata { + name: 
"test-partition".to_string(), + description: "".to_string(), + ..Default::default() + }), + id: Some(provided_id), + vni: None, + tenant_organization_id: "tenant-org-123".to_string(), + }; + + let result = NewSpxPartition::try_from(request); + assert!(result.is_ok()); + + let partition = result.unwrap(); + assert_eq!(partition.id, provided_id); + } + + #[test] + fn test_create_spx_partition_without_metadata() { + let request = rpc::SpxPartitionCreationRequest { + metadata: None, + id: None, + vni: Some(200), + tenant_organization_id: "tenant-org-456".to_string(), + }; + + let result = NewSpxPartition::try_from(request); + assert!(result.is_ok()); + + let partition = result.unwrap(); + assert!(partition.name.is_empty()); + assert!(partition.description.is_empty()); + assert_eq!(partition.vni, Some(200)); + } + + #[test] + fn test_create_spx_partition_without_vni() { + let request = rpc::SpxPartitionCreationRequest { + metadata: Some(rpc::Metadata { + name: "no-vni-partition".to_string(), + description: "".to_string(), + ..Default::default() + }), + id: None, + vni: None, + tenant_organization_id: "tenant-org-789".to_string(), + }; + + let result = NewSpxPartition::try_from(request); + assert!(result.is_ok()); + + let partition = result.unwrap(); + assert!(partition.vni.is_none()); + } +} diff --git a/crates/api/src/instance/mod.rs b/crates/api/src/instance/mod.rs index 639ef4d734..b67bfb32b4 100644 --- a/crates/api/src/instance/mod.rs +++ b/crates/api/src/instance/mod.rs @@ -15,6 +15,7 @@ * limitations under the License. 
*/ +use std::cmp::Ordering; use std::collections::{HashMap, HashSet}; use ::rpc::errors::RpcDataConversionError; @@ -24,6 +25,7 @@ use carbide_uuid::infiniband::IBPartitionId; use carbide_uuid::instance::InstanceId; use carbide_uuid::instance_type::InstanceTypeId; use carbide_uuid::machine::MachineId; +use carbide_uuid::spx::SpxPartitionId; use carbide_uuid::vpc::VpcPrefixId; use config_version::ConfigVersion; use db::{ @@ -33,6 +35,7 @@ use db::{ use ipnetwork::IpNetwork; use itertools::Itertools; use model::ConfigValidationError; +use model::dpa_interface::DpaInterface; use model::hardware_info::InfinibandInterface; use model::instance::NewInstance; use model::instance::config::InstanceConfig; @@ -40,6 +43,7 @@ use model::instance::config::infiniband::InstanceInfinibandConfig; use model::instance::config::network::{ InstanceNetworkConfig, InterfaceFunctionId, NetworkDetails, }; +use model::instance::config::spx::{InstanceSpxConfig, SpxAttachmentType}; use model::machine::machine_search_config::MachineSearchConfig; use model::machine::{ HostHealthConfig, LoadSnapshotOptions, Machine, ManagedHostStateSnapshot, NotAllocatableReason, @@ -652,6 +656,14 @@ pub async fn batch_allocate_instances( ) .await?; + for mid in &machine_ids { + let dpa_interfaces = db::dpa_interface::find_by_machine_id(&mut txn, *mid).await?; + let machine_snapshot = snapshot_map.get(mid).unwrap(); + let mut machine_snapshot = machine_snapshot.clone(); + machine_snapshot.dpa_interface_snapshots = dpa_interfaces; + snapshot_map.insert(*mid, machine_snapshot.clone()); + } + // Verify all snapshots were loaded and validate usability for request in &requests { let machine_id = request.machine_id; @@ -826,6 +838,19 @@ pub async fn batch_allocate_instances( batch_validate_ib_partition_ownership(&mut txn, &ib_partition_validations).await?; + let spx_partition_validations: Vec<_> = requests + .iter() + .flat_map(|r| { + r.config.spxconfig.spx_attachments.iter().map(|attachment| { + ( + 
attachment.spx_partition_id, + &r.config.tenant.tenant_organization_id, + ) + }) + }) + .collect(); + batch_validate_spx_partition_ownership(&mut txn, &spx_partition_validations).await?; + // Batch query inband segments for all machines let inband_segments_map = db::instance_network_config::batch_get_inband_segments_by_machine_ids( @@ -927,6 +952,7 @@ pub async fn batch_allocate_instances( let extension_services_config_version = ConfigVersion::initial(); let config_version = ConfigVersion::initial(); let nvl_config_version = ConfigVersion::initial(); + let spx_config_version = ConfigVersion::initial(); let new_instances: Vec> = processed_requests .iter() @@ -941,6 +967,7 @@ pub async fn batch_allocate_instances( ib_config_version, extension_services_config_version, nvlink_config_version: nvl_config_version, + spx_config_version, }) .collect(); @@ -964,6 +991,11 @@ pub async fn batch_allocate_instances( ConfigVersion, model::instance::config::nvlink::InstanceNvLinkConfig, )> = Vec::with_capacity(request_count); + let mut spx_config_updates: Vec<( + carbide_uuid::instance::InstanceId, + ConfigVersion, + model::instance::config::spx::InstanceSpxConfig, + )> = Vec::with_capacity(request_count); for (request, mh_snapshot) in &processed_requests { let instance_id = request.instance_id; @@ -1009,6 +1041,9 @@ pub async fn batch_allocate_instances( nvl_config_version, request.config.nvlink.clone(), )); + + let updated_spx_config = allocate_spx_port_mac(&request.config.spxconfig, mh_snapshot)?; + spx_config_updates.push((instance_id, spx_config_version, updated_spx_config)); } // ==== Phase 8: Batch update configs ==== @@ -1031,6 +1066,12 @@ pub async fn batch_allocate_instances( .collect(); db::instance::batch_update_nvlink_config(&mut txn, &nvlink_refs, false).await?; + let spx_refs: Vec<_> = spx_config_updates + .iter() + .map(|(id, ver, cfg)| (*id, *ver, cfg)) + .collect(); + db::instance::batch_update_spx_config(&mut txn, &spx_refs, false).await?; + // ==== Phase 9: 
Load final instances ==== let machine_id_refs: Vec<&MachineId> = processed_requests .iter() @@ -1065,6 +1106,55 @@ pub async fn batch_allocate_instances( Ok(snapshots) } +/// Batch validate SPX partition ownership for multiple (partition_id, tenant_id) pairs +pub async fn batch_validate_spx_partition_ownership( + txn: &mut PgConnection, + validations: &[(SpxPartitionId, &TenantOrganizationId)], +) -> CarbideResult<()> { + if validations.is_empty() { + tracing::info!("batch_validate_spx_partition_ownership validations is empty"); + return Ok(()); + } + + // Batch query all unique partitions + let unique_partition_ids: Vec<_> = validations + .iter() + .map(|(id, _)| *id) + .collect::>() + .into_iter() + .collect(); + + let partitions = db::spx_partition::find_by( + txn, + ObjectColumnFilter::List(db::spx_partition::IdColumn, &unique_partition_ids), + ) + .await?; + + let partition_map: HashMap<_, _> = partitions.into_iter().map(|p| (p.id, p)).collect(); + + // Validate each partition ownership + for (partition_id, expected_tenant) in validations { + let partition = partition_map.get(partition_id).ok_or_else(|| { + tracing::error!( + "batch_validate_spx_partition_ownership partition not found: {partition_id}" + ); + ConfigValidationError::invalid_value(format!( + "SPX partition {partition_id} is not created" + )) + })?; + + if &partition.tenant_organization_id != *expected_tenant { + tracing::error!( + "batch_validate_spx_partition_ownership partition not owned by the tenant: {partition_id}" + ); + return Err(CarbideError::InvalidArgument(format!( + "SPX Partition {partition_id} is not owned by the tenant {expected_tenant}", + ))); + } + } + Ok(()) +} + /// Batch validate IB partition ownership for multiple (partition_id, tenant_id) pairs pub async fn batch_validate_ib_partition_ownership( txn: &mut PgConnection, @@ -1121,6 +1211,132 @@ pub async fn validate_ib_partition_ownership( batch_validate_ib_partition_ownership(txn, &validations).await } +pub async fn 
validate_spx_partition_ownership( + txn: &mut sqlx::Transaction<'_, sqlx::Postgres>, + instance_tenant: &TenantOrganizationId, + spxcfg: &InstanceSpxConfig, +) -> Result<(), CarbideError> { + for attachment in &spxcfg.spx_attachments { + let partition_id = attachment.spx_partition_id; + + let partition = db::spx_partition::find_by( + txn.as_mut(), + ObjectColumnFilter::List(db::spx_partition::IdColumn, &[partition_id]), + ) + .await?; + if partition.len() != 1 { + return Err(CarbideError::InvalidArgument(format!( + "SPX partition {partition_id} is not found", + ))); + } + let spx_partition = &partition[0]; + if spx_partition.tenant_organization_id != *instance_tenant { + return Err(CarbideError::InvalidArgument(format!( + "SPX partition {partition_id} is not owned by the tenant {instance_tenant}", + ))); + } + } + + Ok(()) +} + +/// sort spx device by slot and add devices with the same name are added to hashmap +pub fn sort_spx_by_slot(spx_hw_info_vec: &[DpaInterface]) -> HashMap> { + let mut spx_hw_map = HashMap::new(); + let mut sorted_spx_hw_info_vec = spx_hw_info_vec.to_owned(); + sorted_spx_hw_info_vec.sort_by(|a, b| a.pci_name.cmp(&b.pci_name)); + + for spx in sorted_spx_hw_info_vec { + if let Some(device) = &spx.device_description.clone() { + let entry: &mut Vec = spx_hw_map.entry(device.clone()).or_default(); + entry.push(spx); + } else { + tracing::info!( + "sort_spx_by_slot device_description is not found: {:#?}", + spx + ); + } + } + + spx_hw_map +} + +/// Allocate SPX port MAC addresses +pub fn allocate_spx_port_mac( + spx_config: &InstanceSpxConfig, + mh_snapshot: &ManagedHostStateSnapshot, +) -> CarbideResult { + let mut updated_spx_config = spx_config.clone(); + + tracing::debug!( + "allocate_spx_port_mac dev len: {:#?}", + mh_snapshot.dpa_interface_snapshots.len() + ); + + let mut seen_device_instances = HashSet::new(); + for att in &updated_spx_config.spx_attachments { + if !seen_device_instances.insert((att.device.clone(), att.device_instance)) { 
+ tracing::error!( + "allocate_spx_port_mac duplicate SPX attachment for device {} instance {}", + att.device, + att.device_instance + ); + return Err(CarbideError::InvalidArgument(format!( + "duplicate SPX attachment for device {} instance {}", + att.device, att.device_instance, + ))); + } + } + + // Process higher `device_instance` indices first so removing a consumed interface from + // `sorted_spxs` does not shift indices still needed for lower instances on the same device. + updated_spx_config + .spx_attachments + .sort_unstable_by(|a, b| match a.device.cmp(&b.device) { + Ordering::Equal => b.device_instance.cmp(&a.device_instance), + o => o, + }); + + let mut spx_hw_map = sort_spx_by_slot(mh_snapshot.dpa_interface_snapshots.as_ref()); + + for spx_attachment in &mut updated_spx_config.spx_attachments { + if spx_attachment.attachment_type == SpxAttachmentType::Virtual { + tracing::error!("allocate_spx_port_mac SPX attachment type Virtual is not supported"); + return Err(CarbideError::InvalidArgument( + "SPX attachment type Virtual is not supported".to_string(), + )); + } + if let Some(sorted_spxs) = spx_hw_map.get_mut(&spx_attachment.device) { + if let Some(spx_interface) = sorted_spxs.get(spx_attachment.device_instance as usize) { + spx_attachment.mac_address = Some(spx_interface.mac_address.to_string()); + sorted_spxs.remove(spx_attachment.device_instance as usize); + } else { + tracing::error!( + "allocate_spx_port_mac SPX device {} has no instance {}", + spx_attachment.device, + spx_attachment.device_instance + ); + return Err(CarbideError::InvalidArgument(format!( + "SPX device {} has no instance {}", + spx_attachment.device, spx_attachment.device_instance, + ))); + } + } else { + tracing::error!( + "allocate_spx_port_mac No SPX device with name {} in machine {}", + spx_attachment.device, + mh_snapshot.host_snapshot.id + ); + return Err(CarbideError::InvalidArgument(format!( + "No SPX device with name {} in machine {}", + spx_attachment.device, 
mh_snapshot.host_snapshot.id, + ))); + } + } + + Ok(updated_spx_config) +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/api/src/lib.rs b/crates/api/src/lib.rs index 9f0d7a9ab9..f831a71a4d 100644 --- a/crates/api/src/lib.rs +++ b/crates/api/src/lib.rs @@ -41,6 +41,7 @@ mod credentials; mod db_init; mod dhcp; mod dpa; +mod dpa_monitor; mod dpf; mod dpf_services; mod dynamic_settings; diff --git a/crates/api/src/setup.rs b/crates/api/src/setup.rs index dac272cfac..5333cdf5da 100644 --- a/crates/api/src/setup.rs +++ b/crates/api/src/setup.rs @@ -64,6 +64,7 @@ use crate::api::Api; use crate::api::metrics::ApiMetricsEmitter; use crate::cfg::file::{CarbideConfig, InitialObjectsConfig, ListenMode}; use crate::dpa::handler::{DpaInfo, start_dpa_handler}; +use crate::dpa_monitor::DpaMonitor; use crate::dynamic_settings::DynamicSettings; use crate::errors::CarbideError; use crate::handlers::machine_validation::apply_config_on_startup; @@ -79,8 +80,6 @@ use crate::rack::bms_client::BmsDsxExchangeHandle; use crate::scout_stream::ConnectionRegistry; use crate::state_controller::common_services::CommonStateHandlerServices; use crate::state_controller::controller::{Enqueuer, StateController}; -use crate::state_controller::dpa_interface::handler::DpaInterfaceStateHandler; -use crate::state_controller::dpa_interface::io::DpaInterfaceStateControllerIO; use crate::state_controller::ib_partition::handler::IBPartitionStateHandler; use crate::state_controller::ib_partition::io::IBPartitionStateControllerIO; use crate::state_controller::machine::handler::MachineStateHandlerBuilder; @@ -910,24 +909,6 @@ pub async fn initialize_and_start_controllers<'a>( let downloader = FirmwareDownloader::new(); let upload_limiter = Arc::new(Semaphore::new(carbide_config.firmware_global.max_uploads)); - let mut dpa_info: Option> = None; - - if carbide_config.is_dpa_enabled() { - let mqtt_client = - Some(start_dpa_handler(join_set, api_service.clone(), cancel_token.clone()).await?); - let 
subnet_ip = carbide_config.get_dpa_subnet_ip()?; - - let subnet_mask = carbide_config.get_dpa_subnet_mask()?; - - let info: DpaInfo = DpaInfo { - subnet_ip, - subnet_mask, - mqtt_client, - }; - - dpa_info = Some(Arc::new(info)); - } - // Create state change emitter with DSX Exchange Event Bus hook if enabled let state_change_emitter = { let mut emitter_builder = StateChangeEmitterBuilder::default(); @@ -1003,7 +984,6 @@ pub async fn initialize_and_start_controllers<'a>( emitter_builder.build() }; - let handler_services = Arc::new(CommonStateHandlerServices { db_pool: db_pool.clone(), db_reader: db_pool.clone().into(), @@ -1012,7 +992,6 @@ pub async fn initialize_and_start_controllers<'a>( ib_pools: common_pools.infiniband.clone(), ipmi_tool: ipmi_tool.clone(), site_config: carbide_config.clone(), - dpa_info, rms_client: rms_client.clone(), switch_system_image_rms_client: carbide_config .rms @@ -1131,19 +1110,6 @@ pub async fn initialize_and_start_controllers<'a>( .build_and_spawn(join_set, cancel_token.clone()) .expect("Unable to build NetworkSegmentController"); - if carbide_config.is_dpa_enabled() { - tracing::info!("Starting DpaInterfaceStateController as dpa is enabled"); - StateController::::builder() - .database(db_pool.clone(), work_lock_manager_handle.clone()) - .meter("carbide_dpa_interfaces", meter.clone()) - .processor_id(state_controller_id.clone()) - .services(handler_services.clone()) - .iteration_config((&carbide_config.dpa_interface_state_controller.controller).into()) - .state_handler(Arc::new(DpaInterfaceStateHandler::new())) - .build_and_spawn(join_set, cancel_token.clone()) - .expect("Unable to build DpaInterfaceStateController"); - } - if carbide_config.spdm.enabled { let Some(nras_config) = carbide_config.spdm.nras_config.clone() else { return Err(eyre::eyre!( @@ -1230,6 +1196,34 @@ pub async fn initialize_and_start_controllers<'a>( ) .start(join_set, cancel_token.clone())?; + if carbide_config.is_dpa_enabled() { + let mqtt_client = + 
Some(start_dpa_handler(join_set, api_service.clone(), cancel_token.clone()).await?); + + let subnet_ip = carbide_config.get_dpa_subnet_ip()?; + + let subnet_mask = carbide_config.get_dpa_subnet_mask()?; + + let info: DpaInfo = DpaInfo { + subnet_ip, + subnet_mask, + mqtt_client, + }; + + let dpa_info = Some(Arc::new(info)); + + DpaMonitor::new( + db_pool.clone(), + db_pool.clone().into(), + dpa_info, + meter.clone(), + carbide_config.dpa_config.clone().unwrap_or_default(), + carbide_config.host_health, + work_lock_manager_handle.clone(), + ) + .start(join_set, cancel_token.clone())?; + } + SiteExplorer::new( db_pool.clone(), carbide_config.site_explorer.clone(), @@ -1286,6 +1280,8 @@ pub async fn initialize_and_start_controllers<'a>( ) .await?; + tracing::info!("initialize_and_start_controllers: all controllers initialized and started"); + Ok(()) } diff --git a/crates/api/src/state_controller/common_services.rs b/crates/api/src/state_controller/common_services.rs index 40840bb90e..75e9abcc00 100644 --- a/crates/api/src/state_controller/common_services.rs +++ b/crates/api/src/state_controller/common_services.rs @@ -29,7 +29,6 @@ use model::resource_pool::common::IbPools; use sqlx::PgPool; use crate::cfg::file::CarbideConfig; -use crate::dpa::handler::DpaInfo; use crate::rack::rms_client::SwitchSystemImageRmsClient; use crate::state_controller::external_service_error::redfish_client_creation_error; use crate::state_controller::state_handler::StateHandlerError; @@ -59,8 +58,6 @@ pub struct CommonStateHandlerServices { /// Access to the site config pub site_config: Arc, - pub dpa_info: Option>, - /// Rack Manager Service client pub rms_client: Option>, diff --git a/crates/api/src/state_controller/dpa_interface/handler.rs b/crates/api/src/state_controller/dpa_interface/handler.rs deleted file mode 100644 index 9cda9d9bff..0000000000 --- a/crates/api/src/state_controller/dpa_interface/handler.rs +++ /dev/null @@ -1,607 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright 
(c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -//! State Handler implementation for Dpa Interfaces - -use std::sync::Arc; - -use carbide_uuid::dpa_interface::DpaInterfaceId; -use chrono::{Duration, TimeDelta}; -use db::dpa_interface::get_dpa_vni; -use eyre::eyre; -use model::dpa_interface::DpaLockMode::{Locked, Unlocked}; -use model::dpa_interface::{DpaInterface, DpaInterfaceControllerState}; -use mqttea::MqtteaClient; -use sqlx::PgTransaction; - -use crate::dpa::handler::DpaInfo; -use crate::state_controller::common_services::CommonStateHandlerServices; -use crate::state_controller::dpa_interface::context::DpaInterfaceStateHandlerContextObjects; -use crate::state_controller::state_handler::{ - StateHandler, StateHandlerContext, StateHandlerError, StateHandlerOutcome, -}; - -/// The actual Dpa Interface State handler -#[derive(Debug, Clone)] -pub struct DpaInterfaceStateHandler {} - -impl DpaInterfaceStateHandler { - pub fn new() -> Self { - Self {} - } - - fn record_metrics( - &self, - _state: &mut DpaInterface, - _ctx: &mut StateHandlerContext, - ) { - } -} - -#[async_trait::async_trait] -impl StateHandler for DpaInterfaceStateHandler { - type ObjectId = DpaInterfaceId; - type State = DpaInterface; - type ControllerState = DpaInterfaceControllerState; - type ContextObjects = DpaInterfaceStateHandlerContextObjects; - - async fn 
handle_object_state( - &self, - _interface_id: &DpaInterfaceId, - state: &mut DpaInterface, - controller_state: &Self::ControllerState, - ctx: &mut StateHandlerContext, - ) -> Result, StateHandlerError> { - // record metrics irrespective of the state of the dpa interface - self.record_metrics(state, ctx); - - let hb_interval = ctx - .services - .site_config - .get_hb_interval() - .unwrap_or_else(|| Duration::minutes(2)); - - let dpa_info = ctx.services.dpa_info.clone().unwrap(); - - // DPAs follow the host-level `use_admin_network`. - let host_use_admin_network = db::machine::get_host_use_admin_network_for_dpa_interface( - &ctx.services.db_pool, - &state.id, - ) - .await?; - - match controller_state { - DpaInterfaceControllerState::Provisioning => { - // New DPA objects start off in the Provisioning state. - // They stay in that state until the first time the machine - // starts a transition from Ready to Assigned state. - if host_use_admin_network { - return Ok(StateHandlerOutcome::do_nothing()); - } - - let new_state = DpaInterfaceControllerState::Ready; - tracing::info!(state = ?new_state, "Dpa Interface state transition"); - return Ok(StateHandlerOutcome::transition(new_state)); - } - - DpaInterfaceControllerState::Ready => { - // We will stay in Ready state as long use_admin_network is true. - // When an instance is created from this host, use_admin_network - // will be turned off. We then need to SetVNI, and wait for the - // SetVNI to take effect. 
- - let client = dpa_info - .mqtt_client - .clone() - .ok_or_else(|| StateHandlerError::GenericError(eyre!("Missing mqtt_client")))?; - - if !host_use_admin_network { - let new_state = DpaInterfaceControllerState::Unlocking; - tracing::info!(state = ?new_state, "Dpa Interface state transition"); - - Ok(StateHandlerOutcome::transition(new_state)) - } else { - let txn = - do_heartbeat(state, ctx.services, client, &dpa_info, hb_interval, false) - .await?; - - Ok(StateHandlerOutcome::do_nothing().with_txn_opt(txn)) - } - } - - DpaInterfaceControllerState::Unlocking => { - // Once we reach Unlocking state, we would have replied to - // ForgeAgentControl requests from scout with a reply indicating - // that it should unlock the card. The scout does the action, and - // publishes an observation indicating the lock status. That causes - // us to update the card state in the DB. If card_state is none, that - // means this sequence has not yet taken place. So we just wait. - if state.card_state.is_none() { - tracing::info!("card_state none for dpa: {:#?}", state.id); - return Ok(StateHandlerOutcome::wait( - "Waiting for card to get unlocked".to_string(), - )); - } - if let Some(ref mut cs) = state.card_state - && cs.lockmode == Some(Unlocked) - { - let new_state = DpaInterfaceControllerState::ApplyFirmware; - tracing::info!(state = ?new_state, "Interface unlocked. Transitioning to next state"); - return Ok(StateHandlerOutcome::transition(new_state)); - } - Ok(StateHandlerOutcome::wait( - "Waiting for card to get unlocked".to_string(), - )) - } - - DpaInterfaceControllerState::ApplyFirmware => { - // At this point, we're in the ApplyFirmware state, which means we - // have sent a firmware flash instruction to scout (via a configured - // FirmwareFlasherProfile). Now, we wait for an observation report - // from scout indicating firmware has been applied (or skipped if no - // config was available). 
- let Some(ref card_state) = state.card_state else { - tracing::info!( - "no firmware report, because card_state none for dpa: {:#?}, waiting for retry", - state.id - ); - return Ok(StateHandlerOutcome::wait( - "Waiting for firmware to be applied".to_string(), - )); - }; - if let Some(ref firmware_report) = card_state.firmware_report { - // Transition on to the next state if the flash succeeded and reset - // either wasn't requested (None) or succeeded (Some(true)). - // - // To explain this a bit better, if no reset was requested, then - // we'll get None back here. Since no reset was requested at all, - // then we can continue, so we just "default" to true, to let - // things continue. If a reset WAS requested, then we'll unwrap - // whatever the result was (either success/true, or failed/false). - let reset_ok = firmware_report.reset.unwrap_or(true); - if firmware_report.flashed && reset_ok { - let new_state = DpaInterfaceControllerState::ApplyProfile; - tracing::info!( - state = ?new_state, - observed_version = firmware_report.observed_version.as_deref().unwrap_or("none"), - "firmware report received and successfully applied, transitioning" - ); - return Ok(StateHandlerOutcome::transition(new_state)); - } - tracing::warn!( - flashed = firmware_report.flashed, - reset = ?firmware_report.reset, - observed_version = firmware_report.observed_version.as_deref().unwrap_or("none"), - "firmware report received but not successful, waiting for retry" - ); - } - - // ..if we get here, it's because the firmware_report in the CardState - // wasn't set yet. ...or it was, and this round wasn't successful, so we're - // just going to keep hanging out in this state until it is (letting the - // apply workflow happen again). 
- Ok(StateHandlerOutcome::wait( - "Waiting for firmware to be applied".to_string(), - )) - } - - DpaInterfaceControllerState::ApplyProfile => handle_apply_profile(state), - DpaInterfaceControllerState::Locking => { - let Some(ref cs) = state.card_state else { - tracing::error!("Unexpected - card_state none for dpa: {:#?}", state.id); - return Ok(StateHandlerOutcome::do_nothing()); - }; - if cs.lockmode == Some(Locked) { - let new_state = DpaInterfaceControllerState::WaitingForSetVNI; - tracing::info!(state = ?new_state, "Dpa Interface state transition"); - return Ok(StateHandlerOutcome::transition(new_state)); - } - Ok(StateHandlerOutcome::wait( - "Waiting for card to get locked".to_string(), - )) - } - - DpaInterfaceControllerState::WaitingForSetVNI => { - // When we are in the WaitingForSetVNI state, we are have sent a SetVNI command - // to the DPA Interface Card. We are waiting for an ACK for that command. - // When the ack shows up, the network_config_version and the network_status_observation - // will match. - - if !state.managed_host_network_config_version_synced() { - tracing::debug!("DPA interface found in WaitingForSetVNI state"); - - let client = dpa_info.mqtt_client.clone().ok_or_else(|| { - StateHandlerError::GenericError(eyre!("Missing mqtt_client")) - })?; - - let txn = send_set_vni_command( - state, - ctx.services, - client, - &dpa_info, - true, /* needs_vni */ - false, /* not a heartbeat */ - true, /* send revision */ - ) - .await?; - Ok(StateHandlerOutcome::do_nothing().with_txn_opt(txn)) - } else { - let new_state = DpaInterfaceControllerState::Assigned; - tracing::info!(state = ?new_state, "Dpa Interface state transition"); - Ok(StateHandlerOutcome::transition(new_state)) - } - } - DpaInterfaceControllerState::Assigned => { - // We will stay in the Assigned state as long as use_admin_network is off, which - // means we are in the tenant network. 
Once use_admin_network is turned on, we - // will send a SetVNI command to the DPA Interface card to set the VNI to 0 - // and will transition to WaitingForResetVNI state. - - let client = dpa_info - .mqtt_client - .clone() - .ok_or_else(|| StateHandlerError::GenericError(eyre!("Missing mqtt_client")))?; - - if host_use_admin_network { - let new_state = DpaInterfaceControllerState::WaitingForResetVNI; - tracing::info!(state = ?new_state, "Dpa Interface state transition"); - let txn = send_set_vni_command( - state, - ctx.services, - client, - &dpa_info, - false, - false, - true, - ) - .await?; - - Ok(StateHandlerOutcome::transition(new_state).with_txn_opt(txn)) - } else { - let txn = - do_heartbeat(state, ctx.services, client, &dpa_info, hb_interval, true) - .await?; - - // Send a heartbeat command, indicated by the revision string being "NIL". - Ok(StateHandlerOutcome::do_nothing().with_txn_opt(txn)) - } - } - DpaInterfaceControllerState::WaitingForResetVNI => { - // When we are in the WaitingForResetVNI state, we are have sent a SetVNI command - // to the DPA Interface Card. We are waiting for an ACK for that command. - // When the ack shows up, the network_config_version and the network_status_observation - // will match. 
- - if !state.managed_host_network_config_version_synced() { - tracing::debug!("DPA interface found in WaitingForResetVNI state"); - let client = dpa_info.mqtt_client.clone().ok_or_else(|| { - StateHandlerError::GenericError(eyre!("Missing mqtt_client")) - })?; - - let txn = send_set_vni_command( - state, - ctx.services, - client, - &dpa_info, - false, - false, - true, - ) - .await?; - Ok(StateHandlerOutcome::do_nothing().with_txn_opt(txn)) - } else { - let new_state = DpaInterfaceControllerState::Ready; - tracing::info!(state = ?new_state, "Dpa Interface state transition"); - Ok(StateHandlerOutcome::transition(new_state)) - } - } - } - } -} - -// Determine if we need to do a heartbeat or if we need to -// send a SetVni command because the DPA and Carbide are out of sync. -// If so, call send_set_vni_command to send the heart beat or set vni -async fn do_heartbeat<'a>( - state: &mut DpaInterface, - services: &mut CommonStateHandlerServices, - client: Arc, - dpa_info: &Arc, - hb_interval: TimeDelta, - needs_vni: bool, -) -> Result>, StateHandlerError> { - let mut send_hb = false; - let mut send_revision = false; - - // We are in the Ready or Assigned state and we continue to be in the same state. - // In this state, we will send SetVni command to the DPA if - // (1) if the heartbeat interval has elapsed since the heartbeat - // (2) The DPA sent us an ack and it looks like the DPA lost its config (due to powercycle potentially) - // Heartbeat is identified by the revision being se to the sentinel value "NIL" - // Both send_hb and send_revision could evaluate to true below. If send_hb is true, we will - // update the last_hb_time for the interface entry. 
- - if let Some(next_hb_time) = state.last_hb_time.checked_add_signed(hb_interval) - && chrono::Utc::now() >= next_hb_time - { - send_hb = true; // heartbeat interval elapsed since the last heartbeat - } - - if !state.managed_host_network_config_version_synced() { - send_revision = true; // DPA config not in sync with us. So resend the config - } - - if send_hb || send_revision { - let txn = send_set_vni_command( - state, - services, - client, - dpa_info, - needs_vni, - send_hb, - send_revision, - ) - .await?; - Ok(txn) - } else { - Ok(None) - } -} - -// Send a SetVni command to the DPA. The SetVni command could be a heart beat (identified by -// revision being "NIL"). If needs_vni is true, get the VNI to use from the DB. Otherwise, vni -// sent is 0. -async fn send_set_vni_command<'a>( - state: &mut DpaInterface, - services: &mut CommonStateHandlerServices, - client: Arc, - dpa_info: &Arc, - needs_vni: bool, - heart_beat: bool, - send_revision: bool, -) -> Result>, StateHandlerError> { - let revision_str = if send_revision { - state.network_config.version.to_string() - } else { - "NIL".to_string() - }; - - let vni = if needs_vni { - match get_dpa_vni(state, &mut services.db_reader).await { - Ok(dv) => dv, - Err(e) => { - return Err(StateHandlerError::GenericError(eyre!( - "get_dpa_vni error: {:#?}", - e - ))); - } - } - } else { - 0 - }; - - // Send a heartbeat command, indicated by the revision string being "NIL". 
- match crate::dpa::handler::send_dpa_command( - client, - dpa_info, - state.mac_address.to_string(), - revision_str, - vni, - ) - .await - { - Ok(()) => { - if heart_beat { - let mut txn = services.db_pool.begin().await?; - let res = db::dpa_interface::update_last_hb_time(state, &mut txn).await; - if res.is_err() { - tracing::error!( - "Error updating last_hb_time for dpa id: {} res: {:#?}", - state.id, - res - ); - } - Ok(Some(txn)) - } else { - Ok(None) - } - } - Err(_e) => Ok(None), - } -} - -/// handle_apply_profile handles the ApplyProfile state for a -/// SuperNIC/DPA interface, which means we sent an mlxconfig -/// profile config down to scout (which takes care of resetting -/// mlxconfig parameters back to defaults, and then potentially -/// overlaying a profile of parameters over top of it). -/// -/// And just so it's clear, there are two "success" cases that -/// we check for here. -/// 1. A profile was configured and successfully synced — scout -/// reports a profile_name and profile_synced is true. -/// 2. NO profile was configured (indicating reset only) — scout -/// reports profile_name=None and profile_synced true. This is -/// successful because the reset itself succeeded and there was -/// nothing else to apply. -/// -/// In both cases, profile_synced=Some(true) is the signal that -/// the workflow completed successfully, and it's safe to transition -/// to the next state. 
-fn handle_apply_profile( - state: &DpaInterface, -) -> Result, StateHandlerError> { - let Some(ref cs) = state.card_state else { - tracing::info!( - "no profile report, because card_state none for dpa: {:#?}, waiting for retry", - state.id - ); - return Ok(StateHandlerOutcome::wait( - "Waiting for profile to be applied".to_string(), - )); - }; - if cs.profile_synced == Some(true) { - let new_state = DpaInterfaceControllerState::Locking; - tracing::info!( - state = ?new_state, - profile = cs.profile.as_deref().unwrap_or("none"), - "profile applied successfully, transitioning" - ); - return Ok(StateHandlerOutcome::transition(new_state)); - } - Ok(StateHandlerOutcome::wait( - "Waiting for profile to be applied".to_string(), - )) -} - -#[cfg(test)] -mod tests { - use std::str::FromStr; - - use carbide_uuid::dpa_interface::DpaInterfaceId; - use carbide_uuid::machine::MachineId; - use config_version::{ConfigVersion, Versioned}; - use mac_address::MacAddress; - use model::dpa_interface::{ - CardState, DpaInterface, DpaInterfaceControllerState, DpaInterfaceNetworkConfig, - }; - - use super::*; - - // test_dpa_interface is a small helper function used to build - // a minimal DpaInterface for testing the ApplyProfile handler. 
- fn test_dpa_interface(card_state: Option) -> DpaInterface { - let now = chrono::Utc::now(); - DpaInterface { - id: DpaInterfaceId::new(), - machine_id: MachineId::from_str( - "fm100htes3rn1npvbtm5qd57dkilaag7ljugl1llmm7rfuq1ov50i0rpl30", - ) - .unwrap(), - mac_address: MacAddress::from_str("00:11:22:33:44:55").unwrap(), - pci_name: "01:00.0".to_string(), - underlay_ip: None, - overlay_ip: None, - created: now, - updated: now, - deleted: None, - controller_state: Versioned::new( - DpaInterfaceControllerState::ApplyProfile, - ConfigVersion::initial(), - ), - last_hb_time: now, - controller_state_outcome: None, - network_config: Versioned::new( - DpaInterfaceNetworkConfig::default(), - ConfigVersion::initial(), - ), - network_status_observation: None, - card_state, - device_info: None, - device_info_ts: None, - mlxconfig_profile: None, - history: vec![], - } - } - - #[test] - fn apply_profile_no_card_state_waits() { - let state = test_dpa_interface(None); - let outcome = handle_apply_profile(&state).unwrap(); - assert!( - matches!(outcome, StateHandlerOutcome::Wait { .. }), - "expected Wait when card_state is None" - ); - } - - #[test] - fn apply_profile_synced_with_profile_transitions() { - let cs = CardState { - profile: Some("bf3-spx-enabled".to_string()), - profile_synced: Some(true), - ..Default::default() - }; - let state = test_dpa_interface(Some(cs)); - let outcome = handle_apply_profile(&state).unwrap(); - assert!( - matches!( - outcome, - StateHandlerOutcome::Transition { - next_state: DpaInterfaceControllerState::Locking, - .. - } - ), - "expected Transition to Locking when profile_synced is true" - ); - } - - #[test] - fn apply_profile_synced_without_profile_transitions() { - // This is the reset-only case -- no profile was configured, - // but the reset succeeded (yay), so profile_synced is true - // with profile=None. 
- let cs = CardState { - profile: None, - profile_synced: Some(true), - ..Default::default() - }; - let state = test_dpa_interface(Some(cs)); - let outcome = handle_apply_profile(&state).unwrap(); - assert!( - matches!( - outcome, - StateHandlerOutcome::Transition { - next_state: DpaInterfaceControllerState::Locking, - .. - } - ), - "expected Transition to Locking for reset-only (no profile) success" - ); - } - - #[test] - fn apply_profile_sync_failed_waits() { - let cs = CardState { - profile: Some("bf3-spx-enabled".to_string()), - profile_synced: Some(false), - ..Default::default() - }; - let state = test_dpa_interface(Some(cs)); - let outcome = handle_apply_profile(&state).unwrap(); - assert!( - matches!(outcome, StateHandlerOutcome::Wait { .. }), - "expected Wait when profile_synced is false (sync failed)" - ); - } - - #[test] - fn apply_profile_synced_not_yet_reported_waits() { - // scout hasn't reported back yet, so profile_synced is None, - // and we keep on waiting. - let cs = CardState { - profile: None, - profile_synced: None, - ..Default::default() - }; - let state = test_dpa_interface(Some(cs)); - let outcome = handle_apply_profile(&state).unwrap(); - assert!( - matches!(outcome, StateHandlerOutcome::Wait { .. }), - "expected Wait when profile_synced is None (not yet reported)" - ); - } -} diff --git a/crates/api/src/state_controller/dpa_interface/io.rs b/crates/api/src/state_controller/dpa_interface/io.rs deleted file mode 100644 index 4c10919219..0000000000 --- a/crates/api/src/state_controller/dpa_interface/io.rs +++ /dev/null @@ -1,164 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -//! State Controller IO implementation for dpa interfaces - -use carbide_uuid::dpa_interface::DpaInterfaceId; -use config_version::{ConfigVersion, Versioned}; -use db::{self, DatabaseError}; -use model::StateSla; -use model::controller_outcome::PersistentStateHandlerOutcome; -use model::dpa_interface::{self, DpaInterface, DpaInterfaceControllerState}; -use sqlx::PgConnection; - -use crate::state_controller::dpa_interface::context::DpaInterfaceStateHandlerContextObjects; -use crate::state_controller::dpa_interface::metrics::DpaInterfaceMetricsEmitter; -use crate::state_controller::io::StateControllerIO; - -/// State Controller IO implementation for dpa interfaces -#[derive(Default, Debug)] -pub struct DpaInterfaceStateControllerIO {} - -#[async_trait::async_trait] -impl StateControllerIO for DpaInterfaceStateControllerIO { - type ObjectId = DpaInterfaceId; - type State = DpaInterface; - type ControllerState = DpaInterfaceControllerState; - type MetricsEmitter = DpaInterfaceMetricsEmitter; - type ContextObjects = DpaInterfaceStateHandlerContextObjects; - - const DB_ITERATION_ID_TABLE_NAME: &'static str = "dpa_interfaces_controller_iteration_ids"; - const DB_QUEUED_OBJECTS_TABLE_NAME: &'static str = "dpa_interfaces_controller_queued_objects"; - - const LOG_SPAN_CONTROLLER_NAME: &'static str = "dpa_interfaces_controller"; - - async fn list_objects( - &self, - txn: &mut PgConnection, - ) -> Result, DatabaseError> { - db::dpa_interface::find_ids(txn).await - } - - /// Loads a state snapshot from the database - async fn load_object_state( - &self, 
- txn: &mut PgConnection, - interface_id: &Self::ObjectId, - ) -> Result, DatabaseError> { - let mut interfaces = db::dpa_interface::find_by_ids(txn, &[*interface_id], false).await?; - if interfaces.is_empty() { - tracing::debug!("DPA load_object_state empty ifid: {:#?}", interface_id); - return Ok(None); - } - if interfaces.len() > 1 { - tracing::debug!( - "DPA load_object_state len ifid: {:#?} len: {}", - interface_id, - interfaces.len() - ); - return Err(DatabaseError::new( - "DpaInterface::find_by_ids()", - sqlx::Error::Decode( - eyre::eyre!( - "Searching for DpaInterface {} returned multiple results", - interface_id - ) - .into(), - ), - )); - } - let intf = interfaces.swap_remove(0); - - Ok(Some(intf)) - } - - async fn load_controller_state( - &self, - _txn: &mut PgConnection, - _object_id: &Self::ObjectId, - state: &Self::State, - ) -> Result, DatabaseError> { - Ok(state.controller_state.clone()) - } - - async fn persist_controller_state( - &self, - txn: &mut PgConnection, - object_id: &Self::ObjectId, - old_version: ConfigVersion, - new_version: ConfigVersion, - new_state: &Self::ControllerState, - ) -> Result { - db::dpa_interface::try_update_controller_state( - txn, - *object_id, - old_version, - new_version, - new_state, - ) - .await - } - - async fn persist_state_history( - &self, - txn: &mut PgConnection, - object_id: &Self::ObjectId, - new_version: ConfigVersion, - new_state: &Self::ControllerState, - ) -> Result<(), DatabaseError> { - db::state_history::persist( - txn, - db::state_history::StateHistoryTableId::DpaInterface, - object_id, - new_state, - new_version, - ) - .await?; - Ok(()) - } - - async fn persist_outcome( - &self, - txn: &mut PgConnection, - object_id: &Self::ObjectId, - outcome: PersistentStateHandlerOutcome, - ) -> Result<(), DatabaseError> { - db::dpa_interface::update_controller_state_outcome(txn, *object_id, outcome).await - } - - fn metric_state_names(state: &DpaInterfaceControllerState) -> (&'static str, &'static str) { - match 
state { - DpaInterfaceControllerState::Provisioning => ("provisioning", ""), - DpaInterfaceControllerState::Unlocking => ("unlocking", ""), - DpaInterfaceControllerState::ApplyFirmware => ("applyfirmware", ""), - DpaInterfaceControllerState::ApplyProfile => ("locking", ""), - DpaInterfaceControllerState::Locking => ("locking", ""), - DpaInterfaceControllerState::Ready => ("ready", ""), - DpaInterfaceControllerState::WaitingForSetVNI => ("waitingforsetvni", ""), - DpaInterfaceControllerState::WaitingForResetVNI => ("waitingforresetvni", ""), - DpaInterfaceControllerState::Assigned => ("assigned", ""), - } - } - - fn state_sla( - &self, - state: &Versioned, - _object_state: &Self::State, - ) -> StateSla { - dpa_interface::state_sla(&state.value, &state.version) - } -} diff --git a/crates/api/src/state_controller/dpa_interface/metrics.rs b/crates/api/src/state_controller/dpa_interface/metrics.rs deleted file mode 100644 index 1657e520eb..0000000000 --- a/crates/api/src/state_controller/dpa_interface/metrics.rs +++ /dev/null @@ -1,60 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -//! 
Defines custom metrics that are collected and emitted by the Machine State Controller - -use ::carbide_utils::metrics::SharedMetricsHolder; -use opentelemetry::metrics::Meter; - -use crate::state_controller::metrics::MetricsEmitter; - -#[derive(Debug, Default, Clone)] -pub struct DpaInterfaceMetrics {} - -#[derive(Debug, Default)] -pub struct DpaInterfaceStateControllerIterationMetrics {} - -#[derive(Debug)] -pub struct DpaInterfaceMetricsEmitter {} - -impl DpaInterfaceStateControllerIterationMetrics {} - -impl MetricsEmitter for DpaInterfaceMetricsEmitter { - type ObjectMetrics = DpaInterfaceMetrics; - type IterationMetrics = DpaInterfaceStateControllerIterationMetrics; - - fn new( - _object_type: &str, - _meter: &Meter, - _shared_metrics: SharedMetricsHolder, - ) -> Self { - Self {} - } - - // This routine is called in the context of a single thread. - // The statecontroller launches multiple threads (upto max_concurrency) - // Each thread works on one object and records the metrics for that object. - // Once all the tasks are done, the original thread calls merge object_handling_metrics. - // No need for mutex when manipulating the seg_stats HashMap. - fn merge_object_handling_metrics( - _iteration_metrics: &mut Self::IterationMetrics, - _object_metrics: &Self::ObjectMetrics, - ) { - } - - fn emit_object_counters_and_histograms(&self, _object_metrics: &Self::ObjectMetrics) {} -} diff --git a/crates/api/src/state_controller/machine/handler.rs b/crates/api/src/state_controller/machine/handler.rs index 53741b299f..59849e2b8c 100644 --- a/crates/api/src/state_controller/machine/handler.rs +++ b/crates/api/src/state_controller/machine/handler.rs @@ -1617,27 +1617,6 @@ impl MachineStateHandler { // Clear if any reprovision (dpu or host) is set due to race scenario. Self::clear_host_update_alert_and_reprov(mh_snapshot, &mut txn).await?; - // Flip the host onto the tenant network. 
Setting - // `use_admin_network = false` on the host row goes through - // `try_update_network_config`, which fans the version bump out - // to every DPU in the host machine group -- each DPU's sync state - // then flips to "out of sync" until its agent has polled, applied, - // and reported the new version. State-machine waits (e.g. - // WaitingForNetworkReconfig, WaitingForNetworkSegmentToBeReady) - // gate on that. DPAs follow the same flag (read host-level via the - // snapshot), but use a separate per-interface ack mechanism for - // SetVNI commands. - let host_version = mh_snapshot.host_snapshot.network_config.version; - let mut host_netconf = mh_snapshot.host_snapshot.network_config.value.clone(); - host_netconf.use_admin_network = Some(false); - db::machine::try_update_network_config( - &mut txn, - &mh_snapshot.host_snapshot.id, - host_version, - &host_netconf, - ) - .await?; - let mut next_state = ManagedHostState::Assigned { instance_state: InstanceState::DpaProvisioning, }; @@ -2230,6 +2209,7 @@ impl StateHandler for MachineStateHandler { .is_empty() && mh_snapshot.dpu_snapshots.is_empty() { + tracing::error!("No DPU snapshot found for host {}", host_machine_id); return Err(StateHandlerError::GenericError(eyre!( "No DPU snapshot found." ))); @@ -5411,6 +5391,24 @@ impl StateHandler for InstanceStateHandler { )); } + // Check each DPA interface to see if it has acted on updating the network config. + // This involves the DPA State Machine sending SetVNI commands to the NICs, and getting + // an ACK. If any of the interfaces has not yet heard back the ACk, we will continue to + // be in the current state. 
+ if ctx.services.site_config.is_dpa_enabled() { + for dpa_interface in &mh_snapshot.dpa_interface_snapshots { + if !dpa_interface.managed_host_network_config_version_synced( + &mh_snapshot.instance, + &mh_snapshot.host_snapshot.spx_status_observation, + ) { + return Ok(StateHandlerOutcome::wait( + "Waiting for DPA agent(s) to apply network config and report healthy network" + .to_string() + )); + } + } + } + let next_state = ManagedHostState::Assigned { instance_state: InstanceState::WaitingForRebootToReady, }; @@ -5946,11 +5944,13 @@ impl StateHandler for InstanceStateHandler { // Bump each DPA interface's config version so the DPA State Controller // re-evaluates and sends SetVNI commands with VNI zero. for dpa_interface in &mh_snapshot.dpa_interface_snapshots { + let (mut netconf, version) = dpa_interface.network_config.clone().take(); + netconf.use_admin_network = Some(true); db::dpa_interface::try_update_network_config( &mut txn, &dpa_interface.id, - dpa_interface.network_config.version, - &dpa_interface.network_config.value, + version, + &netconf, ) .await?; } @@ -6010,7 +6010,10 @@ impl StateHandler for InstanceStateHandler { { continue; } - if !dpa_interface.managed_host_network_config_version_synced() { + if !dpa_interface.managed_host_network_config_version_synced( + &None, + &mh_snapshot.host_snapshot.spx_status_observation, + ) { return Ok(StateHandlerOutcome::wait( "Waiting for DPA agent(s) to apply network config and report healthy network" .to_string() @@ -6210,11 +6213,14 @@ impl StateHandler for InstanceStateHandler { let mut txn = ctx.services.db_pool.begin().await?; if ctx.services.site_config.is_dpa_enabled() { for dpa_interface in &mh_snapshot.dpa_interface_snapshots { + let (mut netconf, version) = + dpa_interface.network_config.clone().take(); + netconf.use_admin_network = Some(false); db::dpa_interface::try_update_network_config( &mut txn, &dpa_interface.id, - dpa_interface.network_config.version, - &dpa_interface.network_config.value, + 
version, + &netconf, ) .await?; } @@ -6229,17 +6235,33 @@ impl StateHandler for InstanceStateHandler { // This involves the DPA State Machine sending SetVNI commands to the NICs, and getting // an ACK. If any of the interfaces has not yet heard back the ACk, we will continue to // be in the current state. + if ctx.services.site_config.is_dpa_enabled() { for dpa_interface in &mh_snapshot.dpa_interface_snapshots { - if !dpa_interface.managed_host_network_config_version_synced() { + if !dpa_interface.managed_host_network_config_version_synced( + &mh_snapshot.instance, + &mh_snapshot.host_snapshot.spx_status_observation, + ) { return Ok(StateHandlerOutcome::wait( - "Waiting for DPA agent(s) to apply network config and report healthy network" - .to_string() - )); + "Waiting for DPA agent(s) to apply network config and report healthy network" + .to_string() + )); } } } + let mut txn = ctx.services.db_pool.begin().await?; + let host_version = mh_snapshot.host_snapshot.network_config.version; + let mut host_netconf = mh_snapshot.host_snapshot.network_config.value.clone(); + host_netconf.use_admin_network = Some(false); + db::machine::try_update_network_config( + &mut txn, + &mh_snapshot.host_snapshot.id, + host_version, + &host_netconf, + ) + .await?; + // The host was already flipped to tenant network in the // Ready -> Assigned transition; that write fanned out via // `try_update_network_config`'s group sync to bump every @@ -6247,7 +6269,7 @@ impl StateHandler for InstanceStateHandler { let next_state = ManagedHostState::Assigned { instance_state: InstanceState::WaitingForNetworkSegmentToBeReady, }; - Ok(StateHandlerOutcome::transition(next_state)) + return Ok(StateHandlerOutcome::transition(next_state).with_txn(txn)); } } } else { diff --git a/crates/api/src/state_controller/mod.rs b/crates/api/src/state_controller/mod.rs index c949a28f3e..2f381ab71d 100644 --- a/crates/api/src/state_controller/mod.rs +++ b/crates/api/src/state_controller/mod.rs @@ -16,7 +16,6 @@ */ 
pub mod common_services; -pub mod dpa_interface; pub(crate) mod external_service_error; pub mod health_metrics; pub mod ib_partition; diff --git a/crates/api/src/tests/client_resolution.rs b/crates/api/src/tests/client_resolution.rs index 03000f142d..a922e58e1c 100644 --- a/crates/api/src/tests/client_resolution.rs +++ b/crates/api/src/tests/client_resolution.rs @@ -75,6 +75,7 @@ async fn test_resolve_machine_interface_via_instance_address(pool: sqlx::PgPool) network_security_group_id: None, dpu_extension_services: None, nvlink: None, + spxconfig: None, }; let tinstance = mh.instance_builer(&env).config(config).build().await; diff --git a/crates/api/src/tests/common/api_fixtures/instance.rs b/crates/api/src/tests/common/api_fixtures/instance.rs index ce2600cc51..1f238abda5 100644 --- a/crates/api/src/tests/common/api_fixtures/instance.rs +++ b/crates/api/src/tests/common/api_fixtures/instance.rs @@ -59,6 +59,7 @@ impl<'a, 'b> TestInstanceBuilder<'a, 'b> { network_security_group_id: None, dpu_extension_services: None, nvlink: None, + spxconfig: None, }, tenant: default_tenant_config(), metadata: None, @@ -317,6 +318,7 @@ pub fn config_for_ib_config( network: Some(single_interface_network_config(network_segment_id)), infiniband: Some(ib_config), nvlink: None, + spxconfig: None, network_security_group_id: None, dpu_extension_services: None, } @@ -332,6 +334,7 @@ pub fn config_for_nvlink_config( network: Some(single_interface_network_config(network_segment_id)), infiniband: None, nvlink: Some(nvl_config), + spxconfig: None, network_security_group_id: None, dpu_extension_services: None, } diff --git a/crates/api/src/tests/common/api_fixtures/mod.rs b/crates/api/src/tests/common/api_fixtures/mod.rs index 1cf1c2e9cc..d8e1eff630 100644 --- a/crates/api/src/tests/common/api_fixtures/mod.rs +++ b/crates/api/src/tests/common/api_fixtures/mod.rs @@ -406,7 +406,6 @@ impl TestEnv { ib_pools: self.common_pools.infiniband.clone(), ipmi_tool: self.ipmi_tool.clone(), site_config: 
self.config.clone(), - dpa_info: None, rms_client: self.rms_sim.as_rms_client(), switch_system_image_rms_client: self.rms_sim.as_switch_system_image_rms_client(), credential_manager: self.test_credential_manager.clone(), @@ -1263,6 +1262,7 @@ pub fn get_config() -> CarbideConfig { subnet_ip: Ipv4Addr::UNSPECIFIED, subnet_mask: 0_i32, auth: MqttAuthConfig::default(), + monitor_run_interval: std::time::Duration::from_secs(10), }), power_manager_options: PowerManagerOptions { enabled: false, @@ -1669,7 +1669,6 @@ pub async fn create_test_env_with_overrides( ib_pools: common_pools.infiniband.clone(), ipmi_tool: ipmi_tool.clone(), site_config: config.clone(), - dpa_info: None, rms_client: rms_sim.as_rms_client(), switch_system_image_rms_client: rms_sim.as_switch_system_image_rms_client(), credential_manager: credential_manager.clone(), @@ -2131,6 +2130,19 @@ fn pool_defs(fabric_len: u8) -> HashMap delegate_prefix_len: None, }, ); + defs.insert( + model::resource_pool::common::DPA_VNI.to_string(), + resource_pool::ResourcePoolDef { + pool_type: resource_pool::ResourcePoolType::Integer, + ranges: vec![resource_pool::Range { + start: 40001.to_string(), + end: (40001 + fabric_len as u16 - 1).to_string(), + auto_assign: true, + }], + prefix: None, + delegate_prefix_len: None, + }, + ); defs.insert( model::resource_pool::common::VLANID.to_string(), resource_pool::ResourcePoolDef { diff --git a/crates/api/src/tests/common/rpc_builder.rs b/crates/api/src/tests/common/rpc_builder.rs index dad313ce5a..0ba419b94a 100644 --- a/crates/api/src/tests/common/rpc_builder.rs +++ b/crates/api/src/tests/common/rpc_builder.rs @@ -108,6 +108,7 @@ pub struct InstanceConfig { pub dpu_extension_services: ::core::option::Option<::rpc::forge::InstanceDpuExtensionServicesConfig>, pub nvlink: ::core::option::Option<::rpc::forge::InstanceNvLinkConfig>, + pub spxconfig: ::core::option::Option<::rpc::forge::InstanceSpxConfig>, } impl InstanceConfig { diff --git 
a/crates/api/src/tests/compute_allocation.rs b/crates/api/src/tests/compute_allocation.rs index 15f40108d1..a527ad6bf0 100644 --- a/crates/api/src/tests/compute_allocation.rs +++ b/crates/api/src/tests/compute_allocation.rs @@ -91,6 +91,7 @@ async fn allocate_instance( nvlink: None, network_security_group_id: None, dpu_extension_services: None, + spxconfig: None, }), metadata: None, allow_unhealthy_machine: false, diff --git a/crates/api/src/tests/dpa_interfaces.rs b/crates/api/src/tests/dpa_interfaces.rs index b0046c537b..f95c59b011 100644 --- a/crates/api/src/tests/dpa_interfaces.rs +++ b/crates/api/src/tests/dpa_interfaces.rs @@ -38,6 +38,8 @@ async fn dpa_api_test_cases(pool: sqlx::PgPool) -> Result<(), Box( network_security_group_id: None, dpu_extension_services: None, nvlink: None, + spxconfig: None, }; mh.instance_builer(env).config(config).build().await } diff --git a/crates/api/src/tests/instance_os.rs b/crates/api/src/tests/instance_os.rs index c837a6e89b..ddbf54833c 100644 --- a/crates/api/src/tests/instance_os.rs +++ b/crates/api/src/tests/instance_os.rs @@ -52,6 +52,7 @@ async fn test_update_instance_operating_system(_: PgPoolOptions, options: PgConn network_security_group_id: None, dpu_extension_services: None, nvlink: None, + spxconfig: None, }; let tinstance = mh.instance_builer(&env).config(config).build().await; @@ -256,6 +257,7 @@ async fn test_create_instance_with_ipxe_template_os(_: PgPoolOptions, options: P network_security_group_id: None, dpu_extension_services: None, nvlink: None, + spxconfig: None, }; let tinstance = mh.instance_builer(&env).config(config).build().await; @@ -352,6 +354,7 @@ async fn test_allocate_instance_rejects_inactive_os(_: PgPoolOptions, options: P network_security_group_id: None, dpu_extension_services: None, nvlink: None, + spxconfig: None, }), instance_id: None, instance_type_id: None, @@ -438,6 +441,7 @@ async fn test_allocate_instance_rejects_not_ready_os(_: PgPoolOptions, options: network_security_group_id: None, 
dpu_extension_services: None, nvlink: None, + spxconfig: None, }), instance_id: None, instance_type_id: None, @@ -486,6 +490,7 @@ async fn test_update_instance_os_rejects_inactive_os(_: PgPoolOptions, options: network_security_group_id: None, dpu_extension_services: None, nvlink: None, + spxconfig: None, }; let tinstance = mh.instance_builer(&env).config(config).build().await; @@ -581,6 +586,7 @@ async fn test_create_instance_with_os_image_and_verify_pxe_rendering( network_security_group_id: None, dpu_extension_services: None, nvlink: None, + spxconfig: None, }; let tinstance = mh.instance_builer(&env).config(config).build().await; @@ -688,6 +694,7 @@ async fn test_create_instance_with_raw_ipxe_os_and_verify_pxe_rendering( network_security_group_id: None, dpu_extension_services: None, nvlink: None, + spxconfig: None, }; let tinstance = mh.instance_builer(&env).config(config).build().await; @@ -815,6 +822,7 @@ async fn test_create_instance_with_templated_ipxe_os_with_artifacts_and_verify_p network_security_group_id: None, dpu_extension_services: None, nvlink: None, + spxconfig: None, }; let tinstance = mh.instance_builer(&env).config(config).build().await; diff --git a/crates/api/src/tests/instance_type.rs b/crates/api/src/tests/instance_type.rs index 577cbdd22e..5fde606758 100644 --- a/crates/api/src/tests/instance_type.rs +++ b/crates/api/src/tests/instance_type.rs @@ -466,6 +466,7 @@ async fn test_instance_type_delete(pool: sqlx::PgPool) -> Result<(), Box Result<(), eyre::Report> { network: Some(single_interface_network_config(segment_id)), infiniband: None, nvlink: None, + spxconfig: None, network_security_group_id: None, dpu_extension_services: None, }; @@ -197,6 +198,7 @@ async fn test_maintenance_multi_dpu(db_pool: sqlx::PgPool) -> Result<(), eyre::R os: Some(default_os_config()), infiniband: None, nvlink: None, + spxconfig: None, network_security_group_id: None, dpu_extension_services: None, }; diff --git a/crates/api/src/tests/network_security_group.rs 
b/crates/api/src/tests/network_security_group.rs index 4c9efaace0..d51b8b7217 100644 --- a/crates/api/src/tests/network_security_group.rs +++ b/crates/api/src/tests/network_security_group.rs @@ -916,6 +916,7 @@ async fn test_network_security_group_delete( network: Some(single_interface_network_config(segment_id)), infiniband: None, nvlink: None, + spxconfig: None, network_security_group_id: Some(good_network_security_group_id.into()), dpu_extension_services: None, }), @@ -1218,6 +1219,7 @@ async fn test_network_security_group_propagation_impl( )), infiniband: None, nvlink: None, + spxconfig: None, network_security_group_id: Some(good_network_security_group_id.to_string()), dpu_extension_services: None, }), @@ -1339,6 +1341,7 @@ async fn test_network_security_group_propagation_impl( )), infiniband: None, nvlink: None, + spxconfig: None, network_security_group_id: None, dpu_extension_services: None, }), @@ -1450,6 +1453,7 @@ async fn test_network_security_group_propagation_impl( )), infiniband: None, nvlink: None, + spxconfig: None, network_security_group_id: None, dpu_extension_services: None, }), @@ -1813,6 +1817,7 @@ async fn test_network_security_group_get_attachments( network: Some(single_interface_network_config(segment_id)), infiniband: None, nvlink: None, + spxconfig: None, network_security_group_id: Some(good_network_security_group_id.to_string()), dpu_extension_services: None, }), diff --git a/crates/api/src/tests/power_shelf_state_controller/mod.rs b/crates/api/src/tests/power_shelf_state_controller/mod.rs index c499c19617..194501d0b0 100644 --- a/crates/api/src/tests/power_shelf_state_controller/mod.rs +++ b/crates/api/src/tests/power_shelf_state_controller/mod.rs @@ -121,7 +121,6 @@ async fn test_power_shelf_deletion_with_state_controller( ib_pools: env.common_pools.infiniband.clone(), ipmi_tool: env.ipmi_tool.clone(), site_config: env.config.clone(), - dpa_info: None, rms_client: None, switch_system_image_rms_client: None, credential_manager: 
Arc::new(TestCredentialManager::default()), diff --git a/crates/api/src/tests/switch_state_controller/mod.rs b/crates/api/src/tests/switch_state_controller/mod.rs index f2e7e0dda6..15a57ee37d 100644 --- a/crates/api/src/tests/switch_state_controller/mod.rs +++ b/crates/api/src/tests/switch_state_controller/mod.rs @@ -113,7 +113,6 @@ async fn test_switch_deletion_with_state_controller( ib_pools: env.common_pools.infiniband.clone(), ipmi_tool: env.ipmi_tool.clone(), site_config: env.config.clone(), - dpa_info: None, rms_client: None, switch_system_image_rms_client: None, credential_manager: Arc::new(TestCredentialManager::default()), diff --git a/crates/api/src/web/mod.rs b/crates/api/src/web/mod.rs index 35ab2e50cd..f1a5b45ca1 100644 --- a/crates/api/src/web/mod.rs +++ b/crates/api/src/web/mod.rs @@ -263,6 +263,7 @@ mod redfish_browser; mod resource_pool; mod search; mod sku; +mod spx_partition; mod state_history; mod switch; mod tenant; @@ -726,6 +727,8 @@ pub fn routes(api: Arc) -> eyre::Result> { get(nvlink::show_nvlink_logical_partitions_json), ) .route("/nvlink-partition/{id}", get(nvlink::detail)) + .route("/spx-partition", get(spx_partition::show_html)) + .route("/spx-partition.json", get(spx_partition::show_all_json)) .route("/resource-pool", get(resource_pool::show_html)) .route("/resource-pool.json", get(resource_pool::show_all_json)) .route("/vpc", get(vpc::show_html)) diff --git a/crates/api/src/web/spx_partition.rs b/crates/api/src/web/spx_partition.rs new file mode 100644 index 0000000000..b39176ff35 --- /dev/null +++ b/crates/api/src/web/spx_partition.rs @@ -0,0 +1,124 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +use std::sync::Arc; + +use askama::Template; +use axum::Json; +use axum::extract::State as AxumState; +use axum::response::{Html, IntoResponse, Response}; +use hyper::http::StatusCode; +use rpc::forge as forgerpc; +use rpc::forge::forge_server::Forge; + +use super::Base; +use crate::api::Api; + +#[derive(Template)] +#[template(path = "spx_partition_show.html")] +struct SpxPartitionShow { + partitions: Vec, +} + +struct SpxPartitionRowDisplay { + id: String, + name: String, + tenant_organization_id: String, + vni: i32, +} + +impl From for SpxPartitionRowDisplay { + fn from(partition: forgerpc::SpxPartition) -> Self { + Self { + id: partition.id.map(|id| id.to_string()).unwrap_or_default(), + tenant_organization_id: partition.tenant_organization_id, + name: partition + .metadata + .as_ref() + .map(|m| m.name.clone()) + .unwrap_or_default(), + vni: partition.vni as i32, + } + } +} + +/// List partitions +pub async fn show_html(AxumState(state): AxumState>) -> Response { + let partitions = match fetch_spx_partitions(state.clone()).await { + Ok(n) => n, + Err(err) => { + tracing::error!(%err, "fetch_spx_partitions"); + return ( + StatusCode::INTERNAL_SERVER_ERROR, + "Error loading SPX partitions", + ) + .into_response(); + } + }; + + let tmpl = SpxPartitionShow { + partitions: partitions.into_iter().map(Into::into).collect(), + }; + (StatusCode::OK, Html(tmpl.render().unwrap())).into_response() +} + +pub async fn show_all_json(AxumState(state): AxumState>) -> Response { + let partitions = match fetch_spx_partitions(state).await { + Ok(n) => n, + 
Err(err) => { + tracing::error!(%err, "fetch_spx_partitions"); + return ( + StatusCode::INTERNAL_SERVER_ERROR, + "Error loading SPX partitions", + ) + .into_response(); + } + }; + (StatusCode::OK, Json(partitions)).into_response() +} + +async fn fetch_spx_partitions(api: Arc) -> Result, tonic::Status> { + let request = tonic::Request::new(forgerpc::SpxPartitionSearchFilter::default()); + + let spx_partition_ids = api + .find_spx_partition_ids(request) + .await? + .into_inner() + .spx_partition_ids; + + let mut partitions = Vec::new(); + let mut offset = 0; + while offset != spx_partition_ids.len() { + const PAGE_SIZE: usize = 100; + let page_size = PAGE_SIZE.min(spx_partition_ids.len() - offset); + let next_ids = &spx_partition_ids[offset..offset + page_size]; + let request = tonic::Request::new(forgerpc::SpxPartitionsByIdsRequest { + spx_partition_ids: next_ids.to_vec(), + }); + let next_partitions = api + .find_spx_partitions_by_ids(request) + .await + .map(|response| response.into_inner())?; + + partitions.extend(next_partitions.spx_partitions); + offset += page_size; + } + + Ok(partitions) +} + +impl super::Base for SpxPartitionShow {} diff --git a/crates/api/templates/base.html b/crates/api/templates/base.html index 915a177c18..28f91323b2 100644 --- a/crates/api/templates/base.html +++ b/crates/api/templates/base.html @@ -110,6 +110,7 @@

Tenant Objects

  • VPCs
  • InfiniBand Partitions
  • NVLink Partitions
  • +
  • Spx Partitions
  • Network Segments
  • Instances
  • Compute Allocations
  • diff --git a/crates/api/templates/dpa_detail.html b/crates/api/templates/dpa_detail.html index f8149dbd59..46a2eae288 100644 --- a/crates/api/templates/dpa_detail.html +++ b/crates/api/templates/dpa_detail.html @@ -19,7 +19,6 @@

    DPA Detail

    -
    State Version{{ dpa.controller_state_version }}
    Network Config{{ dpa.network_config }}
    Network Config Version{{ dpa.network_config_version }}
    Network Status Observation{{ dpa.network_status_observation }}

    History

    diff --git a/crates/api/templates/spx_partition_show.html b/crates/api/templates/spx_partition_show.html new file mode 100644 index 0000000000..3359379515 --- /dev/null +++ b/crates/api/templates/spx_partition_show.html @@ -0,0 +1,29 @@ +{% extends "base.html" %} + +{% block title %}Spx Partitions{% endblock %} + +{% block content %} + +

    Spx Partitions

    + + + + + + + + + + {% for partition in partitions %} + + + + + + + {% endfor %} + +
    IdNameVNITenant
    {{ partition.id }}{{ partition.name }}{{ partition.vni }}{{ partition.tenant_organization_id }}
    + +{% endblock %} + diff --git a/crates/bmc-explorer/Cargo.toml b/crates/bmc-explorer/Cargo.toml index e2b70d1968..a184a81301 100644 --- a/crates/bmc-explorer/Cargo.toml +++ b/crates/bmc-explorer/Cargo.toml @@ -33,29 +33,29 @@ carbide-api-model = { path = "../api-model", default-features = false } itertools = { workspace = true } mac_address = { workspace = true } nv-redfish = { workspace = true, features = [ - "assembly", - "bios", - "boot-options", - "chassis", - "computer-systems", - "ethernet-interfaces", - "host-interfaces", - "network-adapters", - "network-device-functions", - "managers", - "pcie-devices", - "power-supplies", - "update-service", - "secure-boot", - "oem-nvidia-bluefield", - "oem-nvidia-baseboard", - "oem-dell-attributes", - "oem-lenovo", - "oem-supermicro", - "oem-hpe", - "oem-ami", - "oem-liteon", - "bmc-http" + "assembly", + "bios", + "boot-options", + "chassis", + "computer-systems", + "ethernet-interfaces", + "host-interfaces", + "network-adapters", + "network-device-functions", + "managers", + "pcie-devices", + "power-supplies", + "update-service", + "secure-boot", + "oem-nvidia-bluefield", + "oem-nvidia-baseboard", + "oem-dell-attributes", + "oem-lenovo", + "oem-supermicro", + "oem-hpe", + "oem-ami", + "oem-liteon", + "bmc-http", ] } regex = { workspace = true } lazy_static = { workspace = true } diff --git a/crates/bmc-mock/Cargo.toml b/crates/bmc-mock/Cargo.toml index 89442114d1..687da8c97f 100644 --- a/crates/bmc-mock/Cargo.toml +++ b/crates/bmc-mock/Cargo.toml @@ -29,7 +29,7 @@ carbide-utils = { path = "../utils", default-features = false } arc-swap = { workspace = true } axum = { workspace = true } -axum-extra = { features = [ "typed-header" ], workspace = true } +axum-extra = { features = ["typed-header"], workspace = true } axum-server = { features = ["tls-rustls"], workspace = true } bytes = { workspace = true } chrono = { workspace = true } diff --git a/crates/bmc-proxy/Cargo.toml b/crates/bmc-proxy/Cargo.toml index 
947b3f1ff9..ebcf2e6683 100644 --- a/crates/bmc-proxy/Cargo.toml +++ b/crates/bmc-proxy/Cargo.toml @@ -59,15 +59,18 @@ opentelemetry = { workspace = true, features = ["logs"] } opentelemetry-otlp = { workspace = true, features = ["grpc-tonic"] } opentelemetry-prometheus.workspace = true opentelemetry-semantic-conventions = { features = [ - "semconv_experimental", + "semconv_experimental", ], workspace = true } opentelemetry_sdk = { workspace = true, features = [ - "logs", - "rt-tokio", - "spec_unstable_metrics_views", - "testing", + "logs", + "rt-tokio", + "spec_unstable_metrics_views", + "testing", +] } +reqwest = { workspace = true, default-features = false, features = [ + "rustls", + "stream", ] } -reqwest = { workspace = true, default-features = false, features = ["rustls", "stream", ] } rustls-pemfile = { workspace = true } rustls-pki-types = { workspace = true } thiserror = { workspace = true } @@ -77,23 +80,23 @@ tokio-util = { workspace = true } tokio = { workspace = true } tower = { workspace = true } tower-http = { features = [ - "add-extension", - "auth", - "normalize-path", + "add-extension", + "auth", + "normalize-path", ], workspace = true } tracing = { workspace = true } tracing-log = { workspace = true } tracing-opentelemetry = { workspace = true } -tracing-subscriber = { features = ["env-filter", "local-time", ], workspace = true } +tracing-subscriber = { features = [ + "env-filter", + "local-time", +], workspace = true } urlencoding = { workspace = true } url = { features = ["serde"], workspace = true } uuid = { features = ["v4", "serde"], workspace = true } x509-parser = { features = ["verify"], workspace = true } serde = { features = ["derive"], workspace = true } -sqlx = { workspace = true, features = [ - "runtime-tokio-rustls", - "postgres", -] } +sqlx = { workspace = true, features = ["runtime-tokio-rustls", "postgres"] } [build-dependencies] carbide-version = { path = "../version" } diff --git a/crates/dpf/Cargo.toml b/crates/dpf/Cargo.toml 
index 44892071e5..d8c9b4bad7 100644 --- a/crates/dpf/Cargo.toml +++ b/crates/dpf/Cargo.toml @@ -22,11 +22,11 @@ driver = ["dep:clap", "dep:tracing-subscriber", "dep:libredfish"] [dependencies] hex = { workspace = true } kube = { default-features = false, features = [ - "runtime", - "derive", - "client", - "rustls-tls", - "aws-lc-rs", + "runtime", + "derive", + "client", + "rustls-tls", + "aws-lc-rs", ], workspace = true } rustls = { workspace = true } k8s-openapi = { features = ["latest", "schemars"], workspace = true } @@ -50,7 +50,7 @@ carbide-uuid = { path = "../uuid", features = ["sqlx"] } # Optional dependencies for carbide-dpf-api-harness binary clap = { workspace = true, features = ["derive"], optional = true } tracing-subscriber = { workspace = true, features = [ - "env-filter", + "env-filter", ], optional = true } libredfish = { workspace = true, optional = true } diff --git a/crates/dpu-fmds-shared/Cargo.toml b/crates/dpu-fmds-shared/Cargo.toml index 456e54b12a..4e155900c3 100644 --- a/crates/dpu-fmds-shared/Cargo.toml +++ b/crates/dpu-fmds-shared/Cargo.toml @@ -31,9 +31,7 @@ axum = { workspace = true, features = ["macros"] } carbide-dpu-agent-utils = { path = "../dpu-agent-utils" } carbide-rpc = { path = "../rpc" } governor = { workspace = true } -reqwest = { default-features = false, features = [ - "rustls", -], workspace = true } +reqwest = { default-features = false, features = ["rustls"], workspace = true } serde = { features = ["derive"], workspace = true } serde_json = { workspace = true } tokio = { workspace = true } diff --git a/crates/fmds/Cargo.toml b/crates/fmds/Cargo.toml index 28b291bb1c..28b94aa453 100644 --- a/crates/fmds/Cargo.toml +++ b/crates/fmds/Cargo.toml @@ -54,7 +54,9 @@ netlink-packet-route = { workspace = true } nonzero_ext = { workspace = true } opentelemetry = { workspace = true } opentelemetry-prometheus = { workspace = true } -opentelemetry_sdk = { workspace = true, features = ["spec_unstable_metrics_views"] } 
+opentelemetry_sdk = { workspace = true, features = [ + "spec_unstable_metrics_views", +] } opentelemetry-semantic-conventions = { workspace = true, features = [ "semconv_experimental", ] } diff --git a/crates/ipmi/Cargo.toml b/crates/ipmi/Cargo.toml index 019db2c4a8..d1be4c7ec2 100644 --- a/crates/ipmi/Cargo.toml +++ b/crates/ipmi/Cargo.toml @@ -22,4 +22,3 @@ reqwest = { workspace = true, features = ["json"] } serde = { features = ["derive"], workspace = true } serde_json = { workspace = true } tracing = { workspace = true } - diff --git a/crates/libmlx/Cargo.toml b/crates/libmlx/Cargo.toml index c81155ddf1..0a568ca6f0 100644 --- a/crates/libmlx/Cargo.toml +++ b/crates/libmlx/Cargo.toml @@ -88,3 +88,8 @@ serde_yaml = "0.9" carbide-libmlx-model = { path = "../libmlx-model", features = ["test-support"] } carbide-ssh = { path = "../ssh", features = ["test_support"] } russh = { workspace = true } + +[features] +default = ["mockdpa"] +#default = [] +mockdpa = [] diff --git a/crates/libmlx/src/device/discovery.rs b/crates/libmlx/src/device/discovery.rs index 206e2cc2d8..4ba3a11be7 100644 --- a/crates/libmlx/src/device/discovery.rs +++ b/crates/libmlx/src/device/discovery.rs @@ -26,6 +26,9 @@ use tracing::{debug, warn}; use crate::device::filters::DeviceFilter; +#[cfg(feature = "mockdpa")] +const XML_CONTENT: &str = include_str!("mlxconfig.out"); + // DevicesXml represents the root XML structure // from mlxfwmanager output. #[derive(Debug, Deserialize)] @@ -114,16 +117,24 @@ pub fn discover_devices() -> Result, String> { )); } + #[cfg(not(feature = "mockdpa"))] { let xml_content = String::from_utf8_lossy(&output.stdout); warn!("mlxfwmanager XML output: {}", xml_content); parse_mlxfwmanager_xml(&xml_content) } + #[cfg(feature = "mockdpa")] + { + let xml_content = XML_CONTENT; + warn!("mlxfwmanager XML output: {}", xml_content); + parse_mlxfwmanager_xml(xml_content) + } } // discover_device loads a specific device using mlxfwmanager. 
// The actual XML returned is still "devices", but will only // contain the target device. +#[cfg(not(feature = "mockdpa"))] pub fn discover_device(device: &str) -> Result { debug!("Running mlxfwmanager to discover device: {device}"); @@ -162,6 +173,27 @@ pub fn discover_device(device: &str) -> Result { Ok(devices.into_iter().next().unwrap()) } +#[cfg(feature = "mockdpa")] +pub fn discover_device(device: &str) -> Result { + debug!("Running mlxfwmanager to discover device: {device}"); + + let xml_content = XML_CONTENT; + debug!("mlxfwmanager XML output: {}", xml_content); + + let devices = parse_mlxfwmanager_xml(xml_content)?; + if devices.is_empty() { + return Err(format!("no devices returned for device: {device}")); + } + + for dev in devices { + if dev.pci_name == device { + return Ok(dev); + } + } + + Err(format!("no devices found in loop for device: {device}")) +} + // discover_devices_with_filters finds devices that match // the specified filters. pub fn discover_devices_with_filters(filter: DeviceFilter) -> Result, String> { diff --git a/crates/libmlx/src/lockdown/lockdown.rs b/crates/libmlx/src/lockdown/lockdown.rs index d3018f5124..2856f21048 100644 --- a/crates/libmlx/src/lockdown/lockdown.rs +++ b/crates/libmlx/src/lockdown/lockdown.rs @@ -104,6 +104,7 @@ impl LockdownManager { } // lock_device locks hardware access on the specified device with the provided key. + #[cfg(not(feature = "mockdpa"))] pub fn lock_device(&self, device_id: &str, key: &str) -> MlxResult { FlintRunner::validate_device_id(device_id)?; @@ -112,7 +113,14 @@ impl LockdownManager { Ok(LockStatus::Locked) } + // lock_device locks hardware access on the specified device with the provided key. + #[cfg(feature = "mockdpa")] + pub fn lock_device(&self, _device_id: &str, _key: &str) -> MlxResult { + Ok(LockStatus::Locked) + } + // unlock_device unlocks hardware access on the specified device with the provided key. 
+ #[cfg(not(feature = "mockdpa"))] pub fn unlock_device(&self, device_id: &str, key: &str) -> MlxResult { FlintRunner::validate_device_id(device_id)?; @@ -121,6 +129,12 @@ impl LockdownManager { Ok(LockStatus::Unlocked) } + // unlock_device unlocks hardware access on the specified device with the provided key. + #[cfg(feature = "mockdpa")] + pub fn unlock_device(&self, _device_id: &str, _key: &str) -> MlxResult { + Ok(LockStatus::Unlocked) + } + // get_status gets the current lock status of the specified device. pub fn get_status(&self, device_id: &str) -> MlxResult { FlintRunner::validate_device_id(device_id)?; diff --git a/crates/machine-a-tron/src/api_client.rs b/crates/machine-a-tron/src/api_client.rs index 6a40b48d95..e52de4326a 100644 --- a/crates/machine-a-tron/src/api_client.rs +++ b/crates/machine-a-tron/src/api_client.rs @@ -292,6 +292,7 @@ impl ApiClient { infiniband: None, dpu_extension_services: None, nvlink: None, + spxconfig: None, }; let instance_request = rpc::InstanceAllocationRequest { diff --git a/crates/metrics-endpoint/Cargo.toml b/crates/metrics-endpoint/Cargo.toml index 14a186a954..36f3404847 100644 --- a/crates/metrics-endpoint/Cargo.toml +++ b/crates/metrics-endpoint/Cargo.toml @@ -28,10 +28,10 @@ carbide-metrics-utils = { path = "../metrics-utils" } hyper = { workspace = true } hyper-util = { features = [ - "server", - "http1", - "http2", - "tokio", + "server", + "http1", + "http2", + "tokio", ], workspace = true } tokio = { workspace = true } tokio-util = { workspace = true } @@ -43,11 +43,11 @@ tracing = { workspace = true } eyre = { workspace = true } opentelemetry = { workspace = true } opentelemetry-semantic-conventions = { workspace = true, features = [ - "semconv_experimental", + "semconv_experimental", ] } opentelemetry-prometheus = { workspace = true } opentelemetry_sdk = { workspace = true, features = [ - "spec_unstable_metrics_views", + "spec_unstable_metrics_views", ] } [lints] diff --git a/crates/metrics-utils/Cargo.toml 
b/crates/metrics-utils/Cargo.toml index 7b18d22a13..847835500a 100644 --- a/crates/metrics-utils/Cargo.toml +++ b/crates/metrics-utils/Cargo.toml @@ -6,7 +6,9 @@ license.workspace = true authors.workspace = true [dependencies] -opentelemetry_sdk = { workspace = true, features = ["spec_unstable_metrics_views"] } +opentelemetry_sdk = { workspace = true, features = [ + "spec_unstable_metrics_views", +] } glob = { workspace = true } thiserror = { workspace = true } tracing = { workspace = true } diff --git a/crates/redfish/Cargo.toml b/crates/redfish/Cargo.toml index 27baa342f5..e48d275535 100644 --- a/crates/redfish/Cargo.toml +++ b/crates/redfish/Cargo.toml @@ -11,7 +11,7 @@ name = "carbide_redfish" [features] default = [] -test-support = [ "dep:chrono", "dep:serde_json", "dep:tokio" ] +test-support = ["dep:chrono", "dep:serde_json", "dep:tokio"] [dependencies] bmc-vendor = { path = "../bmc-vendor" } @@ -30,7 +30,7 @@ mac_address = { workspace = true } nv-redfish = { workspace = true, features = ["bmc-http", "oem-hpe"] } reqwest = { workspace = true, default-features = false } serde_json = { workspace = true, optional = true } -sqlx = { workspace = true, features = [ "postgres" ] } +sqlx = { workspace = true, features = ["postgres"] } thiserror = { workspace = true } tokio = { workspace = true, optional = true } tracing = { workspace = true } diff --git a/crates/rpc-utils/Cargo.toml b/crates/rpc-utils/Cargo.toml index 3de409f845..5354c5c8d3 100644 --- a/crates/rpc-utils/Cargo.toml +++ b/crates/rpc-utils/Cargo.toml @@ -32,7 +32,7 @@ chrono = { workspace = true } eyre = { workspace = true } ipnetwork = { workspace = true } itertools = { workspace = true } -serde = { workspace = true, features = ["derive"] } +serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } thiserror = { workspace = true } tonic = { workspace = true } diff --git a/crates/rpc/Cargo.toml b/crates/rpc/Cargo.toml index 8c502c4883..5ba8ddb889 100644 --- 
a/crates/rpc/Cargo.toml +++ b/crates/rpc/Cargo.toml @@ -79,18 +79,18 @@ async-trait = { workspace = true } nonempty = { workspace = true } clap = { features = ["derive"], optional = true, workspace = true } sqlx = { workspace = true, features = [ - "runtime-tokio-rustls", - "mac_address", - "ipnetwork", - "uuid", - "migrate", - "postgres", - "chrono", - "macros", - "json", + "runtime-tokio-rustls", + "mac_address", + "ipnetwork", + "uuid", + "migrate", + "postgres", + "chrono", + "macros", + "json", ], optional = true } prettytable-rs = { optional = true, default-features = false, features = [ - "csv", + "csv", ], workspace = true } serde_yaml = { optional = true, workspace = true } data-encoding = { workspace = true } diff --git a/crates/rpc/build.rs b/crates/rpc/build.rs index 03ac72ab11..b9e60707fa 100644 --- a/crates/rpc/build.rs +++ b/crates/rpc/build.rs @@ -48,6 +48,7 @@ fn main() -> Result<(), Box> { .extern_path(".common.DomainId", "::carbide_uuid::domain::DomainId") .extern_path(".common.DpaInterfaceId", "::carbide_uuid::dpa_interface::DpaInterfaceId") .extern_path(".common.IBPartitionId", "::carbide_uuid::infiniband::IBPartitionId") + .extern_path(".common.SpxPartitionId", "::carbide_uuid::spx::SpxPartitionId") .extern_path(".common.InstanceId", "::carbide_uuid::instance::InstanceId") .extern_path(".common.MachineId", "::carbide_uuid::machine::MachineId") .extern_path(".common.MachineInterfaceId", "::carbide_uuid::machine::MachineInterfaceId") @@ -124,6 +125,14 @@ fn main() -> Result<(), Box> { "forge.InstanceInfinibandConfig", "#[derive(serde::Deserialize, serde::Serialize)]", ) + .type_attribute( + "forge.InstanceSpxConfig", + "#[derive(serde::Deserialize, serde::Serialize)]", + ) + .type_attribute( + "forge.InstanceSpxAttachment", + "#[derive(serde::Deserialize, serde::Serialize)]", + ) .type_attribute("forge.InstanceStorageConfig", "#[derive(serde::Serialize)]") .type_attribute( "forge.IpxeTemplateParameter", @@ -162,6 +171,8 @@ fn main() -> 
Result<(), Box> { .type_attribute("forge.DpuExtensionServiceObservabilityConfig.config", "#[derive(serde::Serialize, serde::Deserialize)]") .type_attribute("forge.DpuExtensionServiceObservabilityConfig", "#[derive(serde::Serialize, serde::Deserialize)]") .type_attribute("forge.DpuExtensionServiceObservability.configs", "#[derive(serde::Serialize, serde::Deserialize)]") + .type_attribute("forge.InstanceSpxStatus", "#[derive(serde::Serialize)]") + .type_attribute("forge.InstanceSpxAttachmentStatus", "#[derive(serde::Serialize)]") .type_attribute("forge.InstanceNVLinkConfig", "#[derive(serde::Deserialize, serde::Serialize)]") .type_attribute("forge.InstanceNVLinkGpuConfig", "#[derive(serde::Deserialize, serde::Serialize)]") .type_attribute("forge.InstanceNVLinkStatus", "#[derive(serde::Serialize)]") @@ -183,6 +194,8 @@ fn main() -> Result<(), Box> { .type_attribute("common.NVLinkPartitionId", "#[derive(serde::Serialize)]") .type_attribute("forge.MachineNVLinkInfo", "#[derive(serde::Serialize)]") .type_attribute("forge.NVLinkGpu", "#[derive(serde::Serialize)]") + .type_attribute("forge.MachineSpxStatusObservation", "#[derive(serde::Serialize)]") + .type_attribute("forge.MachineSpxAttachmentStatusObservation", "#[derive(serde::Serialize)]") .type_attribute( "forge.InstanceInterfaceStatus", "#[derive(serde::Serialize)]", @@ -297,10 +310,14 @@ fn main() -> Result<(), Box> { .type_attribute("forge.NetworkSegmentStatus", "#[derive(serde::Serialize)]") .type_attribute("forge.NetworkSegment", "#[derive(serde::Serialize)]") .type_attribute("forge.IBPartitionConfig", "#[derive(serde::Serialize)]") + .type_attribute("forge.SpxPartitionConfig", "#[derive(serde::Serialize)]") .type_attribute("forge.IBPartitionStatus", "#[derive(serde::Serialize)]") .type_attribute("forge.IBPartition", "#[derive(serde::Serialize)]") .type_attribute("forge.IBPartitionIdList", "#[derive(serde::Serialize)]") .type_attribute("forge.IBPartitionList", "#[derive(serde::Serialize)]") + 
.type_attribute("forge.SpxPartitionList", "#[derive(serde::Serialize)]") + .type_attribute("forge.SpxPartition", "#[derive(serde::Serialize)]") + .type_attribute("forge.SpxPartitionIdList", "#[derive(serde::Serialize)]") .type_attribute("forge.PowerOptionResponse", "#[derive(serde::Deserialize, serde::Serialize)]") .type_attribute("forge.PowerOptions", @@ -898,6 +915,10 @@ fn main() -> Result<(), Box> { ".common.IBPartitionId", "::carbide_uuid::infiniband::IBPartitionId", ), + ( + ".common.SpxPartitionId", + "::carbide_uuid::spx::SpxPartitionId", + ), (".common.InstanceId", "::carbide_uuid::instance::InstanceId"), ( ".common.NetworkSegmentId", diff --git a/crates/rpc/proto/common.proto b/crates/rpc/proto/common.proto index 8d934c705f..1343ab065e 100644 --- a/crates/rpc/proto/common.proto +++ b/crates/rpc/proto/common.proto @@ -98,6 +98,11 @@ message ComputeAllocationId { string value = 1; } + +message SpxPartitionId { + string value = 1; +} + message OperatingSystemId { string value = 1; } diff --git a/crates/rpc/proto/dpa_rpc.proto b/crates/rpc/proto/dpa_rpc.proto index f268d28a76..d5c112bdbb 100644 --- a/crates/rpc/proto/dpa_rpc.proto +++ b/crates/rpc/proto/dpa_rpc.proto @@ -2,6 +2,13 @@ syntax = "proto3"; package dpa_rpc; +// This file contains the format of the messages exchanged between +// Carbide and the SVPC agent running on the DPA. This exchange +// happens via an MQTT broker. + +// Currently, we have only one message - the SetVNI message that is +// sent as a command, and is received as an acknowledgement. 
+ // This should be sent to topic dpa/command//SetVni // Example for topic: dpa/command/001122334455/SetVni @@ -11,16 +18,17 @@ message SetVNI { } message DpaMetadata { - string dpa_id = 1; + string dpa_id = 1; // The MAC address of the NIC string host_id = 2; - string revision = 3; + string revision = 3; // Revision string or NIL in case of heartbeat string transaction = 4; } +// Currently, SVPC supports only pfs and not vfs message PFVNI { - int32 pf_id = 1; + int32 pf_id = 1; // Set to 0 string mac = 2; - int32 vni = 3; + int32 vni = 3; // Actual vni if in tenancy; 0 otherwise string subnet_ip = 4; int32 subnet_mask = 5; string dhcp_ip = 6; diff --git a/crates/rpc/proto/forge.proto b/crates/rpc/proto/forge.proto index 5b1783c71a..4d934bd013 100644 --- a/crates/rpc/proto/forge.proto +++ b/crates/rpc/proto/forge.proto @@ -53,6 +53,11 @@ service Forge { rpc FindVpcIds(VpcSearchFilter) returns (VpcIdList); rpc FindVpcsByIds(VpcsByIdsRequest) returns (VpcList); + rpc CreateSpxPartition(SpxPartitionCreationRequest) returns (SpxPartition); + rpc DeleteSpxPartition(SpxPartitionDeletionRequest) returns (SpxPartitionDeletionResult); + rpc FindSpxPartitionIds(SpxPartitionSearchFilter) returns (SpxPartitionIdList); + rpc FindSpxPartitionsByIds(SpxPartitionsByIdsRequest) returns (SpxPartitionList); + // VPC prefixes rpc CreateVpcPrefix(VpcPrefixCreationRequest) returns (VpcPrefix); rpc SearchVpcPrefixes(VpcPrefixSearchQuery) returns (VpcPrefixIdList); @@ -712,7 +717,6 @@ service Forge { rpc CreateDpaInterface(DpaInterfaceCreationRequest) returns (DpaInterface); rpc EnsureDpaInterface(DpaInterfaceCreationRequest) returns (DpaInterface); rpc DeleteDpaInterface(DpaInterfaceDeletionRequest) returns (DpaInterfaceDeletionResult); - rpc SetDpaNetworkObservationStatus(DpaNetworkObservationSetRequest) returns (DpaInterface); // Power Options rpc GetPowerOptions(PowerOptionRequest) returns (PowerOptionResponse); @@ -2662,6 +2666,7 @@ message InstanceConfig { // Configures instance 
nvlink partitions InstanceNVLinkConfig nvlink = 24; + InstanceSpxConfig spxconfig = 25; } // Desired network configuration for an instance @@ -2711,6 +2716,24 @@ message InstanceNVLinkConfig { repeated InstanceNVLinkGpuConfig gpu_configs = 1; } +message InstanceSpxConfig { + repeated InstanceSpxAttachment spx_attachments = 1; +} + +enum SpxAttachmentType { + Physical = 0; + Virtual = 1; + Ovn = 2; // This is WIP. Specification for this not yet finalized in Astra Spec +} + +message InstanceSpxAttachment { + string device = 1; + uint32 device_instance = 2; + common.SpxPartitionId spx_partition_id = 3; + SpxAttachmentType attachment_type = 4; + optional uint32 virtual_function_id = 5; // Needed only if AttachmentType is Virtual +} + // New configuration for the instances network interfaces // This completely overwrites the existing interfaces, so it would be up on the caller // to make sure the new settings are consistent and old interfaces that should not be @@ -2797,6 +2820,21 @@ message InstanceStatus { optional InstanceUpdateStatus update = 102; InstanceNVLinkStatus nvlink = 103; + + InstanceSpxStatus spx_status = 104; +} + +message InstanceSpxStatus { + repeated InstanceSpxAttachmentStatus attachment_statuses = 1; + SyncState configs_synced = 101; +} + +message InstanceSpxAttachmentStatus { + optional string mac_addr = 1; + uint32 virtual_function_id = 2; + optional string ip_address = 3; + SpxAttachmentType attachment_type = 4; + common.SpxPartitionId spx_partition_id = 5; } // State of the networking subsystem of an instance @@ -3413,6 +3451,8 @@ message Machine { optional common.RackId rack_id = 45; optional PlacementInRack placement_in_rack = 46; + + optional MachineSpxStatusObservation spx_status_observation = 47; } message InstanceNetworkRestrictions { @@ -6698,7 +6738,6 @@ message DpaInterface { string controller_state = 7; string controller_state_version = 8; string controller_state_outcome = 9; - string network_status_observation = 10; string 
network_config = 11; string network_config_version = 12; @@ -6723,6 +6762,8 @@ message DpaInterface { // back to its default values before allocating to the // next tenant. optional string mlxconfig_profile = 19; + + optional string device_description = 20; } @@ -6731,6 +6772,7 @@ message DpaInterfaceCreationRequest { string mac_addr = 2; string device_type = 3; string pci_name = 4; + optional string device_description = 5; } message DpaInterfaceIdList { @@ -7060,6 +7102,19 @@ message UpdateMachineNvLinkInfoRequest { MachineNVLinkInfo nvlink_info = 2; } +message MachineSpxStatusObservation { + repeated MachineSpxAttachmentStatusObservation attachment_status = 1; + optional google.protobuf.Timestamp observed_at = 2; +} + +message MachineSpxAttachmentStatusObservation { + string mac_address = 1; + optional common.SpxPartitionId partition_id = 2; + optional SpxAttachmentType attachment_type = 3; + optional uint32 virtual_function_id = 4; + optional google.protobuf.Timestamp observed_at = 5; +} + message NVLinkGpu { string nmx_m_id = 1; int32 tray_index = 2; @@ -8132,6 +8187,45 @@ message ListComponentFirmwareVersionsResponse { repeated DeviceFirmwareVersions devices = 1; } +message SpxPartitionCreationRequest { + Metadata metadata = 1; + optional common.SpxPartitionId id = 2; + optional uint32 vni = 3; + string tenantOrganizationId = 4; +} + +message SpxPartition { + Metadata metadata = 1; + common.SpxPartitionId id = 2; + uint32 vni = 3; + string tenantOrganizationId = 4; +} + +message SpxPartitionIdList { + repeated common.SpxPartitionId spx_partition_ids = 1; +} + +message SpxPartitionDeletionRequest { + common.SpxPartitionId id = 1; +} + +message SpxPartitionDeletionResult { +} + +message SpxPartitionSearchFilter { + optional string name = 1; + optional string tenant_org_id = 2; + optional Label label = 3; +} + +message SpxPartitionList { + repeated SpxPartition spx_partitions = 1; +} + +message SpxPartitionsByIdsRequest { + repeated common.SpxPartitionId 
spx_partition_ids = 1; +} + // Force deletes a Switch and optionally its associated interfaces. message AdminForceDeleteSwitchRequest { // The Switch ID to force delete. diff --git a/crates/rpc/src/lib.rs b/crates/rpc/src/lib.rs index 33626ccad0..7ea4e0d961 100644 --- a/crates/rpc/src/lib.rs +++ b/crates/rpc/src/lib.rs @@ -563,6 +563,15 @@ impl FromStr for forge::InstanceNvLinkConfig { } } +impl FromStr for forge::InstanceSpxConfig { + type Err = RpcDataConversionError; + + fn from_str(s: &str) -> Result { + serde_json::from_str(s) + .map_err(|e| RpcDataConversionError::JsonConversionFailure(e.to_string())) + } +} + /* ****************************************************** */ // Serialization/deserialization helpers for network // security group enums to let admin CLI callers describe diff --git a/crates/rvs/src/client/io.rs b/crates/rvs/src/client/io.rs index 3403196dc0..685eb93fff 100644 --- a/crates/rvs/src/client/io.rs +++ b/crates/rvs/src/client/io.rs @@ -110,6 +110,7 @@ impl NiccClient { network_security_group_id: None, dpu_extension_services: None, nvlink: None, + spxconfig: None, }), instance_id: None, instance_type_id: None, diff --git a/crates/scout/Cargo.toml b/crates/scout/Cargo.toml index 94fffb1df3..563db416c7 100644 --- a/crates/scout/Cargo.toml +++ b/crates/scout/Cargo.toml @@ -77,5 +77,10 @@ axum = { workspace = true } [build-dependencies] carbide-version = { path = "../version" } +[features] +#default = [] +default = ["mockdpa"] +mockdpa = [] + [lints] workspace = true diff --git a/crates/scout/src/mlx_device.rs b/crates/scout/src/mlx_device.rs index 0dcabec953..1e500c56df 100644 --- a/crates/scout/src/mlx_device.rs +++ b/crates/scout/src/mlx_device.rs @@ -30,6 +30,7 @@ use libmlx::lockdown::lockdown::{LockdownManager, StatusReport}; use libmlx::profile::error::MlxProfileError; use libmlx::profile::serialization::SerializableProfile; use libmlx::registry::registries; +#[cfg(not(feature = "mockdpa"))] use libmlx::runner::applier::MlxConfigApplier; 
use libmlx::runner::result_types::{ComparisonResult, SyncResult}; use libmlx::runner::runner::MlxConfigRunner; @@ -1120,6 +1121,7 @@ pub async fn apply_firmware( // // Returns the profile name (if any) and whether the operation // succeeded, for reporting back via MlxObservation. +#[cfg(not(feature = "mockdpa"))] pub(crate) fn apply_profile( device: &str, profile: Option<SerializableProfile>, @@ -1165,6 +1167,21 @@ pub(crate) fn apply_profile( } } +#[cfg(feature = "mockdpa")] +pub(crate) fn apply_profile( + _device: &str, + profile: Option<SerializableProfile>, +) -> (Option<String>, Option<bool>) { + // Mock build: skip the device entirely and report success, echoing back the profile name when one was provided. + let Some(profile) = profile else { + return (None, Some(true)); + }; + + let name = profile.name; + + (Some(name), Some(true)) +} + // load_and_sync_profile loads a profile from data and syncs it to the device. fn load_and_sync_profile( device_id: &str, diff --git a/crates/site-explorer/Cargo.toml b/crates/site-explorer/Cargo.toml index 304e9a88ab..374c380810 100644 --- a/crates/site-explorer/Cargo.toml +++ b/crates/site-explorer/Cargo.toml @@ -42,7 +42,7 @@ rand = { workspace = true } regex = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } -sqlx = { workspace = true, features = [ "postgres" ] } +sqlx = { workspace = true, features = ["postgres"] } tokio = { workspace = true } tokio-util = { workspace = true } tracing = { workspace = true } diff --git a/crates/ssh-console/Cargo.toml b/crates/ssh-console/Cargo.toml index 4a294fa961..9e7d1d858d 100644 --- a/crates/ssh-console/Cargo.toml +++ b/crates/ssh-console/Cargo.toml @@ -41,10 +41,10 @@ bytes = { workspace = true } ctor = { workspace = true } lazy_static = { workspace = true } tokio = { workspace = true, features = [ - "tokio-macros", - "rt-multi-thread", - "sync", - "macros", + "tokio-macros", + "rt-multi-thread", + "sync", + "macros", ] } tracing = { workspace = true } tracing-subscriber = { features = ["env-filter"], workspace = true } diff --git
a/crates/state-controller/Cargo.toml b/crates/state-controller/Cargo.toml index 132f435431..7c395ea78d 100644 --- a/crates/state-controller/Cargo.toml +++ b/crates/state-controller/Cargo.toml @@ -50,10 +50,10 @@ rand = { workspace = true } serde = { features = ["derive"], workspace = true } serde_json = { workspace = true } sqlx = { workspace = true, features = [ - "runtime-tokio-rustls", - "postgres", - "chrono", - "macros", + "runtime-tokio-rustls", + "postgres", + "chrono", + "macros", ] } thiserror = { workspace = true } tokio = { workspace = true } diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml index ee136312f3..ed82c891cd 100644 --- a/crates/utils/Cargo.toml +++ b/crates/utils/Cargo.toml @@ -29,7 +29,7 @@ sqlx = ["carbide-network/sqlx", "dep:sqlx"] test-support = [ "dep:opentelemetry_sdk", "dep:prometheus", - "dep:opentelemetry-prometheus" + "dep:opentelemetry-prometheus", ] [dependencies] diff --git a/crates/uuid/src/lib.rs b/crates/uuid/src/lib.rs index 9c06175336..41e8f28b5e 100644 --- a/crates/uuid/src/lib.rs +++ b/crates/uuid/src/lib.rs @@ -35,6 +35,7 @@ pub mod nvlink; pub mod operating_system; pub mod power_shelf; pub mod rack; +pub mod spx; pub mod switch; pub mod typed_uuids; pub mod vpc; diff --git a/crates/uuid/src/spx/mod.rs b/crates/uuid/src/spx/mod.rs new file mode 100644 index 0000000000..eea95cad94 --- /dev/null +++ b/crates/uuid/src/spx/mod.rs @@ -0,0 +1,46 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +use crate::typed_uuids::{TypedUuid, UuidSubtype}; + +/// Marker type for SpxPartitionId. +pub struct SpxPartitionIdMarker; + +impl UuidSubtype for SpxPartitionIdMarker { + const TYPE_NAME: &'static str = "SpxPartitionId"; +} + +/// SpxPartitionId is a strongly typed UUID specific to an +/// Spx partition ID. +pub type SpxPartitionId = TypedUuid<SpxPartitionIdMarker>; + +pub const NULL_SPX_PARTITION_ID: SpxPartitionId = SpxPartitionId::nil(); + +#[cfg(test)] +mod tests { + use super::*; + use crate::typed_uuid_tests; + // Run all boilerplate TypedUuid tests for this type, also + // ensuring TYPE_NAME and DB_COLUMN_NAME test correctly.
+ typed_uuid_tests!(SpxPartitionId, "SpxPartitionId", "id"); + + #[test] + fn test_null_constant() { + assert_eq!(NULL_SPX_PARTITION_ID, SpxPartitionId::nil()); + assert_eq!(uuid::Uuid::from(NULL_SPX_PARTITION_ID), uuid::Uuid::nil()); + } +} diff --git a/deploy/nico-base/api/config-files/nico-api-config.toml b/deploy/nico-base/api/config-files/nico-api-config.toml index d8ea964ca5..b9b289869a 100644 --- a/deploy/nico-base/api/config-files/nico-api-config.toml +++ b/deploy/nico-base/api/config-files/nico-api-config.toml @@ -49,7 +49,7 @@ autoupdate = true [machine_identity] enabled = true current_encryption_key_id = "kv1" # must match a key under machine_identity.encryption_keys in credentials -algorithm = "ES256" # must match tenant identity signing alg (ES256 only) +algorithm = "ES256" # must match tenant identity signing alg (ES256 only) [tls] identity_pemfile_path = "/var/run/secrets/spiffe.io/tls.crt" diff --git a/helm/charts/nico-api/files/carbide-api-config.toml b/helm/charts/nico-api/files/carbide-api-config.toml index ae2f7a3b78..3b527f189c 100644 --- a/helm/charts/nico-api/files/carbide-api-config.toml +++ b/helm/charts/nico-api/files/carbide-api-config.toml @@ -47,7 +47,7 @@ autoupdate = true [machine_identity] enabled = true current_encryption_key_id = "kv1" # must match a key under machine_identity.encryption_keys in credentials -algorithm = "ES256" # must match tenant identity signing alg (ES256 only) +algorithm = "ES256" # must match tenant identity signing alg (ES256 only) [tls] identity_pemfile_path = "/var/run/secrets/spiffe.io/tls.crt" diff --git a/helm/charts/nico-bmc-proxy/files/carbide-bmc-proxy.toml b/helm/charts/nico-bmc-proxy/files/carbide-bmc-proxy.toml index 52c011fb64..67d16ca205 100644 --- a/helm/charts/nico-bmc-proxy/files/carbide-bmc-proxy.toml +++ b/helm/charts/nico-bmc-proxy/files/carbide-bmc-proxy.toml @@ -11,10 +11,7 @@ admin_root_cafile_path = "/etc/forge/carbide-bmc-proxy/site/admin_root_cert_pem" [auth.trust] spiffe_trust_domain 
= "nico.local" -spiffe_service_base_paths = [ - "/nico-system/sa/", - "/default/sa/", -] +spiffe_service_base_paths = ["/nico-system/sa/", "/default/sa/"] spiffe_machine_base_path = "/nico-system/machine/" additional_issuer_cns = [] diff --git a/pxe/Makefile.toml b/pxe/Makefile.toml index aea3634e71..a94309d4b4 100644 --- a/pxe/Makefile.toml +++ b/pxe/Makefile.toml @@ -959,7 +959,9 @@ cwd = "/tmp/bfb-dump" [tasks.bfb-extract-efi-sa] category = "iPXE Kernel" description = "Extract the BFB initramfs using fakeroot" -script = ["gzip -d < /tmp/bfb-dump/dump-initramfs-v0 | fakeroot -s /tmp/bfb-dump/fakeroot.env cpio -id"] +script = [ + "gzip -d < /tmp/bfb-dump/dump-initramfs-v0 | fakeroot -s /tmp/bfb-dump/fakeroot.env cpio -id", +] cwd = "${BUILD_LOCATION}" [tasks.bfb-extract-efi] From c7f7a378fd6b6ca7f70893b1e6597db0057ee565 Mon Sep 17 00:00:00 2001 From: Srinivasa Murthy Date: Wed, 20 May 2026 22:04:09 +0000 Subject: [PATCH 2/3] feat: Changes to svpc/dpa to allow Spectrum X partitioning and allow flexible tenant configuration Signed-off-by: Srinivasa Murthy --- crates/api-test-helper/src/api_server.rs | 7 +++++++ crates/api/src/dpa_monitor/mod.rs | 13 +++++++++---- crates/api/src/handlers/spx_partition.rs | 9 +++++---- crates/libmlx/Cargo.toml | 4 ++-- crates/rpc/proto/forge.proto | 4 ++-- crates/scout/Cargo.toml | 4 ++-- 6 files changed, 27 insertions(+), 14 deletions(-) diff --git a/crates/api-test-helper/src/api_server.rs b/crates/api-test-helper/src/api_server.rs index 8229112fe2..e8dbdeb95e 100644 --- a/crates/api-test-helper/src/api_server.rs +++ b/crates/api-test-helper/src/api_server.rs @@ -146,6 +146,13 @@ pub async fn start( start = "1024500" end = "1024550" + [pools.dpa-vni] + type = "integer" + + [[pools.dpa-vni.ranges]] + start = "1024600" + end = "1024650" + [pools.vpc-dpu-lo] type = "ipv4" prefix = "10.181.62.1/26" diff --git a/crates/api/src/dpa_monitor/mod.rs b/crates/api/src/dpa_monitor/mod.rs index a479045ce3..210cd7f209 100644 --- 
a/crates/api/src/dpa_monitor/mod.rs +++ b/crates/api/src/dpa_monitor/mod.rs @@ -319,22 +319,27 @@ impl DpaMonitor { )); } - let mut txn = - db_services.db_pool.begin().await.map_err(|e| { - db::AnnotatedSqlxError::new("reconcile_assigned_state begin txn", e) - })?; if this_nic_configured_attachments.is_empty() { if !this_nic_observed_attachments.is_empty() { need_deletion = true; } } else { + let mut txn = + db_services.db_pool.begin().await.map_err(|e| { + db::AnnotatedSqlxError::new("reconcile_assigned_state begin txn", e) + })?; let partition_id = this_nic_configured_attachments.remove(0).spx_partition_id; let partition = db::spx_partition::find_by( txn.as_mut(), ObjectColumnFilter::List(db::spx_partition::IdColumn, &[partition_id]), ) .await?; + + txn.commit().await.map_err(|e| { + db::AnnotatedSqlxError::new("reconcile_assigned_state commit txn", e) + })?; + if partition.len() != 1 { tracing::error!( "reconcile_assigned_state SPX partition {partition_id} is not found" diff --git a/crates/api/src/handlers/spx_partition.rs b/crates/api/src/handlers/spx_partition.rs index c2b8f4c581..32acb27108 100644 --- a/crates/api/src/handlers/spx_partition.rs +++ b/crates/api/src/handlers/spx_partition.rs @@ -111,20 +111,21 @@ pub(crate) async fn delete( .id .ok_or_else(|| CarbideError::MissingArgument("id"))?; - let mut txn = api.txn_begin().await?; - let resp = api .with_txn(|txn| db::spx_partition::mark_as_deleted(id, txn).boxed()) .await? 
.map_err(CarbideError::from)?; if let Some(vni) = resp.vni { + + let mut txn = api.txn_begin().await?; + db::resource_pool::release(&api.common_pools.ethernet.pool_dpa_vni, &mut txn, vni) .await .map_err(CarbideError::from)?; - } - txn.commit().await?; + txn.commit().await?; + } Ok(Response::new(rpc::SpxPartitionDeletionResult {})) } diff --git a/crates/libmlx/Cargo.toml b/crates/libmlx/Cargo.toml index 0a568ca6f0..e911dde111 100644 --- a/crates/libmlx/Cargo.toml +++ b/crates/libmlx/Cargo.toml @@ -90,6 +90,6 @@ carbide-ssh = { path = "../ssh", features = ["test_support"] } russh = { workspace = true } [features] -default = ["mockdpa"] -#default = [] +#default = ["mockdpa"] +default = [] mockdpa = [] diff --git a/crates/rpc/proto/forge.proto b/crates/rpc/proto/forge.proto index 72a6b96c23..1a2242dfa7 100644 --- a/crates/rpc/proto/forge.proto +++ b/crates/rpc/proto/forge.proto @@ -8231,14 +8231,14 @@ message SpxPartitionCreationRequest { Metadata metadata = 1; optional common.SpxPartitionId id = 2; optional uint32 vni = 3; - string tenantOrganizationId = 4; + string tenant_organization_id = 4; } message SpxPartition { Metadata metadata = 1; common.SpxPartitionId id = 2; uint32 vni = 3; - string tenantOrganizationId = 4; + string tenant_organization_id = 4; } message SpxPartitionIdList { diff --git a/crates/scout/Cargo.toml b/crates/scout/Cargo.toml index 563db416c7..cc28995899 100644 --- a/crates/scout/Cargo.toml +++ b/crates/scout/Cargo.toml @@ -78,8 +78,8 @@ axum = { workspace = true } carbide-version = { path = "../version" } [features] -#default = [] -default = ["mockdpa"] +default = [] +#default = ["mockdpa"] mockdpa = [] [lints] From cc4af53f49f4e8513d2e59d065a4bbc722376864 Mon Sep 17 00:00:00 2001 From: Srinivasa Murthy Date: Thu, 21 May 2026 19:29:23 +0000 Subject: [PATCH 3/3] feat: Changes to svpc/dpa to allow Spectrum X partitioning and allow flexible tenant configuration Signed-off-by: Srinivasa Murthy --- crates/admin-cli/src/cfg/cli_options.rs | 6 +++--- 
.../admin-cli/src/instance/update_spx_config/cmd.rs | 3 +-- .../admin-cli/src/instance/update_spx_config/mod.rs | 2 +- crates/admin-cli/src/spx_partition/show/cmd.rs | 2 +- crates/admin-cli/src/spx_partition/show/mod.rs | 2 +- crates/api/src/dpa_monitor/mod.rs | 12 +++++++----- crates/api/src/handlers/spx_partition.rs | 1 - crates/libnmxc/Cargo.toml | 7 ++++++- rest-api/.revive.toml | 2 +- 9 files changed, 21 insertions(+), 16 deletions(-) diff --git a/crates/admin-cli/src/cfg/cli_options.rs b/crates/admin-cli/src/cfg/cli_options.rs index 72d575e81b..9a6c250a40 100644 --- a/crates/admin-cli/src/cfg/cli_options.rs +++ b/crates/admin-cli/src/cfg/cli_options.rs @@ -25,9 +25,9 @@ use crate::{ machine_interfaces, machine_validation, managed_host, managed_switch, mlx, network_devices, network_security_group, network_segment, nvl_logical_partition, nvl_partition, nvlink_nmxc_endpoints, operating_system, os_image, ping, power_shelf, rack, rack_firmware, - redfish, resource_pool, rms, route_server, scout_stream, set, site_explorer, sku, ssh, switch, - tenant, tenant_keyset, tpm_ca, trim_table, version, vpc, vpc_peering, vpc_prefix, - spx_partition, + redfish, resource_pool, rms, route_server, scout_stream, set, site_explorer, sku, + spx_partition, ssh, switch, tenant, tenant_keyset, tpm_ca, trim_table, version, vpc, + vpc_peering, vpc_prefix, }; #[derive(Parser, Debug)] diff --git a/crates/admin-cli/src/instance/update_spx_config/cmd.rs b/crates/admin-cli/src/instance/update_spx_config/cmd.rs index 306a5dd1f8..c293d67d6b 100644 --- a/crates/admin-cli/src/instance/update_spx_config/cmd.rs +++ b/crates/admin-cli/src/instance/update_spx_config/cmd.rs @@ -15,9 +15,8 @@ * limitations under the License. 
*/ -use crate::errors::{CarbideCliError, CarbideCliResult}; - use super::args::Args; +use crate::errors::{CarbideCliError, CarbideCliResult}; use crate::instance::common::GlobalOptions; use crate::rpc::ApiClient; diff --git a/crates/admin-cli/src/instance/update_spx_config/mod.rs b/crates/admin-cli/src/instance/update_spx_config/mod.rs index 0cbd20e689..e2e76ad679 100644 --- a/crates/admin-cli/src/instance/update_spx_config/mod.rs +++ b/crates/admin-cli/src/instance/update_spx_config/mod.rs @@ -18,12 +18,12 @@ pub mod args; pub mod cmd; -use crate::errors::{CarbideCliResult}; pub use args::Args; use super::common::GlobalOptions; use crate::cfg::run::Run; use crate::cfg::runtime::RuntimeContext; +use crate::errors::CarbideCliResult; impl Run for Args { async fn run(self, ctx: &mut RuntimeContext) -> CarbideCliResult<()> { diff --git a/crates/admin-cli/src/spx_partition/show/cmd.rs b/crates/admin-cli/src/spx_partition/show/cmd.rs index f26538c9fd..ee92c1caa9 100644 --- a/crates/admin-cli/src/spx_partition/show/cmd.rs +++ b/crates/admin-cli/src/spx_partition/show/cmd.rs @@ -17,13 +17,13 @@ use std::fmt::Write; -use crate::errors::{CarbideCliError, CarbideCliResult}; use ::rpc::admin_cli::OutputFormat; use ::rpc::forge as forgerpc; use carbide_uuid::spx::SpxPartitionId; use prettytable::{Table, row}; use super::args::Args; +use crate::errors::{CarbideCliError, CarbideCliResult}; use crate::rpc::ApiClient; pub async fn show( diff --git a/crates/admin-cli/src/spx_partition/show/mod.rs b/crates/admin-cli/src/spx_partition/show/mod.rs index 964ad926ea..744cc237d4 100644 --- a/crates/admin-cli/src/spx_partition/show/mod.rs +++ b/crates/admin-cli/src/spx_partition/show/mod.rs @@ -18,11 +18,11 @@ pub mod args; pub mod cmd; -use crate::errors::{CarbideCliResult}; pub use args::Args; use crate::cfg::run::Run; use crate::cfg::runtime::RuntimeContext; +use crate::errors::CarbideCliResult; impl Run for Args { async fn run(self, ctx: &mut RuntimeContext) -> CarbideCliResult<()> { 
diff --git a/crates/api/src/dpa_monitor/mod.rs b/crates/api/src/dpa_monitor/mod.rs index 210cd7f209..1ae2fbd0ae 100644 --- a/crates/api/src/dpa_monitor/mod.rs +++ b/crates/api/src/dpa_monitor/mod.rs @@ -256,6 +256,8 @@ impl DpaMonitor { // or remove the NIC from any partition. // The desired state will be in instance.spx_config field. The observed state will be in the // NIC's network_status_observation field. + // Currently, we only support one attachment per NIC. This routine will have to be changed + // when we start supporting multiple attachments per NIC. #[allow(clippy::too_many_arguments)] async fn reconcile_assigned_state<'a>( &mut self, @@ -319,16 +321,14 @@ impl DpaMonitor { )); } - if this_nic_configured_attachments.is_empty() { if !this_nic_observed_attachments.is_empty() { need_deletion = true; } } else { - let mut txn = - db_services.db_pool.begin().await.map_err(|e| { - db::AnnotatedSqlxError::new("reconcile_assigned_state begin txn", e) - })?; + let mut txn = db_services.db_pool.begin().await.map_err(|e| { + db::AnnotatedSqlxError::new("reconcile_assigned_state begin txn", e) + })?; let partition_id = this_nic_configured_attachments.remove(0).spx_partition_id; let partition = db::spx_partition::find_by( txn.as_mut(), @@ -412,6 +412,8 @@ impl DpaMonitor { // This function will be called when the DPA object is in Ready state. // We need to make sure that the partitioning configuration of the NIC is in sync with // the desired state. + // Currently, we only support one attachment per NIC. This routine will have to be changed + // when we start supporting multiple attachments per NIC. 
async fn reconcile_ready_state<'a>( &mut self, machine: &Machine, diff --git a/crates/api/src/handlers/spx_partition.rs b/crates/api/src/handlers/spx_partition.rs index 32acb27108..9ae6f919c5 100644 --- a/crates/api/src/handlers/spx_partition.rs +++ b/crates/api/src/handlers/spx_partition.rs @@ -117,7 +117,6 @@ pub(crate) async fn delete( .map_err(CarbideError::from)?; if let Some(vni) = resp.vni { - let mut txn = api.txn_begin().await?; db::resource_pool::release(&api.common_pools.ethernet.pool_dpa_vni, &mut txn, vni) diff --git a/crates/libnmxc/Cargo.toml b/crates/libnmxc/Cargo.toml index 4ed9c25067..1230e45576 100644 --- a/crates/libnmxc/Cargo.toml +++ b/crates/libnmxc/Cargo.toml @@ -29,7 +29,12 @@ path = "src/lib.rs" [dependencies] async-trait = { workspace = true } http = { workspace = true } -tokio = { workspace = true, features = ["fs", "net", "time", "rt-multi-thread"] } +tokio = { workspace = true, features = [ + "fs", + "net", + "time", + "rt-multi-thread", +] } tonic = { workspace = true } prost = { workspace = true } serde = { workspace = true, features = ["derive"] } diff --git a/rest-api/.revive.toml b/rest-api/.revive.toml index 64496485f1..3f3b3dfa58 100644 --- a/rest-api/.revive.toml +++ b/rest-api/.revive.toml @@ -28,4 +28,4 @@ errorCode = 0 # Disable these rules [rule.package-comments] - Disabled = true +Disabled = true