diff --git a/Cargo.lock b/Cargo.lock index 1009dbe424..26148b2049 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6287,7 +6287,7 @@ dependencies = [ [[package]] name = "librms" version = "0.0.12" -source = "git+https://github.com/NVIDIA/nv-rms-client.git?tag=v0.0.12-rc1#94a7d944e80075cd57d04bdf0ebf2db497e205db" +source = "git+https://github.com/NVIDIA/nv-rms-client.git?tag=v0.0.12-rc3#88797e64f4fbf072b5844ed7b8d7e1e987d4aef6" dependencies = [ "async-trait", "chrono", @@ -6297,6 +6297,7 @@ dependencies = [ "hyper-timeout", "hyper-util", "prost", + "prost-types", "rustls", "rustls-pemfile", "serde", @@ -11314,7 +11315,7 @@ dependencies = [ [[package]] name = "tonic-client-wrapper" version = "1.0.0" -source = "git+https://github.com/NVIDIA/nv-rms-client.git?tag=v0.0.12-rc1#94a7d944e80075cd57d04bdf0ebf2db497e205db" +source = "git+https://github.com/NVIDIA/nv-rms-client.git?tag=v0.0.12-rc3#88797e64f4fbf072b5844ed7b8d7e1e987d4aef6" dependencies = [ "async-trait", "heck", diff --git a/Cargo.toml b/Cargo.toml index bf96404326..f26bc1481e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,7 +27,7 @@ authors = ["NVIDIA Carbide Engineering "] [workspace.dependencies] clap = { version = "4", features = ["derive", "env"] } libredfish = { git = "https://github.com/NVIDIA/libredfish.git", tag = "v0.44.3" } -librms = { git = "https://github.com/NVIDIA/nv-rms-client.git", tag = "v0.0.12-rc1" } +librms = { git = "https://github.com/NVIDIA/nv-rms-client.git", tag = "v0.0.12-rc3" } ansi-to-html = "0.2.2" tokio = { version = "1", features = ["full", "tracing"] } diff --git a/crates/admin-cli/src/cfg/cli_options.rs b/crates/admin-cli/src/cfg/cli_options.rs index 31a33c63e1..178371b273 100644 --- a/crates/admin-cli/src/cfg/cli_options.rs +++ b/crates/admin-cli/src/cfg/cli_options.rs @@ -24,9 +24,9 @@ use crate::{ ib_partition, instance, instance_type, inventory, ip, ipxe_template, jump, machine, machine_interfaces, machine_validation, managed_host, managed_switch, mlx, network_devices, 
network_security_group, network_segment, nvl_logical_partition, nvl_partition, - nvlink_nmxc_endpoints, operating_system, os_image, ping, power_shelf, rack, rack_firmware, - redfish, resource_pool, rms, route_server, scout_stream, set, site_explorer, sku, ssh, switch, - tenant, tenant_keyset, tpm_ca, trim_table, version, vpc, vpc_peering, vpc_prefix, + nvlink_nmxc_endpoints, operating_system, os_image, ping, power_shelf, rack, redfish, + resource_pool, rms, route_server, scout_stream, set, site_explorer, sku, ssh, switch, tenant, + tenant_keyset, tpm_ca, trim_table, version, vpc, vpc_peering, vpc_prefix, }; #[derive(Parser, Debug)] @@ -308,13 +308,6 @@ pub enum CliCommand { #[clap(about = "Rack Management", subcommand)] Rack(rack::Cmd), - #[clap( - about = "Rack Firmware configuration management", - subcommand, - visible_alias = "rack-fw" - )] - RackFirmware(rack_firmware::Cmd), - #[clap(about = "RMS Actions")] Rms(rms::args::RmsAction), diff --git a/crates/admin-cli/src/component_manager/update_firmware/args.rs b/crates/admin-cli/src/component_manager/update_firmware/args.rs index 70a6482f81..b62b1c3f8b 100644 --- a/crates/admin-cli/src/component_manager/update_firmware/args.rs +++ b/crates/admin-cli/src/component_manager/update_firmware/args.rs @@ -15,12 +15,15 @@ * limitations under the License. 
*/ +use std::path::PathBuf; + use clap::{Args as ClapArgs, Parser, Subcommand}; use crate::component_manager::common::{ ComputeTrayComponentArg, MachineTargetArgs, NvSwitchComponentArg, PowerShelfComponentArg, PowerShelfTargetArgs, RackTargetArgs, SwitchTargetArgs, }; +use crate::errors::{CarbideCliError, CarbideCliResult}; #[derive(Parser, Debug)] pub struct Args { @@ -48,8 +51,11 @@ pub struct SwitchArgs { #[clap(flatten)] pub ids: SwitchTargetArgs, - #[clap(long = "target-version", help = "Firmware target version")] - pub target_version: String, + #[clap(flatten)] + pub firmware_source: FirmwareSourceArgs, + + #[clap(long = "force-update", help = "Force firmware update when supported")] + pub force_update: bool, #[clap( long = "component", @@ -68,6 +74,9 @@ pub struct PowerShelfArgs { #[clap(long = "target-version", help = "Firmware target version")] pub target_version: String, + #[clap(long = "force-update", help = "Force firmware update when supported")] + pub force_update: bool, + #[clap( long = "component", value_enum, @@ -82,8 +91,11 @@ pub struct ComputeTrayArgs { #[clap(flatten)] pub ids: MachineTargetArgs, - #[clap(long = "target-version", help = "Firmware target version")] - pub target_version: String, + #[clap(flatten)] + pub firmware_source: FirmwareSourceArgs, + + #[clap(long = "force-update", help = "Force firmware update when supported")] + pub force_update: bool, #[clap( long = "component", @@ -99,32 +111,114 @@ pub struct RackArgs { #[clap(flatten)] pub ids: RackTargetArgs, - #[clap(long = "target-version", help = "Firmware target version")] - pub target_version: String, + #[clap(flatten)] + pub firmware_source: FirmwareSourceArgs, + + #[clap(long = "force-update", help = "Force firmware update when supported")] + pub force_update: bool, } -impl From for rpc::forge::UpdateComponentFirmwareRequest { - fn from(args: Args) -> Self { +#[derive(ClapArgs, Debug)] +pub struct FirmwareSourceArgs { + #[clap( + long = "target-version", + help = "Firmware 
target version for legacy direct-update paths" + )] + pub target_version: Option, + + #[clap( + long = "sot-json-file", + value_name = "PATH", + help = "SOT JSON file for RMS ApplyFirmwareObjectFromJSON" + )] + pub sot_json_file: Option, + + #[clap( + long = "access-token", + help = "Artifact access token; required with --sot-json-file" + )] + pub access_token: Option, +} + +fn resolve_firmware_source( + source: FirmwareSourceArgs, +) -> CarbideCliResult<(String, Option)> { + match ( + source.target_version, + source.sot_json_file, + source.access_token, + ) { + (Some(_), Some(_), _) => Err(CarbideCliError::ChooseOneError( + "--target-version", + "--sot-json-file", + )), + (None, None, _) => Err(CarbideCliError::RequireOneError( + "--target-version", + "--sot-json-file", + )), + (Some(_), None, Some(_)) => Err(CarbideCliError::GenericError( + "--access-token requires --sot-json-file".to_string(), + )), + (Some(target_version), None, None) => { + if target_version.trim().is_empty() { + Err(CarbideCliError::GenericError( + "--target-version must not be empty".to_string(), + )) + } else { + Ok((target_version, None)) + } + } + (None, Some(sot_json_file), access_token) => { + let token = access_token.ok_or_else(|| { + CarbideCliError::GenericError( + "--access-token is required with --sot-json-file".to_string(), + ) + })?; + if token.trim().is_empty() { + return Err(CarbideCliError::GenericError( + "--access-token must not be empty".to_string(), + )); + } + + let config_json = std::fs::read_to_string(sot_json_file)?; + serde_json::from_str::(&config_json)?; + Ok((config_json, Some(token))) + } + } +} + +impl TryFrom for rpc::forge::UpdateComponentFirmwareRequest { + type Error = CarbideCliError; + + fn try_from(args: Args) -> CarbideCliResult { match args.target { - Target::Switch(target) => Self { - target_version: target.target_version, - target: Some( - rpc::forge::update_component_firmware_request::Target::Switches( - rpc::forge::UpdateSwitchFirmwareTarget { - 
switch_ids: Some(target.ids.into()), - components: target - .components - .into_iter() - .map(|component| { - rpc::forge::NvSwitchComponent::from(component) as i32 - }) - .collect(), - }, + Target::Switch(target) => { + let (target_version, access_token) = + resolve_firmware_source(target.firmware_source)?; + Ok(Self { + target_version, + access_token, + force_update: target.force_update, + target: Some( + rpc::forge::update_component_firmware_request::Target::Switches( + rpc::forge::UpdateSwitchFirmwareTarget { + switch_ids: Some(target.ids.into()), + components: target + .components + .into_iter() + .map(|component| { + rpc::forge::NvSwitchComponent::from(component) as i32 + }) + .collect(), + }, + ), ), - ), - }, - Target::PowerShelf(target) => Self { + }) + } + Target::PowerShelf(target) => Ok(Self { target_version: target.target_version, + access_token: None, + force_update: target.force_update, target: Some( rpc::forge::update_component_firmware_request::Target::PowerShelves( rpc::forge::UpdatePowerShelfFirmwareTarget { @@ -139,34 +233,120 @@ impl From for rpc::forge::UpdateComponentFirmwareRequest { }, ), ), - }, - Target::ComputeTray(target) => Self { - target_version: target.target_version, - target: Some( - rpc::forge::update_component_firmware_request::Target::ComputeTrays( - rpc::forge::UpdateComputeTrayFirmwareTarget { - machine_ids: Some(target.ids.into()), - components: target - .components - .into_iter() - .map(|component| { - rpc::forge::ComputeTrayComponent::from(component) as i32 - }) - .collect(), - }, + }), + Target::ComputeTray(target) => { + let (target_version, access_token) = + resolve_firmware_source(target.firmware_source)?; + Ok(Self { + target_version, + access_token, + force_update: target.force_update, + target: Some( + rpc::forge::update_component_firmware_request::Target::ComputeTrays( + rpc::forge::UpdateComputeTrayFirmwareTarget { + machine_ids: Some(target.ids.into()), + components: target + .components + .into_iter() + 
.map(|component| { + rpc::forge::ComputeTrayComponent::from(component) as i32 + }) + .collect(), + }, + ), ), - ), - }, - Target::Rack(target) => Self { - target_version: target.target_version, - target: Some( - rpc::forge::update_component_firmware_request::Target::Racks( - rpc::forge::UpdateRackFirmwareTarget { - rack_ids: Some(target.ids.into()), - }, + }) + } + Target::Rack(target) => { + let (target_version, access_token) = + resolve_firmware_source(target.firmware_source)?; + Ok(Self { + target_version, + access_token, + force_update: target.force_update, + target: Some( + rpc::forge::update_component_firmware_request::Target::Racks( + rpc::forge::UpdateFirmwareObjectTarget { + rack_ids: Some(target.ids.into()), + }, + ), ), - ), - }, + }) + } } } } + +#[cfg(test)] +mod tests { + use super::*; + + fn temp_sot_file(contents: &str) -> PathBuf { + let path = std::env::temp_dir().join(format!( + "bmm-sot-{}-{}.json", + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("system time before unix epoch") + .as_nanos() + )); + std::fs::write(&path, contents).expect("write test SOT JSON"); + path + } + + #[test] + fn target_version_source_uses_legacy_version() { + let (target_version, access_token) = resolve_firmware_source(FirmwareSourceArgs { + target_version: Some("fw-1.0".to_string()), + sot_json_file: None, + access_token: None, + }) + .expect("legacy source should resolve"); + + assert_eq!(target_version, "fw-1.0"); + assert_eq!(access_token, None); + } + + #[test] + fn sot_json_source_reads_file_and_sets_access_token() { + let path = temp_sot_file(r#"{"Id":"fw-object"}"#); + + let (target_version, access_token) = resolve_firmware_source(FirmwareSourceArgs { + target_version: None, + sot_json_file: Some(path.clone()), + access_token: Some("token".to_string()), + }) + .expect("SOT source should resolve"); + + assert_eq!(target_version, r#"{"Id":"fw-object"}"#); + assert_eq!(access_token.as_deref(), 
Some("token")); + let _ = std::fs::remove_file(path); + } + + #[test] + fn access_token_without_sot_json_file_is_rejected() { + let err = resolve_firmware_source(FirmwareSourceArgs { + target_version: Some("fw-1.0".to_string()), + sot_json_file: None, + access_token: Some("token".to_string()), + }) + .expect_err("access token without SOT JSON should fail"); + + assert!(err.to_string().contains("--access-token requires")); + } + + #[test] + fn invalid_sot_json_file_is_rejected() { + let path = temp_sot_file("not-json"); + + let err = resolve_firmware_source(FirmwareSourceArgs { + target_version: None, + sot_json_file: Some(path.clone()), + access_token: Some("token".to_string()), + }) + .expect_err("invalid SOT JSON should fail"); + + assert!(matches!(err, CarbideCliError::JsonError(_))); + let _ = std::fs::remove_file(path); + } +} diff --git a/crates/admin-cli/src/component_manager/update_firmware/cmd.rs b/crates/admin-cli/src/component_manager/update_firmware/cmd.rs index fe3755f012..c290bc600f 100644 --- a/crates/admin-cli/src/component_manager/update_firmware/cmd.rs +++ b/crates/admin-cli/src/component_manager/update_firmware/cmd.rs @@ -28,9 +28,10 @@ pub async fn update_firmware( format: OutputFormat, api_client: &ApiClient, ) -> Result<(), CarbideCliError> { + let request: rpc::forge::UpdateComponentFirmwareRequest = opts.try_into()?; let response = api_client .0 - .update_component_firmware(opts) + .update_component_firmware(request) .await .map_err(CarbideCliError::from)?; diff --git a/crates/admin-cli/src/main.rs b/crates/admin-cli/src/main.rs index 679d387cbd..c035143c4b 100644 --- a/crates/admin-cli/src/main.rs +++ b/crates/admin-cli/src/main.rs @@ -93,7 +93,6 @@ mod os_image; mod ping; mod power_shelf; mod rack; -mod rack_firmware; mod redfish; mod resource_pool; mod rms; @@ -272,7 +271,6 @@ async fn main() -> color_eyre::Result<()> { CliCommand::Vpc(cmd) => cmd.dispatch(ctx).await?, CliCommand::VpcPeering(cmd) => cmd.dispatch(ctx).await?, 
CliCommand::VpcPrefix(cmd) => cmd.dispatch(ctx).await?, - CliCommand::RackFirmware(cmd) => cmd.dispatch(ctx).await?, CliCommand::Dpf(cmd) => cmd.dispatch(ctx).await?, CliCommand::Redfish(action) => { if let redfish::Cmd::Browse(redfish::UriInfo { uri }) = &action.command { diff --git a/crates/admin-cli/src/rack/maintenance/args.rs b/crates/admin-cli/src/rack/maintenance/args.rs index b81379fc82..b2e10e1864 100644 --- a/crates/admin-cli/src/rack/maintenance/args.rs +++ b/crates/admin-cli/src/rack/maintenance/args.rs @@ -15,6 +15,8 @@ * limitations under the License. */ +use std::path::PathBuf; + use carbide_uuid::rack::RackId; use clap::Parser; @@ -63,10 +65,26 @@ pub struct MaintenanceOptions { #[clap( long, - help = "Target firmware version for firmware-upgrade activity (omit for RMS default)" + help = "Raw SOT JSON for firmware-upgrade activity (prefer --sot-json-file)" )] pub firmware_version: Option, + #[clap( + long = "sot-json-file", + value_name = "PATH", + help = "SOT JSON file for RMS ApplyFirmwareObjectFromJSON" + )] + pub sot_json_file: Option, + + #[clap( + long = "access-token", + help = "Artifact access token; required with --sot-json-file" + )] + pub access_token: Option, + + #[clap(long = "force-update", help = "Force firmware update when supported")] + pub force_update: bool, + #[clap( long, help = "Firmware components to update, e.g. 
BMC,CPLD,BIOS (omit for all components)", @@ -77,7 +95,7 @@ pub struct MaintenanceOptions { #[clap( long, - help = "Rack firmware ID containing the NVOS switch system image (omit for default)" + help = "Firmware object ID containing the NVOS switch system image (omit for default)" )] - pub rack_firmware_id: Option, + pub firmware_object_id: Option, } diff --git a/crates/admin-cli/src/rack/maintenance/cmd.rs b/crates/admin-cli/src/rack/maintenance/cmd.rs index 1211c5030d..21e1ecfc81 100644 --- a/crates/admin-cli/src/rack/maintenance/cmd.rs +++ b/crates/admin-cli/src/rack/maintenance/cmd.rs @@ -18,18 +18,89 @@ use ::rpc::forge as rpc; use super::args::MaintenanceOptions; -use crate::errors::CarbideCliResult; +use crate::errors::{CarbideCliError, CarbideCliResult}; use crate::rpc::ApiClient; +fn resolve_firmware_upgrade_source( + args: &MaintenanceOptions, +) -> CarbideCliResult<(String, Option)> { + let explicit_firmware_upgrade = args.activities.as_ref().is_some_and(|activities| { + activities + .iter() + .any(|activity| activity == "firmware-upgrade") + }); + + if args.firmware_version.is_some() && args.sot_json_file.is_some() { + return Err(CarbideCliError::ChooseOneError( + "--firmware-version", + "--sot-json-file", + )); + } + + let firmware_version = if let Some(path) = args.sot_json_file.as_ref() { + let config_json = std::fs::read_to_string(path)?; + serde_json::from_str::(&config_json)?; + config_json + } else { + args.firmware_version.clone().unwrap_or_default() + }; + + let access_token = args.access_token.as_ref().and_then(|token| { + if token.trim().is_empty() { + None + } else { + Some(token.clone()) + } + }); + + if args.sot_json_file.is_some() && access_token.is_none() { + return Err(CarbideCliError::GenericError( + "--access-token is required with --sot-json-file".to_string(), + )); + } + if explicit_firmware_upgrade && firmware_version.trim().is_empty() { + return Err(CarbideCliError::GenericError( + "--activities firmware-upgrade requires SOT JSON 
from --sot-json-file or --firmware-version" + .to_string(), + )); + } + if explicit_firmware_upgrade && access_token.is_none() { + return Err(CarbideCliError::GenericError( + "--activities firmware-upgrade requires --access-token".to_string(), + )); + } + if !explicit_firmware_upgrade && args.sot_json_file.is_some() { + return Err(CarbideCliError::GenericError( + "--sot-json-file requires --activities firmware-upgrade".to_string(), + )); + } + if !explicit_firmware_upgrade && args.firmware_version.is_some() { + return Err(CarbideCliError::GenericError( + "--firmware-version requires --activities firmware-upgrade".to_string(), + )); + } + if !explicit_firmware_upgrade && args.access_token.is_some() { + return Err(CarbideCliError::GenericError( + "--access-token requires --activities firmware-upgrade".to_string(), + )); + } + if access_token.is_some() && args.firmware_version.is_some() { + serde_json::from_str::(&firmware_version)?; + } + + Ok((firmware_version, access_token)) +} + pub async fn on_demand_rack_maintenance( api_client: &ApiClient, args: MaintenanceOptions, ) -> CarbideCliResult<()> { use rpc::maintenance_activity_config::Activity as ProtoActivity; - let firmware_version = args.firmware_version.unwrap_or_default(); + let (firmware_version, access_token) = resolve_firmware_upgrade_source(&args)?; let components = args.components.unwrap_or_default(); - let rack_firmware_id = args.rack_firmware_id.unwrap_or_default(); + let firmware_object_id = args.firmware_object_id.unwrap_or_default(); + let force_update = args.force_update; let activities: Vec = args .activities @@ -41,11 +112,13 @@ pub async fn on_demand_rack_maintenance( rpc::FirmwareUpgradeActivity { firmware_version: firmware_version.clone(), components: components.clone(), + access_token: access_token.clone(), + force_update, }, )), "nvos-update" => Ok(ProtoActivity::NvosUpdate( rpc::NvosUpdateActivity { - rack_firmware_id: rack_firmware_id.clone(), + firmware_object_id: 
firmware_object_id.clone(), }, )), "configure-nmx-cluster" => Ok(ProtoActivity::ConfigureNmxCluster( @@ -77,3 +150,94 @@ pub async fn on_demand_rack_maintenance( println!("On-demand rack maintenance scheduled successfully."); Ok(()) } + +#[cfg(test)] +mod tests { + use carbide_uuid::rack::RackId; + + use super::*; + + fn options() -> MaintenanceOptions { + MaintenanceOptions { + rack: RackId::new("rack-test"), + machine_ids: None, + switch_ids: None, + power_shelf_ids: None, + activities: None, + firmware_version: None, + sot_json_file: None, + access_token: None, + force_update: false, + components: None, + firmware_object_id: None, + } + } + + #[test] + fn firmware_upgrade_requires_sot_json() { + let args = MaintenanceOptions { + activities: Some(vec!["firmware-upgrade".to_string()]), + access_token: Some("token".to_string()), + ..options() + }; + + let err = resolve_firmware_upgrade_source(&args).unwrap_err(); + + assert!(err.to_string().contains("requires SOT JSON")); + } + + #[test] + fn firmware_upgrade_requires_access_token() { + let args = MaintenanceOptions { + activities: Some(vec!["firmware-upgrade".to_string()]), + firmware_version: Some(r#"{"Id":"fw"}"#.to_string()), + ..options() + }; + + let err = resolve_firmware_upgrade_source(&args).unwrap_err(); + + assert!(err.to_string().contains("requires --access-token")); + } + + #[test] + fn firmware_upgrade_rejects_invalid_inline_json() { + let args = MaintenanceOptions { + activities: Some(vec!["firmware-upgrade".to_string()]), + firmware_version: Some("not-json".to_string()), + access_token: Some("token".to_string()), + ..options() + }; + + assert!(resolve_firmware_upgrade_source(&args).is_err()); + } + + #[test] + fn firmware_source_requires_firmware_upgrade_activity() { + let args = MaintenanceOptions { + firmware_version: Some(r#"{"Id":"fw"}"#.to_string()), + ..options() + }; + + let err = resolve_firmware_upgrade_source(&args).unwrap_err(); + + assert!( + err.to_string() + 
.contains("--firmware-version requires --activities firmware-upgrade") + ); + } + + #[test] + fn access_token_requires_firmware_upgrade_activity() { + let args = MaintenanceOptions { + access_token: Some("token".to_string()), + ..options() + }; + + let err = resolve_firmware_upgrade_source(&args).unwrap_err(); + + assert!( + err.to_string() + .contains("--access-token requires --activities firmware-upgrade") + ); + } +} diff --git a/crates/admin-cli/src/rack_firmware/apply/args.rs b/crates/admin-cli/src/rack_firmware/apply/args.rs deleted file mode 100644 index beedb8d4df..0000000000 --- a/crates/admin-cli/src/rack_firmware/apply/args.rs +++ /dev/null @@ -1,41 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -use carbide_uuid::rack::RackId; -use clap::Parser; - -#[derive(Parser, Debug)] -pub struct Args { - #[clap(help = "Rack ID to apply firmware to")] - pub rack_id: RackId, - - #[clap(help = "Firmware configuration ID to apply")] - pub firmware_id: String, - - #[clap(help = "Firmware type: dev or prod", value_parser = ["dev", "prod"])] - pub firmware_type: String, -} - -impl From for rpc::forge::RackFirmwareApplyRequest { - fn from(args: Args) -> Self { - Self { - rack_id: Some(args.rack_id), - firmware_id: args.firmware_id, - firmware_type: args.firmware_type, - } - } -} diff --git a/crates/admin-cli/src/rack_firmware/apply/cmd.rs b/crates/admin-cli/src/rack_firmware/apply/cmd.rs deleted file mode 100644 index abf90745c9..0000000000 --- a/crates/admin-cli/src/rack_firmware/apply/cmd.rs +++ /dev/null @@ -1,133 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -use ::rpc::admin_cli::OutputFormat; -use prettytable::{Cell, Row, Table}; - -use super::args::Args; -use crate::errors::CarbideCliError; -use crate::rpc::ApiClient; - -pub async fn apply( - opts: Args, - format: OutputFormat, - api_client: &ApiClient, -) -> Result<(), CarbideCliError> { - println!( - "Applying firmware ID '{}' ({}) to rack '{}'...", - opts.firmware_id, opts.firmware_type, opts.rack_id - ); - - let response = api_client - .0 - .apply_rack_firmware(opts) - .await - .map_err(CarbideCliError::from)?; - - if format == OutputFormat::Json { - let result = serde_json::json!({ - "total_updates": response.total_updates, - "successful_updates": response.successful_updates, - "failed_updates": response.failed_updates, - "device_results": response.device_results.iter().map(|r| serde_json::json!({ - "device_id": r.device_id, - "device_type": r.device_type, - "success": r.success, - "message": r.message, - "job_id": r.job_id, - "node_jobs": r.node_jobs.iter().map(|j| serde_json::json!({ - "node_id": j.node_id, - "job_id": j.job_id, - })).collect::>(), - })).collect::>(), - }); - println!("{}", serde_json::to_string_pretty(&result)?); - } else { - let mut table = Table::new(); - table.set_titles(Row::new(vec![ - Cell::new("Device Type"), - Cell::new("Status"), - Cell::new("Job ID"), - ])); - - for device_result in &response.device_results { - let status_text = if device_result.success { - "INITIATED" - } else { - "FAILED" - }; - - let job_id_display = if device_result.job_id.is_empty() { - "-".to_string() - } else { - device_result.job_id.clone() - }; - - table.add_row(Row::new(vec![ - Cell::new(&device_result.device_type), - Cell::new(status_text), - Cell::new(&job_id_display), - ])); - } - - println!("\n{}", "=".repeat(80)); - println!("Firmware Update Summary"); - println!("{}", "=".repeat(80)); - table.printstd(); - println!("\nTotal updates: {}", response.total_updates); - println!("Successfully initiated: {}", response.successful_updates); - 
println!("Failed to initiate: {}", response.failed_updates); - - let has_node_jobs = response - .device_results - .iter() - .any(|r| !r.node_jobs.is_empty()); - if has_node_jobs { - println!("\n{}", "-".repeat(80)); - println!("Per-Node Job IDs (use with GetFirmwareJobStatus to track progress)"); - println!("{}", "-".repeat(80)); - - let mut node_table = Table::new(); - node_table.set_titles(Row::new(vec![ - Cell::new("Device Type"), - Cell::new("Node ID"), - Cell::new("Job ID"), - ])); - - for device_result in &response.device_results { - for node_job in &device_result.node_jobs { - node_table.add_row(Row::new(vec![ - Cell::new(&device_result.device_type), - Cell::new(&node_job.node_id), - Cell::new(&node_job.job_id), - ])); - } - } - - node_table.printstd(); - } - } - - if response.failed_updates > 0 { - return Err(CarbideCliError::GenericError(format!( - "{} firmware updates failed", - response.failed_updates - ))); - } - - Ok(()) -} diff --git a/crates/admin-cli/src/rack_firmware/apply/mod.rs b/crates/admin-cli/src/rack_firmware/apply/mod.rs deleted file mode 100644 index 9e950ecda7..0000000000 --- a/crates/admin-cli/src/rack_firmware/apply/mod.rs +++ /dev/null @@ -1,32 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -pub mod args; -pub mod cmd; - -pub use args::Args; - -use crate::cfg::run::Run; -use crate::cfg::runtime::RuntimeContext; -use crate::errors::CarbideCliResult; - -impl Run for Args { - async fn run(self, ctx: &mut RuntimeContext) -> CarbideCliResult<()> { - cmd::apply(self, ctx.config.format, &ctx.api_client).await?; - Ok(()) - } -} diff --git a/crates/admin-cli/src/rack_firmware/create/args.rs b/crates/admin-cli/src/rack_firmware/create/args.rs deleted file mode 100644 index 506c37c842..0000000000 --- a/crates/admin-cli/src/rack_firmware/create/args.rs +++ /dev/null @@ -1,58 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -use std::path::PathBuf; - -use clap::Parser; - -use crate::errors::CarbideCliError; - -#[derive(Parser, Debug)] -pub struct Args { - #[clap(help = "Rack hardware type for this firmware configuration.")] - pub rack_hardware_type: String, - #[clap(help = "Path to JSON configuration file.")] - pub json_file: PathBuf, - #[clap(help = "Artifactory token for downloading firmware files.")] - pub artifactory_token: String, -} - -impl TryFrom for rpc::forge::RackFirmwareCreateRequest { - type Error = CarbideCliError; - - fn try_from(args: Args) -> Result { - let config_json = std::fs::read_to_string(&args.json_file).map_err(|e| { - CarbideCliError::GenericError(format!( - "Failed to read file {}: {}", - args.json_file.display(), - e - )) - })?; - - // Check that the JSON is valid. - serde_json::from_str::(&config_json) - .map_err(|e| CarbideCliError::GenericError(format!("Invalid JSON in file: {}", e)))?; - - Ok(Self { - rack_hardware_type: Some(rpc::common::RackHardwareType { - value: args.rack_hardware_type, - }), - config_json, - artifactory_token: args.artifactory_token, - }) - } -} diff --git a/crates/admin-cli/src/rack_firmware/create/cmd.rs b/crates/admin-cli/src/rack_firmware/create/cmd.rs deleted file mode 100644 index 0de409644d..0000000000 --- a/crates/admin-cli/src/rack_firmware/create/cmd.rs +++ /dev/null @@ -1,51 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -use ::rpc::admin_cli::OutputFormat; -use prettytable::{Table, row}; - -use super::args::Args; -use crate::errors::CarbideCliError; -use crate::rpc::ApiClient; - -pub async fn create( - opts: Args, - format: OutputFormat, - api_client: &ApiClient, -) -> Result<(), CarbideCliError> { - let request: rpc::forge::RackFirmwareCreateRequest = opts.try_into()?; - let result = api_client.0.create_rack_firmware(request).await?; - - if format == OutputFormat::Json { - println!("{}", serde_json::to_string_pretty(&result)?); - } else { - let mut table = Table::new(); - table.add_row(row!["ID", result.id]); - let hw_type = result - .rack_hardware_type - .as_ref() - .map(|t| t.value.as_str()) - .unwrap_or("N/A"); - table.add_row(row!["Hardware Type", hw_type]); - table.add_row(row!["Default", result.is_default]); - table.add_row(row!["Available", result.available]); - table.add_row(row!["Created", result.created]); - table.printstd(); - } - - Ok(()) -} diff --git a/crates/admin-cli/src/rack_firmware/create/mod.rs b/crates/admin-cli/src/rack_firmware/create/mod.rs deleted file mode 100644 index 4c9c725ec4..0000000000 --- a/crates/admin-cli/src/rack_firmware/create/mod.rs +++ /dev/null @@ -1,32 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -pub mod args; -pub mod cmd; - -pub use args::Args; - -use crate::cfg::run::Run; -use crate::cfg::runtime::RuntimeContext; -use crate::errors::CarbideCliResult; - -impl Run for Args { - async fn run(self, ctx: &mut RuntimeContext) -> CarbideCliResult<()> { - cmd::create(self, ctx.config.format, &ctx.api_client).await?; - Ok(()) - } -} diff --git a/crates/admin-cli/src/rack_firmware/delete/args.rs b/crates/admin-cli/src/rack_firmware/delete/args.rs deleted file mode 100644 index 888afd088c..0000000000 --- a/crates/admin-cli/src/rack_firmware/delete/args.rs +++ /dev/null @@ -1,30 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -use clap::Parser; - -#[derive(Parser, Debug)] -pub struct Args { - #[clap(help = "ID of the configuration to delete")] - pub id: String, -} - -impl From for rpc::forge::RackFirmwareDeleteRequest { - fn from(args: Args) -> Self { - Self { id: args.id } - } -} diff --git a/crates/admin-cli/src/rack_firmware/delete/cmd.rs b/crates/admin-cli/src/rack_firmware/delete/cmd.rs deleted file mode 100644 index ab5202a401..0000000000 --- a/crates/admin-cli/src/rack_firmware/delete/cmd.rs +++ /dev/null @@ -1,39 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -use super::args::Args; -use crate::errors::CarbideCliError; -use crate::rpc::ApiClient; - -pub async fn delete(opts: Args, api_client: &ApiClient) -> Result<(), CarbideCliError> { - let id = opts.id.clone(); - - match api_client.0.delete_rack_firmware(opts).await { - Ok(_) => { - println!("Deleted Rack firmware configuration: {}", id); - } - Err(status) if status.code() == tonic::Code::NotFound => { - return Err(CarbideCliError::GenericError(format!( - "Rack firmware configuration not found: {}", - id - ))); - } - Err(err) => return Err(CarbideCliError::from(err)), - } - - Ok(()) -} diff --git a/crates/admin-cli/src/rack_firmware/delete/mod.rs b/crates/admin-cli/src/rack_firmware/delete/mod.rs deleted file mode 100644 index 5e31e1c4a1..0000000000 --- a/crates/admin-cli/src/rack_firmware/delete/mod.rs +++ /dev/null @@ -1,32 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -pub mod args; -pub mod cmd; - -pub use args::Args; - -use crate::cfg::run::Run; -use crate::cfg::runtime::RuntimeContext; -use crate::errors::CarbideCliResult; - -impl Run for Args { - async fn run(self, ctx: &mut RuntimeContext) -> CarbideCliResult<()> { - cmd::delete(self, &ctx.api_client).await?; - Ok(()) - } -} diff --git a/crates/admin-cli/src/rack_firmware/get/args.rs b/crates/admin-cli/src/rack_firmware/get/args.rs deleted file mode 100644 index d75ddee631..0000000000 --- a/crates/admin-cli/src/rack_firmware/get/args.rs +++ /dev/null @@ -1,30 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -use clap::Parser; - -#[derive(Parser, Debug)] -pub struct Args { - #[clap(help = "ID of the configuration to retrieve")] - pub id: String, -} - -impl From for rpc::forge::RackFirmwareGetRequest { - fn from(args: Args) -> Self { - Self { id: args.id } - } -} diff --git a/crates/admin-cli/src/rack_firmware/get/cmd.rs b/crates/admin-cli/src/rack_firmware/get/cmd.rs deleted file mode 100644 index 541b62b8c7..0000000000 --- a/crates/admin-cli/src/rack_firmware/get/cmd.rs +++ /dev/null @@ -1,221 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
- * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -use std::collections::BTreeMap; - -use ::rpc::admin_cli::OutputFormat; -use prettytable::{Cell, Row, Table, row}; -use serde::Deserialize; - -use super::args::Args; -use crate::errors::CarbideCliError; -use crate::rpc::ApiClient; - -#[derive(Debug, Deserialize, Default)] -struct ParsedFirmwareLookupTable { - #[serde(default)] - devices: BTreeMap>, - #[serde(default)] - switch_system_images: BTreeMap>, -} - -#[derive(Debug, Deserialize, Default)] -struct FirmwareLookupEntry { - #[serde(default)] - component: String, - #[serde(default)] - bundle: String, - #[serde(default)] - firmware_type: String, - #[serde(default)] - target: String, - #[serde(default)] - subcomponents: Vec, -} - -#[derive(Debug, Deserialize, Default)] -struct FirmwareSubcomponent { - #[serde(default)] - component: String, - #[serde(default)] - version: String, - #[serde(default)] - skuid: Option, -} - -#[derive(Debug, Deserialize, Default)] -struct SwitchSystemImageLookupEntry { - #[serde(default)] - component: String, - #[serde(default)] - package_name: String, - #[serde(default)] - version: String, - #[serde(default)] - image_filename: String, - #[serde(default)] - location_type: String, - #[serde(default)] - firmware_type: String, -} - -pub async fn get( - opts: Args, - format: OutputFormat, - api_client: &ApiClient, -) -> Result<(), CarbideCliError> { - let id = opts.id.clone(); - - let 
result = match api_client.0.get_rack_firmware(opts).await { - Ok(response) => response, - Err(status) if status.code() == tonic::Code::NotFound => { - return Err(CarbideCliError::GenericError(format!( - "Rack firmware configuration not found: {}", - id - ))); - } - Err(err) => return Err(CarbideCliError::from(err)), - }; - - if format == OutputFormat::Json { - println!("{}", serde_json::to_string_pretty(&result)?); - } else { - let mut table = Table::new(); - table.add_row(row!["ID", result.id]); - let hw_type = result - .rack_hardware_type - .as_ref() - .map(|t| t.value.as_str()) - .unwrap_or("N/A"); - table.add_row(row!["Hardware Type", hw_type]); - table.add_row(row!["Default", result.is_default]); - table.add_row(row!["Available", result.available]); - table.add_row(row!["Created", result.created]); - table.add_row(row!["Updated", result.updated]); - table.printstd(); - - // Display parsed firmware components. - if should_show_not_downloaded(&result.parsed_components) { - println!("\nFirmware Components: (not yet downloaded)"); - } else { - match serde_json::from_str::(&result.parsed_components) { - Ok(parsed) if print_parsed_components(&parsed) => {} - _ => println!("\nFirmware Components: (not yet downloaded)"), - } - } - } - - Ok(()) -} - -fn should_show_not_downloaded(parsed_components: &str) -> bool { - parsed_components.is_empty() || parsed_components == "{}" -} - -fn print_parsed_components(parsed: &ParsedFirmwareLookupTable) -> bool { - let mut printed = false; - - for (device_type, components) in &parsed.devices { - printed = true; - println!("\n[{}]", device_type); - - let mut component_table = Table::new(); - component_table.set_titles(Row::new(vec![ - Cell::new("Component"), - Cell::new("Type"), - Cell::new("Bundle"), - Cell::new("Target"), - ])); - - let mut component_subcomps: Vec<(&str, &[FirmwareSubcomponent])> = Vec::new(); - - for entry in components.values() { - component_table.add_row(Row::new(vec![ - 
Cell::new(display_value(&entry.component)), - Cell::new(&display_value(&entry.firmware_type).to_uppercase()), - Cell::new(display_value(&entry.bundle)), - Cell::new(display_value(&entry.target)), - ])); - - if !entry.subcomponents.is_empty() { - component_subcomps.push((display_value(&entry.component), &entry.subcomponents)); - } - } - - component_table.printstd(); - - for (comp_name, subcomps) in component_subcomps { - println!("\n {} Subcomponents:", comp_name); - - let mut sub_table = Table::new(); - sub_table.set_titles(Row::new(vec![ - Cell::new("Component"), - Cell::new("Version"), - Cell::new("SKUID"), - ])); - - for subcomp in subcomps { - let sub_skuid = subcomp.skuid.as_deref().unwrap_or("-"); - - sub_table.add_row(Row::new(vec![ - Cell::new(display_value(&subcomp.component)), - Cell::new(display_value(&subcomp.version)), - Cell::new(display_value(sub_skuid)), - ])); - } - - let table_str = sub_table.to_string(); - for line in table_str.lines() { - println!(" {}", line); - } - } - } - - for (device_type, images) in &parsed.switch_system_images { - printed = true; - println!("\n[Switch System Images: {}]", device_type); - - let mut image_table = Table::new(); - image_table.set_titles(Row::new(vec![ - Cell::new("Component"), - Cell::new("Type"), - Cell::new("Package"), - Cell::new("Image Filename"), - Cell::new("Version"), - Cell::new("Location Type"), - ])); - - for entry in images.values() { - image_table.add_row(Row::new(vec![ - Cell::new(display_value(&entry.component)), - Cell::new(&display_value(&entry.firmware_type).to_uppercase()), - Cell::new(display_value(&entry.package_name)), - Cell::new(display_value(&entry.image_filename)), - Cell::new(display_value(&entry.version)), - Cell::new(display_value(&entry.location_type)), - ])); - } - - image_table.printstd(); - } - - printed -} - -fn display_value(value: &str) -> &str { - if value.is_empty() { "-" } else { value } -} diff --git a/crates/admin-cli/src/rack_firmware/get/mod.rs 
b/crates/admin-cli/src/rack_firmware/get/mod.rs deleted file mode 100644 index 6af7c685b1..0000000000 --- a/crates/admin-cli/src/rack_firmware/get/mod.rs +++ /dev/null @@ -1,32 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -pub mod args; -pub mod cmd; - -pub use args::Args; - -use crate::cfg::run::Run; -use crate::cfg::runtime::RuntimeContext; -use crate::errors::CarbideCliResult; - -impl Run for Args { - async fn run(self, ctx: &mut RuntimeContext) -> CarbideCliResult<()> { - cmd::get(self, ctx.config.format, &ctx.api_client).await?; - Ok(()) - } -} diff --git a/crates/admin-cli/src/rack_firmware/history/args.rs b/crates/admin-cli/src/rack_firmware/history/args.rs deleted file mode 100644 index aab1cd8e58..0000000000 --- a/crates/admin-cli/src/rack_firmware/history/args.rs +++ /dev/null @@ -1,36 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -use clap::Parser; - -#[derive(Parser, Debug)] -pub struct Args { - #[clap(long, help = "Filter by firmware ID")] - pub firmware_id: Option, - - #[clap(long, help = "Filter by rack ID(s)")] - pub rack_id: Vec, -} - -impl From for rpc::forge::RackFirmwareHistoryRequest { - fn from(args: Args) -> Self { - Self { - firmware_id: args.firmware_id.unwrap_or_default(), - rack_ids: args.rack_id, - } - } -} diff --git a/crates/admin-cli/src/rack_firmware/history/cmd.rs b/crates/admin-cli/src/rack_firmware/history/cmd.rs deleted file mode 100644 index f76415ef35..0000000000 --- a/crates/admin-cli/src/rack_firmware/history/cmd.rs +++ /dev/null @@ -1,78 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -use ::rpc::admin_cli::OutputFormat; -use prettytable::{Cell, Row, Table}; - -use super::args::Args; -use crate::errors::CarbideCliError; -use crate::rpc::ApiClient; - -pub async fn history( - opts: Args, - format: OutputFormat, - api_client: &ApiClient, -) -> Result<(), CarbideCliError> { - let result = api_client.0.get_rack_firmware_history(opts).await?; - - if format == OutputFormat::Json { - // Flatten to map> for serialization - let json_histories: std::collections::HashMap< - &str, - Vec<&rpc::forge::RackFirmwareHistoryRecord>, - > = result - .histories - .iter() - .map(|(rack_id, records)| (rack_id.as_str(), records.records.iter().collect())) - .collect(); - println!("{}", serde_json::to_string_pretty(&json_histories)?); - } else if result.histories.is_empty() { - println!("No rack firmware apply history found."); - } else { - let mut table = Table::new(); - table.set_titles(Row::new(vec![ - Cell::new("Rack ID"), - Cell::new("Firmware ID"), - Cell::new("Hardware Type"), - Cell::new("Firmware Type"), - Cell::new("Applied At"), - Cell::new("Available"), - ])); - - for (rack_id, records) in &result.histories { - for record in &records.records { - let hw_type = record - .rack_hardware_type - .as_ref() - .map(|t| t.value.as_str()) - .unwrap_or("N/A"); - table.add_row(Row::new(vec![ - Cell::new(rack_id), - Cell::new(&record.firmware_id), - Cell::new(hw_type), - Cell::new(&record.firmware_type), - Cell::new(&record.applied_at), - Cell::new(&record.firmware_available.to_string()), - ])); - } - } - - table.printstd(); - } - - Ok(()) -} diff --git a/crates/admin-cli/src/rack_firmware/history/mod.rs b/crates/admin-cli/src/rack_firmware/history/mod.rs deleted file mode 100644 index 860a518d23..0000000000 --- a/crates/admin-cli/src/rack_firmware/history/mod.rs +++ /dev/null @@ -1,32 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
- * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -pub mod args; -pub mod cmd; - -pub use args::Args; - -use crate::cfg::run::Run; -use crate::cfg::runtime::RuntimeContext; -use crate::errors::CarbideCliResult; - -impl Run for Args { - async fn run(self, ctx: &mut RuntimeContext) -> CarbideCliResult<()> { - cmd::history(self, ctx.config.format, &ctx.api_client).await?; - Ok(()) - } -} diff --git a/crates/admin-cli/src/rack_firmware/list/args.rs b/crates/admin-cli/src/rack_firmware/list/args.rs deleted file mode 100644 index c47a786932..0000000000 --- a/crates/admin-cli/src/rack_firmware/list/args.rs +++ /dev/null @@ -1,37 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -use clap::Parser; - -#[derive(Parser, Debug)] -pub struct Args { - #[clap(long, help = "Show only available configurations.")] - pub only_available: bool, - #[clap(help = "Filter by rack hardware type.")] - pub rack_hardware_type: Option, -} - -impl From for rpc::forge::RackFirmwareSearchFilter { - fn from(args: Args) -> Self { - Self { - only_available: args.only_available, - rack_hardware_type: args - .rack_hardware_type - .map(|v| rpc::common::RackHardwareType { value: v }), - } - } -} diff --git a/crates/admin-cli/src/rack_firmware/list/cmd.rs b/crates/admin-cli/src/rack_firmware/list/cmd.rs deleted file mode 100644 index c5550ea0d1..0000000000 --- a/crates/admin-cli/src/rack_firmware/list/cmd.rs +++ /dev/null @@ -1,68 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -use ::rpc::admin_cli::OutputFormat; -use prettytable::{Cell, Row, Table}; - -use super::args::Args; -use crate::errors::CarbideCliError; -use crate::rpc::ApiClient; - -pub async fn list( - opts: Args, - format: OutputFormat, - api_client: &ApiClient, -) -> Result<(), CarbideCliError> { - let result = api_client.0.list_rack_firmware(opts).await?; - - if format == OutputFormat::Json { - println!("{}", serde_json::to_string_pretty(&result.configs)?); - } else if result.configs.is_empty() { - println!("No Rack firmware configurations found."); - } else { - let mut table = Table::new(); - table.set_titles(Row::new(vec![ - Cell::new("ID"), - Cell::new("Hardware Type"), - Cell::new("Default"), - Cell::new("Available"), - Cell::new("Created"), - Cell::new("Updated"), - ])); - - for config in result.configs { - let hw_type = config - .rack_hardware_type - .as_ref() - .map(|t| t.value.as_str()) - .unwrap_or("N/A"); - let default_marker = if config.is_default { "*" } else { "" }; - table.add_row(Row::new(vec![ - Cell::new(&config.id), - Cell::new(hw_type), - Cell::new(default_marker), - Cell::new(&config.available.to_string()), - Cell::new(&config.created), - Cell::new(&config.updated), - ])); - } - - table.printstd(); - } - - Ok(()) -} diff --git a/crates/admin-cli/src/rack_firmware/list/mod.rs b/crates/admin-cli/src/rack_firmware/list/mod.rs deleted file mode 100644 index 1205da171f..0000000000 --- a/crates/admin-cli/src/rack_firmware/list/mod.rs +++ /dev/null @@ -1,32 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -pub mod args; -pub mod cmd; - -pub use args::Args; - -use crate::cfg::run::Run; -use crate::cfg::runtime::RuntimeContext; -use crate::errors::CarbideCliResult; - -impl Run for Args { - async fn run(self, ctx: &mut RuntimeContext) -> CarbideCliResult<()> { - cmd::list(self, ctx.config.format, &ctx.api_client).await?; - Ok(()) - } -} diff --git a/crates/admin-cli/src/rack_firmware/mod.rs b/crates/admin-cli/src/rack_firmware/mod.rs deleted file mode 100644 index 2b9a5f7df6..0000000000 --- a/crates/admin-cli/src/rack_firmware/mod.rs +++ /dev/null @@ -1,59 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -mod apply; -mod create; -mod delete; -mod get; -mod history; -mod list; -mod set_default; -mod status; - -#[cfg(test)] -mod tests; - -use clap::Parser; - -use crate::cfg::dispatch::Dispatch; - -#[derive(Parser, Debug, Dispatch)] -pub enum Cmd { - #[clap(about = "Create a new Rack firmware configuration from JSON file")] - Create(create::Args), - - #[clap(about = "Get a Rack firmware configuration by ID")] - Get(get::Args), - - #[clap(about = "List all Rack firmware configurations")] - List(list::Args), - - #[clap(about = "Delete a Rack firmware configuration")] - Delete(delete::Args), - - #[clap(about = "Apply firmware to all devices in a rack")] - Apply(apply::Args), - - #[clap(about = "Check the status of an async firmware update job")] - Status(status::Args), - - #[clap(about = "Show history of rack firmware apply operations")] - History(history::Args), - - #[clap(about = "Set a firmware configuration as the default for its hardware type")] - SetDefault(set_default::Args), -} diff --git a/crates/admin-cli/src/rack_firmware/set_default/args.rs b/crates/admin-cli/src/rack_firmware/set_default/args.rs deleted file mode 100644 index d49d7ddbf4..0000000000 --- a/crates/admin-cli/src/rack_firmware/set_default/args.rs +++ /dev/null @@ -1,32 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -use clap::Parser; - -#[derive(Parser, Debug)] -pub struct Args { - #[clap(help = "Firmware configuration ID to set as default.")] - pub firmware_id: String, -} - -impl From for rpc::forge::RackFirmwareSetDefaultRequest { - fn from(args: Args) -> Self { - Self { - firmware_id: args.firmware_id, - } - } -} diff --git a/crates/admin-cli/src/rack_firmware/set_default/cmd.rs b/crates/admin-cli/src/rack_firmware/set_default/cmd.rs deleted file mode 100644 index 3e74707809..0000000000 --- a/crates/admin-cli/src/rack_firmware/set_default/cmd.rs +++ /dev/null @@ -1,27 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -use super::args::Args; -use crate::errors::CarbideCliError; -use crate::rpc::ApiClient; - -pub async fn set_default(opts: Args, api_client: &ApiClient) -> Result<(), CarbideCliError> { - let firmware_id = opts.firmware_id.clone(); - api_client.0.rack_firmware_set_default(opts).await?; - println!("Set firmware '{}' as default.", firmware_id); - Ok(()) -} diff --git a/crates/admin-cli/src/rack_firmware/set_default/mod.rs b/crates/admin-cli/src/rack_firmware/set_default/mod.rs deleted file mode 100644 index 8cbe3ca827..0000000000 --- a/crates/admin-cli/src/rack_firmware/set_default/mod.rs +++ /dev/null @@ -1,32 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -pub mod args; -pub mod cmd; - -pub use args::Args; - -use crate::cfg::run::Run; -use crate::cfg::runtime::RuntimeContext; -use crate::errors::CarbideCliResult; - -impl Run for Args { - async fn run(self, ctx: &mut RuntimeContext) -> CarbideCliResult<()> { - cmd::set_default(self, &ctx.api_client).await?; - Ok(()) - } -} diff --git a/crates/admin-cli/src/rack_firmware/status/args.rs b/crates/admin-cli/src/rack_firmware/status/args.rs deleted file mode 100644 index a25de14fdb..0000000000 --- a/crates/admin-cli/src/rack_firmware/status/args.rs +++ /dev/null @@ -1,32 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -use clap::Parser; - -#[derive(Parser, Debug)] -pub struct Args { - #[clap(help = "Job ID to check status for (from apply output)")] - pub job_id: String, -} - -impl From for rpc::forge::RackFirmwareJobStatusRequest { - fn from(args: Args) -> Self { - Self { - job_id: args.job_id, - } - } -} diff --git a/crates/admin-cli/src/rack_firmware/status/cmd.rs b/crates/admin-cli/src/rack_firmware/status/cmd.rs deleted file mode 100644 index 954f4bf367..0000000000 --- a/crates/admin-cli/src/rack_firmware/status/cmd.rs +++ /dev/null @@ -1,64 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
- * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -use ::rpc::admin_cli::OutputFormat; - -use super::args::Args; -use crate::errors::CarbideCliError; -use crate::rpc::ApiClient; - -pub async fn get_job_status( - opts: Args, - format: OutputFormat, - api_client: &ApiClient, -) -> Result<(), CarbideCliError> { - let response = api_client - .0 - .get_rack_firmware_job_status(opts) - .await - .map_err(CarbideCliError::from)?; - - if format == OutputFormat::Json { - let result = serde_json::json!({ - "job_id": response.job_id, - "state": response.state, - "state_description": response.state_description, - "rack_id": response.rack_id, - "node_id": response.node_id, - "error_message": response.error_message, - "result_json": response.result_json, - }); - println!("{}", serde_json::to_string_pretty(&result)?); - } else { - println!("Firmware Job Status"); - println!(" Job ID: {}", response.job_id); - println!(" State: {}", response.state); - println!(" Description: {}", response.state_description); - println!(" Rack: {}", response.rack_id); - println!(" Node: {}", response.node_id); - - if !response.error_message.is_empty() { - println!(" Error: {}", response.error_message); - } - - if !response.result_json.is_empty() { - println!(" Result: {}", response.result_json); - } - } - - Ok(()) -} diff --git a/crates/admin-cli/src/rack_firmware/status/mod.rs b/crates/admin-cli/src/rack_firmware/status/mod.rs deleted file mode 
100644 index 2576bb71c3..0000000000 --- a/crates/admin-cli/src/rack_firmware/status/mod.rs +++ /dev/null @@ -1,32 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -pub mod args; -pub mod cmd; - -pub use args::Args; - -use crate::cfg::run::Run; -use crate::cfg::runtime::RuntimeContext; -use crate::errors::CarbideCliResult; - -impl Run for Args { - async fn run(self, ctx: &mut RuntimeContext) -> CarbideCliResult<()> { - cmd::get_job_status(self, ctx.config.format, &ctx.api_client).await?; - Ok(()) - } -} diff --git a/crates/admin-cli/src/rack_firmware/tests.rs b/crates/admin-cli/src/rack_firmware/tests.rs deleted file mode 100644 index b7bc2cc1de..0000000000 --- a/crates/admin-cli/src/rack_firmware/tests.rs +++ /dev/null @@ -1,151 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// The intent of the tests.rs file is to test the integrity of the -// command, including things like basic structure parsing, enum -// translations, and any external input validators that are -// configured. Specific "categories" are: -// -// Command Structure - Baseline debug_assert() of the entire command. -// Argument Parsing - Ensure required/optional arg combinations parse correctly. - -use clap::{CommandFactory, Parser}; - -use super::*; - -// verify_cmd_structure runs a baseline clap debug_assert() -// to do basic command configuration checking and validation, -// ensuring things like unique argument definitions, group -// configurations, argument references, etc. Things that would -// otherwise be missed until runtime. -#[test] -fn verify_cmd_structure() { - Cmd::command().debug_assert(); -} - -///////////////////////////////////////////////////////////////////////////// -// Argument Parsing -// -// This section contains tests specific to argument parsing, -// including testing required arguments, as well as optional -// flag-specific checking. - -// parse_list_defaults ensures list parses with default values. -#[test] -fn parse_list_defaults() { - let cmd = Cmd::try_parse_from(["rack-firmware", "list"]).expect("should parse list"); - - match cmd { - Cmd::List(args) => { - assert!(!args.only_available); - } - _ => panic!("expected List variant"), - } -} - -// parse_list_only_available ensures list parses with --only-available flag. 
-#[test] -fn parse_list_only_available() { - let cmd = Cmd::try_parse_from(["rack-firmware", "list", "--only-available"]) - .expect("should parse list with only-available"); - - match cmd { - Cmd::List(args) => { - assert!(args.only_available); - } - _ => panic!("expected List variant"), - } -} - -// parse_list_with_hardware_type_filter ensures list parses with a -// hardware type filter. -#[test] -fn parse_list_with_hardware_type_filter() { - let cmd = Cmd::try_parse_from(["rack-firmware", "list", "any"]) - .expect("should parse list with hardware type filter"); - - match cmd { - Cmd::List(args) => { - assert!(!args.only_available); - assert_eq!(args.rack_hardware_type, Some("any".to_string())); - } - _ => panic!("expected List variant"), - } -} - -// parse_create_with_hardware_type ensures create parses with -// hardware type as first argument. -#[test] -fn parse_create_with_hardware_type() { - let cmd = Cmd::try_parse_from([ - "rack-firmware", - "create", - "any", - "/tmp/test.json", - "test-token", - ]) - .expect("should parse create with hardware type"); - - match cmd { - Cmd::Create(args) => { - assert_eq!(args.rack_hardware_type, "any"); - } - _ => panic!("expected Create variant"), - } -} - -// parse_create_missing_args_fails ensures create fails without required args. -#[test] -fn parse_create_missing_args_fails() { - let result = Cmd::try_parse_from(["rack-firmware", "create"]); - assert!(result.is_err(), "should fail without json_file and token"); -} - -// parse_get_missing_id_fails ensures get fails without ID. -#[test] -fn parse_get_missing_id_fails() { - let result = Cmd::try_parse_from(["rack-firmware", "get"]); - assert!(result.is_err(), "should fail without id"); -} - -// parse_delete_missing_id_fails ensures delete fails without ID. 
-#[test] -fn parse_delete_missing_id_fails() { - let result = Cmd::try_parse_from(["rack-firmware", "delete"]); - assert!(result.is_err(), "should fail without id"); -} - -// parse_set_default ensures set-default parses with firmware ID. -#[test] -fn parse_set_default() { - let cmd = Cmd::try_parse_from(["rack-firmware", "set-default", "fw-001"]) - .expect("should parse set-default"); - - match cmd { - Cmd::SetDefault(args) => { - assert_eq!(args.firmware_id, "fw-001"); - } - _ => panic!("expected SetDefault variant"), - } -} - -// parse_set_default_missing_id_fails ensures set-default fails without ID. -#[test] -fn parse_set_default_missing_id_fails() { - let result = Cmd::try_parse_from(["rack-firmware", "set-default"]); - assert!(result.is_err(), "should fail without firmware_id"); -} diff --git a/crates/api-db/migrations/20260521190000_drop_rack_firmware_tables.sql b/crates/api-db/migrations/20260521190000_drop_rack_firmware_tables.sql new file mode 100644 index 0000000000..8aeefa094d --- /dev/null +++ b/crates/api-db/migrations/20260521190000_drop_rack_firmware_tables.sql @@ -0,0 +1,4 @@ +-- RMS owns firmware object storage and apply history. Drop the legacy BMM-local +-- tables after the migration to RMS-backed firmware objects. 
+DROP TABLE IF EXISTS rack_firmware_apply_history; +DROP TABLE IF EXISTS rack_firmware; diff --git a/crates/api-db/src/lib.rs b/crates/api-db/src/lib.rs index 6dfdb40b09..26a2d5fa68 100644 --- a/crates/api-db/src/lib.rs +++ b/crates/api-db/src/lib.rs @@ -74,7 +74,6 @@ pub mod power_shelf; pub mod predicted_machine_interface; pub mod queries; pub mod rack; -pub mod rack_firmware; pub mod redfish_actions; pub mod resource_pool; pub mod route_servers; diff --git a/crates/api-db/src/rack_firmware.rs b/crates/api-db/src/rack_firmware.rs deleted file mode 100644 index 2179a8cbbf..0000000000 --- a/crates/api-db/src/rack_firmware.rs +++ /dev/null @@ -1,586 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -use chrono::{DateTime, Utc}; -use model::rack_firmware::{RackFirmware, RackFirmwareApplyHistoryRecord}; -use model::rack_type::RackHardwareType; -use sqlx::Error::RowNotFound; -use sqlx::postgres::PgRow; -use sqlx::types::Json; -use sqlx::{FromRow, PgConnection, Row}; - -use crate::db_read::DbReader; -use crate::{DatabaseError, DatabaseResult}; - -#[derive(Debug, Clone, FromRow)] -struct DbRackFirmwareApplyHistory { - #[allow(dead_code)] - id: i64, - firmware_id: String, - rack_id: String, - firmware_type: String, - rack_hardware_type: RackHardwareType, - applied_at: DateTime, -} - -#[derive(Debug, Clone)] -struct DbRackFirmwareApplyHistoryWithAvailability { - history: DbRackFirmwareApplyHistory, - firmware_available: bool, -} - -impl<'r> FromRow<'r, PgRow> for DbRackFirmwareApplyHistoryWithAvailability { - fn from_row(row: &'r PgRow) -> Result { - Ok(DbRackFirmwareApplyHistoryWithAvailability { - history: DbRackFirmwareApplyHistory::from_row(row)?, - firmware_available: row.try_get("firmware_available")?, - }) - } -} - -impl From for RackFirmwareApplyHistoryRecord { - fn from(row: DbRackFirmwareApplyHistoryWithAvailability) -> Self { - RackFirmwareApplyHistoryRecord { - firmware_id: row.history.firmware_id, - rack_id: row.history.rack_id, - firmware_type: row.history.firmware_type, - rack_hardware_type: row.history.rack_hardware_type, - applied_at: row.history.applied_at, - firmware_available: row.firmware_available, - } - } -} - -pub async fn record_apply_history( - txn: &mut PgConnection, - firmware_id: &str, - rack_id: &str, - firmware_type: &str, - rack_hardware_type: RackHardwareType, -) -> DatabaseResult<()> { - let query = "INSERT INTO rack_firmware_apply_history \ - (firmware_id, rack_id, firmware_type, rack_hardware_type) \ - VALUES ($1, $2, $3, $4)"; - - sqlx::query(query) - .bind(firmware_id) - .bind(rack_id) - .bind(firmware_type) - .bind(rack_hardware_type) - .execute(txn) - .await - .map_err(|e| DatabaseError::new(query, e))?; - Ok(()) -} - 
-/// List apply history, optionally filtered by firmware_id and/or rack_ids. -/// Joins against rack_firmware to report whether each firmware_id is still available. -pub async fn list_apply_history( - txn: &mut PgConnection, - firmware_id: Option<&str>, - rack_ids: &[String], -) -> DatabaseResult> { - let mut query = "SELECT h.*, COALESCE(rf.available, false) AS firmware_available \ - FROM rack_firmware_apply_history h \ - LEFT JOIN rack_firmware rf ON rf.id = h.firmware_id" - .to_string(); - - let mut param_idx = 1; - let mut conditions = Vec::new(); - - if firmware_id.is_some() { - conditions.push(format!("h.firmware_id = ${param_idx}")); - param_idx += 1; - } - if !rack_ids.is_empty() { - conditions.push(format!("h.rack_id = ANY(${param_idx})")); - } - if !conditions.is_empty() { - query.push_str(" WHERE "); - query.push_str(&conditions.join(" AND ")); - } - query.push_str(" ORDER BY h.applied_at DESC"); - - let mut q = sqlx::query_as(&query); - if let Some(fid) = firmware_id { - q = q.bind(fid); - } - if !rack_ids.is_empty() { - q = q.bind(rack_ids); - } - - let rows: Vec = q - .fetch_all(txn) - .await - .map_err(|e| DatabaseError::query(&query, e))?; - Ok(rows.into_iter().map(Into::into).collect()) -} - -pub async fn create( - txn: &mut PgConnection, - id: &str, - rack_hardware_type: RackHardwareType, - config: serde_json::Value, - parsed_components: Option, -) -> DatabaseResult { - let query = "INSERT INTO rack_firmware (id, rack_hardware_type, config, parsed_components) VALUES ($1, $2, $3::jsonb, $4::jsonb) RETURNING *"; - - sqlx::query_as(query) - .bind(id) - .bind(rack_hardware_type) - .bind(Json(config)) - .bind(parsed_components.map(Json)) - .fetch_one(txn) - .await - .map_err(|e| DatabaseError::new(query, e)) -} - -pub async fn find_by_id(txn: impl DbReader<'_>, id: &str) -> DatabaseResult { - let query = "SELECT * FROM rack_firmware WHERE id = $1"; - let ret = sqlx::query_as(query).bind(id).fetch_one(txn).await; - ret.map_err(|e| match e { - 
RowNotFound => DatabaseError::NotFoundError { - kind: "rack firmware", - id: format!("{id:?}"), - }, - _ => DatabaseError::query(query, e), - }) -} - -pub async fn list_all( - txn: &mut PgConnection, - filter: model::rack_firmware::RackFirmwareSearchFilter, -) -> DatabaseResult> { - let mut qb = sqlx::QueryBuilder::new("SELECT * FROM rack_firmware WHERE TRUE"); - - if filter.only_available { - qb.push(" AND available = true"); - } - if let Some(hw_type) = filter.rack_hardware_type { - qb.push(" AND rack_hardware_type = "); - qb.push_bind(hw_type); - } - - qb.push(" ORDER BY created DESC"); - - qb.build_query_as() - .fetch_all(txn) - .await - .map_err(|e| DatabaseError::new("rack_firmware::list_all", e)) -} - -pub async fn update_config( - txn: &mut PgConnection, - id: &str, - config: serde_json::Value, -) -> DatabaseResult { - let query = - "UPDATE rack_firmware SET config = $2::jsonb, updated = NOW() WHERE id = $1 RETURNING *"; - - sqlx::query_as(query) - .bind(id) - .bind(Json(config)) - .fetch_one(txn) - .await - .map_err(|e| DatabaseError::new(query, e)) -} - -pub async fn set_available( - txn: &mut PgConnection, - id: &str, - available: bool, -) -> DatabaseResult { - let query = - "UPDATE rack_firmware SET available = $2, updated = NOW() WHERE id = $1 RETURNING *"; - - sqlx::query_as(query) - .bind(id) - .bind(available) - .fetch_one(txn) - .await - .map_err(|e| DatabaseError::new(query, e)) -} - -/// Returns true if there is already a default firmware for the given rack_hardware_type. -pub async fn has_default( - txn: &mut PgConnection, - rack_hardware_type: &RackHardwareType, -) -> DatabaseResult { - let query = "SELECT EXISTS(SELECT 1 FROM rack_firmware WHERE rack_hardware_type = $1 AND is_default = true)"; - let (exists,) = sqlx::query_as(query) - .bind(rack_hardware_type) - .fetch_one(txn) - .await - .map_err(|e| DatabaseError::new(query, e))?; - Ok(exists) -} - -/// Finds the default firmware row for the given rack_hardware_type. 
-pub async fn find_default_by_rack_hardware_type( - txn: impl DbReader<'_>, - rack_hardware_type: &RackHardwareType, -) -> DatabaseResult { - let query = "SELECT * FROM rack_firmware WHERE rack_hardware_type = $1 AND is_default = true ORDER BY created DESC LIMIT 1"; - let ret = sqlx::query_as(query) - .bind(rack_hardware_type) - .fetch_optional(txn) - .await; - ret.map_err(|e| DatabaseError::query(query, e))? - .ok_or_else(|| DatabaseError::NotFoundError { - kind: "default rack firmware", - id: rack_hardware_type.to_string(), - }) -} - -/// Sets the given firmware as the default for its rack_hardware_type. -/// Clears any previous default for the same rack_hardware_type. -pub async fn set_default(txn: &mut PgConnection, id: &str) -> DatabaseResult { - // First, get the firmware to learn its rack_hardware_type. - let fw = find_by_id(&mut *txn, id).await?; - - // Clear previous default for this rack_hardware_type. - let clear_query = "UPDATE rack_firmware SET is_default = false WHERE rack_hardware_type = $1"; - sqlx::query(clear_query) - .bind(&fw.rack_hardware_type) - .execute(&mut *txn) - .await - .map_err(|e| DatabaseError::new(clear_query, e))?; - - // Set this one as default. 
- let set_query = - "UPDATE rack_firmware SET is_default = true, updated = NOW() WHERE id = $1 RETURNING *"; - sqlx::query_as(set_query) - .bind(id) - .fetch_one(txn) - .await - .map_err(|e| DatabaseError::new(set_query, e)) -} - -pub async fn delete(txn: &mut PgConnection, id: &str) -> DatabaseResult<()> { - let query = "DELETE FROM rack_firmware WHERE id = $1 RETURNING id"; - - sqlx::query_as::<_, (String,)>(query) - .bind(id) - .fetch_one(txn) - .await - .map_err(|e| DatabaseError::new(query, e))?; - - Ok(()) -} - -#[cfg(test)] -mod tests { - use serde_json::json; - - use super::*; - - #[crate::sqlx_test] - async fn test_apply_history_record_and_list(pool: sqlx::PgPool) { - let mut txn = pool.begin().await.unwrap(); - - // Create a firmware config so we can verify the availability join - create( - &mut txn, - "fw-001", - RackHardwareType::any(), - json!({"Id": "fw-001"}), - None, - ) - .await - .unwrap(); - set_available(&mut txn, "fw-001", true).await.unwrap(); - - // Record two apply events for the same firmware - record_apply_history( - &mut txn, - "fw-001", - "rack-a", - "prod", - RackHardwareType::any(), - ) - .await - .unwrap(); - record_apply_history(&mut txn, "fw-001", "rack-b", "dev", RackHardwareType::any()) - .await - .unwrap(); - - // List all history — should return both, newest first - let all = list_apply_history(&mut txn, None, &[]).await.unwrap(); - assert_eq!(all.len(), 2); - assert_eq!(all[0].rack_id, "rack-b"); - assert_eq!(all[1].rack_id, "rack-a"); - assert!(all[0].firmware_available); - assert!(all[1].firmware_available); - - // List filtered by firmware_id - let filtered = list_apply_history(&mut txn, Some("fw-001"), &[]) - .await - .unwrap(); - assert_eq!(filtered.len(), 2); - - // Filter by a non-existent firmware_id - let empty = list_apply_history(&mut txn, Some("fw-999"), &[]) - .await - .unwrap(); - assert!(empty.is_empty()); - - // Filter by rack_id - let by_rack = list_apply_history(&mut txn, None, &["rack-a".to_string()]) - 
.await - .unwrap(); - assert_eq!(by_rack.len(), 1); - assert_eq!(by_rack[0].rack_id, "rack-a"); - - // Filter by both firmware_id and rack_ids - let combined = list_apply_history(&mut txn, Some("fw-001"), &["rack-b".to_string()]) - .await - .unwrap(); - assert_eq!(combined.len(), 1); - assert_eq!(combined[0].rack_id, "rack-b"); - } - - #[crate::sqlx_test] - async fn test_apply_history_firmware_available_reflects_deletion(pool: sqlx::PgPool) { - let mut txn = pool.begin().await.unwrap(); - - // Create firmware and mark available - create( - &mut txn, - "fw-002", - RackHardwareType::any(), - json!({"Id": "fw-002"}), - None, - ) - .await - .unwrap(); - set_available(&mut txn, "fw-002", true).await.unwrap(); - - // Record an apply - record_apply_history( - &mut txn, - "fw-002", - "rack-a", - "prod", - RackHardwareType::any(), - ) - .await - .unwrap(); - - // Verify available = true - let before = list_apply_history(&mut txn, Some("fw-002"), &[]) - .await - .unwrap(); - assert_eq!(before.len(), 1); - assert!(before[0].firmware_available); - - // Delete the firmware - delete(&mut txn, "fw-002").await.unwrap(); - - // History entry still exists but firmware_available is now false - let after = list_apply_history(&mut txn, Some("fw-002"), &[]) - .await - .unwrap(); - assert_eq!(after.len(), 1); - assert!(!after[0].firmware_available); - } - - #[crate::sqlx_test] - async fn test_apply_history_unavailable_firmware(pool: sqlx::PgPool) { - let mut txn = pool.begin().await.unwrap(); - - // Record history for a firmware_id that was never created - record_apply_history( - &mut txn, - "fw-ghost", - "rack-a", - "prod", - RackHardwareType::any(), - ) - .await - .unwrap(); - - let history = list_apply_history(&mut txn, None, &[]).await.unwrap(); - assert_eq!(history.len(), 1); - assert_eq!(history[0].firmware_id, "fw-ghost"); - assert!(!history[0].firmware_available); - } - - #[crate::sqlx_test] - async fn test_create_firmware_defaults_to_not_default(pool: sqlx::PgPool) { - let mut 
txn = pool.begin().await.unwrap(); - - let fw = create( - &mut txn, - "fw-010", - RackHardwareType::any(), - json!({"Id": "fw-010"}), - None, - ) - .await - .unwrap(); - - assert!(!fw.is_default); - } - - #[crate::sqlx_test] - async fn test_has_default_returns_false_when_none_set(pool: sqlx::PgPool) { - let mut txn = pool.begin().await.unwrap(); - - create( - &mut txn, - "fw-011", - RackHardwareType::any(), - json!({"Id": "fw-011"}), - None, - ) - .await - .unwrap(); - - assert!( - !has_default(&mut txn, &RackHardwareType::any()) - .await - .unwrap() - ); - } - - #[crate::sqlx_test] - async fn test_set_default_and_has_default(pool: sqlx::PgPool) { - let mut txn = pool.begin().await.unwrap(); - - create( - &mut txn, - "fw-012", - RackHardwareType::any(), - json!({"Id": "fw-012"}), - None, - ) - .await - .unwrap(); - - // Set as default. - let fw = set_default(&mut txn, "fw-012").await.unwrap(); - assert!(fw.is_default); - - // has_default should now return true. - assert!( - has_default(&mut txn, &RackHardwareType::any()) - .await - .unwrap() - ); - - let default_fw = find_default_by_rack_hardware_type(&mut *txn, &RackHardwareType::any()) - .await - .unwrap(); - assert_eq!(default_fw.id, "fw-012"); - assert!(default_fw.is_default); - } - - #[crate::sqlx_test] - async fn test_set_default_clears_previous_default(pool: sqlx::PgPool) { - let mut txn = pool.begin().await.unwrap(); - - let hw_type = RackHardwareType::from("test-type"); - - create( - &mut txn, - "fw-a", - hw_type.clone(), - json!({"Id": "fw-a"}), - None, - ) - .await - .unwrap(); - create( - &mut txn, - "fw-b", - hw_type.clone(), - json!({"Id": "fw-b"}), - None, - ) - .await - .unwrap(); - - // Set fw-a as default. - set_default(&mut txn, "fw-a").await.unwrap(); - let a = find_by_id(&mut *txn, "fw-a").await.unwrap(); - assert!(a.is_default); - - // Set fw-b as default — fw-a should lose default. 
- set_default(&mut txn, "fw-b").await.unwrap(); - let a = find_by_id(&mut *txn, "fw-a").await.unwrap(); - let b = find_by_id(&mut *txn, "fw-b").await.unwrap(); - assert!(!a.is_default); - assert!(b.is_default); - } - - #[crate::sqlx_test] - async fn test_set_default_does_not_affect_other_hardware_types(pool: sqlx::PgPool) { - let mut txn = pool.begin().await.unwrap(); - - let type_a = RackHardwareType::from("type-a"); - let type_b = RackHardwareType::from("type-b"); - - create( - &mut txn, - "fw-x", - type_a.clone(), - json!({"Id": "fw-x"}), - None, - ) - .await - .unwrap(); - create( - &mut txn, - "fw-y", - type_b.clone(), - json!({"Id": "fw-y"}), - None, - ) - .await - .unwrap(); - - // Set both as default for their respective types. - set_default(&mut txn, "fw-x").await.unwrap(); - set_default(&mut txn, "fw-y").await.unwrap(); - - // Both should be default (different hardware types). - let x = find_by_id(&mut *txn, "fw-x").await.unwrap(); - let y = find_by_id(&mut *txn, "fw-y").await.unwrap(); - assert!(x.is_default); - assert!(y.is_default); - } - - #[crate::sqlx_test] - async fn test_find_default_by_rack_hardware_type_not_found(pool: sqlx::PgPool) { - let mut txn = pool.begin().await.unwrap(); - - create( - &mut txn, - "fw-z", - RackHardwareType::from("type-z"), - json!({"Id": "fw-z"}), - None, - ) - .await - .unwrap(); - - let err = find_default_by_rack_hardware_type(&mut *txn, &RackHardwareType::from("type-z")) - .await - .unwrap_err(); - assert!(matches!(err, DatabaseError::NotFoundError { .. 
})); - } -} diff --git a/crates/api-integration-tests/tests/vault_catalogue.rs b/crates/api-integration-tests/tests/vault_catalogue.rs index 3ac9f0c6d8..c3922a687a 100644 --- a/crates/api-integration-tests/tests/vault_catalogue.rs +++ b/crates/api-integration-tests/tests/vault_catalogue.rs @@ -98,12 +98,6 @@ async fn setup_vault_with_secrets() -> Option<( }, cred("mqtt-user", "mqtt-pass"), ), - ( - CredentialKey::RackFirmware { - firmware_id: "fw-1".to_string(), - }, - cred("fw-user", "fw-pass"), - ), ]; for (key, c) in &secrets { diff --git a/crates/api-model/src/lib.rs b/crates/api-model/src/lib.rs index b48045d3e3..d21208fbfb 100644 --- a/crates/api-model/src/lib.rs +++ b/crates/api-model/src/lib.rs @@ -81,7 +81,6 @@ pub mod power_manager; pub mod power_shelf; pub mod predicted_machine_interface; pub mod rack; -pub mod rack_firmware; pub mod rack_type; pub mod redfish; pub mod resource_pool; diff --git a/crates/api-model/src/rack.rs b/crates/api-model/src/rack.rs index c7a3cf3d92..ece757008e 100644 --- a/crates/api-model/src/rack.rs +++ b/crates/api-model/src/rack.rs @@ -492,7 +492,7 @@ impl Display for FirmwareUpgradeState { #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] pub enum NvosUpdateState { - Start { rack_firmware_id: Option }, + Start { firmware_object_id: Option }, WaitForComplete, } @@ -646,18 +646,25 @@ impl MachineRvLabels { #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum MaintenanceActivity { FirmwareUpgrade { - /// Target firmware version. `None` means RMS uses its default/latest. + /// SOT JSON for RMS ApplyFirmwareObjectFromJSON. + /// `None` is only valid for implicit all-activity maintenance skips. #[serde(default)] firmware_version: Option, /// Firmware components to update (e.g. "BMC", "CPLD", "BIOS"). /// Empty means all components. #[serde(default)] components: Vec, + /// RMS artifact-download token for ApplyFirmwareObjectFromJSON. + /// Required when firmware upgrade is explicitly requested. 
+ #[serde(default)] + access_token: Option, + #[serde(default)] + force_update: bool, }, NvosUpdate { - /// Rack firmware entry containing the switch system image to install. - /// `None` means the default rack firmware for the rack is used. - rack_firmware_id: Option, + /// Firmware object containing the switch system image to install. + /// `None` means the default firmware object for the rack is used. + firmware_object_id: Option, }, ConfigureNmxCluster, PowerSequence, @@ -867,9 +874,11 @@ mod tests { assert!(scope.should_run(&MaintenanceActivity::FirmwareUpgrade { firmware_version: None, components: vec![], + access_token: None, + force_update: false, })); assert!(scope.should_run(&MaintenanceActivity::NvosUpdate { - rack_firmware_id: None, + firmware_object_id: None, })); assert!(scope.should_run(&MaintenanceActivity::ConfigureNmxCluster)); assert!(scope.should_run(&MaintenanceActivity::PowerSequence)); @@ -881,15 +890,19 @@ mod tests { activities: vec![MaintenanceActivity::FirmwareUpgrade { firmware_version: Some("v2.0".into()), components: vec![], + access_token: None, + force_update: false, }], ..Default::default() }; assert!(scope.should_run(&MaintenanceActivity::FirmwareUpgrade { firmware_version: None, components: vec![], + access_token: None, + force_update: false, })); assert!(!scope.should_run(&MaintenanceActivity::NvosUpdate { - rack_firmware_id: None, + firmware_object_id: None, })); assert!(!scope.should_run(&MaintenanceActivity::ConfigureNmxCluster)); assert!(!scope.should_run(&MaintenanceActivity::PowerSequence)); @@ -902,9 +915,11 @@ mod tests { MaintenanceActivity::FirmwareUpgrade { firmware_version: None, components: vec![], + access_token: None, + force_update: false, }, MaintenanceActivity::NvosUpdate { - rack_firmware_id: Some("fw-nvos".into()), + firmware_object_id: Some("fw-nvos".into()), }, MaintenanceActivity::PowerSequence, ], @@ -913,10 +928,12 @@ mod tests { assert!(scope.should_run(&MaintenanceActivity::FirmwareUpgrade { 
firmware_version: Some("v1.0".into()), components: vec![], + access_token: None, + force_update: false, })); assert!(!scope.should_run(&MaintenanceActivity::ConfigureNmxCluster)); assert!(scope.should_run(&MaintenanceActivity::NvosUpdate { - rack_firmware_id: None, + firmware_object_id: None, })); assert!(scope.should_run(&MaintenanceActivity::PowerSequence)); } @@ -928,18 +945,22 @@ mod tests { let a = MaintenanceActivity::FirmwareUpgrade { firmware_version: Some("v1".into()), components: vec!["BMC".into()], + access_token: None, + force_update: false, }; let b = MaintenanceActivity::FirmwareUpgrade { firmware_version: None, components: vec![], + access_token: None, + force_update: false, }; assert!(a.same_kind(&b)); let a = MaintenanceActivity::NvosUpdate { - rack_firmware_id: Some("fw-a".into()), + firmware_object_id: Some("fw-a".into()), }; let b = MaintenanceActivity::NvosUpdate { - rack_firmware_id: None, + firmware_object_id: None, }; assert!(a.same_kind(&b)); } @@ -949,6 +970,8 @@ mod tests { let a = MaintenanceActivity::FirmwareUpgrade { firmware_version: None, components: vec![], + access_token: None, + force_update: false, }; let b = MaintenanceActivity::ConfigureNmxCluster; assert!(!a.same_kind(&b)); @@ -960,6 +983,8 @@ mod tests { MaintenanceActivity::FirmwareUpgrade { firmware_version: None, components: vec![], + access_token: None, + force_update: false, } .to_string(), "FirmwareUpgrade" @@ -970,7 +995,7 @@ mod tests { ); assert_eq!( MaintenanceActivity::NvosUpdate { - rack_firmware_id: None, + firmware_object_id: None, } .to_string(), "NvosUpdate" diff --git a/crates/api-model/src/rack_firmware.rs b/crates/api-model/src/rack_firmware.rs deleted file mode 100644 index 029736170c..0000000000 --- a/crates/api-model/src/rack_firmware.rs +++ /dev/null @@ -1,69 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
- * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; -use sqlx::postgres::PgRow; -use sqlx::types::Json; -use sqlx::{FromRow, Row}; - -use crate::rack_type::RackHardwareType; - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct RackFirmware { - pub id: String, - pub rack_hardware_type: RackHardwareType, - pub config: Json, - pub available: bool, - pub is_default: bool, - pub parsed_components: Option>, - pub created: DateTime, - pub updated: DateTime, -} - -impl<'r> FromRow<'r, PgRow> for RackFirmware { - fn from_row(row: &'r PgRow) -> Result { - Ok(RackFirmware { - id: row.try_get("id")?, - rack_hardware_type: row.try_get("rack_hardware_type")?, - config: row.try_get("config")?, - available: row.try_get("available")?, - is_default: row.try_get("is_default")?, - parsed_components: row.try_get("parsed_components")?, - created: row.try_get("created")?, - updated: row.try_get("updated")?, - }) - } -} - -/// Filter criteria for searching rack firmware configurations. -#[derive(Clone, Debug, Default)] -pub struct RackFirmwareSearchFilter { - pub only_available: bool, - pub rack_hardware_type: Option, -} - -/// A record of a rack firmware apply operation. 
-#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct RackFirmwareApplyHistoryRecord { - pub firmware_id: String, - pub rack_id: String, - pub firmware_type: String, - pub rack_hardware_type: RackHardwareType, - pub applied_at: DateTime, - pub firmware_available: bool, -} diff --git a/crates/api-test-helper/src/mock_rms.rs b/crates/api-test-helper/src/mock_rms.rs index 65f0d71fee..fd862d7b4e 100644 --- a/crates/api-test-helper/src/mock_rms.rs +++ b/crates/api-test-helper/src/mock_rms.rs @@ -119,6 +119,45 @@ pub struct MockRmsApi { Mutex>>, get_rack_firmware_inventory_calls: Mutex>, + add_firmware_object_responses: Mutex>>, + add_firmware_object_calls: Mutex>, + + get_firmware_object_responses: Mutex>>, + get_firmware_object_calls: Mutex>, + + list_firmware_objects_responses: + Mutex>>, + list_firmware_objects_calls: Mutex>, + + delete_firmware_object_responses: + Mutex>>, + delete_firmware_object_calls: Mutex>, + + set_default_firmware_object_responses: + Mutex>>, + set_default_firmware_object_calls: Mutex>, + + apply_firmware_object_responses: + Mutex>>, + apply_firmware_object_calls: Mutex>, + + apply_firmware_object_from_json_responses: + Mutex>>, + apply_firmware_object_from_json_calls: Mutex>, + + apply_switch_system_image_from_json_responses: + Mutex>>, + apply_switch_system_image_from_json_calls: + Mutex>, + + apply_switch_system_image_responses: + Mutex>>, + apply_switch_system_image_calls: Mutex>, + + get_firmware_object_history_responses: + Mutex>>, + get_firmware_object_history_calls: Mutex>, + update_node_firmware_async_responses: Mutex>>, update_node_firmware_async_calls: Mutex>, @@ -170,6 +209,10 @@ pub struct MockRmsApi { Mutex>>, configure_scale_up_fabric_manager_calls: Mutex>, + set_scale_up_fabric_state_responses: + Mutex>>, + set_scale_up_fabric_state_calls: Mutex>, + enable_scale_up_fabric_telemetry_interface_responses: Mutex< VecDeque>, >, @@ -229,6 +272,26 @@ impl MockRmsApi { get_node_firmware_inventory_calls: Default::default(), 
get_rack_firmware_inventory_responses: Default::default(), get_rack_firmware_inventory_calls: Default::default(), + add_firmware_object_responses: Default::default(), + add_firmware_object_calls: Default::default(), + get_firmware_object_responses: Default::default(), + get_firmware_object_calls: Default::default(), + list_firmware_objects_responses: Default::default(), + list_firmware_objects_calls: Default::default(), + delete_firmware_object_responses: Default::default(), + delete_firmware_object_calls: Default::default(), + set_default_firmware_object_responses: Default::default(), + set_default_firmware_object_calls: Default::default(), + apply_firmware_object_responses: Default::default(), + apply_firmware_object_calls: Default::default(), + apply_firmware_object_from_json_responses: Default::default(), + apply_firmware_object_from_json_calls: Default::default(), + apply_switch_system_image_from_json_responses: Default::default(), + apply_switch_system_image_from_json_calls: Default::default(), + apply_switch_system_image_responses: Default::default(), + apply_switch_system_image_calls: Default::default(), + get_firmware_object_history_responses: Default::default(), + get_firmware_object_history_calls: Default::default(), update_node_firmware_async_responses: Default::default(), update_node_firmware_async_calls: Default::default(), update_firmware_by_node_type_async_responses: Default::default(), @@ -253,6 +316,8 @@ impl MockRmsApi { poll_job_status_calls: Default::default(), configure_scale_up_fabric_manager_responses: Default::default(), configure_scale_up_fabric_manager_calls: Default::default(), + set_scale_up_fabric_state_responses: Default::default(), + set_scale_up_fabric_state_calls: Default::default(), enable_scale_up_fabric_telemetry_interface_responses: Default::default(), enable_scale_up_fabric_telemetry_interface_calls: Default::default(), version_responses: Default::default(), @@ -402,6 +467,86 @@ impl MockRmsApi { 
rms::GetRackFirmwareInventoryRequest, rms::GetRackFirmwareInventoryResponse ); + impl_enqueue_inspect!( + enqueue_add_firmware_object, + add_firmware_object_calls, + add_firmware_object_responses, + add_firmware_object_calls, + rms::AddFirmwareObjectRequest, + rms::FirmwareObject + ); + impl_enqueue_inspect!( + enqueue_get_firmware_object, + get_firmware_object_calls, + get_firmware_object_responses, + get_firmware_object_calls, + rms::GetFirmwareObjectRequest, + rms::FirmwareObject + ); + impl_enqueue_inspect!( + enqueue_list_firmware_objects, + list_firmware_objects_calls, + list_firmware_objects_responses, + list_firmware_objects_calls, + rms::ListFirmwareObjectsRequest, + rms::ListFirmwareObjectsResponse + ); + impl_enqueue_inspect!( + enqueue_delete_firmware_object, + delete_firmware_object_calls, + delete_firmware_object_responses, + delete_firmware_object_calls, + rms::DeleteFirmwareObjectRequest, + rms::OperationResponse + ); + impl_enqueue_inspect!( + enqueue_set_default_firmware_object, + set_default_firmware_object_calls, + set_default_firmware_object_responses, + set_default_firmware_object_calls, + rms::SetDefaultFirmwareObjectRequest, + rms::FirmwareObject + ); + impl_enqueue_inspect!( + enqueue_apply_firmware_object, + apply_firmware_object_calls, + apply_firmware_object_responses, + apply_firmware_object_calls, + rms::ApplyFirmwareObjectRequest, + rms::ApplyFirmwareObjectResponse + ); + impl_enqueue_inspect!( + enqueue_apply_firmware_object_from_json, + apply_firmware_object_from_json_calls, + apply_firmware_object_from_json_responses, + apply_firmware_object_from_json_calls, + rms::ApplyFirmwareObjectFromJsonRequest, + rms::ApplyFirmwareObjectResponse + ); + impl_enqueue_inspect!( + enqueue_apply_switch_system_image_from_json, + apply_switch_system_image_from_json_calls, + apply_switch_system_image_from_json_responses, + apply_switch_system_image_from_json_calls, + rms::ApplySwitchSystemImageFromJsonRequest, + rms::ApplySwitchSystemImageResponse + 
); + impl_enqueue_inspect!( + enqueue_apply_switch_system_image, + apply_switch_system_image_calls, + apply_switch_system_image_responses, + apply_switch_system_image_calls, + rms::ApplySwitchSystemImageRequest, + rms::ApplySwitchSystemImageResponse + ); + impl_enqueue_inspect!( + enqueue_get_firmware_object_history, + get_firmware_object_history_calls, + get_firmware_object_history_responses, + get_firmware_object_history_calls, + rms::GetFirmwareObjectHistoryRequest, + rms::GetFirmwareObjectHistoryResponse + ); impl_enqueue_inspect!( enqueue_update_node_firmware_async, update_node_firmware_async_calls, @@ -504,6 +649,14 @@ impl MockRmsApi { rms::ConfigureScaleUpFabricManagerRequest, rms::ConfigureScaleUpFabricManagerResponse ); + impl_enqueue_inspect!( + enqueue_set_scale_up_fabric_state, + set_scale_up_fabric_state_calls, + set_scale_up_fabric_state_responses, + set_scale_up_fabric_state_calls, + rms::SetScaleUpFabricStateRequest, + rms::SetScaleUpFabricStateResponse + ); impl_enqueue_inspect!( enqueue_enable_scale_up_fabric_telemetry_interface, enable_scale_up_fabric_telemetry_interface_calls, @@ -789,6 +942,93 @@ impl RmsApi for MockRmsApi { .push(cmd); pop_or_err(&mut self.get_rack_firmware_inventory_responses.lock().await) } + async fn add_firmware_object( + &self, + cmd: rms::AddFirmwareObjectRequest, + ) -> Result { + self.add_firmware_object_calls.lock().await.push(cmd); + pop_or_err(&mut self.add_firmware_object_responses.lock().await) + } + async fn get_firmware_object( + &self, + cmd: rms::GetFirmwareObjectRequest, + ) -> Result { + self.get_firmware_object_calls.lock().await.push(cmd); + pop_or_err(&mut self.get_firmware_object_responses.lock().await) + } + async fn list_firmware_objects( + &self, + cmd: rms::ListFirmwareObjectsRequest, + ) -> Result { + self.list_firmware_objects_calls.lock().await.push(cmd); + pop_or_err(&mut self.list_firmware_objects_responses.lock().await) + } + async fn delete_firmware_object( + &self, + cmd: 
rms::DeleteFirmwareObjectRequest, + ) -> Result { + self.delete_firmware_object_calls.lock().await.push(cmd); + pop_or_err(&mut self.delete_firmware_object_responses.lock().await) + } + async fn set_default_firmware_object( + &self, + cmd: rms::SetDefaultFirmwareObjectRequest, + ) -> Result { + self.set_default_firmware_object_calls + .lock() + .await + .push(cmd); + pop_or_err(&mut self.set_default_firmware_object_responses.lock().await) + } + async fn apply_firmware_object( + &self, + cmd: rms::ApplyFirmwareObjectRequest, + ) -> Result { + self.apply_firmware_object_calls.lock().await.push(cmd); + pop_or_err(&mut self.apply_firmware_object_responses.lock().await) + } + async fn apply_firmware_object_from_json( + &self, + cmd: rms::ApplyFirmwareObjectFromJsonRequest, + ) -> Result { + self.apply_firmware_object_from_json_calls + .lock() + .await + .push(cmd); + pop_or_err(&mut self.apply_firmware_object_from_json_responses.lock().await) + } + async fn apply_switch_system_image_from_json( + &self, + cmd: rms::ApplySwitchSystemImageFromJsonRequest, + ) -> Result { + self.apply_switch_system_image_from_json_calls + .lock() + .await + .push(cmd); + pop_or_err( + &mut self + .apply_switch_system_image_from_json_responses + .lock() + .await, + ) + } + async fn apply_switch_system_image( + &self, + cmd: rms::ApplySwitchSystemImageRequest, + ) -> Result { + self.apply_switch_system_image_calls.lock().await.push(cmd); + pop_or_err(&mut self.apply_switch_system_image_responses.lock().await) + } + async fn get_firmware_object_history( + &self, + cmd: rms::GetFirmwareObjectHistoryRequest, + ) -> Result { + self.get_firmware_object_history_calls + .lock() + .await + .push(cmd); + pop_or_err(&mut self.get_firmware_object_history_responses.lock().await) + } async fn update_node_firmware_async( &self, cmd: rms::UpdateNodeFirmwareRequest, @@ -895,6 +1135,13 @@ impl RmsApi for MockRmsApi { .await, ) } + async fn set_scale_up_fabric_state( + &self, + cmd: 
rms::SetScaleUpFabricStateRequest, + ) -> Result { + self.set_scale_up_fabric_state_calls.lock().await.push(cmd); + pop_or_err(&mut self.set_scale_up_fabric_state_responses.lock().await) + } async fn enable_scale_up_fabric_telemetry_interface( &self, cmd: rms::EnableScaleUpFabricTelemetryInterfaceRequest, diff --git a/crates/api/src/api.rs b/crates/api/src/api.rs index 88c43a8a3f..7f32b08289 100644 --- a/crates/api/src/api.rs +++ b/crates/api/src/api.rs @@ -1510,62 +1510,6 @@ impl Forge for Api { crate::handlers::expected_machine::update_expected_machines(self, request).await } - async fn create_rack_firmware( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - crate::handlers::rack_firmware::create(self, request).await - } - - async fn get_rack_firmware( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - crate::handlers::rack_firmware::get(self, request).await - } - - async fn list_rack_firmware( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - crate::handlers::rack_firmware::list(self, request).await - } - - async fn delete_rack_firmware( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - crate::handlers::rack_firmware::delete(self, request).await - } - - async fn apply_rack_firmware( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - crate::handlers::rack_firmware::apply(self, request).await - } - - async fn get_rack_firmware_job_status( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - crate::handlers::rack_firmware::get_job_status(self, request).await - } - - async fn get_rack_firmware_history( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - crate::handlers::rack_firmware::get_history(self, request).await - } - - async fn rack_firmware_set_default( - &self, - request: tonic::Request, - ) -> Result, tonic::Status> { - crate::handlers::rack_firmware::set_default(self, request).await - } - async fn get_expected_power_shelf( 
&self, request: Request, diff --git a/crates/api/src/auth/internal_rbac_rules.rs b/crates/api/src/auth/internal_rbac_rules.rs index 867c4f14d3..d68b6e6f0b 100644 --- a/crates/api/src/auth/internal_rbac_rules.rs +++ b/crates/api/src/auth/internal_rbac_rules.rs @@ -448,15 +448,7 @@ impl InternalRBACRules { ); x.perm("GetIpxeTemplate", vec![ForgeAdminCLI, SiteAgent]); x.perm("ListIpxeTemplates", vec![ForgeAdminCLI, SiteAgent]); - x.perm("CreateRackFirmware", vec![ForgeAdminCLI]); - x.perm("DeleteRackFirmware", vec![ForgeAdminCLI]); x.perm("FindRackStateHistories", vec![ForgeAdminCLI, Machineatron]); - x.perm("ListRackFirmware", vec![ForgeAdminCLI]); - x.perm("GetRackFirmware", vec![ForgeAdminCLI]); - x.perm("ApplyRackFirmware", vec![ForgeAdminCLI]); - x.perm("GetRackFirmwareJobStatus", vec![ForgeAdminCLI]); - x.perm("GetRackFirmwareHistory", vec![ForgeAdminCLI]); - x.perm("RackFirmwareSetDefault", vec![ForgeAdminCLI]); x.perm("RebootCompleted", vec![Machineatron, Scout]); x.perm("PersistValidationResult", vec![Scout, SiteAgent]); x.perm( diff --git a/crates/api/src/handlers/component_manager.rs b/crates/api/src/handlers/component_manager.rs index 48cac3ebf4..2d952b18dc 100644 --- a/crates/api/src/handlers/component_manager.rs +++ b/crates/api/src/handlers/component_manager.rs @@ -20,7 +20,9 @@ use std::net::IpAddr; use ::rpc::common::SystemPowerControl; use ::rpc::forge::{self as rpc}; +use carbide_uuid::machine::MachineId; use carbide_uuid::power_shelf::PowerShelfId; +use carbide_uuid::rack::RackId; use carbide_uuid::switch::SwitchId; use component_manager::component_manager::ComponentManager; use component_manager::compute_tray_manager::{ComputeTrayEndpoint, ComputeTrayVendor}; @@ -32,6 +34,7 @@ use forge_secrets::credentials::{ BmcCredentialType, CredentialKey, CredentialManager, Credentials, }; use futures_util::FutureExt; +use librms::protos::rack_manager as rms; use mac_address::MacAddress; use model::component_manager::{ ComputeTrayComponent as 
ModelComputeTrayComponent, NvSwitchComponent, PowerAction, @@ -43,6 +46,7 @@ use model::rack::{FirmwareUpgradeJob, MaintenanceActivity}; use tonic::{Code, Request, Response, Status}; use crate::api::{Api, log_request_data}; +use crate::rack::firmware_object::hardware_type_matches_filter; const MACHINE_POWER_OVERRIDE_SOURCE: &str = "component_power_control"; const MACHINE_POWER_OVERRIDE_MESSAGE: &str = "Compute-Tray component power control in progress"; @@ -458,6 +462,221 @@ fn map_power_shelf_components(raw: &[i32]) -> Result, S .collect() } +fn normalize_access_token(access_token: Option) -> Option { + access_token.and_then(|token| { + if token.trim().is_empty() { + None + } else { + Some(token) + } + }) +} + +fn validate_firmware_object_json_request(target_version: &str) -> Result<(), Status> { + if target_version.trim().is_empty() { + return Err(Status::invalid_argument( + "target_version must contain SOT JSON for firmware updates routed through rack maintenance", + )); + } + Ok(()) +} + +fn reject_power_shelf_firmware_object_json(access_token: &Option) -> Result<(), Status> { + if access_token.is_some() { + Err(Status::unimplemented( + "firmware object JSON updates for power shelves are not implemented", + )) + } else { + Ok(()) + } +} + +fn missing_firmware_object_json_status(target: &str) -> Status { + Status::invalid_argument(format!( + "access_token is required for {target} firmware updates routed through rack maintenance" + )) +} + +fn require_firmware_object_json_for_rack_maintenance( + target: &str, + access_token: &Option, + target_version: &str, +) -> Result { + let Some(token) = access_token.clone() else { + return Err(missing_firmware_object_json_status(target)); + }; + + validate_firmware_object_json_request(target_version)?; + Ok(token) +} + +fn reject_firmware_object_json_for_direct_dispatch( + target: &str, + access_token: &Option, +) -> Result<(), Status> { + if access_token.is_some() { + Err(Status::invalid_argument(format!( + "access_token is 
only supported for {target} firmware updates routed through rack maintenance" + ))) + } else { + Ok(()) + } +} + +struct RackFirmwareMaintenanceTarget { + rack_id: RackId, + machine_ids: Vec, + switch_ids: Vec, +} + +fn push_rack_firmware_target( + targets: &mut Vec, + rack_id: RackId, + machine_id: Option, + switch_id: Option, +) { + let target = match targets.iter_mut().find(|target| target.rack_id == rack_id) { + Some(target) => target, + None => { + targets.push(RackFirmwareMaintenanceTarget { + rack_id, + machine_ids: Vec::new(), + switch_ids: Vec::new(), + }); + targets.last_mut().expect("target was just pushed") + } + }; + + if let Some(machine_id) = machine_id { + target.machine_ids.push(machine_id); + } + if let Some(switch_id) = switch_id { + target.switch_ids.push(switch_id); + } +} + +async fn group_machine_ids_by_rack( + api: &Api, + machine_ids: &[MachineId], +) -> Result, Status> { + let machines = db::machine::find( + api.db_reader().as_mut(), + db::ObjectFilter::List(machine_ids), + MachineSearchConfig::default(), + ) + .await + .map_err(|e| Status::internal(format!("failed to look up machines: {e}")))?; + let machines_by_id: HashMap<_, _> = machines + .into_iter() + .map(|machine| (machine.id, machine)) + .collect(); + + let mut targets = Vec::new(); + for machine_id in machine_ids { + let machine = machines_by_id + .get(machine_id) + .ok_or_else(|| Status::not_found(format!("machine {machine_id} not found")))?; + let rack_id = machine.rack_id.clone().ok_or_else(|| { + Status::failed_precondition(format!( + "machine {machine_id} is not associated with a rack" + )) + })?; + push_rack_firmware_target(&mut targets, rack_id, Some(machine_id.to_string()), None); + } + + Ok(targets) +} + +async fn group_switch_ids_by_rack( + api: &Api, + switch_ids: &[SwitchId], +) -> Result, Status> { + let mut txn = api + .database_connection + .begin() + .await + .map_err(|e| Status::internal(format!("failed to begin transaction: {e}")))?; + let switches = 
db::switch::find_by( + &mut txn, + db::ObjectColumnFilter::List(db::switch::IdColumn, switch_ids), + ) + .await + .map_err(|e| Status::internal(format!("failed to look up switches: {e}")))?; + drop(txn); + + let switches_by_id: HashMap<_, _> = switches + .into_iter() + .map(|switch| (switch.id, switch)) + .collect(); + + let mut targets = Vec::new(); + for switch_id in switch_ids { + let switch = switches_by_id + .get(switch_id) + .ok_or_else(|| Status::not_found(format!("switch {switch_id} not found")))?; + let rack_id = switch.rack_id.clone().ok_or_else(|| { + Status::failed_precondition(format!("switch {switch_id} is not associated with a rack")) + })?; + push_rack_firmware_target(&mut targets, rack_id, None, Some(switch_id.to_string())); + } + + Ok(targets) +} + +async fn submit_rack_firmware_maintenance_requests( + api: &Api, + targets: Vec, + firmware_version: String, + components: Vec, + access_token: Option, + force_update: bool, +) -> Result, Status> { + if targets.is_empty() { + return Err(Status::invalid_argument( + "no devices specified for firmware upgrade", + )); + } + + let mut results = Vec::new(); + for target in targets { + let affected_ids: Vec<_> = target + .machine_ids + .iter() + .chain(target.switch_ids.iter()) + .cloned() + .collect(); + let maintenance_req = Request::new(rpc::RackMaintenanceOnDemandRequest { + rack_id: Some(target.rack_id), + scope: Some(rpc::RackMaintenanceScope { + machine_ids: target.machine_ids, + switch_ids: target.switch_ids, + power_shelf_ids: vec![], + activities: vec![rpc::MaintenanceActivityConfig { + activity: Some(rpc::maintenance_activity_config::Activity::FirmwareUpgrade( + rpc::FirmwareUpgradeActivity { + firmware_version: firmware_version.clone(), + components: components.clone(), + access_token: access_token.clone(), + force_update, + }, + )), + }], + }), + }); + + match crate::handlers::rack::on_demand_rack_maintenance(api, maintenance_req).await { + Ok(_) => results.extend(affected_ids.iter().map(|id| 
success_result(id))), + Err(status) => results.extend( + affected_ids + .iter() + .map(|id| status_result(id, status.clone())), + ), + } + } + + Ok(results) +} + // ---- Endpoint resolution helpers ---- struct UnresolvedDevice { @@ -1284,17 +1503,18 @@ pub(crate) async fn update_component_firmware( let target = req .target .ok_or_else(|| Status::invalid_argument("target is required"))?; + let access_token = normalize_access_token(req.access_token); - let mut rack_machine_ids: Vec = Vec::new(); - let mut rack_switch_ids: Vec = Vec::new(); - let mut rack_id: Option = None; + let target_version = req.target_version.clone(); + let force_update = req.force_update; + let mut rack_maintenance_targets: Vec = Vec::new(); let mut power_shelf_results: Option> = None; let mut rack_results: Option> = None; let mut component_names: Vec = Vec::new(); + let mut maintenance_access_token: Option = None; match target { rpc::update_component_firmware_request::Target::Switches(t) => { - let cm = require_component_manager(api)?; let list = t .switch_ids .ok_or_else(|| Status::invalid_argument("switch_ids is required"))?; @@ -1302,30 +1522,17 @@ pub(crate) async fn update_component_firmware( return Err(Status::invalid_argument("switch_ids must not be empty")); } + let cm = require_component_manager(api)?; if cm.nv_switch_use_state_controller { + maintenance_access_token = Some(require_firmware_object_json_for_rack_maintenance( + "switch", + &access_token, + &req.target_version, + )?); component_names = map_nv_switch_component_names(&t.components)?; - - let mut txn = - api.database_connection.begin().await.map_err(|e| { - Status::internal(format!("failed to begin transaction: {e}")) - })?; - let switch = db::switch::find_by_id(&mut txn, &list.ids[0]) - .await - .map_err(|e| Status::internal(format!("failed to look up switch: {e}")))? 
- .ok_or_else(|| { - Status::not_found(format!("switch {} not found", list.ids[0])) - })?; - drop(txn); - - rack_id = Some(switch.rack_id.ok_or_else(|| { - Status::failed_precondition(format!( - "switch {} is not associated with a rack", - list.ids[0] - )) - })?); - rack_switch_ids = list.ids.iter().map(|id| id.to_string()).collect(); + rack_maintenance_targets = group_switch_ids_by_rack(api, &list.ids).await?; } else { - // Directly dispatch to backend + reject_firmware_object_json_for_direct_dispatch("switch", &access_token)?; let components = map_nv_switch_components(&t.components)?; let endpoints = resolve_switch_endpoints(api, &list.ids).await?; @@ -1359,7 +1566,6 @@ pub(crate) async fn update_component_firmware( } } rpc::update_component_firmware_request::Target::ComputeTrays(t) => { - let cm = require_component_manager(api)?; let list = t .machine_ids .ok_or_else(|| Status::invalid_argument("machine_ids is required"))?; @@ -1367,28 +1573,18 @@ pub(crate) async fn update_component_firmware( return Err(Status::invalid_argument("machine_ids must not be empty")); } + let cm = require_component_manager(api)?; if cm.compute_tray_use_state_controller { + maintenance_access_token = Some(require_firmware_object_json_for_rack_maintenance( + "compute tray", + &access_token, + &req.target_version, + )?); component_names = map_compute_tray_component_names(&t.components)?; - - let machine = db::machine::find_one( - api.db_reader().as_mut(), - &list.machine_ids[0], - Default::default(), - ) - .await - .map_err(|e| Status::internal(format!("failed to look up machine: {e}")))? 
- .ok_or_else(|| { - Status::not_found(format!("machine {} not found", list.machine_ids[0])) - })?; - - rack_id = Some(machine.rack_id.ok_or_else(|| { - Status::failed_precondition(format!( - "machine {} is not associated with a rack", - list.machine_ids[0] - )) - })?); - rack_machine_ids = list.machine_ids.iter().map(|id| id.to_string()).collect(); + rack_maintenance_targets = + group_machine_ids_by_rack(api, &list.machine_ids).await?; } else { + reject_firmware_object_json_for_direct_dispatch("compute tray", &access_token)?; let components = map_compute_tray_components(&t.components)?; let resolved = resolve_compute_tray_endpoints(api, &list.machine_ids).await?; @@ -1421,10 +1617,17 @@ pub(crate) async fn update_component_firmware( } } rpc::update_component_firmware_request::Target::PowerShelves(t) => { - let cm = require_component_manager(api)?; let list = t .power_shelf_ids .ok_or_else(|| Status::invalid_argument("power_shelf_ids is required"))?; + if list.ids.is_empty() { + return Err(Status::invalid_argument( + "power_shelf_ids must not be empty", + )); + } + + reject_power_shelf_firmware_object_json(&access_token)?; + let cm = require_component_manager(api)?; let components = map_power_shelf_components(&t.components)?; let endpoints = resolve_power_shelf_endpoints(api, &list.ids).await?; @@ -1460,6 +1663,11 @@ pub(crate) async fn update_component_firmware( if list.rack_ids.is_empty() { return Err(Status::invalid_argument("rack_ids must not be empty")); } + let token = require_firmware_object_json_for_rack_maintenance( + "rack", + &access_token, + &req.target_version, + )?; let mut results = Vec::new(); for rack_id in list.rack_ids { @@ -1476,6 +1684,8 @@ pub(crate) async fn update_component_firmware( rpc::FirmwareUpgradeActivity { firmware_version: req.target_version.clone(), components: vec![], + access_token: Some(token.clone()), + force_update: req.force_update, }, ), ), @@ -1505,34 +1715,15 @@ pub(crate) async fn update_component_firmware( })); } - let 
rack_id = rack_id.ok_or_else(|| { - Status::invalid_argument("no machines or switches specified for firmware upgrade") - })?; - - let maintenance_req = Request::new(rpc::RackMaintenanceOnDemandRequest { - rack_id: Some(rack_id), - scope: Some(rpc::RackMaintenanceScope { - machine_ids: rack_machine_ids.clone(), - switch_ids: rack_switch_ids.clone(), - power_shelf_ids: vec![], - activities: vec![rpc::MaintenanceActivityConfig { - activity: Some(rpc::maintenance_activity_config::Activity::FirmwareUpgrade( - rpc::FirmwareUpgradeActivity { - firmware_version: req.target_version, - components: component_names, - }, - )), - }], - }), - }); - - crate::handlers::rack::on_demand_rack_maintenance(api, maintenance_req).await?; - - let results: Vec<_> = rack_machine_ids - .iter() - .chain(rack_switch_ids.iter()) - .map(|id| success_result(id)) - .collect(); + let results = submit_rack_firmware_maintenance_requests( + api, + rack_maintenance_targets, + target_version, + component_names, + maintenance_access_token, + force_update, + ) + .await?; Ok(Response::new(rpc::UpdateComponentFirmwareResponse { results, @@ -1878,23 +2069,19 @@ pub(crate) async fn list_component_firmware_versions( .map(|rack| (rack.id.clone(), rack)) .collect(); - let mut txn = api - .database_connection - .begin() - .await - .map_err(|e| Status::internal(format!("failed to begin transaction: {e}")))?; - let firmwares = db::rack_firmware::list_all( - &mut txn, - model::rack_firmware::RackFirmwareSearchFilter { + let rms_client = api + .rms_client + .as_ref() + .ok_or_else(|| Status::failed_precondition("RMS client is not configured"))?; + let firmwares = rms_client + .list_firmware_objects(rms::ListFirmwareObjectsRequest { + metadata: None, only_available: true, - rack_hardware_type: None, - }, - ) - .await - .map_err(|e| Status::internal(format!("failed to list rack firmware: {e}")))?; - txn.commit() + hardware_type: String::new(), + }) .await - .map_err(|e| Status::internal(format!("failed to commit 
transaction: {e}")))?; + .map_err(|e| Status::internal(format!("failed to list firmware objects: {e}")))? + .objects; let devices = requested_rack_ids .iter() @@ -1930,23 +2117,13 @@ pub(crate) async fn list_component_firmware_versions( }; }; - let Some(rack_hardware_type) = profile.rack_hardware_type.as_ref() else { - return rpc::DeviceFirmwareVersions { - result: Some(invalid_argument_component_result( - rack_id.as_ref(), - format!( - "rack profile {profile_id} does not define rack_hardware_type" - ), - )), - ..Default::default() - }; - }; - let versions = firmwares .iter() .filter(|firmware| { - firmware.rack_hardware_type.is_any() - || firmware.rack_hardware_type == *rack_hardware_type + hardware_type_matches_filter( + &firmware.hardware_type, + profile.rack_hardware_type.as_ref(), + ) }) .map(|firmware| firmware.id.clone()) .collect(); @@ -2041,6 +2218,74 @@ mod tests { assert_eq!(st.code(), Code::PermissionDenied); } + #[test] + fn firmware_object_json_request_requires_sot_json_in_target_version() { + let err = validate_firmware_object_json_request("").unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("target_version")); + + validate_firmware_object_json_request("{}").unwrap(); + } + + #[test] + fn rack_firmware_targets_are_grouped_by_rack() { + let rack_a = RackId::new("rack-a".to_string()); + let rack_b = RackId::new("rack-b".to_string()); + let mut targets = Vec::new(); + + push_rack_firmware_target(&mut targets, rack_a.clone(), Some("machine-a".into()), None); + push_rack_firmware_target(&mut targets, rack_b.clone(), None, Some("switch-b".into())); + push_rack_firmware_target(&mut targets, rack_a.clone(), Some("machine-c".into()), None); + + assert_eq!(targets.len(), 2); + assert_eq!(targets[0].rack_id, rack_a); + assert_eq!(targets[0].machine_ids, vec!["machine-a", "machine-c"]); + assert!(targets[0].switch_ids.is_empty()); + assert_eq!(targets[1].rack_id, rack_b); + assert_eq!(targets[1].switch_ids, 
vec!["switch-b"]); + assert!(targets[1].machine_ids.is_empty()); + } + + #[test] + fn power_shelf_firmware_object_json_is_unimplemented() { + let access_token = Some("token".to_string()); + + let err = reject_power_shelf_firmware_object_json(&access_token).unwrap_err(); + + assert_eq!(err.code(), Code::Unimplemented); + assert!(err.message().contains("power shelves")); + } + + #[test] + fn rack_maintenance_firmware_update_requires_firmware_object_json() { + let err = missing_firmware_object_json_status("rack"); + + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("access_token")); + } + + #[test] + fn rack_maintenance_firmware_update_returns_access_token_when_valid() { + let token = require_firmware_object_json_for_rack_maintenance( + "switch", + &Some("token".to_string()), + "{}", + ) + .unwrap(); + + assert_eq!(token, "token"); + } + + #[test] + fn direct_firmware_update_rejects_access_token() { + let err = + reject_firmware_object_json_for_direct_dispatch("switch", &Some("token".to_string())) + .unwrap_err(); + + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("rack maintenance")); + } + #[test] fn power_action_on() { let action = map_power_action(SystemPowerControl::On as i32).unwrap(); @@ -2221,6 +2466,8 @@ mod tests { activities: vec![MaintenanceActivity::FirmwareUpgrade { firmware_version: None, components: vec![], + access_token: None, + force_update: false, }], ..Default::default() }); @@ -2242,6 +2489,8 @@ mod tests { activities: vec![MaintenanceActivity::FirmwareUpgrade { firmware_version: None, components: vec![], + access_token: None, + force_update: false, }], ..Default::default() }); diff --git a/crates/api/src/handlers/mod.rs b/crates/api/src/handlers/mod.rs index 635fe8e033..2596a50c8d 100644 --- a/crates/api/src/handlers/mod.rs +++ b/crates/api/src/handlers/mod.rs @@ -68,7 +68,6 @@ pub mod power_options; pub mod power_shelf; pub mod pxe; pub mod rack; -pub mod rack_firmware; pub mod 
redfish; pub mod resource_pool; pub mod route_server; @@ -76,7 +75,6 @@ pub mod scout_stream; pub mod site_explorer; pub mod sku; pub mod switch; -mod switch_artifacts; pub mod tenant; pub mod tenant_identity_config; pub mod tenant_keyset; diff --git a/crates/api/src/handlers/rack.rs b/crates/api/src/handlers/rack.rs index 9f27c3f66a..f8bc25df67 100644 --- a/crates/api/src/handlers/rack.rs +++ b/crates/api/src/handlers/rack.rs @@ -505,19 +505,51 @@ pub(crate) async fn on_demand_rack_maintenance( .activities .iter() .map(|entry| match &entry.activity { - Some(ProtoActivity::FirmwareUpgrade(fw)) => Ok(MaintenanceActivity::FirmwareUpgrade { - firmware_version: if fw.firmware_version.is_empty() { + Some(ProtoActivity::FirmwareUpgrade(fw)) => { + let firmware_version = if fw.firmware_version.trim().is_empty() { None } else { Some(fw.firmware_version.clone()) - }, - components: fw.components.clone(), - }), + }; + let access_token = fw.access_token.as_ref().and_then(|token| { + if token.trim().is_empty() { + None + } else { + Some(token.clone()) + } + }); + + if firmware_version.is_none() { + return Err(CarbideError::InvalidArgument( + "firmware-upgrade rack maintenance requires SOT JSON in firmware_version" + .into(), + )); + } + if access_token.is_none() { + return Err(CarbideError::InvalidArgument( + "firmware-upgrade rack maintenance requires access_token".into(), + )); + } + if let Some(config_json) = firmware_version.as_deref() { + serde_json::from_str::(config_json).map_err(|error| { + CarbideError::InvalidArgument(format!( + "firmware-upgrade firmware_version must contain valid SOT JSON: {error}" + )) + })?; + } + + Ok(MaintenanceActivity::FirmwareUpgrade { + firmware_version, + components: fw.components.clone(), + access_token, + force_update: fw.force_update, + }) + } Some(ProtoActivity::NvosUpdate(nvos)) => Ok(MaintenanceActivity::NvosUpdate { - rack_firmware_id: if nvos.rack_firmware_id.is_empty() { + firmware_object_id: if nvos.firmware_object_id.is_empty() { 
None } else { - Some(nvos.rack_firmware_id.clone()) + Some(nvos.firmware_object_id.clone()) }, }), Some(ProtoActivity::ConfigureNmxCluster(_)) => { @@ -661,6 +693,8 @@ pub(crate) async fn on_demand_rack_maintenance( scope.should_run(&MaintenanceActivity::FirmwareUpgrade { firmware_version: None, components: vec![], + access_token: None, + force_update: false, }) }) { diff --git a/crates/api/src/handlers/rack_firmware.rs b/crates/api/src/handlers/rack_firmware.rs deleted file mode 100644 index 02d3062ab7..0000000000 --- a/crates/api/src/handlers/rack_firmware.rs +++ /dev/null @@ -1,1437 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -use std::path::PathBuf; -use std::sync::Arc; - -use db::{DatabaseError, ObjectColumnFilter, rack_firmware as rack_firmware_db}; -use forge_secrets::credentials::{CredentialKey, CredentialReader, Credentials}; -use rpc::forge::{ - DeviceUpdateResult, NodeJobInfo, RackFirmware, RackFirmwareApplyRequest, - RackFirmwareApplyResponse, RackFirmwareCreateRequest, RackFirmwareDeleteRequest, - RackFirmwareGetRequest, RackFirmwareHistoryRecords, RackFirmwareHistoryRequest, - RackFirmwareHistoryResponse, RackFirmwareJobStatusRequest, RackFirmwareJobStatusResponse, - RackFirmwareList, RackFirmwareSearchFilter, RackFirmwareSetDefaultRequest, -}; -use serde::{Deserialize, Serialize}; -use serde_json::Value; -use tokio::task::JoinSet; -use tonic::{Request, Response, Status}; - -use crate::api::Api; -use crate::errors::CarbideError; -use crate::handlers::switch_artifacts::{SwitchSystemImageArtifact, collect_switch_system_images}; -use crate::rack::firmware_update::{ - build_firmware_update_batches, load_rack_firmware_inventory, submit_firmware_update_batches, -}; -// Structs for parsing rack firmware JSON - -#[derive(Debug, Clone, Serialize, Deserialize)] -struct ParsedFirmwareComponents { - #[serde(default)] - board_skus: Vec, - #[serde(default)] - switch_system_images: Vec, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -struct BoardSkuFirmware { - sku_id: String, - name: String, - sku_type: String, - firmware_components: Vec, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -struct FirmwareComponent { - component: String, - bundle: Option, - version: Option, - /// Firmware type: "Prod" or "Dev" - component_type: Option, - locations: Vec, - /// Subcomponents with individual versions (from FWPKG) - subcomponents: Vec, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -struct FirmwareSubComponent { - component: String, - version: String, - skuid: Option, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -struct FirmwareLocation { - location: String, - 
location_type: String, - firmware_type: Option, -} - -// Structs for firmware lookup table - -#[derive(Debug, Clone, Serialize, Deserialize)] -struct FirmwareLookupTable { - /// Map of device_type -> component_name -> FirmwareLookupEntry - #[serde(default)] - devices: - std::collections::HashMap>, - #[serde(default)] - switch_system_images: std::collections::HashMap< - String, - std::collections::HashMap, - >, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -struct FirmwareLookupEntry { - /// Path to the downloaded firmware file (relative to firmware_id directory) - filename: String, - /// Target identifier for RMS update command - target: String, - /// Component name (e.g., "HMC", "BMC") - component: String, - /// Bundle identifier (e.g., "P4975", "P4972") - bundle: String, - /// Firmware type: "prod" or "dev" - firmware_type: String, - /// Version of the firmware bundle - version: Option, - /// Subcomponents with individual versions - subcomponents: Vec, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -struct SwitchSystemImageLookupEntry { - component: String, - package_name: String, - version: String, - image_filename: String, - location_type: String, - firmware_type: String, -} - -/// Parse rack firmware JSON to extract firmware components -fn parse_rack_firmware_json(config: &Value) -> Result { - let board_skus = config - .get("BoardSKUs") - .and_then(|v| v.as_array()) - .ok_or_else(|| "JSON must contain 'BoardSKUs' array".to_string())?; - - let mut parsed_board_skus = Vec::new(); - - for board_sku in board_skus { - let sku_id = board_sku - .get("SKUID") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(); - - let name = board_sku - .get("Name") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(); - - let sku_type = board_sku - .get("Type") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(); - - // Get firmware components (ignore software) - let firmware_array = board_sku - .get("Components") - .and_then(|c| 
c.get("Firmware")) - .and_then(|f| f.as_array()); - - let mut firmware_components = Vec::new(); - - if let Some(firmware_list) = firmware_array { - for firmware in firmware_list { - let component = firmware - .get("Component") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(); - - let bundle = firmware - .get("Bundle") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()); - - let version = firmware - .get("Version") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()); - - // Get firmware type (Prod or Dev) - let component_type = firmware - .get("Type") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()); - - // Parse locations - let empty_vec = vec![]; - let locations_array = firmware - .get("Locations") - .and_then(|l| l.as_array()) - .unwrap_or(&empty_vec); - - let mut locations = Vec::new(); - - for location in locations_array { - let firmware_type = location - .get("Type") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()); - - // Only include locations with Type: "Firmware" (skip Certificate, Misc, etc.) 
- if firmware_type.as_deref() != Some("Firmware") { - continue; - } - - let loc = FirmwareLocation { - location: location - .get("Location") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(), - location_type: location - .get("LocationType") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(), - firmware_type, - }; - locations.push(loc); - } - - // Parse subcomponents - let empty_vec = vec![]; - let subcomponents_array = firmware - .get("SubComponents") - .and_then(|s| s.as_array()) - .unwrap_or(&empty_vec); - - let mut subcomponents = Vec::new(); - for subcomp in subcomponents_array { - let sub_component = subcomp - .get("Component") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(); - - let sub_version = subcomp - .get("Version") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(); - - let sub_skuid = subcomp - .get("SKUID") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()); - - if !sub_component.is_empty() && !sub_version.is_empty() { - subcomponents.push(FirmwareSubComponent { - component: sub_component, - version: sub_version, - skuid: sub_skuid, - }); - } - } - - firmware_components.push(FirmwareComponent { - component, - bundle, - version, - component_type, - locations, - subcomponents, - }); - } - } - - parsed_board_skus.push(BoardSkuFirmware { - sku_id, - name, - sku_type, - firmware_components, - }); - } - - Ok(ParsedFirmwareComponents { - board_skus: parsed_board_skus, - switch_system_images: Vec::new(), - }) -} - -/// Create a new Rack firmware configuration -pub async fn create( - api: &Api, - request: Request, -) -> Result, Status> { - let req = request.into_inner(); - - // Validate that config_json is valid JSON - let config: serde_json::Value = serde_json::from_str(&req.config_json) - .map_err(|e| CarbideError::InvalidArgument(format!("Invalid JSON: {}", e)))?; - - // Extract ID from JSON - use "Id" field (UUID) - let id = config - .get("Id") - .and_then(|v| v.as_str()) - .ok_or_else(|| { - 
CarbideError::InvalidArgument( - "JSON must contain 'Id' field to use as identifier".into(), - ) - })? - .to_string(); - - // Validate token is provided - if req.artifactory_token.is_empty() { - return Err( - CarbideError::InvalidArgument("Artifactory token is required".to_string()).into(), - ); - } - - // Parse firmware components from the JSON - let mut parsed_components = match parse_rack_firmware_json(&config) { - Ok(parsed) => { - tracing::info!( - "Parsed {} board SKUs from rack firmware config {}", - parsed.board_skus.len(), - id - ); - parsed - } - Err(e) => { - tracing::warn!( - "Failed to parse firmware components from config {}: {}", - id, - e - ); - ParsedFirmwareComponents { - board_skus: Vec::new(), - switch_system_images: Vec::new(), - } - } - }; - - let switch_system_images = - collect_switch_system_images(&config).map_err(CarbideError::InvalidArgument)?; - if !switch_system_images.is_empty() { - tracing::info!( - "Parsed {} switch system images from rack firmware config {}", - switch_system_images.len(), - id - ); - } - parsed_components.switch_system_images = switch_system_images; - - let parsed_components = if parsed_components.board_skus.is_empty() - && parsed_components.switch_system_images.is_empty() - { - None - } else { - Some( - serde_json::to_value(parsed_components).map_err(|e| CarbideError::Internal { - message: format!("Failed to serialize parsed components: {}", e), - })?, - ) - }; - - // Store token in Vault - tracing::info!("Storing Rack firmware config {} with token in Vault", id); - - api.credential_manager - .set_credentials( - &CredentialKey::RackFirmware { - firmware_id: id.clone(), - }, - &Credentials::UsernamePassword { - username: id.clone(), - password: req.artifactory_token.clone(), - }, - ) - .await - .map_err(|e| CarbideError::Internal { - message: format!("Failed to store token in Vault: {}", e), - })?; - - let mut txn = api - .database_connection - .begin() - .await - .map_err(|e| 
CarbideError::from(DatabaseError::new("begin create", e)))?; - - let rack_hardware_type: model::rack_type::RackHardwareType = req - .rack_hardware_type - .ok_or_else(|| CarbideError::MissingArgument("rack_hardware_type"))? - .into(); - - let mut db_config = rack_firmware_db::create( - &mut txn, - &id, - rack_hardware_type.clone(), - config, - parsed_components, - ) - .await?; - - // Auto-set as default if no default exists for this rack_hardware_type. - if !rack_firmware_db::has_default(&mut txn, &rack_hardware_type).await? { - db_config = rack_firmware_db::set_default(&mut txn, &id).await?; - tracing::info!( - firmware_id = %id, - rack_hardware_type = %rack_hardware_type, - "Auto-set as default firmware (first for this rack hardware type)." - ); - } - - txn.commit() - .await - .map_err(|e| CarbideError::from(DatabaseError::new("commit create", e)))?; - - // Spawn background task to download firmware files - if let Some(parsed_value) = &db_config.parsed_components { - // Deserialize back to struct for download task - if let Ok(parsed_struct) = - serde_json::from_value::(parsed_value.0.clone()) - { - spawn_firmware_download_task( - id.clone(), - parsed_struct, - api.credential_manager.clone() as Arc, - api.database_connection.clone(), - ); - tracing::info!( - firmware_id = %id, - "Spawned background task to download firmware files" - ); - } - } - - Ok(Response::new((&db_config).into())) -} - -/// Get a Rack firmware configuration by ID -pub async fn get( - api: &Api, - request: Request, -) -> Result, Status> { - let req = request.into_inner(); - - let db_config = rack_firmware_db::find_by_id(&api.database_connection, &req.id) - .await - .map_err(CarbideError::from)?; - - Ok(Response::new((&db_config).into())) -} - -/// List all Rack firmware configurations -pub async fn list( - api: &Api, - request: Request, -) -> Result, Status> { - let filter: model::rack_firmware::RackFirmwareSearchFilter = request.into_inner().into(); - - let mut txn = api - .database_connection 
- .begin() - .await - .map_err(|e| CarbideError::from(DatabaseError::new("begin list", e)))?; - - let db_configs = rack_firmware_db::list_all(&mut txn, filter).await?; - - txn.commit() - .await - .map_err(|e| CarbideError::from(DatabaseError::new("commit list", e)))?; - - let configs = db_configs - .into_iter() - .map(|db_config| (&db_config).into()) - .collect(); - - Ok(Response::new(RackFirmwareList { configs })) -} - -/// Delete a Rack firmware configuration -pub async fn delete( - api: &Api, - request: Request, -) -> Result, Status> { - let req = request.into_inner(); - - let mut txn = api - .database_connection - .begin() - .await - .map_err(|e| CarbideError::from(DatabaseError::new("begin delete", e)))?; - - rack_firmware_db::delete(&mut txn, &req.id) - .await - .map_err(CarbideError::from)?; - - txn.commit() - .await - .map_err(|e| CarbideError::from(DatabaseError::new("commit delete", e)))?; - - // cleanup of downloaded firmware files - let firmware_cache_dir = PathBuf::from("/forge-boot-artifacts/blobs/internal/fw") - .join("rack_firmware") - .join(&req.id); - if let Err(e) = tokio::fs::remove_dir_all(&firmware_cache_dir).await { - tracing::warn!( - firmware_id = %req.id, - "Failed to delete firmware cache directory {}: {}", - firmware_cache_dir.display(), - e - ); - } - - // cleanup of credentials from Vault - let credential_key = CredentialKey::RackFirmware { - firmware_id: req.id.clone(), - }; - if let Err(e) = api - .credential_manager - .delete_credentials(&credential_key) - .await - { - tracing::warn!( - firmware_id = %req.id, - "Failed to delete credentials from Vault: {}", - e - ); - } - - Ok(Response::new(())) -} - -/// Spawn a background task to download firmware files and mark as available when complete -fn spawn_firmware_download_task( - firmware_id: String, - parsed_components: ParsedFirmwareComponents, - credential_reader: Arc, - database_connection: sqlx::PgPool, -) { - tokio::spawn(async move { - if let Err(e) = download_firmware_files( - 
&firmware_id, - &parsed_components, - &*credential_reader, - &database_connection, - ) - .await - { - tracing::error!( - firmware_id = %firmware_id, - error = %e, - "Failed to download firmware files" - ); - } - }); -} - -/// Download all firmware files for a rack firmware configuration -async fn download_firmware_files( - firmware_id: &str, - parsed_components: &ParsedFirmwareComponents, - credential_reader: &dyn CredentialReader, - database_connection: &sqlx::PgPool, -) -> Result<(), String> { - // Retrieve token from Vault - let credentials = credential_reader - .get_credentials(&CredentialKey::RackFirmware { - firmware_id: firmware_id.to_string(), - }) - .await - .map_err(|e| format!("Failed to get token from Vault: {}", e))?; - - let artifactory_token = match credentials { - Some(Credentials::UsernamePassword { password, .. }) => password, - None => "".to_string(), // no credentials for this download - }; - - tracing::info!( - firmware_id = %firmware_id, - "Starting firmware download for {} board SKUs", - parsed_components.board_skus.len() - ); - - // Create firmware cache directory if it doesn't exist - let firmware_cache_dir = PathBuf::from("/forge-boot-artifacts/blobs/internal/fw") - .join("rack_firmware") - .join(firmware_id); - tokio::fs::create_dir_all(&firmware_cache_dir) - .await - .map_err(|e| format!("Failed to create cache directory: {}", e))?; - - // Collect all download tasks - let mut task_set = JoinSet::new(); - let mut total_locations = 0; - - for board_sku in &parsed_components.board_skus { - for firmware_component in &board_sku.firmware_components { - for location in &firmware_component.locations { - total_locations += 1; - - let url = location.location.clone(); - let location_type = location.location_type.clone(); - let component = firmware_component.component.clone(); - let bundle = firmware_component.bundle.clone(); - let token = artifactory_token.clone(); - let dest_dir = firmware_cache_dir.clone(); - - task_set.spawn(async move { - ( - 
true, - download_single_file( - url, - location_type, - component, - bundle, - token, - dest_dir, - ) - .await, - ) - }); - } - } - } - - for system_image in &parsed_components.switch_system_images { - total_locations += 1; - - let url = system_image.location.clone(); - let location_type = system_image.location_type.clone(); - let component = system_image.component.clone(); - let bundle = Some(system_image.package_name.clone()); - let token = artifactory_token.clone(); - let dest_dir = firmware_cache_dir.clone(); - let required = system_image.required; - - task_set.spawn(async move { - ( - required, - download_single_file(url, location_type, component, bundle, token, dest_dir).await, - ) - }); - } - - tracing::info!( - firmware_id = %firmware_id, - total_locations = total_locations, - "Spawned download tasks for all firmware locations" - ); - - // Wait for all downloads to complete - let mut successful_downloads = 0; - let mut failed_downloads = 0; - let mut failed_required_downloads = 0; - - while let Some(result) = task_set.join_next().await { - match result { - Ok((_, Ok(_))) => successful_downloads += 1, - Ok((required, Err(e))) => { - tracing::warn!(error = %e, "Firmware download failed"); - failed_downloads += 1; - if required { - failed_required_downloads += 1; - } - } - Err(join_error) => { - tracing::error!(error = %join_error, "Download task panicked"); - failed_downloads += 1; - failed_required_downloads += 1; - } - } - } - - tracing::info!( - firmware_id = %firmware_id, - successful = successful_downloads, - failed = failed_downloads, - total = total_locations, - "Firmware download completed" - ); - - // Mark firmware as available if all required downloads succeeded - if failed_required_downloads == 0 { - // Build firmware lookup table - let lookup_table = build_firmware_lookup_table(parsed_components); - let lookup_json = serde_json::to_value(&lookup_table) - .map_err(|e| format!("Failed to serialize lookup table: {}", e))?; - - tracing::info!( - 
firmware_id = %firmware_id, - device_types = lookup_table.devices.len(), - "Built firmware lookup table" - ); - - let mut txn = database_connection - .begin() - .await - .map_err(|e| format!("Failed to begin transaction: {}", e))?; - - // Update parsed_components with the lookup table - let query = "UPDATE rack_firmware SET parsed_components = $2::jsonb, available = true, updated = NOW() WHERE id = $1"; - sqlx::query(query) - .bind(firmware_id) - .bind(sqlx::types::Json(lookup_json)) - .execute(&mut *txn) - .await - .map_err(|e| format!("Failed to update firmware lookup table: {}", e))?; - - txn.commit() - .await - .map_err(|e| format!("Failed to commit transaction: {}", e))?; - - tracing::info!( - firmware_id = %firmware_id, - "Marked rack firmware as available with lookup table" - ); - } else { - tracing::warn!( - firmware_id = %firmware_id, - failed = failed_downloads, - failed_required = failed_required_downloads, - "Firmware not marked as available due to required download failures" - ); - } - - Ok(()) -} - -/// Known device types based on BoardSKU SKUID patterns -#[derive(Debug, Clone, PartialEq)] -enum DeviceType { - /// GB200 Compute Tray (P4975 Bianca) - needs HMC and BMC firmware - /// Also contains Power Shelf firmware that gets extracted separately - GB200ComputeTray, - /// Juliet Switch (P4978) - needs switch firmware - JulietSwitch, - /// Power Shelf - firmware is included in GB200ComputeTray BoardSKU - PowerShelf, - /// Unknown device type - Unknown, -} - -/// Map BoardSKU SKUID to a known device type -fn get_device_type_from_skuid(sku_id: &str) -> DeviceType { - // GB200 Compute Tray SKUIDs (P4975 Bianca) - const GB200_COMPUTE_TRAY_SKUIDS: &[&str] = &["699-24764-0001-TS3", "699-24764-0001-TS1"]; - - // Juliet Switch SKUIDs (P4978) - const JULIET_SWITCH_SKUIDS: &[&str] = &[ - "920-9K36F-00MV-QS1", - "692-9K36F-00MV-JQS", - "920-9K36F-B4MV-QS1", - "692-9K36F-B4MV-JD0", - "920-9K36F-A5MV-QS1", - "692-9K36F-A5MV-JQS", - "920-9K36N-00MV-QS1", - 
"692-9K36N-00MV-JQS", - "920-9K36N-09MV-QS1", - "692-9K36N-09MV-JSO", - ]; - - // The sku_id field may contain multiple comma-separated SKUIDs - let skuids: Vec<&str> = sku_id.split(',').map(|s| s.trim()).collect(); - - for skuid in &skuids { - if GB200_COMPUTE_TRAY_SKUIDS.contains(skuid) { - return DeviceType::GB200ComputeTray; - } - if JULIET_SWITCH_SKUIDS.contains(skuid) { - return DeviceType::JulietSwitch; - } - } - - DeviceType::Unknown -} - -/// Get the firmware components to extract for a given device type -/// Returns: Vec of (component_name_to_match, lookup_key, target) -fn get_firmware_components_for_device_type( - device_type: &DeviceType, -) -> Vec<(&'static str, &'static str, &'static str)> { - match device_type { - DeviceType::GB200ComputeTray => vec![ - // (Component name in JSON, Key in lookup table, Redfish target) - ("HMC", "HMC", "/redfish/v1/Chassis/HGX_Chassis_0"), - ("BMC", "BMC", "FW_BMC_0"), - ], - DeviceType::JulietSwitch => vec![ - ("BMC+FPGA+EROT", "BMC", "bmc"), - ("BMC+FPGA+EROT", "FPGA", "fpga"), - ("BMC+FPGA+EROT", "EROT", "erot"), - // CPLD — disabled: RMS does not support CPLD updates yet - // ("CPLD", "CPLD", "cpld"), - ("SBIOS+EROT", "BIOS", "bios"), - ], - DeviceType::PowerShelf => vec![ - // Power Shelf firmware - found in GB200ComputeTray BoardSKU - // TODO: Confirm correct targets for Power Shelf components - ("Power Shelf FW", "PowerShelfFW", "TODO_POWERSHELF_TARGET"), - ], - DeviceType::Unknown => vec![], - } -} - -/// Build a lookup table mapping device types and components to downloaded firmware files -fn build_firmware_lookup_table( - parsed_components: &ParsedFirmwareComponents, -) -> FirmwareLookupTable { - let mut lookup = FirmwareLookupTable { - devices: std::collections::HashMap::new(), - switch_system_images: std::collections::HashMap::new(), - }; - - for board_sku in &parsed_components.board_skus { - // Determine device type from SKUID - let device_type = get_device_type_from_skuid(&board_sku.sku_id); - - if 
device_type == DeviceType::Unknown { - tracing::debug!( - sku_id = %board_sku.sku_id, - sku_name = %board_sku.name, - "Unknown device type for BoardSKU, skipping" - ); - continue; - } - - // Get the firmware components we need to extract for this device type - let components_to_extract = get_firmware_components_for_device_type(&device_type); - - // For GB200ComputeTray, also extract Power Shelf firmware - let power_shelf_components = if device_type == DeviceType::GB200ComputeTray { - get_firmware_components_for_device_type(&DeviceType::PowerShelf) - } else { - vec![] - }; - - let mut device_components = std::collections::HashMap::new(); - let mut power_shelf_device_components = std::collections::HashMap::new(); - - for firmware_component in &board_sku.firmware_components { - let component_name = &firmware_component.component; - let bundle = firmware_component.bundle.clone().unwrap_or_default(); - - // Get firmware type (Prod/Dev), normalize to lowercase - let fw_type = firmware_component - .component_type - .as_ref() - .map(|t| t.to_lowercase()) - .unwrap_or_else(|| "prod".to_string()); // Default to prod if not specified - - // Check if this component is one we need to extract for the main device type - for (match_name, lookup_key, target) in &components_to_extract { - if component_name == *match_name { - // Find the firmware location and extract filename - for location in &firmware_component.locations { - if location.firmware_type.as_deref() == Some("Firmware") - && let Some(filename) = location.location.split('/').next_back() - { - // Use key format: "HMC_prod" or "HMC_dev" - let typed_key = format!("{}_{}", lookup_key, fw_type); - device_components.insert( - typed_key.clone(), - FirmwareLookupEntry { - filename: filename.to_string(), - target: target.to_string(), - component: component_name.clone(), - bundle: bundle.clone(), - firmware_type: fw_type.clone(), - version: firmware_component.version.clone(), - subcomponents: 
firmware_component.subcomponents.clone(), - }, - ); - tracing::debug!( - device_type = ?device_type, - component = %component_name, - firmware_type = %fw_type, - filename = %filename, - target = %target, - "Added firmware component to lookup table" - ); - break; // Found the file for this target - } - } - } - } - - // Check if this component is Power Shelf firmware (embedded in GB200ComputeTray) - for (match_name, lookup_key, target) in &power_shelf_components { - if component_name == *match_name { - // Power Shelf FW has subcomponents with firmware locations - // For now, just record that we have Power Shelf firmware - // TODO: Extract individual subcomponent firmware files - let typed_key = format!("{}_{}", lookup_key, fw_type); - power_shelf_device_components.insert( - typed_key, - FirmwareLookupEntry { - filename: "".to_string(), // Subcomponents have individual files - target: target.to_string(), - component: component_name.clone(), - bundle: bundle.clone(), - firmware_type: fw_type.clone(), - version: firmware_component.version.clone(), - subcomponents: firmware_component.subcomponents.clone(), - }, - ); - tracing::debug!( - component = %component_name, - target = %target, - "Added Power Shelf firmware component to lookup table" - ); - break; - } - } - } - - if !device_components.is_empty() { - // Use a consistent device type key for the lookup table - let device_key = match device_type { - DeviceType::GB200ComputeTray => "Compute Node", - DeviceType::JulietSwitch => "Switch Tray", - DeviceType::PowerShelf => "Power Shelf", - DeviceType::Unknown => continue, - }; - lookup - .devices - .insert(device_key.to_string(), device_components); - } - - // Insert Power Shelf components if found - if !power_shelf_device_components.is_empty() { - lookup - .devices - .insert("Power Shelf".to_string(), power_shelf_device_components); - } - } - - for system_image in &parsed_components.switch_system_images { - let typed_key = format!("{}_{}", system_image.component, 
system_image.firmware_type); - lookup - .switch_system_images - .entry(system_image.device_type.clone()) - .or_default() - .insert( - typed_key, - SwitchSystemImageLookupEntry { - component: system_image.component.clone(), - package_name: system_image.package_name.clone(), - version: system_image.version.clone(), - image_filename: system_image.image_filename.clone(), - location_type: system_image.location_type.clone(), - firmware_type: system_image.firmware_type.clone(), - }, - ); - } - - lookup -} - -/// Download a single firmware file -async fn download_single_file( - url: String, - location_type: String, - component: String, - bundle: Option, - token: String, - dest_dir: PathBuf, -) -> Result<(), String> { - // Extract filename from URL - let filename = url - .split('/') - .next_back() - .ok_or_else(|| format!("Invalid URL: {}", url))?; - - let dest_path = dest_dir.join(filename); - - // Skip if file already exists - if dest_path.exists() { - tracing::debug!( - component = %component, - filename = %filename, - "File already cached, skipping download" - ); - return Ok(()); - } - - tracing::info!( - component = %component, - bundle = ?bundle, - url = %url, - location_type = %location_type, - "Downloading firmware file" - ); - - // Build HTTP client - let client = reqwest::Client::builder() - .danger_accept_invalid_certs(true) - .connect_timeout(std::time::Duration::from_secs(30)) - .timeout(std::time::Duration::from_secs(600)) // 10 minutes for large files - .build() - .map_err(|e| format!("Failed to build HTTP client: {}", e))?; - - // Try downloading without token first - let response = match client.get(&url).send().await { - Ok(resp) if resp.status().is_success() => resp, - Ok(resp) if resp.status() == reqwest::StatusCode::UNAUTHORIZED => { - tracing::debug!( - url = %url, - "Authentication required, retrying with token" - ); - - // Retry with token - client - .get(&url) - .header("X-JFrog-Art-Api", &token) - .send() - .await - .map_err(|e| format!("Failed to 
download with token: {}", e))? - } - Ok(resp) => { - return Err(format!( - "Download failed with status {}: {}", - resp.status(), - url - )); - } - Err(e) => { - tracing::debug!( - url = %url, - error = %e, - "Download without token failed, retrying with token" - ); - - // Try with token on any error - client - .get(&url) - .header("X-JFrog-Art-Api", &token) - .send() - .await - .map_err(|e| format!("Failed to download with token: {}", e))? - } - }; - - // Check if response is successful - if !response.status().is_success() { - return Err(format!( - "Download failed with status {}: {}", - response.status(), - url - )); - } - - // Download file content - let bytes = response - .bytes() - .await - .map_err(|e| format!("Failed to read response body: {}", e))?; - - // Write to file - tokio::fs::write(&dest_path, bytes) - .await - .map_err(|e| format!("Failed to write file {}: {}", dest_path.display(), e))?; - - tracing::info!( - component = %component, - filename = %filename, - path = %dest_path.display(), - "Successfully downloaded firmware file" - ); - - Ok(()) -} - -/// Apply firmware to all devices in a rack -pub async fn apply( - api: &Api, - request: Request, -) -> Result, Status> { - let req = request.into_inner(); - let rack_id = req - .rack_id - .ok_or_else(|| CarbideError::InvalidArgument("rack_id is required".to_string()))?; - - tracing::info!( - rack_id = %rack_id, - firmware_id = %req.firmware_id, - firmware_type = %req.firmware_type, - "Starting firmware apply operation" - ); - - // Get the RackFirmware configuration from the database - let fw_config = rack_firmware_db::find_by_id(&api.database_connection, &req.firmware_id) - .await - .map_err(|e| CarbideError::Internal { - message: format!("Failed to get firmware configuration: {}", e), - })?; - - if !fw_config.available { - return Err(CarbideError::FailedPrecondition(format!( - "Firmware configuration '{}' is not marked as available", - req.firmware_id - )) - .into()); - } - - let rack = 
db::rack::find_by( - api.db_reader().as_mut(), - ObjectColumnFilter::One(db::rack::IdColumn, &rack_id), - ) - .await - .map_err(CarbideError::from)? - .pop() - .ok_or_else(|| CarbideError::NotFoundError { - kind: "rack", - id: rack_id.to_string(), - })?; - - // Validate firmware hardware type and firmware_type against rack capabilities. - if let Some(rack_profile_id) = rack.rack_profile_id.as_ref() - && let Some(profile) = api - .runtime_config - .rack_profiles - .get(rack_profile_id.as_str()) - { - // Validate firmware hardware type matches rack's hardware type. - if !fw_config.rack_hardware_type.is_any() - && let Some(rack_hw_type) = &profile.rack_hardware_type - && *rack_hw_type != fw_config.rack_hardware_type - { - return Err(CarbideError::FailedPrecondition(format!( - "Firmware hardware type '{}' does not match rack '{}' hardware type '{}'", - fw_config.rack_hardware_type, rack_id, rack_hw_type - )) - .into()); - } - - // Validate firmware_type matches rack's hardware class. - if let Some(rack_hw_class) = profile.rack_hardware_class { - let expected_fw_type = match rack_hw_class { - model::rack_type::RackHardwareClass::Dev => "dev", - model::rack_type::RackHardwareClass::Prod => "prod", - }; - if req.firmware_type != expected_fw_type { - return Err(CarbideError::FailedPrecondition(format!( - "Firmware type '{}' does not match rack '{}' hardware class '{}'", - req.firmware_type, rack_id, rack_hw_class - )) - .into()); - } - } - } - - let inventory = load_rack_firmware_inventory( - &api.database_connection, - api.credential_manager.as_ref(), - &rack_id, - ) - .await - .map_err(|e| CarbideError::Internal { - message: format!("Failed to load rack firmware inventory: {}", e), - })?; - - if inventory.machines.is_empty() && inventory.switches.is_empty() { - return Err(CarbideError::FailedPrecondition(format!( - "Rack '{}' contains no compute or switch devices", - rack_id - )) - .into()); - } - - tracing::info!( - rack_id = %rack_id, - compute_trays = 
inventory.machines.len(), - switches = inventory.switches.len(), - "Found supported devices in rack" - ); - - let rms_client = api - .rms_client - .as_ref() - .ok_or_else(|| CarbideError::FailedPrecondition("RMS client not configured".to_string()))?; - let batches = - build_firmware_update_batches(&rack_id, &fw_config, &req.firmware_type, &inventory, &[]) - .map_err(|e| CarbideError::Internal { - message: format!("Failed to build firmware update requests: {}", e), - })?; - - if batches.is_empty() { - return Err(CarbideError::FailedPrecondition(format!( - "Rack '{}' contains no supported compute or switch devices", - rack_id - )) - .into()); - } - - let submissions = submit_firmware_update_batches(rms_client.as_ref(), batches).await; - let mut device_results = Vec::with_capacity(submissions.len()); - let mut successful_updates = 0; - let mut failed_updates = 0; - - for submission in submissions { - match submission.response { - Ok(response) => { - let batch_response = response.response.as_ref(); - let success = batch_response - .map(|batch_response| { - batch_response.status - == librms::protos::rack_manager::ReturnCode::Success as i32 - }) - .unwrap_or(false); - let total_nodes = batch_response - .map(|batch_response| batch_response.total_nodes) - .unwrap_or(response.node_jobs.len() as i32); - let message = batch_response - .map(|batch_response| batch_response.message.as_str()) - .unwrap_or_default(); - let job_id = batch_response - .map(|batch_response| batch_response.job_id.clone()) - .unwrap_or_default(); - - if success { - successful_updates += 1; - } else { - failed_updates += 1; - } - - let node_jobs: Vec = response - .node_jobs - .iter() - .map(|job| NodeJobInfo { - node_id: job.node_id.clone(), - job_id: job.job_id.clone(), - }) - .collect(); - - device_results.push(DeviceUpdateResult { - device_id: rack_id.to_string(), - device_type: submission.display_name.to_string(), - success, - message: format!( - "Async firmware update initiated for {} nodes: {}", - 
total_nodes, message - ), - job_id, - node_jobs, - }); - } - Err(error) => { - failed_updates += 1; - device_results.push(DeviceUpdateResult { - device_id: rack_id.to_string(), - device_type: submission.display_name.to_string(), - success: false, - message: format!("RMS API Error: {}", error), - job_id: String::new(), - node_jobs: vec![], - }); - } - } - } - - tracing::info!( - rack_id = %rack_id, - firmware_id = %req.firmware_id, - successful = successful_updates, - failed = failed_updates, - total = device_results.len(), - "Firmware apply operation completed" - ); - - // Record apply event in history - let rack_id_str = rack_id.to_string(); - let mut conn = api - .database_connection - .acquire() - .await - .map_err(|e| CarbideError::from(DatabaseError::new("acquire for apply history", e)))?; - db::rack_firmware::record_apply_history( - &mut conn, - &req.firmware_id, - &rack_id_str, - &req.firmware_type, - fw_config.rack_hardware_type, - ) - .await - .map_err(CarbideError::from)?; - - Ok(Response::new(RackFirmwareApplyResponse { - total_updates: device_results.len() as i32, - successful_updates, - failed_updates, - device_results, - })) -} - -/// Get the status of an async firmware update job by proxying to RMS GetFirmwareJobStatus -pub async fn get_job_status( - api: &Api, - request: Request, -) -> Result, Status> { - let req = request.into_inner(); - - if req.job_id.is_empty() { - return Err(CarbideError::InvalidArgument("job_id is required".to_string()).into()); - } - - let rms_client = api - .rms_client - .as_ref() - .ok_or_else(|| CarbideError::FailedPrecondition("RMS client not configured".to_string()))?; - - let rms_request = librms::protos::rack_manager::GetFirmwareJobStatusRequest { - metadata: None, - job_id: req.job_id.clone(), - }; - - let rms_response = rms_client - .get_firmware_job_status(rms_request) - .await - .map_err(|e| CarbideError::Internal { - message: format!("RMS API error: {}", e), - })?; - - // Map FirmwareJobState enum to 
human-readable string - let state = match rms_response.job_state { - 0 => "QUEUED", - 1 => "RUNNING", - 2 => "COMPLETED", - 3 => "FAILED", - _ => "UNKNOWN", - }; - - Ok(Response::new(RackFirmwareJobStatusResponse { - job_id: rms_response.job_id, - state: state.to_string(), - state_description: rms_response.state_description, - rack_id: rms_response.rack_id, - node_id: rms_response.node_id, - error_message: rms_response.error_message, - result_json: rms_response.result_json, - })) -} - -/// Get the history of rack firmware apply operations -pub async fn get_history( - api: &Api, - request: Request, -) -> Result, Status> { - let req = request.into_inner(); - - let firmware_id_filter = if req.firmware_id.is_empty() { - None - } else { - Some(req.firmware_id.as_str()) - }; - - let mut conn = api - .database_connection - .acquire() - .await - .map_err(|e| CarbideError::from(DatabaseError::new("acquire for history", e)))?; - - let records = - db::rack_firmware::list_apply_history(&mut conn, firmware_id_filter, &req.rack_ids) - .await - .map_err(CarbideError::from)?; - - // Group results by rack_id - let mut histories: std::collections::HashMap> = std::collections::HashMap::new(); - for record in records { - let rack_id = record.rack_id.clone(); - histories.entry(rack_id).or_default().push(record.into()); - } - - let histories = histories - .into_iter() - .map(|(rack_id, records)| (rack_id, RackFirmwareHistoryRecords { records })) - .collect(); - - Ok(Response::new(RackFirmwareHistoryResponse { histories })) -} - -/// Set a rack firmware configuration as the default for its rack hardware type. 
-pub async fn set_default( - api: &Api, - request: Request, -) -> Result, Status> { - let req = request.into_inner(); - - if req.firmware_id.is_empty() { - return Err(CarbideError::InvalidArgument("firmware_id is required".to_string()).into()); - } - - let mut txn = api - .database_connection - .begin() - .await - .map_err(|e| CarbideError::from(DatabaseError::new("begin set_default", e)))?; - - rack_firmware_db::set_default(&mut txn, &req.firmware_id).await?; - - txn.commit() - .await - .map_err(|e| CarbideError::from(DatabaseError::new("commit set_default", e)))?; - - Ok(Response::new(())) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn build_firmware_lookup_table_includes_switch_system_images() { - let parsed = ParsedFirmwareComponents { - board_skus: Vec::new(), - switch_system_images: vec![SwitchSystemImageArtifact { - device_type: "Switch Tray".to_string(), - component: "NVOS".to_string(), - version: "25.02.2553".to_string(), - firmware_type: "prod".to_string(), - package_name: "GB200NVL72_NVOS".to_string(), - location: "https://example.invalid/nvos-amd64-25.02.2553.bin".to_string(), - location_type: "HTTPS".to_string(), - required: true, - image_filename: "nvos-amd64-25.02.2553.bin".to_string(), - }], - }; - - let lookup = build_firmware_lookup_table(&parsed); - let entry = lookup - .switch_system_images - .get("Switch Tray") - .and_then(|images| images.get("NVOS_prod")) - .expect("expected NVOS_prod lookup entry"); - - assert_eq!(entry.component, "NVOS"); - assert_eq!(entry.package_name, "GB200NVL72_NVOS"); - assert_eq!(entry.version, "25.02.2553"); - assert_eq!(entry.image_filename, "nvos-amd64-25.02.2553.bin"); - assert_eq!(entry.location_type, "HTTPS"); - assert_eq!(entry.firmware_type, "prod"); - } -} diff --git a/crates/api/src/handlers/switch_artifacts.rs b/crates/api/src/handlers/switch_artifacts.rs deleted file mode 100644 index 1966761f7b..0000000000 --- a/crates/api/src/handlers/switch_artifacts.rs +++ /dev/null @@ -1,518 +0,0 @@ 
-/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -use std::collections::HashSet; - -use serde::{Deserialize, Serialize}; -use serde_json::Value; - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub(crate) struct SwitchSystemImageArtifact { - pub(crate) device_type: String, - pub(crate) component: String, - pub(crate) version: String, - pub(crate) firmware_type: String, - pub(crate) package_name: String, - pub(crate) location: String, - pub(crate) location_type: String, - pub(crate) required: bool, - pub(crate) image_filename: String, -} - -#[derive(Debug, Clone, Deserialize, Default)] -struct RawSwitchArtifactsConfig { - #[serde(rename = "SwitchSystemImages", default)] - switch_system_images: Vec, - #[serde(rename = "BoardSKUs", default)] - board_skus: Vec, -} - -#[derive(Debug, Clone, Deserialize)] -struct RawSwitchSystemImageArtifact { - #[serde(rename = "DeviceType")] - device_type: String, - #[serde(rename = "Component")] - component: String, - #[serde(rename = "Version")] - version: String, - #[serde(rename = "Type")] - firmware_type: String, - #[serde(rename = "PackageName")] - package_name: String, - #[serde(rename = "Location")] - location: String, - #[serde(rename = "LocationType")] - location_type: String, - #[serde(rename = "Required", default = "default_true")] - required: bool, -} - 
-#[derive(Debug, Clone, Deserialize, Default)] -struct RawBoardSku { - #[serde(rename = "Name", default)] - name: String, - #[serde(rename = "Type", default)] - sku_type: String, - #[serde(rename = "Components", default)] - components: RawBoardSkuComponents, -} - -#[derive(Debug, Clone, Deserialize, Default)] -struct RawBoardSkuComponents { - #[serde(rename = "Software", default)] - software: Vec, -} - -#[derive(Debug, Clone, Deserialize, Default)] -struct RawSoftwareComponent { - #[serde(rename = "Component", default)] - component: String, - #[serde(rename = "Version", default)] - version: String, - #[serde(rename = "Type", default)] - firmware_type: String, - #[serde(rename = "Locations", default)] - locations: Vec, -} - -#[derive(Debug, Clone, Deserialize, Default)] -struct RawSoftwareLocation { - #[serde(rename = "Name", default)] - name: String, - #[serde(rename = "Location", default)] - location: String, - #[serde(rename = "LocationType", default)] - location_type: String, - #[serde(rename = "PackageName", default)] - package_name: String, - #[serde(rename = "Required", default = "default_true")] - required: bool, -} - -pub(crate) fn collect_switch_system_images( - config: &Value, -) -> Result, String> { - let raw: RawSwitchArtifactsConfig = serde_json::from_value(config.clone()) - .map_err(format_switch_artifacts_deserialize_error)?; - - let mut parsed = Vec::new(); - let mut seen_keys = HashSet::new(); - - for (idx, entry) in raw.switch_system_images.iter().enumerate() { - let device_type = entry.device_type.trim(); - if device_type != "Switch Tray" { - return Err(format!( - "SwitchSystemImages[{idx}].DeviceType must be 'Switch Tray'" - )); - } - - let component = entry.component.trim(); - if component != "NVOS" { - return Err(format!( - "SwitchSystemImages[{idx}].Component must be 'NVOS'" - )); - } - - let version = entry.version.trim(); - if version.is_empty() { - return Err(format!("SwitchSystemImages[{idx}].Version is required")); - } - - let 
firmware_type = entry.firmware_type.trim(); - if firmware_type.is_empty() { - return Err(format!("SwitchSystemImages[{idx}].Type is required")); - } - let firmware_type = firmware_type.to_lowercase(); - - let package_name = entry.package_name.trim(); - if package_name.is_empty() { - return Err(format!("SwitchSystemImages[{idx}].PackageName is required")); - } - - let location = entry.location.trim(); - if location.is_empty() { - return Err(format!("SwitchSystemImages[{idx}].Location is required")); - } - - let location_type = entry.location_type.trim(); - if location_type.is_empty() { - return Err(format!( - "SwitchSystemImages[{idx}].LocationType is required" - )); - } - - let key = ( - device_type.to_string(), - component.to_string(), - firmware_type.clone(), - ); - if seen_keys.insert(key) { - parsed.push(SwitchSystemImageArtifact { - device_type: device_type.to_string(), - component: component.to_string(), - version: version.to_string(), - firmware_type, - package_name: package_name.to_string(), - location: location.to_string(), - location_type: location_type.to_string(), - required: entry.required, - image_filename: filename_from_location(location)?, - }); - } - } - - for (board_idx, board_sku) in raw.board_skus.iter().enumerate() { - if board_sku.sku_type.trim() != "Switch Tray" { - continue; - } - - let board_context = if board_sku.name.trim().is_empty() { - format!("BoardSKUs[{board_idx}]") - } else { - format!("BoardSKUs[{board_idx}] ({})", board_sku.name.trim()) - }; - - let board_nvos_package_name = board_sku - .components - .software - .iter() - .filter(|software| software.component.trim() == "NVOS") - .flat_map(|software| software.locations.iter()) - .map(|location| location.package_name.trim()) - .find(|package_name| !package_name.is_empty()); - - for (software_idx, software) in board_sku.components.software.iter().enumerate() { - if software.component.trim() != "NVOS" { - continue; - } - - let context = 
format!("{board_context}.Components.Software[{software_idx}]"); - let version = software.version.trim(); - if version.is_empty() { - return Err(format!("{context}.Version is required")); - } - - let firmware_type = software.firmware_type.trim(); - if firmware_type.is_empty() { - return Err(format!("{context}.Type is required")); - } - let firmware_type = firmware_type.to_lowercase(); - - let software_package_name = software - .locations - .iter() - .map(|location| location.package_name.trim()) - .find(|package_name| !package_name.is_empty()); - - let mut candidates = Vec::new(); - for (location_idx, location) in software.locations.iter().enumerate() { - let location_value = location.location.trim(); - if location_value.is_empty() { - continue; - } - - let image_filename = filename_from_location(location_value)?; - if !image_filename.to_ascii_lowercase().ends_with(".bin") { - continue; - } - - let name_upper = location.name.to_ascii_uppercase(); - let filename_upper = image_filename.to_ascii_uppercase(); - let location_upper = location_value.to_ascii_uppercase(); - let mut score = 0; - if name_upper.contains("NVOS") || filename_upper.contains("NVOS") { - score += 2; - } - if name_upper.contains("AMD64") - || filename_upper.contains("AMD64") - || location_upper.contains("/AMD64/") - { - score += 1; - } - - candidates.push((location_idx, score, image_filename)); - } - - if candidates.is_empty() { - return Err(format!( - "{context}.Locations must include an NVOS .bin image location" - )); - } - - let best_score = candidates - .iter() - .map(|(_, score, _)| *score) - .max() - .expect("candidates is not empty"); - let best_candidates: Vec<_> = candidates - .iter() - .filter(|(_, score, _)| *score == best_score) - .collect(); - if best_candidates.len() != 1 { - return Err(format!( - "{context}.Locations contains multiple NVOS .bin image candidates" - )); - } - - let (location_idx, _, image_filename) = best_candidates[0]; - let selected_location = 
&software.locations[*location_idx]; - let location = selected_location.location.trim(); - let location_type = selected_location.location_type.trim(); - if location_type.is_empty() { - return Err(format!( - "{context}.Locations[{location_idx}].LocationType is required" - )); - } - - let selected_package_name = selected_location.package_name.trim(); - let package_name = if selected_package_name.is_empty() { - software_package_name.or(board_nvos_package_name) - } else { - Some(selected_package_name) - } - .ok_or_else(|| { - format!("{context}.Locations[{location_idx}].PackageName is required") - })?; - - let key = ( - "Switch Tray".to_string(), - "NVOS".to_string(), - firmware_type.clone(), - ); - if seen_keys.insert(key) { - parsed.push(SwitchSystemImageArtifact { - device_type: "Switch Tray".to_string(), - component: "NVOS".to_string(), - version: version.to_string(), - firmware_type, - package_name: package_name.to_string(), - location: location.to_string(), - location_type: location_type.to_string(), - required: selected_location.required, - image_filename: image_filename.to_string(), - }); - } - } - } - - Ok(parsed) -} - -fn filename_from_location(location: &str) -> Result { - location - .split('/') - .next_back() - .filter(|name| !name.is_empty()) - .map(str::to_string) - .ok_or_else(|| format!("Location must end with a filename: {location}")) -} - -fn default_true() -> bool { - true -} - -fn format_switch_artifacts_deserialize_error(error: serde_json::Error) -> String { - let message = error.to_string(); - - if let Some(field) = message - .strip_prefix("missing field `") - .and_then(|s| s.strip_suffix('`')) - { - return format!("Invalid SwitchSystemImages: {field} is required"); - } - - format!("Invalid switch artifacts: {message}") -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn derives_nvos_from_board_sku_software() { - let config = serde_json::json!({ - "BoardSKUs": [ - { - "Name": "P4978-Juliet_Switch", - "Type": "Switch Tray", - 
"Components": { - "Software": [ - { - "Component": "NVOS", - "Version": "25.02.2553", - "Type": "Prod", - "Locations": [ - { - "Name": "NVOS_Prod_AMD64", - "Location": "https://example.invalid/release/25.02.2553/amd64/prod/nvos-amd64-25.02.2553.bin", - "LocationType": "HTTPS", - "PackageName": "GB300_NVOS", - "Required": true - }, - { - "Name": "NVOS_OpenAPI_Spec", - "Location": "https://example.invalid/release/25.02.2553/openapi.json", - "LocationType": "HTTPS", - "PackageName": "GB300_NVOS", - "Required": true - } - ] - } - ] - } - } - ] - }); - - let artifacts = collect_switch_system_images(&config).unwrap(); - - assert_eq!(artifacts.len(), 1); - assert_eq!(artifacts[0].device_type, "Switch Tray"); - assert_eq!(artifacts[0].component, "NVOS"); - assert_eq!(artifacts[0].version, "25.02.2553"); - assert_eq!(artifacts[0].firmware_type, "prod"); - assert_eq!(artifacts[0].package_name, "GB300_NVOS"); - assert_eq!(artifacts[0].image_filename, "nvos-amd64-25.02.2553.bin"); - } - - #[test] - fn uses_board_sku_nvos_package_name_fallback() { - let config = serde_json::json!({ - "BoardSKUs": [ - { - "Name": "P4978-Juliet_Switch", - "Type": "Switch Tray", - "Components": { - "Software": [ - { - "Component": "NVOS", - "Version": "25.02.2553", - "Type": "Prod", - "Locations": [ - { - "Name": "NVOS_Prod_AMD64", - "Location": "https://example.invalid/prod/nvos-amd64-25.02.2553.bin", - "LocationType": "HTTPS", - "PackageName": "GB300_NVOS" - } - ] - }, - { - "Component": "NVOS", - "Version": "25.02.2553", - "Type": "Dev", - "Locations": [ - { - "Name": "NVOS_Dev_AMD64", - "Location": "https://example.invalid/dev/nvos-amd64-25.02.2553.bin", - "LocationType": "HTTPS", - "PackageName": "" - } - ] - } - ] - } - } - ] - }); - - let artifacts = collect_switch_system_images(&config).unwrap(); - - let dev = artifacts - .iter() - .find(|artifact| artifact.firmware_type == "dev") - .expect("expected dev NVOS artifact"); - assert_eq!(dev.package_name, "GB300_NVOS"); - } - - #[test] - fn 
explicit_switch_system_image_wins_over_derived() { - let config = serde_json::json!({ - "SwitchSystemImages": [ - { - "DeviceType": "Switch Tray", - "Component": "NVOS", - "Version": "explicit", - "Type": "Prod", - "PackageName": "EXPLICIT_NVOS", - "Location": "https://example.invalid/explicit.bin", - "LocationType": "HTTPS" - } - ], - "BoardSKUs": [ - { - "Type": "Switch Tray", - "Components": { - "Software": [ - { - "Component": "NVOS", - "Version": "derived", - "Type": "Prod", - "Locations": [ - { - "Name": "NVOS_Prod_AMD64", - "Location": "https://example.invalid/derived.bin", - "LocationType": "HTTPS", - "PackageName": "DERIVED_NVOS" - } - ] - } - ] - } - } - ] - }); - - let artifacts = collect_switch_system_images(&config).unwrap(); - - assert_eq!(artifacts.len(), 1); - assert_eq!(artifacts[0].version, "explicit"); - assert_eq!(artifacts[0].package_name, "EXPLICIT_NVOS"); - } - - #[test] - fn rejects_nvos_software_without_bin_location() { - let config = serde_json::json!({ - "BoardSKUs": [ - { - "Type": "Switch Tray", - "Components": { - "Software": [ - { - "Component": "NVOS", - "Version": "25.02.2553", - "Type": "Prod", - "Locations": [ - { - "Name": "NVOS_OpenAPI_Spec", - "Location": "https://example.invalid/openapi.json", - "LocationType": "HTTPS", - "PackageName": "GB300_NVOS" - } - ] - } - ] - } - } - ] - }); - - let error = collect_switch_system_images(&config).unwrap_err(); - - assert!(error.contains("must include an NVOS .bin image location")); - } -} diff --git a/crates/api/src/rack/firmware_object.rs b/crates/api/src/rack/firmware_object.rs new file mode 100644 index 0000000000..25ef4eb153 --- /dev/null +++ b/crates/api/src/rack/firmware_object.rs @@ -0,0 +1,75 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +use model::rack_type::{RackHardwareType, RackProfile}; + +pub const ANY_RACK_HARDWARE_TYPE: &str = "any"; + +pub fn hardware_type_wire_value(value: Option<&RackHardwareType>) -> String { + value.map(|value| value.0.clone()).unwrap_or_default() +} + +pub fn profile_hardware_type_wire_value(profile: &RackProfile) -> String { + hardware_type_wire_value(profile.rack_hardware_type.as_ref()) +} + +pub fn hardware_type_matches_filter( + firmware_hardware_type: &str, + rack_hardware_type: Option<&RackHardwareType>, +) -> bool { + let Some(rack_hardware_type) = rack_hardware_type else { + return true; + }; + + firmware_hardware_type + .trim() + .eq_ignore_ascii_case(ANY_RACK_HARDWARE_TYPE) + || firmware_hardware_type + .trim() + .eq_ignore_ascii_case(rack_hardware_type.0.trim()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn hardware_type_matches_any_case_and_whitespace() { + assert!(hardware_type_matches_filter( + "any", + Some(&RackHardwareType::from("gb200")) + )); + assert!(hardware_type_matches_filter( + " ANY ", + Some(&RackHardwareType::from("gb200")) + )); + assert!(hardware_type_matches_filter( + "GB200", + Some(&RackHardwareType::from("gb200")) + )); + assert!(!hardware_type_matches_filter( + "gb300", + Some(&RackHardwareType::from("gb200")) + )); + } + + #[test] + fn missing_rack_hardware_type_skips_filter() { + assert!(hardware_type_matches_filter("gb300", None)); + assert_eq!(hardware_type_wire_value(None), ""); + } +} diff --git a/crates/api/src/rack/firmware_update.rs b/crates/api/src/rack/firmware_update.rs index 
34dbc57dbf..69f57258c8 100644 --- a/crates/api/src/rack/firmware_update.rs +++ b/crates/api/src/rack/firmware_update.rs @@ -15,9 +15,6 @@ * limitations under the License. */ -use std::collections::HashMap; - -use carbide_uuid::machine::MachineId; use carbide_uuid::rack::RackId; use carbide_uuid::switch::SwitchId; use db::{machine as db_machine, machine_topology as db_machine_topology, switch as db_switch}; @@ -25,21 +22,17 @@ use eyre::{Result, eyre}; use forge_secrets::credentials::{ BmcCredentialType, CredentialKey, CredentialManager, Credentials, }; -use librms::RmsApi; use librms::protos::rack_manager as rms; use model::machine::machine_search_config::MachineSearchConfig; use model::rack::FirmwareUpgradeDeviceInfo; -use model::rack_firmware::RackFirmware; use model::rack_type::{RackHardwareClass, RackProfile}; -use serde::{Deserialize, Serialize}; -use serde_json::Value; use sqlx::PgPool; #[derive(Debug, Clone)] pub struct RackFirmwareInventory { - pub machine_ids: Vec, - pub switch_ids: Vec, + pub machine_ids: Vec, pub machines: Vec, + pub switch_ids: Vec, pub switches: Vec, } @@ -49,35 +42,6 @@ pub struct RackSwitchFirmwareInventory { pub switches: Vec, } -#[derive(Debug, Clone)] -pub struct FirmwareUpdateBatchRequest { - pub display_name: &'static str, - pub devices: Vec, - pub request: rms::UpdateFirmwareByDeviceListRequest, -} - -#[derive(Debug, Clone)] -pub struct SubmittedFirmwareBatch { - pub display_name: &'static str, - pub devices: Vec, - pub response: Result, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -struct FirmwareLookupTable { - devices: HashMap>, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -struct FirmwareLookupEntry { - filename: String, - target: String, - component: String, - bundle: String, - firmware_type: String, - version: Option, -} - pub fn firmware_type_for_profile(profile: &RackProfile) -> &'static str { match profile.rack_hardware_class { Some(RackHardwareClass::Dev) => "dev", @@ -146,8 +110,8 @@ pub async fn 
load_rack_firmware_inventory( Ok(RackFirmwareInventory { machine_ids, - switch_ids, machines, + switch_ids, switches, }) } @@ -199,98 +163,6 @@ pub async fn load_rack_switch_firmware_inventory( }) } -pub fn build_firmware_update_batches( - rack_id: &RackId, - firmware: &RackFirmware, - firmware_type: &str, - inventory: &RackFirmwareInventory, - components: &[String], -) -> Result> { - let parsed_components = firmware - .parsed_components - .as_ref() - .map(|parsed| parsed.0.clone()) - .ok_or_else(|| eyre!("firmware '{}' has no parsed components", firmware.id))?; - - let batch_definitions = [ - ( - "Compute Node", - rms::NodeType::Compute, - "Compute Node", - &inventory.machines, - true, - ), - ( - "Switch Tray", - rms::NodeType::Switch, - "Switch", - &inventory.switches, - false, - ), - ]; - - let mut batches = Vec::new(); - for (lookup_key, node_type, display_name, devices, activate) in batch_definitions { - if devices.is_empty() { - continue; - } - - let firmware_targets = build_firmware_targets( - &parsed_components, - lookup_key, - firmware_type, - &firmware.id, - components, - )?; - let node_infos = devices - .iter() - .map(|device| build_new_node_info(rack_id, device, node_type)) - .collect(); - let mut target_map = HashMap::new(); - target_map.insert( - node_type as i32, - rms::FirmwareTargetList { - targets: firmware_targets, - }, - ); - - batches.push(FirmwareUpdateBatchRequest { - display_name, - devices: devices.clone(), - request: rms::UpdateFirmwareByDeviceListRequest { - nodes: Some(rms::NodeSet { - devices: node_infos, - }), - firmware_targets: target_map, - activate, - force_update: false, - ..Default::default() - }, - }); - } - - Ok(batches) -} - -pub async fn submit_firmware_update_batches( - rms_client: &dyn RmsApi, - batches: Vec, -) -> Vec { - let mut submissions = Vec::with_capacity(batches.len()); - for batch in batches { - let response = rms_client - .update_firmware_by_device_list(batch.request) - .await - .map_err(|err| err.to_string()); - 
submissions.push(SubmittedFirmwareBatch { - display_name: batch.display_name, - devices: batch.devices, - response, - }); - } - submissions -} - async fn fetch_bmc_credentials( credential_manager: &dyn CredentialManager, bmc_mac: mac_address::MacAddress, @@ -334,47 +206,6 @@ async fn fetch_nvos_credentials( } } -fn build_firmware_targets( - parsed_components: &Value, - lookup_key: &str, - firmware_type: &str, - firmware_id: &str, - components: &[String], -) -> Result> { - let mut firmware_components = find_firmware_components_for_device( - parsed_components, - lookup_key, - firmware_type, - components, - )?; - let flash_order = get_firmware_flash_order(lookup_key); - firmware_components.sort_by_key(|(_, _, target)| { - flash_order - .iter() - .position(|candidate| candidate == &target.as_str()) - .unwrap_or(usize::MAX) - }); - - if firmware_components.is_empty() { - return Err(eyre!( - "no matching firmware found in config for {} ({})", - lookup_key, - firmware_type - )); - } - - Ok(firmware_components - .into_iter() - .map(|(_, filename, target)| rms::FirmwareTarget { - target, - filename: format!( - "/forge-boot-artifacts/blobs/internal/fw/rack_firmware/{}/{}", - firmware_id, filename - ), - }) - .collect()) -} - pub(crate) fn build_new_node_info( rack_id: &RackId, device: &FirmwareUpgradeDeviceInfo, @@ -438,50 +269,3 @@ fn user_pass_credentials(username: &str, password: &str) -> Option &'static [&'static str] { - match device_type_key { - "Switch Tray" => &["bmc", "fpga", "erot", "bios"], - "Compute Node" => &["/redfish/v1/Chassis/HGX_Chassis_0", "FW_BMC_0"], - _ => &[], - } -} - -fn find_firmware_components_for_device( - parsed_components: &Value, - hardware_type: &str, - firmware_type: &str, - components: &[String], -) -> Result> { - let lookup_table: FirmwareLookupTable = serde_json::from_value(parsed_components.clone()) - .map_err(|err| { - eyre!( - "failed to parse firmware lookup table for '{}': {}", - hardware_type, - err - ) - })?; - - let wanted_type = 
firmware_type.to_lowercase(); - let wanted_components: Vec = components.iter().map(|c| c.to_lowercase()).collect(); - let mut results = Vec::new(); - if let Some(device_components) = lookup_table.devices.get(hardware_type) { - for (component_key, entry) in device_components { - if entry.firmware_type.to_lowercase() != wanted_type { - continue; - } - if !wanted_components.is_empty() - && !wanted_components.contains(&entry.component.to_lowercase()) - { - continue; - } - results.push(( - component_key.clone(), - entry.filename.clone(), - entry.target.clone(), - )); - } - } - - Ok(results) -} diff --git a/crates/api/src/rack/mod.rs b/crates/api/src/rack/mod.rs index 558a6a278a..fa0ef30f3b 100644 --- a/crates/api/src/rack/mod.rs +++ b/crates/api/src/rack/mod.rs @@ -20,6 +20,7 @@ use librms::RackManagerError; use crate::state_controller::state_handler::{ExternalServiceError, StateHandlerError}; pub mod bms_client; +pub mod firmware_object; pub mod firmware_update; pub mod rms_client; diff --git a/crates/api/src/rack/rms_client.rs b/crates/api/src/rack/rms_client.rs index dbc82c81d7..9748f98268 100644 --- a/crates/api/src/rack/rms_client.rs +++ b/crates/api/src/rack/rms_client.rs @@ -19,10 +19,10 @@ use librms::protos::rack_manager as rms; #[async_trait::async_trait] pub trait SwitchSystemImageRmsClient: Send + Sync { - async fn update_switch_system_image( + async fn apply_switch_system_image( &self, - cmd: rms::UpdateSwitchSystemImageRequest, - ) -> Result; + cmd: rms::ApplySwitchSystemImageRequest, + ) -> Result; async fn get_switch_system_image_job_status( &self, @@ -32,11 +32,11 @@ pub trait SwitchSystemImageRmsClient: Send + Sync { #[async_trait::async_trait] impl SwitchSystemImageRmsClient for librms::RackManagerApi { - async fn update_switch_system_image( + async fn apply_switch_system_image( &self, - cmd: rms::UpdateSwitchSystemImageRequest, - ) -> Result { - self.client.update_switch_system_image(cmd).await + cmd: rms::ApplySwitchSystemImageRequest, + ) -> Result 
{ + self.client.apply_switch_system_image(cmd).await } async fn get_switch_system_image_job_status( @@ -63,14 +63,20 @@ pub mod test_support { fail_add_node: Arc, fail_inventory_get: Arc, registered_nodes: Arc>>, + firmware_objects: Arc>>, submitted_firmware_requests: Arc>>, queued_firmware_responses: Arc>>, + submitted_firmware_object_apply_requests: Arc>>, + queued_firmware_object_apply_responses: + Arc>>, + submitted_firmware_object_from_json_apply_requests: + Arc>>, firmware_job_statuses: Arc>>, firmware_job_errors: Arc>>, - submitted_switch_system_image_requests: - Arc>>, - queued_switch_system_image_responses: - Arc>>, + submitted_apply_switch_system_image_requests: + Arc>>, + queued_apply_switch_system_image_responses: + Arc>>, switch_system_image_job_statuses: Arc>>, switch_system_image_job_errors: Arc>>, @@ -86,12 +92,18 @@ pub mod test_support { fail_add_node: Arc::new(AtomicBool::new(false)), fail_inventory_get: Arc::new(AtomicBool::new(false)), registered_nodes: Arc::new(Mutex::new(Vec::new())), + firmware_objects: Arc::new(Mutex::new(HashMap::new())), submitted_firmware_requests: Arc::new(Mutex::new(Vec::new())), queued_firmware_responses: Arc::new(Mutex::new(VecDeque::new())), + submitted_firmware_object_apply_requests: Arc::new(Mutex::new(Vec::new())), + queued_firmware_object_apply_responses: Arc::new(Mutex::new(VecDeque::new())), + submitted_firmware_object_from_json_apply_requests: Arc::new( + Mutex::new(Vec::new()), + ), firmware_job_statuses: Arc::new(Mutex::new(HashMap::new())), firmware_job_errors: Arc::new(Mutex::new(HashMap::new())), - submitted_switch_system_image_requests: Arc::new(Mutex::new(Vec::new())), - queued_switch_system_image_responses: Arc::new(Mutex::new(VecDeque::new())), + submitted_apply_switch_system_image_requests: Arc::new(Mutex::new(Vec::new())), + queued_apply_switch_system_image_responses: Arc::new(Mutex::new(VecDeque::new())), switch_system_image_job_statuses: Arc::new(Mutex::new(HashMap::new())), 
switch_system_image_job_errors: Arc::new(Mutex::new(HashMap::new())), submitted_set_power_state_by_device_list_requests: Arc::new(Mutex::new(Vec::new())), @@ -119,15 +131,25 @@ pub mod test_support { fail_add_node: self.fail_add_node.clone(), fail_inventory_get: self.fail_inventory_get.clone(), registered_nodes: self.registered_nodes.clone(), + firmware_objects: self.firmware_objects.clone(), submitted_firmware_requests: self.submitted_firmware_requests.clone(), queued_firmware_responses: self.queued_firmware_responses.clone(), + submitted_firmware_object_apply_requests: self + .submitted_firmware_object_apply_requests + .clone(), + queued_firmware_object_apply_responses: self + .queued_firmware_object_apply_responses + .clone(), + submitted_firmware_object_from_json_apply_requests: self + .submitted_firmware_object_from_json_apply_requests + .clone(), firmware_job_statuses: self.firmware_job_statuses.clone(), firmware_job_errors: self.firmware_job_errors.clone(), - submitted_switch_system_image_requests: self - .submitted_switch_system_image_requests + submitted_apply_switch_system_image_requests: self + .submitted_apply_switch_system_image_requests .clone(), - queued_switch_system_image_responses: self - .queued_switch_system_image_responses + queued_apply_switch_system_image_responses: self + .queued_apply_switch_system_image_responses .clone(), switch_system_image_job_statuses: self.switch_system_image_job_statuses.clone(), switch_system_image_job_errors: self.switch_system_image_job_errors.clone(), @@ -188,16 +210,41 @@ pub mod test_support { self.submitted_firmware_requests.lock().await.clone() } - pub async fn queue_update_switch_system_image_response( + pub async fn queue_apply_firmware_object_response( &self, - response: rms::UpdateSwitchSystemImageResponse, + response: rms::ApplyFirmwareObjectResponse, ) { - self.queued_switch_system_image_responses + self.queued_firmware_object_apply_responses .lock() .await .push_back(response); } + pub async fn 
insert_firmware_object(&self, object: rms::FirmwareObject) { + self.firmware_objects + .lock() + .await + .insert(object.id.clone(), object); + } + + pub async fn submitted_apply_firmware_object_requests( + &self, + ) -> Vec { + self.submitted_firmware_object_apply_requests + .lock() + .await + .clone() + } + + pub async fn submitted_apply_firmware_object_from_json_requests( + &self, + ) -> Vec { + self.submitted_firmware_object_from_json_apply_requests + .lock() + .await + .clone() + } + pub async fn set_switch_system_image_job_status( &self, response: rms::GetSwitchSystemImageJobStatusResponse, @@ -219,10 +266,20 @@ pub mod test_support { .insert(job_id.into(), message.into()); } - pub async fn submitted_switch_system_image_requests( + pub async fn queue_apply_switch_system_image_response( &self, - ) -> Vec { - self.submitted_switch_system_image_requests + response: rms::ApplySwitchSystemImageResponse, + ) { + self.queued_apply_switch_system_image_responses + .lock() + .await + .push_back(response); + } + + pub async fn submitted_apply_switch_system_image_requests( + &self, + ) -> Vec { + self.submitted_apply_switch_system_image_requests .lock() .await .clone() @@ -259,14 +316,20 @@ pub mod test_support { fail_add_node: Arc, fail_inventory_get: Arc, registered_nodes: Arc>>, + firmware_objects: Arc>>, submitted_firmware_requests: Arc>>, queued_firmware_responses: Arc>>, + submitted_firmware_object_apply_requests: Arc>>, + queued_firmware_object_apply_responses: + Arc>>, + submitted_firmware_object_from_json_apply_requests: + Arc>>, firmware_job_statuses: Arc>>, firmware_job_errors: Arc>>, - submitted_switch_system_image_requests: - Arc>>, - queued_switch_system_image_responses: - Arc>>, + submitted_apply_switch_system_image_requests: + Arc>>, + queued_apply_switch_system_image_responses: + Arc>>, switch_system_image_job_statuses: Arc>>, switch_system_image_job_errors: Arc>>, @@ -425,6 +488,173 @@ pub mod test_support { ) -> Result { 
Ok(rms::GetRackFirmwareInventoryResponse::default()) } + async fn add_firmware_object( + &self, + cmd: rms::AddFirmwareObjectRequest, + ) -> Result { + let value = + serde_json::from_str::(&cmd.config_json).map_err(|e| { + RackManagerError::ApiInvocationError(tonic::Status::invalid_argument(format!( + "invalid config_json: {e}" + ))) + })?; + let id = value + .get("Id") + .and_then(serde_json::Value::as_str) + .filter(|id| !id.is_empty()) + .map(str::to_string) + .ok_or_else(|| { + RackManagerError::ApiInvocationError(tonic::Status::invalid_argument( + "config_json must contain non-empty Id", + )) + })?; + let mut objects = self.firmware_objects.lock().await; + let is_default = cmd.set_default + || !objects.values().any(|existing| { + existing.hardware_type == cmd.hardware_type && existing.is_default + }); + let object = rms::FirmwareObject { + id: id.clone(), + config_json: cmd.config_json, + available: false, + hardware_type: cmd.hardware_type, + is_default, + ..Default::default() + }; + if is_default { + for existing in objects.values_mut() { + if existing.hardware_type == object.hardware_type { + existing.is_default = false; + } + } + } + objects.insert(id, object.clone()); + Ok(object) + } + async fn get_firmware_object( + &self, + cmd: rms::GetFirmwareObjectRequest, + ) -> Result { + self.firmware_objects + .lock() + .await + .get(&cmd.id) + .cloned() + .ok_or_else(|| { + RackManagerError::ApiInvocationError(tonic::Status::not_found(format!( + "firmware object {} not found", + cmd.id + ))) + }) + } + async fn list_firmware_objects( + &self, + cmd: rms::ListFirmwareObjectsRequest, + ) -> Result { + let objects = self + .firmware_objects + .lock() + .await + .values() + .filter(|object| !cmd.only_available || object.available) + .filter(|object| { + cmd.hardware_type.is_empty() || object.hardware_type == cmd.hardware_type + }) + .cloned() + .collect(); + Ok(rms::ListFirmwareObjectsResponse { objects }) + } + async fn delete_firmware_object( + &self, + cmd: 
rms::DeleteFirmwareObjectRequest, + ) -> Result { + self.firmware_objects.lock().await.remove(&cmd.id); + Ok(rms::OperationResponse { + status: rms::ReturnCode::Success as i32, + ..Default::default() + }) + } + async fn set_default_firmware_object( + &self, + cmd: rms::SetDefaultFirmwareObjectRequest, + ) -> Result { + let mut objects = self.firmware_objects.lock().await; + let hardware_type = objects + .get(&cmd.object_id) + .map(|object| object.hardware_type.clone()) + .ok_or_else(|| { + RackManagerError::ApiInvocationError(tonic::Status::not_found(format!( + "firmware object {} not found", + cmd.object_id + ))) + })?; + for object in objects.values_mut() { + if object.hardware_type == hardware_type { + object.is_default = object.id == cmd.object_id; + } + } + Ok(objects + .get(&cmd.object_id) + .cloned() + .expect("firmware object existence validated above")) + } + async fn apply_firmware_object( + &self, + cmd: rms::ApplyFirmwareObjectRequest, + ) -> Result { + self.submitted_firmware_object_apply_requests + .lock() + .await + .push(cmd); + Ok(self + .queued_firmware_object_apply_responses + .lock() + .await + .pop_front() + .unwrap_or_default()) + } + async fn apply_firmware_object_from_json( + &self, + cmd: rms::ApplyFirmwareObjectFromJsonRequest, + ) -> Result { + self.submitted_firmware_object_from_json_apply_requests + .lock() + .await + .push(cmd); + Ok(self + .queued_firmware_object_apply_responses + .lock() + .await + .pop_front() + .unwrap_or_default()) + } + async fn apply_switch_system_image_from_json( + &self, + _cmd: rms::ApplySwitchSystemImageFromJsonRequest, + ) -> Result { + Ok(rms::ApplySwitchSystemImageResponse::default()) + } + async fn apply_switch_system_image( + &self, + cmd: rms::ApplySwitchSystemImageRequest, + ) -> Result { + self.submitted_apply_switch_system_image_requests + .lock() + .await + .push(cmd); + Ok(self + .queued_apply_switch_system_image_responses + .lock() + .await + .pop_front() + .unwrap_or_default()) + } + async fn 
get_firmware_object_history( + &self, + _cmd: rms::GetFirmwareObjectHistoryRequest, + ) -> Result { + Ok(rms::GetFirmwareObjectHistoryResponse::default()) + } async fn list_firmware_on_switch( &self, _cmd: rms::ListFirmwareOnSwitchCommand, @@ -449,6 +679,12 @@ pub mod test_support { ) -> Result { Ok(rms::ConfigureScaleUpFabricManagerResponse::default()) } + async fn set_scale_up_fabric_state( + &self, + _cmd: rms::SetScaleUpFabricStateRequest, + ) -> Result { + Ok(rms::SetScaleUpFabricStateResponse::default()) + } async fn fetch_switch_system_image( &self, _cmd: rms::FetchSwitchSystemImageRequest, @@ -527,16 +763,16 @@ pub mod test_support { #[async_trait::async_trait] impl SwitchSystemImageRmsClient for MockRmsClient { - async fn update_switch_system_image( + async fn apply_switch_system_image( &self, - cmd: rms::UpdateSwitchSystemImageRequest, - ) -> Result { - self.submitted_switch_system_image_requests + cmd: rms::ApplySwitchSystemImageRequest, + ) -> Result { + self.submitted_apply_switch_system_image_requests .lock() .await .push(cmd); Ok(self - .queued_switch_system_image_responses + .queued_apply_switch_system_image_responses .lock() .await .pop_front() diff --git a/crates/api/src/state_controller/rack/maintenance.rs b/crates/api/src/state_controller/rack/maintenance.rs index e966372c53..68c07236da 100644 --- a/crates/api/src/state_controller/rack/maintenance.rs +++ b/crates/api/src/state_controller/rack/maintenance.rs @@ -18,10 +18,10 @@ //! Handler for RackState::Maintenance. 
use carbide_rack::firmware_update::{ - RackFirmwareInventory, RackSwitchFirmwareInventory, build_firmware_update_batches, - build_new_node_info, firmware_type_for_profile, load_rack_firmware_inventory, - load_rack_switch_firmware_inventory, submit_firmware_update_batches, + RackFirmwareInventory, RackSwitchFirmwareInventory, build_new_node_info, + firmware_type_for_profile, load_rack_firmware_inventory, load_rack_switch_firmware_inventory, }; +use carbide_rack::firmware_object::{ANY_RACK_HARDWARE_TYPE, profile_hardware_type_wire_value}; use carbide_rack::rack_manager_error; use carbide_rack::rms_client::SwitchSystemImageRmsClient; use carbide_rack_controller::context::RackStateHandlerContextObjects; @@ -33,19 +33,17 @@ use carbide_rack_controller::validating::strip_rv_labels; use carbide_uuid::rack::{RackId, RackProfileId}; use db::{ host_machine_update as db_host_machine_update, machine as db_machine, - machine_topology as db_machine_topology, rack as db_rack, rack_firmware as db_rack_firmware, - switch as db_switch, + machine_topology as db_machine_topology, rack as db_rack, switch as db_switch, }; use librms::protos::rack_manager as rms; use model::rack::{ ConfigureNmxClusterState, FirmwareUpgradeDeviceInfo, FirmwareUpgradeDeviceStatus, FirmwareUpgradeState, MaintenanceActivity, MaintenanceScope, NvosUpdateJob, NvosUpdateState, NvosUpdateSwitchStatus, Rack, RackFirmwareUpgradeState, RackFirmwareUpgradeStatus, - RackMaintenanceState, RackPowerState, RackState, RackValidationState, ResolvedNvosArtifact, - SwitchNvosUpdateState, SwitchNvosUpdateStatus, + RackMaintenanceState, RackPowerState, RackState, RackValidationState, SwitchNvosUpdateState, + SwitchNvosUpdateStatus, }; -use model::rack_firmware::{RackFirmware, RackFirmwareSearchFilter}; -use model::rack_type::RackHardwareType; +use model::rack_type::RackProfile; use state_controller::state_handler::{ StateHandlerContext, StateHandlerError, StateHandlerOutcome, }; @@ -107,166 +105,6 @@ async fn 
trigger_rack_firmware_reprovisioning_requests( Ok(()) } -fn desired_rack_hardware_type( - id: &RackId, - rack_profile_id: Option<&RackProfileId>, - ctx: &StateHandlerContext<'_, RackStateHandlerContextObjects>, -) -> RackHardwareType { - super::resolve_profile(id, rack_profile_id, ctx) - .and_then(|profile| profile.rack_hardware_type.clone()) - .unwrap_or_else(RackHardwareType::any) -} - -fn preferred_nvos_lookup_keys( - id: &RackId, - rack_profile_id: Option<&RackProfileId>, - ctx: &StateHandlerContext<'_, RackStateHandlerContextObjects>, -) -> Vec { - let mut keys = Vec::new(); - if let Some(class) = super::resolve_profile(id, rack_profile_id, ctx) - .and_then(|profile| profile.rack_hardware_class) - { - keys.push(format!("NVOS_{}", class)); - } - if !keys.iter().any(|key| key == "NVOS_prod") { - keys.push("NVOS_prod".to_string()); - } - keys -} - -fn resolve_nvos_artifact_from_firmware( - firmware: &RackFirmware, - lookup_keys: &[String], -) -> Result, StateHandlerError> { - let Some(parsed_components) = firmware.parsed_components.as_ref().map(|json| &json.0) else { - return Ok(None); - }; - - let images = parsed_components - .get("switch_system_images") - .and_then(|value| value.get("Switch Tray")); - - let Some(images) = images else { - return Ok(None); - }; - - for lookup_key in lookup_keys { - let Some(entry) = images.get(lookup_key) else { - continue; - }; - - let image_filename = entry - .get("image_filename") - .and_then(serde_json::Value::as_str) - .ok_or_else(|| { - StateHandlerError::GenericError(eyre::eyre!( - "rack firmware {} has malformed {} lookup entry: missing image_filename", - firmware.id, - lookup_key - )) - })?; - - let version = entry - .get("version") - .and_then(serde_json::Value::as_str) - .map(str::to_string); - - return Ok(Some(ResolvedNvosArtifact { - firmware_id: firmware.id.clone(), - image_filename: image_filename.to_string(), - local_file_path: format!( - "/forge-boot-artifacts/blobs/internal/fw/rack_firmware/{}/{}", - 
firmware.id, image_filename - ), - version, - })); - } - - Ok(None) -} - -async fn resolve_default_nvos_artifact( - id: &RackId, - rack_profile_id: Option<&RackProfileId>, - ctx: &mut StateHandlerContext<'_, RackStateHandlerContextObjects>, -) -> Result, StateHandlerError> { - let desired_hw_type = desired_rack_hardware_type(id, rack_profile_id, ctx); - let lookup_keys = preferred_nvos_lookup_keys(id, rack_profile_id, ctx); - let mut hardware_types = vec![desired_hw_type.clone()]; - if !desired_hw_type.is_any() { - hardware_types.push(RackHardwareType::any()); - } - - let mut conn = ctx.services.db_pool.acquire().await.map_err(|e| { - StateHandlerError::GenericError(eyre::eyre!( - "failed to acquire db connection for NVOS lookup: {}", - e - )) - })?; - - for rack_hardware_type in hardware_types { - let firmware_rows = db_rack_firmware::list_all( - &mut conn, - RackFirmwareSearchFilter { - only_available: true, - rack_hardware_type: Some(rack_hardware_type), - }, - ) - .await - .map_err(|e| StateHandlerError::GenericError(eyre::eyre!(e.to_string())))?; - - for firmware in firmware_rows.into_iter().filter(|fw| fw.is_default) { - if let Some(artifact) = resolve_nvos_artifact_from_firmware(&firmware, &lookup_keys)? { - return Ok(Some(artifact)); - } - } - } - - Ok(None) -} - -async fn resolve_requested_or_default_nvos_artifact( - id: &RackId, - rack_profile_id: Option<&RackProfileId>, - rack_firmware_id: Option<&str>, - ctx: &mut StateHandlerContext<'_, RackStateHandlerContextObjects>, -) -> Result, StateHandlerError> { - let Some(rack_firmware_id) = rack_firmware_id else { - return resolve_default_nvos_artifact(id, rack_profile_id, ctx).await; - }; - - let firmware = match db_rack_firmware::find_by_id(&ctx.services.db_pool, rack_firmware_id).await - { - Ok(firmware) => firmware, - Err(db::DatabaseError::NotFoundError { .. 
}) => { - return Err(StateHandlerError::GenericError(eyre::eyre!( - "requested NVOS rack firmware '{}' not found", - rack_firmware_id - ))); - } - Err(error) => return Err(error.into()), - }; - - if !firmware.available { - return Err(StateHandlerError::GenericError(eyre::eyre!( - "requested NVOS rack firmware '{}' exists but is not available", - rack_firmware_id - ))); - } - - let lookup_keys = preferred_nvos_lookup_keys(id, rack_profile_id, ctx); - resolve_nvos_artifact_from_firmware(&firmware, &lookup_keys)?.map_or_else( - || { - Err(StateHandlerError::GenericError(eyre::eyre!( - "requested NVOS rack firmware '{}' has no switch system image for lookup keys [{}]", - rack_firmware_id, - lookup_keys.join(", ") - ))) - }, - |artifact| Ok(Some(artifact)), - ) -} - async fn clear_rack_firmware_device_statuses( txn: &mut sqlx::PgConnection, machine_ids: &[carbide_uuid::machine::MachineId], @@ -380,13 +218,48 @@ fn nvos_update_requested(scope: &MaintenanceScope) -> bool { fn implicit_nvos_update_requested(scope: &MaintenanceScope) -> bool { scope.should_run(&MaintenanceActivity::NvosUpdate { - rack_firmware_id: None, + firmware_object_id: None, }) && !nvos_update_requested(scope) } -fn requested_rack_firmware_id(scope: &MaintenanceScope) -> Option { +fn requested_firmware_object_id(scope: &MaintenanceScope) -> Option { + scope.activities.iter().find_map(|activity| match activity { + MaintenanceActivity::NvosUpdate { firmware_object_id } => firmware_object_id.clone(), + _ => None, + }) +} + +fn explicit_firmware_upgrade_requested(scope: &MaintenanceScope) -> bool { + scope + .activities + .iter() + .any(|activity| matches!(activity, MaintenanceActivity::FirmwareUpgrade { .. 
})) +} + +fn profile_hardware_type_or_any(profile: Option<&RackProfile>) -> String { + profile + .map(profile_hardware_type_wire_value) + .filter(|hardware_type| !hardware_type.trim().is_empty()) + .unwrap_or_else(|| ANY_RACK_HARDWARE_TYPE.to_string()) +} + +fn requested_firmware_object_json_upgrade( + scope: &MaintenanceScope, +) -> Option<(Option, Vec, String, bool)> { scope.activities.iter().find_map(|activity| match activity { - MaintenanceActivity::NvosUpdate { rack_firmware_id } => rack_firmware_id.clone(), + MaintenanceActivity::FirmwareUpgrade { + firmware_version, + components, + access_token, + force_update, + } => access_token.as_ref().map(|token| { + ( + firmware_version.clone(), + components.clone(), + token.clone(), + *force_update, + ) + }), _ => None, }) } @@ -394,7 +267,7 @@ fn requested_rack_firmware_id(scope: &MaintenanceScope) -> Option { fn nvos_update_start_state(scope: &MaintenanceScope) -> RackMaintenanceState { RackMaintenanceState::NVOSUpdate { nvos_update: NvosUpdateState::Start { - rack_firmware_id: requested_rack_firmware_id(scope), + firmware_object_id: requested_firmware_object_id(scope), }, } } @@ -403,7 +276,7 @@ fn nvos_update_start_state(scope: &MaintenanceScope) -> RackMaintenanceState { /// activities not requested in the scope. 
fn next_state_after_firmware(scope: &MaintenanceScope) -> RackMaintenanceState { if scope.should_run(&MaintenanceActivity::NvosUpdate { - rack_firmware_id: None, + firmware_object_id: None, }) { nvos_update_start_state(scope) } else { @@ -441,6 +314,8 @@ pub(crate) fn first_maintenance_state(scope: &MaintenanceScope) -> RackMaintenan if scope.should_run(&MaintenanceActivity::FirmwareUpgrade { firmware_version: None, components: vec![], + access_token: None, + force_update: false, }) { RackMaintenanceState::FirmwareUpgrade { rack_firmware_upgrade: FirmwareUpgradeState::Start, @@ -450,9 +325,9 @@ pub(crate) fn first_maintenance_state(scope: &MaintenanceScope) -> RackMaintenan } } -/// Filters a full-rack firmware inventory down to only the devices listed in -/// the maintenance scope. When `scope.is_full_rack()` the inventory is -/// returned unchanged. +/// Filters a full-rack firmware inventory down to compute/switch devices listed +/// in the maintenance scope. Power-shelf firmware-object JSON apply is not +/// implemented yet. fn filter_inventory_by_scope( mut inventory: RackFirmwareInventory, scope: &MaintenanceScope, @@ -549,108 +424,220 @@ fn build_switch_device_info_request( } } -/// Submit compute and switch firmware-update batches to RMS and persist the -/// per-device child job IDs returned by UpdateFirmwareByDeviceList. 
-async fn rms_start_firmware_upgrade( - rms_client: &dyn librms::RmsApi, - firmware_id: &str, - batches: Vec, -) -> model::rack::FirmwareUpgradeJob { - let started_at = chrono::Utc::now(); - let submissions = submit_firmware_update_batches(rms_client, batches).await; - let mut job = model::rack::FirmwareUpgradeJob { - firmware_id: Some(firmware_id.to_string()), - started_at: Some(started_at), - ..Default::default() +fn rms_component_filters_from_components( + components: &[String], + include_machines: bool, + include_switches: bool, +) -> std::collections::HashMap { + if components.is_empty() { + return std::collections::HashMap::new(); + } + + let mut filters = std::collections::HashMap::new(); + if include_machines { + filters.insert( + rms::NodeType::Compute as i32, + rms::FirmwareObjectComponentFilter { + components: components.to_vec(), + }, + ); + } + if include_switches { + filters.insert( + rms::NodeType::Switch as i32, + rms::FirmwareObjectComponentFilter { + components: components.to_vec(), + }, + ); + } + filters +} + +fn firmware_device_status( + device: FirmwareUpgradeDeviceInfo, + parent_job_id: Option, + child_jobs: &std::collections::HashMap, + node_errors: &std::collections::HashMap, + batch_error: Option<&str>, +) -> FirmwareUpgradeDeviceStatus { + let mut status = FirmwareUpgradeDeviceStatus { + node_id: device.node_id.clone(), + mac: device.mac, + bmc_ip: device.bmc_ip, + status: "in_progress".into(), + job_id: None, + parent_job_id, + error_message: None, }; - for submission in submissions { - match submission.response { - Ok(response) => { - let batch_response = response.response.as_ref(); - if let Some(batch_response) = batch_response - && !batch_response.job_id.is_empty() - { - job.batch_job_ids.push(batch_response.job_id.clone()); - } + if let Some(error_message) = node_errors.get(&device.node_id) { + status.status = "failed".into(); + status.error_message = Some(error_message.clone()); + } else if let Some(job_id) = 
child_jobs.get(&device.node_id) { + status.job_id = Some(job_id.clone()); + } else { + status.status = "failed".into(); + status.error_message = Some( + batch_error + .unwrap_or("RMS did not return a child firmware job for this device") + .to_string(), + ); + } - let child_jobs = response - .node_jobs - .iter() - .map(|child| (child.node_id.clone(), child.job_id.clone())) - .collect::>(); - let node_errors = batch_response - .map(|batch_response| { - batch_response - .node_results - .iter() - .filter(|result| { - result.status - != librms::protos::rack_manager::ReturnCode::Success as i32 - || !result.error_message.is_empty() - }) - .map(|result| (result.node_id.clone(), result.error_message.clone())) - .collect::>() - }) - .unwrap_or_default(); - let parent_job_id = batch_response.and_then(|batch_response| { - (!batch_response.job_id.is_empty()).then(|| batch_response.job_id.clone()) - }); - - let target_devices = match submission.display_name { - "Compute Node" => &mut job.machines, - "Switch" => &mut job.switches, - _ => continue, - }; + status +} - for device in submission.devices { - let mut status = FirmwareUpgradeDeviceStatus { - node_id: device.node_id.clone(), - mac: device.mac.clone(), - bmc_ip: device.bmc_ip.clone(), - status: "in_progress".into(), - job_id: None, - parent_job_id: parent_job_id.clone(), - error_message: None, - }; +struct RmsFirmwareObjectJsonApply<'a> { + rack_id: &'a RackId, + config_json: &'a str, + access_token: &'a str, + firmware_type: &'a str, + hardware_type: &'a str, + force_update: bool, + components: &'a [String], + machines: Vec, + switches: Vec, +} - if let Some(error_message) = node_errors.get(&device.node_id) { - status.status = "failed".into(); - status.error_message = Some(error_message.clone()); - } else if let Some(job_id) = child_jobs.get(&device.node_id) { - status.job_id = Some(job_id.clone()); - } else { - status.status = "failed".into(); - status.error_message = - Some("RMS did not return a child firmware job for 
this device".into()); - } +async fn rms_start_firmware_upgrade_from_json( + rms_client: &dyn librms::RmsApi, + request: RmsFirmwareObjectJsonApply<'_>, +) -> Result { + let started_at = chrono::Utc::now(); + let machine_count = request.machines.len(); + let switch_count = request.switches.len(); + let mut devices = Vec::with_capacity(machine_count + switch_count); + devices.extend( + request + .machines + .iter() + .map(|device| build_new_node_info(request.rack_id, device, rms::NodeType::Compute)), + ); + devices.extend( + request + .switches + .iter() + .map(|device| build_new_node_info(request.rack_id, device, rms::NodeType::Switch)), + ); - target_devices.push(status); - } - } - Err(error) => { - let target_devices = match submission.display_name { - "Compute Node" => &mut job.machines, - "Switch" => &mut job.switches, - _ => continue, - }; + let response = rms_client + .apply_firmware_object_from_json(rms::ApplyFirmwareObjectFromJsonRequest { + metadata: None, + rack_id: request.rack_id.to_string(), + config_json: request.config_json.to_string(), + access_token: request.access_token.to_string(), + firmware_type: request.firmware_type.to_string(), + hardware_type: request.hardware_type.to_string(), + nodes: Some(rms::NodeSet { devices }), + force_update: request.force_update, + component_filters: rms_component_filters_from_components( + request.components, + machine_count > 0, + switch_count > 0, + ), + }) + .await + .map_err(|error| { + StateHandlerError::GenericError(eyre::eyre!( + "failed to submit firmware object JSON apply to RMS: {}", + error + )) + })?; - for device in submission.devices { - target_devices.push(FirmwareUpgradeDeviceStatus { - node_id: device.node_id.clone(), - mac: device.mac.clone(), - bmc_ip: device.bmc_ip.clone(), - status: "failed".into(), - job_id: None, - parent_job_id: None, - error_message: Some(error.clone()), - }); - } - } - } + let batch_response = response.response.as_ref(); + let batch_status = batch_response + 
.map(|batch_response| batch_response.status) + .unwrap_or(rms::ReturnCode::Failure as i32); + let batch_job_id = batch_response + .map(|batch_response| batch_response.job_id.as_str()) + .unwrap_or_default(); + if batch_status != rms::ReturnCode::Success as i32 + && batch_job_id.is_empty() + && response.node_jobs.is_empty() + { + let message = batch_response + .map(|batch_response| batch_response.message.as_str()) + .unwrap_or_default(); + let message = if message.is_empty() { + "RMS returned failure for ApplyFirmwareObjectFromJSON".to_string() + } else { + message.to_string() + }; + return Err(StateHandlerError::GenericError(eyre::eyre!(message))); } - job.job_id = job.batch_job_ids.first().cloned(); + let parent_job_id = (!batch_job_id.is_empty()).then(|| batch_job_id.to_string()); + let child_jobs = response + .node_jobs + .iter() + .map(|child| (child.node_id.clone(), child.job_id.clone())) + .collect::>(); + let node_errors = batch_response + .map(|batch_response| { + batch_response + .node_results + .iter() + .filter(|result| { + result.status != rms::ReturnCode::Success as i32 + || !result.error_message.is_empty() + }) + .map(|result| (result.node_id.clone(), result.error_message.clone())) + .collect::>() + }) + .unwrap_or_default(); + let batch_error = batch_response.and_then(|batch_response| { + if batch_response.status == rms::ReturnCode::Success as i32 + || batch_response.message.is_empty() + { + None + } else { + Some(batch_response.message.clone()) + } + }); + + let mut job = model::rack::FirmwareUpgradeJob { + job_id: parent_job_id.clone(), + firmware_id: Some(response.object_id), + started_at: Some(started_at), + batch_job_ids: parent_job_id.iter().cloned().collect(), + machines: request + .machines + .into_iter() + .map(|device| { + firmware_device_status( + device, + parent_job_id.clone(), + &child_jobs, + &node_errors, + batch_error.as_deref(), + ) + }) + .collect(), + switches: request + .switches + .into_iter() + .map(|device| { + 
firmware_device_status( + device, + parent_job_id.clone(), + &child_jobs, + &node_errors, + batch_error.as_deref(), + ) + }) + .collect(), + ..Default::default() + }; + + tracing::info!( + rack_id = %request.rack_id, + parent_job_id = ?job.job_id, + object_id = ?job.firmware_id, + machine_count, + switch_count, + "RMS firmware object JSON apply submitted", + ); + let all_devices: Vec<_> = job.all_devices().collect(); let failed = all_devices .iter() @@ -677,7 +664,7 @@ async fn rms_start_firmware_upgrade( job.completed_at = Some(chrono::Utc::now()); } - job + Ok(job) } /// Poll RMS GetFirmwareJobStatus for each tracked child job and update the @@ -812,21 +799,25 @@ async fn rms_get_firmware_upgrade_status( async fn rms_start_nvos_update( rms_client: &dyn SwitchSystemImageRmsClient, rack_id: &RackId, - artifact: &ResolvedNvosArtifact, + object_id: Option<&str>, + software_type: &str, + hardware_type: &str, switches: Vec, ) -> Result { let started_at = chrono::Utc::now(); let response = rms_client - .update_switch_system_image(rms::UpdateSwitchSystemImageRequest { + .apply_switch_system_image(rms::ApplySwitchSystemImageRequest { + metadata: None, + rack_id: rack_id.to_string(), + object_id: object_id.unwrap_or_default().to_string(), + software_type: software_type.to_string(), + hardware_type: hardware_type.to_string(), nodes: Some(rms::NodeSet { devices: switches .iter() .map(|switch| build_new_node_info(rack_id, switch, rms::NodeType::Switch)) .collect(), }), - image_filename: artifact.image_filename.clone(), - local_file_path: artifact.local_file_path.clone(), - ..Default::default() }) .await .map_err(|error| { @@ -851,7 +842,7 @@ async fn rms_start_nvos_update( .map(|batch_response| batch_response.message.as_str()) .unwrap_or_default(); let message = if message.is_empty() { - "RMS returned failure for UpdateSwitchSystemImage".to_string() + "RMS returned failure for ApplySwitchSystemImage".to_string() } else { message.to_string() }; @@ -864,20 +855,6 @@ async fn 
rms_start_nvos_update( .iter() .map(|child| (child.node_id.clone(), child.job_id.clone())) .collect::>(); - let node_errors = batch_response - .map(|batch_response| { - batch_response - .node_results - .iter() - .filter(|result| { - result.status != rms::ReturnCode::Success as i32 - || !result.error_message.is_empty() - }) - .map(|result| (result.node_id.clone(), result.error_message.clone())) - .collect::>() - }) - .unwrap_or_default(); - let switches: Vec<_> = switches .into_iter() .map(|switch| { @@ -894,10 +871,7 @@ async fn rms_start_nvos_update( error_message: None, }; - if let Some(error_message) = node_errors.get(&switch.node_id) { - status.status = "failed".into(); - status.error_message = Some(error_message.clone()); - } else if status.job_id.is_none() { + if status.job_id.is_none() { status.status = "failed".into(); status.error_message = Some("RMS did not return a switch system image job for this switch".into()); @@ -920,10 +894,10 @@ async fn rms_start_nvos_update( Ok(NvosUpdateJob { job_id: parent_job_id, - firmware_id: artifact.firmware_id.clone(), - image_filename: artifact.image_filename.clone(), - local_file_path: artifact.local_file_path.clone(), - version: artifact.version.clone(), + firmware_id: response.object_id, + image_filename: response.image_filename, + local_file_path: String::new(), + version: None, status: Some( if total > 0 && terminal < total { "in_progress" @@ -1095,93 +1069,44 @@ pub async fn handle_maintenance( rack_firmware_upgrade, } => match rack_firmware_upgrade { FirmwareUpgradeState::Start => { - let Some(profile) = super::resolve_profile(id, rack_profile_id, ctx) else { - return Ok(skip_firmware_upgrade_outcome( - id, - "rack profile is missing or unknown", - scope, - )); - }; - let Some(rack_hardware_type) = profile.rack_hardware_type.as_ref() else { - return Ok(skip_firmware_upgrade_outcome( - id, - "rack capabilities do not define rack_hardware_type", - scope, - )); - }; - let (requested_fw_version, requested_components) 
= scope - .activities - .iter() - .find_map(|a| match a { - MaintenanceActivity::FirmwareUpgrade { - firmware_version, - components, - } => Some((firmware_version.as_deref(), components.as_slice())), - _ => None, - }) - .unwrap_or((None, &[])); - - let firmware = if let Some(fw_version) = requested_fw_version { - match db_rack_firmware::find_by_id(&ctx.services.db_pool, fw_version).await { - Ok(fw) => fw, - Err(db::DatabaseError::NotFoundError { .. }) => { - return transition_to_rack_error_with_firmware_job( - id, - state, - fw_version, - format!("requested rack firmware '{}' not found", fw_version), - ctx, - ) - .await; - } - Err(error) => return Err(error.into()), - } - } else { - match db_rack_firmware::find_default_by_rack_hardware_type( - &ctx.services.db_pool, - rack_hardware_type, - ) - .await - { - Ok(fw) => fw, - Err(db::DatabaseError::NotFoundError { .. }) => { - return Ok(skip_firmware_upgrade_outcome( - id, - format!( - "no default rack firmware configured for hardware type '{}'", - rack_hardware_type - ), - scope, - )); - } - Err(error) => return Err(error.into()), - } - }; - - if !firmware.available { - if let Some(fw_version) = requested_fw_version { - return transition_to_rack_error_with_firmware_job( + let Some((config_json, components, access_token, force_update)) = + requested_firmware_object_json_upgrade(scope) + else { + if explicit_firmware_upgrade_requested(scope) { + return transition_to_rack_error( id, state, - fw_version, - format!( - "requested rack firmware '{}' exists but is not available", - firmware.id - ), + "firmware-upgrade rack maintenance requires SOT JSON and access token", ctx, ) .await; } return Ok(skip_firmware_upgrade_outcome( id, - format!( - "rack firmware '{}' exists but is not available", - firmware.id - ), + "firmware object JSON source is not configured for rack maintenance; skipping firmware update", scope, )); - } - + }; + // Defensive: older persisted maintenance state may predate API-side JSON validation. 
+ let Some(config_json) = config_json.filter(|json| !json.trim().is_empty()) else { + return transition_to_rack_error( + id, + state, + "firmware object JSON source is configured but target firmware version does not contain SOT JSON", + ctx, + ) + .await; + }; + let Some(rms_client) = ctx.services.rms_client.as_ref() else { + return transition_to_rack_error(id, state, "RMS client not configured", ctx) + .await; + }; + let profile = super::resolve_profile(id, rack_profile_id, ctx); + let rack_hardware_type = profile_hardware_type_or_any(profile); + let firmware_type = profile + .map(firmware_type_for_profile) + .unwrap_or("prod") + .to_string(); let inventory = load_rack_firmware_inventory( &ctx.services.db_pool, ctx.services.credential_manager.as_ref(), @@ -1195,63 +1120,58 @@ pub async fn handle_maintenance( )) })?; let inventory = filter_inventory_by_scope(inventory, scope); - let firmware_type = firmware_type_for_profile(profile); - let batches = match build_firmware_update_batches( - id, - &firmware, - firmware_type, - &inventory, - requested_components, - ) { - Ok(batches) if batches.is_empty() => { - return Ok(skip_firmware_upgrade_outcome( - id, - "no compute or switch devices require rack firmware updates", - scope, - )); - } - Ok(batches) => batches, + + if inventory.machines.is_empty() && inventory.switches.is_empty() { + return Ok(skip_firmware_upgrade_outcome( + id, + "no compute or switch devices require rack firmware updates", + scope, + )); + } + + tracing::info!( + rack_id = %id, + firmware_type = %firmware_type, + hardware_type = %rack_hardware_type, + force_update, + machine_count = inventory.machines.len(), + switch_count = inventory.switches.len(), + "Rack firmware object JSON apply starting", + ); + + let mut job = match rms_start_firmware_upgrade_from_json( + rms_client.as_ref(), + RmsFirmwareObjectJsonApply { + rack_id: id, + config_json: &config_json, + access_token: &access_token, + firmware_type: &firmware_type, + hardware_type: 
&rack_hardware_type, + force_update, + components: &components, + machines: inventory.machines.clone(), + switches: inventory.switches.clone(), + }, + ) + .await + { + Ok(job) => job, Err(error) => { return transition_to_rack_error_with_firmware_job( id, state, - firmware.id.clone(), - format!( - "failed to build firmware update requests for firmware '{}': {}", - firmware.id, error - ), + "firmware-object-json", + error.to_string(), ctx, ) .await; } }; - let Some(rms_client) = ctx.services.rms_client.as_ref() else { - return transition_to_rack_error_with_firmware_job( - id, - state, - firmware.id.clone(), - "RMS client not configured", - ctx, - ) - .await; - }; - - tracing::info!( - rack_id = %id, - rack_hardware_type = %rack_hardware_type, - firmware_id = %firmware.id, - firmware_type, - machine_count = inventory.machines.len(), - switch_count = inventory.switches.len(), - "Rack firmware upgrade starting" - ); - let mut job = - rms_start_firmware_upgrade(rms_client.as_ref(), &firmware.id, batches).await; let mut txn = ctx.services.db_pool.begin().await?; let continue_after_firmware_upgrade = scope.should_run(&MaintenanceActivity::NvosUpdate { - rack_firmware_id: None, + firmware_object_id: None, }); trigger_rack_firmware_reprovisioning_requests( txn.as_mut(), @@ -1297,12 +1217,8 @@ pub async fn handle_maintenance( let completed = all.iter().filter(|d| d.status == "completed").count(); let failed = all.iter().filter(|d| d.status == "failed").count(); let terminal = completed + failed; - let default_nvos_artifact = - if terminal == total && failed == 0 && implicit_nvos_update_requested(scope) { - resolve_default_nvos_artifact(id, rack_profile_id, ctx).await? 
- } else { - None - }; + let run_implicit_nvos_update = + terminal == total && failed == 0 && implicit_nvos_update_requested(scope); let mut txn = ctx.services.db_pool.begin().await?; @@ -1440,16 +1356,14 @@ pub async fn handle_maintenance( "Rack firmware upgrade complete; advancing to explicitly requested next activity" ); next - } else if let Some(artifact) = default_nvos_artifact { + } else if run_implicit_nvos_update { tracing::info!( - "Rack {} has a default NVOS artifact available; advancing to NVOSUpdate(Start) with firmware {} ({})", + "Rack {} firmware upgrade complete; advancing to NVOSUpdate(Start) and letting RMS resolve the default switch system image", id, - artifact.firmware_id, - artifact.image_filename, ); RackMaintenanceState::NVOSUpdate { nvos_update: NvosUpdateState::Start { - rack_firmware_id: Some(artifact.firmware_id), + firmware_object_id: None, }, } } else { @@ -1471,47 +1385,17 @@ pub async fn handle_maintenance( } }, RackMaintenanceState::NVOSUpdate { nvos_update } => match nvos_update { - NvosUpdateState::Start { rack_firmware_id } => { - let artifact = match resolve_requested_or_default_nvos_artifact( - id, - rack_profile_id, - rack_firmware_id.as_deref(), - ctx, - ) - .await? - { - Some(artifact) => artifact, - None if nvos_update_requested(scope) => { - return transition_to_rack_error( - id, - state, - "requested NVOS update could not find a default switch system image", - ctx, - ) - .await; - } - None => { - // A missing rack_firmware_id already falls back to the default rack - // firmware. None here means no default switch system image was found. 
- let next = next_state_after_nvos(scope); - tracing::info!( - rack_id = %id, - next_state = %next, - "No default NVOS artifact available, advancing" - ); - return Ok(StateHandlerOutcome::transition(RackState::Maintenance { - maintenance_state: next, - })); - } - }; - + NvosUpdateState::Start { firmware_object_id } => { let Some(rms_client) = ctx.services.switch_system_image_rms_client.as_deref() else { return transition_to_rack_error(id, state, "RMS client not configured", ctx) .await; }; + let profile = super::resolve_profile(id, rack_profile_id, ctx); + let rack_hardware_type = profile_hardware_type_or_any(profile); + let software_type = profile.map(firmware_type_for_profile).unwrap_or("prod"); - let inventory = load_rack_firmware_inventory( + let switch_inventory = load_rack_switch_firmware_inventory( &ctx.services.db_pool, ctx.services.credential_manager.as_ref(), id, @@ -1519,13 +1403,13 @@ pub async fn handle_maintenance( .await .map_err(|error| { StateHandlerError::GenericError(eyre::eyre!( - "failed to load rack inventory for NVOS update: {}", + "failed to load rack switch firmware inventory for NVOS update: {}", error )) })?; - let inventory = filter_inventory_by_scope(inventory, scope); + let switch_inventory = filter_switch_inventory_by_scope(switch_inventory, scope); - if inventory.switches.is_empty() { + if switch_inventory.switches.is_empty() { let next = next_state_after_nvos(scope); tracing::info!( rack_id = %id, @@ -1538,13 +1422,12 @@ pub async fn handle_maintenance( } tracing::info!( - "Rack {} NVOS update starting with firmware {} ({})", + "Rack {} switch system image update starting with firmware object '{}'", id, - artifact.firmware_id, - artifact.image_filename, + firmware_object_id.as_deref().unwrap_or(""), ); - for switch in &inventory.switches { + for switch in &switch_inventory.switches { switch.os_ip.as_ref().ok_or_else(|| { StateHandlerError::GenericError(eyre::eyre!( "switch {} has no NVOS IP for rack NVOS update", @@ -1565,11 
+1448,34 @@ pub async fn handle_maintenance( })?; } - let job = - rms_start_nvos_update(rms_client, id, &artifact, inventory.switches).await?; + let job = match rms_start_nvos_update( + rms_client, + id, + firmware_object_id.as_deref(), + software_type, + &rack_hardware_type, + switch_inventory.switches, + ) + .await + { + Ok(job) => job, + Err(error) if !nvos_update_requested(scope) => { + let next = next_state_after_nvos(scope); + tracing::info!( + rack_id = %id, + error = %error, + next_state = %next, + "Implicit switch system image update could not be started, advancing" + ); + return Ok(StateHandlerOutcome::transition(RackState::Maintenance { + maintenance_state: next, + })); + } + Err(error) => return Err(error), + }; let mut txn = ctx.services.db_pool.begin().await?; - clear_nvos_update_statuses(txn.as_mut(), &inventory.switch_ids).await?; + clear_nvos_update_statuses(txn.as_mut(), &switch_inventory.switch_ids).await?; db_rack::update_nvos_update_job(txn.as_mut(), id, Some(&job)).await?; state.nvos_update_job = Some(job); @@ -1995,36 +1901,42 @@ pub async fn handle_maintenance( #[cfg(test)] mod tests { + use carbide_uuid::machine::{MachineId, MachineIdSource, MachineType}; + use carbide_uuid::switch::{SwitchId, SwitchIdSource, SwitchType}; use carbide_rack::firmware_update::RackFirmwareInventory; use model::rack::{ ConfigureNmxClusterState, FirmwareUpgradeDeviceInfo, FirmwareUpgradeState, MaintenanceActivity, MaintenanceScope, NvosUpdateState, RackMaintenanceState, RackPowerState, }; + use model::rack_type::{RackHardwareType, RackProfile}; use super::{ - filter_inventory_by_scope, first_maintenance_state, implicit_nvos_update_requested, - next_state_after_configure, next_state_after_firmware, next_state_after_nvos, + filter_inventory_by_scope, firmware_device_status, first_maintenance_state, + implicit_nvos_update_requested, next_state_after_configure, next_state_after_firmware, + next_state_after_nvos, profile_hardware_type_or_any, }; use crate::rack as 
carbide_rack; - fn test_machine_id(byte: u8) -> carbide_uuid::machine::MachineId { - use carbide_uuid::machine::{MachineIdSource, MachineType}; - carbide_uuid::machine::MachineId::new(MachineIdSource::Tpm, [byte; 32], MachineType::Host) + fn test_machine_id(seed: u8) -> MachineId { + let mut hash = [0u8; 32]; + hash[0] = seed; + MachineId::new(MachineIdSource::Tpm, hash, MachineType::Host) } - fn test_switch_id(byte: u8) -> carbide_uuid::switch::SwitchId { - use carbide_uuid::switch::{SwitchIdSource, SwitchType}; - carbide_uuid::switch::SwitchId::new(SwitchIdSource::Tpm, [byte; 32], SwitchType::NvLink) + fn test_switch_id(seed: u8) -> SwitchId { + let mut hash = [0u8; 32]; + hash[0] = seed; + SwitchId::new(SwitchIdSource::Tpm, hash, SwitchType::NvLink) } - fn test_device_info(node_id: &str) -> FirmwareUpgradeDeviceInfo { + fn test_device_info(node_id: impl ToString) -> FirmwareUpgradeDeviceInfo { FirmwareUpgradeDeviceInfo { node_id: node_id.to_string(), - mac: "AA:BB:CC:DD:EE:FF".to_string(), - bmc_ip: "10.0.0.1".to_string(), + mac: "00:11:22:33:44:55".to_string(), + bmc_ip: "192.0.2.10".to_string(), bmc_username: "admin".to_string(), - bmc_password: "pass".to_string(), + bmc_password: "password".to_string(), os_mac: None, os_ip: None, os_username: None, @@ -2032,6 +1944,137 @@ mod tests { } } + fn sample_inventory() -> RackFirmwareInventory { + let machine_a = test_machine_id(1); + let machine_b = test_machine_id(2); + let switch_a = test_switch_id(3); + let switch_b = test_switch_id(4); + + RackFirmwareInventory { + machine_ids: vec![machine_a, machine_b], + machines: vec![test_device_info(machine_a), test_device_info(machine_b)], + switch_ids: vec![switch_a, switch_b], + switches: vec![test_device_info(switch_a), test_device_info(switch_b)], + } + } + + #[test] + fn profile_hardware_type_or_any_defaults_missing_values_to_any() { + assert_eq!(profile_hardware_type_or_any(None), "any"); + assert_eq!( + profile_hardware_type_or_any(Some(&RackProfile::default())), + 
"any" + ); + + let profile = RackProfile { + rack_hardware_type: Some(RackHardwareType::from("gb200")), + ..Default::default() + }; + + assert_eq!(profile_hardware_type_or_any(Some(&profile)), "gb200"); + } + + #[test] + fn filter_inventory_by_scope_full_rack_keeps_all_devices() { + let inventory = sample_inventory(); + let filtered = filter_inventory_by_scope(inventory, &MaintenanceScope::default()); + + assert_eq!(filtered.machine_ids.len(), 2); + assert_eq!(filtered.machines.len(), 2); + assert_eq!(filtered.switch_ids.len(), 2); + assert_eq!(filtered.switches.len(), 2); + } + + #[test] + fn filter_inventory_by_scope_machines_only() { + let machine_id = test_machine_id(1); + let scope = MaintenanceScope { + machine_ids: vec![machine_id], + ..Default::default() + }; + let filtered = filter_inventory_by_scope(sample_inventory(), &scope); + + assert_eq!(filtered.machine_ids, vec![machine_id]); + assert_eq!(filtered.machines.len(), 1); + assert_eq!(filtered.machines[0].node_id, machine_id.to_string()); + assert!(filtered.switch_ids.is_empty()); + assert!(filtered.switches.is_empty()); + } + + #[test] + fn filter_inventory_by_scope_switches_only() { + let switch_id = test_switch_id(3); + let scope = MaintenanceScope { + switch_ids: vec![switch_id], + ..Default::default() + }; + let filtered = filter_inventory_by_scope(sample_inventory(), &scope); + + assert!(filtered.machine_ids.is_empty()); + assert!(filtered.machines.is_empty()); + assert_eq!(filtered.switch_ids, vec![switch_id]); + assert_eq!(filtered.switches.len(), 1); + assert_eq!(filtered.switches[0].node_id, switch_id.to_string()); + } + + #[test] + fn filter_inventory_by_scope_machines_and_switches() { + let machine_id = test_machine_id(2); + let switch_id = test_switch_id(4); + let scope = MaintenanceScope { + machine_ids: vec![machine_id], + switch_ids: vec![switch_id], + ..Default::default() + }; + let filtered = filter_inventory_by_scope(sample_inventory(), &scope); + + assert_eq!(filtered.machine_ids, 
vec![machine_id]); + assert_eq!(filtered.machines.len(), 1); + assert_eq!(filtered.machines[0].node_id, machine_id.to_string()); + assert_eq!(filtered.switch_ids, vec![switch_id]); + assert_eq!(filtered.switches.len(), 1); + assert_eq!(filtered.switches[0].node_id, switch_id.to_string()); + } + + #[test] + fn filter_inventory_by_scope_excludes_unknown_device_ids() { + let machine_id = test_machine_id(1); + let switch_id = test_switch_id(3); + let scope = MaintenanceScope { + machine_ids: vec![machine_id], + switch_ids: vec![switch_id], + ..Default::default() + }; + let mut inventory = sample_inventory(); + inventory + .machines + .push(test_device_info("not-a-machine-id")); + inventory.switches.push(test_device_info("not-a-switch-id")); + + let filtered = filter_inventory_by_scope(inventory, &scope); + + assert_eq!(filtered.machine_ids, vec![machine_id]); + assert_eq!(filtered.machines.len(), 1); + assert_eq!(filtered.machines[0].node_id, machine_id.to_string()); + assert_eq!(filtered.switch_ids, vec![switch_id]); + assert_eq!(filtered.switches.len(), 1); + assert_eq!(filtered.switches[0].node_id, switch_id.to_string()); + } + + #[test] + fn firmware_device_status_uses_batch_error_when_child_job_missing() { + let status = firmware_device_status( + test_device_info("node-1"), + Some("parent-job".to_string()), + &std::collections::HashMap::new(), + &std::collections::HashMap::new(), + Some("invalid SOT JSON"), + ); + + assert_eq!(status.status, "failed"); + assert_eq!(status.error_message.as_deref(), Some("invalid SOT JSON")); + } + // ── first_maintenance_state ───────────────────────────────────────── #[test] @@ -2051,6 +2094,8 @@ mod tests { activities: vec![MaintenanceActivity::FirmwareUpgrade { firmware_version: None, components: vec![], + access_token: None, + force_update: false, }], ..Default::default() }; @@ -2078,7 +2123,7 @@ mod tests { fn first_maintenance_state_only_nvos() { let scope = MaintenanceScope { activities: vec![MaintenanceActivity::NvosUpdate 
{ - rack_firmware_id: Some("fw-nvos".into()), + firmware_object_id: Some("fw-nvos".into()), }], ..Default::default() }; @@ -2086,7 +2131,7 @@ mod tests { first_maintenance_state(&scope), RackMaintenanceState::NVOSUpdate { nvos_update: NvosUpdateState::Start { - rack_firmware_id: Some("fw-nvos".into()), + firmware_object_id: Some("fw-nvos".into()), }, }, ); @@ -2136,6 +2181,8 @@ mod tests { activities: vec![MaintenanceActivity::FirmwareUpgrade { firmware_version: None, components: vec![], + access_token: None, + force_update: false, }], ..Default::default() }; @@ -2147,7 +2194,7 @@ mod tests { fn implicit_nvos_update_not_requested_for_explicit_nvos() { let scope = MaintenanceScope { activities: vec![MaintenanceActivity::NvosUpdate { - rack_firmware_id: None, + firmware_object_id: None, }], ..Default::default() }; @@ -2173,6 +2220,8 @@ mod tests { MaintenanceActivity::FirmwareUpgrade { firmware_version: None, components: vec![], + access_token: None, + force_update: false, }, MaintenanceActivity::PowerSequence, ], @@ -2190,6 +2239,8 @@ mod tests { activities: vec![MaintenanceActivity::FirmwareUpgrade { firmware_version: None, components: vec![], + access_token: None, + force_update: false, }], ..Default::default() }; @@ -2206,9 +2257,11 @@ mod tests { MaintenanceActivity::FirmwareUpgrade { firmware_version: None, components: vec![], + access_token: None, + force_update: false, }, MaintenanceActivity::NvosUpdate { - rack_firmware_id: Some("fw-nvos".into()), + firmware_object_id: Some("fw-nvos".into()), }, ], ..Default::default() @@ -2217,7 +2270,7 @@ mod tests { next_state_after_firmware(&scope), RackMaintenanceState::NVOSUpdate { nvos_update: NvosUpdateState::Start { - rack_firmware_id: Some("fw-nvos".into()), + firmware_object_id: Some("fw-nvos".into()), }, }, ); @@ -2241,7 +2294,7 @@ mod tests { let scope = MaintenanceScope { activities: vec![ MaintenanceActivity::NvosUpdate { - rack_firmware_id: None, + firmware_object_id: None, }, 
MaintenanceActivity::PowerSequence, ], @@ -2273,6 +2326,8 @@ mod tests { MaintenanceActivity::FirmwareUpgrade { firmware_version: None, components: vec![], + access_token: None, + force_update: false, }, MaintenanceActivity::ConfigureNmxCluster, ], @@ -2283,98 +2338,4 @@ mod tests { RackMaintenanceState::Completed, ); } - - // ── filter_inventory_by_scope ─────────────────────────────────────── - - fn sample_inventory() -> RackFirmwareInventory { - let m1 = test_machine_id(1); - let m2 = test_machine_id(2); - let s1 = test_switch_id(1); - let s2 = test_switch_id(2); - RackFirmwareInventory { - machine_ids: vec![m1, m2], - switch_ids: vec![s1, s2], - machines: vec![ - test_device_info(&m1.to_string()), - test_device_info(&m2.to_string()), - ], - switches: vec![ - test_device_info(&s1.to_string()), - test_device_info(&s2.to_string()), - ], - } - } - - #[test] - fn filter_inventory_full_rack_is_noop() { - let inventory = sample_inventory(); - let scope = MaintenanceScope::default(); - let filtered = filter_inventory_by_scope(inventory, &scope); - assert_eq!(filtered.machine_ids.len(), 2); - assert_eq!(filtered.switch_ids.len(), 2); - assert_eq!(filtered.machines.len(), 2); - assert_eq!(filtered.switches.len(), 2); - } - - #[test] - fn filter_inventory_partial_machines_only() { - let inventory = sample_inventory(); - let m1 = test_machine_id(1); - let scope = MaintenanceScope { - machine_ids: vec![m1], - ..Default::default() - }; - let filtered = filter_inventory_by_scope(inventory, &scope); - assert_eq!(filtered.machine_ids, vec![m1]); - assert_eq!(filtered.machines.len(), 1); - assert_eq!(filtered.machines[0].node_id, m1.to_string()); - assert!(filtered.switch_ids.is_empty()); - assert!(filtered.switches.is_empty()); - } - - #[test] - fn filter_inventory_partial_switches_only() { - let inventory = sample_inventory(); - let s2 = test_switch_id(2); - let scope = MaintenanceScope { - switch_ids: vec![s2], - ..Default::default() - }; - let filtered = 
filter_inventory_by_scope(inventory, &scope); - assert!(filtered.machine_ids.is_empty()); - assert!(filtered.machines.is_empty()); - assert_eq!(filtered.switch_ids, vec![s2]); - assert_eq!(filtered.switches.len(), 1); - assert_eq!(filtered.switches[0].node_id, s2.to_string()); - } - - #[test] - fn filter_inventory_partial_both() { - let inventory = sample_inventory(); - let m2 = test_machine_id(2); - let s1 = test_switch_id(1); - let scope = MaintenanceScope { - machine_ids: vec![m2], - switch_ids: vec![s1], - ..Default::default() - }; - let filtered = filter_inventory_by_scope(inventory, &scope); - assert_eq!(filtered.machine_ids, vec![m2]); - assert_eq!(filtered.machines.len(), 1); - assert_eq!(filtered.switch_ids, vec![s1]); - assert_eq!(filtered.switches.len(), 1); - } - - #[test] - fn filter_inventory_unknown_id_excluded() { - let inventory = sample_inventory(); - let unknown = test_machine_id(99); - let scope = MaintenanceScope { - machine_ids: vec![unknown], - ..Default::default() - }; - let filtered = filter_inventory_by_scope(inventory, &scope); - assert!(filtered.machine_ids.is_empty()); - assert!(filtered.machines.is_empty()); - } } diff --git a/crates/api/src/tests/mod.rs b/crates/api/src/tests/mod.rs index 306ce3983b..af8893c89d 100644 --- a/crates/api/src/tests/mod.rs +++ b/crates/api/src/tests/mod.rs @@ -98,7 +98,6 @@ mod power_shelf_metadata; mod power_shelf_state_controller; mod prevent_duplicate_mac_addresses; mod rack_find; -mod rack_firmware; mod rack_health; mod rack_metadata; mod rack_state_controller; diff --git a/crates/api/src/tests/rack_firmware.rs b/crates/api/src/tests/rack_firmware.rs deleted file mode 100644 index 3918ea49e0..0000000000 --- a/crates/api/src/tests/rack_firmware.rs +++ /dev/null @@ -1,798 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
- * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -use common::api_fixtures::create_test_env; -use rpc::forge::{ - RackFirmwareCreateRequest, RackFirmwareDeleteRequest, RackFirmwareGetRequest, - RackFirmwareSearchFilter, -}; -use rpc::protos::forge::forge_server::Forge; - -use crate::tests::common; - -/// Helper function to create a valid rack firmware JSON config -fn create_valid_rack_firmware_json(id: &str) -> String { - serde_json::json!({ - "Id": id, - "Name": "Test Rack Firmware Config", - "Description": "A test configuration for rack firmware", - "BoardSKUs": [ - { - "SKUID": "sku-001", - "Name": "Compute Tray", - "Type": "ComputeTray", - "Components": { - "Firmware": [ - { - "Component": "BIOS", - "Bundle": "bios-bundle-v1.0", - "Version": "1.0.0", - "Locations": [ - { - "Location": "artifactory.example.com/bios/v1.0.0", - "LocationType": "Artifactory", - "Type": "Firmware" - } - ] - }, - { - "Component": "BMC", - "Bundle": "bmc-bundle-v2.0", - "Version": "2.0.0", - "Locations": [ - { - "Location": "artifactory.example.com/bmc/v2.0.0", - "LocationType": "Artifactory", - "Type": "Firmware" - } - ] - } - ] - } - }, - { - "SKUID": "sku-002", - "Name": "Power Shelf", - "Type": "PowerShelf", - "Components": { - "Firmware": [ - { - "Component": "PSU", - "Bundle": "psu-bundle-v1.5", - "Version": "1.5.0", - "Locations": [ - { - "Location": "artifactory.example.com/psu/v1.5.0", - "LocationType": "Artifactory", - 
"Type": "Firmware" - } - ] - } - ] - } - } - ] - }) - .to_string() -} - -fn create_valid_rack_firmware_json_with_switch_system_image(id: &str) -> String { - serde_json::json!({ - "Id": id, - "Name": "Test Rack Firmware Config With NVOS", - "Description": "A test configuration for rack firmware with switch system image", - "BoardSKUs": [ - { - "SKUID": "sku-001", - "Name": "Compute Tray", - "Type": "ComputeTray", - "Components": { - "Firmware": [ - { - "Component": "BIOS", - "Bundle": "bios-bundle-v1.0", - "Version": "1.0.0", - "Locations": [ - { - "Location": "artifactory.example.com/bios/v1.0.0", - "LocationType": "Artifactory", - "Type": "Firmware" - } - ] - } - ] - } - } - ], - "SwitchSystemImages": [ - { - "DeviceType": "Switch Tray", - "Component": "NVOS", - "Version": "25.02.2553", - "Type": "Prod", - "PackageName": "GB200NVL72_NVOS", - "Location": "https://urm.nvidia.com/artifactory/sw-nbu-sws-nvos-generic-local/release/25.02.2553/amd64/prod/nvos-amd64-25.02.2553.bin", - "LocationType": "HTTPS", - "Required": true - } - ] - }) - .to_string() -} - -fn create_valid_rack_firmware_json_with_nvos_software(id: &str) -> String { - serde_json::json!({ - "Id": id, - "Name": "Test Rack Firmware Config With NVOS Software", - "Description": "A test configuration for rack firmware with NVOS under Switch Tray software", - "BoardSKUs": [ - { - "SKUID": "switch-sku-001", - "Name": "P4978-Juliet_Switch", - "Type": "Switch Tray", - "Components": { - "Software": [ - { - "Component": "NVOS", - "Version": "25.02.2553", - "Type": "Prod", - "Locations": [ - { - "Name": "NVOS_Prod_AMD64", - "Location": "https://urm.nvidia.com/artifactory/sw-nbu-sws-nvos-generic-local/release/25.02.2553/amd64/prod/nvos-amd64-25.02.2553.bin", - "LocationType": "HTTPS", - "PackageName": "GB300_NVOS", - "Required": true - }, - { - "Name": "NVOS_OpenAPI_Spec", - "Location": "https://urm.nvidia.com/artifactory/sw-nbu-sws-nvos-generic-local/release/25.02.2553/openapi.json", - "LocationType": "HTTPS", - 
"PackageName": "GB300_NVOS", - "Required": true - } - ] - }, - { - "Component": "NVOS", - "Version": "25.02.2553", - "Type": "Dev", - "Locations": [ - { - "Name": "NVOS_Dev_AMD64", - "Location": "https://urm.nvidia.com/artifactory/sw-nbu-sws-nvos-generic-local/release/25.02.2553/amd64/dev/nvos-amd64-25.02.2553.bin", - "LocationType": "HTTPS", - "PackageName": "", - "Required": true - } - ] - } - ], - "Firmware": [] - } - } - ] - }) - .to_string() -} - -// ============================================================================ -// CREATE TESTS -// ============================================================================ - -#[crate::sqlx_test()] -async fn test_create_rack_firmware(pool: sqlx::PgPool) -> Result<(), Box> { - let env = create_test_env(pool).await; - - let firmware_id = "test-firmware-001"; - let config_json = create_valid_rack_firmware_json(firmware_id); - - let request = tonic::Request::new(RackFirmwareCreateRequest { - rack_hardware_type: Some(rpc::common::RackHardwareType { - value: "any".to_string(), - }), - config_json: config_json.clone(), - artifactory_token: "test-token-123".to_string(), - }); - - let response = env.api.create_rack_firmware(request).await?; - let firmware = response.into_inner(); - - // Verify response - assert_eq!(firmware.id, firmware_id); - assert!(!firmware.config_json.is_empty()); - assert!(!firmware.available); // Should default to false - assert!(firmware.is_default); // First firmware for a hardware type becomes default - assert!(!firmware.created.is_empty()); - assert!(!firmware.updated.is_empty()); - - // Verify database state - let db_firmware = db::rack_firmware::find_by_id(&env.pool, firmware_id).await?; - assert_eq!(db_firmware.id, firmware_id); - assert!(!db_firmware.available); - assert!(db_firmware.is_default); - assert!(db_firmware.parsed_components.is_some()); - - // Verify parsed components contain expected data - let parsed = db_firmware.parsed_components.unwrap(); - let board_skus = 
parsed["board_skus"].as_array().unwrap(); - assert_eq!(board_skus.len(), 2); - assert_eq!(board_skus[0]["sku_id"], "sku-001"); - assert_eq!(board_skus[1]["sku_id"], "sku-002"); - - Ok(()) -} - -#[crate::sqlx_test()] -async fn test_create_rack_firmware_with_switch_system_image( - pool: sqlx::PgPool, -) -> Result<(), Box> { - let env = create_test_env(pool).await; - - let firmware_id = "test-firmware-nvos-001"; - let config_json = create_valid_rack_firmware_json_with_switch_system_image(firmware_id); - - let request = tonic::Request::new(RackFirmwareCreateRequest { - rack_hardware_type: Some(rpc::common::RackHardwareType { - value: "any".to_string(), - }), - config_json, - artifactory_token: "test-token-123".to_string(), - }); - - let response = env.api.create_rack_firmware(request).await?; - let firmware = response.into_inner(); - - assert_eq!(firmware.id, firmware_id); - - let db_firmware = db::rack_firmware::find_by_id(&env.pool, firmware_id).await?; - let parsed = db_firmware.parsed_components.unwrap(); - let switch_system_images = parsed["switch_system_images"].as_array().unwrap(); - assert_eq!(switch_system_images.len(), 1); - assert_eq!(switch_system_images[0]["device_type"], "Switch Tray"); - assert_eq!(switch_system_images[0]["component"], "NVOS"); - assert_eq!(switch_system_images[0]["version"], "25.02.2553"); - assert_eq!(switch_system_images[0]["firmware_type"], "prod"); - assert_eq!(switch_system_images[0]["package_name"], "GB200NVL72_NVOS"); - assert_eq!( - switch_system_images[0]["image_filename"], - "nvos-amd64-25.02.2553.bin" - ); - - Ok(()) -} - -#[crate::sqlx_test()] -async fn test_create_rack_firmware_derives_nvos_from_switch_tray_software( - pool: sqlx::PgPool, -) -> Result<(), Box> { - let env = create_test_env(pool).await; - - let firmware_id = "test-firmware-nvos-software-001"; - let config_json = create_valid_rack_firmware_json_with_nvos_software(firmware_id); - - let request = tonic::Request::new(RackFirmwareCreateRequest { - 
rack_hardware_type: Some(rpc::common::RackHardwareType { - value: "any".to_string(), - }), - config_json, - artifactory_token: "test-token-123".to_string(), - }); - - let response = env.api.create_rack_firmware(request).await?; - let firmware = response.into_inner(); - - assert_eq!(firmware.id, firmware_id); - - let db_firmware = db::rack_firmware::find_by_id(&env.pool, firmware_id).await?; - let parsed = db_firmware.parsed_components.unwrap(); - let switch_system_images = parsed["switch_system_images"].as_array().unwrap(); - assert_eq!(switch_system_images.len(), 2); - - let prod = switch_system_images - .iter() - .find(|image| image["firmware_type"] == "prod") - .expect("expected prod NVOS image"); - assert_eq!(prod["device_type"], "Switch Tray"); - assert_eq!(prod["component"], "NVOS"); - assert_eq!(prod["version"], "25.02.2553"); - assert_eq!(prod["package_name"], "GB300_NVOS"); - assert_eq!(prod["image_filename"], "nvos-amd64-25.02.2553.bin"); - - let dev = switch_system_images - .iter() - .find(|image| image["firmware_type"] == "dev") - .expect("expected dev NVOS image"); - assert_eq!(dev["package_name"], "GB300_NVOS"); - assert_eq!(dev["image_filename"], "nvos-amd64-25.02.2553.bin"); - - Ok(()) -} - -#[crate::sqlx_test()] -async fn test_create_rack_firmware_rejects_switch_system_image_without_location( - pool: sqlx::PgPool, -) -> Result<(), Box> { - let env = create_test_env(pool).await; - - let firmware_id = "invalid-firmware-nvos-missing-location"; - let config_json = serde_json::json!({ - "Id": firmware_id, - "BoardSKUs": [], - "SwitchSystemImages": [ - { - "DeviceType": "Switch Tray", - "Component": "NVOS", - "Version": "25.02.2553", - "Type": "Prod", - "PackageName": "GB200NVL72_NVOS", - "LocationType": "HTTPS", - "Required": true - } - ] - }) - .to_string(); - - let request = tonic::Request::new(RackFirmwareCreateRequest { - rack_hardware_type: Some(rpc::common::RackHardwareType { - value: "any".to_string(), - }), - config_json, - artifactory_token: 
"test-token-123".to_string(), - }); - - let err = env.api.create_rack_firmware(request).await.unwrap_err(); - assert_eq!(err.code(), tonic::Code::InvalidArgument); - assert!(err.message().contains("Location is required")); - - Ok(()) -} - -#[crate::sqlx_test()] -async fn test_create_rack_firmware_rejects_switch_system_image_with_wrong_device_type( - pool: sqlx::PgPool, -) -> Result<(), Box> { - let env = create_test_env(pool).await; - - let firmware_id = "invalid-firmware-nvos-device-type"; - let config_json = serde_json::json!({ - "Id": firmware_id, - "BoardSKUs": [], - "SwitchSystemImages": [ - { - "DeviceType": "Compute Node", - "Component": "NVOS", - "Version": "25.02.2553", - "Type": "Prod", - "PackageName": "GB200NVL72_NVOS", - "Location": "https://urm.nvidia.com/artifactory/sw-nbu-sws-nvos-generic-local/release/25.02.2553/amd64/prod/nvos-amd64-25.02.2553.bin", - "LocationType": "HTTPS", - "Required": true - } - ] - }) - .to_string(); - - let request = tonic::Request::new(RackFirmwareCreateRequest { - rack_hardware_type: Some(rpc::common::RackHardwareType { - value: "any".to_string(), - }), - config_json, - artifactory_token: "test-token-123".to_string(), - }); - - let err = env.api.create_rack_firmware(request).await.unwrap_err(); - assert_eq!(err.code(), tonic::Code::InvalidArgument); - assert!(err.message().contains("DeviceType must be 'Switch Tray'")); - - Ok(()) -} - -#[crate::sqlx_test()] -async fn test_create_rack_firmware_rejects_switch_system_image_with_wrong_component( - pool: sqlx::PgPool, -) -> Result<(), Box> { - let env = create_test_env(pool).await; - - let firmware_id = "invalid-firmware-nvos-component"; - let config_json = serde_json::json!({ - "Id": firmware_id, - "BoardSKUs": [], - "SwitchSystemImages": [ - { - "DeviceType": "Switch Tray", - "Component": "SDK", - "Version": "25.02.2553", - "Type": "Prod", - "PackageName": "GB200NVL72_NVOS", - "Location": 
"https://urm.nvidia.com/artifactory/sw-nbu-sws-nvos-generic-local/release/25.02.2553/amd64/prod/nvos-amd64-25.02.2553.bin", - "LocationType": "HTTPS", - "Required": true - } - ] - }) - .to_string(); - - let request = tonic::Request::new(RackFirmwareCreateRequest { - rack_hardware_type: Some(rpc::common::RackHardwareType { - value: "any".to_string(), - }), - config_json, - artifactory_token: "test-token-123".to_string(), - }); - - let err = env.api.create_rack_firmware(request).await.unwrap_err(); - assert_eq!(err.code(), tonic::Code::InvalidArgument); - assert!(err.message().contains("Component must be 'NVOS'")); - - Ok(()) -} - -// ============================================================================ -// GET TESTS -// ============================================================================ - -#[crate::sqlx_test()] -async fn test_get_rack_firmware(pool: sqlx::PgPool) -> Result<(), Box> { - let env = create_test_env(pool).await; - - let firmware_id = "get-test-firmware-001"; - let config_json = create_valid_rack_firmware_json(firmware_id); - - // Create firmware first - let create_request = tonic::Request::new(RackFirmwareCreateRequest { - rack_hardware_type: Some(rpc::common::RackHardwareType { - value: "any".to_string(), - }), - config_json: config_json.clone(), - artifactory_token: "test-token".to_string(), - }); - env.api.create_rack_firmware(create_request).await?; - - // Now get it - let get_request = tonic::Request::new(RackFirmwareGetRequest { - id: firmware_id.to_string(), - }); - - let response = env.api.get_rack_firmware(get_request).await?; - let firmware = response.into_inner(); - - assert_eq!(firmware.id, firmware_id); - assert!(!firmware.config_json.is_empty()); - assert!(!firmware.available); - assert!(!firmware.created.is_empty()); - assert!(!firmware.updated.is_empty()); - - Ok(()) -} - -// ============================================================================ -// LIST TESTS -// 
============================================================================ - -#[crate::sqlx_test()] -async fn test_list_rack_firmware_empty( - pool: sqlx::PgPool, -) -> Result<(), Box> { - let env = create_test_env(pool).await; - - let request = tonic::Request::new(RackFirmwareSearchFilter { - only_available: false, - rack_hardware_type: None, - }); - - let response = env.api.list_rack_firmware(request).await?; - let list = response.into_inner(); - - assert_eq!(list.configs.len(), 0); - - Ok(()) -} - -#[crate::sqlx_test()] -async fn test_list_rack_firmware_multiple( - pool: sqlx::PgPool, -) -> Result<(), Box> { - let env = create_test_env(pool).await; - - // Create multiple firmware configs - for i in 1..=3 { - let firmware_id = format!("list-test-firmware-{:03}", i); - let config_json = create_valid_rack_firmware_json(&firmware_id); - - let request = tonic::Request::new(RackFirmwareCreateRequest { - rack_hardware_type: Some(rpc::common::RackHardwareType { - value: "any".to_string(), - }), - config_json, - artifactory_token: format!("test-token-{}", i), - }); - env.api.create_rack_firmware(request).await?; - } - - // List all - let request = tonic::Request::new(RackFirmwareSearchFilter { - only_available: false, - rack_hardware_type: None, - }); - - let response = env.api.list_rack_firmware(request).await?; - let list = response.into_inner(); - - assert_eq!(list.configs.len(), 3); - - // Verify they're sorted by created DESC (newest first) - assert_eq!(list.configs[0].id, "list-test-firmware-003"); - assert_eq!(list.configs[1].id, "list-test-firmware-002"); - assert_eq!(list.configs[2].id, "list-test-firmware-001"); - - Ok(()) -} - -// ============================================================================ -// DELETE TESTS -// ============================================================================ - -#[crate::sqlx_test()] -async fn test_delete_rack_firmware(pool: sqlx::PgPool) -> Result<(), Box> { - let env = create_test_env(pool).await; - - let 
firmware_id = "delete-test-firmware-001"; - let config_json = create_valid_rack_firmware_json(firmware_id); - - // Create firmware - let create_request = tonic::Request::new(RackFirmwareCreateRequest { - rack_hardware_type: Some(rpc::common::RackHardwareType { - value: "any".to_string(), - }), - config_json, - artifactory_token: "test-token".to_string(), - }); - env.api.create_rack_firmware(create_request).await?; - - // Verify it exists - let firmware = db::rack_firmware::find_by_id(&env.pool, firmware_id).await; - assert!(firmware.is_ok()); - - // Delete it - let delete_request = tonic::Request::new(RackFirmwareDeleteRequest { - id: firmware_id.to_string(), - }); - env.api.delete_rack_firmware(delete_request).await?; - - // Verify it's gone - let firmware = db::rack_firmware::find_by_id(&env.pool, firmware_id).await; - assert!(firmware.is_err()); - - Ok(()) -} - -// ============================================================================ -// INTEGRATION TESTS -// ============================================================================ - -#[crate::sqlx_test()] -async fn test_rack_firmware_full_lifecycle( - pool: sqlx::PgPool, -) -> Result<(), Box> { - let env = create_test_env(pool).await; - - let firmware_id = "lifecycle-test-001"; - let config_json = create_valid_rack_firmware_json(firmware_id); - - // 1. Create - let create_request = tonic::Request::new(RackFirmwareCreateRequest { - rack_hardware_type: Some(rpc::common::RackHardwareType { - value: "any".to_string(), - }), - config_json: config_json.clone(), - artifactory_token: "test-token".to_string(), - }); - let create_response = env.api.create_rack_firmware(create_request).await?; - let created_firmware = create_response.into_inner(); - assert_eq!(created_firmware.id, firmware_id); - assert!(!created_firmware.available); - - // 2. 
Get - let get_request = tonic::Request::new(RackFirmwareGetRequest { - id: firmware_id.to_string(), - }); - let get_response = env.api.get_rack_firmware(get_request).await?; - let retrieved_firmware = get_response.into_inner(); - assert_eq!(retrieved_firmware.id, firmware_id); - - // 3. List (should contain our firmware) - let list_request = tonic::Request::new(RackFirmwareSearchFilter { - only_available: false, - rack_hardware_type: None, - }); - let list_response = env.api.list_rack_firmware(list_request).await?; - let list = list_response.into_inner(); - assert_eq!(list.configs.len(), 1); - assert_eq!(list.configs[0].id, firmware_id); - - // 4. Update availability in database - let mut txn = env.pool.begin().await?; - db::rack_firmware::set_available(&mut txn, firmware_id, true).await?; - txn.commit().await?; - - // 5. Verify availability changed - let get_request = tonic::Request::new(RackFirmwareGetRequest { - id: firmware_id.to_string(), - }); - let get_response = env.api.get_rack_firmware(get_request).await?; - let updated_firmware = get_response.into_inner(); - assert!(updated_firmware.available); - - // 6. Delete - let delete_request = tonic::Request::new(RackFirmwareDeleteRequest { - id: firmware_id.to_string(), - }); - env.api.delete_rack_firmware(delete_request).await?; - - // 7. 
Verify deleted - let get_request = tonic::Request::new(RackFirmwareGetRequest { - id: firmware_id.to_string(), - }); - let err = env - .api - .get_rack_firmware(get_request) - .await - .expect_err("Should not find deleted firmware"); - assert_eq!(err.code(), tonic::Code::NotFound); - - Ok(()) -} - -#[crate::sqlx_test()] -async fn test_rack_firmware_with_multiple_components( - pool: sqlx::PgPool, -) -> Result<(), Box> { - let env = create_test_env(pool).await; - - let firmware_id = "multi-component-001"; - let config_json = serde_json::json!({ - "Id": firmware_id, - "Name": "Multi-Component Configuration", - "Version": "2.5.0", - "Description": "A firmware configuration with multiple board SKUs and components", - "BoardSKUs": [ - { - "SKUID": "sku-compute-001", - "Name": "GB200 Compute Tray", - "Type": "ComputeTray", - "Components": { - "Firmware": [ - { - "Component": "BIOS", - "Bundle": "bios-gb200-v3.0", - "Version": "3.0.5", - "Locations": [ - { - "Location": "artifactory.nvidia.com/firmware/bios/gb200/v3.0.5", - "LocationType": "Artifactory", - "Type": "Firmware" - } - ] - }, - { - "Component": "BMC", - "Bundle": "bmc-gb200-v2.1", - "Version": "2.1.3", - "Locations": [ - { - "Location": "artifactory.nvidia.com/firmware/bmc/gb200/v2.1.3", - "LocationType": "Artifactory", - "Type": "Firmware" - } - ] - }, - { - "Component": "NIC", - "Bundle": "nic-cx7-v1.8", - "Version": "1.8.2", - "Locations": [ - { - "Location": "artifactory.nvidia.com/firmware/nic/cx7/v1.8.2", - "LocationType": "Artifactory", - "Type": "Firmware" - } - ] - } - ] - } - }, - { - "SKUID": "sku-power-001", - "Name": "Power Shelf", - "Type": "PowerShelf", - "Components": { - "Firmware": [ - { - "Component": "PSU", - "Bundle": "psu-v2.0", - "Version": "2.0.1", - "Locations": [ - { - "Location": "artifactory.nvidia.com/firmware/psu/v2.0.1", - "LocationType": "Artifactory", - "Type": "Firmware" - } - ] - } - ] - } - }, - { - "SKUID": "sku-switch-001", - "Name": "NVLink Switch", - "Type": 
"NVLinkSwitch", - "Components": { - "Firmware": [ - { - "Component": "SwitchOS", - "Bundle": "nvlink-switch-v4.0", - "Version": "4.0.0", - "Locations": [ - { - "Location": "artifactory.nvidia.com/firmware/nvlink/v4.0.0", - "LocationType": "Artifactory", - "Type": "Firmware" - } - ] - } - ] - } - } - ] - }) - .to_string(); - - let request = tonic::Request::new(RackFirmwareCreateRequest { - rack_hardware_type: Some(rpc::common::RackHardwareType { - value: "any".to_string(), - }), - config_json, - artifactory_token: "test-token".to_string(), - }); - - let response = env.api.create_rack_firmware(request).await?; - let firmware = response.into_inner(); - - assert_eq!(firmware.id, firmware_id); - - // Verify parsed components - let db_firmware = db::rack_firmware::find_by_id(&env.pool, firmware_id).await?; - assert!(db_firmware.parsed_components.is_some()); - - let parsed = db_firmware.parsed_components.unwrap(); - let board_skus = parsed["board_skus"].as_array().unwrap(); - assert_eq!(board_skus.len(), 3); - - // Verify SKU details - assert_eq!(board_skus[0]["sku_id"], "sku-compute-001"); - assert_eq!(board_skus[0]["name"], "GB200 Compute Tray"); - assert_eq!(board_skus[0]["sku_type"], "ComputeTray"); - - let firmware_components = board_skus[0]["firmware_components"].as_array().unwrap(); - assert_eq!(firmware_components.len(), 3); // BIOS, BMC, NIC - - assert_eq!(board_skus[1]["sku_id"], "sku-power-001"); - assert_eq!(board_skus[2]["sku_id"], "sku-switch-001"); - - Ok(()) -} diff --git a/crates/api/src/tests/rack_state_controller/handler.rs b/crates/api/src/tests/rack_state_controller/handler.rs index 8009cd9e02..8707c0d594 100644 --- a/crates/api/src/tests/rack_state_controller/handler.rs +++ b/crates/api/src/tests/rack_state_controller/handler.rs @@ -36,7 +36,6 @@ use model::rack_type::{ RackHardwareClass, RackHardwareType, RackProfile, RackProfileConfig, }; use model::switch::{NewSwitch, SwitchConfig}; -use serde_json::json; use tonic::Request; use 
crate::state_controller::db_write_batch::DbWriteBatch; @@ -159,58 +158,6 @@ pub(crate) fn config_with_rack_profiles() -> crate::cfg::file::CarbideConfig { config } -fn default_lookup_table_json() -> serde_json::Value { - json!({ - "devices": { - "Compute Node": { - "HMC_prod": { - "filename": "hmc-prod.bin", - "target": "/redfish/v1/Chassis/HGX_Chassis_0", - "component": "HMC", - "bundle": "bundle-hmc", - "firmware_type": "prod", - "version": "1.0.0" - }, - "BMC_prod": { - "filename": "bmc-prod.bin", - "target": "FW_BMC_0", - "component": "BMC", - "bundle": "bundle-bmc", - "firmware_type": "prod", - "version": "1.0.0" - } - } - } - }) -} - -async fn insert_default_rack_firmware( - pool: &sqlx::PgPool, - firmware_id: &str, - rack_hardware_type: RackHardwareType, - available: bool, -) { - let mut txn = pool.begin().await.unwrap(); - db::rack_firmware::create( - &mut txn, - firmware_id, - rack_hardware_type, - json!({ "Id": firmware_id }), - Some(default_lookup_table_json()), - ) - .await - .unwrap(); - if available { - db::rack_firmware::set_available(&mut txn, firmware_id, true) - .await - .unwrap(); - } - db::rack_firmware::set_default(&mut txn, firmware_id) - .await - .unwrap(); - txn.commit().await.unwrap(); -} - async fn create_single_compute_rack( env: &TestEnv, pool: &sqlx::PgPool, @@ -396,36 +343,6 @@ async fn create_expected_rack(pool: &sqlx::PgPool, rack_id: &RackId, rack_profil db_expected_rack::create(&mut txn, &er).await.unwrap(); } -async fn create_default_nvos_rack_firmware(pool: &sqlx::PgPool, firmware_id: &str) { - let mut txn = pool.acquire().await.unwrap(); - sqlx::query( - "INSERT INTO rack_firmware \ - (id, rack_hardware_type, config, parsed_components, available, is_default) \ - VALUES ($1, $2, $3::jsonb, $4::jsonb, true, true)", - ) - .bind(firmware_id) - .bind(RackHardwareType::any()) - .bind(sqlx::types::Json(json!({ "Id": firmware_id }))) - .bind(sqlx::types::Json(json!({ - "devices": {}, - "switch_system_images": { - "Switch Tray": { - 
"NVOS_prod": { - "component": "NVOS", - "package_name": "GB200NVL72_NVOS", - "version": "25.02.2553", - "image_filename": "nvos-amd64-25.02.2553.bin", - "location_type": "HTTPS", - "firmware_type": "prod" - } - } - } - }))) - .execute(txn.as_mut()) - .await - .unwrap(); -} - pub(crate) fn new_machine_id(seed: u8) -> MachineId { let mut hash = [0u8; 32]; hash[0] = seed; @@ -455,7 +372,7 @@ async fn test_on_demand_rack_maintenance_schedules_nvos_only_scope( activity: Some( rpc::forge::maintenance_activity_config::Activity::NvosUpdate( rpc::forge::NvosUpdateActivity { - rack_firmware_id: "fw-nvos".to_string(), + firmware_object_id: "fw-nvos".to_string(), }, ), ), @@ -475,7 +392,7 @@ async fn test_on_demand_rack_maintenance_schedules_nvos_only_scope( assert!(matches!( &scope.activities[0], MaintenanceActivity::NvosUpdate { - rack_firmware_id: Some(id) + firmware_object_id: Some(id) } if id == "fw-nvos" )); @@ -544,8 +461,10 @@ async fn test_on_demand_rack_maintenance_schedules_firmware_and_nvos_scope( activity: Some( rpc::forge::maintenance_activity_config::Activity::FirmwareUpgrade( rpc::forge::FirmwareUpgradeActivity { - firmware_version: "fw-mixed".to_string(), + firmware_version: r#"{"Id":"fw-mixed"}"#.to_string(), components: vec!["BMC".to_string()], + access_token: Some("token".to_string()), + force_update: false, }, ), ), @@ -554,7 +473,7 @@ async fn test_on_demand_rack_maintenance_schedules_firmware_and_nvos_scope( activity: Some( rpc::forge::maintenance_activity_config::Activity::NvosUpdate( rpc::forge::NvosUpdateActivity { - rack_firmware_id: "fw-mixed".to_string(), + firmware_object_id: "fw-mixed".to_string(), }, ), ), @@ -577,18 +496,58 @@ async fn test_on_demand_rack_maintenance_schedules_firmware_and_nvos_scope( MaintenanceActivity::FirmwareUpgrade { firmware_version: Some(id), components, - } if id == "fw-mixed" && components == &vec!["BMC".to_string()] + .. 
+ } if id == r#"{"Id":"fw-mixed"}"# && components == &vec!["BMC".to_string()] )); assert!(matches!( &scope.activities[1], MaintenanceActivity::NvosUpdate { - rack_firmware_id: Some(id) + firmware_object_id: Some(id) } if id == "fw-mixed" )); Ok(()) } +#[crate::sqlx_test] +async fn test_on_demand_rack_maintenance_rejects_firmware_without_access_token( + pool: sqlx::PgPool, +) -> Result<(), Box> { + let env = create_test_env_with_overrides(pool.clone(), TestEnvOverrides::default()).await; + let (rack_id, switch_id) = create_ready_rack_with_switch(&env, &pool).await?; + + let err = crate::handlers::rack::on_demand_rack_maintenance( + env.api.as_ref(), + Request::new(rpc::forge::RackMaintenanceOnDemandRequest { + rack_id: Some(rack_id.clone()), + scope: Some(rpc::forge::RackMaintenanceScope { + machine_ids: vec![], + switch_ids: vec![switch_id.to_string()], + power_shelf_ids: vec![], + activities: vec![rpc::forge::MaintenanceActivityConfig { + activity: Some( + rpc::forge::maintenance_activity_config::Activity::FirmwareUpgrade( + rpc::forge::FirmwareUpgradeActivity { + firmware_version: r#"{"Id":"fw-mixed"}"#.to_string(), + components: vec!["BMC".to_string()], + access_token: None, + force_update: false, + }, + ), + ), + }], + }), + }), + ) + .await + .expect_err("firmware-upgrade should require access_token"); + + assert_eq!(err.code(), tonic::Code::InvalidArgument); + assert!(err.message().contains("access_token")); + + Ok(()) +} + /// test_expected_no_definition_stays_parked verifies that a rack without an /// expected_rack record stays in Created and does not advance. #[crate::sqlx_test] @@ -1258,12 +1217,11 @@ async fn test_ready_with_no_labels_stays_ready( Ok(()) } -/// test_firmware_upgrade_start_without_default_advances_to_nvos_update -/// verifies that maintenance skips firmware flashing when no default firmware -/// exists for the rack hardware type and continues through NVOS update before -/// ConfigureNmxCluster. 
+/// test_firmware_upgrade_start_skips_without_json verifies that +/// Maintenance::FirmwareUpgrade(Start) skips firmware flashing when no +/// firmware object JSON source is configured for rack maintenance. #[crate::sqlx_test] -async fn test_firmware_upgrade_start_without_default_advances_to_nvos_update( +async fn test_firmware_upgrade_start_skips_without_json( pool: sqlx::PgPool, ) -> Result<(), Box> { let env = create_test_env_with_overrides( @@ -1275,90 +1233,7 @@ async fn test_firmware_upgrade_start_without_default_advances_to_nvos_update( ) .await; let (rack_id, host) = create_single_compute_rack(&env, &pool).await?; - let mut rack = get_db_rack(env.db_reader().as_mut(), &rack_id).await; - let handler_instance = RackStateHandler::default(); - let mut services = env.rack_state_handler_services(); - let mut metrics = RackMetrics::default(); - let mut db_writes = DbWriteBatch::default(); - let mut ctx = StateHandlerContext:: { - services: &mut services, - metrics: &mut metrics, - pending_db_writes: &mut db_writes, - }; - - let fw_state = RackState::Maintenance { - maintenance_state: RackMaintenanceState::FirmwareUpgrade { - rack_firmware_upgrade: FirmwareUpgradeState::Start, - }, - }; - let mut outcome = handler_instance - .handle_object_state(&rack_id, &mut rack, &fw_state, &mut ctx) - .await?; - if let Some(txn) = outcome.take_transaction() { - txn.commit().await?; - } - - match outcome { - StateHandlerOutcome::Transition { next_state, .. 
} => { - assert!( - matches!( - next_state, - RackState::Maintenance { - maintenance_state: RackMaintenanceState::NVOSUpdate { - nvos_update: NvosUpdateState::Start { - rack_firmware_id: None, - }, - }, - } - ), - "FirmwareUpgrade(Start) should skip firmware and advance to NVOSUpdate, got {:?}", - next_state - ); - } - other => panic!( - "Expected Transition, got {:?}", - std::mem::discriminant(&other) - ), - } - - assert!(env.rms_sim.submitted_firmware_requests().await.is_empty()); - let machine = db::machine::find_one( - &pool, - &host.host_snapshot.id, - model::machine::machine_search_config::MachineSearchConfig::default(), - ) - .await? - .expect("machine should exist"); - assert!(machine.host_reprovision_requested.is_none()); - - Ok(()) -} - -/// test_firmware_upgrade_start_with_unavailable_default_advances_to_nvos_update -/// verifies that maintenance skips firmware flashing when a default firmware -/// exists for the hardware type but is not yet available, then continues -/// through NVOS update before ConfigureNmxCluster. 
-#[crate::sqlx_test] -async fn test_firmware_upgrade_start_with_unavailable_default_advances_to_nvos_update( - pool: sqlx::PgPool, -) -> Result<(), Box> { - let env = create_test_env_with_overrides( - pool.clone(), - TestEnvOverrides { - config: Some(config_with_rack_profiles()), - ..Default::default() - }, - ) - .await; - let (rack_id, host) = create_single_compute_rack(&env, &pool).await?; - insert_default_rack_firmware( - &pool, - "fw-default-unavailable", - RackHardwareType::any(), - false, - ) - .await; let mut rack = get_db_rack(env.db_reader().as_mut(), &rack_id).await; let handler_instance = RackStateHandler::default(); @@ -1391,121 +1266,12 @@ async fn test_firmware_upgrade_start_with_unavailable_default_advances_to_nvos_u RackState::Maintenance { maintenance_state: RackMaintenanceState::NVOSUpdate { nvos_update: NvosUpdateState::Start { - rack_firmware_id: None, + firmware_object_id: None, }, }, } ), - "FirmwareUpgrade(Start) should skip unavailable firmware and advance to NVOSUpdate, got {:?}", - next_state - ); - } - other => panic!( - "Expected Transition, got {:?}", - std::mem::discriminant(&other) - ), - } - - assert!(env.rms_sim.submitted_firmware_requests().await.is_empty()); - let machine = db::machine::find_one( - &pool, - &host.host_snapshot.id, - model::machine::machine_search_config::MachineSearchConfig::default(), - ) - .await? - .expect("machine should exist"); - assert!(machine.host_reprovision_requested.is_none()); - - Ok(()) -} - -/// test_firmware_upgrade_start_transitions_to_wait_for_complete verifies that -/// Maintenance::FirmwareUpgrade(Start) transitions to WaitForComplete. 
-#[crate::sqlx_test] -async fn test_firmware_upgrade_start_transitions_to_wait_for_complete( - pool: sqlx::PgPool, -) -> Result<(), Box> { - let env = create_test_env_with_overrides( - pool.clone(), - TestEnvOverrides { - config: Some(config_with_rack_profiles()), - ..Default::default() - }, - ) - .await; - let (rack_id, host) = create_single_compute_rack(&env, &pool).await?; - insert_default_rack_firmware(&pool, "fw-default", RackHardwareType::any(), true).await; - { - let mut txn = pool.begin().await?; - db::machine::update_rack_fw_details( - txn.as_mut(), - &host.host_snapshot.id, - Some(&model::rack::RackFirmwareUpgradeStatus { - task_id: "stale-rack-job".to_string(), - status: RackFirmwareUpgradeState::Completed, - started_at: Some(chrono::Utc::now() - chrono::Duration::minutes(10)), - ended_at: Some(chrono::Utc::now() - chrono::Duration::minutes(9)), - }), - ) - .await?; - txn.commit().await?; - } - env.rms_sim - .queue_update_firmware_response( - librms::protos::rack_manager::UpdateFirmwareByDeviceListResponse { - response: Some(librms::protos::rack_manager::NodeBatchResponse { - status: librms::protos::rack_manager::ReturnCode::Success as i32, - message: "queued".to_string(), - total_nodes: 1, - successful_nodes: 1, - failed_nodes: 0, - job_id: "batch-job-1".to_string(), - ..Default::default() - }), - node_jobs: vec![librms::protos::rack_manager::NodeFirmwareJobInfo { - node_id: host.host_snapshot.id.to_string(), - job_id: "child-job-1".to_string(), - }], - }, - ) - .await; - - let mut rack = get_db_rack(env.db_reader().as_mut(), &rack_id).await; - - let handler_instance = RackStateHandler::default(); - let mut services = env.rack_state_handler_services(); - let mut metrics = RackMetrics::default(); - let mut db_writes = DbWriteBatch::default(); - let mut ctx = StateHandlerContext:: { - services: &mut services, - metrics: &mut metrics, - pending_db_writes: &mut db_writes, - }; - - let fw_state = RackState::Maintenance { - maintenance_state: 
RackMaintenanceState::FirmwareUpgrade { - rack_firmware_upgrade: FirmwareUpgradeState::Start, - }, - }; - let mut outcome = handler_instance - .handle_object_state(&rack_id, &mut rack, &fw_state, &mut ctx) - .await?; - if let Some(txn) = outcome.take_transaction() { - txn.commit().await?; - } - - match outcome { - StateHandlerOutcome::Transition { next_state, .. } => { - assert!( - matches!( - next_state, - RackState::Maintenance { - maintenance_state: RackMaintenanceState::FirmwareUpgrade { - rack_firmware_upgrade: FirmwareUpgradeState::WaitForComplete, - }, - } - ), - "FirmwareUpgrade(Start) should transition to WaitForComplete, got {:?}", + "FirmwareUpgrade(Start) should skip firmware without JSON and advance to NVOSUpdate, got {:?}", next_state ); } @@ -1515,23 +1281,12 @@ async fn test_firmware_upgrade_start_transitions_to_wait_for_complete( ), } - let requests = env.rms_sim.submitted_firmware_requests().await; - assert_eq!(requests.len(), 1); - assert!(requests[0].activate); - assert_eq!(requests[0].nodes.as_ref().unwrap().devices.len(), 1); - assert_eq!( - requests[0].nodes.as_ref().unwrap().devices[0].node_id, - host.host_snapshot.id.to_string() + let requests = env.rms_sim.submitted_apply_firmware_object_requests().await; + assert!( + requests.is_empty(), + "firmware apply should not be submitted without a firmware object JSON source" ); - let persisted_rack = get_db_rack(env.db_reader().as_mut(), &rack_id).await; - let job = persisted_rack - .firmware_upgrade_job - .expect("rack firmware job should be persisted"); - assert_eq!(job.batch_job_ids, vec!["batch-job-1".to_string()]); - assert_eq!(job.machines.len(), 1); - assert_eq!(job.machines[0].job_id.as_deref(), Some("child-job-1")); - let machine = db::machine::find_one( &pool, &host.host_snapshot.id, @@ -1539,22 +1294,7 @@ async fn test_firmware_upgrade_start_transitions_to_wait_for_complete( ) .await? 
.expect("machine should exist"); - assert!(machine.host_reprovision_requested.is_some()); - assert!( - machine.rack_fw_details.is_none(), - "rack firmware details should be cleared at the start of a new rack firmware cycle" - ); - assert!( - job.started_at.is_some_and(|started_at| { - started_at - >= machine - .host_reprovision_requested - .as_ref() - .expect("rack reprovision request should exist") - .requested_at - }), - "rack firmware job start time should be at or after the rack reprovision request" - ); + assert!(machine.host_reprovision_requested.is_none()); Ok(()) } @@ -2095,7 +1835,7 @@ async fn test_firmware_upgrade_wait_for_complete_retries_on_transient_poll_error /// test_nvos_update_start_transitions_to_wait_for_complete verifies that /// Maintenance::NVOSUpdate(Start) transitions to WaitForComplete when a -/// default NVOS-capable rack_firmware entry exists. +/// default NVOS-capable firmware object is available through RMS. #[crate::sqlx_test] async fn test_nvos_update_start_transitions_to_wait_for_complete( pool: sqlx::PgPool, @@ -2118,7 +1858,7 @@ async fn test_nvos_update_start_transitions_to_wait_for_complete( maintenance_requested: Some(MaintenanceScope { switch_ids: vec![switch_id], activities: vec![MaintenanceActivity::NvosUpdate { - rack_firmware_id: Some("fw-nvos-default".to_string()), + firmware_object_id: Some("fw-nvos-default".to_string()), }], ..Default::default() }), @@ -2128,10 +1868,9 @@ async fn test_nvos_update_start_transitions_to_wait_for_complete( db_rack::update(&mut txn, &rack_id, &config).await?; drop(txn); - create_default_nvos_rack_firmware(&pool, "fw-nvos-default").await; env.rms_sim - .queue_update_switch_system_image_response( - librms::protos::rack_manager::UpdateSwitchSystemImageResponse { + .queue_apply_switch_system_image_response( + librms::protos::rack_manager::ApplySwitchSystemImageResponse { response: Some(librms::protos::rack_manager::NodeBatchResponse { status: librms::protos::rack_manager::ReturnCode::Success as 
i32, job_id: "nvos-job-1".to_string(), @@ -2157,7 +1896,7 @@ async fn test_nvos_update_start_transitions_to_wait_for_complete( let nvos_state = RackState::Maintenance { maintenance_state: RackMaintenanceState::NVOSUpdate { nvos_update: NvosUpdateState::Start { - rack_firmware_id: Some("fw-nvos-default".to_string()), + firmware_object_id: Some("fw-nvos-default".to_string()), }, }, }; @@ -2171,7 +1910,7 @@ async fn test_nvos_update_start_transitions_to_wait_for_complete( ); assert!( !env.rms_sim - .submitted_switch_system_image_requests() + .submitted_apply_switch_system_image_requests() .await .is_empty(), "NVOSUpdate(Start) should submit a switch system image request to RMS" diff --git a/crates/api/src/tests/rack_state_controller/mod.rs b/crates/api/src/tests/rack_state_controller/mod.rs index 3c97a96f5b..25ee9dd7b7 100644 --- a/crates/api/src/tests/rack_state_controller/mod.rs +++ b/crates/api/src/tests/rack_state_controller/mod.rs @@ -98,7 +98,7 @@ impl StateHandler for TestRackStateHandler { RackMaintenanceState::FirmwareUpgrade { .. 
} => RackState::Maintenance { maintenance_state: RackMaintenanceState::NVOSUpdate { nvos_update: NvosUpdateState::Start { - rack_firmware_id: None, + firmware_object_id: None, }, }, }, @@ -420,7 +420,7 @@ async fn test_can_retrieve_rack_state_history_with_real_handler( let expected = vec![ "{\"state\": \"discovering\"}", "{\"state\": \"maintenance\", \"maintenance_state\": {\"FirmwareUpgrade\": {\"rack_firmware_upgrade\": \"Start\"}}}", - "{\"state\": \"maintenance\", \"maintenance_state\": {\"NVOSUpdate\": {\"nvos_update\": {\"Start\": {\"rack_firmware_id\": null}}}}}", + "{\"state\": \"maintenance\", \"maintenance_state\": {\"NVOSUpdate\": {\"nvos_update\": {\"Start\": {\"firmware_object_id\": null}}}}}", "{\"state\": \"maintenance\", \"maintenance_state\": {\"ConfigureNmxCluster\": {\"configure_nmx_cluster\": \"Start\"}}}", "{\"state\": \"maintenance\", \"maintenance_state\": {\"PowerSequence\": {\"rack_power\": \"PoweringOn\"}}}", "{\"state\": \"maintenance\", \"maintenance_state\": \"Completed\"}", diff --git a/crates/component-manager/src/rms.rs b/crates/component-manager/src/rms.rs index 7ba5c97cab..b096cc511c 100644 --- a/crates/component-manager/src/rms.rs +++ b/crates/component-manager/src/rms.rs @@ -439,24 +439,24 @@ impl PowerShelfManager for RmsBackend { } } -/// Query all rack firmware IDs from the database. -async fn list_rack_firmware_ids(db: &PgPool) -> Result, ComponentManagerError> { - let mut conn = db.acquire().await.map_err(|e| { - ComponentManagerError::Internal(format!("failed to acquire DB connection: {e}")) - })?; - - let filter = model::rack_firmware::RackFirmwareSearchFilter { - only_available: false, - rack_hardware_type: None, - }; - - let firmwares = db::rack_firmware::list_all(&mut conn, filter) +/// Query all firmware object IDs from RMS. 
+async fn list_firmware_object_ids( + client: &dyn RmsApi, +) -> Result, ComponentManagerError> { + let response = client + .list_firmware_objects(rms::ListFirmwareObjectsRequest { + metadata: None, + only_available: false, + hardware_type: String::new(), + }) .await .map_err(|e| { - ComponentManagerError::Internal(format!("failed to list rack firmware: {e}")) + ComponentManagerError::Internal(format!( + "failed to list firmware objects from RMS: {e}" + )) })?; - Ok(firmwares.into_iter().map(|fw| fw.id).collect()) + Ok(response.objects.into_iter().map(|fw| fw.id).collect()) } /// Map NvSwitchComponent to a firmware target name used by RMS. @@ -684,7 +684,7 @@ impl NvSwitchManager for RmsBackend { #[instrument(skip(self), fields(backend = "rms"))] async fn list_firmware_bundles(&self) -> Result, ComponentManagerError> { - list_rack_firmware_ids(&self.db).await + list_firmware_object_ids(self.client.as_ref()).await } } @@ -1290,9 +1290,15 @@ mod tests { } #[carbide_macros::sqlx_test] - async fn list_firmware_bundles_empty_db(pool: sqlx::PgPool) { - let (_mock, backend, _, _, _, _, _) = make_backend(&pool).await; + async fn list_firmware_bundles_empty_rms(pool: sqlx::PgPool) { + let (mock, backend, _, _, _, _, _) = make_backend(&pool).await; + mock.enqueue_list_firmware_objects(Ok(rms::ListFirmwareObjectsResponse { + objects: Vec::new(), + })) + .await; + let bundles = backend.list_firmware_bundles().await.unwrap(); + assert!(bundles.is_empty()); } } diff --git a/crates/component-manager/src/state_controller/nv_switch.rs b/crates/component-manager/src/state_controller/nv_switch.rs index 63c71b0652..539d5fafee 100644 --- a/crates/component-manager/src/state_controller/nv_switch.rs +++ b/crates/component-manager/src/state_controller/nv_switch.rs @@ -186,6 +186,8 @@ impl NvSwitchManager for StateControllerNvSwitch { MaintenanceActivity::FirmwareUpgrade { firmware_version, components: vec![], + access_token: None, + force_update: false, }, ) .await diff --git 
a/crates/component-manager/src/state_controller/power_shelf.rs b/crates/component-manager/src/state_controller/power_shelf.rs index aab9d17072..f7aafeb43d 100644 --- a/crates/component-manager/src/state_controller/power_shelf.rs +++ b/crates/component-manager/src/state_controller/power_shelf.rs @@ -191,6 +191,8 @@ impl PowerShelfManager for StateControllerPowerShelf { MaintenanceActivity::FirmwareUpgrade { firmware_version, components: vec![], + access_token: None, + force_update: false, }, ) .await diff --git a/crates/rpc/build.rs b/crates/rpc/build.rs index ec6fa45475..b44801c48a 100644 --- a/crates/rpc/build.rs +++ b/crates/rpc/build.rs @@ -701,11 +701,6 @@ fn main() -> Result<(), Box> { "SkuStatus", "#[derive(serde::Serialize, serde::Deserialize)]", ) - .type_attribute("forge.RackFirmware", "#[derive(serde::Serialize)]") - .type_attribute("forge.RackFirmwareList", "#[derive(serde::Serialize)]") - .type_attribute("forge.RackFirmwareHistoryRecord", "#[derive(serde::Serialize)]") - .type_attribute("forge.RackFirmwareHistoryRecords", "#[derive(serde::Serialize)]") - .type_attribute("forge.RackFirmwareHistoryResponse", "#[derive(serde::Serialize)]") .type_attribute("common.RackHardwareType", "#[derive(serde::Serialize)]") .type_attribute("forge.RackCapabilitiesSet", "#[derive(serde::Serialize)]") .type_attribute("forge.RackCapabilityCompute", "#[derive(serde::Serialize)]") diff --git a/crates/rpc/proto/forge.proto b/crates/rpc/proto/forge.proto index acc106a9d7..7e06a31875 100644 --- a/crates/rpc/proto/forge.proto +++ b/crates/rpc/proto/forge.proto @@ -403,23 +403,6 @@ service Forge { // Get the BMC credentials for all expected machines rpc GetAllExpectedMachines(google.protobuf.Empty) returns (ExpectedMachineList); - // Rack Firmware Management - // Create or update a Rack firmware configuration - rpc CreateRackFirmware(RackFirmwareCreateRequest) returns (RackFirmware); - // Get a Rack firmware configuration by ID - rpc GetRackFirmware(RackFirmwareGetRequest) 
returns (RackFirmware); - // List all Rack firmware configurations - rpc ListRackFirmware(RackFirmwareSearchFilter) returns (RackFirmwareList); - // Delete a Rack firmware configuration - rpc DeleteRackFirmware(RackFirmwareDeleteRequest) returns (google.protobuf.Empty); - // Apply firmware to all devices in a rack - rpc ApplyRackFirmware(RackFirmwareApplyRequest) returns (RackFirmwareApplyResponse); - // Check the status of an async firmware update job - rpc GetRackFirmwareJobStatus(RackFirmwareJobStatusRequest) returns (RackFirmwareJobStatusResponse); - // Get the history of rack firmware apply operations - rpc GetRackFirmwareHistory(RackFirmwareHistoryRequest) returns (RackFirmwareHistoryResponse); - rpc RackFirmwareSetDefault(RackFirmwareSetDefaultRequest) returns (google.protobuf.Empty); - // Replace all expected machines in site rpc ReplaceAllExpectedMachines(ExpectedMachineList) returns (google.protobuf.Empty); // Delete all expected machines in site @@ -5836,17 +5819,23 @@ message MachineValidationOnDemandResponse { } message FirmwareUpgradeActivity { - // Target firmware version. When empty, RMS uses its default/latest version. + // SOT JSON string passed to the backend. + // Explicit firmware-upgrade maintenance requests must set this and access_token. + // Implicit all-activity maintenance skips firmware update when no JSON is configured. string firmware_version = 1; // Firmware components to update (e.g. "BMC", "CPLD", "BIOS"). // When empty, all components are updated. repeated string components = 2; + // Used only by the backend for artifact download when firmware_version contains SOT JSON. + // NICo does not persist it. + optional string access_token = 3; + bool force_update = 4; } message NvosUpdateActivity { - // Rack firmware entry containing the switch system image to install. - // When empty, the default rack firmware for the rack hardware type is used. 
- string rack_firmware_id = 1; + // Firmware object containing the switch system image to install. + // When empty, the default firmware object for the rack hardware type is used. + string firmware_object_id = 1; } message ConfigureNmxClusterActivity { @@ -7840,118 +7829,6 @@ message MachinePositionInfo { optional common.PowerShelfId power_shelf_id = 7; } -// Rack Firmware Management Messages - -message RackFirmware { - string id = 1; - string config_json = 2; - bool available = 3; - string created = 4; - string updated = 5; - string parsed_components = 6; // JSON string of firmware lookup table - common.RackHardwareType rack_hardware_type = 7; - bool is_default = 8; -} - -message RackFirmwareSetDefaultRequest { - string firmware_id = 1; -} - -message FirmwareComponentInfo { - string device_type = 1; - string component = 2; - string bundle = 3; - string filename = 4; - string target = 5; - string firmware_type = 6; // "prod" or "dev" -} - -message RackFirmwareCreateRequest { - string config_json = 1; - string artifactory_token = 2; - common.RackHardwareType rack_hardware_type = 3; -} - -message RackFirmwareGetRequest { - string id = 1; -} - -message RackFirmwareSearchFilter { - bool only_available = 1; - common.RackHardwareType rack_hardware_type = 2; // Optional: filter by rack hardware type. Unspecified means no filter. 
-} - -message RackFirmwareList { - repeated RackFirmware configs = 1; -} - -message RackFirmwareDeleteRequest { - string id = 1; -} - -message RackFirmwareApplyRequest { - common.RackId rack_id = 1; - string firmware_id = 2; - string firmware_type = 3; // "dev" or "prod" -} - -message RackFirmwareApplyResponse { - int32 total_updates = 1; - int32 successful_updates = 2; - int32 failed_updates = 3; - repeated DeviceUpdateResult device_results = 4; -} - -message DeviceUpdateResult { - string device_id = 1; - string device_type = 2; - bool success = 3; - string message = 4; - string job_id = 5; // Parent job ID for async firmware updates (use with RMS GetFirmwareJobStatus) - repeated NodeJobInfo node_jobs = 6; // Per-node job IDs for tracking individual node progress -} - -message NodeJobInfo { - string node_id = 1; - string job_id = 2; -} - -message RackFirmwareJobStatusRequest { - string job_id = 1; -} - -message RackFirmwareJobStatusResponse { - string job_id = 1; - string state = 2; // QUEUED, RUNNING, COMPLETED, FAILED - string state_description = 3; // Human-readable description - string rack_id = 4; - string node_id = 5; - string error_message = 6; // Populated if state is FAILED - string result_json = 7; // Detailed result data on completion -} - -message RackFirmwareHistoryRequest { - string firmware_id = 1; // Optional: filter by firmware ID. Empty string means no filter. - repeated string rack_ids = 2; // Optional: filter by rack IDs. Empty list returns all racks. 
-} - -message RackFirmwareHistoryResponse { - map<string, RackFirmwareHistoryRecords> histories = 1; // Keyed by rack ID -} - -message RackFirmwareHistoryRecords { - repeated RackFirmwareHistoryRecord records = 1; -} - -message RackFirmwareHistoryRecord { - string firmware_id = 1; - string rack_id = 2; - string firmware_type = 3; - string applied_at = 4; - bool firmware_available = 5; // Whether the firmware ID still exists and is available - common.RackHardwareType rack_hardware_type = 6; -} - message ModifyDPFStateRequest { common.MachineId machine_id = 1; bool dpf_enabled = 2; @@ -8131,9 +8008,9 @@ message UpdatePowerShelfFirmwareTarget { repeated PowerShelfComponent components = 2; } -// Whole-rack firmware update target. All eligible devices in each listed -// rack are updated according to the SOT identified by `target_version`. -message UpdateRackFirmwareTarget { +// Whole-rack firmware update target. All eligible compute and switch devices +// in each listed rack are updated. +message UpdateFirmwareObjectTarget { RackIdList rack_ids = 1; } @@ -8141,10 +8018,19 @@ message UpdateComponentFirmwareRequest { oneof target { UpdateComputeTrayFirmwareTarget compute_trays = 1; UpdateSwitchFirmwareTarget switches = 2; + // Power shelf firmware update is currently not implemented by the state machine or the backend. UpdatePowerShelfFirmwareTarget power_shelves = 3; - UpdateRackFirmwareTarget racks = 5; + UpdateFirmwareObjectTarget racks = 5; } + + // Target firmware version for direct component-manager dispatch. + // For targets routed through rack maintenance, this carries SOT JSON for + // the backend firmware apply path. string target_version = 4; + // Required for firmware targets routed through rack maintenance. + // Rejected for direct dispatch paths and currently unsupported for power shelves.
+ optional string access_token = 6; + bool force_update = 7; } message UpdateComponentFirmwareResponse { diff --git a/crates/rpc/src/model/mod.rs b/crates/rpc/src/model/mod.rs index c75bbe2610..114d2ab5e3 100644 --- a/crates/rpc/src/model/mod.rs +++ b/crates/rpc/src/model/mod.rs @@ -53,7 +53,6 @@ pub mod os; pub mod power_manager; pub mod power_shelf; pub mod rack; -pub mod rack_firmware; pub mod rack_type; pub mod redfish; pub mod resource_pool; diff --git a/crates/rpc/src/model/rack_firmware.rs b/crates/rpc/src/model/rack_firmware.rs deleted file mode 100644 index 26fb2c9bc4..0000000000 --- a/crates/rpc/src/model/rack_firmware.rs +++ /dev/null @@ -1,129 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -use model::rack_firmware::{ - RackFirmware, RackFirmwareApplyHistoryRecord, RackFirmwareSearchFilter, -}; -use model::rack_type::RackHardwareType; - -use crate as rpc; - -impl From<&RackFirmware> for rpc::forge::RackFirmware { - fn from(db: &RackFirmware) -> Self { - let parsed_components = db - .parsed_components - .as_ref() - .map(|p| p.0.to_string()) - .unwrap_or_else(|| "{}".to_string()); - - rpc::forge::RackFirmware { - id: db.id.clone(), - config_json: db.config.0.to_string(), - available: db.available, - created: db.created.format("%Y-%m-%d %H:%M:%S").to_string(), - updated: db.updated.format("%Y-%m-%d %H:%M:%S").to_string(), - parsed_components, - rack_hardware_type: Some(db.rack_hardware_type.clone().into()), - is_default: db.is_default, - } - } -} - -impl From<rpc::forge::RackFirmwareSearchFilter> for RackFirmwareSearchFilter { - fn from(filter: rpc::forge::RackFirmwareSearchFilter) -> Self { - let rack_hardware_type = filter - .rack_hardware_type - .filter(|t| !t.value.is_empty()) - .map(RackHardwareType::from); - - RackFirmwareSearchFilter { - only_available: filter.only_available, - rack_hardware_type, - } - } -} - -impl From<RackFirmwareApplyHistoryRecord> for rpc::forge::RackFirmwareHistoryRecord { - fn from(record: RackFirmwareApplyHistoryRecord) -> Self { - rpc::forge::RackFirmwareHistoryRecord { - firmware_id: record.firmware_id, - rack_id: record.rack_id, - firmware_type: record.firmware_type, - applied_at: record.applied_at.format("%Y-%m-%d %H:%M:%S").to_string(), - firmware_available: record.firmware_available, - rack_hardware_type: Some(record.rack_hardware_type.into()), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_search_filter_from_proto_with_hardware_type() { - let proto = rpc::forge::RackFirmwareSearchFilter { - only_available: true, - rack_hardware_type: Some(rpc::common::RackHardwareType { - value: "any".to_string(), - }), - }; - let filter = RackFirmwareSearchFilter::from(proto); - assert!(filter.only_available); - assert_eq!(filter.rack_hardware_type,
Some(RackHardwareType::any())); - } - - #[test] - fn test_search_filter_from_proto_none_becomes_none() { - let proto = rpc::forge::RackFirmwareSearchFilter { - only_available: false, - rack_hardware_type: None, - }; - let filter = RackFirmwareSearchFilter::from(proto); - assert!(!filter.only_available); - assert_eq!(filter.rack_hardware_type, None); - } - - #[test] - fn test_search_filter_from_proto_empty_value_becomes_none() { - let proto = rpc::forge::RackFirmwareSearchFilter { - only_available: false, - rack_hardware_type: Some(rpc::common::RackHardwareType { - value: String::new(), - }), - }; - let filter = RackFirmwareSearchFilter::from(proto); - assert!(!filter.only_available); - assert_eq!(filter.rack_hardware_type, None); - } - - #[test] - fn test_search_filter_from_proto_specific_type() { - let proto = rpc::forge::RackFirmwareSearchFilter { - only_available: true, - rack_hardware_type: Some(rpc::common::RackHardwareType { - value: "dsx_gb200nvl_72x1".to_string(), - }), - }; - let filter = RackFirmwareSearchFilter::from(proto); - assert!(filter.only_available); - assert_eq!( - filter.rack_hardware_type, - Some(RackHardwareType::from("dsx_gb200nvl_72x1")) - ); - } -} diff --git a/crates/secrets/src/credentials.rs b/crates/secrets/src/credentials.rs index fc9bf474ed..1b956f7661 100644 --- a/crates/secrets/src/credentials.rs +++ b/crates/secrets/src/credentials.rs @@ -385,9 +385,6 @@ pub enum CredentialKey { NmxM { nmxm_id: String, }, - RackFirmware { - firmware_id: String, - }, SwitchNvosAdmin { bmc_mac_address: MacAddress, }, @@ -418,7 +415,6 @@ pub enum CredentialPrefix { BmcCredentials, ExtensionService, NmxM, - RackFirmware, SwitchNvosAdmin, MqttAuth, MachineIdentityEncryptionKey, @@ -440,7 +436,6 @@ impl CredentialPrefix { Self::BmcCredentials => "machines/bmc/", Self::ExtensionService => "machines/extension-services/", Self::NmxM => "nmxm/", - Self::RackFirmware => "rack_firmware/", Self::SwitchNvosAdmin => "switch_nvos/", Self::MqttAuth => "mqtt/", 
Self::MachineIdentityEncryptionKey => "machine_identity/", @@ -461,7 +456,6 @@ impl CredentialPrefix { Self::BmcCredentials, Self::ExtensionService, Self::NmxM, - Self::RackFirmware, Self::SwitchNvosAdmin, Self::MqttAuth, Self::MachineIdentityEncryptionKey, @@ -485,7 +479,6 @@ impl CredentialKey { Self::BmcCredentials { .. } => CredentialPrefix::BmcCredentials, Self::ExtensionService { .. } => CredentialPrefix::ExtensionService, Self::NmxM { .. } => CredentialPrefix::NmxM, - Self::RackFirmware { .. } => CredentialPrefix::RackFirmware, Self::SwitchNvosAdmin { .. } => CredentialPrefix::SwitchNvosAdmin, Self::MqttAuth { .. } => CredentialPrefix::MqttAuth, Self::MachineIdentityEncryptionKey { .. } => { @@ -564,9 +557,6 @@ impl CredentialKey { "machines/extension-services/{service_id}/versions/{version}/credential" )), CredentialKey::NmxM { nmxm_id } => Cow::from(format!("nmxm/{nmxm_id}/auth")), - CredentialKey::RackFirmware { firmware_id } => { - Cow::from(format!("rack_firmware/{firmware_id}/token")) - } CredentialKey::SwitchNvosAdmin { bmc_mac_address } => { Cow::from(format!("switch_nvos/{bmc_mac_address}/admin")) } @@ -779,12 +769,6 @@ mod tests { }, "nmxm/", ), - ( - CredentialKey::RackFirmware { - firmware_id: "fw1".to_string(), - }, - "rack_firmware/", - ), ( CredentialKey::SwitchNvosAdmin { bmc_mac_address: mac, @@ -868,9 +852,6 @@ mod tests { CredentialKey::NmxM { nmxm_id: "n".to_string(), }, - CredentialKey::RackFirmware { - firmware_id: "f".to_string(), - }, CredentialKey::SwitchNvosAdmin { bmc_mac_address: mac, }, @@ -899,6 +880,6 @@ mod tests { #[test] fn prefix_all_is_complete() { let all = CredentialPrefix::all(); - assert_eq!(all.len(), 15); + assert_eq!(all.len(), 14); } } diff --git a/crates/secrets/src/local_credentials/mod.rs b/crates/secrets/src/local_credentials/mod.rs index c8c0c6d047..0650afaa31 100644 --- a/crates/secrets/src/local_credentials/mod.rs +++ b/crates/secrets/src/local_credentials/mod.rs @@ -332,15 +332,10 @@ mod tests { #[test] fn 
snapshot_unsupported_keys_return_none() { let snap = populated_snapshot(); - let keys: Vec<CredentialKey> = vec![ - CredentialKey::ExtensionService { - service_id: "svc".to_string(), - version: "1".to_string(), - }, - CredentialKey::RackFirmware { - firmware_id: "fw".to_string(), - }, - ]; + let keys: Vec<CredentialKey> = vec![CredentialKey::ExtensionService { + service_id: "svc".to_string(), + version: "1".to_string(), + }]; for key in &keys { assert_eq!(snap.get_credentials(key), None, "expected None for {key:?}"); }