diff --git a/Cargo.lock b/Cargo.lock
index ddc3790b01f..c0de1ea261d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -7100,6 +7100,7 @@ dependencies = [
  "omicron-workspace-hack",
  "schemars 0.8.22",
  "serde",
+ "sled-hardware-types",
  "trust-quorum-types",
  "uuid",
 ]
diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs
index e0be6d58ccd..13b1abdd862 100644
--- a/dev-tools/omdb/src/bin/omdb/nexus.rs
+++ b/dev-tools/omdb/src/bin/omdb/nexus.rs
@@ -579,9 +579,11 @@ struct TrustQuorumArgs {
 }
 
 #[derive(Debug, Subcommand)]
+#[allow(clippy::large_enum_variant)]
 enum TrustQuorumCommands {
     GetConfig(TrustQuorumConfigArgs),
     LrtqUpgrade,
+    RemoveSled(TrustQuorumRemoveSledArgs),
 }
 
 #[derive(Debug, Clone, Copy, Args)]
@@ -590,6 +592,15 @@ struct TrustQuorumConfigArgs {
     epoch: TrustQuorumEpochOrLatest,
 }
 
+#[derive(Debug, Args)]
+struct TrustQuorumRemoveSledArgs {
+    // remove is _extremely_ dangerous, so we also require a database
+    // connection to perform some safety checks
+    #[clap(flatten)]
+    db_url_opts: DbUrlOptions,
+    sled_id: SledUuid,
+}
+
 #[derive(Debug, Clone, Copy)]
 pub(crate) enum TrustQuorumEpochOrLatest {
     Latest,
@@ -912,6 +923,15 @@
             let token = omdb.check_allow_destructive()?;
             cmd_nexus_trust_quorum_lrtq_upgrade(&client, token).await
         }
+        NexusCommands::TrustQuorum(TrustQuorumArgs {
+            command: TrustQuorumCommands::RemoveSled(args),
+        }) => {
+            let token = omdb.check_allow_destructive()?;
+            cmd_nexus_trust_quorum_remove_sled(
+                &client, args, omdb, log, token,
+            )
+            .await
+        }
         NexusCommands::UpdateStatus(args) => {
             cmd_nexus_update_status(&client, args).await
         }
@@ -4320,6 +4340,13 @@ async fn cmd_nexus_sled_expunge_with_datastore(
         }
     }
 
+    eprintln!(
+        "WARNING: Are you sure that you have removed this sled from the latest \
+         trust quorum configuration for rack {}? Please double check with: \
+         `omdb nexus trust-quorum get-config latest`\n",
+        sled.rack_id
+    );
+
     eprintln!(
         "WARNING: This operation will PERMANENTLY and IRRECOVERABLY mark sled \
         {} ({}) expunged. To proceed, type the sled's serial number.",
@@ -4562,6 +4589,108 @@ async fn cmd_nexus_trust_quorum_lrtq_upgrade(
     Ok(())
 }
 
+async fn cmd_nexus_trust_quorum_remove_sled(
+    client: &nexus_lockstep_client::Client,
+    args: &TrustQuorumRemoveSledArgs,
+    omdb: &Omdb,
+    log: &slog::Logger,
+    destruction_token: DestructiveOperationToken,
+) -> Result<(), anyhow::Error> {
+    let datastore = args.db_url_opts.connect(omdb, log).await?;
+    let result = cmd_nexus_trust_quorum_remove_sled_with_datastore(
+        &datastore,
+        client,
+        args,
+        log,
+        destruction_token,
+    )
+    .await;
+    datastore.terminate().await;
+    result
+}
+
+// `omdb nexus trust-quorum remove-sled`, but borrowing a datastore
+async fn cmd_nexus_trust_quorum_remove_sled_with_datastore(
+    datastore: &Arc<DataStore>,
+    client: &nexus_lockstep_client::Client,
+    args: &TrustQuorumRemoveSledArgs,
+    log: &slog::Logger,
+    _destruction_token: DestructiveOperationToken,
+) -> Result<(), anyhow::Error> {
+    use nexus_db_queries::context::OpContext;
+    let opctx = OpContext::for_tests(log.clone(), datastore.clone());
+    let opctx = &opctx;
+
+    // First, we need to look up the sled so we know its serial number.
+    let (_authz_sled, sled) = LookupPath::new(opctx, datastore)
+        .sled_id(args.sled_id)
+        .fetch()
+        .await
+        .with_context(|| format!("failed to find sled {}", args.sled_id))?;
+
+    // Helper to get confirmation messages from the user.
+    let mut prompt = ConfirmationPrompt::new();
+
+    println!(
+        "WARNING: This is step 1 of the process to expunge a sled. If you \
+        remove a sled from the trust quorum and reboot it, it will not be able \
+        to unlock its storage and participate in the control plane. However, \
+        the Reconfigurator will not yet know the sled is expunged and may \
+        still try to use it."
+    );
+
+    println!(
+        "Therefore, you must treat this action in conjunction with a reboot as \
+        the software equivalent of physically removing the sled from the rack \
+        before expungement."
+    );
+
+    println!(
+        "After this sled is removed from the trust quorum, you must reboot it \
+        and expunge it to complete the process."
+    );
+
+    println!(
+        "WARNING: This operation will PERMANENTLY and IRRECOVERABLY remove sled \
+        {} ({}) from the trust quorum for rack {}. To proceed, type the \
+        sled's serial number.",
+        args.sled_id,
+        sled.serial_number(),
+        sled.rack_id
+    );
+    prompt.read_and_validate("sled serial number", sled.serial_number())?;
+
+    println!(
+        "About to start the trust quorum reconfiguration to remove the sled."
+    );
+
+    println!(
+        "If this operation fails with a timeout, please check the latest trust \
+        quorum configuration to see whether or not to proceed with sled reboot \
+        and expungement."
+    );
+
+    println!(
+        "You can poll the trust quorum reconfiguration with \
+        `omdb nexus trust-quorum get-config <epoch>`\n"
+    );
+
+    println!(
+        "Once the trust quorum configuration is committed, please reboot \
+        the sled and proceed to call `omdb nexus sled expunge`.\n"
+    );
+
+    let epoch = client
+        .trust_quorum_remove_sled(&args.sled_id.into_untyped_uuid())
+        .await
+        .context("trust quorum remove sled")?
+        .into_inner();
+
+    println!("Started trust quorum reconfiguration at epoch {epoch}\n");
+
+    Ok(())
+}
+
 /// Runs `omdb nexus support-bundles create`
 async fn cmd_nexus_support_bundles_create(
     client: &nexus_lockstep_client::Client,
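The `ConfirmationPrompt` used above is omdb's existing interactive helper. As a rough, self-contained sketch of the gate it provides (not the actual implementation), the serial-number confirmation amounts to reading a line and requiring an exact match before the destructive call is issued:

```rust
use std::io::{self, BufRead, Write};

// Illustrative only: require the operator to type an exact string (here,
// the sled's serial number) before a destructive step proceeds. On any
// mismatch we abort rather than retry, the conservative choice for a
// one-shot destructive CLI.
fn confirm_exact(prompt: &str, expected: &str) -> anyhow::Result<()> {
    print!("{prompt}: ");
    io::stdout().flush()?;
    let mut input = String::new();
    io::stdin().lock().read_line(&mut input)?;
    anyhow::ensure!(input.trim() == expected, "input did not match; aborting");
    Ok(())
}
```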
diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml
index afa28295d69..f3416eadde6 100644
--- a/nexus/Cargo.toml
+++ b/nexus/Cargo.toml
@@ -15,6 +15,7 @@ omicron-rpaths.workspace = true
 
 [dependencies]
 anyhow.workspace = true
+async-bb8-diesel.workspace = true
 assert_matches.workspace = true
 async-trait.workspace = true
 base64.workspace = true
@@ -34,6 +35,7 @@ crucible-pantry-client.workspace = true
 crucible-common.workspace = true
 dns-service-client.workspace = true
 dpd-client.workspace = true
+diesel.workspace = true
 ereport-types.workspace = true
 mg-admin-client.workspace = true
 dropshot.workspace = true
diff --git a/nexus/lockstep-api/Cargo.toml b/nexus/lockstep-api/Cargo.toml
index 4a194d19154..54bd51fccb2 100644
--- a/nexus/lockstep-api/Cargo.toml
+++ b/nexus/lockstep-api/Cargo.toml
@@ -16,5 +16,6 @@ omicron-uuid-kinds.workspace = true
 omicron-workspace-hack.workspace = true
 schemars.workspace = true
 serde.workspace = true
+sled-hardware-types.workspace = true
 trust-quorum-types.workspace = true
 uuid.workspace = true
diff --git a/nexus/lockstep-api/src/lib.rs b/nexus/lockstep-api/src/lib.rs
index 8e6a7d8f460..db83d4f8c3f 100644
--- a/nexus/lockstep-api/src/lib.rs
+++ b/nexus/lockstep-api/src/lib.rs
@@ -588,6 +588,21 @@ pub trait NexusLockstepApi {
     async fn trust_quorum_lrtq_upgrade(
         rqctx: RequestContext<Self::Context>,
     ) -> Result<HttpResponseOk<Epoch>, HttpError>;
+
+    /// Remove a sled from the trust quorum
+    ///
+    /// This is a required first step towards expunging a sled
+    ///
+    /// Return the epoch of the proposed configuration so it can be polled
+    /// asynchronously.
+    #[endpoint {
+        method = POST,
+        path = "/trust-quorum/remove/{sled}"
+    }]
+    async fn trust_quorum_remove_sled(
+        rqctx: RequestContext<Self::Context>,
+        path_params: Path<SledPathParam>,
+    ) -> Result<HttpResponseOk<Epoch>, HttpError>;
 }
 
 /// Path parameters for Rack requests.
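Because the endpoint returns only the proposed epoch, callers are expected to poll until that configuration commits. A minimal sketch of that polling shape, with the actual lockstep client call abstracted behind a closure (the `check` callback would wrap something like a get-config request; nothing below is part of this change):

```rust
use std::time::Duration;

// Generic poll-until-committed loop. `check` reports whether the trust
// quorum configuration at `epoch` has been committed; the real lookup
// (e.g. via the lockstep get-config endpoint) is assumed, not shown.
async fn wait_for_commit<F, Fut>(epoch: u64, mut check: F) -> anyhow::Result<()>
where
    F: FnMut(u64) -> Fut,
    Fut: std::future::Future<Output = anyhow::Result<bool>>,
{
    for _ in 0..30 {
        if check(epoch).await? {
            return Ok(());
        }
        tokio::time::sleep(Duration::from_secs(2)).await;
    }
    anyhow::bail!("trust quorum config at epoch {epoch} did not commit in time")
}
```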
diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs
index 28c1a5dafb9..8028a33721d 100644
--- a/nexus/src/app/sled.rs
+++ b/nexus/src/app/sled.rs
@@ -28,10 +28,12 @@ use omicron_uuid_kinds::GenericUuid;
 use omicron_uuid_kinds::InstanceUuid;
 use omicron_uuid_kinds::PhysicalDiskUuid;
 use omicron_uuid_kinds::PropolisUuid;
+use omicron_uuid_kinds::RackUuid;
 use omicron_uuid_kinds::SledUuid;
 use omicron_uuid_kinds::ZpoolUuid;
 use sled_agent_client::Client as SledAgentClient;
 use sled_agent_types::inventory::SledRole;
+use sled_hardware_types::BaseboardId;
 use std::net::SocketAddrV6;
 use std::sync::Arc;
 use uuid::Uuid;
@@ -109,8 +111,45 @@ impl super::Nexus {
         sled_id: SledUuid,
     ) -> Result<SledPolicy, Error> {
         let sled_lookup = self.sled_lookup(opctx, &sled_id)?;
-        let (authz_sled,) =
-            sled_lookup.lookup_for(authz::Action::Modify).await?;
+        let (authz_sled, sled) =
+            sled_lookup.fetch_for(authz::Action::Modify).await?;
+
+        let rack_id = RackUuid::from_untyped_uuid(sled.rack_id);
+
+        // If the sled still exists in the latest committed trust quorum
+        // configuration, it cannot be expunged.
+        //
+        // This is just a sanity check. There is an inherent TOCTOU here, since
+        // we aren't combining the check with the policy change inside
+        // a transaction. However, it is unlikely that an operator would be
+        // creating a different trust quorum configuration while trying to
+        // expunge this sled, since both have to be done from omdb by an Oxide
+        // employee right now.
+        //
+        // When we add an external API, we'll probably want to come back and add
+        // some extra safeguards, including checking that there are no ongoing
+        // configurations currently, as the last committed configuration can be
+        // different from the current configuration. However, if we checked that
+        // here we'd have to do yet more DB lookups to check that the state of
+        // the last configuration wasn't aborted. This is probably good enough
+        // for now given that the user already has to confirm a bunch of prompts
+        // before kicking off the operation.
+        let (tq_latest_committed_config, _) = self
+            .tq_load_latest_possible_committed_config(opctx, rack_id)
+            .await?;
+        let baseboard_id = BaseboardId {
+            part_number: sled.part_number().to_string(),
+            serial_number: sled.serial_number().to_string(),
+        };
+        if tq_latest_committed_config.members.contains_key(&baseboard_id) {
+            return Err(Error::conflict(format!(
+                "Cannot expunge sled {sled_id}, as its baseboard \
+                {baseboard_id} is still a member of the latest committed \
+                trust quorum configuration at epoch {} for rack {rack_id}",
+                tq_latest_committed_config.epoch
+            )));
+        }
+
         let prev_policy = self
             .db_datastore
             .sled_set_policy_to_expunged(opctx, &authz_sled)
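The new expunge guard reduces to a lookup of the sled's baseboard in the latest committed membership map. A standalone sketch of that predicate using plain std types (the real check consults `TrustQuorumConfig::members` loaded from the database):

```rust
use std::collections::BTreeMap;

// Simplified stand-in for sled_hardware_types::BaseboardId.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
struct BaseboardId {
    part_number: String,
    serial_number: String,
}

// A sled may be expunged only once its baseboard is absent from the latest
// committed trust quorum membership. `V` stands in for whatever per-member
// data the real configuration carries.
fn may_expunge<V>(members: &BTreeMap<BaseboardId, V>, sled: &BaseboardId) -> bool {
    !members.contains_key(sled)
}
```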
diff --git a/nexus/src/app/trust_quorum.rs b/nexus/src/app/trust_quorum.rs
index 97f4b614471..f1776837620 100644
--- a/nexus/src/app/trust_quorum.rs
+++ b/nexus/src/app/trust_quorum.rs
@@ -9,7 +9,7 @@ use nexus_types::trust_quorum::{
     IsLrtqUpgrade, ProposedTrustQuorumConfig, TrustQuorumConfig,
 };
 use omicron_common::api::external::Error;
-use omicron_uuid_kinds::{GenericUuid, RackUuid};
+use omicron_uuid_kinds::{GenericUuid, RackUuid, SledUuid};
 use sled_hardware_types::BaseboardId;
 use std::collections::BTreeSet;
 use std::time::Duration;
@@ -27,8 +27,9 @@ impl super::Nexus {
         rack_id: RackUuid,
         new_sleds: BTreeSet<BaseboardId>,
     ) -> Result<TrustQuorumConfig, Error> {
-        let (latest_committed_config, latest_epoch) =
-            self.load_latest_possible_committed_config(opctx, rack_id).await?;
+        let (latest_committed_config, latest_epoch) = self
+            .tq_load_latest_possible_committed_config(opctx, rack_id)
+            .await?;
         let new_epoch = latest_epoch.next();
         let proposed = self
             .add_sleds_proposed_config(
@@ -75,6 +76,75 @@
         Ok(new_config)
     }
 
+    /// Remove a sled from the trust quorum
+    ///
+    /// This will trigger a trust quorum reconfiguration. This is a required
+    /// first step towards expunging a sled.
+    ///
+    /// Returns the epoch of the proposed configuration so it can be polled
+    /// asynchronously.
+    pub(crate) async fn tq_remove_sled(
+        &self,
+        opctx: &OpContext,
+        sled_id: SledUuid,
+    ) -> Result<Epoch, Error> {
+        // Look up the sled to get its rack_id and baseboard_id
+        let (.., sled) = self.sled_lookup(opctx, &sled_id)?.fetch().await?;
+        let rack_id = RackUuid::from_untyped_uuid(sled.rack_id);
+        let sled_to_remove = BaseboardId {
+            part_number: sled.part_number().to_string(),
+            serial_number: sled.serial_number().to_string(),
+        };
+
+        let (latest_committed_config, latest_epoch) = self
+            .tq_load_latest_possible_committed_config(opctx, rack_id)
+            .await?;
+        let new_epoch = latest_epoch.next();
+        let proposed = self
+            .remove_sled_proposed_config(
+                latest_committed_config,
+                new_epoch,
+                sled_to_remove,
+            )
+            .await?;
+        self.db_datastore.tq_insert_latest_config(opctx, proposed).await?;
+
+        // Read back the real configuration from the database. Importantly this
+        // includes a chosen coordinator.
+        let Some(new_config) =
+            self.db_datastore.tq_get_config(opctx, rack_id, new_epoch).await?
+        else {
+            return Err(Error::internal_error(&format!(
+                "Cannot retrieve newly inserted trust quorum \
+                configuration for rack {rack_id}, epoch {new_epoch}."
+            )));
+        };
+
+        // Retrieve a client for the coordinator sled
+        let client = self
+            .get_coordinator_client(
+                opctx,
+                rack_id,
+                new_epoch,
+                &new_config.coordinator,
+            )
+            .await?;
+
+        // Now send the reconfiguration request to the coordinator. We do
+        // this directly in the API handler because this is a non-idempotent
+        // operation and we only want to issue it once.
+        let req = trust_quorum_types::messages::ReconfigureMsg {
+            rack_id: new_config.rack_id,
+            epoch: new_config.epoch,
+            last_committed_epoch: new_config.last_committed_epoch,
+            members: new_config.members.keys().cloned().collect(),
+            threshold: new_config.threshold,
+        };
+        client.trust_quorum_reconfigure(&req).await?;
+
+        Ok(new_epoch)
+    }
+
     /// Abort the latest trust quorum configuration if it is still in the
     /// preparing state.
     pub(crate) async fn tq_abort_latest_config(
@@ -281,6 +351,36 @@
         ))
     }
 
+    // Create a new `ProposedTrustQuorumConfig` with `sled_to_remove` removed
+    // from the membership. Return an error if the sled is not a member of the
+    // `latest_committed_config`.
+    async fn remove_sled_proposed_config(
+        &self,
+        latest_committed_config: TrustQuorumConfig,
+        new_epoch: Epoch,
+        sled_to_remove: BaseboardId,
+    ) -> Result<ProposedTrustQuorumConfig, Error> {
+        let rack_id = latest_committed_config.rack_id;
+        let mut members: BTreeSet<_> =
+            latest_committed_config.members.keys().cloned().collect();
+
+        if !members.remove(&sled_to_remove) {
+            return Err(Error::invalid_request(format!(
+                "Sled {} is not a member of the trust quorum.",
+                sled_to_remove
+            )));
+        }
+
+        Ok(ProposedTrustQuorumConfig {
+            rack_id,
+            epoch: new_epoch,
+            is_lrtq_upgrade: IsLrtqUpgrade::No {
+                last_committed_epoch: latest_committed_config.epoch,
+            },
+            members,
+        })
+    }
+
     // Create a new `ProposedTrustQuorumConfig` including `new_sleds` in
     // the membership. Return an error if any of the new sleds exist in the
     // `latest_committed_config` membership already.
@@ -322,7 +422,7 @@
     // Note that the configuration that comes back may not actually be committed
     // yet if it's the latest configuration. That is ok because we will perform
     // validation during insert of any newly proposed config.
-    async fn load_latest_possible_committed_config(
+    pub async fn tq_load_latest_possible_committed_config(
         &self,
         opctx: &OpContext,
         rack_id: RackUuid,
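The membership arithmetic in `remove_sled_proposed_config` is plain set removal that fails loudly on a non-member. A self-contained sketch of the two cases, with the member type reduced to `String` for illustration:

```rust
use std::collections::BTreeSet;

// New membership = old membership minus the departing sled. Removing a
// sled that was never a member is an error rather than a no-op, so that
// operator typos fail loudly instead of silently proposing a new epoch.
fn propose_removal(
    mut members: BTreeSet<String>,
    to_remove: &str,
) -> Result<BTreeSet<String>, String> {
    if !members.remove(to_remove) {
        return Err(format!("{to_remove} is not a member of the trust quorum"));
    }
    Ok(members)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn removal_semantics() {
        let members = BTreeSet::from([
            "a1".to_string(),
            "a2".to_string(),
            "a3".to_string(),
        ]);
        assert!(propose_removal(members.clone(), "a4").is_err());
        assert_eq!(propose_removal(members, "a2").unwrap().len(), 2);
    }
}
```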
diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs
index 070afa9314d..efd00284b81 100644
--- a/nexus/src/lib.rs
+++ b/nexus/src/lib.rs
@@ -19,6 +19,7 @@ mod saga_interface;
 
 pub use app::Nexus;
 pub use app::test_interfaces::TestInterfaces;
+use async_bb8_diesel::AsyncRunQueryDsl;
 use context::ApiContext;
 use context::ServerContext;
 use dropshot::ConfigDropshot;
@@ -26,11 +27,13 @@ use external_api::http_entrypoints::external_api;
 use internal_api::http_entrypoints::internal_api;
 use lockstep_api::http_entrypoints::lockstep_api;
 use nexus_config::NexusConfig;
+use nexus_db_model::HwBaseboardId;
 use nexus_db_model::RendezvousDebugDataset;
 use nexus_db_queries::db;
 use nexus_types::deployment::Blueprint;
 use nexus_types::deployment::BlueprintZoneType;
 use nexus_types::deployment::blueprint_zone_type;
+use nexus_types::internal_api::params::InitialTrustQuorumConfig;
 use nexus_types::internal_api::params::{
     PhysicalDiskPutRequest, ZpoolPutRequest,
 };
@@ -47,11 +50,14 @@ use omicron_uuid_kinds::BlueprintUuid;
 use omicron_uuid_kinds::DatasetUuid;
 use oximeter::types::ProducerRegistry;
 use oximeter_producer::Server as ProducerServer;
+use sled_hardware_types::BaseboardId;
 use slog::Logger;
+use std::collections::BTreeSet;
 use std::collections::HashMap;
 use std::net::{Ipv4Addr, SocketAddr, SocketAddrV6};
 use std::sync::Arc;
 use tokio::sync::watch;
+use uuid::Uuid;
 
 #[macro_use]
 extern crate slog;
@@ -349,6 +355,41 @@ impl nexus_test_interface::NexusServer for Server {
         // However, RSS isn't running, so we'll do the handoff ourselves.
         let opctx =
             internal_server.apictx.context.nexus.opctx_for_internal_api();
+        let datastore = internal_server.apictx.context.nexus.datastore();
+        let conn = datastore.pool_connection_for_tests().await.unwrap();
+
+        // We need to insert hw_baseboard_ids that match the fake hardware we
+        // use for our fake trust quorum configuration.
+        let tq_members = BTreeSet::from([
+            BaseboardId {
+                part_number: "_test_fake_tq_9adfa".into(),
+                serial_number: "a1".into(),
+            },
+            BaseboardId {
+                part_number: "_test_fake_9adfa".into(),
+                serial_number: "a2".into(),
+            },
+            BaseboardId {
+                part_number: "_test_fake_tq_9adfa".into(),
+                serial_number: "a3".into(),
+            },
+        ]);
+
+        let hw_baseboard_ids: Vec<_> = tq_members
+            .iter()
+            .cloned()
+            .map(|id| HwBaseboardId {
+                id: Uuid::new_v4(),
+                part_number: id.part_number,
+                serial_number: id.serial_number,
+            })
+            .collect();
+        use nexus_db_schema::schema::hw_baseboard_id::dsl;
+        diesel::insert_into(dsl::hw_baseboard_id)
+            .values(hw_baseboard_ids.clone())
+            .execute_async(&*conn)
+            .await
+            .unwrap();
 
         // Allocation of initial external IP addresses is a little funny. In
         // a real system, it'd be allocated by RSS and provided with the rack
@@ -413,7 +454,30 @@
                     bfd: Vec::new(),
                 },
                 allowed_source_ips: AllowedSourceIps::Any,
-                initial_trust_quorum_configuration: None,
+                // Insert a fake trust quorum config such that existing
+                // sleds will never be present.
+                //
+                // The purpose of this is solely for expunge testing. We
+                // don't do any trust quorum testing in `NexusServer` based
+                // tests, but we do test expunging sleds. The second part
+                // of expunging a sled, as used in these tests, relies on
+                // the sled not being part of the current trust quorum.
+                // This gets checked in the production nexus code path. By
+                // inserting a fake config as the latest committed config
+                // we can ensure that no sled used by this test will ever
+                // be present in a trust quorum config and that the existing
+                // expunge tests will pass.
+                initial_trust_quorum_configuration: Some(
+                    InitialTrustQuorumConfig {
+                        // We need at least 3 members
+                        members: tq_members,
+                        // Coordinator must be one of the members
+                        coordinator: BaseboardId {
+                            part_number: "_test_fake_tq_9adfa".into(),
+                            serial_number: "a1".into(),
+                        },
+                    },
+                ),
             },
             false, // blueprint_execution_enabled
         )
diff --git a/nexus/src/lockstep_api/http_entrypoints.rs b/nexus/src/lockstep_api/http_entrypoints.rs
index 13f02da4d58..434da18de01 100644
--- a/nexus/src/lockstep_api/http_entrypoints.rs
+++ b/nexus/src/lockstep_api/http_entrypoints.rs
@@ -1140,4 +1140,23 @@ impl NexusLockstepApi for NexusLockstepApiImpl {
             .instrument_dropshot_handler(&rqctx, handler)
             .await
     }
+
+    async fn trust_quorum_remove_sled(
+        rqctx: RequestContext<Self::Context>,
+        path_params: Path<SledPathParam>,
+    ) -> Result<HttpResponseOk<Epoch>, HttpError> {
+        let apictx = &rqctx.context().context;
+        let nexus = &apictx.nexus;
+        let sled_id = path_params.into_inner().sled;
+        let handler = async {
+            let opctx =
+                crate::context::op_context_for_internal_api(&rqctx).await;
+            let epoch = nexus.tq_remove_sled(&opctx, sled_id).await?;
+            Ok(HttpResponseOk(epoch))
+        };
+        apictx
+            .internal_latencies
+            .instrument_dropshot_handler(&rqctx, handler)
+            .await
+    }
 }
diff --git a/openapi/nexus-lockstep.json b/openapi/nexus-lockstep.json
index f2e7009e42a..c3f7e3bcadb 100644
--- a/openapi/nexus-lockstep.json
+++ b/openapi/nexus-lockstep.json
@@ -1462,6 +1462,46 @@
         }
       }
     },
+    "/trust-quorum/remove/{sled}": {
+      "post": {
+        "summary": "Remove a sled from the trust quorum",
+        "description": "This is a required first step towards expunging a sled\n\nReturn the epoch of the proposed configuration so it can be polled asynchronously.",
+        "operationId": "trust_quorum_remove_sled",
+        "parameters": [
+          {
+            "in": "path",
+            "name": "sled",
+            "description": "ID of the sled",
+            "required": true,
+            "schema": {
+              "type": "string",
+              "format": "uuid"
+            }
+          }
+        ],
+        "responses": {
+          "200": {
+            "description": "successful operation",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "title": "uint64",
+                  "type": "integer",
+                  "format": "uint64",
+                  "minimum": 0
+                }
+              }
+            }
+          },
+          "4XX": {
+            "$ref": "#/components/responses/Error"
+          },
+          "5XX": {
+            "$ref": "#/components/responses/Error"
+          }
+        }
+      }
+    },
     "/v1/ping": {
       "get": {
         "summary": "Ping API",