-
Notifications
You must be signed in to change notification settings - Fork 66
[sim] Optionally enable health monitor #9628
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
667bec2
893b850
b40ee44
3b892ed
bcdc133
003d246
f4772d5
ff8553c
a93af40
aa49383
53bfd84
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -880,6 +880,7 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> { | |
| sled_id: SledUuid, | ||
| sled_index: u16, | ||
| sim_mode: sim::SimMode, | ||
| health_monitor: sim::ConfigHealthMonitor, | ||
| ) { | ||
| let nexus_address = | ||
| self.nexus_internal_addr.expect("Must launch Nexus first"); | ||
|
|
@@ -896,6 +897,7 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> { | |
| tempdir.path(), | ||
| sim_mode, | ||
| &self.simulated_upstairs, | ||
| health_monitor, | ||
| ) | ||
| .await | ||
| .expect("Failed to start sled agent"); | ||
|
|
@@ -1000,6 +1002,7 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> { | |
| sled_id: SledUuid, | ||
| sled_index: u16, | ||
| sim_mode: sim::SimMode, | ||
| health_monitor: sim::ConfigHealthMonitor, | ||
| ) { | ||
| let nexus_address = | ||
| self.nexus_internal_addr.expect("Must launch Nexus first"); | ||
|
|
@@ -1016,6 +1019,7 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> { | |
| tempdir.path(), | ||
| sim_mode, | ||
| &self.simulated_upstairs, | ||
| health_monitor, | ||
| ) | ||
| .await | ||
| .expect("Failed to start sled agent"); | ||
|
|
@@ -1534,15 +1538,26 @@ impl RackInitRequestBuilder { | |
| } | ||
| } | ||
|
|
||
| #[derive(Debug, Clone)] | ||
| pub(crate) struct SledAgentOptions { | ||
| pub sim_mode: sim::SimMode, | ||
| pub extra_sled_agents: u16, | ||
| pub sled_agent_health_monitor: sim::ConfigHealthMonitor, | ||
| } | ||
|
|
||
| pub(crate) async fn setup_with_config_impl<N: NexusServer>( | ||
| mut starter: ControlPlaneStarter<'_, N>, | ||
| populate: PopulateCrdb, | ||
| sim_mode: sim::SimMode, | ||
| sled_agent_opts: SledAgentOptions, | ||
| initial_cert: Option<Certificate>, | ||
| extra_sled_agents: u16, | ||
| gateway_config_file: Utf8PathBuf, | ||
| second_nexus: bool, | ||
| ) -> ControlPlaneTestContext<N> { | ||
| let SledAgentOptions { | ||
| sim_mode, | ||
| extra_sled_agents, | ||
| sled_agent_health_monitor, | ||
| } = sled_agent_opts; | ||
| const STEP_TIMEOUT: Duration = Duration::from_secs(600); | ||
|
|
||
| // All setups will start with CRDB and clickhouse | ||
|
|
@@ -1705,6 +1720,7 @@ pub(crate) async fn setup_with_config_impl<N: NexusServer>( | |
| // The first and second sled agents have special UUIDs, and any extra ones | ||
| // after that are random. | ||
|
|
||
| let health_monitor = sled_agent_health_monitor.clone(); | ||
| starter | ||
| .init_with_steps( | ||
| vec![( | ||
|
|
@@ -1715,6 +1731,7 @@ pub(crate) async fn setup_with_config_impl<N: NexusServer>( | |
| SLED_AGENT_UUID.parse().unwrap(), | ||
| 0, | ||
| sim_mode, | ||
| health_monitor, | ||
| ) | ||
| .boxed() | ||
| }), | ||
|
|
@@ -1723,6 +1740,7 @@ pub(crate) async fn setup_with_config_impl<N: NexusServer>( | |
| ) | ||
| .await; | ||
|
|
||
| let health_monitor = sled_agent_health_monitor.clone(); | ||
| if extra_sled_agents > 0 { | ||
| starter | ||
| .init_with_steps( | ||
|
|
@@ -1734,6 +1752,7 @@ pub(crate) async fn setup_with_config_impl<N: NexusServer>( | |
| SLED_AGENT2_UUID.parse().unwrap(), | ||
| 1, | ||
| sim_mode, | ||
| health_monitor, | ||
| ) | ||
| .boxed() | ||
| }), | ||
|
|
@@ -1743,7 +1762,9 @@ pub(crate) async fn setup_with_config_impl<N: NexusServer>( | |
| .await; | ||
| } | ||
|
|
||
| let health_monitor = sled_agent_health_monitor.clone(); | ||
| for index in 1..extra_sled_agents { | ||
| let health_monitor = health_monitor.clone(); | ||
| starter | ||
| .init_with_steps( | ||
| vec![( | ||
|
|
@@ -1754,6 +1775,7 @@ pub(crate) async fn setup_with_config_impl<N: NexusServer>( | |
| SledUuid::new_v4(), | ||
| index.checked_add(1).unwrap(), | ||
| sim_mode, | ||
| health_monitor.clone(), | ||
| ) | ||
| .boxed() | ||
| }), | ||
|
|
@@ -1839,6 +1861,7 @@ pub(crate) enum PopulateCrdb { | |
| /// | ||
| /// Note: you should probably use the `extra_sled_agents` macro parameter on | ||
| /// `nexus_test` instead! | ||
| #[allow(clippy::too_many_arguments)] | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about defining a struct for these arguments instead?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I went with just adding the function to have too many arguments because of the comment above this line:
Since this is somewhat of a semi-deprecated function, I didn't want to create a struct for it, WDYT? |
||
| pub async fn start_sled_agent( | ||
| log: Logger, | ||
| nexus_address: SocketAddr, | ||
|
|
@@ -1847,6 +1870,7 @@ pub async fn start_sled_agent( | |
| update_directory: &Utf8Path, | ||
| sim_mode: sim::SimMode, | ||
| simulated_upstairs: &Arc<sim::SimulatedUpstairs>, | ||
| health_monitor: sim::ConfigHealthMonitor, | ||
| ) -> Result<sim::Server, String> { | ||
| // Generate a baseboard serial number that matches the SP configuration | ||
| // (SimGimlet00, SimGimlet01, etc.) so that inventory can link sled agents | ||
|
|
@@ -1861,6 +1885,7 @@ pub async fn start_sled_agent( | |
| sim::ZpoolConfig::None, | ||
| SledCpuFamily::AmdMilan, | ||
| Some(baseboard_serial), | ||
| health_monitor, | ||
| ); | ||
| start_sled_agent_with_config(log, &config, sled_index, simulated_upstairs) | ||
| .await | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1160,6 +1160,10 @@ async fn test_instance_migration_compatible_cpu_platforms( | |
| Some(&camino::Utf8Path::new("/an/unused/update/directory")), | ||
| omicron_sled_agent::sim::ZpoolConfig::None, | ||
| sled_agent_types::inventory::SledCpuFamily::AmdTurin, | ||
| omicron_sled_agent::sim::ConfigHealthMonitor { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This block is repeated quite a lot -- maybe there should be a |
||
| enabled: false, | ||
| sim_health_checks: None, | ||
| }, | ||
| ); | ||
| let new_sled_id = config.id; | ||
|
|
||
|
|
@@ -1349,6 +1353,10 @@ async fn test_instance_migration_incompatible_cpu_platforms( | |
| Some(&camino::Utf8Path::new("/an/unused/update/directory")), | ||
| omicron_sled_agent::sim::ZpoolConfig::None, | ||
| sled_agent_types::inventory::SledCpuFamily::AmdTurin, | ||
| omicron_sled_agent::sim::ConfigHealthMonitor { | ||
| enabled: false, | ||
| sim_health_checks: None, | ||
| }, | ||
| ); | ||
| let turin_sled_id = config.id; | ||
|
|
||
|
|
@@ -1426,6 +1434,10 @@ async fn test_instance_migration_unknown_sled_type( | |
| Some(&camino::Utf8Path::new("/an/unused/update/directory")), | ||
| omicron_sled_agent::sim::ZpoolConfig::None, | ||
| sled_agent_types::inventory::SledCpuFamily::Unknown, | ||
| omicron_sled_agent::sim::ConfigHealthMonitor { | ||
| enabled: false, | ||
| sim_health_checks: None, | ||
| }, | ||
| ); | ||
| let new_sled_id = config.id; | ||
|
|
||
|
|
@@ -7125,6 +7137,10 @@ async fn test_can_start_instance_with_cpu_platform( | |
| Some(&camino::Utf8Path::new("/an/unused/update/directory")), | ||
| omicron_sled_agent::sim::ZpoolConfig::None, | ||
| sled_agent_types::inventory::SledCpuFamily::AmdTurin, | ||
| omicron_sled_agent::sim::ConfigHealthMonitor { | ||
| enabled: false, | ||
| sim_health_checks: None, | ||
| }, | ||
| ); | ||
| let new_sled_id = config.id; | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,6 +10,7 @@ workspace = true | |
| [dependencies] | ||
| anyhow.workspace = true | ||
| async-trait.workspace = true | ||
| chrono.workspace = true | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this used? |
||
| derive_more.workspace = true | ||
| dropshot.workspace = true | ||
| futures.workspace = true | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think maybe most of the stuff added here isn't used?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah! yeah, I think this was for a previous iteration of this work, I'll remove