From 89a7c2ff3fb8dbcd3c8b0f3cc3dad74a78b02bb9 Mon Sep 17 00:00:00 2001 From: Taylor Mutch Date: Fri, 22 May 2026 08:14:30 -0700 Subject: [PATCH 1/2] fix(homebrew): repair local driver bootstrap state --- crates/openshell-bootstrap/src/pki.rs | 8 +- crates/openshell-driver-docker/src/lib.rs | 41 +++- crates/openshell-driver-docker/src/tests.rs | 19 ++ crates/openshell-driver-podman/src/driver.rs | 60 ++++- crates/openshell-server/src/certgen.rs | 221 ++++++++++++++++++- tasks/scripts/release.py | 6 +- 6 files changed, 340 insertions(+), 15 deletions(-) diff --git a/crates/openshell-bootstrap/src/pki.rs b/crates/openshell-bootstrap/src/pki.rs index 388507840..e99279b97 100644 --- a/crates/openshell-bootstrap/src/pki.rs +++ b/crates/openshell-bootstrap/src/pki.rs @@ -28,7 +28,7 @@ pub struct PkiBundle { /// aliases used by every supported runtime: Kubernetes service DNS, /// `host.docker.internal` for Docker Desktop and rootless Docker on Linux, /// and `host.containers.internal` for Podman containers reaching their host. -const DEFAULT_SERVER_SANS: &[&str] = &[ +pub const DEFAULT_SERVER_SANS: &[&str] = &[ "openshell", "openshell.openshell.svc", "openshell.openshell.svc.cluster.local", @@ -184,4 +184,10 @@ mod tests { // Should have all default SANs + 2 extras assert_eq!(sans.len(), DEFAULT_SERVER_SANS.len() + 2); } + + #[test] + fn default_server_sans_include_local_container_hostnames() { + assert!(DEFAULT_SERVER_SANS.contains(&"host.docker.internal")); + assert!(DEFAULT_SERVER_SANS.contains(&"host.containers.internal")); + } } diff --git a/crates/openshell-driver-docker/src/lib.rs b/crates/openshell-driver-docker/src/lib.rs index 33ed0edf5..c74b07437 100644 --- a/crates/openshell-driver-docker/src/lib.rs +++ b/crates/openshell-driver-docker/src/lib.rs @@ -1869,10 +1869,27 @@ fn linux_supervisor_candidates(daemon_arch: &str) -> Vec { /// inside the digest-keyed directory and renamed into place, so concurrent /// gateway starts don't observe a partial file. async fn extract_supervisor_bin_from_image(docker: &Docker, image: &str) -> CoreResult { + let refresh_attempted = if supervisor_image_should_refresh(image) { + info!(image = image, "Refreshing mutable docker supervisor image"); + match pull_supervisor_image(docker, image).await { + Ok(()) => true, + Err(err) => { + warn!( + image = image, + error = %err, + "failed to refresh mutable docker supervisor image; falling back to local image if present", + ); + true + } + } + } else { + false + }; + // Inspect first to see if the image is already present; only pull on miss. let inspect = match docker.inspect_image(image).await { Ok(inspect) => inspect, - Err(err) if is_not_found_error(&err) => { + Err(err) if is_not_found_error(&err) && !refresh_attempted => { info!(image = image, "Pulling docker supervisor image"); pull_supervisor_image(docker, image).await?; docker.inspect_image(image).await.map_err(|err| { @@ -1881,6 +1898,11 @@ async fn extract_supervisor_bin_from_image(docker: &Docker, image: &str) -> Core )) })? } + Err(err) if is_not_found_error(&err) => { + return Err(Error::config(format!( + "docker supervisor image '{image}' is not present locally after refresh attempt", + ))); + } Err(err) => { return Err(Error::config(format!( "failed to inspect docker supervisor image '{image}': {err}", @@ -1926,6 +1948,23 @@ async fn extract_supervisor_bin_from_image(docker: &Docker, image: &str) -> Core Ok(cache_path) } +fn supervisor_image_should_refresh(image: &str) -> bool { + matches!(supervisor_image_tag(image), Some("dev" | "latest")) +} + +fn supervisor_image_tag(image: &str) -> Option<&str> { + if image.contains('@') { + return None; + } + + let image_name = image.rsplit('/').next().unwrap_or(image); + image_name + .rsplit_once(':') + .map_or(Some("latest"), |(_, tag)| { + if tag.is_empty() { None } else { Some(tag) } + }) +} + async fn pull_supervisor_image(docker: &Docker, image: &str) -> CoreResult<()> { let mut stream = docker.create_image( Some(CreateImageOptions { diff --git a/crates/openshell-driver-docker/src/tests.rs b/crates/openshell-driver-docker/src/tests.rs index 7da6164ce..9afec4be4 100644 --- a/crates/openshell-driver-docker/src/tests.rs +++ b/crates/openshell-driver-docker/src/tests.rs @@ -903,6 +903,25 @@ fn docker_supervisor_image_tag_sanitizes_build_metadata_for_docker() { ); } +#[test] +fn docker_supervisor_image_refreshes_mutable_tags_only() { + assert!(supervisor_image_should_refresh( + "ghcr.io/nvidia/openshell/supervisor:dev" + )); + assert!(supervisor_image_should_refresh( + "ghcr.io/nvidia/openshell/supervisor:latest" + )); + assert!(supervisor_image_should_refresh( + "ghcr.io/nvidia/openshell/supervisor" + )); + assert!(!supervisor_image_should_refresh( + "ghcr.io/nvidia/openshell/supervisor:0.0.47-dev.13-g57b71c68f" + )); + assert!(!supervisor_image_should_refresh( + "ghcr.io/nvidia/openshell/supervisor@sha256:abc123" + )); +} + #[test] fn supervisor_cache_path_namespaces_by_digest_under_openshell_data_dir() { let base = PathBuf::from("/var/cache/share"); diff --git a/crates/openshell-driver-podman/src/driver.rs b/crates/openshell-driver-podman/src/driver.rs index ec8c519ee..a8ac6d424 100644 --- a/crates/openshell-driver-podman/src/driver.rs +++ b/crates/openshell-driver-podman/src/driver.rs @@ -328,15 +328,16 @@ impl PodmanComputeDriver { // 1a. Pull the supervisor image if needed. The supervisor binary // is shipped in a standalone OCI image and mounted into sandbox - // containers via Podman's type=image mount. Using "missing" - // policy so the image is only pulled once and then cached. + // containers via Podman's type=image mount. Refresh mutable tags + // like latest/dev, but avoid registry checks for pinned images. + let supervisor_pull_policy = supervisor_image_pull_policy(&self.config.supervisor_image); info!( image = %self.config.supervisor_image, - policy = "missing", + policy = supervisor_pull_policy, "Ensuring supervisor image" ); self.client - .pull_image(&self.config.supervisor_image, "missing") + .pull_image(&self.config.supervisor_image, supervisor_pull_policy) .await .map_err(ComputeDriverError::from)?; @@ -589,6 +590,31 @@ impl PodmanComputeDriver { } } +fn supervisor_image_pull_policy(image: &str) -> &'static str { + if supervisor_image_should_refresh(image) { + "newer" + } else { + "missing" + } +} + +fn supervisor_image_should_refresh(image: &str) -> bool { + matches!(supervisor_image_tag(image), Some("dev" | "latest")) +} + +fn supervisor_image_tag(image: &str) -> Option<&str> { + if image.contains('@') { + return None; + } + + let image_name = image.rsplit('/').next().unwrap_or(image); + image_name + .rsplit_once(':') + .map_or(Some("latest"), |(_, tag)| { + if tag.is_empty() { None } else { Some(tag) } + }) +} + /// Check whether the current user has subuid/subgid ranges configured. /// /// Rootless Podman requires entries in `/etc/subuid` and `/etc/subgid` for @@ -719,6 +745,32 @@ mod tests { assert_eq!(cfg.grpc_endpoint, "https://gateway.internal:9000"); } + #[test] + fn supervisor_pull_policy_refreshes_mutable_tags_only() { + assert_eq!( + supervisor_image_pull_policy("ghcr.io/nvidia/openshell/supervisor:dev"), + "newer" + ); + assert_eq!( + supervisor_image_pull_policy("ghcr.io/nvidia/openshell/supervisor:latest"), + "newer" + ); + assert_eq!( + supervisor_image_pull_policy("ghcr.io/nvidia/openshell/supervisor"), + "newer" + ); + assert_eq!( + supervisor_image_pull_policy( + "ghcr.io/nvidia/openshell/supervisor:0.0.47-dev.13-g57b71c68f" + ), + "missing" + ); + assert_eq!( + supervisor_image_pull_policy("ghcr.io/nvidia/openshell/supervisor@sha256:abc123"), + "missing" + ); + } + fn test_driver(socket_path: PathBuf) -> PodmanComputeDriver { let config = PodmanComputeConfig { socket_path, diff --git a/crates/openshell-server/src/certgen.rs b/crates/openshell-server/src/certgen.rs index 948ce2907..b7ce0421c 100644 --- a/crates/openshell-server/src/certgen.rs +++ b/crates/openshell-server/src/certgen.rs @@ -24,9 +24,11 @@ use k8s_openapi::api::core::v1::Secret; use kube::Client; use kube::api::{Api, ObjectMeta, PostParams}; use miette::{IntoDiagnostic, Result, WrapErr}; -use openshell_bootstrap::pki::{PkiBundle, generate_pki}; +use openshell_bootstrap::pki::{DEFAULT_SERVER_SANS, PkiBundle, generate_pki}; use openshell_core::paths::{create_dir_restricted, set_file_owner_only}; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, BTreeSet}; +use std::fmt; +use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; use std::path::{Path, PathBuf}; use tracing::{info, warn}; use tracing_subscriber::EnvFilter; @@ -290,6 +292,21 @@ enum LocalAction { CreateAll, } +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +enum CertSan { + Dns(String), + Ip(IpAddr), +} + +impl fmt::Display for CertSan { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Dns(name) => write!(f, "{name}"), + Self::Ip(addr) => write!(f, "{addr}"), + } + } +} + /// Layout under ``: /// /// ```text @@ -389,13 +406,34 @@ fn run_local(dir: &Path, server_sans: &[String]) -> Result<()> { let bundle = match decide_local(paths.tls_existence_count(), paths.jwt_existence_count()) { LocalAction::Skip => { - info!(dir = %dir.display(), "PKI files already exist, skipping."); + let missing_sans = missing_required_server_sans(&paths, server_sans)?; + if missing_sans.is_empty() { + info!(dir = %dir.display(), "PKI files already exist, skipping."); + } else { + let bundle = generate_pki(server_sans)?; + write_local_tls_bundle(&bundle, &paths)?; + info!( + dir = %dir.display(), + missing_sans = %format_cert_sans(&missing_sans), + "server TLS certificate refreshed for current SAN set.", + ); + } read_local_bundle(&paths)? } LocalAction::CreateJwtOnly => { let bundle = generate_pki(server_sans)?; - write_local_jwt_bundle(&bundle, &paths)?; - info!(dir = %dir.display(), "JWT signing files created for existing TLS install."); + let missing_sans = missing_required_server_sans(&paths, server_sans)?; + if missing_sans.is_empty() { + write_local_jwt_bundle(&bundle, &paths)?; + info!(dir = %dir.display(), "JWT signing files created for existing TLS install."); + } else { + write_local_bundle(dir, &bundle, &paths)?; + info!( + dir = %dir.display(), + missing_sans = %format_cert_sans(&missing_sans), + "PKI files refreshed for current SAN set and JWT signing material.", + ); + } read_local_bundle(&paths)? } LocalAction::PartialState => { @@ -423,6 +461,90 @@ fn run_local(dir: &Path, server_sans: &[String]) -> Result<()> { Ok(()) } +fn required_server_sans(server_sans: &[String]) -> BTreeSet { + DEFAULT_SERVER_SANS + .iter() + .copied() + .chain(server_sans.iter().map(String::as_str)) + .filter_map(|san| { + san.parse::() + .map(CertSan::Ip) + .ok() + .or_else(|| san.is_ascii().then(|| CertSan::Dns(san.to_string()))) + }) + .collect() +} + +fn missing_required_server_sans( + paths: &LocalPaths, + server_sans: &[String], +) -> Result> { + let required = required_server_sans(server_sans); + let actual = server_cert_sans(&paths.server_crt)?; + Ok(required.difference(&actual).cloned().collect()) +} + +fn server_cert_sans(path: &Path) -> Result> { + use x509_parser::pem::parse_x509_pem; + use x509_parser::prelude::{FromDer, GeneralName, X509Certificate}; + + let pem = std::fs::read(path) + .into_diagnostic() + .wrap_err_with(|| format!("failed to read {}", path.display()))?; + let (_, pem) = parse_x509_pem(&pem).map_err(|e| { + miette::miette!( + "failed to parse server certificate PEM {}: {e:?}", + path.display() + ) + })?; + let (_, cert) = X509Certificate::from_der(&pem.contents).map_err(|e| { + miette::miette!( + "failed to parse server certificate {}: {e:?}", + path.display() + ) + })?; + + let Some(ext) = cert.subject_alternative_name().map_err(|e| { + miette::miette!( + "failed to read server certificate SANs {}: {e:?}", + path.display() + ) + })? + else { + return Ok(BTreeSet::new()); + }; + + let mut sans = BTreeSet::new(); + for name in &ext.value.general_names { + match name { + GeneralName::DNSName(name) => { + sans.insert(CertSan::Dns((*name).to_string())); + } + GeneralName::IPAddress(raw) => match raw.len() { + 4 => { + sans.insert(CertSan::Ip(IpAddr::V4(Ipv4Addr::new( + raw[0], raw[1], raw[2], raw[3], + )))); + } + 16 => { + let octets: [u8; 16] = (*raw).try_into().expect("checked IPv6 SAN length"); + sans.insert(CertSan::Ip(IpAddr::V6(Ipv6Addr::from(octets)))); + } + _ => {} + }, + _ => {} + } + } + Ok(sans) +} + +fn format_cert_sans(sans: &[CertSan]) -> String { + sans.iter() + .map(ToString::to_string) + .collect::>() + .join(", ") +} + fn read_local_bundle(paths: &LocalPaths) -> Result { Ok(PkiBundle { ca_cert_pem: read_pem(&paths.ca_crt)?, @@ -506,6 +628,47 @@ fn write_local_bundle(dir: &Path, bundle: &PkiBundle, paths: &LocalPaths) -> Res Ok(()) } +fn write_local_tls_bundle(bundle: &PkiBundle, paths: &LocalPaths) -> Result<()> { + let temp = sibling_temp_dir(&paths.server_dir); + if temp.exists() { + std::fs::remove_dir_all(&temp) + .into_diagnostic() + .wrap_err_with(|| format!("failed to remove stale {}", temp.display()))?; + } + + let temp_server = temp.join("server"); + let temp_client = temp.join("client"); + create_dir_restricted(&temp)?; + create_dir_restricted(&temp_server)?; + create_dir_restricted(&temp_client)?; + + write_pem(&temp.join("ca.crt"), &bundle.ca_cert_pem, false)?; + write_pem(&temp.join("ca.key"), &bundle.ca_key_pem, true)?; + write_pem(&temp_server.join("tls.crt"), &bundle.server_cert_pem, false)?; + write_pem(&temp_server.join("tls.key"), &bundle.server_key_pem, true)?; + write_pem(&temp_client.join("tls.crt"), &bundle.client_cert_pem, false)?; + write_pem(&temp_client.join("tls.key"), &bundle.client_key_pem, true)?; + + create_dir_restricted(&paths.server_dir)?; + create_dir_restricted(&paths.client_dir)?; + let renames: [(PathBuf, &Path); 6] = [ + (temp.join("ca.crt"), paths.ca_crt.as_path()), + (temp.join("ca.key"), paths.ca_key.as_path()), + (temp_server.join("tls.crt"), paths.server_crt.as_path()), + (temp_server.join("tls.key"), paths.server_key.as_path()), + (temp_client.join("tls.crt"), paths.client_crt.as_path()), + (temp_client.join("tls.key"), paths.client_key.as_path()), + ]; + for (from, to) in &renames { + std::fs::rename(from, to) + .into_diagnostic() + .wrap_err_with(|| format!("failed to move {} -> {}", from.display(), to.display()))?; + } + + let _ = std::fs::remove_dir_all(&temp); + Ok(()) +} + fn write_local_jwt_bundle(bundle: &PkiBundle, paths: &LocalPaths) -> Result<()> { let temp = sibling_temp_dir(&paths.jwt_dir); if temp.exists() { @@ -568,9 +731,9 @@ fn print_bundle(bundle: &PkiBundle) { #[cfg(test)] mod tests { use super::{ - K8sAction, LocalAction, LocalPaths, decide_k8s, decide_local, jwt_signing_secret, - read_local_bundle, sibling_temp_dir, tls_secret, write_local_bundle, - write_local_jwt_bundle, + CertSan, K8sAction, LocalAction, LocalPaths, decide_k8s, decide_local, jwt_signing_secret, + missing_required_server_sans, read_local_bundle, sibling_temp_dir, tls_secret, + write_local_bundle, write_local_jwt_bundle, write_local_tls_bundle, }; use openshell_bootstrap::pki::generate_pki; use std::path::Path; @@ -733,6 +896,48 @@ mod tests { assert_eq!(read.jwt_public_key_pem, new_bundle.jwt_public_key_pem); } + #[test] + fn write_local_tls_bundle_preserves_existing_jwt_files() { + let parent = tempfile::tempdir().expect("tempdir"); + let dir = parent.path().join("tls"); + let old_bundle = generate_pki(&[]).expect("generate_pki"); + let new_bundle = generate_pki(&["extra.example.test".to_string()]).expect("generate_pki"); + let paths = LocalPaths::resolve(&dir); + + write_local_bundle(&dir, &old_bundle, &paths).expect("write_local_bundle"); + write_local_tls_bundle(&new_bundle, &paths).expect("write_local_tls_bundle"); + + let read = read_local_bundle(&paths).expect("read_local_bundle"); + assert_eq!(read.ca_cert_pem, new_bundle.ca_cert_pem); + assert_eq!(read.server_cert_pem, new_bundle.server_cert_pem); + assert_eq!(read.client_cert_pem, new_bundle.client_cert_pem); + assert_eq!(read.jwt_key_id, old_bundle.jwt_key_id); + assert_eq!(read.jwt_public_key_pem, old_bundle.jwt_public_key_pem); + } + + #[test] + fn missing_required_server_sans_detects_new_required_name() { + let parent = tempfile::tempdir().expect("tempdir"); + let dir = parent.path().join("tls"); + let bundle = generate_pki(&[]).expect("generate_pki"); + let paths = LocalPaths::resolve(&dir); + + write_local_bundle(&dir, &bundle, &paths).expect("write_local_bundle"); + + assert!( + missing_required_server_sans(&paths, &[]) + .unwrap() + .is_empty() + ); + + let missing = + missing_required_server_sans(&paths, &["future.example.test".to_string()]).unwrap(); + assert_eq!( + missing, + vec![CertSan::Dns("future.example.test".to_string())] + ); + } + #[test] fn read_local_bundle_uses_existing_files() { let parent = tempfile::tempdir().expect("tempdir"); diff --git a/tasks/scripts/release.py b/tasks/scripts/release.py index 79cb7ab73..086ca9906 100644 --- a/tasks/scripts/release.py +++ b/tasks/scripts/release.py @@ -299,12 +299,16 @@ def install docker_tls_dir="${{HOME}}/.local/state/openshell/homebrew/tls" mkdir -p "${{docker_tls_dir}}/server" mkdir -p "${{docker_tls_dir}}/client" - chmod 700 "${{docker_tls_dir}}" "${{docker_tls_dir}}/server" "${{docker_tls_dir}}/client" + mkdir -p "${{docker_tls_dir}}/jwt" + chmod 700 "${{docker_tls_dir}}" "${{docker_tls_dir}}/server" "${{docker_tls_dir}}/client" "${{docker_tls_dir}}/jwt" /usr/bin/install -m 0644 "#{{var}}/openshell/tls/ca.crt" "${{docker_tls_dir}}/ca.crt" /usr/bin/install -m 0644 "#{{var}}/openshell/tls/server/tls.crt" "${{docker_tls_dir}}/server/tls.crt" /usr/bin/install -m 0600 "#{{var}}/openshell/tls/server/tls.key" "${{docker_tls_dir}}/server/tls.key" /usr/bin/install -m 0644 "#{{var}}/openshell/tls/client/tls.crt" "${{docker_tls_dir}}/client/tls.crt" /usr/bin/install -m 0600 "#{{var}}/openshell/tls/client/tls.key" "${{docker_tls_dir}}/client/tls.key" + /usr/bin/install -m 0600 "#{{var}}/openshell/tls/jwt/signing.pem" "${{docker_tls_dir}}/jwt/signing.pem" + /usr/bin/install -m 0644 "#{{var}}/openshell/tls/jwt/public.pem" "${{docker_tls_dir}}/jwt/public.pem" + /usr/bin/install -m 0644 "#{{var}}/openshell/tls/jwt/kid" "${{docker_tls_dir}}/jwt/kid" export OPENSHELL_LOCAL_TLS_DIR="${{OPENSHELL_LOCAL_TLS_DIR:-${{docker_tls_dir}}}}" xdg_gateway_config="${{xdg_config_home}}/openshell/gateway.toml" From 4f29dfcb04ba15da98e5bb53a3ac57383f5095bb Mon Sep 17 00:00:00 2001 From: Taylor Mutch Date: Fri, 22 May 2026 09:15:04 -0700 Subject: [PATCH 2/2] fix(bootstrap): satisfy default SAN doc lint --- crates/openshell-bootstrap/src/pki.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/openshell-bootstrap/src/pki.rs b/crates/openshell-bootstrap/src/pki.rs index e99279b97..adc2c48f1 100644 --- a/crates/openshell-bootstrap/src/pki.rs +++ b/crates/openshell-bootstrap/src/pki.rs @@ -24,8 +24,9 @@ pub struct PkiBundle { pub jwt_key_id: String, } -/// Default SANs always included on the server certificate. Covers the host -/// aliases used by every supported runtime: Kubernetes service DNS, +/// Default SANs always included on the server certificate. +/// +/// Covers the host aliases used by every supported runtime: Kubernetes service DNS, /// `host.docker.internal` for Docker Desktop and rootless Docker on Linux, /// and `host.containers.internal` for Podman containers reaching their host. pub const DEFAULT_SERVER_SANS: &[&str] = &[