From ad671cf8a3b84ada8dcd9da5b48562cce61fe1ce Mon Sep 17 00:00:00 2001 From: unive3sal Date: Mon, 18 May 2026 13:37:13 +0800 Subject: [PATCH 1/5] refactor: add daemon-managed proxy process * Move proxy worker lifecycle ownership into the daemon, including signal-safe shutdown cleanup and per-app proxy port preservation. * Consolidate proxy daemon and failover CI checks into the integration test workflow. * change fmt and integration trigger from "pull_request_target" to "pull_request". --- .github/workflows/rust-ci.yml | 19 +- README.md | 17 +- README_ZH.md | 17 +- src-tauri/Cargo.lock | 1 + src-tauri/Cargo.toml | 3 +- src-tauri/src/app_config.rs | 2 +- src-tauri/src/cli/commands/daemon.rs | 157 +++ src-tauri/src/cli/commands/mod.rs | 2 + src-tauri/src/cli/commands/proxy.rs | 392 +++++-- src-tauri/src/cli/mod.rs | 26 + src-tauri/src/cli/tui/app/app_state.rs | 4 - src-tauri/src/cli/tui/app/content_config.rs | 51 +- .../src/cli/tui/app/overlay_handlers/views.rs | 2 +- src-tauri/src/cli/tui/app/types.rs | 11 +- src-tauri/src/cli/tui/data.rs | 17 +- src-tauri/src/cli/tui/runtime_actions/mod.rs | 3 - .../src/cli/tui/runtime_actions/settings.rs | 88 +- src-tauri/src/daemon/ipc/client.rs | 191 +++ src-tauri/src/daemon/ipc/mod.rs | 3 + src-tauri/src/daemon/ipc/protocol.rs | 204 ++++ src-tauri/src/daemon/ipc/server.rs | 231 ++++ src-tauri/src/daemon/logging.rs | 213 ++++ src-tauri/src/daemon/mod.rs | 152 +++ src-tauri/src/daemon/paths.rs | 130 +++ src-tauri/src/daemon/pidfile.rs | 161 +++ src-tauri/src/daemon/restart.rs | 239 ++++ src-tauri/src/daemon/supervisor.rs | 579 +++++++++ src-tauri/src/database/dao/proxy.rs | 191 ++- src-tauri/src/database/mod.rs | 8 +- src-tauri/src/database/schema.rs | 43 +- src-tauri/src/lib.rs | 1 + src-tauri/src/main.rs | 6 +- src-tauri/src/proxy/types.rs | 19 + src-tauri/src/services/proxy.rs | 489 +++++--- .../tests/proxy_claude_forwarder_alignment.rs | 9 + src-tauri/tests/proxy_daemon.rs | 1039 +++++++++++++++++ 36 files changed, 4306 
insertions(+), 414 deletions(-) create mode 100644 src-tauri/src/cli/commands/daemon.rs create mode 100644 src-tauri/src/daemon/ipc/client.rs create mode 100644 src-tauri/src/daemon/ipc/mod.rs create mode 100644 src-tauri/src/daemon/ipc/protocol.rs create mode 100644 src-tauri/src/daemon/ipc/server.rs create mode 100644 src-tauri/src/daemon/logging.rs create mode 100644 src-tauri/src/daemon/mod.rs create mode 100644 src-tauri/src/daemon/paths.rs create mode 100644 src-tauri/src/daemon/pidfile.rs create mode 100644 src-tauri/src/daemon/restart.rs create mode 100644 src-tauri/src/daemon/supervisor.rs create mode 100644 src-tauri/tests/proxy_daemon.rs diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml index 18b65d62..344b7a1f 100644 --- a/.github/workflows/rust-ci.yml +++ b/.github/workflows/rust-ci.yml @@ -8,7 +8,7 @@ on: - "src-tauri/**" - ".github/workflows/rust-ci.yml" - ".github/workflows/release.yml" - pull_request_target: + pull_request: paths: - "src-tauri/**" - ".github/workflows/rust-ci.yml" @@ -47,8 +47,8 @@ jobs: working-directory: src-tauri run: cargo fmt --check - failover-e2e: - name: failover E2E test + integration-tests: + name: Integration tests runs-on: ubuntu-22.04 steps: - name: Checkout @@ -63,9 +63,9 @@ jobs: uses: Swatinem/rust-cache@v2 with: workspaces: src-tauri - key: failover-e2e + key: integration-tests - - name: Run failover E2E test + - name: Failover working-directory: src-tauri run: | sandbox_home="$(mktemp -d)" @@ -73,3 +73,12 @@ jobs: export USERPROFILE="$sandbox_home" export CC_SWITCH_CONFIG_DIR="$sandbox_home/.cc-switch" cargo test --test proxy_claude_forwarder_alignment proxy_claude_auto_failover_uses_activated_queue_providers -- --exact --nocapture + + - name: Proxy daemon + working-directory: src-tauri + run: | + sandbox_home="$(mktemp -d)" + export HOME="$sandbox_home" + export USERPROFILE="$sandbox_home" + export CC_SWITCH_CONFIG_DIR="$sandbox_home/.cc-switch" + cargo test --test proxy_daemon 
proxy_enable_and_disable_cli_manage_daemon_worker -- --exact --nocapture diff --git a/README.md b/README.md index a0433449..534015d4 100644 --- a/README.md +++ b/README.md @@ -365,17 +365,22 @@ cc-switch config reset # Reset to default configuration ### 🌉 Proxy Management -Inspect and control the local multi-app proxy used by supported apps. +Inspect and control daemon-managed per-app proxy routes for supported apps. -**Features:** Persisted enable/disable switch, current route inspection, dashboard telemetry, and foreground serve mode for debugging. +**Features:** independent enable/disable per app, per-app listen ports, daemon-managed workers, current route inspection, dashboard telemetry, and foreground serve mode for debugging. ```bash -cc-switch proxy show # Show proxy configuration and routes -cc-switch proxy enable # Enable the persisted proxy switch -cc-switch proxy disable # Disable the persisted proxy switch -cc-switch proxy serve # Run the proxy in foreground +cc-switch proxy show # Show proxy configuration, routes, and daemon worker status +cc-switch proxy enable # Enable the Claude proxy route (default app) +cc-switch --app codex proxy enable # Enable the Codex proxy route +cc-switch --app gemini proxy disable # Disable the Gemini proxy route +cc-switch --app claude proxy config --listen-port 15721 +cc-switch --app codex proxy config --listen-port 15722 +cc-switch proxy serve --takeover claude # Foreground debug mode; refused while daemon-managed routes are active ``` +Normal CLI/TUI proxy enable/disable actions are routed through the daemon. The daemon auto-starts when the first app proxy route is activated, runs one worker per active supported app (Claude, Codex, Gemini), and exits automatically when no proxy routes remain active. + ### 🧪 Environment & Local Tools Inspect environment conflicts and whether required local CLIs are installed. 
diff --git a/README_ZH.md b/README_ZH.md index 4bddcd03..4ad626ea 100644 --- a/README_ZH.md +++ b/README_ZH.md @@ -366,17 +366,22 @@ cc-switch config reset # 重置为默认配置 ### 🌉 代理管理 -查看并控制服务于各应用的本地多应用代理。 +查看并控制由守护进程管理的按应用代理路由。 -**功能:** 持久化开关、当前路由检查、首页遥测,以及用于调试的前台运行模式。 +**功能:** 每个应用可独立启用/禁用代理、每个应用可配置监听端口、由 daemon 管理 worker、当前路由检查、首页遥测,以及用于调试的前台运行模式。 ```bash -cc-switch proxy show # 显示代理配置和路由 -cc-switch proxy enable # 启用持久化代理开关 -cc-switch proxy disable # 禁用持久化代理开关 -cc-switch proxy serve # 以前台模式运行代理 +cc-switch proxy show # 显示代理配置、路由和 daemon worker 状态 +cc-switch proxy enable # 启用 Claude 代理路由(默认应用) +cc-switch --app codex proxy enable # 启用 Codex 代理路由 +cc-switch --app gemini proxy disable # 禁用 Gemini 代理路由 +cc-switch --app claude proxy config --listen-port 15721 +cc-switch --app codex proxy config --listen-port 15722 +cc-switch proxy serve --takeover claude # 前台调试模式;存在 daemon 托管路由时会拒绝运行 ``` +普通 CLI/TUI 的代理启用/禁用操作都会通过 daemon 执行。首次启用任一应用代理路由时 daemon 会自动启动;每个活跃的受支持应用(Claude、Codex、Gemini)各有一个 worker;当没有任何活跃代理路由时 daemon 会自动退出。 + ### 🧪 环境与本地工具 检查环境变量冲突,以及 Claude/Codex/Gemini/OpenCode CLI 是否已经装好。 diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index c3a1ed04..1cdabba4 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -495,6 +495,7 @@ dependencies = [ "log", "minisign", "minisign-verify", + "nix", "once_cell", "ratatui", "regex", diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index f6733acc..da60a72c 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -63,7 +63,8 @@ json-five = "0.3.1" # Network and async reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "json", "stream", "socks"] } -tokio = { version = "1", features = ["macros", "rt-multi-thread", "time", "sync", "signal"] } +tokio = { version = "1", features = ["macros", "rt-multi-thread", "time", "sync", "signal", "net", "process", "io-util"] } +nix = { version = "0.29", features = ["signal", "process", "fs"] } futures = "0.3" async-stream = "0.3" bytes = 
"1.5" diff --git a/src-tauri/src/app_config.rs b/src-tauri/src/app_config.rs index 16d5756c..8c2045ac 100644 --- a/src-tauri/src/app_config.rs +++ b/src-tauri/src/app_config.rs @@ -268,7 +268,7 @@ use crate::error::AppError; use crate::provider::ProviderManager; /// 应用类型 -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, clap::ValueEnum)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, clap::ValueEnum)] #[serde(rename_all = "lowercase")] pub enum AppType { Claude, diff --git a/src-tauri/src/cli/commands/daemon.rs b/src-tauri/src/cli/commands/daemon.rs new file mode 100644 index 00000000..cb17931b --- /dev/null +++ b/src-tauri/src/cli/commands/daemon.rs @@ -0,0 +1,157 @@ +use std::path::PathBuf; + +use clap::Subcommand; + +use crate::cli::ui::{highlight, info, success, warning}; +use crate::daemon; +use crate::daemon::ipc::client; +use crate::daemon::ipc::protocol::{Request, Response}; +use crate::error::AppError; + +#[derive(Subcommand, Debug, Clone)] +pub enum DaemonCommand { + /// Start the supervisor daemon. Without --detach, runs in the foreground + /// (useful for debugging or running under systemd / launchd). + Start { + /// Detach from the terminal (double-fork) and write the pidfile. + #[arg(long)] + detach: bool, + }, + /// Tell the running daemon to stop the worker (if any) and exit. + Stop, + /// Print daemon status (running, worker pid, restart count). + Status, + /// Show the path to the daemon log file. 
+ Logs, +} + +pub fn execute(cmd: DaemonCommand) -> Result<(), AppError> { + match cmd { + DaemonCommand::Start { detach } => start_daemon(detach), + DaemonCommand::Stop => stop_daemon(), + DaemonCommand::Status => status_daemon(), + DaemonCommand::Logs => show_log_path(), + } +} + +fn start_daemon(detach: bool) -> Result<(), AppError> { + if detach { + detach_into_background()?; + } + + let binary_path = current_executable()?; + let runtime = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .map_err(|err| AppError::Message(format!("build daemon runtime failed: {err}")))?; + runtime + .block_on(daemon::run(binary_path)) + .map_err(AppError::Message) +} + +fn stop_daemon() -> Result<(), AppError> { + let socket = daemon::paths::socket_path(); + let response = client::round_trip(&socket, &Request::Shutdown) + .map_err(|err| AppError::Message(format!("send shutdown to daemon: {err}")))?; + match response { + Response::Ok => { + println!("{}", success("daemon shutdown signalled")); + Ok(()) + } + Response::Error { message } => Err(AppError::Message(message)), + other => Err(AppError::Message(format!( + "unexpected response from daemon: {other:?}" + ))), + } +} + +fn status_daemon() -> Result<(), AppError> { + let socket = daemon::paths::socket_path(); + let response = match client::round_trip(&socket, &Request::Status) { + Ok(r) => r, + Err(err) => { + println!("{}", warning(&format!("daemon not reachable: {err}"))); + return Ok(()); + } + }; + match response { + Response::Status { + running, + address, + port, + worker_pid, + takeovers, + restart_count, + last_restart_at, + workers, + } => { + println!("{}", highlight("cc-switch daemon")); + println!( + " worker: {}", + if running { + format!( + "running at {address}:{port} (pid {})", + worker_pid.unwrap_or(0) + ) + } else { + "not running".to_string() + } + ); + for worker in &workers { + println!( + " worker[{}]: {}:{} (pid {})", + worker.app_type, + worker.address, + worker.port, + 
worker.pid.unwrap_or(0) + ); + } + println!( + " takeovers: claude={}, codex={}, gemini={}", + takeovers.claude, takeovers.codex, takeovers.gemini + ); + println!(" restart count: {restart_count}"); + if let Some(at) = last_restart_at { + println!(" last restart: {at}"); + } + Ok(()) + } + Response::Error { message } => Err(AppError::Message(message)), + other => Err(AppError::Message(format!( + "unexpected response from daemon: {other:?}" + ))), + } +} + +fn show_log_path() -> Result<(), AppError> { + let path = daemon::paths::log_path(); + println!("{}", info(&path.display().to_string())); + Ok(()) +} + +fn current_executable() -> Result { + if let Some(path) = std::env::var_os("CARGO_BIN_EXE_cc-switch") { + return Ok(PathBuf::from(path)); + } + std::env::current_exe() + .map_err(|err| AppError::Message(format!("resolve daemon executable: {err}"))) +} + +#[cfg(unix)] +fn detach_into_background() -> Result<(), AppError> { + // Double-fork via libc::daemon. nochdir=1 keeps cwd, noclose=0 redirects + // stdio to /dev/null so the daemon doesn't keep the parent terminal open. 
+ let rc = unsafe { libc::daemon(1, 0) }; + if rc != 0 { + let err = std::io::Error::last_os_error(); + return Err(AppError::Message(format!("daemonize failed: {err}"))); + } + Ok(()) +} + +#[cfg(not(unix))] +fn detach_into_background() -> Result<(), AppError> { + Err(AppError::Message( + "--detach is only supported on unix targets".to_string(), + )) +} diff --git a/src-tauri/src/cli/commands/mod.rs b/src-tauri/src/cli/commands/mod.rs index d958aeef..6622ef9c 100644 --- a/src-tauri/src/cli/commands/mod.rs +++ b/src-tauri/src/cli/commands/mod.rs @@ -2,6 +2,8 @@ pub mod completions; pub mod config; mod config_common; pub mod config_webdav; +#[cfg(unix)] +pub mod daemon; pub mod env; pub mod failover; pub mod internal; diff --git a/src-tauri/src/cli/commands/proxy.rs b/src-tauri/src/cli/commands/proxy.rs index 2a95f7fc..f6a71a9c 100644 --- a/src-tauri/src/cli/commands/proxy.rs +++ b/src-tauri/src/cli/commands/proxy.rs @@ -1,10 +1,18 @@ use clap::Subcommand; use crate::app_config::AppType; -use crate::cli::ui::{highlight, info, success, warning}; +use crate::cli::ui::{highlight, info, success}; use crate::error::AppError; +use crate::proxy::types::AppProxyConfig; use crate::{AppState, ProxyConfig}; +#[cfg(unix)] +use crate::daemon::ipc::client as daemon_client; +#[cfg(unix)] +use crate::daemon::ipc::protocol::{Request as DaemonRequest, Response as DaemonResponse}; +#[cfg(unix)] +use crate::daemon::supervisor::{DAEMON_SOCKET_ENV, SESSION_TOKEN_ENV}; + #[derive(Subcommand, Debug, Clone)] pub enum ProxyCommand { /// Show current proxy configuration and routes @@ -16,6 +24,13 @@ pub enum ProxyCommand { /// Disable the persisted proxy switch Disable, + /// Configure the selected app's proxy route + Config { + /// Set the selected app's daemon worker listen port + #[arg(long)] + listen_port: Option, + }, + /// Start the local proxy in the foreground for debugging Serve { /// Override listen address for this run only @@ -32,11 +47,13 @@ pub enum ProxyCommand { }, } -pub fn 
execute(cmd: ProxyCommand) -> Result<(), AppError> { +pub fn execute(cmd: ProxyCommand, app: Option) -> Result<(), AppError> { + let app_type = app.unwrap_or(AppType::Claude); match cmd { ProxyCommand::Show => show_proxy(), - ProxyCommand::Enable => set_proxy_enabled(true), - ProxyCommand::Disable => set_proxy_enabled(false), + ProxyCommand::Enable => set_proxy_enabled(app_type, true), + ProxyCommand::Disable => set_proxy_enabled(app_type, false), + ProxyCommand::Config { listen_port } => configure_proxy(app_type, listen_port), ProxyCommand::Serve { listen_address, listen_port, @@ -59,52 +76,97 @@ fn create_runtime() -> Result { fn show_proxy() -> Result<(), AppError> { let state = get_state()?; let runtime = create_runtime()?; - let global = runtime.block_on(state.proxy_service.get_global_config())?; let config = runtime.block_on(state.proxy_service.get_config())?; let status = runtime.block_on(state.proxy_service.get_status()); + let app_configs = load_proxy_app_configs(&state, &runtime)?; let takeovers = runtime .block_on(state.proxy_service.get_takeover_status()) .map_err(AppError::Message)?; println!("{}", highlight(crate::t!("Local Proxy", "本地代理"))); - for line in build_proxy_overview_lines(&state, &global, &config, &status, &takeovers) { + for line in build_proxy_overview_lines(&state, &config, &status, &app_configs, &takeovers) { println!("{line}"); } Ok(()) } -fn set_proxy_enabled(enabled: bool) -> Result<(), AppError> { +fn set_proxy_enabled(app_type: AppType, enabled: bool) -> Result<(), AppError> { + if !matches!(app_type, AppType::Claude | AppType::Codex | AppType::Gemini) { + return Err(AppError::InvalidInput(format!( + "proxy takeover is not supported for {}", + app_type.as_str() + ))); + } let state = get_state()?; let runtime = create_runtime()?; - let update = runtime.block_on(state.proxy_service.set_global_enabled(enabled))?; - let config = update.config; - let cleared_failover = update.cleared_auto_failover; + runtime + .block_on( + state + 
.proxy_service + .set_managed_session_for_app(app_type.as_str(), enabled), + ) + .map_err(AppError::Message)?; println!( "{}", success(&format!( - "{}: {}", - crate::t!("Proxy switch", "代理开关"), - if config.proxy_enabled { + "{} {}: {}", + crate::t!("Proxy route", "代理路由"), + app_type.as_str(), + if enabled { crate::t!("enabled", "开启") } else { crate::t!("disabled", "关闭") } )) ); - if !enabled && cleared_failover > 0 { - println!( - "{}", - warning(&format!( - "Cleared automatic failover for {cleared_failover} app(s) because the proxy was disabled." - )) - ); - } Ok(()) } +fn configure_proxy(app_type: AppType, listen_port: Option) -> Result<(), AppError> { + let Some(listen_port) = listen_port else { + return show_proxy(); + }; + if !matches!(app_type, AppType::Claude | AppType::Codex | AppType::Gemini) { + return Err(AppError::InvalidInput(format!( + "proxy takeover is not supported for {}", + app_type.as_str() + ))); + } + let state = get_state()?; + let runtime = create_runtime()?; + let status = runtime.block_on(state.proxy_service.get_status()); + let app_running = status + .active_workers + .iter() + .any(|worker| worker.app_type == app_type.as_str()); + if app_running { + return Err(AppError::Message(format!( + "stop the {} proxy route before changing its listen port", + app_type.as_str() + ))); + } + let mut config = runtime + .block_on(state.db.get_proxy_config_for_app(app_type.as_str())) + .map_err(AppError::from)?; + config.listen_port = listen_port; + runtime + .block_on(state.db.update_proxy_config_for_app(config)) + .map_err(AppError::from)?; + println!( + "{}", + success(&format!( + "{} {}: {}", + crate::t!("Proxy listen port", "代理监听端口"), + app_type.as_str(), + listen_port + )) + ); + Ok(()) +} + fn serve_proxy( listen_address: Option, listen_port: Option, @@ -115,6 +177,12 @@ fn serve_proxy( runtime.block_on(async move { let service = state.proxy_service.clone(); + if !takeovers.is_empty() && service.has_persisted_managed_sessions() { + return 
Err(AppError::Message( + "cannot run foreground proxy takeover while a daemon-managed proxy session is active; disable daemon-managed proxy routes first" + .to_string(), + )); + } let base_config = service.get_config().await?; let effective_config = apply_overrides(&base_config, listen_address, listen_port); @@ -129,6 +197,12 @@ fn serve_proxy( return Err(AppError::Message(err)); } + #[cfg(unix)] + if let Err(err) = announce_to_daemon_if_managed(&server_info) { + let _ = service.stop_with_restore().await; + return Err(AppError::Message(err)); + } + if let Err(err) = service.publish_runtime_session_if_needed(&server_info) { let _ = service.stop_with_restore().await; return Err(AppError::Message(err)); @@ -198,6 +272,35 @@ fn serve_proxy( }) } +#[cfg(unix)] +fn announce_to_daemon_if_managed( + info: &crate::proxy::types::ProxyServerInfo, +) -> Result<(), String> { + let Some(socket_os) = std::env::var_os(DAEMON_SOCKET_ENV) else { + return Ok(()); + }; + let socket_path = std::path::PathBuf::from(socket_os); + let session_token = std::env::var(SESSION_TOKEN_ENV) + .map_err(|_| "missing CC_SWITCH_PROXY_SESSION_TOKEN env from daemon".to_string())?; + let request = DaemonRequest::WorkerHello { + pid: std::process::id(), + address: info.address.clone(), + port: info.port, + session_token, + }; + let response = daemon_client::round_trip(&socket_path, &request) + .map_err(|err| format!("worker hello to daemon failed: {err}"))?; + match response { + DaemonResponse::Ok => Ok(()), + DaemonResponse::Error { message } => { + Err(format!("daemon rejected worker hello: {message}")) + } + other => Err(format!( + "daemon returned unexpected response to worker hello: {other:?}" + )), + } +} + async fn apply_takeovers( service: &crate::ProxyService, takeovers: &[AppType], @@ -234,11 +337,79 @@ fn apply_overrides( config } +fn load_proxy_app_configs( + state: &AppState, + runtime: &tokio::runtime::Runtime, +) -> Result, AppError> { + [AppType::Claude, AppType::Codex, AppType::Gemini] + 
.into_iter() + .map(|app| { + runtime + .block_on(state.db.get_proxy_config_for_app(app.as_str())) + .map_err(AppError::from) + }) + .collect() +} + +fn build_proxy_route_lines( + config: &ProxyConfig, + status: &crate::ProxyStatus, + app_configs: &[AppProxyConfig], + takeovers: &crate::proxy::types::ProxyTakeoverStatus, +) -> Vec { + [ + (AppType::Claude, "Claude", takeovers.claude), + (AppType::Codex, "Codex", takeovers.codex), + (AppType::Gemini, "Gemini", takeovers.gemini), + ] + .into_iter() + .map(|(app, label, enabled)| { + let configured_port = app_configured_port(app_configs, &app).unwrap_or(config.listen_port); + let worker = status + .active_workers + .iter() + .find(|worker| worker.app_type == app.as_str()); + let state = if enabled { + crate::t!("enabled", "开启") + } else { + crate::t!("disabled", "关闭") + }; + + match worker { + Some(worker) => format!( + "- {label}: {state}, {} {}, {} {}:{}{}", + crate::t!("configured", "配置"), + configured_port, + crate::t!("running", "运行"), + worker.address, + worker.port, + worker + .pid + .map(|pid| format!(" pid={pid}")) + .unwrap_or_default() + ), + None => format!( + "- {label}: {state}, {} {}", + crate::t!("configured", "配置"), + configured_port + ), + } + }) + .collect() +} + +fn app_configured_port(app_configs: &[AppProxyConfig], app: &AppType) -> Option { + app_configs + .iter() + .find(|config| config.app_type == app.as_str()) + .map(|config| config.listen_port) +} + fn build_proxy_overview_lines( state: &AppState, - global: &crate::proxy::types::GlobalProxyConfig, config: &ProxyConfig, status: &crate::ProxyStatus, + app_configs: &[AppProxyConfig], takeovers: &crate::proxy::types::ProxyTakeoverStatus, ) -> Vec { let current_providers = AppType::all() @@ -258,11 +429,7 @@ fn build_proxy_overview_lines( } else { config.listen_address.as_str() }; - let listen_port = if status.running && status.port > 0 { - status.port - } else { - config.listen_port - }; + let route_lines = build_proxy_route_lines(config, 
status, app_configs, takeovers); let mut lines = vec![ format!( @@ -275,19 +442,28 @@ fn build_proxy_overview_lines( } ), format!( - "{}: {}", - crate::t!("Enabled", "启用状态"), - if global.proxy_enabled { - crate::t!("enabled", "开启") + "{}: Claude={}, Codex={}, Gemini={}", + crate::t!("Active routes", "活动路由"), + if takeovers.claude { + crate::t!("on", "开启") } else { - crate::t!("disabled", "关闭") + crate::t!("off", "关闭") + }, + if takeovers.codex { + crate::t!("on", "开启") + } else { + crate::t!("off", "关闭") + }, + if takeovers.gemini { + crate::t!("on", "开启") + } else { + crate::t!("off", "关闭") } ), format!( - "{}: {}:{}", - crate::t!("Listen", "监听"), - listen_host, - listen_port + "{}: {}", + crate::t!("Listen address", "监听地址"), + listen_host ), crate::t!( "Mode: local proxy (manual takeover and automatic failover follow app settings)", @@ -319,34 +495,13 @@ fn build_proxy_overview_lines( config.non_streaming_timeout ), String::new(), - crate::t!("Takeovers:", "接管状态:").to_string(), - format!( - "- Claude: {}", - if takeovers.claude { - crate::t!("takeover on", "已接管") - } else { - crate::t!("takeover off", "未接管") - } - ), - format!( - "- Codex: {}", - if takeovers.codex { - crate::t!("takeover on", "已接管") - } else { - crate::t!("takeover off", "未接管") - } - ), - format!( - "- Gemini: {}", - if takeovers.gemini { - crate::t!("takeover on", "已接管") - } else { - crate::t!("takeover off", "未接管") - } - ), + crate::t!("Proxy app routes:", "代理应用路由:").to_string(), + ]; + lines.extend(route_lines); + lines.extend([ String::new(), crate::t!("Auto failover:", "自动故障转移:").to_string(), - ]; + ]); lines.extend(build_auto_failover_status_lines(state)); lines.extend([ String::new(), @@ -367,7 +522,8 @@ fn build_proxy_overview_lines( .to_string(), format!( "- ANTHROPIC_BASE_URL=http://{}:{}", - listen_host, listen_port + listen_host, + app_configured_port(app_configs, &AppType::Claude).unwrap_or(config.listen_port) ), "- ANTHROPIC_AUTH_TOKEN=proxy-placeholder".to_string(), crate::t!( @@ 
-424,11 +580,11 @@ mod tests { use std::sync::{Arc, RwLock}; use crate::{ - proxy::types::{GlobalProxyConfig, ProxyStatus, ProxyTakeoverStatus}, + proxy::types::{ActiveWorker, ProxyStatus, ProxyTakeoverStatus}, Database, MultiAppConfig, ProxyService, }; - use super::build_proxy_overview_lines; + use super::{build_proxy_overview_lines, load_proxy_app_configs}; #[test] fn proxy_overview_lines_include_runtime_status_and_takeover_state() { @@ -436,19 +592,61 @@ mod tests { let state = crate::AppState { db: db.clone(), config: RwLock::new(MultiAppConfig::default()), - proxy_service: ProxyService::new(db), + proxy_service: ProxyService::new(db.clone()), }; - let global = GlobalProxyConfig { - proxy_enabled: true, - listen_address: "127.0.0.1".to_string(), - listen_port: 15721, - enable_logging: true, - }; - let config = crate::ProxyConfig::default(); + let mut config = crate::ProxyConfig::default(); + config.listen_port = 15721; + let runtime = tokio::runtime::Runtime::new().expect("create runtime"); + runtime.block_on(async { + let mut claude = db + .get_proxy_config_for_app("claude") + .await + .expect("load claude proxy config"); + claude.listen_port = 15721; + claude.enabled = true; + db.update_proxy_config_for_app(claude) + .await + .expect("save claude proxy config"); + + let mut codex = db + .get_proxy_config_for_app("codex") + .await + .expect("load codex proxy config"); + codex.listen_port = 15722; + codex.enabled = false; + db.update_proxy_config_for_app(codex) + .await + .expect("save codex proxy config"); + + let mut gemini = db + .get_proxy_config_for_app("gemini") + .await + .expect("load gemini proxy config"); + gemini.listen_port = 15723; + gemini.enabled = true; + db.update_proxy_config_for_app(gemini) + .await + .expect("save gemini proxy config"); + }); + let app_configs = load_proxy_app_configs(&state, &runtime).expect("load app proxy configs"); let status = ProxyStatus { running: true, address: "127.0.0.1".to_string(), port: 24567, + active_workers: 
vec![ + ActiveWorker { + app_type: "claude".to_string(), + address: "127.0.0.1".to_string(), + port: 15721, + pid: Some(1001), + }, + ActiveWorker { + app_type: "gemini".to_string(), + address: "127.0.0.1".to_string(), + port: 15723, + pid: Some(1003), + }, + ], ..Default::default() }; let takeover = ProxyTakeoverStatus { @@ -457,7 +655,7 @@ mod tests { gemini: true, }; - let lines = build_proxy_overview_lines(&state, &global, &config, &status, &takeover); + let lines = build_proxy_overview_lines(&state, &config, &status, &app_configs, &takeover); let output = lines.join("\n"); assert!( @@ -465,16 +663,38 @@ mod tests { "proxy show output should include foreground runtime status" ); assert!( - output.contains("127.0.0.1:24567"), - "proxy show output should prefer the active runtime listen address when the proxy is running" + output.contains("Listen address: 127.0.0.1") + || output.contains("监听地址: 127.0.0.1"), + "proxy show output should show the active runtime listen address separately from app ports" + ); + assert!( + output.contains("Claude: enabled, configured 15721, running 127.0.0.1:15721 pid=1001") + || output.contains("Claude: 开启, 配置 15721, 运行 127.0.0.1:15721 pid=1001"), + "proxy show output should include Claude configured and runtime ports" ); assert!( - output.contains("Claude: takeover on") || output.contains("Claude: 已接管"), - "proxy show output should include Claude manual takeover state" + output.contains("Codex: disabled, configured 15722") + || output.contains("Codex: 关闭, 配置 15722"), + "proxy show output should include Codex configured port even when stopped" ); assert!( - output.contains("Gemini: takeover on") || output.contains("Gemini: 已接管"), - "proxy show output should include Gemini manual takeover state" + output.contains("Gemini: enabled, configured 15723, running 127.0.0.1:15723 pid=1003") + || output.contains("Gemini: 开启, 配置 15723, 运行 127.0.0.1:15723 pid=1003"), + "proxy show output should include Gemini configured and runtime ports" + ); + 
assert!( + output.contains("Active routes: Claude=on, Codex=off, Gemini=on") + || output.contains("活动路由: Claude=开启, Codex=关闭, Gemini=开启"), + "proxy show output should summarize app-specific active routes" + ); + assert!( + !output.contains("Listen: 127.0.0.1:24567") + && !output.contains("监听: 127.0.0.1:24567"), + "proxy show output should not collapse per-app ports into one listen line" + ); + assert!( + !output.contains("Enabled:") && !output.contains("启用状态:"), + "proxy show output should not present proxy state as a single global enabled flag" ); } @@ -496,19 +716,15 @@ mod tests { let state = crate::AppState { db: db.clone(), config: RwLock::new(MultiAppConfig::default()), - proxy_service: ProxyService::new(db), - }; - let global = GlobalProxyConfig { - proxy_enabled: true, - listen_address: "127.0.0.1".to_string(), - listen_port: 15721, - enable_logging: true, + proxy_service: ProxyService::new(db.clone()), }; let config = crate::ProxyConfig::default(); let status = ProxyStatus::default(); let takeover = ProxyTakeoverStatus::default(); + let runtime = tokio::runtime::Runtime::new().expect("create runtime"); + let app_configs = load_proxy_app_configs(&state, &runtime).expect("load app proxy configs"); - let lines = build_proxy_overview_lines(&state, &global, &config, &status, &takeover); + let lines = build_proxy_overview_lines(&state, &config, &status, &app_configs, &takeover); let output = lines.join("\n"); assert!( diff --git a/src-tauri/src/cli/mod.rs b/src-tauri/src/cli/mod.rs index c66572bb..2de4dd3f 100644 --- a/src-tauri/src/cli/mod.rs +++ b/src-tauri/src/cli/mod.rs @@ -70,6 +70,11 @@ pub enum Commands { #[command(subcommand)] Start(commands::start::StartCommand), + /// Manage the cc-switch supervisor daemon (start/stop/status/logs) + #[cfg(unix)] + #[command(subcommand)] + Daemon(commands::daemon::DaemonCommand), + /// Manage environment variables and local CLI tool checks #[command(subcommand)] Env(commands::env::EnvCommand), @@ -192,6 +197,27 @@ mod 
tests { } } + #[test] + fn parses_proxy_config_listen_port_subcommand() { + let cli = Cli::parse_from([ + "cc-switch", + "--app", + "codex", + "proxy", + "config", + "--listen-port", + "15722", + ]); + + assert_eq!(cli.app, Some(super::AppType::Codex)); + match cli.command { + Some(Commands::Proxy(super::commands::proxy::ProxyCommand::Config { listen_port })) => { + assert_eq!(listen_port, Some(15722)); + } + _ => panic!("expected proxy config command"), + } + } + #[test] fn parses_failover_enable_subcommand() { let cli = Cli::parse_from(["cc-switch", "failover", "enable"]); diff --git a/src-tauri/src/cli/tui/app/app_state.rs b/src-tauri/src/cli/tui/app/app_state.rs index 5f73008c..321a9b3c 100644 --- a/src-tauri/src/cli/tui/app/app_state.rs +++ b/src-tauri/src/cli/tui/app/app_state.rs @@ -213,10 +213,6 @@ pub enum Action { SetOpenClawConfigDir { path: Option, }, - SetProxyTakeover { - app_type: AppType, - enabled: bool, - }, SetManagedProxyForCurrentApp { app_type: AppType, enabled: bool, diff --git a/src-tauri/src/cli/tui/app/content_config.rs b/src-tauri/src/cli/tui/app/content_config.rs index b6574859..ecc78a16 100644 --- a/src-tauri/src/cli/tui/app/content_config.rs +++ b/src-tauri/src/cli/tui/app/content_config.rs @@ -908,32 +908,25 @@ impl App { } else { crate::t!("stopped", "未运行") }; - let current_takeover = data.proxy.takeover_enabled_for(&self.app_type); - let current_app_routed = data.proxy.routes_current_app_through_proxy(&self.app_type); - let proxy_action_available = current_app_routed.is_some_and(|current_app_routed| { - !data.proxy.running || data.proxy.managed_runtime || current_app_routed + let current_route = data.proxy.routes_current_app_through_proxy(&self.app_type); + let proxy_action_available = current_route.is_some_and(|current_route| { + !data.proxy.running || data.proxy.managed_runtime || current_route }); - let takeover_state = match current_takeover { - Some(true) => crate::t!("active", "已接管"), - Some(false) => crate::t!("inactive", 
"未接管"), + let route_state = match current_route { + Some(true) => crate::t!("enabled", "开启"), + Some(false) => crate::t!("disabled", "关闭"), None => crate::t!("not supported", "不支持"), }; - let toggle_action = match current_app_routed { - Some(true) if proxy_action_available => Some(TextViewAction::ProxyToggleTakeover { - app_type: self.app_type.clone(), - enabled: false, - }), - Some(false) if proxy_action_available => Some(TextViewAction::ProxyToggleTakeover { - app_type: self.app_type.clone(), - enabled: true, - }), - _ => None, + let toggle_action = if current_route.is_some() && proxy_action_available { + Some(TextViewAction::ProxyToggleManagedRoute) + } else { + None }; let mut lines = vec![ crate::t!( - "Manual takeover status for the foreground proxy.", - "前台代理的手动接管状态。" + "Managed proxy routing for the current app.", + "当前应用的托管代理路由状态。" ) .to_string(), String::new(), @@ -947,19 +940,15 @@ impl App { crate::t!("Current provider", "当前供应商"), current_provider ), + format!("{}: {}", crate::t!("Runtime", "运行态"), runtime_state), format!( "{}: {}", - crate::t!("Foreground runtime", "前台运行态"), - runtime_state - ), - format!( - "{}: {}", - crate::t!("Current app takeover", "当前应用接管"), - takeover_state + crate::t!("Current app route", "当前应用路由"), + route_state ), crate::t!( - "Manual takeover only. 
Automatic failover is disabled.", - "仅支持手动接管,不提供自动故障转移。" + "Proxy routes are started and stopped by the cc-switch daemon.", + "代理路由由 cc-switch daemon 启停。" ) .to_string(), ]; @@ -993,7 +982,7 @@ impl App { } lines.push(String::new()); - lines.push(match current_app_routed { + lines.push(match current_route { Some(true) => crate::t!( "Press T to restore the current app to its live config.", "按 T 恢复当前应用的 live 配置。" @@ -1013,8 +1002,8 @@ impl App { ) .to_string(), None => crate::t!( - "This app does not support proxy takeover in the TUI.", - "这个应用暂不支持在 TUI 中进行代理接管。" + "This app does not support managed proxy routing in the TUI.", + "这个应用暂不支持在 TUI 中使用托管代理路由。" ) .to_string(), }); diff --git a/src-tauri/src/cli/tui/app/overlay_handlers/views.rs b/src-tauri/src/cli/tui/app/overlay_handlers/views.rs index 354671a8..449ad40d 100644 --- a/src-tauri/src/cli/tui/app/overlay_handlers/views.rs +++ b/src-tauri/src/cli/tui/app/overlay_handlers/views.rs @@ -121,7 +121,7 @@ impl App { let has_action = matches!( &self.overlay, Overlay::TextView(TextViewState { - action: Some(TextViewAction::ProxyToggleTakeover { .. }), + action: Some(TextViewAction::ProxyToggleManagedRoute), .. }) ); diff --git a/src-tauri/src/cli/tui/app/types.rs b/src-tauri/src/cli/tui/app/types.rs index 3ba7f305..0116f115 100644 --- a/src-tauri/src/cli/tui/app/types.rs +++ b/src-tauri/src/cli/tui/app/types.rs @@ -134,7 +134,7 @@ pub struct TextViewState { #[derive(Debug, Clone)] pub enum TextViewAction { - ProxyToggleTakeover { app_type: AppType, enabled: bool }, + ProxyToggleManagedRoute, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -143,15 +143,6 @@ pub enum CommonSnippetViewSource { ProviderForm, } -impl TextViewAction { - pub fn key_label(&self) -> &'static str { - match self { - TextViewAction::ProxyToggleTakeover { enabled: true, .. } => texts::tui_key_takeover(), - TextViewAction::ProxyToggleTakeover { enabled: false, .. 
} => texts::tui_key_restore(), - } - } -} - #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum LoadingKind { Generic, diff --git a/src-tauri/src/cli/tui/data.rs b/src-tauri/src/cli/tui/data.rs index 9e7bbc22..fcc6c0a5 100644 --- a/src-tauri/src/cli/tui/data.rs +++ b/src-tauri/src/cli/tui/data.rs @@ -1025,11 +1025,13 @@ fn load_proxy_snapshot(app_type: &AppType) -> Result { } else { runtime_status.address.clone() }; - let listen_port = if runtime_status.port == 0 { - config.listen_port - } else { - runtime_status.port - }; + let listen_port = runtime_status + .active_workers + .iter() + .find(|worker| worker.app_type == current_app) + .map(|worker| worker.port) + .or_else(|| (runtime_status.port != 0).then_some(runtime_status.port)) + .unwrap_or(app_proxy_config.listen_port); let default_cost_multiplier = state .db .get_default_cost_multiplier(app_type.as_str()) @@ -1041,14 +1043,15 @@ fn load_proxy_snapshot(app_type: &AppType) -> Result { Ok(ProxySnapshot { enabled: config.proxy_enabled, running: runtime_status.running, - managed_runtime: runtime_status.managed_session_token.is_some(), + managed_runtime: runtime_status.managed_session_token.is_some() + || !runtime_status.active_workers.is_empty(), auto_failover_enabled: app_proxy_config.auto_failover_enabled, claude_takeover: takeover.claude, codex_takeover: takeover.codex, gemini_takeover: takeover.gemini, default_cost_multiplier, configured_listen_address: config.listen_address.clone(), - configured_listen_port: config.listen_port, + configured_listen_port: app_proxy_config.listen_port, listen_address, listen_port, uptime_seconds: runtime_status.uptime_seconds, diff --git a/src-tauri/src/cli/tui/runtime_actions/mod.rs b/src-tauri/src/cli/tui/runtime_actions/mod.rs index 363ea7f6..d7d2dffb 100644 --- a/src-tauri/src/cli/tui/runtime_actions/mod.rs +++ b/src-tauri/src/cli/tui/runtime_actions/mod.rs @@ -340,9 +340,6 @@ pub(crate) fn handle_action( settings::enable_proxy_and_auto_failover(&mut ctx, app_type) } 
Action::SetOpenClawConfigDir { path } => settings::set_openclaw_config_dir(&mut ctx, path), - Action::SetProxyTakeover { app_type, enabled } => { - settings::set_proxy_takeover(&mut ctx, app_type, enabled) - } Action::SetManagedProxyForCurrentApp { app_type, enabled } => queue_managed_proxy_action( ctx.app, ctx.proxy_req_tx, diff --git a/src-tauri/src/cli/tui/runtime_actions/settings.rs b/src-tauri/src/cli/tui/runtime_actions/settings.rs index be0a5253..5c1cfae8 100644 --- a/src-tauri/src/cli/tui/runtime_actions/settings.rs +++ b/src-tauri/src/cli/tui/runtime_actions/settings.rs @@ -5,8 +5,7 @@ use crate::cli::failover_policy::{ use crate::cli::i18n::texts; use crate::error::AppError; -use super::super::data::{load_proxy_config, load_state, UiData}; -use super::helpers::open_proxy_help_overlay_with; +use super::super::data::{load_state, UiData}; use super::RuntimeActionContext; pub(super) fn set_proxy_enabled( @@ -18,17 +17,18 @@ pub(super) fn set_proxy_enabled( .enable_all() .build() .map_err(|e| AppError::Message(format!("failed to create async runtime: {e}")))?; - let update = runtime.block_on(state.proxy_service.set_global_enabled(enabled))?; - let cleared_failover = update.cleared_auto_failover; + runtime + .block_on( + state + .proxy_service + .set_managed_session_for_app(ctx.app.app_type.as_str(), enabled), + ) + .map_err(AppError::Message)?; + *ctx.data = UiData::load(&ctx.app.app_type)?; ctx.app.push_toast( if enabled { crate::t!("Local proxy enabled.", "本地代理已开启。") - } else if cleared_failover > 0 { - crate::t!( - "Local proxy disabled. 
Automatic failover has been cleared.", - "本地代理已关闭,自动故障转移已清除。" - ) } else { crate::t!("Local proxy disabled.", "本地代理已关闭。") }, @@ -50,9 +50,36 @@ pub(super) fn set_proxy_listen_port( ctx: &mut RuntimeActionContext<'_>, port: u16, ) -> Result<(), AppError> { - update_proxy_config(ctx, |config| { - config.listen_port = port; - }) + let state = load_state()?; + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .map_err(|e| AppError::Message(format!("failed to create async runtime: {e}")))?; + let status = runtime.block_on(state.proxy_service.get_status()); + let app_running = status + .active_workers + .iter() + .any(|worker| worker.app_type == ctx.app.app_type.as_str()); + if app_running { + *ctx.data = UiData::load(&ctx.app.app_type)?; + ctx.app.push_toast( + texts::tui_toast_proxy_settings_stop_before_edit(), + super::super::app::ToastKind::Info, + ); + return Ok(()); + } + + let mut app_config = + runtime.block_on(state.db.get_proxy_config_for_app(ctx.app.app_type.as_str()))?; + app_config.listen_port = port; + runtime.block_on(state.db.update_proxy_config_for_app(app_config))?; + + *ctx.data = UiData::load(&ctx.app.app_type)?; + ctx.app.push_toast( + texts::tui_toast_proxy_settings_saved(), + super::super::app::ToastKind::Success, + ); + Ok(()) } pub(super) fn set_proxy_auto_failover( @@ -169,43 +196,6 @@ pub(super) fn set_openclaw_config_dir( Ok(()) } -pub(super) fn set_proxy_takeover( - ctx: &mut RuntimeActionContext<'_>, - app_type: AppType, - enabled: bool, -) -> Result<(), AppError> { - let state = load_state()?; - let runtime = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build() - .map_err(|e| AppError::Message(format!("failed to create async runtime: {e}")))?; - - let status = runtime.block_on(state.proxy_service.get_status()); - if enabled && !status.running { - ctx.app.push_toast( - texts::tui_toast_proxy_takeover_requires_running(), - super::super::app::ToastKind::Warning, - ); - return Ok(()); - 
/// Failures the foreground client can hit while talking to the daemon.
#[derive(Debug)]
pub enum ClientError {
    /// The daemon could not be reached or started; the payload is the
    /// complete human-readable message.
    NoDaemon(String),
    /// A socket-level I/O operation failed.
    Io(std::io::Error),
    /// A request could not be encoded or a response could not be decoded.
    Protocol(String),
}

impl std::fmt::Display for ClientError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // `NoDaemon` already carries the full message, so it is emitted
            // verbatim with no prefix.
            Self::NoDaemon(text) => f.write_str(text),
            Self::Io(io_err) => write!(f, "{io_err}"),
            Self::Protocol(detail) => {
                f.write_str("protocol error: ")?;
                f.write_str(detail)
            }
        }
    }
}

impl std::error::Error for ClientError {}
If the socket is missing or refuses +/// connections, fork-and-exec `cc-switch daemon start --detach` (or whatever +/// `binary_resolver` returns) and retry until the socket comes up or we time +/// out. +pub fn connect_or_spawn( + socket_path: &Path, + binary_resolver: F, +) -> Result +where + F: FnOnce() -> Result, +{ + if let Ok(stream) = UnixStream::connect(socket_path) { + return Ok(stream); + } + + let bin = binary_resolver()?; + let mut cmd = Command::new(&bin); + cmd.arg("daemon") + .arg("start") + .arg("--detach") + .stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::null()); + cmd.spawn() + .map_err(|err| ClientError::NoDaemon(format!("spawn daemon failed: {err}")))?; + + let deadline = Instant::now() + CONNECT_TIMEOUT; + while Instant::now() < deadline { + if let Ok(stream) = UnixStream::connect(socket_path) { + return Ok(stream); + } + std::thread::sleep(Duration::from_millis(75)); + } + Err(ClientError::NoDaemon(format!( + "daemon socket {} did not come up within {}s", + socket_path.display(), + CONNECT_TIMEOUT.as_secs() + ))) +} + +/// Connect-only (no auto-spawn). Used when the caller has already ensured the +/// daemon is running (e.g. from inside the worker startup path). +pub fn connect(socket_path: &Path) -> Result { + UnixStream::connect(socket_path).map_err(ClientError::Io) +} + +/// Send one request and read one response on `stream`. +pub fn exchange(stream: &mut UnixStream, request: &Request) -> Result { + stream + .set_read_timeout(Some(READ_TIMEOUT)) + .map_err(ClientError::Io)?; + stream + .set_write_timeout(Some(READ_TIMEOUT)) + .map_err(ClientError::Io)?; + + let payload = encode_request(request) + .map_err(|err| ClientError::Protocol(format!("encode request: {err}")))?; + stream + .write_all(payload.as_bytes()) + .map_err(ClientError::Io)?; + stream.write_all(b"\n").map_err(ClientError::Io)?; + stream.flush().map_err(ClientError::Io)?; + // Half-close the write side so the server's read_line returns. 
The Unix + // domain socket here is bidirectional, so we use shutdown(Write) on the fd + // via the stream's shutdown method. + let _ = stream.shutdown(std::net::Shutdown::Write); + + let mut reader = BufReader::new(stream); + let mut line = String::new(); + let n = reader.read_line(&mut line).map_err(ClientError::Io)?; + if n == 0 { + return Err(ClientError::Protocol( + "daemon closed connection without response".into(), + )); + } + serde_json::from_str(line.trim()) + .map_err(|err| ClientError::Protocol(format!("decode response: {err}"))) +} + +/// Convenience: open a socket, send one request, return the response. +pub fn round_trip(socket_path: &Path, request: &Request) -> Result { + let mut stream = connect(socket_path)?; + exchange(&mut stream, request) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::daemon::ipc::protocol::TakeoverFlags; + use std::sync::Arc; + use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader as TokioBufReader}; + use tokio::net::UnixListener; + + /// Tiny tokio-based echo server for client-side tests. Replies once with + /// the stub response, then closes. 
+ fn spawn_test_server(socket: PathBuf, response: Response) -> tokio::task::JoinHandle<()> { + tokio::spawn(async move { + if socket.exists() { + let _ = std::fs::remove_file(&socket); + } + let listener = UnixListener::bind(&socket).expect("bind test listener"); + let (stream, _) = listener.accept().await.expect("accept test conn"); + let (read_half, mut write_half) = stream.into_split(); + let mut reader = TokioBufReader::new(read_half); + let mut buf = String::new(); + reader.read_line(&mut buf).await.expect("read request"); + let body = serde_json::to_string(&response).expect("encode test response"); + write_half + .write_all(body.as_bytes()) + .await + .expect("write resp"); + write_half.write_all(b"\n").await.expect("write nl"); + write_half.flush().await.expect("flush"); + }) + } + + #[tokio::test] + async fn round_trip_returns_decoded_response() { + let tmp = tempfile::tempdir().expect("tmp"); + let sock = tmp.path().join("daemon.sock"); + let stub = Response::Status { + running: true, + address: "127.0.0.1".into(), + port: 1234, + worker_pid: Some(99), + takeovers: TakeoverFlags::default(), + restart_count: 0, + last_restart_at: None, + workers: vec![], + }; + let server = spawn_test_server(sock.clone(), stub.clone()); + + // Client API is synchronous; run on a blocking thread so we don't + // starve the runtime that's hosting the server. + let client_sock = sock.clone(); + let result = tokio::task::spawn_blocking(move || { + // Brief wait for the listener to bind. 
+ for _ in 0..50 { + if client_sock.exists() { + break; + } + std::thread::sleep(Duration::from_millis(20)); + } + round_trip(&client_sock, &Request::Status) + }) + .await + .expect("blocking task") + .expect("round trip"); + + assert_eq!(result, stub); + let _ = Arc::new(()); // unused; silence lint about unused import on some configs + server.await.expect("server task"); + } +} diff --git a/src-tauri/src/daemon/ipc/mod.rs b/src-tauri/src/daemon/ipc/mod.rs new file mode 100644 index 00000000..7f8e5614 --- /dev/null +++ b/src-tauri/src/daemon/ipc/mod.rs @@ -0,0 +1,3 @@ +pub mod client; +pub mod protocol; +pub mod server; diff --git a/src-tauri/src/daemon/ipc/protocol.rs b/src-tauri/src/daemon/ipc/protocol.rs new file mode 100644 index 00000000..38bff86e --- /dev/null +++ b/src-tauri/src/daemon/ipc/protocol.rs @@ -0,0 +1,204 @@ +//! Wire protocol for the daemon control socket. +//! +//! Framing: one JSON object per line (newline-delimited). Each connection is +//! request/response style — the client writes one Request line, the server +//! writes one Response line, and either side may close. + +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum Request { + /// Foreground asks the daemon to bring the named app's worker up if it + /// isn't already, and enable proxy takeover for that app. + EnsureWorker { app_type: String }, + /// Foreground asks the daemon to disable takeover for the named app. The + /// daemon stops that app's worker and may exit if no workers remain. + DropTakeover { app_type: String }, + /// Foreground asks for current daemon + worker state. + Status, + /// Worker → daemon, sent once on worker startup. Identifies the bound + /// listener and the session token so the daemon can publish the + /// `proxy_runtime_session` row on the worker's behalf. 
+ WorkerHello { + pid: u32, + address: String, + port: u16, + session_token: String, + }, + /// Foreground asks the daemon to set the global desired proxy switch and + /// align worker state with it. On `enabled: false`, the daemon clears all + /// active per-app takeovers and stops all workers. On `enabled: true`, the + /// daemon writes the desired switch only; app routes start through + /// `EnsureWorker`. + SetGlobalEnabled { enabled: bool }, + /// Force the daemon to stop the worker (if any) and exit. + Shutdown, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum Response { + Ok, + Worker { + address: String, + port: u16, + session_token: String, + pid: u32, + }, + Status { + running: bool, + address: String, + port: u16, + worker_pid: Option, + takeovers: TakeoverFlags, + restart_count: u32, + last_restart_at: Option, + #[serde(default)] + workers: Vec, + }, + Error { + message: String, + }, +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] +pub struct TakeoverFlags { + pub claude: bool, + pub codex: bool, + pub gemini: bool, +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] +pub struct WorkerState { + pub app_type: String, + pub running: bool, + pub address: String, + pub port: u16, + pub pid: Option, +} + +/// Encode a request as a single JSON line (no trailing newline). +pub fn encode_request(req: &Request) -> Result { + serde_json::to_string(req) +} + +/// Encode a response as a single JSON line (no trailing newline). 
+pub fn encode_response(resp: &Response) -> Result { + serde_json::to_string(resp) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn roundtrip_request(req: Request) { + let line = encode_request(&req).expect("encode"); + let decoded: Request = serde_json::from_str(&line).expect("decode"); + assert_eq!(decoded, req); + } + + fn roundtrip_response(resp: Response) { + let line = encode_response(&resp).expect("encode"); + let decoded: Response = serde_json::from_str(&line).expect("decode"); + assert_eq!(decoded, resp); + } + + #[test] + fn ensure_worker_roundtrips() { + roundtrip_request(Request::EnsureWorker { + app_type: "claude".to_string(), + }); + } + + #[test] + fn drop_takeover_roundtrips() { + roundtrip_request(Request::DropTakeover { + app_type: "codex".to_string(), + }); + } + + #[test] + fn status_request_roundtrips() { + roundtrip_request(Request::Status); + } + + #[test] + fn worker_hello_roundtrips() { + roundtrip_request(Request::WorkerHello { + pid: 4242, + address: "127.0.0.1".to_string(), + port: 15721, + session_token: "tok".to_string(), + }); + } + + #[test] + fn shutdown_request_roundtrips() { + roundtrip_request(Request::Shutdown); + } + + #[test] + fn set_global_enabled_roundtrips_both_polarities() { + roundtrip_request(Request::SetGlobalEnabled { enabled: true }); + roundtrip_request(Request::SetGlobalEnabled { enabled: false }); + } + + #[test] + fn ok_response_roundtrips() { + roundtrip_response(Response::Ok); + } + + #[test] + fn worker_response_roundtrips() { + roundtrip_response(Response::Worker { + address: "127.0.0.1".to_string(), + port: 15721, + session_token: "tok".to_string(), + pid: 9999, + }); + } + + #[test] + fn status_response_roundtrips() { + roundtrip_response(Response::Status { + running: true, + address: "127.0.0.1".to_string(), + port: 15721, + worker_pid: Some(9999), + takeovers: TakeoverFlags { + claude: true, + codex: false, + gemini: true, + }, + restart_count: 2, + last_restart_at: 
Some("2026-05-15T12:34:56Z".to_string()), + workers: vec![WorkerState { + app_type: "claude".to_string(), + running: true, + address: "127.0.0.1".to_string(), + port: 15721, + pid: Some(9999), + }], + }); + } + + #[test] + fn error_response_roundtrips() { + roundtrip_response(Response::Error { + message: "boom".to_string(), + }); + } + + #[test] + fn encoded_lines_have_no_embedded_newlines() { + let line = encode_request(&Request::WorkerHello { + pid: 1, + address: "a".into(), + port: 1, + session_token: "t".into(), + }) + .unwrap(); + assert!(!line.contains('\n')); + } +} diff --git a/src-tauri/src/daemon/ipc/server.rs b/src-tauri/src/daemon/ipc/server.rs new file mode 100644 index 00000000..9c680bd6 --- /dev/null +++ b/src-tauri/src/daemon/ipc/server.rs @@ -0,0 +1,231 @@ +//! Async accept loop for the daemon control socket. +//! +//! Each accepted connection reads a single request line, dispatches it via the +//! provided `Handler`, and writes back a single response line. Connections are +//! ephemeral — the foreground client connects, exchanges one request/response, +//! and disconnects. + +use std::path::Path; +use std::time::Duration; + +use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader}; +use tokio::net::{UnixListener, UnixStream}; +use tokio::task::JoinSet; + +use super::protocol::{encode_response, Request, Response}; + +/// Maximum time we wait for in-flight connections to drain after shutdown +/// is signalled. The handlers that trigger self-shutdown (drop_takeover with +/// no remaining takeovers, set_global_enabled(false)) already do their work +/// before returning Response::Ok, so this drain just covers flushing those +/// final writes onto the socket. +const DRAIN_TIMEOUT: Duration = Duration::from_secs(2); + +/// Server-side handler. The supervisor implements this to translate Requests +/// into actions on its internal state (worker child, takeover ops, DB writes). 
+pub trait Handler: Send + Sync + 'static { + fn handle(&self, request: Request) -> impl std::future::Future + Send; +} + +/// Bind a Unix domain socket at `path`, removing any stale entry first. +pub fn bind(path: &Path) -> std::io::Result { + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + // Remove any leftover socket from a previous (now-dead) daemon. We only + // reach this code path after pidfile acquisition has confirmed no other + // daemon owns the lock, so this is safe. + if path.exists() { + let _ = std::fs::remove_file(path); + } + UnixListener::bind(path) +} + +/// Run the accept loop until `shutdown` resolves. +/// +/// Each connection is handled on its own task tracked in a `JoinSet`. When +/// shutdown fires we stop accepting new connections but drain the in-flight +/// ones with a short deadline so the response that triggered the shutdown +/// (e.g. `drop_takeover` of the last takeover) actually reaches the client +/// before the daemon's tokio runtime drops the task. +pub async fn run(listener: UnixListener, handler: std::sync::Arc, shutdown: F) +where + H: Handler, + F: std::future::Future + Send, +{ + tokio::pin!(shutdown); + let mut tasks: JoinSet<()> = JoinSet::new(); + loop { + tokio::select! { + _ = &mut shutdown => { + log::debug!("daemon ipc: shutdown signalled, draining in-flight connections"); + drain_in_flight(&mut tasks).await; + return; + } + accept = listener.accept() => { + match accept { + Ok((stream, _)) => { + let handler = handler.clone(); + tasks.spawn(async move { + if let Err(err) = serve_connection(stream, handler).await { + log::warn!("daemon ipc: connection failed: {err}"); + } + }); + } + Err(err) => { + log::warn!("daemon ipc: accept failed: {err}"); + } + } + } + // Reap finished connection tasks so the JoinSet doesn't grow + // unboundedly on a long-running daemon. 
+ Some(_) = tasks.join_next(), if !tasks.is_empty() => {} + } + } +} + +async fn drain_in_flight(tasks: &mut JoinSet<()>) { + if tasks.is_empty() { + return; + } + let drain = async { while tasks.join_next().await.is_some() {} }; + if tokio::time::timeout(DRAIN_TIMEOUT, drain).await.is_err() { + log::warn!( + "daemon ipc: drain deadline ({}s) elapsed with {} in-flight connection(s)", + DRAIN_TIMEOUT.as_secs(), + tasks.len() + ); + tasks.abort_all(); + } +} + +async fn serve_connection(stream: UnixStream, handler: std::sync::Arc) -> std::io::Result<()> +where + H: Handler, +{ + let (read_half, mut write_half) = stream.into_split(); + let mut reader = BufReader::new(read_half); + let mut line = String::new(); + let n = reader.read_line(&mut line).await?; + if n == 0 { + return Ok(()); + } + + let trimmed = line.trim(); + let request = match serde_json::from_str::(trimmed) { + Ok(req) => req, + Err(err) => { + let resp = Response::Error { + message: format!("invalid request: {err}"), + }; + return write_response(&mut write_half, &resp).await; + } + }; + + let response = handler.handle(request).await; + write_response(&mut write_half, &response).await +} + +async fn write_response( + write_half: &mut tokio::net::unix::OwnedWriteHalf, + response: &Response, +) -> std::io::Result<()> { + let payload = encode_response(response).map_err(std::io::Error::other)?; + write_half.write_all(payload.as_bytes()).await?; + write_half.write_all(b"\n").await?; + write_half.flush().await?; + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::Arc; + use tokio::sync::oneshot; + + struct Echo; + + impl Handler for Echo { + async fn handle(&self, request: Request) -> Response { + match request { + Request::Status => Response::Ok, + Request::Shutdown => Response::Ok, + Request::EnsureWorker { app_type } => Response::Worker { + address: format!("addr-for-{app_type}"), + port: 1, + session_token: "tok".into(), + pid: 42, + }, + _ => Response::Error { + message: 
"unsupported in echo".into(), + }, + } + } + } + + #[tokio::test] + async fn server_handles_request_response_round_trip() { + let tmp = tempfile::tempdir().expect("tmp"); + let sock = tmp.path().join("daemon.sock"); + let listener = bind(&sock).expect("bind"); + + let (shutdown_tx, shutdown_rx) = oneshot::channel(); + let server = tokio::spawn(async move { + run(listener, Arc::new(Echo), async move { + let _ = shutdown_rx.await; + }) + .await; + }); + + // Drive a single request through the connection to confirm the loop + // dispatches and writes back. + let mut stream = UnixStream::connect(&sock).await.expect("connect"); + let req = serde_json::to_string(&Request::EnsureWorker { + app_type: "claude".into(), + }) + .unwrap(); + stream + .write_all(format!("{req}\n").as_bytes()) + .await + .unwrap(); + stream.shutdown().await.unwrap(); + let mut buf = String::new(); + let mut reader = BufReader::new(stream); + reader.read_line(&mut buf).await.expect("read response"); + let parsed: Response = serde_json::from_str(buf.trim()).expect("parse"); + match parsed { + Response::Worker { address, .. 
} => assert_eq!(address, "addr-for-claude"), + other => panic!("unexpected: {other:?}"), + } + + let _ = shutdown_tx.send(()); + server.await.expect("server task join"); + } + + #[tokio::test] + async fn server_returns_error_response_for_invalid_json() { + let tmp = tempfile::tempdir().expect("tmp"); + let sock = tmp.path().join("daemon.sock"); + let listener = bind(&sock).expect("bind"); + + let (shutdown_tx, shutdown_rx) = oneshot::channel(); + let server = tokio::spawn(async move { + run(listener, Arc::new(Echo), async move { + let _ = shutdown_rx.await; + }) + .await; + }); + + let mut stream = UnixStream::connect(&sock).await.expect("connect"); + stream.write_all(b"not-json\n").await.unwrap(); + stream.shutdown().await.unwrap(); + let mut buf = String::new(); + let mut reader = BufReader::new(stream); + reader.read_line(&mut buf).await.expect("read response"); + let parsed: Response = serde_json::from_str(buf.trim()).expect("parse"); + assert!(matches!(parsed, Response::Error { .. })); + + let _ = shutdown_tx.send(()); + server.await.expect("server task join"); + } +} diff --git a/src-tauri/src/daemon/logging.rs b/src-tauri/src/daemon/logging.rs new file mode 100644 index 00000000..a2b90a81 --- /dev/null +++ b/src-tauri/src/daemon/logging.rs @@ -0,0 +1,213 @@ +//! Daemon log file appender. +//! +//! Adds a file destination to the existing `log` facade. The file is opened in +//! append mode and rotated when it grows past `MAX_LOG_BYTES`, keeping a single +//! `.1` backup. Concurrent writers serialize through a `Mutex`. +//! +//! Initialization is idempotent: calling `install()` more than once installs +//! the logger only once. 
+ +use std::fs::{File, OpenOptions}; +use std::io::Write; +use std::path::{Path, PathBuf}; +use std::sync::{Mutex, OnceLock}; + +use chrono::Utc; +use log::{Level, LevelFilter, Log, Metadata, Record}; + +const MAX_LOG_BYTES: u64 = 5 * 1024 * 1024; + +static INSTALLED: OnceLock<()> = OnceLock::new(); + +struct DaemonLogger { + path: PathBuf, + file: Mutex, + level: LevelFilter, +} + +impl DaemonLogger { + fn open(path: &Path, level: LevelFilter) -> std::io::Result { + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + let file = OpenOptions::new().create(true).append(true).open(path)?; + Ok(Self { + path: path.to_path_buf(), + file: Mutex::new(file), + level, + }) + } +} + +impl Log for DaemonLogger { + fn enabled(&self, metadata: &Metadata) -> bool { + metadata.level() <= self.level + } + + fn log(&self, record: &Record) { + if !self.enabled(record.metadata()) { + return; + } + + let now = Utc::now().to_rfc3339(); + let line = format!( + "{} {:5} {} {}\n", + now, + level_label(record.level()), + record.target(), + record.args() + ); + + let mut file = match self.file.lock() { + Ok(f) => f, + Err(poisoned) => poisoned.into_inner(), + }; + + // Rotate before writing if the file is already over the cap; this way + // the new line lands in the fresh log file rather than the rolled-over + // backup. 
+ if let Ok(metadata) = file.metadata() { + if metadata.len() > MAX_LOG_BYTES { + drop(file); + let _ = self.rotate(); + file = match self.file.lock() { + Ok(f) => f, + Err(poisoned) => poisoned.into_inner(), + }; + } + } + + let _ = file.write_all(line.as_bytes()); + } + + fn flush(&self) { + if let Ok(mut file) = self.file.lock() { + let _ = file.flush(); + } + } +} + +impl DaemonLogger { + fn rotate(&self) -> std::io::Result<()> { + let backup = self.path.with_extension(format!( + "{}.1", + self.path + .extension() + .and_then(|s| s.to_str()) + .unwrap_or("log") + )); + let _ = std::fs::remove_file(&backup); + std::fs::rename(&self.path, &backup)?; + let new_file = OpenOptions::new() + .create(true) + .append(true) + .open(&self.path)?; + let mut guard = match self.file.lock() { + Ok(f) => f, + Err(poisoned) => poisoned.into_inner(), + }; + *guard = new_file; + Ok(()) + } +} + +fn level_label(level: Level) -> &'static str { + match level { + Level::Error => "ERROR", + Level::Warn => "WARN", + Level::Info => "INFO", + Level::Debug => "DEBUG", + Level::Trace => "TRACE", + } +} + +/// Install the daemon logger as the global `log` sink. Returns the resolved +/// log path. Idempotent — subsequent calls return the originally chosen path +/// without re-installing. 
+pub fn install(path: &Path, level: LevelFilter) -> Result { + let mut installed_path: Option = None; + INSTALLED.get_or_init(|| match DaemonLogger::open(path, level) { + Ok(logger) => { + let resolved = logger.path.clone(); + let boxed: Box = Box::new(logger); + if log::set_boxed_logger(boxed).is_ok() { + log::set_max_level(level); + } + installed_path = Some(resolved); + } + Err(_) => { + installed_path = None; + } + }); + + installed_path + .or_else(|| Some(path.to_path_buf())) + .ok_or_else(|| format!("install daemon logger at {} failed", path.display())) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn write_directly(path: &Path, msg: &str) { + let logger = DaemonLogger::open(path, LevelFilter::Info).expect("open logger"); + logger.log( + &Record::builder() + .level(Level::Info) + .target("test") + .args(format_args!("{}", msg)) + .build(), + ); + logger.flush(); + } + + #[test] + fn writing_appends_to_log_file() { + let tmp = tempfile::tempdir().expect("tmp"); + let path = tmp.path().join("test.log"); + write_directly(&path, "hello world"); + let contents = std::fs::read_to_string(&path).expect("read log"); + assert!(contents.contains("INFO")); + assert!(contents.contains("hello world")); + assert!(contents.ends_with('\n')); + } + + #[test] + fn rotation_renames_existing_log_when_too_large() { + let tmp = tempfile::tempdir().expect("tmp"); + let path = tmp.path().join("test.log"); + // Pre-populate with > MAX_LOG_BYTES so the next write triggers rotation. 
+ std::fs::write(&path, vec![b'x'; (MAX_LOG_BYTES + 1) as usize]).expect("seed"); + write_directly(&path, "trigger rotate"); + + let backup = path.with_extension("log.1"); + assert!(backup.exists(), "rotation should produce a .1 backup"); + let new_contents = std::fs::read_to_string(&path).expect("read new log"); + assert!(new_contents.contains("trigger rotate")); + } + + #[test] + fn level_filter_is_respected() { + let tmp = tempfile::tempdir().expect("tmp"); + let path = tmp.path().join("test.log"); + let logger = DaemonLogger::open(&path, LevelFilter::Warn).expect("open"); + logger.log( + &Record::builder() + .level(Level::Info) + .target("test") + .args(format_args!("info-line")) + .build(), + ); + logger.log( + &Record::builder() + .level(Level::Warn) + .target("test") + .args(format_args!("warn-line")) + .build(), + ); + logger.flush(); + let contents = std::fs::read_to_string(&path).unwrap_or_default(); + assert!(!contents.contains("info-line")); + assert!(contents.contains("warn-line")); + } +} diff --git a/src-tauri/src/daemon/mod.rs b/src-tauri/src/daemon/mod.rs new file mode 100644 index 00000000..df934576 --- /dev/null +++ b/src-tauri/src/daemon/mod.rs @@ -0,0 +1,152 @@ +//! User-space supervisor for the proxy worker. +//! +//! The daemon owns the worker process: it spawns it, watches it, restarts it +//! under a backoff policy, and keeps the SQLite `proxy_runtime_session` row +//! aligned with the actual worker state. Foreground TUI/CLI processes talk to +//! the daemon via a Unix domain socket. + +pub mod ipc; +pub mod logging; +pub mod paths; +pub mod pidfile; +pub mod restart; +pub mod supervisor; + +use std::path::PathBuf; +use std::sync::Arc; + +use log::LevelFilter; + +use crate::database::Database; + +use self::ipc::client; +use self::ipc::protocol::{Request, Response}; +use self::pidfile::{AcquireError, PidFile}; +use self::supervisor::Supervisor; + +/// Notify the daemon that the persisted global proxy switch should change. 
+/// The daemon writes the new desired state and aligns worker state with it. +/// +/// Returns `Ok(())` if there is no live daemon (socket missing, or socket +/// inode left over from a daemon that died ungracefully so `connect` returns +/// ECONNREFUSED/ENOENT) or the daemon acknowledged. Returns `Err(message)` +/// only when the socket has a live listener but the round-trip failed or the +/// daemon refused. +pub fn notify_global_switch(enabled: bool) -> Result<(), String> { + use std::io::ErrorKind; + let socket = paths::socket_path(); + if !socket.exists() { + return Ok(()); + } + match client::round_trip(&socket, &Request::SetGlobalEnabled { enabled }) { + Ok(Response::Ok) => Ok(()), + Ok(Response::Error { message }) => Err(message), + Ok(other) => Err(format!("unexpected daemon response: {other:?}")), + Err(client::ClientError::Io(e)) + if matches!(e.kind(), ErrorKind::ConnectionRefused | ErrorKind::NotFound) => + { + // Stale socket inode from a dead daemon — there is no worker to + // align with anyone. Best-effort remove so subsequent calls don't + // hit the same misfire. + let _ = std::fs::remove_file(&socket); + Ok(()) + } + Err(err) => Err(err.to_string()), + } +} + +/// Run the daemon to completion. Acquires the pidfile, installs the file +/// logger, runs startup recovery, binds the IPC socket, and dispatches +/// requests until shutdown is signalled. +pub async fn run(binary_path: PathBuf) -> Result<(), String> { + let pidfile_path = paths::pidfile_path(); + let socket_path = paths::socket_path(); + let log_path = paths::log_path(); + + let _pidfile = match PidFile::acquire(&pidfile_path) { + Ok(p) => p, + Err(AcquireError::AlreadyHeld { pid }) => { + // Another daemon is already running — exit cleanly. 
+ log::info!( + "another cc-switch daemon is already running (pid {})", + pid.map(|p| p.to_string()).unwrap_or_else(|| "?".into()) + ); + return Ok(()); + } + Err(AcquireError::Io(err)) => { + return Err(format!( + "acquire pidfile {} failed: {err}", + pidfile_path.display() + )); + } + }; + + let _ = logging::install(&log_path, LevelFilter::Info); + log::info!( + "[daemon] starting; pid={} socket={} log={}", + std::process::id(), + socket_path.display(), + log_path.display() + ); + + let db = + Arc::new(Database::init().map_err(|err| format!("daemon: open database failed: {err}"))?); + let supervisor = Supervisor::new(db, socket_path.clone(), binary_path); + + if let Err(err) = supervisor.recover_on_startup().await { + log::warn!("[daemon] startup recovery: {err}"); + } + + let listener = ipc::server::bind(&socket_path) + .map_err(|err| format!("bind socket {}: {err}", socket_path.display()))?; + log::info!("[daemon] listening on {}", socket_path.display()); + + let shutdown = supervisor.shutdown_signal(); + let supervisor_arc = Arc::new(supervisor); + + install_signal_handlers(supervisor_arc.clone()); + + ipc::server::run(listener, supervisor_arc, async move { + shutdown.notified().await; + }) + .await; + + log::info!("[daemon] exiting"); + let _ = std::fs::remove_file(&socket_path); + Ok(()) +} + +#[cfg(unix)] +fn install_signal_handlers(supervisor: Arc) { + use tokio::signal::unix::{signal, SignalKind}; + let term_supervisor = supervisor.clone(); + tokio::spawn(async move { + let mut sigterm = match signal(SignalKind::terminate()) { + Ok(s) => s, + Err(err) => { + log::warn!("install SIGTERM handler failed: {err}"); + return; + } + }; + if sigterm.recv().await.is_some() { + log::info!("[daemon] SIGTERM received, shutting down"); + term_supervisor.shutdown().await; + } + }); + tokio::spawn(async move { + let mut sigint = match signal(SignalKind::interrupt()) { + Ok(s) => s, + Err(err) => { + log::warn!("install SIGINT handler failed: {err}"); + return; + } + }; 
+ if sigint.recv().await.is_some() { + log::info!("[daemon] SIGINT received, shutting down"); + supervisor.shutdown().await; + } + }); +} + +#[cfg(not(unix))] +fn install_signal_handlers(_: Arc) {} diff --git a/src-tauri/src/daemon/paths.rs b/src-tauri/src/daemon/paths.rs new file mode 100644 index 00000000..f5f9ace3 --- /dev/null +++ b/src-tauri/src/daemon/paths.rs @@ -0,0 +1,130 @@ +use std::path::PathBuf; + +const APP_DIR_NAME: &str = "cc-switch"; + +pub fn runtime_dir() -> PathBuf { + runtime_dir_from(env_dir("XDG_RUNTIME_DIR"), env_dir("TMPDIR"), current_uid()) +} + +pub fn state_dir() -> PathBuf { + state_dir_from(env_dir("XDG_STATE_HOME"), home_dir(), current_uid()) +} + +pub fn socket_path() -> PathBuf { + runtime_dir().join("daemon.sock") +} + +pub fn pidfile_path() -> PathBuf { + runtime_dir().join("daemon.pid") +} + +pub fn log_path() -> PathBuf { + state_dir().join("cc-switchd.log") +} + +fn runtime_dir_from(xdg: Option, tmpdir: Option, uid: u32) -> PathBuf { + if let Some(dir) = xdg { + return dir.join(APP_DIR_NAME); + } + if let Some(dir) = tmpdir { + return dir.join(format!("{APP_DIR_NAME}-{uid}")); + } + PathBuf::from("/tmp").join(format!("{APP_DIR_NAME}-{uid}")) +} + +fn state_dir_from(xdg: Option, home: Option, uid: u32) -> PathBuf { + if let Some(dir) = xdg { + return dir.join(APP_DIR_NAME); + } + if let Some(home) = home { + return home.join(".local").join("state").join(APP_DIR_NAME); + } + PathBuf::from("/tmp").join(format!("{APP_DIR_NAME}-state-{uid}")) +} + +fn env_dir(key: &str) -> Option { + std::env::var_os(key) + .map(PathBuf::from) + .filter(|path| !path.as_os_str().is_empty()) +} + +fn home_dir() -> Option { + crate::config::home_dir() +} + +#[cfg(unix)] +fn current_uid() -> u32 { + unsafe { libc::getuid() } +} + +#[cfg(not(unix))] +fn current_uid() -> u32 { + 0 +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn runtime_dir_uses_xdg_runtime_dir_when_set() { + let dir = runtime_dir_from( + 
Some(PathBuf::from("/run/user/1000")), + Some(PathBuf::from("/tmp")), + 1000, + ); + assert_eq!(dir, PathBuf::from("/run/user/1000/cc-switch")); + } + + #[test] + fn runtime_dir_falls_back_to_tmpdir_with_uid_when_xdg_unset() { + let dir = runtime_dir_from(None, Some(PathBuf::from("/private/tmp")), 501); + assert_eq!(dir, PathBuf::from("/private/tmp/cc-switch-501")); + } + + #[test] + fn runtime_dir_uses_slash_tmp_when_neither_xdg_nor_tmpdir_set() { + let dir = runtime_dir_from(None, None, 0); + assert_eq!(dir, PathBuf::from("/tmp/cc-switch-0")); + } + + #[test] + fn state_dir_uses_xdg_state_home_when_set() { + let dir = state_dir_from( + Some(PathBuf::from("/home/u/.local/state")), + Some(PathBuf::from("/home/u")), + 1000, + ); + assert_eq!(dir, PathBuf::from("/home/u/.local/state/cc-switch")); + } + + #[test] + fn state_dir_falls_back_to_home_dot_local_state_when_xdg_state_unset() { + let dir = state_dir_from(None, Some(PathBuf::from("/home/u")), 1000); + assert_eq!(dir, PathBuf::from("/home/u/.local/state/cc-switch")); + } + + #[test] + fn state_dir_falls_back_to_tmp_when_no_home() { + let dir = state_dir_from(None, None, 42); + assert_eq!(dir, PathBuf::from("/tmp/cc-switch-state-42")); + } + + #[test] + fn socket_pidfile_log_paths_compose_from_resolved_dirs() { + let runtime = runtime_dir_from(Some(PathBuf::from("/run")), None, 0); + let state = state_dir_from(Some(PathBuf::from("/state")), None, 0); + assert_eq!( + runtime.join("daemon.sock"), + PathBuf::from("/run/cc-switch/daemon.sock") + ); + assert_eq!( + runtime.join("daemon.pid"), + PathBuf::from("/run/cc-switch/daemon.pid") + ); + assert_eq!( + state.join("cc-switchd.log"), + PathBuf::from("/state/cc-switch/cc-switchd.log") + ); + } +} diff --git a/src-tauri/src/daemon/pidfile.rs b/src-tauri/src/daemon/pidfile.rs new file mode 100644 index 00000000..f4575660 --- /dev/null +++ b/src-tauri/src/daemon/pidfile.rs @@ -0,0 +1,161 @@ +//! Pidfile management for the supervisor daemon. +//! +//! 
- The pidfile lives at `daemon::paths::pidfile_path()`. +//! - Acquiring it grabs a non-blocking exclusive flock; if another daemon is +//! already holding the lock we return `AlreadyHeld` so the caller can exit +//! gracefully. +//! - The lock is held for the lifetime of the returned `PidFile` value; the +//! kernel releases the flock when the file descriptor is closed (process +//! exit, panic, drop), so even an `abort()` cleans up automatically. + +use std::fs::{File, OpenOptions}; +use std::io::Write; +use std::os::unix::io::AsRawFd; +use std::path::{Path, PathBuf}; + +#[derive(Debug)] +pub enum AcquireError { + /// Another daemon already holds the lock. + AlreadyHeld { pid: Option }, + /// Filesystem or syscall error. + Io(std::io::Error), +} + +impl std::fmt::Display for AcquireError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::AlreadyHeld { pid: Some(p) } => { + write!(f, "another cc-switch daemon is already running (pid {p})") + } + Self::AlreadyHeld { pid: None } => { + write!(f, "another cc-switch daemon is already running") + } + Self::Io(e) => write!(f, "{e}"), + } + } +} + +impl std::error::Error for AcquireError {} + +#[derive(Debug)] +pub struct PidFile { + file: File, + path: PathBuf, +} + +impl PidFile { + pub fn acquire(path: impl AsRef) -> Result { + let path = path.as_ref().to_path_buf(); + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent).map_err(AcquireError::Io)?; + } + + let file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(false) + .open(&path) + .map_err(AcquireError::Io)?; + + flock_exclusive_nonblock(&file).map_err(|err| { + if err.kind() == std::io::ErrorKind::WouldBlock { + let pid = read_pid(&path); + AcquireError::AlreadyHeld { pid } + } else { + AcquireError::Io(err) + } + })?; + + // We own the lock — write our pid (truncate first so a stale longer + // value doesn't bleed through). 
+ let mut writer = &file; + writer.set_len(0).map_err(AcquireError::Io)?; + let pid_text = format!("{}\n", std::process::id()); + writer + .write_all(pid_text.as_bytes()) + .map_err(AcquireError::Io)?; + writer.flush().map_err(AcquireError::Io)?; + + Ok(Self { file, path }) + } + + pub fn path(&self) -> &Path { + &self.path + } +} + +impl Drop for PidFile { + fn drop(&mut self) { + // Kernel releases the flock when the fd closes; we only need to remove + // the on-disk file so a fresh daemon doesn't see a leftover pid number. + let _ = std::fs::remove_file(&self.path); + // file is dropped after this, releasing the flock. + let _ = &self.file; + } +} + +fn flock_exclusive_nonblock(file: &File) -> std::io::Result<()> { + let rc = unsafe { libc::flock(file.as_raw_fd(), libc::LOCK_EX | libc::LOCK_NB) }; + if rc == 0 { + Ok(()) + } else { + Err(std::io::Error::last_os_error()) + } +} + +fn read_pid(path: &Path) -> Option { + std::fs::read_to_string(path) + .ok()? + .trim() + .parse::() + .ok() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn acquire_writes_current_pid_and_releases_on_drop() { + let tmp = tempfile::tempdir().expect("tmp"); + let pidfile_path = tmp.path().join("daemon.pid"); + + let lock = PidFile::acquire(&pidfile_path).expect("acquire"); + let contents = std::fs::read_to_string(&pidfile_path).expect("read pidfile"); + assert_eq!( + contents.trim().parse::().ok(), + Some(std::process::id()) + ); + + drop(lock); + assert!(!pidfile_path.exists(), "pidfile should be removed on drop"); + } + + #[test] + fn second_acquire_returns_already_held_with_pid() { + let tmp = tempfile::tempdir().expect("tmp"); + let pidfile_path = tmp.path().join("daemon.pid"); + + let _first = PidFile::acquire(&pidfile_path).expect("first acquire"); + let err = PidFile::acquire(&pidfile_path).expect_err("second acquire should fail"); + match err { + AcquireError::AlreadyHeld { pid } => { + assert_eq!(pid, Some(std::process::id())); + } + other => panic!("unexpected 
error: {other:?}"), + } + } + + #[test] + fn second_acquire_succeeds_after_first_drops() { + let tmp = tempfile::tempdir().expect("tmp"); + let pidfile_path = tmp.path().join("daemon.pid"); + + let first = PidFile::acquire(&pidfile_path).expect("first"); + drop(first); + + let second = PidFile::acquire(&pidfile_path).expect("second after release"); + drop(second); + } +} diff --git a/src-tauri/src/daemon/restart.rs b/src-tauri/src/daemon/restart.rs new file mode 100644 index 00000000..19fcc900 --- /dev/null +++ b/src-tauri/src/daemon/restart.rs @@ -0,0 +1,239 @@ +//! Restart policy state machine for the supervised worker. +//! +//! Behavior (matches the approved plan): +//! - Exponential backoff per attempt: 1s, 2s, 4s, 8s, 16s, capped at 30s. +//! - Circuit-break after `MAX_FAILURES` consecutive failures inside +//! `WINDOW_SECS`; the daemon should give up, log a fatal trace, clear +//! `proxy_runtime_session`, and exit. +//! - The attempt counter (and therefore the next-delay) resets after the +//! worker has been running continuously for `STABLE_UPTIME_SECS`. +//! +//! The state machine is pure: it takes a "now" instant from the caller and +//! returns the chosen action. That keeps tests deterministic and avoids any +//! reliance on real wall-clock sleeps. + +use std::collections::VecDeque; +use std::time::{Duration, Instant}; + +const BASE_DELAY: Duration = Duration::from_secs(1); +const MAX_DELAY: Duration = Duration::from_secs(30); +const MAX_FAILURES: usize = 5; +const WINDOW: Duration = Duration::from_secs(60); +const STABLE_UPTIME: Duration = Duration::from_secs(60); + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Decision { + /// Daemon should sleep for `delay` and then respawn the worker. + Restart { delay: Duration, attempt: u32 }, + /// Daemon should give up, mark proxy as down, and exit. 
+ GiveUp, +} + +#[derive(Debug)] +pub struct RestartPolicy { + failures: VecDeque, + attempt: u32, + last_started_at: Option, +} + +impl Default for RestartPolicy { + fn default() -> Self { + Self::new() + } +} + +impl RestartPolicy { + pub fn new() -> Self { + Self { + failures: VecDeque::new(), + attempt: 0, + last_started_at: None, + } + } + + /// Called whenever the daemon is about to start (or restart) the worker. + pub fn on_worker_started(&mut self, now: Instant) { + self.last_started_at = Some(now); + } + + /// Called when the worker has exited abnormally. Returns the next decision. + pub fn on_worker_exited(&mut self, now: Instant) -> Decision { + if let Some(started) = self.last_started_at.take() { + if now.saturating_duration_since(started) >= STABLE_UPTIME { + self.attempt = 0; + self.failures.clear(); + } + } + + self.failures.push_back(now); + while let Some(front) = self.failures.front() { + if now.saturating_duration_since(*front) > WINDOW { + self.failures.pop_front(); + } else { + break; + } + } + + if self.failures.len() >= MAX_FAILURES { + return Decision::GiveUp; + } + + let delay = backoff_for(self.attempt); + let decision = Decision::Restart { + delay, + attempt: self.attempt, + }; + self.attempt = self.attempt.saturating_add(1); + decision + } + + #[cfg(test)] + pub(crate) fn attempt_count(&self) -> u32 { + self.attempt + } +} + +fn backoff_for(attempt: u32) -> Duration { + let secs = 1u64.checked_shl(attempt).unwrap_or(u64::MAX); + let computed = Duration::from_secs(secs.min(MAX_DELAY.as_secs())); + if computed < BASE_DELAY { + BASE_DELAY + } else if computed > MAX_DELAY { + MAX_DELAY + } else { + computed + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn t(secs: u64) -> Instant { + Instant::now() + .checked_sub(Duration::from_secs(0)) + .unwrap() + .checked_add(Duration::from_secs(secs)) + .unwrap() + } + + #[test] + fn first_failure_returns_one_second_delay() { + let mut p = RestartPolicy::new(); + let now = t(0); + 
p.on_worker_started(now); + match p.on_worker_exited(now + Duration::from_secs(1)) { + Decision::Restart { delay, attempt } => { + assert_eq!(delay, Duration::from_secs(1)); + assert_eq!(attempt, 0); + } + other => panic!("expected Restart, got {other:?}"), + } + } + + #[test] + fn delay_doubles_per_attempt_until_circuit_break() { + // With MAX_FAILURES = 5, the 5th consecutive failure inside the window + // gives up, so we only see four Restart decisions before GiveUp. + let mut p = RestartPolicy::new(); + let now = t(0); + p.on_worker_started(now); + let mut delays = Vec::new(); + for i in 0..5 { + let exit = now + Duration::from_secs(i); + match p.on_worker_exited(exit) { + Decision::Restart { delay, .. } => { + delays.push(delay); + p.on_worker_started(exit); + } + Decision::GiveUp => break, + } + } + assert_eq!( + delays, + vec![ + Duration::from_secs(1), + Duration::from_secs(2), + Duration::from_secs(4), + Duration::from_secs(8), + ] + ); + } + + #[test] + fn long_run_of_failures_caps_delay_at_thirty_seconds() { + // Force the attempt counter past the 30s cap by spacing failures + // beyond the 60s window so the circuit doesn't trip. + let mut p = RestartPolicy::new(); + let mut now = t(0); + let mut last_delay = Duration::from_secs(0); + for _ in 0..10 { + p.on_worker_started(now); + let exit = now + Duration::from_secs(1); + match p.on_worker_exited(exit) { + Decision::Restart { delay, .. } => last_delay = delay, + Decision::GiveUp => panic!("should not give up when window evicts old failures"), + } + // Step ~70s forward so the rolling window evicts the prior failure. 
+ now = exit + Duration::from_secs(70); + } + assert_eq!(last_delay, Duration::from_secs(30)); + } + + #[test] + fn circuit_breaks_after_five_failures_in_window() { + let mut p = RestartPolicy::new(); + let start = t(0); + for i in 0..5 { + let exit = start + Duration::from_secs(i); + p.on_worker_started(exit); + let decision = p.on_worker_exited(exit + Duration::from_millis(100)); + if i < 4 { + assert!(matches!(decision, Decision::Restart { .. }), "i={i}"); + } else { + assert_eq!(decision, Decision::GiveUp, "i={i}"); + } + } + } + + #[test] + fn failures_outside_window_do_not_count_toward_circuit_break() { + let mut p = RestartPolicy::new(); + let start = t(0); + // 4 failures spread well within the window but separated, so attempt + // grows but the deque should evict old entries when `now > window`. + for i in 0..4 { + let exit = start + Duration::from_secs(i); + p.on_worker_started(exit); + assert!(matches!(p.on_worker_exited(exit), Decision::Restart { .. })); + } + // Far future: previous failures fall out of the rolling window. + let later = start + Duration::from_secs(200); + p.on_worker_started(later); + let decision = p.on_worker_exited(later + Duration::from_secs(1)); + assert!(matches!(decision, Decision::Restart { .. 
})); + } + + #[test] + fn stable_uptime_resets_attempt_counter() { + let mut p = RestartPolicy::new(); + let start = t(0); + for i in 0..3 { + let exit = start + Duration::from_secs(i); + p.on_worker_started(exit); + p.on_worker_exited(exit + Duration::from_millis(50)); + } + assert_eq!(p.attempt_count(), 3); + + let stable_start = start + Duration::from_secs(1000); + p.on_worker_started(stable_start); + let stable_exit = stable_start + STABLE_UPTIME + Duration::from_secs(1); + match p.on_worker_exited(stable_exit) { + Decision::Restart { delay, attempt } => { + assert_eq!(attempt, 0, "stable uptime should reset attempt counter"); + assert_eq!(delay, Duration::from_secs(1)); + } + other => panic!("expected Restart, got {other:?}"), + } + } +} diff --git a/src-tauri/src/daemon/supervisor.rs b/src-tauri/src/daemon/supervisor.rs new file mode 100644 index 00000000..e1e0072a --- /dev/null +++ b/src-tauri/src/daemon/supervisor.rs @@ -0,0 +1,579 @@ +//! The supervisor: spawns and watches the proxy worker, owns the daemon's +//! shared `ProxyService`, and translates IPC requests into actions. +//! +//! Most of the heavy lifting (config rewrites, restoration, common-config +//! preservation) lives in `ProxyService`. The supervisor's job is to keep one +//! worker per active app route, keep the `proxy_runtime_session` row aligned +//! with the actual workers, and survive worker crashes via the restart policy. 
+ +use std::collections::{HashMap, HashSet}; +use std::path::PathBuf; +use std::process::Stdio; +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use serde_json::json; +use tokio::process::{Child, Command}; +use tokio::sync::{oneshot, Mutex, Notify}; + +use crate::app_config::AppType; +use crate::database::Database; +use crate::services::ProxyService; + +use super::ipc::protocol::{Request, Response, TakeoverFlags, WorkerState}; +use super::ipc::server::Handler; +use super::restart::{Decision, RestartPolicy}; + +const PROXY_RUNTIME_SESSION_KEY: &str = "proxy_runtime_session"; +pub const DAEMON_SOCKET_ENV: &str = "CC_SWITCH_DAEMON_SOCKET"; +pub const SESSION_TOKEN_ENV: &str = "CC_SWITCH_PROXY_SESSION_TOKEN"; +pub const RESTORE_GUARD_BYPASS_ENV: &str = "CC_SWITCH_RESTORE_GUARD_BYPASS"; +/// Mirrors `services::proxy::PROXY_RUNTIME_KIND_ENV_KEY`. Setting this to +/// `managed_external` makes the worker skip self-publishing the runtime +/// session row — the daemon writes it after WorkerHello. 
+pub const RUNTIME_KIND_ENV: &str = "CC_SWITCH_PROXY_RUNTIME_KIND"; +pub const RUNTIME_KIND_MANAGED_EXTERNAL: &str = "managed_external"; + +const WORKER_HELLO_TIMEOUT: Duration = Duration::from_secs(10); + +#[derive(Debug, Clone)] +struct WorkerInfo { + app_type: AppType, + pid: u32, + address: String, + port: u16, + session_token: String, +} + +#[derive(Default)] +struct SupervisorInner { + workers: HashMap, + pending_hellos: HashMap>, + pending_tokens: HashMap, + stopping_workers: HashSet, + restart: RestartPolicy, + last_restart_at: Option>, + restart_count: u32, + shutdown_requested: bool, +} + +#[derive(Clone)] +pub struct Supervisor { + db: Arc, + proxy: ProxyService, + inner: Arc>, + /// Serializes worker spawn so concurrent EnsureWorker IPCs share the same + /// pending hello rather than racing — a second caller used to overwrite + /// `pending_hello` and `pending_token`, leaving the first caller waiting + /// the full 10 s `WORKER_HELLO_TIMEOUT` and then surfacing as + /// "Resource temporarily unavailable (os error 35)" once the client's 15 s + /// IPC read timeout expired. + spawn_lock: Arc>, + socket_path: PathBuf, + binary_path: PathBuf, + shutdown_notify: Arc, +} + +impl Supervisor { + pub fn new(db: Arc, socket_path: PathBuf, binary_path: PathBuf) -> Self { + let proxy = ProxyService::new(db.clone()); + Self { + db, + proxy, + inner: Arc::new(Mutex::new(SupervisorInner::default())), + spawn_lock: Arc::new(Mutex::new(())), + socket_path, + binary_path, + shutdown_notify: Arc::new(Notify::new()), + } + } + + pub fn shutdown_signal(&self) -> Arc { + self.shutdown_notify.clone() + } + + pub async fn recover_on_startup(&self) -> Result<(), String> { + self.proxy.recover_takeovers_on_startup().await + } + + /// Bring up a worker if none is running, then return its bound address. 
+ /// + /// Concurrent callers serialize through `spawn_lock` so we never spawn two + /// workers in parallel (which would fight for the listen port and corrupt + /// `pending_hello`). After acquiring the lock we re-check `inner.worker` so + /// later callers reuse the worker the first one brought up. + async fn ensure_worker(&self, app: AppType) -> Result { + let _spawn_guard = self.spawn_lock.lock().await; + let app_key = app.as_str().to_string(); + + let (session_token, hello_rx) = { + let mut inner = self.inner.lock().await; + if let Some(info) = inner.workers.get(&app).cloned() { + return Ok(info); + } + let (tx, rx) = oneshot::channel(); + inner.pending_hellos.insert(app_key.clone(), tx); + let token = uuid::Uuid::new_v4().to_string(); + inner.pending_tokens.insert(app_key.clone(), token.clone()); + (token, rx) + }; + + let app_config = self + .db + .get_proxy_config_for_app(&app_key) + .await + .map_err(|err| format!("load proxy config for {app_key} failed: {err}"))?; + let global_config = self + .db + .get_global_proxy_config() + .await + .map_err(|err| format!("load global proxy config failed: {err}"))?; + + let mut cmd = Command::new(&self.binary_path); + cmd.arg("proxy") + .arg("serve") + .arg("--listen-address") + .arg(global_config.listen_address) + .arg("--listen-port") + .arg(app_config.listen_port.to_string()) + .env(DAEMON_SOCKET_ENV, &self.socket_path) + .env(SESSION_TOKEN_ENV, &session_token) + .env(RESTORE_GUARD_BYPASS_ENV, "1") + .env(RUNTIME_KIND_ENV, RUNTIME_KIND_MANAGED_EXTERNAL) + .stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .kill_on_drop(true); + + let spawned = cmd + .spawn() + .map_err(|err| format!("spawn {app_key} proxy worker failed: {err}"))?; + let pid = spawned + .id() + .ok_or_else(|| format!("spawned {app_key} worker has no pid"))?; + log::info!("[daemon] spawned {app_key} worker pid={pid}"); + + let supervisor = self.clone(); + let watch_app = app.clone(); + tokio::spawn(async move { + 
supervisor.watch_worker(spawned, watch_app, pid).await; + }); + + let info = match tokio::time::timeout(WORKER_HELLO_TIMEOUT, hello_rx).await { + Ok(Ok(info)) => info, + Ok(Err(_)) => return Err(format!("{app_key} worker exited before hello")), + Err(_) => return Err(format!("{app_key} worker hello timed out")), + }; + + { + let mut inner = self.inner.lock().await; + inner.workers.insert(app.clone(), info.clone()); + inner.last_restart_at = Some(chrono::Utc::now()); + inner.restart.on_worker_started(Instant::now()); + inner.pending_tokens.remove(&app_key); + } + self.persist_runtime_session().await?; + Ok(info) + } + + async fn handle_ensure_worker(&self, app_type: &str) -> Response { + let app = match parse_app_type(app_type) { + Some(a) => a, + None => { + return Response::Error { + message: format!("proxy takeover not supported for app: {app_type}"), + }; + } + }; + + let info = match self.ensure_worker(app.clone()).await { + Ok(info) => info, + Err(err) => { + return Response::Error { message: err }; + } + }; + + if let Err(err) = self.proxy.set_global_enabled(true).await { + return Response::Error { + message: err.to_string(), + }; + } + + if let Err(err) = self.proxy.set_takeover_for_app(app.as_str(), true).await { + return Response::Error { message: err }; + } + + Response::Worker { + address: info.address, + port: info.port, + session_token: info.session_token, + pid: info.pid, + } + } + + async fn handle_drop_takeover(&self, app_type: &str) -> Response { + let app = match parse_app_type(app_type) { + Some(a) => a, + None => { + return Response::Error { + message: format!("proxy takeover not supported for app: {app_type}"), + }; + } + }; + + if let Err(err) = self.proxy.set_takeover_for_app(app.as_str(), false).await { + return Response::Error { message: err }; + } + + let (stop_pid, had_worker, should_shutdown) = { + let mut inner = self.inner.lock().await; + let pid = inner.workers.get(&app).map(|w| w.pid); + if pid.is_some() { + 
inner.stopping_workers.insert(app.clone()); + if inner.workers.len() <= 1 { + inner.shutdown_requested = true; + } + } + (pid, pid.is_some(), inner.workers.len() <= 1) + }; + let _ = send_sigterm(stop_pid); + if had_worker { + tokio::time::sleep(Duration::from_millis(100)).await; + } else if should_shutdown { + self.shutdown_notify.notify_waiters(); + } + Response::Ok + } + + async fn handle_worker_hello( + &self, + pid: u32, + address: String, + port: u16, + session_token: String, + ) -> Response { + let mut inner = self.inner.lock().await; + let app_key = inner + .pending_tokens + .iter() + .find_map(|(app_type, token)| (token == &session_token).then(|| app_type.clone())); + let Some(app_key) = app_key else { + log::warn!("[daemon] worker hello with mismatched token (pid={pid})"); + return Response::Error { + message: "session token mismatch".to_string(), + }; + }; + let Some(tx) = inner.pending_hellos.remove(&app_key) else { + log::warn!("[daemon] worker hello received but no pending ensure (pid={pid})"); + return Response::Error { + message: "no pending worker registration".to_string(), + }; + }; + let Some(app_type) = parse_app_type(&app_key) else { + return Response::Error { + message: format!("proxy takeover not supported for app: {app_key}"), + }; + }; + let info = WorkerInfo { + app_type, + pid, + address, + port, + session_token, + }; + if tx.send(info).is_err() { + log::warn!("[daemon] worker hello dropped: ensure waiter cancelled"); + } + Response::Ok + } + + async fn handle_set_global_enabled(&self, enabled: bool) -> Response { + if enabled { + match self.proxy.set_global_enabled(true).await { + Ok(_) => return Response::Ok, + Err(err) => { + return Response::Error { + message: err.to_string(), + }; + } + } + } + + // Disabling: drop every active takeover so each app's live config is + // restored, then stop the worker. 
We snapshot the active list under + // the inner lock so we don't hold it while running per-app restores + // (which acquire the file-level state mutation guard). + let mut active = Vec::new(); + for app in [AppType::Claude, AppType::Codex, AppType::Gemini] { + match self.db.get_proxy_config_for_app(app.as_str()).await { + Ok(config) if config.enabled => active.push(app), + Ok(_) => {} + Err(err) => log::warn!( + "[daemon] set_global_enabled(false): read {} proxy config failed: {err}", + app.as_str() + ), + } + } + for app in &active { + if let Err(err) = self.proxy.set_takeover_for_app(app.as_str(), false).await { + log::warn!( + "[daemon] set_global_enabled(false): drop takeover for {} failed: {err}", + app.as_str() + ); + } + } + + let stop_pids = { + let mut inner = self.inner.lock().await; + inner.shutdown_requested = true; + inner.workers.values().map(|w| w.pid).collect::>() + }; + for pid in &stop_pids { + let _ = send_sigterm(Some(*pid)); + } + if !stop_pids.is_empty() { + // Brief pause so the worker has a chance to exit and the watcher + // task can clear the runtime session row before we ack. The + // watcher then sees `active_takeovers.is_empty()` and signals + // daemon shutdown. + tokio::time::sleep(Duration::from_millis(100)).await; + } else { + // No worker to drain — signal shutdown directly so the daemon + // doesn't stay idle after a "disable everything" with nothing + // currently running. 
+ self.shutdown_notify.notify_waiters(); + } + Response::Ok + } + + async fn handle_status(&self) -> Response { + let inner = self.inner.lock().await; + let takeovers = self.read_takeover_flags().await; + let mut workers = inner + .workers + .values() + .map(|info| WorkerState { + app_type: info.app_type.as_str().to_string(), + running: true, + address: info.address.clone(), + port: info.port, + pid: Some(info.pid), + }) + .collect::>(); + workers.sort_by(|left, right| left.app_type.cmp(&right.app_type)); + let primary = workers.first(); + Response::Status { + running: !workers.is_empty(), + address: primary.map(|w| w.address.clone()).unwrap_or_default(), + port: primary.map(|w| w.port).unwrap_or_default(), + worker_pid: primary.and_then(|w| w.pid), + takeovers, + restart_count: inner.restart_count, + last_restart_at: inner.last_restart_at.map(|d| d.to_rfc3339()), + workers, + } + } + + pub async fn shutdown(&self) { + let stop_pids = { + let mut inner = self.inner.lock().await; + inner.shutdown_requested = true; + inner.workers.values().map(|w| w.pid).collect::>() + }; + for pid in stop_pids { + let _ = send_sigterm(Some(pid)); + } + if let Err(err) = self.proxy.stop_with_restore().await { + log::warn!("[daemon] shutdown: stop_with_restore failed: {err}"); + } + let _ = self.clear_runtime_session(); + self.shutdown_notify.notify_waiters(); + } + + async fn handle_shutdown(&self) -> Response { + self.shutdown().await; + Response::Ok + } + + async fn read_takeover_flags(&self) -> TakeoverFlags { + let status = self.proxy.get_takeover_status().await.unwrap_or_default(); + TakeoverFlags { + claude: status.claude, + codex: status.codex, + gemini: status.gemini, + } + } + + async fn watch_worker(&self, mut child: Child, app: AppType, pid: u32) { + let app_key = app.as_str().to_string(); + let exit_status = match child.wait().await { + Ok(status) => status, + Err(err) => { + log::warn!("[daemon] waitpid {app_key} worker={pid} failed: {err}"); + return; + } + }; + 
log::info!("[daemon] {app_key} worker pid={pid} exited: {exit_status}"); + + let (intentional, has_remaining_workers) = { + let mut inner = self.inner.lock().await; + inner.workers.remove(&app); + inner.pending_tokens.remove(&app_key); + if let Some(tx) = inner.pending_hellos.remove(&app_key) { + drop(tx); + } + let intentional = inner.shutdown_requested || inner.stopping_workers.remove(&app); + (intentional, !inner.workers.is_empty()) + }; + + let _ = self.persist_runtime_session().await; + + if intentional { + log::info!("[daemon] {app_key} worker exit was expected, not restarting"); + if !has_remaining_workers { + log::info!("[daemon] no remaining workers, exiting"); + self.shutdown_notify.notify_waiters(); + } + return; + } + + if let Err(err) = self.proxy.set_takeover_for_app(app.as_str(), false).await { + log::warn!("[daemon] restore takeover for {app_key} failed: {err}"); + } + + let decision = { + let mut inner = self.inner.lock().await; + inner.restart.on_worker_exited(Instant::now()) + }; + + match decision { + Decision::Restart { delay, attempt } => { + log::warn!( + "[daemon] {app_key} worker pid={pid} crashed; restarting in {:?} (attempt {})", + delay, + attempt + 1 + ); + tokio::time::sleep(delay).await; + { + let mut inner = self.inner.lock().await; + inner.restart_count = inner.restart_count.saturating_add(1); + } + if let Err(err) = self.respawn_after_crash(app).await { + log::error!("[daemon] respawn {app_key} after crash failed: {err}"); + } + } + Decision::GiveUp => { + log::error!( + "[daemon] {app_key} worker pid={pid} circuit-broke after repeated crashes" + ); + if !has_remaining_workers { + self.shutdown_notify.notify_waiters(); + } + } + } + } + + fn respawn_after_crash<'a>( + &'a self, + app: AppType, + ) -> std::pin::Pin> + Send + 'a>> { + Box::pin(async move { + let _info = self.ensure_worker(app.clone()).await?; + if let Err(err) = self.proxy.set_takeover_for_app(app.as_str(), true).await { + log::warn!( + "[daemon] re-applying takeover 
for {} after restart failed: {err}", + app.as_str() + ); + } + Ok(()) + }) + } + + async fn persist_runtime_session(&self) -> Result<(), String> { + let workers = { + let inner = self.inner.lock().await; + inner + .workers + .iter() + .map(|(app, info)| { + ( + app.as_str().to_string(), + json!({ + "pid": info.pid, + "address": info.address, + "port": info.port, + "started_at": chrono::Utc::now().to_rfc3339(), + "kind": "managed_external", + "session_token": info.session_token, + "app_type": app.as_str(), + }), + ) + }) + .collect::>() + }; + if workers.is_empty() { + return self.clear_runtime_session(); + } + let payload = json!({ "workers": workers }); + let serialized = serde_json::to_string(&payload) + .map_err(|err| format!("serialize runtime session failed: {err}"))?; + self.db + .set_setting(PROXY_RUNTIME_SESSION_KEY, &serialized) + .map_err(|err| format!("persist runtime session failed: {err}")) + } + + fn clear_runtime_session(&self) -> Result<(), String> { + self.db + .delete_setting(PROXY_RUNTIME_SESSION_KEY) + .map_err(|err| format!("clear runtime session failed: {err}")) + } +} + +impl Handler for Supervisor { + async fn handle(&self, request: Request) -> Response { + match request { + Request::EnsureWorker { app_type } => self.handle_ensure_worker(&app_type).await, + Request::DropTakeover { app_type } => self.handle_drop_takeover(&app_type).await, + Request::Status => self.handle_status().await, + Request::WorkerHello { + pid, + address, + port, + session_token, + } => { + self.handle_worker_hello(pid, address, port, session_token) + .await + } + Request::SetGlobalEnabled { enabled } => self.handle_set_global_enabled(enabled).await, + Request::Shutdown => self.handle_shutdown().await, + } + } +} + +fn parse_app_type(s: &str) -> Option { + match s { + "claude" => Some(AppType::Claude), + "codex" => Some(AppType::Codex), + "gemini" => Some(AppType::Gemini), + _ => None, + } +} + +fn send_sigterm(pid: Option) -> Result<(), String> { + let Some(pid) = pid 
else { + return Ok(()); + }; + if pid == 0 { + return Ok(()); + } + unsafe { + let rc = libc::kill(pid as i32, libc::SIGTERM); + if rc != 0 { + let err = std::io::Error::last_os_error(); + if err.raw_os_error() != Some(libc::ESRCH) { + return Err(format!("SIGTERM worker {pid}: {err}")); + } + } + } + Ok(()) +} diff --git a/src-tauri/src/database/dao/proxy.rs b/src-tauri/src/database/dao/proxy.rs index 2db7a078..954b1b77 100644 --- a/src-tauri/src/database/dao/proxy.rs +++ b/src-tauri/src/database/dao/proxy.rs @@ -9,6 +9,15 @@ use rust_decimal::Decimal; use super::super::{lock_conn, Database}; +fn default_app_listen_port(app_type: &str) -> u16 { + match app_type { + "claude" => 15721, + "codex" => 15722, + "gemini" => 15723, + _ => 15721, + } +} + impl Database { // ==================== Global Proxy Config ==================== @@ -85,13 +94,11 @@ impl Database { "UPDATE proxy_config SET proxy_enabled = ?1, listen_address = ?2, - listen_port = ?3, - enable_logging = ?4, + enable_logging = ?3, updated_at = datetime('now')", rusqlite::params![ if config.proxy_enabled { 1 } else { 0 }, config.listen_address, - config.listen_port as i32, if config.enable_logging { 1 } else { 0 }, ], ) @@ -252,7 +259,7 @@ impl Database { let result = { let conn = lock_conn!(self.conn); conn.query_row( - "SELECT app_type, enabled, auto_failover_enabled, + "SELECT app_type, enabled, listen_port, auto_failover_enabled, max_retries, streaming_first_byte_timeout, streaming_idle_timeout, non_streaming_timeout, circuit_failure_threshold, circuit_success_threshold, circuit_timeout_seconds, circuit_error_rate_threshold, circuit_min_requests @@ -262,16 +269,17 @@ impl Database { Ok(AppProxyConfig { app_type: row.get(0)?, enabled: row.get::<_, i32>(1)? != 0, - auto_failover_enabled: row.get::<_, i32>(2)? != 0, - max_retries: row.get::<_, i32>(3)? as u32, - streaming_first_byte_timeout: row.get::<_, i32>(4)? as u32, - streaming_idle_timeout: row.get::<_, i32>(5)? 
as u32, - non_streaming_timeout: row.get::<_, i32>(6)? as u32, - circuit_failure_threshold: row.get::<_, i32>(7)? as u32, - circuit_success_threshold: row.get::<_, i32>(8)? as u32, - circuit_timeout_seconds: row.get::<_, i32>(9)? as u32, - circuit_error_rate_threshold: row.get(10)?, - circuit_min_requests: row.get::<_, i32>(11)? as u32, + listen_port: row.get::<_, i32>(2)? as u16, + auto_failover_enabled: row.get::<_, i32>(3)? != 0, + max_retries: row.get::<_, i32>(4)? as u32, + streaming_first_byte_timeout: row.get::<_, i32>(5)? as u32, + streaming_idle_timeout: row.get::<_, i32>(6)? as u32, + non_streaming_timeout: row.get::<_, i32>(7)? as u32, + circuit_failure_threshold: row.get::<_, i32>(8)? as u32, + circuit_success_threshold: row.get::<_, i32>(9)? as u32, + circuit_timeout_seconds: row.get::<_, i32>(10)? as u32, + circuit_error_rate_threshold: row.get(11)?, + circuit_min_requests: row.get::<_, i32>(12)? as u32, }) }, ) @@ -286,6 +294,7 @@ impl Database { Ok(AppProxyConfig { app_type: app_type_owned, enabled: false, + listen_port: default_app_listen_port(app_type), auto_failover_enabled: false, max_retries: 3, streaming_first_byte_timeout: 60, @@ -318,21 +327,23 @@ impl Database { conn.execute( "UPDATE proxy_config SET enabled = ?2, - auto_failover_enabled = ?3, - max_retries = ?4, - streaming_first_byte_timeout = ?5, - streaming_idle_timeout = ?6, - non_streaming_timeout = ?7, - circuit_failure_threshold = ?8, - circuit_success_threshold = ?9, - circuit_timeout_seconds = ?10, - circuit_error_rate_threshold = ?11, - circuit_min_requests = ?12, + listen_port = ?3, + auto_failover_enabled = ?4, + max_retries = ?5, + streaming_first_byte_timeout = ?6, + streaming_idle_timeout = ?7, + non_streaming_timeout = ?8, + circuit_failure_threshold = ?9, + circuit_success_threshold = ?10, + circuit_timeout_seconds = ?11, + circuit_error_rate_threshold = ?12, + circuit_min_requests = ?13, updated_at = datetime('now') WHERE app_type = ?1", rusqlite::params![ 
config.app_type, if config.enabled { 1 } else { 0 }, + config.listen_port as i32, if auto_failover_enabled { 1 } else { 0 }, config.max_retries as i32, config.streaming_first_byte_timeout as i32, @@ -360,23 +371,33 @@ impl Database { .map_err(|e| AppError::Lock(e.to_string()))?; // 根据 app_type 使用不同的默认值(与 schema.rs seed 保持一致) - let (retries, fb_timeout, idle_timeout, cb_fail, cb_succ, cb_timeout, cb_rate, cb_min) = - match app_type { - "claude" => (6, 90, 180, 8, 3, 90, 0.7, 15), - "codex" => (3, 60, 120, 4, 2, 60, 0.6, 10), - "gemini" => (5, 60, 120, 4, 2, 60, 0.6, 10), - _ => (3, 60, 120, 4, 2, 60, 0.6, 10), // 默认值 - }; + let ( + retries, + fb_timeout, + idle_timeout, + cb_fail, + cb_succ, + cb_timeout, + cb_rate, + cb_min, + listen_port, + ) = match app_type { + "claude" => (6, 90, 180, 8, 3, 90, 0.7, 15, 15721), + "codex" => (3, 60, 120, 4, 2, 60, 0.6, 10, 15722), + "gemini" => (5, 60, 120, 4, 2, 60, 0.6, 10, 15723), + _ => (3, 60, 120, 4, 2, 60, 0.6, 10, 15721), + }; conn.execute( "INSERT OR IGNORE INTO proxy_config ( - app_type, max_retries, + app_type, listen_port, max_retries, streaming_first_byte_timeout, streaming_idle_timeout, non_streaming_timeout, circuit_failure_threshold, circuit_success_threshold, circuit_timeout_seconds, circuit_error_rate_threshold, circuit_min_requests - ) VALUES (?1, ?2, ?3, ?4, 600, ?5, ?6, ?7, ?8, ?9)", + ) VALUES (?1, ?2, ?3, ?4, ?5, 600, ?6, ?7, ?8, ?9, ?10)", rusqlite::params![ app_type, + listen_port, retries, fb_timeout, idle_timeout, @@ -402,11 +423,11 @@ impl Database { // claude: 更激进的重试和超时配置 conn.execute( "INSERT OR IGNORE INTO proxy_config ( - app_type, max_retries, + app_type, listen_port, max_retries, streaming_first_byte_timeout, streaming_idle_timeout, non_streaming_timeout, circuit_failure_threshold, circuit_success_threshold, circuit_timeout_seconds, circuit_error_rate_threshold, circuit_min_requests - ) VALUES ('claude', 6, 90, 180, 600, 8, 3, 90, 0.7, 15)", + ) VALUES ('claude', 15721, 6, 90, 180, 600, 8, 3, 
90, 0.7, 15)", [], ) .map_err(|e| AppError::Database(e.to_string()))?; @@ -414,11 +435,11 @@ impl Database { // codex: 默认配置 conn.execute( "INSERT OR IGNORE INTO proxy_config ( - app_type, max_retries, + app_type, listen_port, max_retries, streaming_first_byte_timeout, streaming_idle_timeout, non_streaming_timeout, circuit_failure_threshold, circuit_success_threshold, circuit_timeout_seconds, circuit_error_rate_threshold, circuit_min_requests - ) VALUES ('codex', 3, 60, 120, 600, 4, 2, 60, 0.6, 10)", + ) VALUES ('codex', 15722, 3, 60, 120, 600, 4, 2, 60, 0.6, 10)", [], ) .map_err(|e| AppError::Database(e.to_string()))?; @@ -426,11 +447,11 @@ impl Database { // gemini: 稍高的重试次数 conn.execute( "INSERT OR IGNORE INTO proxy_config ( - app_type, max_retries, + app_type, listen_port, max_retries, streaming_first_byte_timeout, streaming_idle_timeout, non_streaming_timeout, circuit_failure_threshold, circuit_success_threshold, circuit_timeout_seconds, circuit_error_rate_threshold, circuit_min_requests - ) VALUES ('gemini', 5, 60, 120, 600, 4, 2, 60, 0.6, 10)", + ) VALUES ('gemini', 15723, 5, 60, 120, 600, 4, 2, 60, 0.6, 10)", [], ) .map_err(|e| AppError::Database(e.to_string()))?; @@ -487,16 +508,14 @@ impl Database { conn.execute( "UPDATE proxy_config SET listen_address = ?1, - listen_port = ?2, - max_retries = ?3, - enable_logging = ?4, - streaming_first_byte_timeout = ?5, - streaming_idle_timeout = ?6, - non_streaming_timeout = ?7, + max_retries = ?2, + enable_logging = ?3, + streaming_first_byte_timeout = ?4, + streaming_idle_timeout = ?5, + non_streaming_timeout = ?6, updated_at = datetime('now')", rusqlite::params![ config.listen_address, - config.listen_port as i32, config.max_retries as i32, if config.enable_logging { 1 } else { 0 }, config.streaming_first_byte_timeout as i32, @@ -1031,6 +1050,86 @@ mod tests { Ok(()) } + #[tokio::test] + async fn update_global_proxy_config_preserves_app_listen_ports() -> Result<(), AppError> { + let db = Database::memory()?; + let 
mut codex = db.get_proxy_config_for_app("codex").await?; + codex.listen_port = 17022; + db.update_proxy_config_for_app(codex).await?; + let mut gemini = db.get_proxy_config_for_app("gemini").await?; + gemini.listen_port = 17023; + db.update_proxy_config_for_app(gemini).await?; + + let mut config = db.get_global_proxy_config().await?; + config.proxy_enabled = true; + config.listen_address = "127.0.0.2".to_string(); + config.listen_port = 18000; + db.update_global_proxy_config(config).await?; + + assert_eq!( + db.get_proxy_config_for_app("claude").await?.listen_port, + 15721 + ); + assert_eq!( + db.get_proxy_config_for_app("codex").await?.listen_port, + 17022 + ); + assert_eq!( + db.get_proxy_config_for_app("gemini").await?.listen_port, + 17023 + ); + Ok(()) + } + + #[tokio::test] + async fn update_proxy_config_preserves_app_listen_ports() -> Result<(), AppError> { + let db = Database::memory()?; + let mut codex = db.get_proxy_config_for_app("codex").await?; + codex.listen_port = 17022; + db.update_proxy_config_for_app(codex).await?; + let mut gemini = db.get_proxy_config_for_app("gemini").await?; + gemini.listen_port = 17023; + db.update_proxy_config_for_app(gemini).await?; + + let mut config = db.get_proxy_config().await?; + config.listen_address = "127.0.0.2".to_string(); + config.listen_port = 18000; + db.update_proxy_config(config).await?; + + assert_eq!( + db.get_proxy_config_for_app("claude").await?.listen_port, + 15721 + ); + assert_eq!( + db.get_proxy_config_for_app("codex").await?.listen_port, + 17022 + ); + assert_eq!( + db.get_proxy_config_for_app("gemini").await?.listen_port, + 17023 + ); + Ok(()) + } + + #[tokio::test] + async fn app_proxy_config_uses_distinct_default_ports() -> Result<(), AppError> { + let db = Database::memory()?; + + assert_eq!( + db.get_proxy_config_for_app("claude").await?.listen_port, + 15721 + ); + assert_eq!( + db.get_proxy_config_for_app("codex").await?.listen_port, + 15722 + ); + assert_eq!( + 
db.get_proxy_config_for_app("gemini").await?.listen_port, + 15723 + ); + Ok(()) + } + #[test] fn set_proxy_flags_sync_masks_failover_without_takeover() -> Result<(), AppError> { let db = Database::memory()?; diff --git a/src-tauri/src/database/mod.rs b/src-tauri/src/database/mod.rs index 0c891f1f..8843212a 100644 --- a/src-tauri/src/database/mod.rs +++ b/src-tauri/src/database/mod.rs @@ -48,7 +48,7 @@ const DB_BACKUP_RETAIN: usize = 10; /// 当前 Schema 版本号 /// 每次修改表结构时递增,并在 schema.rs 中添加相应的迁移逻辑 -pub(crate) const SCHEMA_VERSION: i32 = 10; +pub(crate) const SCHEMA_VERSION: i32 = 11; /// 安全地序列化 JSON,避免 unwrap panic pub(crate) fn to_json_string(value: &T) -> Result { @@ -94,6 +94,12 @@ impl Database { // 启用外键约束 conn.execute("PRAGMA foreign_keys = ON;", []) .map_err(|e| AppError::Database(e.to_string()))?; + // 多进程并发:daemon 与 worker 都会打开这个文件,WAL + busy_timeout 让 + // 短暂的 SQLITE_BUSY 自动重试而不是直接失败。 + conn.pragma_update(None, "journal_mode", "WAL") + .map_err(|e| AppError::Database(e.to_string()))?; + conn.busy_timeout(std::time::Duration::from_secs(5)) + .map_err(|e| AppError::Database(e.to_string()))?; let db = Self { conn: Mutex::new(conn), diff --git a/src-tauri/src/database/schema.rs b/src-tauri/src/database/schema.rs index 39106426..e8c0aa73 100644 --- a/src-tauri/src/database/schema.rs +++ b/src-tauri/src/database/schema.rs @@ -136,29 +136,29 @@ impl Database { // - 旧表会在 apply_schema_migrations() 中迁移为三行结构后再插入。 if Self::has_column(conn, "proxy_config", "app_type")? 
{ conn.execute( - "INSERT OR IGNORE INTO proxy_config (app_type, max_retries, + "INSERT OR IGNORE INTO proxy_config (app_type, listen_port, max_retries, streaming_first_byte_timeout, streaming_idle_timeout, non_streaming_timeout, circuit_failure_threshold, circuit_success_threshold, circuit_timeout_seconds, circuit_error_rate_threshold, circuit_min_requests) - VALUES ('claude', 6, 90, 180, 600, 8, 3, 90, 0.7, 15)", + VALUES ('claude', 15721, 6, 90, 180, 600, 8, 3, 90, 0.7, 15)", [], ) .map_err(|e| AppError::Database(e.to_string()))?; conn.execute( - "INSERT OR IGNORE INTO proxy_config (app_type, max_retries, + "INSERT OR IGNORE INTO proxy_config (app_type, listen_port, max_retries, streaming_first_byte_timeout, streaming_idle_timeout, non_streaming_timeout, circuit_failure_threshold, circuit_success_threshold, circuit_timeout_seconds, circuit_error_rate_threshold, circuit_min_requests) - VALUES ('codex', 3, 60, 120, 600, 4, 2, 60, 0.6, 10)", + VALUES ('codex', 15722, 3, 60, 120, 600, 4, 2, 60, 0.6, 10)", [], ) .map_err(|e| AppError::Database(e.to_string()))?; conn.execute( - "INSERT OR IGNORE INTO proxy_config (app_type, max_retries, + "INSERT OR IGNORE INTO proxy_config (app_type, listen_port, max_retries, streaming_first_byte_timeout, streaming_idle_timeout, non_streaming_timeout, circuit_failure_threshold, circuit_success_threshold, circuit_timeout_seconds, circuit_error_rate_threshold, circuit_min_requests) - VALUES ('gemini', 5, 60, 120, 600, 4, 2, 60, 0.6, 10)", + VALUES ('gemini', 15723, 5, 60, 120, 600, 4, 2, 60, 0.6, 10)", [], ) .map_err(|e| AppError::Database(e.to_string()))?; @@ -415,10 +415,15 @@ impl Database { Self::set_user_version(conn, 9)?; } 9 => { - log::info!("迁移数据库从 v9 到 v10(添加 Hermes Agent 支持)"); + log::info!("迁移数据库从 v9 到 v10(Hermes Agent 支持)"); Self::migrate_v9_to_v10(conn)?; Self::set_user_version(conn, 10)?; } + 10 => { + log::info!("迁移数据库从 v10 到 v11(代理按应用默认端口)"); + Self::migrate_v10_to_v11(conn)?; + Self::set_user_version(conn, 11)?; + } _ 
=> { return Err(AppError::Database(format!( "未知的数据库版本 {version},无法迁移到 {SCHEMA_VERSION}" @@ -1228,6 +1233,30 @@ impl Database { Ok(()) } + /// v10 -> v11 迁移:代理按应用默认端口 + fn migrate_v10_to_v11(conn: &Connection) -> Result<(), AppError> { + if Self::table_exists(conn, "proxy_config")? + && Self::has_column(conn, "proxy_config", "app_type")? + && Self::has_column(conn, "proxy_config", "listen_port")? + { + conn.execute( + "UPDATE proxy_config SET listen_port = 15722 + WHERE app_type = 'codex' AND listen_port = 15721", + [], + ) + .map_err(|e| AppError::Database(format!("迁移 Codex 代理端口失败: {e}")))?; + conn.execute( + "UPDATE proxy_config SET listen_port = 15723 + WHERE app_type = 'gemini' AND listen_port = 15721", + [], + ) + .map_err(|e| AppError::Database(format!("迁移 Gemini 代理端口失败: {e}")))?; + } + + log::info!("v10 -> v11 迁移完成:已设置按应用代理端口"); + Ok(()) + } + /// 插入默认模型定价数据 /// 格式: (model_id, display_name, input, output, cache_read, cache_creation) /// 注意: model_id 使用短横线格式(如 claude-haiku-4-5),与 API 返回的模型名称标准化后一致 diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 0a7c4f64..c5dc8cf9 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -5,6 +5,7 @@ mod claude_plugin; mod codex_config; pub mod commands; mod config; +pub mod daemon; mod database; mod deeplink; mod error; diff --git a/src-tauri/src/main.rs b/src-tauri/src/main.rs index 9bad2247..536c9b8a 100644 --- a/src-tauri/src/main.rs +++ b/src-tauri/src/main.rs @@ -37,12 +37,14 @@ fn run(cli: Cli) -> Result<(), AppError> { } Some(Commands::Skills(cmd)) => cc_switch_lib::cli::commands::skills::execute(cmd, cli.app), Some(Commands::Config(cmd)) => cc_switch_lib::cli::commands::config::execute(cmd, cli.app), - Some(Commands::Proxy(cmd)) => cc_switch_lib::cli::commands::proxy::execute(cmd), + Some(Commands::Proxy(cmd)) => cc_switch_lib::cli::commands::proxy::execute(cmd, cli.app), Some(Commands::Failover(cmd)) => { cc_switch_lib::cli::commands::failover::execute(cmd, cli.app) } #[cfg(unix)] 
Some(Commands::Start(cmd)) => cc_switch_lib::cli::commands::start::execute(cmd), + #[cfg(unix)] + Some(Commands::Daemon(cmd)) => cc_switch_lib::cli::commands::daemon::execute(cmd), Some(Commands::Env(cmd)) => cc_switch_lib::cli::commands::env::execute(cmd, cli.app), Some(Commands::Update(cmd)) => cc_switch_lib::cli::commands::update::execute(cmd), Some(Commands::Completions(cmd)) => cc_switch_lib::cli::commands::completions::execute(cmd), @@ -55,6 +57,8 @@ fn command_requires_startup_state(command: &Option) -> bool { Some(Commands::Completions(_)) | Some(Commands::Update(_)) | Some(Commands::Internal(_)) => false, + #[cfg(unix)] + Some(Commands::Daemon(_)) => false, _ => true, } } diff --git a/src-tauri/src/proxy/types.rs b/src-tauri/src/proxy/types.rs index be8b62d5..27f8431d 100644 --- a/src-tauri/src/proxy/types.rs +++ b/src-tauri/src/proxy/types.rs @@ -98,6 +98,18 @@ pub struct ProxyStatus { /// 当前活跃的代理目标列表 #[serde(default)] pub active_targets: Vec, + /// 当前活跃的 daemon-managed worker 列表 + #[serde(default)] + pub active_workers: Vec, +} + +/// 活跃的 daemon-managed worker 信息 +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ActiveWorker { + pub app_type: String, + pub address: String, + pub port: u16, + pub pid: Option, } /// 活跃的代理目标信息 @@ -179,6 +191,9 @@ pub struct AppProxyConfig { pub app_type: String, /// 该 app 代理启用开关 pub enabled: bool, + /// 该 app 监听端口 + #[serde(default = "default_app_listen_port")] + pub listen_port: u16, /// 该 app 自动故障转移开关 pub auto_failover_enabled: bool, /// 最大重试次数 @@ -201,6 +216,10 @@ pub struct AppProxyConfig { pub circuit_min_requests: u32, } +fn default_app_listen_port() -> u16 { + 15721 +} + /// 整流器配置 /// /// 存储在 settings 表中 diff --git a/src-tauri/src/services/proxy.rs b/src-tauri/src/services/proxy.rs index 2cdababb..a61e1e62 100644 --- a/src-tauri/src/services/proxy.rs +++ b/src-tauri/src/services/proxy.rs @@ -3,14 +3,10 @@ mod codex_toml; use std::{ collections::HashMap, future::Future, - process::{Command, Stdio}, 
sync::{Arc, Mutex as StdMutex, OnceLock, Weak}, time::Duration, }; -#[cfg(unix)] -use std::os::unix::process::CommandExt; - use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use tokio::sync::RwLock; @@ -112,6 +108,13 @@ struct PersistedProxyRuntimeSession { kind: PersistedProxyRuntimeSessionKind, #[serde(default)] session_token: Option, + #[serde(default)] + app_type: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct PersistedProxyRuntimeSessions { + workers: HashMap, } enum ExternalProxyStatusProbe { @@ -219,86 +222,157 @@ impl ProxyService { let app_type = Self::takeover_app_from_str(app_type)?; let current_status = self.get_status().await; if current_status.running { - return Err( - "proxy is already running; stop the current runtime before starting a managed session" - .to_string(), - ); + // Daemon is already running this worker. Just attach the app. + self.daemon_ensure_worker(app_type.as_str()).await + } else { + self.validate_app_proxy_activation(&app_type, None).await?; + self.daemon_ensure_worker(app_type.as_str()).await } - self.validate_app_proxy_activation(&app_type, None).await?; - - let executable = Self::resolve_managed_proxy_executable()?; - let session_token = uuid::Uuid::new_v4().to_string(); - let mut child = Command::new(executable); - child - .arg("proxy") - .arg("serve") - .arg("--takeover") - .arg(app_type.as_str()) - .env( - PROXY_RUNTIME_KIND_ENV_KEY, - PersistedProxyRuntimeSessionKind::ManagedExternal.as_env_value(), - ) - .env(PROXY_RUNTIME_SESSION_TOKEN_ENV_KEY, &session_token) - .env( - crate::services::state_coordination::RESTORE_GUARD_BYPASS_ENV_KEY, - "1", - ) - .stdin(Stdio::null()) - .stdout(Stdio::null()) - .stderr(Stdio::null()); + } + async fn daemon_ensure_worker(&self, app_type: &str) -> Result { #[cfg(unix)] - unsafe { - child.pre_exec(|| { - if libc::setsid() == -1 { - return Err(std::io::Error::last_os_error()); - } - Ok(()) - }); - } - - let mut child = child - .spawn() - 
.map_err(|error| format!("spawn managed proxy session failed: {error}"))?; - let child_pid = child.id(); + { + use crate::daemon::ipc::client; + use crate::daemon::ipc::protocol::{Request, Response}; + let socket_path = crate::daemon::paths::socket_path(); + let app_type = app_type.to_string(); + let response = tokio::task::spawn_blocking(move || { + let mut stream = client::connect_or_spawn(&socket_path, || { + let bin = Self::resolve_managed_proxy_executable() + .map_err(client::ClientError::NoDaemon)?; + Ok(bin) + })?; + client::exchange(&mut stream, &Request::EnsureWorker { app_type }) + }) + .await + .map_err(|err| format!("daemon ensure worker task panicked: {err}"))? + .map_err(|err| format!("daemon ensure worker failed: {err}"))?; - let start_deadline = tokio::time::Instant::now() + Duration::from_secs(10); - loop { - if let Some(status) = child - .try_wait() - .map_err(|error| format!("poll managed proxy process failed: {error}"))? - { - return Err(format!( - "managed proxy session exited before becoming ready: {}", - status - )); + match response { + Response::Worker { address, port, .. 
} => Ok(ProxyServerInfo { + address, + port, + started_at: chrono::Utc::now().to_rfc3339(), + }), + Response::Error { message } => Err(message), + other => Err(format!( + "daemon ensure worker returned unexpected response: {other:?}" + )), } + } - if let Some(info) = self - .managed_session_ready_info(child_pid, session_token.as_str()) - .await - { - Self::spawn_managed_child_reaper(child); - return Ok(info); - } + #[cfg(not(unix))] + { + let _ = app_type; + Err("managed sessions are only supported on unix".to_string()) + } + } - if tokio::time::Instant::now() >= start_deadline { - let _ = child.kill(); - let _ = child.wait(); - let _ = self.clear_persisted_runtime_session(); - return Err("managed proxy session did not become ready in time".to_string()); + async fn daemon_drop_takeover(&self, app_type: &str) -> Result<(), String> { + #[cfg(unix)] + { + use crate::daemon::ipc::client; + use crate::daemon::ipc::protocol::{Request, Response}; + use std::io::ErrorKind; + let socket_path = crate::daemon::paths::socket_path(); + // No socket at all → daemon isn't running. Do the cleanup the + // daemon would have done so the DB and live config stay aligned + // with "this app is no longer being proxied". + if !socket_path.exists() { + return self.local_disable_takeover(app_type).await; } + let app_type_owned = app_type.to_string(); + let socket_for_task = socket_path.clone(); + let outcome = tokio::task::spawn_blocking( + move || -> Result, client::ClientError> { + let mut stream = match client::connect(&socket_for_task) { + Ok(s) => s, + // ECONNREFUSED / ENOENT here means the socket inode is + // a leftover from a daemon that died ungracefully — + // nobody is listening. Treat as "no daemon" and let + // the caller fall back to local cleanup. 
+ Err(client::ClientError::Io(e)) + if matches!( + e.kind(), + ErrorKind::ConnectionRefused | ErrorKind::NotFound + ) => + { + return Ok(None); + } + Err(e) => return Err(e), + }; + client::exchange( + &mut stream, + &Request::DropTakeover { + app_type: app_type_owned, + }, + ) + .map(Some) + }, + ) + .await + .map_err(|err| format!("daemon drop takeover task panicked: {err}"))? + .map_err(|err| format!("daemon drop takeover failed: {err}"))?; + + match outcome { + Some(Response::Ok) => Ok(()), + Some(Response::Error { message }) => Err(message), + Some(other) => Err(format!( + "daemon drop takeover returned unexpected response: {other:?}" + )), + None => { + // Stale socket inode — best-effort remove so the next call + // takes the !socket_path.exists() short-circuit instead of + // tripping over the same ECONNREFUSED again. + let _ = std::fs::remove_file(&socket_path); + self.local_disable_takeover(app_type).await + } + } + } - tokio::time::sleep(Duration::from_millis(100)).await; + #[cfg(not(unix))] + { + let _ = app_type; + Err("managed sessions are only supported on unix".to_string()) } } + /// Foreground-only fallback for when no daemon is reachable. Drops the + /// per-app takeover via the same code path the supervisor uses, takes the + /// cross-process state-mutation guard around it (so a concurrent CLI + /// invocation can't race the live-config restore), and clears the + /// `proxy_runtime_session` row — the daemon would normally own that row, + /// but if it's gone, leaving the row behind would make `get_status()` + /// report a phantom running proxy on the next launch. 
+ async fn local_disable_takeover(&self, app_type: &str) -> Result<(), String> { + let app = Self::takeover_app_from_str(app_type)?; + let _guard = crate::services::state_coordination::acquire_restore_mutation_guard().await?; + self.disable_takeover_for_app_unlocked(&app, false).await?; + let _ = self.clear_persisted_runtime_session(); + Ok(()) + } + + pub fn has_persisted_managed_sessions(&self) -> bool { + self.load_persisted_runtime_sessions() + .into_iter() + .any(|session| { + session.kind.is_managed_external() && Self::is_process_alive(session.pid) + }) + } + pub async fn set_managed_session_for_app( &self, app_type: &str, enabled: bool, ) -> Result<(), String> { - let _guard = crate::services::state_coordination::acquire_restore_mutation_guard().await?; + // Intentionally NO state-mutation guard here. This function purely + // delegates to the daemon (`daemon_ensure_worker` / `daemon_drop_takeover`) + // which acquires its own cross-process guard inside its IPC handler. + // Holding the guard on the foreground side and then making a synchronous + // IPC call deadlocks against the daemon's handler — observed as + // "Resource temporarily unavailable (os error 35)" once the IPC read + // times out. 
self.set_managed_session_for_app_unlocked(app_type, enabled) .await } @@ -308,36 +382,25 @@ impl ProxyService { app_type: &str, enabled: bool, ) -> Result<(), String> { - let app_type = Self::takeover_app_from_str(app_type)?; + let app_type_enum = Self::takeover_app_from_str(app_type)?; if enabled { let status = self.get_status().await; - if !status.running { - self.start_managed_session_unlocked(app_type.as_str()) - .await?; - return Ok(()); - } - - if self - .load_persisted_runtime_session() - .is_some_and(|session| session.kind.is_managed_external()) - { - self.enable_takeover_for_app_unlocked(&app_type).await?; - return Ok(()); + if status.running && self.load_persisted_runtime_sessions().is_empty() { + return Err( + "proxy is already running in foreground mode; stop the current runtime before attaching another app to a managed session" + .to_string(), + ); } - return Err( - "proxy is already running in foreground mode; stop the current runtime before attaching another app to a managed session" - .to_string(), - ); + // Daemon-driven path: ensure worker is up + takeover is on for this app. + self.daemon_ensure_worker(app_type_enum.as_str()).await?; + return Ok(()); } - let stop_server_when_last = self - .load_persisted_runtime_session() - .map(|session| session.kind.is_managed_external()) - .unwrap_or(false); - self.disable_takeover_for_app_unlocked(&app_type, stop_server_when_last) - .await + // Disable: route through the daemon when one is running so it stays + // the sole writer of `proxy_runtime_session`. 
+ self.daemon_drop_takeover(app_type_enum.as_str()).await } async fn start_with_resolved_config_unlocked( @@ -478,44 +541,89 @@ impl ProxyService { return server.get_status().await; } - if let Some(session) = self.load_persisted_runtime_session() { - if session.kind.is_managed_external() { - if Self::is_process_alive(session.pid) { - match Self::probe_external_proxy_status(&session).await { - ExternalProxyStatusProbe::Matched(status) => return status, - ExternalProxyStatusProbe::Mismatched => { - let _ = self.clear_persisted_runtime_session(); - return ProxyStatus::default(); + let sessions = self.load_persisted_runtime_sessions(); + if !sessions.is_empty() + && sessions + .iter() + .all(|session| session.kind.is_managed_external()) + { + let mut workers = Vec::new(); + let mut primary_status = None; + let mut stale = false; + + for session in sessions { + if !Self::is_process_alive(session.pid) { + stale = true; + continue; + } + + match Self::probe_external_proxy_status(&session).await { + ExternalProxyStatusProbe::Matched(status) => { + workers.push(crate::proxy::types::ActiveWorker { + app_type: session + .app_type + .clone() + .unwrap_or_else(|| "proxy".to_string()), + address: status.address.clone(), + port: status.port, + pid: Some(session.pid), + }); + if primary_status.is_none() { + primary_status = Some(status); } - ExternalProxyStatusProbe::Unreachable => { - if Self::has_managed_external_ownership_signal(&session) { - let uptime_seconds = - chrono::DateTime::parse_from_rfc3339(&session.started_at) - .ok() - .map(|started_at| { - let started_at = started_at.with_timezone(&chrono::Utc); - (chrono::Utc::now() - started_at).num_seconds().max(0) - as u64 - }) - .unwrap_or(0); - - return ProxyStatus { + } + ExternalProxyStatusProbe::Mismatched => stale = true, + ExternalProxyStatusProbe::Unreachable => { + if Self::has_managed_external_ownership_signal(&session) { + let uptime_seconds = + chrono::DateTime::parse_from_rfc3339(&session.started_at) + .ok() + 
.map(|started_at| { + let started_at = started_at.with_timezone(&chrono::Utc); + (chrono::Utc::now() - started_at).num_seconds().max(0) + as u64 + }) + .unwrap_or(0); + workers.push(crate::proxy::types::ActiveWorker { + app_type: session + .app_type + .clone() + .unwrap_or_else(|| "proxy".to_string()), + address: session.address.clone(), + port: session.port, + pid: Some(session.pid), + }); + if primary_status.is_none() { + primary_status = Some(ProxyStatus { running: true, address: session.address.clone(), port: session.port, uptime_seconds, managed_session_token: session.session_token.clone(), ..ProxyStatus::default() - }; + }); } + } else { + stale = true; } } } + } + if stale { let _ = self.clear_persisted_runtime_session(); - return ProxyStatus::default(); } + if let Some(mut status) = primary_status { + status.running = !workers.is_empty(); + status.active_workers = workers; + return status; + } + + return ProxyStatus::default(); + } + + if let Some(session) = sessions.into_iter().next() { if Self::is_process_alive(session.pid) { let uptime_seconds = chrono::DateTime::parse_from_rfc3339(&session.started_at) .ok() @@ -644,8 +752,15 @@ impl ProxyService { async fn ensure_proxy_routing_active_for_app(&self, app_type: &str) -> Result<(), String> { let app_type = Self::takeover_app_from_str(app_type)?; - if !self.is_running().await { - return Err("automatic failover requires the local proxy to be running".to_string()); + let has_managed_worker = self + .load_persisted_runtime_session_for_app(&app_type) + .is_some_and(|session| { + session.kind.is_managed_external() && Self::is_process_alive(session.pid) + }); + if !has_managed_worker { + return Err( + "automatic failover requires daemon-managed proxy routing for this app".to_string(), + ); } let app_key = app_type.as_str(); @@ -678,18 +793,7 @@ impl ProxyService { let first_provider_id = self.first_failover_provider_id(app_type)?; let app_type = Self::takeover_app_from_str(app_type)?; let app_key = 
app_type.as_str(); - { - let _guard = - crate::services::state_coordination::acquire_restore_mutation_guard().await?; - self.enable_takeover_for_app_unlocked_with_provider( - &app_type, - Some(&first_provider_id), - ) - .await?; - } - self.set_global_enabled(true) - .await - .map_err(|e| e.to_string())?; + self.set_managed_session_for_app(app_key, true).await?; self.switch_proxy_target(app_key, &first_provider_id) .await?; self.persist_auto_failover_for_app(app_key, true).await?; @@ -1137,7 +1241,7 @@ impl ProxyService { .await?; } - let (proxy_url, proxy_codex_base_url) = self.build_proxy_urls().await?; + let (proxy_url, proxy_codex_base_url) = self.build_proxy_urls_for_app(app_type).await?; let mut taken_over = live; self.rewrite_live_for_proxy(app_type, &mut taken_over, &proxy_url, &proxy_codex_base_url)?; self.write_live_config_for_app(app_type, &taken_over)?; @@ -1538,19 +1642,32 @@ impl ProxyService { .map(|live| (live, false)) } - async fn build_proxy_urls(&self) -> Result<(String, String), String> { - let runtime_status = self.get_status().await; + async fn build_proxy_urls_for_app( + &self, + app_type: &AppType, + ) -> Result<(String, String), String> { let persisted = self.get_config().await.map_err(|e| e.to_string())?; - let listen_address = if runtime_status.running && !runtime_status.address.is_empty() { - runtime_status.address - } else { - persisted.listen_address.clone() - }; - let listen_port = if runtime_status.running && runtime_status.port != 0 { - runtime_status.port - } else { - persisted.listen_port - }; + let app_proxy = self + .db + .get_proxy_config_for_app(app_type.as_str()) + .await + .map_err(|error| { + format!( + "load proxy config for {} failed: {error}", + app_type.as_str() + ) + })?; + let session = self.load_persisted_runtime_session_for_app(app_type); + let listen_address = session + .as_ref() + .map(|session| session.address.clone()) + .filter(|address| !address.trim().is_empty()) + .unwrap_or_else(|| 
persisted.listen_address.clone()); + let listen_port = session + .as_ref() + .map(|session| session.port) + .filter(|port| *port != 0) + .unwrap_or(app_proxy.listen_port); let connect_host = match listen_address.as_str() { "0.0.0.0" => "127.0.0.1".to_string(), @@ -1903,6 +2020,7 @@ impl ProxyService { session_token: std::env::var(PROXY_RUNTIME_SESSION_TOKEN_ENV_KEY) .ok() .filter(|value| !value.trim().is_empty()), + app_type: None, }; let serialized = serde_json::to_string(&session) .map_err(|error| format!("serialize proxy runtime session failed: {error}"))?; @@ -1921,26 +2039,57 @@ impl ProxyService { .map_err(|error| format!("clear proxy runtime session failed: {error}")) } - fn load_persisted_runtime_session(&self) -> Option { - let raw = self + fn load_persisted_runtime_sessions(&self) -> Vec { + let Some(raw) = self .db .get_setting(PROXY_RUNTIME_SESSION_KEY) .ok() - .flatten()?; + .flatten() + else { + return Vec::new(); + }; let raw = raw.trim(); if raw.is_empty() { - return None; + return Vec::new(); } - match serde_json::from_str(raw) { - Ok(session) => Some(session), + if let Ok(sessions) = serde_json::from_str::(raw) { + return sessions + .workers + .into_iter() + .map(|(app_type, mut session)| { + if session.app_type.is_none() { + session.app_type = Some(app_type); + } + session + }) + .collect(); + } + + match serde_json::from_str::(raw) { + Ok(session) => vec![session], Err(_) => { let _ = self.clear_persisted_runtime_session(); - None + Vec::new() } } } + fn load_persisted_runtime_session(&self) -> Option { + self.load_persisted_runtime_sessions().into_iter().next() + } + + fn load_persisted_runtime_session_for_app( + &self, + app_type: &AppType, + ) -> Option { + let app_key = app_type.as_str(); + self.load_persisted_runtime_sessions() + .into_iter() + .find(|session| session.app_type.as_deref() == Some(app_key)) + .or_else(|| self.load_persisted_runtime_session()) + } + fn is_process_alive(pid: u32) -> bool { if pid == 0 { return false; @@ -3248,6 
+3397,49 @@ base_url = "https://api.openai.com/v1" service.stop().await.expect("stop proxy runtime"); } + #[test] + fn loads_per_app_managed_runtime_sessions() { + let db = Arc::new(Database::memory().expect("create database")); + db.set_setting( + PROXY_RUNTIME_SESSION_KEY, + &json!({ + "workers": { + "claude": { + "pid": std::process::id(), + "address": "127.0.0.1", + "port": 15721, + "started_at": chrono::Utc::now().to_rfc3339(), + "kind": "managed_external", + "session_token": "claude-token" + }, + "codex": { + "pid": std::process::id(), + "address": "127.0.0.1", + "port": 15722, + "started_at": chrono::Utc::now().to_rfc3339(), + "kind": "managed_external", + "session_token": "codex-token" + } + } + }) + .to_string(), + ) + .expect("write runtime sessions"); + let service = ProxyService::new(db); + + let claude = service + .load_persisted_runtime_session_for_app(&AppType::Claude) + .expect("claude session"); + let codex = service + .load_persisted_runtime_session_for_app(&AppType::Codex) + .expect("codex session"); + + assert_eq!(claude.app_type.as_deref(), Some("claude")); + assert_eq!(claude.port, 15721); + assert_eq!(codex.app_type.as_deref(), Some("codex")); + assert_eq!(codex.port, 15722); + } + #[tokio::test] #[serial] async fn managed_external_runtime_publishes_session_when_ready_signal_is_sent() { @@ -3451,6 +3643,7 @@ base_url = "https://api.openai.com/v1" started_at: "2026-03-10T00:00:00Z".to_string(), kind: PersistedProxyRuntimeSessionKind::ManagedExternal, session_token: Some("expected-session-token".to_string()), + app_type: Some("claude".to_string()), }) .expect("serialize runtime session"), ) @@ -3518,6 +3711,7 @@ base_url = "https://api.openai.com/v1" started_at: "2026-03-10T00:00:00Z".to_string(), kind: PersistedProxyRuntimeSessionKind::ManagedExternal, session_token: Some("expected-session-token".to_string()), + app_type: Some("claude".to_string()), }) .expect("serialize runtime session"), ) @@ -3586,6 +3780,7 @@ base_url = 
"https://api.openai.com/v1" started_at: "2026-03-10T00:00:00Z".to_string(), kind: PersistedProxyRuntimeSessionKind::ManagedExternal, session_token: Some("expected-session-token".to_string()), + app_type: Some("claude".to_string()), }) .expect("serialize runtime session"), ) diff --git a/src-tauri/tests/proxy_claude_forwarder_alignment.rs b/src-tauri/tests/proxy_claude_forwarder_alignment.rs index 4aec3f1a..348b9e3d 100644 --- a/src-tauri/tests/proxy_claude_forwarder_alignment.rs +++ b/src-tauri/tests/proxy_claude_forwarder_alignment.rs @@ -43,6 +43,14 @@ async fn bind_test_listener() -> tokio::net::TcpListener { ); } +fn free_loopback_port() -> u16 { + let listener = std::net::TcpListener::bind(("127.0.0.1", 0)).expect("bind ephemeral port"); + listener + .local_addr() + .expect("read ephemeral address") + .port() +} + #[derive(Clone, Default)] struct CountingUpstreamState { attempts: Arc, @@ -555,6 +563,7 @@ async fn proxy_claude_auto_failover_uses_activated_queue_providers() { .await .expect("read claude app proxy config"); app_proxy.enabled = true; + app_proxy.listen_port = free_loopback_port(); app_proxy.auto_failover_enabled = true; db.update_proxy_config_for_app(app_proxy) .await diff --git a/src-tauri/tests/proxy_daemon.rs b/src-tauri/tests/proxy_daemon.rs new file mode 100644 index 00000000..b7a40ceb --- /dev/null +++ b/src-tauri/tests/proxy_daemon.rs @@ -0,0 +1,1039 @@ +//! End-to-end tests for the supervisor daemon. +//! +//! Each test runs in a fully isolated sandbox: +//! - HOME, USERPROFILE → fresh per-test TempDir +//! - CC_SWITCH_CONFIG_DIR → $sandbox/.cc-switch (so the spawned daemon's +//! `Database::init()` writes inside the sandbox, NEVER the user's real +//! ~/.cc-switch) +//! - XDG_RUNTIME_DIR → $sandbox/run (daemon socket + pidfile) +//! - XDG_STATE_HOME → $sandbox/state (daemon log) +//! - The daemon is spawned by resolving `CARGO_BIN_EXE_cc-switch`, the test +//! binary built by Cargo. The TestSandbox Drop impl shuts the daemon down +//! 
and removes the temp dir. +//! +//! These tests are guarded by `#[cfg(unix)]` because the daemon path is +//! Unix-only. + +#![cfg(unix)] + +use std::ffi::OsString; +use std::io::{BufRead, BufReader, Write}; +use std::os::unix::net::UnixStream; +use std::path::{Path, PathBuf}; +use std::sync::{Mutex, MutexGuard, OnceLock}; +use std::time::{Duration, Instant}; + +use serial_test::serial; +use tempfile::TempDir; + +/// Global mutex to prevent test sandboxes racing on shared env vars. +fn env_mutex() -> &'static Mutex<()> { + static M: OnceLock> = OnceLock::new(); + M.get_or_init(|| Mutex::new(())) +} + +fn lock_env() -> MutexGuard<'static, ()> { + env_mutex().lock().unwrap_or_else(|p| p.into_inner()) +} + +const ENV_KEYS: &[&str] = &[ + "HOME", + "USERPROFILE", + "CC_SWITCH_CONFIG_DIR", + "XDG_RUNTIME_DIR", + "XDG_STATE_HOME", + "CLAUDE_CONFIG_DIR", +]; + +struct TestSandbox { + _guard: MutexGuard<'static, ()>, + _root: TempDir, + runtime_dir: PathBuf, + socket: PathBuf, + pidfile: PathBuf, + original_env: Vec<(&'static str, Option)>, +} + +impl TestSandbox { + fn new() -> Self { + let guard = lock_env(); + let root = TempDir::new().expect("create sandbox tempdir"); + let home = root.path().to_path_buf(); + let config_dir = home.join(".cc-switch"); + let claude_config_dir = home.join(".claude"); + let runtime_dir = home.join("run"); + let state_dir = home.join("state"); + std::fs::create_dir_all(&config_dir).expect("create sandbox cc-switch"); + std::fs::create_dir_all(&claude_config_dir).expect("create sandbox claude"); + std::fs::create_dir_all(&runtime_dir).expect("create sandbox runtime"); + std::fs::create_dir_all(&state_dir).expect("create sandbox state"); + + let mut original_env = Vec::new(); + for key in ENV_KEYS { + original_env.push((*key, std::env::var_os(key))); + } + + // SAFETY: env mutation is serialized by `env_mutex()`. 
+ unsafe { + std::env::set_var("HOME", &home); + std::env::set_var("USERPROFILE", &home); + std::env::set_var("CC_SWITCH_CONFIG_DIR", &config_dir); + std::env::set_var("CLAUDE_CONFIG_DIR", &claude_config_dir); + std::env::set_var("XDG_RUNTIME_DIR", &runtime_dir); + std::env::set_var("XDG_STATE_HOME", &state_dir); + } + + let socket = runtime_dir.join("cc-switch").join("daemon.sock"); + let pidfile = runtime_dir.join("cc-switch").join("daemon.pid"); + + Self { + _guard: guard, + _root: root, + runtime_dir, + socket, + pidfile, + original_env, + } + } + + fn socket(&self) -> &Path { + &self.socket + } + + fn pidfile(&self) -> &Path { + &self.pidfile + } + + fn binary() -> PathBuf { + PathBuf::from(env!("CARGO_BIN_EXE_cc-switch")) + } + + /// Spawn the daemon as a background child. Caller is responsible for + /// keeping the returned Child alive until they want to stop it. + fn spawn_daemon(&self) -> std::process::Child { + std::process::Command::new(Self::binary()) + .arg("daemon") + .arg("start") + .stdin(std::process::Stdio::null()) + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .spawn() + .expect("spawn daemon") + } + + fn wait_for_socket(&self, timeout: Duration) -> bool { + let deadline = Instant::now() + timeout; + while Instant::now() < deadline { + if self.socket.exists() && UnixStream::connect(&self.socket).is_ok() { + return true; + } + std::thread::sleep(Duration::from_millis(50)); + } + false + } + + fn read_pid(&self) -> Option { + std::fs::read_to_string(&self.pidfile) + .ok()? + .trim() + .parse() + .ok() + } +} + +impl Drop for TestSandbox { + fn drop(&mut self) { + // Ask the daemon to shut down cleanly, then SIGKILL any leftover. 
+ if self.socket.exists() { + if let Ok(mut stream) = UnixStream::connect(&self.socket) { + let _ = stream.set_write_timeout(Some(Duration::from_secs(2))).ok(); + let _ = stream.write_all(b"{\"kind\":\"shutdown\"}\n"); + let _ = stream.flush(); + let mut sink = String::new(); + let _ = BufReader::new(&stream).read_line(&mut sink); + } + } + + if let Some(pid) = self.read_pid() { + unsafe { + let _ = libc::kill(pid as i32, libc::SIGTERM); + } + // Give it a moment to exit, then SIGKILL. + std::thread::sleep(Duration::from_millis(200)); + unsafe { + let _ = libc::kill(pid as i32, libc::SIGKILL); + } + } + + // Restore environment. + for (key, value) in &self.original_env { + unsafe { + match value { + Some(v) => std::env::set_var(key, v), + None => std::env::remove_var(key), + } + } + } + + let _ = self.runtime_dir.display(); + } +} + +fn send_request(socket: &Path, request_json: &str) -> String { + try_send_request(socket, request_json).expect("daemon socket should be reachable") +} + +/// Like `send_request` but returns `None` instead of panicking when the +/// daemon isn't reachable. Used by tests that intentionally drive the daemon +/// to self-exit and then probe whether anything is still listening. 
+fn try_send_request(socket: &Path, request_json: &str) -> Option { + let mut stream = UnixStream::connect(socket).ok()?; + stream.set_read_timeout(Some(Duration::from_secs(15))).ok(); + stream.set_write_timeout(Some(Duration::from_secs(15))).ok(); + stream.write_all(request_json.as_bytes()).ok()?; + stream.write_all(b"\n").ok()?; + stream.flush().ok()?; + let _ = stream.shutdown(std::net::Shutdown::Write); + let mut buf = String::new(); + BufReader::new(stream).read_line(&mut buf).ok()?; + Some(buf.trim().to_string()) +} + +fn run_cc_switch(args: &[&str]) -> std::process::Output { + std::process::Command::new(TestSandbox::binary()) + .args(args) + .stdin(std::process::Stdio::null()) + .output() + .expect("run cc-switch") +} + +fn assert_command_success(output: &std::process::Output, command: &str) { + assert!( + output.status.success(), + "{command} should succeed; status={:?}, stdout={}, stderr={}", + output.status, + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); +} + +/// Block until either the daemon process exits or `timeout` elapses. +/// Returns true if the process exited within the timeout. +fn wait_for_daemon_exit(child: &mut std::process::Child, timeout: Duration) -> bool { + let deadline = Instant::now() + timeout; + while Instant::now() < deadline { + match child.try_wait() { + Ok(Some(_)) => return true, + Ok(None) => std::thread::sleep(Duration::from_millis(50)), + Err(_) => return false, + } + } + false +} + +#[test] +#[serial] +fn daemon_starts_and_serves_status_request() { + let sandbox = TestSandbox::new(); + + // Spawn the daemon in the foreground (no --detach) as a child process. + // The sandbox env is set, so the daemon writes its socket + db inside + // the temp dir and does NOT touch the user's real config. 
+ let mut child = sandbox.spawn_daemon(); + + if !sandbox.wait_for_socket(Duration::from_secs(10)) { + let _ = child.kill(); + panic!("daemon socket did not come up within 10s"); + } + assert!( + sandbox.pidfile().exists(), + "pidfile should be written under {}", + sandbox.pidfile().display() + ); + + let response = send_request(sandbox.socket(), r#"{"kind":"status"}"#); + assert!( + response.contains("\"kind\":\"status\""), + "expected status response, got {response}" + ); + // The worker hasn't been requested yet, so it should report not running. + assert!( + response.contains("\"running\":false"), + "expected running:false before EnsureWorker, got {response}" + ); + + // Clean shutdown via Shutdown RPC. + let _ = send_request(sandbox.socket(), r#"{"kind":"shutdown"}"#); + let _ = child.wait(); + assert!( + !sandbox.pidfile().exists(), + "pidfile should be removed after shutdown" + ); +} + +#[test] +#[serial] +fn ensure_worker_spawns_a_worker_and_drop_takeover_brings_it_down() { + let sandbox = TestSandbox::new(); + seed_minimal_claude_provider(&sandbox); + + let mut daemon = sandbox.spawn_daemon(); + assert!( + sandbox.wait_for_socket(Duration::from_secs(10)), + "daemon socket should come up" + ); + + let response = send_request( + sandbox.socket(), + r#"{"kind":"ensure_worker","app_type":"claude"}"#, + ); + assert!( + response.contains("\"kind\":\"worker\""), + "expected Worker response, got {response}" + ); + + // Worker is now running; status should reflect it. + let status = send_request(sandbox.socket(), r#"{"kind":"status"}"#); + assert!( + status.contains("\"running\":true"), + "expected running:true after EnsureWorker, got {status}" + ); + + // Drop takeover: daemon should signal the worker to exit and return Ok. 
+ let drop_resp = send_request( + sandbox.socket(), + r#"{"kind":"drop_takeover","app_type":"claude"}"#, + ); + assert!( + drop_resp.contains("\"kind\":\"ok\""), + "expected Ok response, got {drop_resp}" + ); + + // With no remaining takeovers the daemon self-exits — no shutdown RPC + // needed. The pidfile + socket are removed by the daemon's cleanup path. + assert!( + wait_for_daemon_exit(&mut daemon, Duration::from_secs(5)), + "daemon should self-exit after the last takeover is dropped" + ); +} + +#[test] +#[serial] +fn proxy_enable_and_disable_cli_manage_daemon_worker() { + let sandbox = TestSandbox::new(); + seed_minimal_claude_provider(&sandbox); + + let enable = run_cc_switch(&["proxy", "enable"]); + assert_command_success(&enable, "proxy enable"); + assert!( + sandbox.wait_for_socket(Duration::from_secs(10)), + "proxy enable should auto-start the daemon socket" + ); + + let status = run_cc_switch(&["daemon", "status"]); + assert_command_success(&status, "daemon status after proxy enable"); + let status_stdout = String::from_utf8_lossy(&status.stdout); + assert!( + status_stdout.contains("worker: running at"), + "daemon status should report a running worker after proxy enable, got {status_stdout}" + ); + assert!( + status_stdout.contains("takeovers: claude=true"), + "daemon status should report claude takeover after proxy enable, got {status_stdout}" + ); + let taken_over_url = read_claude_settings_base_url().expect("read taken-over claude base url"); + assert!( + taken_over_url.starts_with("http://127.0.0.1:"), + "proxy enable should rewrite Claude base URL to local worker, got {taken_over_url}" + ); + + let disable = run_cc_switch(&["proxy", "disable"]); + assert_command_success(&disable, "proxy disable"); + + let deadline = Instant::now() + Duration::from_secs(5); + loop { + if !sandbox.socket().exists() && !sandbox.pidfile().exists() { + break; + } + assert!( + Instant::now() < deadline, + "proxy disable should stop the daemon after the last takeover" + 
); + std::thread::sleep(Duration::from_millis(50)); + } + assert_eq!( + read_claude_settings_base_url().as_deref(), + None, + "proxy disable should restore Claude live config without a base URL" + ); +} + +#[test] +#[serial] +fn set_global_enabled_false_clears_takeovers_and_stops_the_worker() { + let sandbox = TestSandbox::new(); + seed_minimal_claude_provider(&sandbox); + + let mut daemon = sandbox.spawn_daemon(); + assert!( + sandbox.wait_for_socket(Duration::from_secs(10)), + "daemon socket should come up" + ); + + // Bring up a worker for claude. + let ensure = send_request( + sandbox.socket(), + r#"{"kind":"ensure_worker","app_type":"claude"}"#, + ); + assert!( + ensure.contains("\"kind\":\"worker\""), + "expected Worker response, got {ensure}" + ); + + let pre_status = send_request(sandbox.socket(), r#"{"kind":"status"}"#); + assert!( + pre_status.contains("\"running\":true"), + "worker should be running before disable, got {pre_status}" + ); + assert!( + pre_status.contains("\"claude\":true"), + "claude takeover should be on before disable, got {pre_status}" + ); + + // Flip the global switch off via IPC. Daemon should clear takeovers, + // stop the worker, and self-exit once nothing is left to supervise. + let disable = send_request( + sandbox.socket(), + r#"{"kind":"set_global_enabled","enabled":false}"#, + ); + assert!( + disable.contains("\"kind\":\"ok\""), + "expected Ok response, got {disable}" + ); + + assert!( + wait_for_daemon_exit(&mut daemon, Duration::from_secs(5)), + "daemon should self-exit after set_global_enabled(false) clears the last takeover" + ); + assert!( + !sandbox.socket().exists(), + "socket file should be removed on daemon exit" + ); + assert!( + !sandbox.pidfile().exists(), + "pidfile should be removed on daemon exit" + ); +} + +/// Regression for the file-lock deadlock that surfaced as +/// "daemon drop takeover failed: Resource temporarily unavailable (os error 35)". 
+/// +/// The TUI invokes `ProxyService::set_managed_session_for_app` from a worker +/// thread. The foreground used to take the cross-process state-mutation guard +/// and THEN make a synchronous IPC call to the daemon. The daemon's handler +/// also acquires that guard, so it blocked behind the foreground's hold; the +/// foreground's `read_line` then timed out after 15s. +/// +/// With the foreground guard removed, `set_managed_session_for_app` should +/// round-trip in well under that timeout. +#[test] +#[serial] +fn set_managed_session_for_app_does_not_deadlock_on_state_mutation_lock() { + let sandbox = TestSandbox::new(); + seed_minimal_claude_provider(&sandbox); + + let mut daemon = sandbox.spawn_daemon(); + assert!( + sandbox.wait_for_socket(Duration::from_secs(10)), + "daemon socket should come up" + ); + + // Bring the worker up via IPC so there's an active takeover to drop. + let ensure = send_request( + sandbox.socket(), + r#"{"kind":"ensure_worker","app_type":"claude"}"#, + ); + assert!( + ensure.contains("\"kind\":\"worker\""), + "expected Worker response, got {ensure}" + ); + + // Drive the same code path the TUI uses: load the AppState in this process + // and call the proxy service directly. With the deadlock unfixed this hangs + // for the full 15s IPC read timeout and then errors out. 
+ let state = cc_switch_lib::AppState::try_new().expect("create app state in sandbox"); + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("create test runtime"); + + let started = Instant::now(); + let result = runtime.block_on( + state + .proxy_service + .set_managed_session_for_app("claude", false), + ); + let elapsed = started.elapsed(); + + assert!( + result.is_ok(), + "set_managed_session_for_app(false) should succeed, got {result:?}" + ); + assert!( + elapsed < Duration::from_secs(5), + "set_managed_session_for_app(false) should not block on the state \ + mutation lock; took {elapsed:?}" + ); + + // The drop was for the last (only) takeover, so the daemon self-exits. + assert!( + wait_for_daemon_exit(&mut daemon, Duration::from_secs(5)), + "daemon should self-exit after the last takeover is dropped" + ); +} + +/// Regression for the symptom the user reported on the TUI: +/// "✗ daemon ensure worker failed: Resource temporarily unavailable (os error 35)" +/// when toggling proxy on and off via the main TUI proxy action. +/// +/// The TUI's `SetManagedProxyForCurrentApp` action funnels into +/// `ProxyService::set_managed_session_for_app(app, enabled)` on a worker thread. +/// This test drives that exact code path through the public service API and +/// cycles enable→disable→enable→disable several times. With the daemon healthy +/// and no concurrent foreground guard holders, every round trip should complete +/// quickly — well under the 15 s IPC read timeout that produces EAGAIN. 
+#[test] +#[serial] +fn set_managed_session_for_app_round_trips_on_repeated_toggles() { + let sandbox = TestSandbox::new(); + seed_minimal_claude_provider(&sandbox); + + let mut daemon = sandbox.spawn_daemon(); + assert!( + sandbox.wait_for_socket(Duration::from_secs(10)), + "daemon socket should come up" + ); + + let state = cc_switch_lib::AppState::try_new().expect("create app state in sandbox"); + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("create test runtime"); + + for round in 0..3 { + let enable_started = Instant::now(); + let enable = runtime.block_on( + state + .proxy_service + .set_managed_session_for_app("claude", true), + ); + let enable_elapsed = enable_started.elapsed(); + assert!( + enable.is_ok(), + "round {round}: enable should succeed, got {enable:?}" + ); + assert!( + enable_elapsed < Duration::from_secs(5), + "round {round}: enable round-trip should not approach the 15 s IPC timeout; took {enable_elapsed:?}" + ); + + let status = send_request(sandbox.socket(), r#"{"kind":"status"}"#); + assert!( + status.contains("\"running\":true"), + "round {round}: worker should be running after enable, got {status}" + ); + + let disable_started = Instant::now(); + let disable = runtime.block_on( + state + .proxy_service + .set_managed_session_for_app("claude", false), + ); + let disable_elapsed = disable_started.elapsed(); + assert!( + disable.is_ok(), + "round {round}: disable should succeed, got {disable:?}" + ); + assert!( + disable_elapsed < Duration::from_secs(5), + "round {round}: disable round-trip should not approach the 15 s IPC timeout; took {disable_elapsed:?}" + ); + + // After disable, the daemon self-exits. Wait for the socket to go away + // so the next round's enable spawns a fresh daemon rather than racing + // the previous one's teardown. 
+ let deadline = Instant::now() + Duration::from_secs(5); + loop { + match try_send_request(sandbox.socket(), r#"{"kind":"status"}"#) { + None => break, + Some(status) if status.contains("\"running\":false") => { + // Daemon hasn't finished tearing down the socket yet but + // already reports stopped — wait for the socket to vanish. + if !sandbox.socket().exists() { + break; + } + } + Some(_) => {} + } + assert!( + Instant::now() < deadline, + "round {round}: daemon did not self-exit after disable" + ); + std::thread::sleep(Duration::from_millis(50)); + } + } + + // The original daemon handle was for round 0's daemon; subsequent rounds + // each spawned a fresh detached daemon on enable. The first one is long + // dead — just reap it. + let _ = daemon.wait(); +} + +/// Concurrency reproducer for "daemon ensure worker failed: Resource +/// temporarily unavailable (os error 35)". +/// +/// The supervisor's `ensure_worker` overwrites `pending_hello`/`pending_token` +/// when called concurrently — the first caller's oneshot is dropped and that +/// caller waits the full 10 s `WORKER_HELLO_TIMEOUT` before returning. If +/// `set_takeover_for_app` afterwards waits even briefly on the file lock, the +/// 15 s client read timeout fires and the foreground sees os error 35. +/// +/// We hit this from a single foreground process by issuing two +/// `set_managed_session_for_app(true)` calls in parallel — the same code path +/// the TUI uses when the user toggles proxy. 
+#[test] +#[serial] +fn concurrent_set_managed_session_does_not_time_out_on_ipc_read() { + let sandbox = TestSandbox::new(); + seed_minimal_claude_provider(&sandbox); + + let mut daemon = sandbox.spawn_daemon(); + assert!( + sandbox.wait_for_socket(Duration::from_secs(10)), + "daemon socket should come up" + ); + + let runtime = tokio::runtime::Builder::new_multi_thread() + .worker_threads(2) + .enable_all() + .build() + .expect("create test runtime"); + + let started = Instant::now(); + let (a, b) = runtime.block_on(async { + let state_a = cc_switch_lib::AppState::try_new().expect("create state a"); + let state_b = cc_switch_lib::AppState::try_new().expect("create state b"); + tokio::join!( + state_a + .proxy_service + .set_managed_session_for_app("claude", true), + state_b + .proxy_service + .set_managed_session_for_app("claude", true), + ) + }); + let elapsed = started.elapsed(); + + assert!( + elapsed < Duration::from_secs(10), + "concurrent enables should not approach the 15 s IPC timeout; took {elapsed:?}, a={a:?}, b={b:?}" + ); + assert!( + a.is_ok() && b.is_ok(), + "both concurrent enables should succeed; a={a:?}, b={b:?}" + ); + + let _ = send_request(sandbox.socket(), r#"{"kind":"shutdown"}"#); + let _ = daemon.wait(); +} + +/// Stress reproducer for the user-reported flake when toggling proxy via the +/// TUI: kill the worker process behind the daemon's back, then immediately +/// drive `set_managed_session_for_app("claude", true)`. The supervisor's +/// `inner.worker` is briefly stale (the watcher hasn't observed exit yet), so +/// `ensure_worker` returns the dead worker, then `set_takeover_for_app` runs in +/// the daemon and probes the dead session. This must still complete well under +/// the 15 s IPC timeout — if the daemon ever starts a foreground server inside +/// itself or blocks on the persisted session probe for too long, the client +/// surfaces "Resource temporarily unavailable (os error 35)". 
+#[test] +#[serial] +fn set_managed_session_for_app_recovers_when_worker_was_killed_externally() { + let sandbox = TestSandbox::new(); + seed_minimal_claude_provider(&sandbox); + + let mut daemon = sandbox.spawn_daemon(); + assert!( + sandbox.wait_for_socket(Duration::from_secs(10)), + "daemon socket should come up" + ); + + let state = cc_switch_lib::AppState::try_new().expect("create app state in sandbox"); + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("create test runtime"); + + runtime + .block_on( + state + .proxy_service + .set_managed_session_for_app("claude", true), + ) + .expect("initial enable should succeed"); + + // Read the worker pid out of the daemon's status response, then SIGKILL it + // without telling the daemon. Picks up everything between `worker_pid":` and + // the next non-digit, which is sufficient for the supervisor's status JSON. + let status_before = send_request(sandbox.socket(), r#"{"kind":"status"}"#); + let worker_pid = parse_worker_pid(&status_before).expect("worker pid in status"); + unsafe { + libc::kill(worker_pid as i32, libc::SIGKILL); + } + + // Race the daemon's watcher: re-enable immediately while inner.worker is + // still the now-dead worker. + let started = Instant::now(); + let result = runtime.block_on( + state + .proxy_service + .set_managed_session_for_app("claude", true), + ); + let elapsed = started.elapsed(); + + assert!( + elapsed < Duration::from_secs(5), + "set_managed_session_for_app(true) should not approach the 15 s IPC timeout after a worker kill; took {elapsed:?}, result={result:?}" + ); + assert!( + result.is_ok(), + "set_managed_session_for_app(true) should recover after a SIGKILL'd worker, got {result:?}" + ); + + let _ = send_request(sandbox.socket(), r#"{"kind":"shutdown"}"#); + let _ = daemon.wait(); +} + +fn parse_worker_pid(status_json: &str) -> Option { + let key = "\"worker_pid\":"; + let start = status_json.find(key)? 
+ key.len(); + let tail = &status_json[start..]; + let digits: String = tail.chars().take_while(|c| c.is_ascii_digit()).collect(); + digits.parse().ok() +} + +/// Seed minimal Claude live config and a per-test proxy port so worker startup +/// does not depend on the host's default proxy port being free. +fn seed_minimal_claude_provider(sandbox: &TestSandbox) { + let claude_dir = std::env::var_os("HOME") + .map(|h| Path::new(&h).join(".claude")) + .expect("HOME set in sandbox"); + std::fs::create_dir_all(&claude_dir).expect("create sandbox .claude"); + std::fs::write( + claude_dir.join("settings.json"), + r#"{"env":{"ANTHROPIC_API_KEY":"live-key"},"workspace":{"path":"/tmp/workspace"}}"#, + ) + .expect("seed sandbox claude settings"); + + let listen_port = free_loopback_port(); + let state = cc_switch_lib::AppState::try_new().expect("create app state in sandbox"); + let provider = cc_switch_lib::Provider::with_id( + "claude-provider".to_string(), + "Claude Provider".to_string(), + serde_json::json!({ + "env": { + "ANTHROPIC_API_KEY": "db-key" + } + }), + Some("claude".to_string()), + ); + state + .db + .save_provider("claude", &provider) + .expect("save sandbox claude provider"); + state + .db + .set_current_provider("claude", &provider.id) + .expect("set sandbox current claude provider"); + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("create test runtime"); + runtime.block_on(async { + let mut config = state + .db + .get_proxy_config_for_app("claude") + .await + .expect("load sandbox claude proxy config"); + config.listen_port = listen_port; + state + .db + .update_proxy_config_for_app(config) + .await + .expect("update sandbox claude proxy port"); + }); + + let _ = sandbox; // tie lifetime so the sandbox outlives this seed +} + +fn read_claude_settings_base_url() -> Option { + let settings_path = cc_switch_lib::get_claude_settings_path(); + let source = std::fs::read_to_string(settings_path).ok()?; + let value: 
serde_json::Value = serde_json::from_str(&source).ok()?; + value + .get("env")? + .get("ANTHROPIC_BASE_URL")? + .as_str() + .map(ToString::to_string) +} + +fn free_loopback_port() -> u16 { + let listener = std::net::TcpListener::bind(("127.0.0.1", 0)).expect("bind ephemeral port"); + listener + .local_addr() + .expect("read ephemeral address") + .port() +} + +#[test] +#[serial] +fn sigterm_restores_takeover_and_stops_worker() { + let sandbox = TestSandbox::new(); + seed_minimal_claude_provider(&sandbox); + + let mut daemon = sandbox.spawn_daemon(); + assert!( + sandbox.wait_for_socket(Duration::from_secs(10)), + "daemon socket should come up" + ); + + let ensure = send_request( + sandbox.socket(), + r#"{"kind":"ensure_worker","app_type":"claude"}"#, + ); + assert!( + ensure.contains("\"kind\":\"worker\""), + "expected Worker response, got {ensure}" + ); + let taken_over_url = read_claude_settings_base_url().expect("read taken-over claude base url"); + assert!( + taken_over_url.starts_with("http://127.0.0.1:"), + "claude base URL should point at local proxy before SIGTERM, got {taken_over_url}" + ); + + let pid = sandbox.read_pid().expect("read daemon pid"); + unsafe { + let rc = libc::kill(pid as i32, libc::SIGTERM); + assert_eq!(rc, 0, "SIGTERM should be delivered to daemon"); + } + + assert!( + wait_for_daemon_exit(&mut daemon, Duration::from_secs(5)), + "daemon should exit after SIGTERM" + ); + assert_eq!( + read_claude_settings_base_url().as_deref(), + None, + "SIGTERM shutdown should restore the original Claude live config without a base URL" + ); + assert!( + !sandbox.socket().exists(), + "socket file should be removed on SIGTERM cleanup" + ); + assert!( + !sandbox.pidfile().exists(), + "pidfile should be removed on SIGTERM cleanup" + ); +} + +#[test] +#[serial] +fn second_daemon_invocation_exits_cleanly_when_one_already_runs() { + let sandbox = TestSandbox::new(); + + let mut first = sandbox.spawn_daemon(); + assert!( + 
sandbox.wait_for_socket(Duration::from_secs(10)), + "first daemon should bring up socket" + ); + + // Second daemon: should detect the pidfile is locked and exit 0. + let second = std::process::Command::new(TestSandbox::binary()) + .arg("daemon") + .arg("start") + .stdin(std::process::Stdio::null()) + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .spawn() + .expect("spawn second daemon"); + let exit = second.wait_with_output().expect("await second daemon"); + assert!( + exit.status.success(), + "second daemon should exit cleanly, got {:?}", + exit.status + ); + + // First daemon should still be alive and reachable. + let response = send_request(sandbox.socket(), r#"{"kind":"status"}"#); + assert!(response.contains("\"kind\":\"status\"")); + + let _ = send_request(sandbox.socket(), r#"{"kind":"shutdown"}"#); + let _ = first.wait(); +} + +/// Self-exit invariant: after the last `drop_takeover`, the daemon should +/// shut itself down so an idle supervisor doesn't outlive its purpose and +/// later get SIGKILL'd (which is the path that leaks a stale socket inode +/// and breaks subsequent disables — the original user-reported bug). 
+#[test] +#[serial] +fn daemon_self_exits_after_last_drop_takeover() { + let sandbox = TestSandbox::new(); + seed_minimal_claude_provider(&sandbox); + + let mut daemon = sandbox.spawn_daemon(); + assert!( + sandbox.wait_for_socket(Duration::from_secs(10)), + "daemon socket should come up" + ); + + let ensure = send_request( + sandbox.socket(), + r#"{"kind":"ensure_worker","app_type":"claude"}"#, + ); + assert!( + ensure.contains("\"kind\":\"worker\""), + "expected Worker response, got {ensure}" + ); + + let drop_resp = send_request( + sandbox.socket(), + r#"{"kind":"drop_takeover","app_type":"claude"}"#, + ); + assert!( + drop_resp.contains("\"kind\":\"ok\""), + "expected Ok response from drop_takeover, got {drop_resp}" + ); + + assert!( + wait_for_daemon_exit(&mut daemon, Duration::from_secs(5)), + "daemon should self-exit after the last takeover is dropped" + ); + + // Socket + pidfile must be cleaned up so the next disable doesn't see a + // stale socket and produce ECONNREFUSED. + assert!( + !sandbox.socket().exists(), + "socket file should be removed on daemon exit, still at {}", + sandbox.socket().display() + ); + assert!( + !sandbox.pidfile().exists(), + "pidfile should be removed on daemon exit, still at {}", + sandbox.pidfile().display() + ); + + // And the meta DB should agree: no runtime session row left behind. + let state = cc_switch_lib::AppState::try_new().expect("create app state in sandbox"); + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("create test runtime"); + let status = runtime.block_on(state.proxy_service.get_status()); + assert!( + !status.running, + "proxy_runtime_session must be cleared after daemon self-exit, got {status:?}" + ); +} + +/// `set_managed_session_for_app("claude", false)` must succeed even when the +/// daemon socket is stale (file present on disk, no listener bound). 
This is +/// the exact failure mode the user hit on the TUI `P` hotkey: a prior daemon +/// crashed/got SIGKILL'd, leaving `daemon.sock` behind, and every subsequent +/// disable attempt tripped over `Connection refused (os error 61)`. +#[test] +#[serial] +fn set_managed_session_for_app_false_recovers_when_socket_is_stale() { + let sandbox = TestSandbox::new(); + seed_minimal_claude_provider(&sandbox); + + // Fabricate a stale socket inode: bind a UnixListener and immediately drop + // it. On macOS + Linux the file lingers on disk, but connect() returns + // ECONNREFUSED — exactly what a dead-daemon leftover looks like. + std::fs::create_dir_all(sandbox.socket().parent().expect("socket parent")) + .expect("ensure runtime dir"); + let listener = + std::os::unix::net::UnixListener::bind(sandbox.socket()).expect("bind stale socket"); + drop(listener); + assert!( + sandbox.socket().exists(), + "stale socket should exist on disk after bind+drop" + ); + assert!( + UnixStream::connect(sandbox.socket()).is_err(), + "stale socket should refuse connections" + ); + + let state = cc_switch_lib::AppState::try_new().expect("create app state in sandbox"); + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("create test runtime"); + + let started = Instant::now(); + let result = runtime.block_on( + state + .proxy_service + .set_managed_session_for_app("claude", false), + ); + let elapsed = started.elapsed(); + + assert!( + result.is_ok(), + "set_managed_session_for_app(false) should fall back to local cleanup \ + when the daemon socket is stale; got {result:?}" + ); + assert!( + elapsed < Duration::from_secs(5), + "stale-socket fallback should be fast (no IPC timeout); took {elapsed:?}" + ); + assert!( + !sandbox.socket().exists(), + "stale socket should be removed by the fallback so subsequent calls \ + take the no-socket short-circuit" + ); + + // Meta DB: no phantom runtime session. 
+ let status = runtime.block_on(state.proxy_service.get_status()); + assert!( + !status.running, + "proxy status must report not-running after stale-socket disable, got {status:?}" + ); +} + +/// `notify_global_switch` is the proxy-settings-page sibling of the same +/// bug: it used to bubble up ECONNREFUSED when the daemon socket was stale. +/// It now treats a stale socket as "no daemon — nothing to align" and +/// returns Ok, after cleaning up the stale inode. +#[test] +#[serial] +fn notify_global_switch_treats_stale_socket_as_no_daemon() { + let sandbox = TestSandbox::new(); + + std::fs::create_dir_all(sandbox.socket().parent().expect("socket parent")) + .expect("ensure runtime dir"); + let listener = + std::os::unix::net::UnixListener::bind(sandbox.socket()).expect("bind stale socket"); + drop(listener); + assert!(sandbox.socket().exists(), "stale socket should exist"); + + let result = cc_switch_lib::daemon::notify_global_switch(false); + assert!( + result.is_ok(), + "notify_global_switch must succeed against a stale socket, got {result:?}" + ); + assert!( + !sandbox.socket().exists(), + "stale socket should be removed by notify_global_switch fallback" + ); +} From 970eef163232a9dd590693a3cfb104742bc49ae3 Mon Sep 17 00:00:00 2001 From: saladday <1203511142@qq.com> Date: Thu, 21 May 2026 13:02:28 +0800 Subject: [PATCH 2/5] (fix) daemon proxy lifecycle --- src-tauri/Cargo.lock | 1 - src-tauri/Cargo.toml | 1 - src-tauri/src/cli/commands/proxy.rs | 85 ++- src-tauri/src/daemon/logging.rs | 29 +- src-tauri/src/daemon/mod.rs | 2 +- src-tauri/src/daemon/supervisor.rs | 881 +++++++++++++++++++++++++--- src-tauri/src/lib.rs | 1 + src-tauri/src/main.rs | 50 +- src-tauri/src/services/proxy.rs | 228 ++++++- src-tauri/tests/proxy_service.rs | 364 +++++++----- src-tauri/tests/support.rs | 4 + 11 files changed, 1354 insertions(+), 292 deletions(-) diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index 1cdabba4..c3a1ed04 100644 --- a/src-tauri/Cargo.lock +++ 
b/src-tauri/Cargo.lock @@ -495,7 +495,6 @@ dependencies = [ "log", "minisign", "minisign-verify", - "nix", "once_cell", "ratatui", "regex", diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index da60a72c..0c178e72 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -64,7 +64,6 @@ json-five = "0.3.1" # Network and async reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "json", "stream", "socks"] } tokio = { version = "1", features = ["macros", "rt-multi-thread", "time", "sync", "signal", "net", "process", "io-util"] } -nix = { version = "0.29", features = ["signal", "process", "fs"] } futures = "0.3" async-stream = "0.3" bytes = "1.5" diff --git a/src-tauri/src/cli/commands/proxy.rs b/src-tauri/src/cli/commands/proxy.rs index f6a71a9c..c149271e 100644 --- a/src-tauri/src/cli/commands/proxy.rs +++ b/src-tauri/src/cli/commands/proxy.rs @@ -13,6 +13,9 @@ use crate::daemon::ipc::protocol::{Request as DaemonRequest, Response as DaemonR #[cfg(unix)] use crate::daemon::supervisor::{DAEMON_SOCKET_ENV, SESSION_TOKEN_ENV}; +const MIN_PROXY_LISTEN_PORT: u16 = 1024; +const MAX_PROXY_LISTEN_PORT: u16 = u16::MAX; + #[derive(Subcommand, Debug, Clone)] pub enum ProxyCommand { /// Show current proxy configuration and routes @@ -129,6 +132,7 @@ fn configure_proxy(app_type: AppType, listen_port: Option) -> Result<(), Ap let Some(listen_port) = listen_port else { return show_proxy(); }; + validate_listen_port(listen_port)?; if !matches!(app_type, AppType::Claude | AppType::Codex | AppType::Gemini) { return Err(AppError::InvalidInput(format!( "proxy takeover is not supported for {}", @@ -184,7 +188,7 @@ fn serve_proxy( )); } let base_config = service.get_config().await?; - let effective_config = apply_overrides(&base_config, listen_address, listen_port); + let effective_config = apply_overrides(&base_config, listen_address, listen_port)?; let result = async { let server_info = service @@ -192,20 +196,33 @@ fn serve_proxy( .await 
.map_err(AppError::Message)?; - if let Err(err) = apply_takeovers(&service, &takeovers).await { - let _ = service.stop_with_restore().await; - return Err(AppError::Message(err)); - } + let announced_to_daemon = { + #[cfg(unix)] + { + match announce_to_daemon_if_managed(&server_info) { + Ok(announced) => announced, + Err(err) => { + let _ = service.stop_with_restore().await; + return Err(AppError::Message(err)); + } + } + } + #[cfg(not(unix))] + { + false + } + }; - #[cfg(unix)] - if let Err(err) = announce_to_daemon_if_managed(&server_info) { + if let Err(err) = apply_takeovers(&service, &takeovers).await { let _ = service.stop_with_restore().await; return Err(AppError::Message(err)); } - if let Err(err) = service.publish_runtime_session_if_needed(&server_info) { - let _ = service.stop_with_restore().await; - return Err(AppError::Message(err)); + if !announced_to_daemon { + if let Err(err) = service.publish_runtime_session_if_needed(&server_info) { + let _ = service.stop_with_restore().await; + return Err(AppError::Message(err)); + } } crate::services::state_coordination::clear_restore_mutation_guard_bypass_env(); @@ -275,9 +292,9 @@ fn serve_proxy( #[cfg(unix)] fn announce_to_daemon_if_managed( info: &crate::proxy::types::ProxyServerInfo, -) -> Result<(), String> { +) -> Result { let Some(socket_os) = std::env::var_os(DAEMON_SOCKET_ENV) else { - return Ok(()); + return Ok(false); }; let socket_path = std::path::PathBuf::from(socket_os); let session_token = std::env::var(SESSION_TOKEN_ENV) @@ -291,7 +308,7 @@ fn announce_to_daemon_if_managed( let response = daemon_client::round_trip(&socket_path, &request) .map_err(|err| format!("worker hello to daemon failed: {err}"))?; match response { - DaemonResponse::Ok => Ok(()), + DaemonResponse::Ok => Ok(true), DaemonResponse::Error { message } => { Err(format!("daemon rejected worker hello: {message}")) } @@ -326,7 +343,7 @@ fn apply_overrides( original: &ProxyConfig, listen_address: Option, listen_port: Option, -) -> 
ProxyConfig { +) -> Result { let mut config = original.clone(); if let Some(address) = listen_address { config.listen_address = address; @@ -334,7 +351,17 @@ fn apply_overrides( if let Some(port) = listen_port { config.listen_port = port; } - config + Ok(config) +} + +fn validate_listen_port(port: u16) -> Result<(), AppError> { + if (MIN_PROXY_LISTEN_PORT..=MAX_PROXY_LISTEN_PORT).contains(&port) { + return Ok(()); + } + + Err(AppError::InvalidInput(format!( + "proxy listen port must be between {MIN_PROXY_LISTEN_PORT} and {MAX_PROXY_LISTEN_PORT}" + ))) } fn load_proxy_app_configs( @@ -584,7 +611,33 @@ mod tests { Database, MultiAppConfig, ProxyService, }; - use super::{build_proxy_overview_lines, load_proxy_app_configs}; + use super::{ + apply_overrides, build_proxy_overview_lines, load_proxy_app_configs, validate_listen_port, + }; + + #[test] + fn cli_proxy_listen_port_validation_rejects_reserved_ports() { + let error = validate_listen_port(0).expect_err("port 0 should not be accepted from CLI"); + + assert!(error.to_string().contains("1024")); + } + + #[test] + fn apply_overrides_allows_ephemeral_listen_port_for_foreground_serve() { + let config = crate::ProxyConfig::default(); + let updated = apply_overrides(&config, None, Some(0)) + .expect("foreground serve should allow an ephemeral port"); + + assert_eq!(updated.listen_port, 0); + } + + #[test] + fn apply_overrides_accepts_user_listen_port_range() { + let config = crate::ProxyConfig::default(); + let updated = apply_overrides(&config, None, Some(1024)).expect("1024 is allowed"); + + assert_eq!(updated.listen_port, 1024); + } #[test] fn proxy_overview_lines_include_runtime_status_and_takeover_state() { diff --git a/src-tauri/src/daemon/logging.rs b/src-tauri/src/daemon/logging.rs index a2b90a81..c5f5a2b5 100644 --- a/src-tauri/src/daemon/logging.rs +++ b/src-tauri/src/daemon/logging.rs @@ -17,7 +17,7 @@ use log::{Level, LevelFilter, Log, Metadata, Record}; const MAX_LOG_BYTES: u64 = 5 * 1024 * 1024; -static 
INSTALLED: OnceLock<()> = OnceLock::new(); +static INSTALL_RESULT: OnceLock> = OnceLock::new(); struct DaemonLogger { path: PathBuf, @@ -125,24 +125,19 @@ fn level_label(level: Level) -> &'static str { /// log path. Idempotent — subsequent calls return the originally chosen path /// without re-installing. pub fn install(path: &Path, level: LevelFilter) -> Result { - let mut installed_path: Option = None; - INSTALLED.get_or_init(|| match DaemonLogger::open(path, level) { - Ok(logger) => { + INSTALL_RESULT + .get_or_init(|| { + let logger = DaemonLogger::open(path, level) + .map_err(|err| format!("open daemon logger at {} failed: {err}", path.display()))?; let resolved = logger.path.clone(); let boxed: Box = Box::new(logger); - if log::set_boxed_logger(boxed).is_ok() { - log::set_max_level(level); - } - installed_path = Some(resolved); - } - Err(_) => { - installed_path = None; - } - }); - - installed_path - .or_else(|| Some(path.to_path_buf())) - .ok_or_else(|| format!("install daemon logger at {} failed", path.display())) + log::set_boxed_logger(boxed).map_err(|err| { + format!("install daemon logger at {} failed: {err}", path.display()) + })?; + log::set_max_level(level); + Ok(resolved) + }) + .clone() } #[cfg(test)] diff --git a/src-tauri/src/daemon/mod.rs b/src-tauri/src/daemon/mod.rs index df934576..d1504c6e 100644 --- a/src-tauri/src/daemon/mod.rs +++ b/src-tauri/src/daemon/mod.rs @@ -81,7 +81,7 @@ pub async fn run(binary_path: PathBuf) -> Result<(), String> { } }; - let _ = logging::install(&log_path, LevelFilter::Info); + logging::install(&log_path, LevelFilter::Info)?; log::info!( "[daemon] starting; pid={} socket={} log={}", std::process::id(), diff --git a/src-tauri/src/daemon/supervisor.rs b/src-tauri/src/daemon/supervisor.rs index e1e0072a..06ed8d02 100644 --- a/src-tauri/src/daemon/supervisor.rs +++ b/src-tauri/src/daemon/supervisor.rs @@ -50,11 +50,28 @@ struct SupervisorInner { workers: HashMap, pending_hellos: HashMap>, pending_tokens: HashMap, - 
stopping_workers: HashSet, + pending_worker_pids: HashMap, + stopping_workers: HashSet<(AppType, u32)>, + cancelled_apps: HashSet, restart: RestartPolicy, last_restart_at: Option>, restart_count: u32, shutdown_requested: bool, + teardown_in_progress: bool, +} + +struct WorkerStopPlan { + pids: Vec, + should_shutdown: bool, + previous_shutdown_requested: bool, + cancelled_pending: Vec, +} + +struct CancelledPendingWorker { + app: AppType, + pid: u32, + token: Option, + hello: Option>, } #[derive(Clone)] @@ -96,21 +113,33 @@ impl Supervisor { self.proxy.recover_takeovers_on_startup().await } - /// Bring up a worker if none is running, then return its bound address. - /// - /// Concurrent callers serialize through `spawn_lock` so we never spawn two - /// workers in parallel (which would fight for the listen port and corrupt - /// `pending_hello`). After acquiring the lock we re-check `inner.worker` so - /// later callers reuse the worker the first one brought up. - async fn ensure_worker(&self, app: AppType) -> Result { - let _spawn_guard = self.spawn_lock.lock().await; + async fn ensure_worker_locked(&self, app: AppType) -> Result { let app_key = app.as_str().to_string(); let (session_token, hello_rx) = { let mut inner = self.inner.lock().await; + if inner.shutdown_requested || inner.teardown_in_progress { + return Err("proxy daemon is shutting down".to_string()); + } if let Some(info) = inner.workers.get(&app).cloned() { + if inner.stopping_workers.contains(&(app.clone(), info.pid)) { + return Err(format!( + "{app_key} proxy worker is stopping; retry after it exits" + )); + } + inner.cancelled_apps.remove(&app); return Ok(info); } + if inner + .stopping_workers + .iter() + .any(|(stopping_app, _)| stopping_app == &app) + { + return Err(format!( + "{app_key} proxy worker is stopping; retry after it exits" + )); + } + inner.cancelled_apps.remove(&app); let (tx, rx) = oneshot::channel(); inner.pending_hellos.insert(app_key.clone(), tx); let token = 
uuid::Uuid::new_v4().to_string(); @@ -118,16 +147,20 @@ impl Supervisor { (token, rx) }; - let app_config = self - .db - .get_proxy_config_for_app(&app_key) - .await - .map_err(|err| format!("load proxy config for {app_key} failed: {err}"))?; - let global_config = self - .db - .get_global_proxy_config() - .await - .map_err(|err| format!("load global proxy config failed: {err}"))?; + let app_config = match self.db.get_proxy_config_for_app(&app_key).await { + Ok(config) => config, + Err(err) => { + self.clear_pending_worker_registration(&app).await; + return Err(format!("load proxy config for {app_key} failed: {err}")); + } + }; + let global_config = match self.db.get_global_proxy_config().await { + Ok(config) => config, + Err(err) => { + self.clear_pending_worker_registration(&app).await; + return Err(format!("load global proxy config failed: {err}")); + } + }; let mut cmd = Command::new(&self.binary_path); cmd.arg("proxy") @@ -145,12 +178,24 @@ impl Supervisor { .stderr(Stdio::null()) .kill_on_drop(true); - let spawned = cmd - .spawn() - .map_err(|err| format!("spawn {app_key} proxy worker failed: {err}"))?; - let pid = spawned - .id() - .ok_or_else(|| format!("spawned {app_key} worker has no pid"))?; + let spawned = match cmd.spawn() { + Ok(child) => child, + Err(err) => { + self.clear_pending_worker_registration(&app).await; + return Err(format!("spawn {app_key} proxy worker failed: {err}")); + } + }; + let pid = match spawned.id() { + Some(pid) => pid, + None => { + self.clear_pending_worker_registration(&app).await; + return Err(format!("spawned {app_key} worker has no pid")); + } + }; + { + let mut inner = self.inner.lock().await; + inner.pending_worker_pids.insert(app.clone(), pid); + } log::info!("[daemon] spawned {app_key} worker pid={pid}"); let supervisor = self.clone(); @@ -161,16 +206,35 @@ impl Supervisor { let info = match tokio::time::timeout(WORKER_HELLO_TIMEOUT, hello_rx).await { Ok(Ok(info)) => info, - Ok(Err(_)) => return Err(format!("{app_key} 
worker exited before hello")), - Err(_) => return Err(format!("{app_key} worker hello timed out")), + Ok(Err(_)) => { + self.clear_pending_worker_registration(&app).await; + return Err(format!("{app_key} worker exited before hello")); + } + Err(_) => { + self.abandon_starting_worker(&app, Some(pid)).await; + return Err(format!("{app_key} worker hello timed out")); + } }; + let became_stopping = { + let inner = self.inner.lock().await; + inner.shutdown_requested + || inner.teardown_in_progress + || inner.stopping_workers.contains(&(app.clone(), info.pid)) + }; + if became_stopping { + self.abandon_starting_worker(&app, Some(info.pid)).await; + return Err("proxy daemon is shutting down".to_string()); + } + { let mut inner = self.inner.lock().await; inner.workers.insert(app.clone(), info.clone()); inner.last_restart_at = Some(chrono::Utc::now()); inner.restart.on_worker_started(Instant::now()); inner.pending_tokens.remove(&app_key); + inner.pending_worker_pids.remove(&app); + inner.shutdown_requested = false; } self.persist_runtime_session().await?; Ok(info) @@ -186,20 +250,31 @@ impl Supervisor { } }; - let info = match self.ensure_worker(app.clone()).await { + if let Err(err) = self.proxy.validate_app_proxy_activation(&app, None).await { + return Response::Error { message: err }; + } + + let _spawn_guard = self.spawn_lock.lock().await; + let info = match self.ensure_worker_locked(app.clone()).await { Ok(info) => info, - Err(err) => { - return Response::Error { message: err }; - } + Err(err) => return Response::Error { message: err }, }; - if let Err(err) = self.proxy.set_global_enabled(true).await { - return Response::Error { - message: err.to_string(), - }; + let activation = async { + self.proxy + .set_global_enabled(true) + .await + .map_err(|err| err.to_string())?; + self.proxy.set_takeover_for_app(app.as_str(), true).await } + .await; - if let Err(err) = self.proxy.set_takeover_for_app(app.as_str(), true).await { + if let Err(err) = activation { + log::warn!( + 
"[daemon] enabling {} takeover failed after worker start, cleaning up: {err}", + app.as_str() + ); + self.stop_worker_after_enable_failure(app.clone()).await; return Response::Error { message: err }; } @@ -211,6 +286,191 @@ impl Supervisor { } } + async fn clear_pending_worker_registration(&self, app: &AppType) { + let app_key = app.as_str().to_string(); + let mut inner = self.inner.lock().await; + inner.pending_tokens.remove(&app_key); + inner.pending_hellos.remove(&app_key); + inner.pending_worker_pids.remove(app); + } + + async fn abandon_starting_worker(&self, app: &AppType, pid: Option) { + let app_key = app.as_str().to_string(); + { + let mut inner = self.inner.lock().await; + inner.pending_tokens.remove(&app_key); + inner.pending_hellos.remove(&app_key); + inner.pending_worker_pids.remove(app); + if let Some(pid) = pid { + inner.stopping_workers.insert((app.clone(), pid)); + } + } + if let Err(err) = send_sigterm(pid) { + log::warn!("[daemon] stopping abandoned {app_key} worker failed: {err}"); + } + } + + async fn stop_worker_after_enable_failure(&self, app: AppType) { + let plan = self.plan_stop_for_app(app.clone()).await; + + if let Err(err) = self.proxy.clear_daemon_takeover_for_app(app.as_str()).await { + log::warn!( + "[daemon] restoring {} takeover after enable failure failed: {err}", + app.as_str() + ); + } + + if let Err(err) = self.persist_runtime_session().await { + log::warn!( + "[daemon] clearing runtime session after {} enable failure failed: {err}", + app.as_str() + ); + } + + let takeovers = self.read_takeover_flags().await; + let has_active_takeover = takeovers.claude || takeovers.codex || takeovers.gemini; + if !has_active_takeover { + if let Err(err) = self.proxy.set_global_enabled(false).await { + log::warn!( + "[daemon] clearing global proxy switch after {} enable failure failed: {err}", + app.as_str() + ); + } + } + for pid in &plan.pids { + if let Err(err) = send_sigterm(Some(*pid)) { + log::warn!( + "[daemon] stopping {} worker after 
enable failure failed: {err}", + app.as_str() + ); + } + } + if plan.should_shutdown && plan.pids.is_empty() { + self.shutdown_notify.notify_waiters(); + } + } + + fn has_remaining_workers_locked(inner: &SupervisorInner) -> bool { + !inner.workers.is_empty() || !inner.pending_worker_pids.is_empty() + } + + fn remaining_workers_are_only_stopping_locked(inner: &SupervisorInner) -> bool { + Self::has_remaining_workers_locked(inner) + && inner + .workers + .iter() + .all(|(app, worker)| inner.stopping_workers.contains(&(app.clone(), worker.pid))) + && inner + .pending_worker_pids + .iter() + .all(|(app, pid)| inner.stopping_workers.contains(&(app.clone(), *pid))) + } + + async fn plan_stop_for_app(&self, app: AppType) -> WorkerStopPlan { + let app_key = app.as_str().to_string(); + let mut inner = self.inner.lock().await; + let mut pids = Vec::new(); + let previous_shutdown_requested = inner.shutdown_requested; + let mut cancelled_pending = Vec::new(); + inner.cancelled_apps.insert(app.clone()); + + if let Some(pid) = inner.workers.get(&app).map(|info| info.pid) { + inner.stopping_workers.insert((app.clone(), pid)); + pids.push(pid); + } + if let Some(pid) = inner.pending_worker_pids.remove(&app) { + inner.stopping_workers.insert((app.clone(), pid)); + pids.push(pid); + cancelled_pending.push(CancelledPendingWorker { + app: app.clone(), + pid, + token: inner.pending_tokens.remove(&app_key), + hello: inner.pending_hellos.remove(&app_key), + }); + } + + pids.sort_unstable(); + pids.dedup(); + let target_had_worker = !pids.is_empty(); + let no_remaining_workers = !Self::has_remaining_workers_locked(&inner) + || (target_had_worker && Self::remaining_workers_are_only_stopping_locked(&inner)); + if target_had_worker && no_remaining_workers { + inner.shutdown_requested = true; + } + + WorkerStopPlan { + pids, + should_shutdown: target_had_worker && no_remaining_workers, + previous_shutdown_requested, + cancelled_pending, + } + } + + async fn rollback_stop_plan_for_app(&self, 
app: &AppType, mut plan: WorkerStopPlan) { + let mut inner = self.inner.lock().await; + for pid in &plan.pids { + inner.stopping_workers.remove(&(app.clone(), *pid)); + } + inner.cancelled_apps.remove(app); + for pending in plan.cancelled_pending.drain(..) { + let app_key = pending.app.as_str().to_string(); + inner.pending_worker_pids.insert(pending.app, pending.pid); + if let Some(token) = pending.token { + inner.pending_tokens.insert(app_key.clone(), token); + } + if let Some(hello) = pending.hello { + inner.pending_hellos.insert(app_key, hello); + } + } + inner.shutdown_requested = plan.previous_shutdown_requested; + } + + async fn plan_stop_all_workers(&self, teardown_in_progress: bool) -> Vec { + let mut inner = self.inner.lock().await; + inner.shutdown_requested = true; + if teardown_in_progress { + inner.teardown_in_progress = true; + } + inner + .cancelled_apps + .extend([AppType::Claude, AppType::Codex, AppType::Gemini]); + + let workers = inner + .workers + .iter() + .map(|(app, worker)| (app.clone(), worker.pid)) + .collect::>(); + let pending = inner + .pending_worker_pids + .iter() + .map(|(app, pid)| (app.clone(), *pid)) + .collect::>(); + + let mut pids = Vec::new(); + for (app, pid) in workers.into_iter().chain(pending.into_iter()) { + inner.stopping_workers.insert((app, pid)); + pids.push(pid); + } + + let pending_apps = inner + .pending_worker_pids + .keys() + .cloned() + .collect::>(); + for app in pending_apps { + inner.pending_worker_pids.remove(&app); + let app_key = app.as_str().to_string(); + inner.pending_tokens.remove(&app_key); + if let Some(tx) = inner.pending_hellos.remove(&app_key) { + drop(tx); + } + } + + pids.sort_unstable(); + pids.dedup(); + pids + } + async fn handle_drop_takeover(&self, app_type: &str) -> Response { let app = match parse_app_type(app_type) { Some(a) => a, @@ -221,27 +481,32 @@ impl Supervisor { } }; - if let Err(err) = self.proxy.set_takeover_for_app(app.as_str(), false).await { + let _spawn_guard = 
self.spawn_lock.lock().await; + let stop_plan = self.plan_stop_for_app(app.clone()).await; + if let Err(err) = self.proxy.clear_daemon_takeover_for_app(app.as_str()).await { + self.rollback_stop_plan_for_app(&app, stop_plan).await; return Response::Error { message: err }; } - - let (stop_pid, had_worker, should_shutdown) = { - let mut inner = self.inner.lock().await; - let pid = inner.workers.get(&app).map(|w| w.pid); - if pid.is_some() { - inner.stopping_workers.insert(app.clone()); - if inner.workers.len() <= 1 { - inner.shutdown_requested = true; - } + let takeovers = self.read_takeover_flags().await; + let has_active_takeover = takeovers.claude || takeovers.codex || takeovers.gemini; + let mut global_disable_error = None; + if !has_active_takeover { + if let Err(err) = self.proxy.set_global_enabled(false).await { + global_disable_error = Some(err.to_string()); } - (pid, pid.is_some(), inner.workers.len() <= 1) - }; - let _ = send_sigterm(stop_pid); - if had_worker { + } + + for pid in &stop_plan.pids { + let _ = send_sigterm(Some(*pid)); + } + if !stop_plan.pids.is_empty() { tokio::time::sleep(Duration::from_millis(100)).await; - } else if should_shutdown { + } else if stop_plan.should_shutdown { self.shutdown_notify.notify_waiters(); } + if let Some(message) = global_disable_error { + return Response::Error { message }; + } Response::Ok } @@ -263,17 +528,28 @@ impl Supervisor { message: "session token mismatch".to_string(), }; }; - let Some(tx) = inner.pending_hellos.remove(&app_key) else { - log::warn!("[daemon] worker hello received but no pending ensure (pid={pid})"); + let Some(app_type) = parse_app_type(&app_key) else { return Response::Error { - message: "no pending worker registration".to_string(), + message: format!("proxy takeover not supported for app: {app_key}"), }; }; - let Some(app_type) = parse_app_type(&app_key) else { + if let Some(expected_pid) = inner.pending_worker_pids.get(&app_type) { + if *expected_pid != pid { + log::warn!( + "[daemon] 
worker hello pid mismatch for {app_key}: expected {expected_pid}, got {pid}" + ); + return Response::Error { + message: "worker pid mismatch".to_string(), + }; + } + } + let Some(tx) = inner.pending_hellos.remove(&app_key) else { + log::warn!("[daemon] worker hello received but no pending ensure (pid={pid})"); return Response::Error { - message: format!("proxy takeover not supported for app: {app_key}"), + message: "no pending worker registration".to_string(), }; }; + inner.pending_worker_pids.remove(&app_type); let info = WorkerInfo { app_type, pid, @@ -299,6 +575,13 @@ impl Supervisor { } } + let _spawn_guard = self.spawn_lock.lock().await; + if let Err(err) = self.proxy.set_global_enabled(false).await { + return Response::Error { + message: err.to_string(), + }; + } + // Disabling: drop every active takeover so each app's live config is // restored, then stop the worker. We snapshot the active list under // the inner lock so we don't hold it while running per-app restores @@ -315,7 +598,7 @@ impl Supervisor { } } for app in &active { - if let Err(err) = self.proxy.set_takeover_for_app(app.as_str(), false).await { + if let Err(err) = self.proxy.clear_daemon_takeover_for_app(app.as_str()).await { log::warn!( "[daemon] set_global_enabled(false): drop takeover for {} failed: {err}", app.as_str() @@ -323,11 +606,7 @@ impl Supervisor { } } - let stop_pids = { - let mut inner = self.inner.lock().await; - inner.shutdown_requested = true; - inner.workers.values().map(|w| w.pid).collect::>() - }; + let stop_pids = self.plan_stop_all_workers(false).await; for pid in &stop_pids { let _ = send_sigterm(Some(*pid)); } @@ -375,11 +654,8 @@ impl Supervisor { } pub async fn shutdown(&self) { - let stop_pids = { - let mut inner = self.inner.lock().await; - inner.shutdown_requested = true; - inner.workers.values().map(|w| w.pid).collect::>() - }; + let _spawn_guard = self.spawn_lock.lock().await; + let stop_pids = self.plan_stop_all_workers(true).await; for pid in stop_pids { let _ 
= send_sigterm(Some(pid)); } @@ -415,29 +691,21 @@ impl Supervisor { }; log::info!("[daemon] {app_key} worker pid={pid} exited: {exit_status}"); - let (intentional, has_remaining_workers) = { - let mut inner = self.inner.lock().await; - inner.workers.remove(&app); - inner.pending_tokens.remove(&app_key); - if let Some(tx) = inner.pending_hellos.remove(&app_key) { - drop(tx); - } - let intentional = inner.shutdown_requested || inner.stopping_workers.remove(&app); - (intentional, !inner.workers.is_empty()) - }; + let (intentional, has_remaining_workers, teardown_in_progress) = + self.record_worker_exit(&app, pid).await; let _ = self.persist_runtime_session().await; if intentional { log::info!("[daemon] {app_key} worker exit was expected, not restarting"); - if !has_remaining_workers { + if !has_remaining_workers && !teardown_in_progress { log::info!("[daemon] no remaining workers, exiting"); self.shutdown_notify.notify_waiters(); } return; } - if let Err(err) = self.proxy.set_takeover_for_app(app.as_str(), false).await { + if let Err(err) = self.proxy.clear_daemon_takeover_for_app(app.as_str()).await { log::warn!("[daemon] restore takeover for {app_key} failed: {err}"); } @@ -454,6 +722,16 @@ impl Supervisor { attempt + 1 ); tokio::time::sleep(delay).await; + if !self.should_restart_after_crash(&app).await { + log::info!( + "[daemon] {} worker restart cancelled after route was disabled", + app.as_str() + ); + if !has_remaining_workers && !teardown_in_progress { + self.shutdown_notify.notify_waiters(); + } + return; + } { let mut inner = self.inner.lock().await; inner.restart_count = inner.restart_count.saturating_add(1); @@ -466,19 +744,78 @@ impl Supervisor { log::error!( "[daemon] {app_key} worker pid={pid} circuit-broke after repeated crashes" ); - if !has_remaining_workers { + if !has_remaining_workers && !teardown_in_progress { self.shutdown_notify.notify_waiters(); } } } } + async fn record_worker_exit(&self, app: &AppType, pid: u32) -> (bool, bool, bool) { + 
let app_key = app.as_str().to_string(); + let mut inner = self.inner.lock().await; + + let registered_pid = inner.workers.get(app).map(|worker| worker.pid); + let was_registered = registered_pid == Some(pid); + if was_registered { + inner.workers.remove(app); + } + + let pending_pid = inner.pending_worker_pids.get(app).copied(); + let was_pending_startup = pending_pid == Some(pid); + if was_pending_startup { + inner.pending_worker_pids.remove(app); + inner.pending_tokens.remove(&app_key); + if let Some(tx) = inner.pending_hellos.remove(&app_key) { + drop(tx); + } + } + + let was_stopping = inner.stopping_workers.remove(&(app.clone(), pid)); + let stale_exit = registered_pid.is_some_and(|current_pid| current_pid != pid) + || pending_pid.is_some_and(|current_pid| current_pid != pid); + let intentional = inner.shutdown_requested + || was_stopping + || (!was_registered && was_pending_startup) + || stale_exit; + + let has_remaining_workers = Self::has_remaining_workers_locked(&inner); + ( + intentional, + has_remaining_workers, + inner.teardown_in_progress, + ) + } + + async fn should_restart_after_crash(&self, app: &AppType) -> bool { + let inner = self.inner.lock().await; + !inner.shutdown_requested + && !inner.teardown_in_progress + && !inner.cancelled_apps.contains(app) + } + fn respawn_after_crash<'a>( &'a self, app: AppType, ) -> std::pin::Pin> + Send + 'a>> { Box::pin(async move { - let _info = self.ensure_worker(app.clone()).await?; + let _spawn_guard = self.spawn_lock.lock().await; + if !self.should_restart_after_crash(&app).await { + return Err(format!( + "{} proxy worker restart was cancelled", + app.as_str() + )); + } + let _info = self.ensure_worker_locked(app.clone()).await?; + { + let inner = self.inner.lock().await; + if inner.shutdown_requested + || inner.teardown_in_progress + || inner.cancelled_apps.contains(&app) + { + return Err("proxy daemon is shutting down".to_string()); + } + } if let Err(err) = self.proxy.set_takeover_for_app(app.as_str(), 
true).await { log::warn!( "[daemon] re-applying takeover for {} after restart failed: {err}", @@ -577,3 +914,385 @@ fn send_sigterm(pid: Option) -> Result<(), String> { } Ok(()) } + +#[cfg(test)] +mod tests { + use std::ffi::OsString; + use std::path::{Path, PathBuf}; + use std::sync::Arc; + + use serde_json::json; + + use super::*; + use crate::daemon::ipc::protocol::Response; + use crate::provider::Provider; + use crate::test_support::{lock_test_home_and_settings, set_test_home_override}; + + struct TestHomeEnvGuard { + _lock: crate::test_support::TestHomeSettingsLock, + old_home: Option, + old_userprofile: Option, + old_config_dir: Option, + } + + impl TestHomeEnvGuard { + fn set(home: &Path) -> Self { + let lock = lock_test_home_and_settings(); + let old_home = std::env::var_os("HOME"); + let old_userprofile = std::env::var_os("USERPROFILE"); + let old_config_dir = std::env::var_os("CC_SWITCH_CONFIG_DIR"); + std::env::set_var("HOME", home); + std::env::set_var("USERPROFILE", home); + std::env::set_var("CC_SWITCH_CONFIG_DIR", home.join(".cc-switch")); + set_test_home_override(Some(home)); + crate::settings::reload_test_settings(); + Self { + _lock: lock, + old_home, + old_userprofile, + old_config_dir, + } + } + } + + impl Drop for TestHomeEnvGuard { + fn drop(&mut self) { + match &self.old_home { + Some(value) => std::env::set_var("HOME", value), + None => std::env::remove_var("HOME"), + } + match &self.old_userprofile { + Some(value) => std::env::set_var("USERPROFILE", value), + None => std::env::remove_var("USERPROFILE"), + } + match &self.old_config_dir { + Some(value) => std::env::set_var("CC_SWITCH_CONFIG_DIR", value), + None => std::env::remove_var("CC_SWITCH_CONFIG_DIR"), + } + set_test_home_override(self.old_home.as_deref().map(Path::new)); + crate::settings::reload_test_settings(); + } + } + + fn supervisor_for_test(db: Arc, dir: &Path) -> Supervisor { + Supervisor::new( + db, + dir.join("daemon.sock"), + PathBuf::from("/bin/cc-switch-test-missing"), + 
) + } + + #[tokio::test] + #[serial_test::serial] + async fn ensure_worker_validation_failure_does_not_start_worker_or_write_session() { + let temp_home = tempfile::tempdir().expect("create temp home"); + let _env = TestHomeEnvGuard::set(temp_home.path()); + let db = Arc::new(Database::memory().expect("create database")); + let supervisor = supervisor_for_test(db.clone(), temp_home.path()); + + let response = supervisor.handle_ensure_worker("claude").await; + + assert!( + matches!(response, Response::Error { message } if message.contains("no active provider")) + ); + assert_eq!( + db.get_setting(PROXY_RUNTIME_SESSION_KEY) + .expect("read runtime session"), + None + ); + let inner = supervisor.inner.lock().await; + assert!(inner.workers.is_empty()); + assert!(inner.pending_hellos.is_empty()); + assert!(inner.pending_tokens.is_empty()); + } + + #[tokio::test] + #[serial_test::serial] + async fn ensure_worker_spawn_failure_clears_pending_registration() { + let temp_home = tempfile::tempdir().expect("create temp home"); + let _env = TestHomeEnvGuard::set(temp_home.path()); + let db = Arc::new(Database::memory().expect("create database")); + let provider = Provider::with_id( + "p1".to_string(), + "Provider".to_string(), + json!({"env": {"ANTHROPIC_BASE_URL": "https://example.com", "ANTHROPIC_AUTH_TOKEN": "token"}}), + None, + ); + db.save_provider("claude", &provider) + .expect("save provider"); + db.set_current_provider("claude", &provider.id) + .expect("set current provider"); + let supervisor = supervisor_for_test(db.clone(), temp_home.path()); + + let response = supervisor.handle_ensure_worker("claude").await; + + assert!( + matches!(response, Response::Error { message } if message.contains("spawn claude proxy worker failed")) + ); + assert_eq!( + db.get_setting(PROXY_RUNTIME_SESSION_KEY) + .expect("read runtime session"), + None + ); + let inner = supervisor.inner.lock().await; + assert!(inner.workers.is_empty()); + assert!(inner.pending_hellos.is_empty()); + 
assert!(inner.pending_tokens.is_empty()); + } + + #[tokio::test] + async fn old_worker_exit_does_not_remove_restarted_worker_for_same_app() { + let db = Arc::new(Database::memory().expect("create database")); + let supervisor = supervisor_for_test(db, Path::new("/tmp")); + let app = AppType::Claude; + let old_pid = 1001; + let new_pid = 1002; + + { + let mut inner = supervisor.inner.lock().await; + inner.workers.insert( + app.clone(), + WorkerInfo { + app_type: app.clone(), + pid: new_pid, + address: "127.0.0.1".to_string(), + port: 18080, + session_token: "new-token".to_string(), + }, + ); + inner.stopping_workers.insert((app.clone(), old_pid)); + } + + let (intentional, has_remaining_workers, teardown_in_progress) = + supervisor.record_worker_exit(&app, old_pid).await; + + assert!(intentional); + assert!(has_remaining_workers); + assert!(!teardown_in_progress); + let inner = supervisor.inner.lock().await; + assert_eq!(inner.workers.get(&app).map(|info| info.pid), Some(new_pid)); + assert!(inner.stopping_workers.is_empty()); + } + + #[tokio::test] + async fn ensure_worker_does_not_reuse_stopping_worker() { + let db = Arc::new(Database::memory().expect("create database")); + let supervisor = supervisor_for_test(db, Path::new("/tmp")); + let app = AppType::Claude; + let pid = 1001; + + { + let mut inner = supervisor.inner.lock().await; + inner.workers.insert( + app.clone(), + WorkerInfo { + app_type: app.clone(), + pid, + address: "127.0.0.1".to_string(), + port: 18080, + session_token: "token".to_string(), + }, + ); + inner.stopping_workers.insert((app.clone(), pid)); + } + + let error = supervisor + .ensure_worker_locked(app) + .await + .expect_err("stopping worker must not be reused"); + + assert!(error.contains("worker is stopping")); + } + + #[tokio::test] + async fn ensure_worker_rejects_shutdown_in_progress() { + let db = Arc::new(Database::memory().expect("create database")); + let supervisor = supervisor_for_test(db, Path::new("/tmp")); + + { + let mut 
inner = supervisor.inner.lock().await; + inner.shutdown_requested = true; + } + + let error = supervisor + .ensure_worker_locked(AppType::Claude) + .await + .expect_err("shutdown should reject new workers"); + + assert!(error.contains("shutting down")); + } + + #[tokio::test] + async fn drop_inactive_app_does_not_shutdown_other_worker() { + let db = Arc::new(Database::memory().expect("create database")); + let supervisor = supervisor_for_test(db, Path::new("/tmp")); + + { + let mut inner = supervisor.inner.lock().await; + inner.workers.insert( + AppType::Claude, + WorkerInfo { + app_type: AppType::Claude, + pid: 1001, + address: "127.0.0.1".to_string(), + port: 18080, + session_token: "token".to_string(), + }, + ); + } + + let plan = supervisor.plan_stop_for_app(AppType::Codex).await; + + assert!(plan.pids.is_empty()); + assert!(!plan.should_shutdown); + let inner = supervisor.inner.lock().await; + assert!(!inner.shutdown_requested); + assert!( + inner.cancelled_apps.contains(&AppType::Codex), + "dropping an inactive app should still cancel any delayed restart for that route" + ); + assert_eq!( + inner.workers.get(&AppType::Claude).map(|info| info.pid), + Some(1001) + ); + } + + #[tokio::test] + async fn drop_takeover_cancels_delayed_restart_for_target_app() { + let db = Arc::new(Database::memory().expect("create database")); + let supervisor = supervisor_for_test(db, Path::new("/tmp")); + + let plan = supervisor.plan_stop_for_app(AppType::Claude).await; + + assert!(plan.pids.is_empty()); + assert!(!plan.should_shutdown); + assert!( + !supervisor + .should_restart_after_crash(&AppType::Claude) + .await, + "disabled app should not restart after crash backoff" + ); + } + + #[tokio::test] + async fn drop_takeover_cancels_pending_worker_for_target_app() { + let db = Arc::new(Database::memory().expect("create database")); + let supervisor = supervisor_for_test(db, Path::new("/tmp")); + let app = AppType::Claude; + let app_key = app.as_str().to_string(); + let 
pending_pid = 1002; + + { + let mut inner = supervisor.inner.lock().await; + let (tx, _rx) = oneshot::channel(); + inner.pending_hellos.insert(app_key.clone(), tx); + inner + .pending_tokens + .insert(app_key.clone(), "token".to_string()); + inner.pending_worker_pids.insert(app.clone(), pending_pid); + } + + let plan = supervisor.plan_stop_for_app(app.clone()).await; + + assert_eq!(plan.pids, vec![pending_pid]); + assert!(plan.should_shutdown); + let inner = supervisor.inner.lock().await; + assert!(inner.shutdown_requested); + assert!(inner.pending_hellos.is_empty()); + assert!(inner.pending_tokens.is_empty()); + assert!(inner.pending_worker_pids.is_empty()); + assert!(inner.stopping_workers.contains(&(app, pending_pid))); + assert!(inner.cancelled_apps.contains(&AppType::Claude)); + } + + #[tokio::test] + async fn global_disable_cancels_pending_workers() { + let db = Arc::new(Database::memory().expect("create database")); + let supervisor = supervisor_for_test(db, Path::new("/tmp")); + let app = AppType::Claude; + let app_key = app.as_str().to_string(); + let pending_pid = 1002; + + { + let mut inner = supervisor.inner.lock().await; + let (tx, _rx) = oneshot::channel(); + inner.pending_hellos.insert(app_key.clone(), tx); + inner + .pending_tokens + .insert(app_key.clone(), "token".to_string()); + inner.pending_worker_pids.insert(app.clone(), pending_pid); + } + + let pids = supervisor.plan_stop_all_workers(false).await; + + assert_eq!(pids, vec![pending_pid]); + let inner = supervisor.inner.lock().await; + assert!(inner.shutdown_requested); + assert!(!inner.teardown_in_progress); + assert!(inner.pending_hellos.is_empty()); + assert!(inner.pending_tokens.is_empty()); + assert!(inner.pending_worker_pids.is_empty()); + assert!(inner.stopping_workers.contains(&(app, pending_pid))); + assert!(inner.cancelled_apps.contains(&AppType::Claude)); + } + + #[tokio::test] + async fn shutdown_teardown_prevents_worker_exit_from_signalling_shutdown() { + let db = 
Arc::new(Database::memory().expect("create database")); + let supervisor = supervisor_for_test(db, Path::new("/tmp")); + let app = AppType::Claude; + let pid = 1001; + + { + let mut inner = supervisor.inner.lock().await; + inner.workers.insert( + app.clone(), + WorkerInfo { + app_type: app.clone(), + pid, + address: "127.0.0.1".to_string(), + port: 18080, + session_token: "token".to_string(), + }, + ); + } + + let pids = supervisor.plan_stop_all_workers(true).await; + assert_eq!(pids, vec![pid]); + + let (intentional, has_remaining_workers, teardown_in_progress) = + supervisor.record_worker_exit(&app, pid).await; + + assert!(intentional); + assert!(!has_remaining_workers); + assert!(teardown_in_progress); + } + + #[tokio::test] + async fn old_worker_exit_keeps_daemon_alive_for_pending_restarted_worker() { + let db = Arc::new(Database::memory().expect("create database")); + let supervisor = supervisor_for_test(db, Path::new("/tmp")); + let app = AppType::Claude; + let old_pid = 1001; + let pending_pid = 1002; + + { + let mut inner = supervisor.inner.lock().await; + inner.pending_worker_pids.insert(app.clone(), pending_pid); + inner.stopping_workers.insert((app.clone(), old_pid)); + } + + let (intentional, has_remaining_workers, teardown_in_progress) = + supervisor.record_worker_exit(&app, old_pid).await; + + assert!(intentional); + assert!(has_remaining_workers); + assert!(!teardown_in_progress); + let inner = supervisor.inner.lock().await; + assert_eq!( + inner.pending_worker_pids.get(&app).copied(), + Some(pending_pid) + ); + assert!(inner.stopping_workers.is_empty()); + } +} diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index c5dc8cf9..6ea71769 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -5,6 +5,7 @@ mod claude_plugin; mod codex_config; pub mod commands; mod config; +#[cfg(unix)] pub mod daemon; mod database; mod deeplink; diff --git a/src-tauri/src/main.rs b/src-tauri/src/main.rs index 536c9b8a..636923cc 100644 --- 
a/src-tauri/src/main.rs +++ b/src-tauri/src/main.rs @@ -7,6 +7,20 @@ fn main() { // 解析命令行参数 let cli = Cli::parse(); + init_logger_if_needed(&cli); + + // 执行命令 + if let Err(e) = run(cli) { + eprintln!("Error: {}", e); + process::exit(1); + } +} + +fn init_logger_if_needed(cli: &Cli) { + if command_uses_own_logger(&cli.command) { + return; + } + // 初始化日志(交互模式和命令行模式都避免干扰输出) let log_level = if cli.verbose { "debug" @@ -14,11 +28,15 @@ fn main() { "error" // 默认只显示错误日志,避免 INFO 日志干扰命令输出 }; env_logger::Builder::from_env(env_logger::Env::default().default_filter_or(log_level)).init(); +} - // 执行命令 - if let Err(e) = run(cli) { - eprintln!("Error: {}", e); - process::exit(1); +fn command_uses_own_logger(command: &Option) -> bool { + match command { + #[cfg(unix)] + Some(Commands::Daemon(cc_switch_lib::cli::commands::daemon::DaemonCommand::Start { + .. + })) => true, + _ => false, } } @@ -53,6 +71,11 @@ fn run(cli: Cli) -> Result<(), AppError> { } fn command_requires_startup_state(command: &Option) -> bool { + #[cfg(unix)] + if std::env::var_os(cc_switch_lib::daemon::supervisor::DAEMON_SOCKET_ENV).is_some() { + return false; + } + match command { Some(Commands::Completions(_)) | Some(Commands::Update(_)) @@ -72,7 +95,9 @@ fn initialize_startup_state_if_needed(command: &Option) -> Result<(), #[cfg(test)] mod tests { - use super::{command_requires_startup_state, initialize_startup_state_if_needed}; + use super::{ + command_requires_startup_state, command_uses_own_logger, initialize_startup_state_if_needed, + }; use cc_switch_lib::cli::Cli; use clap::Parser; use serial_test::serial; @@ -109,6 +134,21 @@ mod tests { .expect("set future schema version"); } + #[cfg(unix)] + #[test] + fn daemon_start_uses_daemon_file_logger() { + let cli = Cli::parse_from(["cc-switch", "daemon", "start"]); + + assert!(command_uses_own_logger(&cli.command)); + } + + #[test] + fn normal_commands_use_env_logger() { + let cli = Cli::parse_from(["cc-switch", "provider", "list"]); + + 
assert!(!command_uses_own_logger(&cli.command)); + } + #[test] fn update_and_completions_skip_startup_state() { let update = Cli::parse_from(["cc-switch", "update"]); diff --git a/src-tauri/src/services/proxy.rs b/src-tauri/src/services/proxy.rs index a61e1e62..72c927e3 100644 --- a/src-tauri/src/services/proxy.rs +++ b/src-tauri/src/services/proxy.rs @@ -209,7 +209,9 @@ impl ProxyService { } pub async fn start_managed_session(&self, app_type: &str) -> Result { - let _guard = crate::services::state_coordination::acquire_restore_mutation_guard().await?; + // This delegates to the daemon, whose IPC handler owns the state + // mutation guard while it rewrites live config. Holding the guard in + // the caller while waiting for the daemon can deadlock the handshake. self.start_managed_session_unlocked(app_type).await } @@ -220,7 +222,21 @@ impl ProxyService { Self::ensure_managed_sessions_supported()?; let app_type = Self::takeover_app_from_str(app_type)?; + if self.has_running_foreground_runtime().await { + return Err( + "proxy is already running in foreground mode; stop the current runtime before attaching another app to a managed session" + .to_string(), + ); + } + let current_status = self.get_status().await; + let persisted_sessions = self.load_persisted_runtime_sessions(); + if current_status.running && persisted_sessions.is_empty() { + return Err( + "proxy is already running in foreground mode; stop the current runtime before attaching another app to a managed session" + .to_string(), + ); + } if current_status.running { // Daemon is already running this worker. Just attach the app. self.daemon_ensure_worker(app_type.as_str()).await @@ -341,15 +357,36 @@ impl ProxyService { /// Foreground-only fallback for when no daemon is reachable. 
Drops the /// per-app takeover via the same code path the supervisor uses, takes the /// cross-process state-mutation guard around it (so a concurrent CLI - /// invocation can't race the live-config restore), and clears the - /// `proxy_runtime_session` row — the daemon would normally own that row, - /// but if it's gone, leaving the row behind would make `get_status()` - /// report a phantom running proxy on the next launch. + /// invocation can't race the live-config restore), and clears the matching + /// daemon-managed runtime marker. If the daemon died ungracefully, this + /// also stops the orphaned worker before removing its persisted PID. async fn local_disable_takeover(&self, app_type: &str) -> Result<(), String> { let app = Self::takeover_app_from_str(app_type)?; let _guard = crate::services::state_coordination::acquire_restore_mutation_guard().await?; self.disable_takeover_for_app_unlocked(&app, false).await?; - let _ = self.clear_persisted_runtime_session(); + if !self + .db + .is_live_takeover_active() + .await + .map_err(|error| format!("check active takeovers failed: {error}"))? 
+ { + self.sync_persisted_global_proxy_enabled(false).await?; + } + if let Some(session) = self.load_persisted_runtime_session_for_app(&app) { + if session.kind.is_managed_external() { + let should_terminate = match Self::probe_external_proxy_status(&session).await { + ExternalProxyStatusProbe::Matched(_) => true, + ExternalProxyStatusProbe::Mismatched => false, + ExternalProxyStatusProbe::Unreachable => { + Self::has_managed_external_ownership_signal(&session) + } + }; + if Self::is_process_alive(session.pid) && should_terminate { + Self::terminate_external_process(session.pid).await?; + } + } + } + let _ = self.clear_persisted_runtime_session_for_app(&app); Ok(()) } @@ -385,6 +422,13 @@ impl ProxyService { let app_type_enum = Self::takeover_app_from_str(app_type)?; if enabled { + if self.has_running_foreground_runtime().await { + return Err( + "proxy is already running in foreground mode; stop the current runtime before attaching another app to a managed session" + .to_string(), + ); + } + let status = self.get_status().await; if status.running && self.load_persisted_runtime_sessions().is_empty() { return Err( @@ -549,11 +593,11 @@ impl ProxyService { { let mut workers = Vec::new(); let mut primary_status = None; - let mut stale = false; + let mut stale_app_keys = Vec::new(); for session in sessions { if !Self::is_process_alive(session.pid) { - stale = true; + stale_app_keys.push(session.app_type.clone()); continue; } @@ -572,7 +616,9 @@ impl ProxyService { primary_status = Some(status); } } - ExternalProxyStatusProbe::Mismatched => stale = true, + ExternalProxyStatusProbe::Mismatched => { + stale_app_keys.push(session.app_type.clone()); + } ExternalProxyStatusProbe::Unreachable => { if Self::has_managed_external_ownership_signal(&session) { let uptime_seconds = @@ -604,14 +650,14 @@ impl ProxyService { }); } } else { - stale = true; + stale_app_keys.push(session.app_type.clone()); } } } } - if stale { - let _ = self.clear_persisted_runtime_session(); + if 
!stale_app_keys.is_empty() { + let _ = self.clear_persisted_runtime_sessions_for_app_keys(&stale_app_keys); } if let Some(mut status) = primary_status { @@ -648,6 +694,13 @@ impl ProxyService { ProxyStatus::default() } + async fn has_running_foreground_runtime(&self) -> bool { + if let Some(server) = self.runtime.server.read().await.as_ref() { + return server.get_status().await.running; + } + false + } + pub async fn get_config(&self) -> Result { self.db.get_proxy_config().await } @@ -836,6 +889,13 @@ impl ProxyService { } } + pub(crate) async fn clear_daemon_takeover_for_app(&self, app_type: &str) -> Result<(), String> { + let app_type = Self::takeover_app_from_str(app_type)?; + let _guard = crate::services::state_coordination::acquire_restore_mutation_guard().await?; + self.disable_takeover_for_app_unlocked(&app_type, false) + .await + } + pub async fn is_app_takeover_active(&self, app_type: &AppType) -> Result { let app_key = app_type.as_str(); let app_proxy = self @@ -1094,7 +1154,7 @@ impl ProxyService { Ok(()) } - async fn validate_app_proxy_activation( + pub(crate) async fn validate_app_proxy_activation( &self, app_type: &AppType, fallback_provider_id: Option<&str>, @@ -2039,21 +2099,76 @@ impl ProxyService { .map_err(|error| format!("clear proxy runtime session failed: {error}")) } - fn load_persisted_runtime_sessions(&self) -> Vec { - let Some(raw) = self + fn load_raw_persisted_runtime_session(&self) -> Option { + let raw = self .db .get_setting(PROXY_RUNTIME_SESSION_KEY) .ok() - .flatten() - else { - return Vec::new(); - }; + .flatten()?; let raw = raw.trim(); if raw.is_empty() { - return Vec::new(); + None + } else { + Some(raw.to_string()) + } + } + + fn load_persisted_runtime_sessions_map( + &self, + ) -> Option> { + let raw = self.load_raw_persisted_runtime_session()?; + if let Ok(sessions) = serde_json::from_str::(&raw) { + return Some(sessions.workers); + } + None + } + + fn persist_persisted_runtime_sessions_map( + &self, + sessions: HashMap, + ) 
-> Result<(), String> { + if sessions.is_empty() { + return self.clear_persisted_runtime_session(); + } + + let serialized = + serde_json::to_string(&PersistedProxyRuntimeSessions { workers: sessions }) + .map_err(|error| format!("serialize proxy runtime sessions failed: {error}"))?; + self.db + .set_setting(PROXY_RUNTIME_SESSION_KEY, &serialized) + .map_err(|error| format!("persist proxy runtime sessions failed: {error}")) + } + + fn clear_persisted_runtime_session_for_app(&self, app_type: &AppType) -> Result<(), String> { + let Some(mut sessions) = self.load_persisted_runtime_sessions_map() else { + return self.clear_persisted_runtime_session(); + }; + sessions.remove(app_type.as_str()); + self.persist_persisted_runtime_sessions_map(sessions) + } + + fn clear_persisted_runtime_sessions_for_app_keys( + &self, + app_keys: &[Option], + ) -> Result<(), String> { + if app_keys.iter().any(Option::is_none) { + return self.clear_persisted_runtime_session(); } + let Some(mut sessions) = self.load_persisted_runtime_sessions_map() else { + return self.clear_persisted_runtime_session(); + }; + for app_key in app_keys.iter().filter_map(Option::as_deref) { + sessions.remove(app_key); + } + self.persist_persisted_runtime_sessions_map(sessions) + } + + fn load_persisted_runtime_sessions(&self) -> Vec { + let Some(raw) = self.load_raw_persisted_runtime_session() else { + return Vec::new(); + }; - if let Ok(sessions) = serde_json::from_str::(raw) { + if let Ok(sessions) = serde_json::from_str::(&raw) { return sessions .workers .into_iter() @@ -2066,7 +2181,7 @@ impl ProxyService { .collect(); } - match serde_json::from_str::(raw) { + match serde_json::from_str::(&raw) { Ok(session) => vec![session], Err(_) => { let _ = self.clear_persisted_runtime_session(); @@ -2084,10 +2199,16 @@ impl ProxyService { app_type: &AppType, ) -> Option { let app_key = app_type.as_str(); - self.load_persisted_runtime_sessions() - .into_iter() - .find(|session| session.app_type.as_deref() == 
Some(app_key)) - .or_else(|| self.load_persisted_runtime_session()) + if let Some(mut sessions) = self.load_persisted_runtime_sessions_map() { + return sessions.remove(app_key).map(|mut session| { + if session.app_type.is_none() { + session.app_type = Some(app_key.to_string()); + } + session + }); + } + + self.load_persisted_runtime_session() } fn is_process_alive(pid: u32) -> bool { @@ -2108,7 +2229,60 @@ impl ProxyService { } fn has_managed_external_ownership_signal(session: &PersistedProxyRuntimeSession) -> bool { - session.session_token.is_some() && Self::is_detached_session_leader(session.pid) + session.session_token.is_some() + && (Self::is_detached_session_leader(session.pid) + || Self::process_looks_like_cc_switch_proxy_worker(session.pid)) + } + + #[cfg(unix)] + fn process_looks_like_cc_switch_proxy_worker(pid: u32) -> bool { + if pid == 0 { + return false; + } + + #[cfg(target_os = "macos")] + { + let output = std::process::Command::new("ps") + .args(["-p", &pid.to_string(), "-o", "command="]) + .output(); + let Ok(output) = output else { + return false; + }; + if !output.status.success() { + return false; + } + let command = String::from_utf8_lossy(&output.stdout); + return command.contains("cc-switch") + && command.contains("proxy serve") + && command.contains("--listen-port"); + } + + #[cfg(target_os = "linux")] + { + let path = format!("/proc/{pid}/cmdline"); + let Ok(raw) = std::fs::read(path) else { + return false; + }; + let command = raw + .split(|byte| *byte == 0) + .filter(|part| !part.is_empty()) + .map(|part| String::from_utf8_lossy(part)) + .collect::>() + .join(" "); + return command.contains("cc-switch") + && command.contains("proxy serve") + && command.contains("--listen-port"); + } + + #[cfg(not(any(target_os = "macos", target_os = "linux")))] + { + false + } + } + + #[cfg(not(unix))] + fn process_looks_like_cc_switch_proxy_worker(_pid: u32) -> bool { + false } #[cfg(unix)] diff --git a/src-tauri/tests/proxy_service.rs 
b/src-tauri/tests/proxy_service.rs index 8ac747be..89e5b269 100644 --- a/src-tauri/tests/proxy_service.rs +++ b/src-tauri/tests/proxy_service.rs @@ -55,7 +55,7 @@ fn seed_codex_live_config(auth: serde_json::Value, config_text: &str) { write_codex_live_atomic(&auth, Some(config_text)).expect("seed codex live config"); } -fn load_runtime_session_pid(state: &AppState) -> u32 { +fn load_runtime_session_pid_for_app(state: &AppState, app_type: &str) -> u32 { let session: serde_json::Value = serde_json::from_str( &state .db @@ -64,12 +64,50 @@ fn load_runtime_session_pid(state: &AppState) -> u32 { .expect("persisted runtime session should exist"), ) .expect("parse runtime session setting"); + let session = session + .get("workers") + .and_then(|workers| workers.get(app_type)) + .unwrap_or(&session); session .get("pid") .and_then(|value| value.as_u64()) .expect("runtime session pid") as u32 } +fn load_runtime_session_pid(state: &AppState) -> u32 { + load_runtime_session_pid_for_app(state, "claude") +} + +fn load_runtime_session_worker_count(state: &AppState) -> usize { + let session: serde_json::Value = serde_json::from_str( + &state + .db + .get_setting("proxy_runtime_session") + .expect("load runtime session setting") + .expect("persisted runtime session should exist"), + ) + .expect("parse runtime session setting"); + session + .get("workers") + .and_then(|workers| workers.as_object()) + .map_or(1, serde_json::Map::len) +} + +async fn set_app_proxy_port(db: &Database, app_type: &str, port: u16) { + let mut config = db + .get_proxy_config_for_app(app_type) + .await + .unwrap_or_else(|_| panic!("get {app_type} proxy config")); + config.listen_port = port; + db.update_proxy_config_for_app(config) + .await + .unwrap_or_else(|_| panic!("update {app_type} proxy port")); +} + +async fn set_claude_proxy_port(db: &Database, port: u16) { + set_app_proxy_port(db, "claude", port).await; +} + #[cfg(unix)] struct ManagedSessionCleanup(Option); @@ -133,9 +171,11 @@ async fn 
proxy_service_starts_and_stops_without_takeover() { let mut config = service.get_config().await.expect("get config"); config.listen_port = 0; - service.update_config(&config).await.expect("update config"); - let started = service.start().await.expect("start proxy"); + let started = service + .start_with_runtime_config(config) + .await + .expect("start proxy"); assert!(started.port > 0, "proxy should bind an ephemeral port"); assert!( service.is_running().await, @@ -213,9 +253,11 @@ async fn proxy_service_status_tracks_runtime_uptime() { let mut config = service.get_config().await.expect("get config"); config.listen_port = 0; - service.update_config(&config).await.expect("update config"); - service.start().await.expect("start proxy"); + service + .start_with_runtime_config(config) + .await + .expect("start proxy"); tokio::time::sleep(std::time::Duration::from_millis(1100)).await; let status = service.get_status().await; @@ -271,13 +313,12 @@ async fn app_state_reuses_active_proxy_runtime_across_reloads() { .await .expect("get proxy config"); config.listen_port = 0; - state + + let started = state .proxy_service - .update_config(&config) + .start_with_runtime_config(config) .await - .expect("persist proxy config with ephemeral port"); - - let started = state.proxy_service.start().await.expect("start proxy"); + .expect("start proxy"); let reloaded = AppState::try_new().expect("reload app state"); let status = reloaded.proxy_service.get_status().await; @@ -313,11 +354,9 @@ async fn reloaded_app_state_can_stop_active_proxy_runtime() { config.listen_port = 0; state .proxy_service - .update_config(&config) + .start_with_runtime_config(config) .await - .expect("persist proxy config with ephemeral port"); - - state.proxy_service.start().await.expect("start proxy"); + .expect("start proxy"); let reloaded = AppState::try_new().expect("reload app state"); reloaded @@ -480,17 +519,8 @@ async fn proxy_service_can_stop_managed_external_proxy_session() { .expect("seed claude live 
config"); let state = AppState::try_new().expect("create app state"); - let mut config = state - .proxy_service - .get_config() - .await - .expect("get proxy config"); - config.listen_port = find_free_port(); - state - .proxy_service - .update_config(&config) - .await - .expect("persist proxy config"); + let listen_port = find_free_port(); + set_claude_proxy_port(&state.db, listen_port).await; let started = state .proxy_service @@ -498,22 +528,11 @@ async fn proxy_service_can_stop_managed_external_proxy_session() { .await .expect("start managed proxy session"); assert_eq!( - started.port, config.listen_port, + started.port, listen_port, "managed session should reuse the configured listen port" ); - let session: serde_json::Value = serde_json::from_str( - &state - .db - .get_setting("proxy_runtime_session") - .expect("load runtime session setting") - .expect("persisted runtime session should exist"), - ) - .expect("parse runtime session setting"); - let pid = session - .get("pid") - .and_then(|value| value.as_u64()) - .expect("runtime session pid") as u32; + let pid = load_runtime_session_pid(&state); assert_ne!( pid, std::process::id(), @@ -557,17 +576,8 @@ async fn managed_proxy_session_is_detached_from_parent_terminal_session() { .expect("seed claude live config"); let state = AppState::try_new().expect("create app state"); - let mut config = state - .proxy_service - .get_config() - .await - .expect("get proxy config"); - config.listen_port = find_free_port(); - state - .proxy_service - .update_config(&config) - .await - .expect("persist proxy config"); + set_claude_proxy_port(&state.db, find_free_port()).await; + set_app_proxy_port(&state.db, "codex", find_free_port()).await; state .proxy_service @@ -575,18 +585,7 @@ async fn managed_proxy_session_is_detached_from_parent_terminal_session() { .await .expect("start managed proxy session"); - let session: serde_json::Value = serde_json::from_str( - &state - .db - .get_setting("proxy_runtime_session") - 
.expect("load runtime session setting") - .expect("persisted runtime session should exist"), - ) - .expect("parse runtime session setting"); - let pid = session - .get("pid") - .and_then(|value| value.as_u64()) - .expect("runtime session pid") as u32; + let pid = load_runtime_session_pid(&state); let _cleanup = ManagedSessionCleanup::new(pid); @@ -628,13 +627,9 @@ async fn proxy_service_rejects_managed_session_start_when_foreground_runtime_is_ let mut config = service.get_config().await.expect("get proxy config"); config.listen_port = 0; - service - .update_config(&config) - .await - .expect("persist proxy config"); service - .start() + .start_with_runtime_config(config) .await .expect("start foreground proxy runtime"); @@ -659,13 +654,9 @@ async fn proxy_service_rejects_managed_session_attach_when_foreground_runtime_is let mut config = service.get_config().await.expect("get proxy config"); config.listen_port = 0; - service - .update_config(&config) - .await - .expect("persist proxy config"); service - .start() + .start_with_runtime_config(config) .await .expect("start foreground proxy runtime"); @@ -704,17 +695,8 @@ async fn proxy_service_reloaded_app_state_keeps_managed_session_running_for_curr .expect("seed claude live config"); let state = AppState::try_new().expect("create app state"); - let mut config = state - .proxy_service - .get_config() - .await - .expect("get proxy config"); - config.listen_port = find_free_port(); - state - .proxy_service - .update_config(&config) - .await - .expect("persist proxy config"); + set_claude_proxy_port(&state.db, find_free_port()).await; + set_app_proxy_port(&state.db, "codex", find_free_port()).await; state .proxy_service @@ -748,7 +730,7 @@ async fn proxy_service_reloaded_app_state_keeps_managed_session_running_for_curr #[cfg(unix)] #[tokio::test] #[serial] -async fn managed_session_allows_second_supported_app_to_reuse_existing_runtime() { +async fn managed_session_allows_second_supported_app_to_start_its_own_worker() { 
let _guard = lock_test_mutex(); reset_test_fs(); let _home = ensure_test_home(); @@ -766,31 +748,24 @@ async fn managed_session_allows_second_supported_app_to_reuse_existing_runtime() ); let state = AppState::try_new().expect("create app state"); - let mut config = state - .proxy_service - .get_config() - .await - .expect("get proxy config"); - config.listen_port = find_free_port(); - state - .proxy_service - .update_config(&config) - .await - .expect("persist proxy config"); + set_claude_proxy_port(&state.db, find_free_port()).await; + set_app_proxy_port(&state.db, "codex", find_free_port()).await; state .proxy_service .set_managed_session_for_app("claude", true) .await .expect("start managed proxy for claude"); - let runtime_pid = load_runtime_session_pid(&state); - let _cleanup = ManagedSessionCleanup::new(runtime_pid); + let claude_pid = load_runtime_session_pid_for_app(&state, "claude"); + let _claude_cleanup = ManagedSessionCleanup::new(claude_pid); state .proxy_service .set_managed_session_for_app("codex", true) .await - .expect("reuse managed proxy for codex"); + .expect("start managed proxy for codex"); + let codex_pid = load_runtime_session_pid_for_app(&state, "codex"); + let _codex_cleanup = ManagedSessionCleanup::new(codex_pid); let takeover = state .proxy_service @@ -799,17 +774,28 @@ async fn managed_session_allows_second_supported_app_to_reuse_existing_runtime() .expect("read takeover status"); assert!( takeover.claude, - "claude should stay attached to the shared runtime" + "claude should stay attached to its daemon-managed worker" ); assert!( takeover.codex, - "codex should join the existing managed runtime" + "codex should attach to its daemon-managed worker" ); assert_eq!( - load_runtime_session_pid(&state), - runtime_pid, - "attaching a second app should reuse the existing managed runtime process" + load_runtime_session_worker_count(&state), + 2, + "attaching a second app should persist one worker per app" + ); + let status = 
state.proxy_service.get_status().await; + assert_eq!( + status.active_workers.len(), + 2, + "status should expose both daemon-managed workers" ); + assert!( + is_process_alive(claude_pid), + "claude worker should be alive" + ); + assert!(is_process_alive(codex_pid), "codex worker should be alive"); state .proxy_service @@ -838,17 +824,7 @@ async fn proxy_service_stop_preserves_takeover_state_until_explicit_restore() { })); let state = AppState::try_new().expect("create app state"); - let mut config = state - .proxy_service - .get_config() - .await - .expect("get proxy config"); - config.listen_port = 0; - state - .proxy_service - .update_config(&config) - .await - .expect("persist proxy config with ephemeral port"); + set_claude_proxy_port(&state.db, find_free_port()).await; state .proxy_service @@ -934,17 +910,8 @@ async fn managed_session_keeps_runtime_alive_while_another_supported_app_is_atta ); let state = AppState::try_new().expect("create app state"); - let mut config = state - .proxy_service - .get_config() - .await - .expect("get proxy config"); - config.listen_port = find_free_port(); - state - .proxy_service - .update_config(&config) - .await - .expect("persist proxy config"); + set_claude_proxy_port(&state.db, find_free_port()).await; + set_app_proxy_port(&state.db, "codex", find_free_port()).await; state .proxy_service @@ -955,9 +922,11 @@ async fn managed_session_keeps_runtime_alive_while_another_supported_app_is_atta .proxy_service .set_managed_session_for_app("codex", true) .await - .expect("reuse managed proxy for codex"); - let runtime_pid = load_runtime_session_pid(&state); - let _cleanup = ManagedSessionCleanup::new(runtime_pid); + .expect("start managed proxy for codex"); + let claude_pid = load_runtime_session_pid_for_app(&state, "claude"); + let codex_pid = load_runtime_session_pid_for_app(&state, "codex"); + let _claude_cleanup = ManagedSessionCleanup::new(claude_pid); + let _codex_cleanup = ManagedSessionCleanup::new(codex_pid); state 
.proxy_service @@ -967,11 +936,15 @@ async fn managed_session_keeps_runtime_alive_while_another_supported_app_is_atta assert!( state.proxy_service.is_running().await, - "shared managed runtime should stay up while codex is still attached" + "managed routing should stay up while codex is still attached" + ); + assert!( + !is_process_alive(claude_pid), + "claude worker should stop after disabling only claude" ); assert!( - is_process_alive(runtime_pid), - "shared managed runtime process should still be alive after disabling only one app" + is_process_alive(codex_pid), + "codex worker should remain alive after disabling only claude" ); let takeover = state .proxy_service @@ -984,7 +957,17 @@ async fn managed_session_keeps_runtime_alive_while_another_supported_app_is_atta ); assert!( takeover.codex, - "codex should remain attached to the shared runtime" + "codex should remain attached to its managed worker" + ); + assert_eq!( + load_runtime_session_worker_count(&state), + 1, + "disabling claude should clear only claude's persisted worker" + ); + assert_eq!( + load_runtime_session_pid_for_app(&state, "codex"), + codex_pid, + "codex worker metadata should remain persisted" ); state @@ -1010,17 +993,7 @@ async fn managed_session_disable_last_app_terminates_external_process_even_when_ })); let state = AppState::try_new().expect("create app state"); - let mut config = state - .proxy_service - .get_config() - .await - .expect("get proxy config"); - config.listen_port = find_free_port(); - state - .proxy_service - .update_config(&config) - .await - .expect("persist proxy config"); + set_claude_proxy_port(&state.db, find_free_port()).await; state .proxy_service @@ -1038,7 +1011,14 @@ async fn managed_session_disable_last_app_terminates_external_process_even_when_ .expect("runtime session marker should exist"), ) .expect("parse runtime session marker"); - runtime_session["port"] = json!(find_free_port()); + if let Some(claude_session) = runtime_session + .get_mut("workers") + 
.and_then(|workers| workers.get_mut("claude")) + { + claude_session["port"] = json!(find_free_port()); + } else { + runtime_session["port"] = json!(find_free_port()); + } state .db .set_setting("proxy_runtime_session", &runtime_session.to_string()) @@ -1327,6 +1307,104 @@ async fn proxy_service_rejects_external_status_with_mismatched_session_token() { server.await.expect("fake status server should finish"); } +#[tokio::test] +async fn proxy_service_get_status_clears_only_stale_worker_from_multi_app_session() { + let listener = TokioTcpListener::bind(("127.0.0.1", 0)) + .await + .expect("bind fake proxy status listener"); + let port = listener + .local_addr() + .expect("read fake proxy listener addr") + .port(); + let healthy_status = json!({ + "running": true, + "address": "127.0.0.1", + "port": port, + "active_connections": 0, + "total_requests": 1, + "success_requests": 1, + "failed_requests": 0, + "success_rate": 100.0, + "uptime_seconds": 10, + "current_provider": null, + "current_provider_id": null, + "last_request_at": null, + "last_error": null, + "failover_count": 0, + "managed_session_token": "codex-token" + }); + + let server = tokio::spawn(async move { + let (mut socket, _) = listener.accept().await.expect("accept status request"); + let response = format!( + "HTTP/1.1 200 OK\r\ncontent-type: application/json\r\ncontent-length: {}\r\nconnection: close\r\n\r\n{}", + healthy_status.to_string().len(), + healthy_status + ); + socket + .write_all(response.as_bytes()) + .await + .expect("write fake status response"); + }); + + let db = Arc::new(Database::memory().expect("create database")); + db.set_setting( + "proxy_runtime_session", + &json!({ + "workers": { + "claude": { + "pid": 0, + "address": "127.0.0.1", + "port": find_free_port(), + "started_at": "2026-03-10T00:00:00Z", + "kind": "managed_external", + "session_token": "claude-token", + "app_type": "claude" + }, + "codex": { + "pid": std::process::id(), + "address": "127.0.0.1", + "port": port, + 
"started_at": "2026-03-10T00:00:00Z", + "kind": "managed_external", + "session_token": "codex-token", + "app_type": "codex" + } + } + }) + .to_string(), + ) + .expect("persist multi-app runtime sessions"); + + let service = ProxyService::new(db.clone()); + let status = service.get_status().await; + + assert!(status.running, "healthy codex worker should remain visible"); + assert_eq!(status.active_workers.len(), 1); + assert_eq!(status.active_workers[0].app_type, "codex"); + + let stored: serde_json::Value = serde_json::from_str( + &db.get_setting("proxy_runtime_session") + .expect("read runtime session marker") + .expect("runtime session marker should remain"), + ) + .expect("parse runtime session marker"); + let workers = stored + .get("workers") + .and_then(|value| value.as_object()) + .expect("runtime session workers map"); + assert!( + !workers.contains_key("claude"), + "stale worker metadata should be removed for only the stale app" + ); + assert!( + workers.contains_key("codex"), + "healthy worker metadata should not be erased by another stale app" + ); + + server.await.expect("fake status server should finish"); +} + #[cfg(unix)] #[tokio::test] async fn proxy_service_does_not_kill_process_when_status_token_mismatches() { diff --git a/src-tauri/tests/support.rs b/src-tauri/tests/support.rs index 4c054619..8a199a70 100644 --- a/src-tauri/tests/support.rs +++ b/src-tauri/tests/support.rs @@ -20,6 +20,8 @@ pub fn ensure_test_home() -> &'static Path { std::env::set_var("HOME", home); #[cfg(windows)] std::env::set_var("USERPROFILE", home); + std::env::set_var("XDG_RUNTIME_DIR", home.join("run")); + std::env::set_var("XDG_STATE_HOME", home.join("state")); home.as_path() } @@ -33,6 +35,8 @@ pub fn reset_test_fs() { ".gemini", ".openclaw", ".config", + "run", + "state", ] { let path = home.join(sub); if path.exists() { From 7adff90f3d693245ff108eb5adb22013b3372468 Mon Sep 17 00:00:00 2001 From: saladday <1203511142@qq.com> Date: Thu, 21 May 2026 19:57:25 +0800 
Subject: [PATCH 3/5] (fix) move proxy ports to settings --- src-tauri/src/cli/commands/proxy.rs | 96 +++----- src-tauri/src/cli/tui/data.rs | 5 +- .../src/cli/tui/runtime_actions/settings.rs | 7 +- src-tauri/src/daemon/supervisor.rs | 12 +- src-tauri/src/database/dao/proxy.rs | 227 ++++++++---------- src-tauri/src/database/mod.rs | 2 +- src-tauri/src/database/schema.rs | 43 +--- src-tauri/src/proxy/types.rs | 19 +- src-tauri/src/services/proxy.rs | 95 +++++++- .../tests/proxy_claude_forwarder_alignment.rs | 3 +- src-tauri/tests/proxy_daemon.rs | 21 +- src-tauri/tests/proxy_service.rs | 10 +- 12 files changed, 263 insertions(+), 277 deletions(-) diff --git a/src-tauri/src/cli/commands/proxy.rs b/src-tauri/src/cli/commands/proxy.rs index c149271e..9ca02393 100644 --- a/src-tauri/src/cli/commands/proxy.rs +++ b/src-tauri/src/cli/commands/proxy.rs @@ -3,7 +3,6 @@ use clap::Subcommand; use crate::app_config::AppType; use crate::cli::ui::{highlight, info, success}; use crate::error::AppError; -use crate::proxy::types::AppProxyConfig; use crate::{AppState, ProxyConfig}; #[cfg(unix)] @@ -81,13 +80,13 @@ fn show_proxy() -> Result<(), AppError> { let runtime = create_runtime()?; let config = runtime.block_on(state.proxy_service.get_config())?; let status = runtime.block_on(state.proxy_service.get_status()); - let app_configs = load_proxy_app_configs(&state, &runtime)?; + let app_ports = load_proxy_app_ports(&state)?; let takeovers = runtime .block_on(state.proxy_service.get_takeover_status()) .map_err(AppError::Message)?; println!("{}", highlight(crate::t!("Local Proxy", "本地代理"))); - for line in build_proxy_overview_lines(&state, &config, &status, &app_configs, &takeovers) { + for line in build_proxy_overview_lines(&state, &config, &status, &app_ports, &takeovers) { println!("{line}"); } @@ -152,13 +151,9 @@ fn configure_proxy(app_type: AppType, listen_port: Option) -> Result<(), Ap app_type.as_str() ))); } - let mut config = runtime - 
.block_on(state.db.get_proxy_config_for_app(app_type.as_str())) - .map_err(AppError::from)?; - config.listen_port = listen_port; - runtime - .block_on(state.db.update_proxy_config_for_app(config)) - .map_err(AppError::from)?; + state + .db + .set_app_proxy_preferred_port(app_type.as_str(), listen_port)?; println!( "{}", success(&format!( @@ -364,16 +359,14 @@ fn validate_listen_port(port: u16) -> Result<(), AppError> { ))) } -fn load_proxy_app_configs( - state: &AppState, - runtime: &tokio::runtime::Runtime, -) -> Result, AppError> { +fn load_proxy_app_ports(state: &AppState) -> Result, AppError> { [AppType::Claude, AppType::Codex, AppType::Gemini] .into_iter() .map(|app| { - runtime - .block_on(state.db.get_proxy_config_for_app(app.as_str())) - .map_err(AppError::from) + state + .db + .get_app_proxy_preferred_port(app.as_str()) + .map(|port| (app, port)) }) .collect() } @@ -381,7 +374,7 @@ fn load_proxy_app_configs( fn build_proxy_route_lines( config: &ProxyConfig, status: &crate::ProxyStatus, - app_configs: &[AppProxyConfig], + app_ports: &[(AppType, u16)], takeovers: &crate::proxy::types::ProxyTakeoverStatus, ) -> Vec { [ @@ -391,7 +384,7 @@ fn build_proxy_route_lines( ] .into_iter() .map(|(app, label, enabled)| { - let configured_port = app_configured_port(app_configs, &app).unwrap_or(config.listen_port); + let configured_port = app_configured_port(app_ports, &app).unwrap_or(config.listen_port); let worker = status .active_workers .iter() @@ -425,18 +418,18 @@ fn build_proxy_route_lines( .collect() } -fn app_configured_port(app_configs: &[AppProxyConfig], app: &AppType) -> Option { - app_configs +fn app_configured_port(app_ports: &[(AppType, u16)], app: &AppType) -> Option { + app_ports .iter() - .find(|config| config.app_type == app.as_str()) - .map(|config| config.listen_port) + .find(|(candidate, _)| candidate == app) + .map(|(_, port)| *port) } fn build_proxy_overview_lines( state: &AppState, config: &ProxyConfig, status: &crate::ProxyStatus, - app_configs: 
&[AppProxyConfig], + app_ports: &[(AppType, u16)], takeovers: &crate::proxy::types::ProxyTakeoverStatus, ) -> Vec { let current_providers = AppType::all() @@ -456,7 +449,7 @@ fn build_proxy_overview_lines( } else { config.listen_address.as_str() }; - let route_lines = build_proxy_route_lines(config, status, app_configs, takeovers); + let route_lines = build_proxy_route_lines(config, status, app_ports, takeovers); let mut lines = vec![ format!( @@ -550,7 +543,7 @@ fn build_proxy_overview_lines( format!( "- ANTHROPIC_BASE_URL=http://{}:{}", listen_host, - app_configured_port(app_configs, &AppType::Claude).unwrap_or(config.listen_port) + app_configured_port(app_ports, &AppType::Claude).unwrap_or(config.listen_port) ), "- ANTHROPIC_AUTH_TOKEN=proxy-placeholder".to_string(), crate::t!( @@ -612,7 +605,7 @@ mod tests { }; use super::{ - apply_overrides, build_proxy_overview_lines, load_proxy_app_configs, validate_listen_port, + apply_overrides, build_proxy_overview_lines, load_proxy_app_ports, validate_listen_port, }; #[test] @@ -649,39 +642,15 @@ mod tests { }; let mut config = crate::ProxyConfig::default(); config.listen_port = 15721; - let runtime = tokio::runtime::Runtime::new().expect("create runtime"); - runtime.block_on(async { - let mut claude = db - .get_proxy_config_for_app("claude") - .await - .expect("load claude proxy config"); - claude.listen_port = 15721; - claude.enabled = true; - db.update_proxy_config_for_app(claude) - .await - .expect("save claude proxy config"); - - let mut codex = db - .get_proxy_config_for_app("codex") - .await - .expect("load codex proxy config"); - codex.listen_port = 15722; - codex.enabled = false; - db.update_proxy_config_for_app(codex) - .await - .expect("save codex proxy config"); - - let mut gemini = db - .get_proxy_config_for_app("gemini") - .await - .expect("load gemini proxy config"); - gemini.listen_port = 15723; - gemini.enabled = true; - db.update_proxy_config_for_app(gemini) - .await - .expect("save gemini proxy 
config"); - }); - let app_configs = load_proxy_app_configs(&state, &runtime).expect("load app proxy configs"); + db.set_proxy_flags_sync("claude", true, false) + .expect("enable claude proxy route"); + db.set_app_proxy_preferred_port("codex", 15722) + .expect("save codex preferred proxy port"); + db.set_proxy_flags_sync("gemini", true, false) + .expect("enable gemini proxy route"); + db.set_app_proxy_preferred_port("gemini", 15723) + .expect("save gemini preferred proxy port"); + let app_ports = load_proxy_app_ports(&state).expect("load app proxy ports"); let status = ProxyStatus { running: true, address: "127.0.0.1".to_string(), @@ -708,7 +677,7 @@ mod tests { gemini: true, }; - let lines = build_proxy_overview_lines(&state, &config, &status, &app_configs, &takeover); + let lines = build_proxy_overview_lines(&state, &config, &status, &app_ports, &takeover); let output = lines.join("\n"); assert!( @@ -774,10 +743,9 @@ mod tests { let config = crate::ProxyConfig::default(); let status = ProxyStatus::default(); let takeover = ProxyTakeoverStatus::default(); - let runtime = tokio::runtime::Runtime::new().expect("create runtime"); - let app_configs = load_proxy_app_configs(&state, &runtime).expect("load app proxy configs"); + let app_ports = load_proxy_app_ports(&state).expect("load app proxy ports"); - let lines = build_proxy_overview_lines(&state, &config, &status, &app_configs, &takeover); + let lines = build_proxy_overview_lines(&state, &config, &status, &app_ports, &takeover); let output = lines.join("\n"); assert!( diff --git a/src-tauri/src/cli/tui/data.rs b/src-tauri/src/cli/tui/data.rs index fcc6c0a5..c5048367 100644 --- a/src-tauri/src/cli/tui/data.rs +++ b/src-tauri/src/cli/tui/data.rs @@ -1006,6 +1006,7 @@ fn load_proxy_snapshot(app_type: &AppType) -> Result { runtime.block_on(async { let config = state.proxy_service.get_global_config().await?; let app_proxy_config = state.db.get_proxy_config_for_app(app_type.as_str()).await?; + let configured_listen_port = 
state.db.get_app_proxy_preferred_port(app_type.as_str())?; let runtime_status = state.proxy_service.get_status().await; let takeover = state .proxy_service @@ -1031,7 +1032,7 @@ fn load_proxy_snapshot(app_type: &AppType) -> Result { .find(|worker| worker.app_type == current_app) .map(|worker| worker.port) .or_else(|| (runtime_status.port != 0).then_some(runtime_status.port)) - .unwrap_or(app_proxy_config.listen_port); + .unwrap_or(configured_listen_port); let default_cost_multiplier = state .db .get_default_cost_multiplier(app_type.as_str()) @@ -1051,7 +1052,7 @@ fn load_proxy_snapshot(app_type: &AppType) -> Result { gemini_takeover: takeover.gemini, default_cost_multiplier, configured_listen_address: config.listen_address.clone(), - configured_listen_port: app_proxy_config.listen_port, + configured_listen_port, listen_address, listen_port, uptime_seconds: runtime_status.uptime_seconds, diff --git a/src-tauri/src/cli/tui/runtime_actions/settings.rs b/src-tauri/src/cli/tui/runtime_actions/settings.rs index 5c1cfae8..257132f4 100644 --- a/src-tauri/src/cli/tui/runtime_actions/settings.rs +++ b/src-tauri/src/cli/tui/runtime_actions/settings.rs @@ -69,10 +69,9 @@ pub(super) fn set_proxy_listen_port( return Ok(()); } - let mut app_config = - runtime.block_on(state.db.get_proxy_config_for_app(ctx.app.app_type.as_str()))?; - app_config.listen_port = port; - runtime.block_on(state.db.update_proxy_config_for_app(app_config))?; + state + .db + .set_app_proxy_preferred_port(ctx.app.app_type.as_str(), port)?; *ctx.data = UiData::load(&ctx.app.app_type)?; ctx.app.push_toast( diff --git a/src-tauri/src/daemon/supervisor.rs b/src-tauri/src/daemon/supervisor.rs index 06ed8d02..ae3b3180 100644 --- a/src-tauri/src/daemon/supervisor.rs +++ b/src-tauri/src/daemon/supervisor.rs @@ -147,18 +147,18 @@ impl Supervisor { (token, rx) }; - let app_config = match self.db.get_proxy_config_for_app(&app_key).await { + let global_config = match self.db.get_global_proxy_config().await { Ok(config) 
=> config, Err(err) => { self.clear_pending_worker_registration(&app).await; - return Err(format!("load proxy config for {app_key} failed: {err}")); + return Err(format!("load global proxy config failed: {err}")); } }; - let global_config = match self.db.get_global_proxy_config().await { - Ok(config) => config, + let listen_port = match self.db.get_app_proxy_preferred_port(&app_key) { + Ok(port) => port, Err(err) => { self.clear_pending_worker_registration(&app).await; - return Err(format!("load global proxy config failed: {err}")); + return Err(format!("load proxy preference for {app_key} failed: {err}")); } }; @@ -168,7 +168,7 @@ impl Supervisor { .arg("--listen-address") .arg(global_config.listen_address) .arg("--listen-port") - .arg(app_config.listen_port.to_string()) + .arg(listen_port.to_string()) .env(DAEMON_SOCKET_ENV, &self.socket_path) .env(SESSION_TOKEN_ENV, &session_token) .env(RESTORE_GUARD_BYPASS_ENV, "1") diff --git a/src-tauri/src/database/dao/proxy.rs b/src-tauri/src/database/dao/proxy.rs index 954b1b77..275fb9fb 100644 --- a/src-tauri/src/database/dao/proxy.rs +++ b/src-tauri/src/database/dao/proxy.rs @@ -9,7 +9,9 @@ use rust_decimal::Decimal; use super::super::{lock_conn, Database}; -fn default_app_listen_port(app_type: &str) -> u16 { +pub(crate) const PROXY_PREFERENCES_KEY: &str = "proxy_preferences_cli_only"; + +fn default_app_preferred_port(app_type: &str) -> u16 { match app_type { "claude" => 15721, "codex" => 15722, @@ -94,11 +96,13 @@ impl Database { "UPDATE proxy_config SET proxy_enabled = ?1, listen_address = ?2, - enable_logging = ?3, + listen_port = ?3, + enable_logging = ?4, updated_at = datetime('now')", rusqlite::params![ if config.proxy_enabled { 1 } else { 0 }, config.listen_address, + config.listen_port as i32, if config.enable_logging { 1 } else { 0 }, ], ) @@ -259,7 +263,7 @@ impl Database { let result = { let conn = lock_conn!(self.conn); conn.query_row( - "SELECT app_type, enabled, listen_port, auto_failover_enabled, + "SELECT 
app_type, enabled, auto_failover_enabled, max_retries, streaming_first_byte_timeout, streaming_idle_timeout, non_streaming_timeout, circuit_failure_threshold, circuit_success_threshold, circuit_timeout_seconds, circuit_error_rate_threshold, circuit_min_requests @@ -269,17 +273,16 @@ impl Database { Ok(AppProxyConfig { app_type: row.get(0)?, enabled: row.get::<_, i32>(1)? != 0, - listen_port: row.get::<_, i32>(2)? as u16, - auto_failover_enabled: row.get::<_, i32>(3)? != 0, - max_retries: row.get::<_, i32>(4)? as u32, - streaming_first_byte_timeout: row.get::<_, i32>(5)? as u32, - streaming_idle_timeout: row.get::<_, i32>(6)? as u32, - non_streaming_timeout: row.get::<_, i32>(7)? as u32, - circuit_failure_threshold: row.get::<_, i32>(8)? as u32, - circuit_success_threshold: row.get::<_, i32>(9)? as u32, - circuit_timeout_seconds: row.get::<_, i32>(10)? as u32, - circuit_error_rate_threshold: row.get(11)?, - circuit_min_requests: row.get::<_, i32>(12)? as u32, + auto_failover_enabled: row.get::<_, i32>(2)? != 0, + max_retries: row.get::<_, i32>(3)? as u32, + streaming_first_byte_timeout: row.get::<_, i32>(4)? as u32, + streaming_idle_timeout: row.get::<_, i32>(5)? as u32, + non_streaming_timeout: row.get::<_, i32>(6)? as u32, + circuit_failure_threshold: row.get::<_, i32>(7)? as u32, + circuit_success_threshold: row.get::<_, i32>(8)? as u32, + circuit_timeout_seconds: row.get::<_, i32>(9)? as u32, + circuit_error_rate_threshold: row.get(10)?, + circuit_min_requests: row.get::<_, i32>(11)? 
as u32, }) }, ) @@ -294,7 +297,6 @@ impl Database { Ok(AppProxyConfig { app_type: app_type_owned, enabled: false, - listen_port: default_app_listen_port(app_type), auto_failover_enabled: false, max_retries: 3, streaming_first_byte_timeout: 60, @@ -327,23 +329,21 @@ impl Database { conn.execute( "UPDATE proxy_config SET enabled = ?2, - listen_port = ?3, - auto_failover_enabled = ?4, - max_retries = ?5, - streaming_first_byte_timeout = ?6, - streaming_idle_timeout = ?7, - non_streaming_timeout = ?8, - circuit_failure_threshold = ?9, - circuit_success_threshold = ?10, - circuit_timeout_seconds = ?11, - circuit_error_rate_threshold = ?12, - circuit_min_requests = ?13, + auto_failover_enabled = ?3, + max_retries = ?4, + streaming_first_byte_timeout = ?5, + streaming_idle_timeout = ?6, + non_streaming_timeout = ?7, + circuit_failure_threshold = ?8, + circuit_success_threshold = ?9, + circuit_timeout_seconds = ?10, + circuit_error_rate_threshold = ?11, + circuit_min_requests = ?12, updated_at = datetime('now') WHERE app_type = ?1", rusqlite::params![ config.app_type, if config.enabled { 1 } else { 0 }, - config.listen_port as i32, if auto_failover_enabled { 1 } else { 0 }, config.max_retries as i32, config.streaming_first_byte_timeout as i32, @@ -371,33 +371,23 @@ impl Database { .map_err(|e| AppError::Lock(e.to_string()))?; // 根据 app_type 使用不同的默认值(与 schema.rs seed 保持一致) - let ( - retries, - fb_timeout, - idle_timeout, - cb_fail, - cb_succ, - cb_timeout, - cb_rate, - cb_min, - listen_port, - ) = match app_type { - "claude" => (6, 90, 180, 8, 3, 90, 0.7, 15, 15721), - "codex" => (3, 60, 120, 4, 2, 60, 0.6, 10, 15722), - "gemini" => (5, 60, 120, 4, 2, 60, 0.6, 10, 15723), - _ => (3, 60, 120, 4, 2, 60, 0.6, 10, 15721), - }; + let (retries, fb_timeout, idle_timeout, cb_fail, cb_succ, cb_timeout, cb_rate, cb_min) = + match app_type { + "claude" => (6, 90, 180, 8, 3, 90, 0.7, 15), + "codex" => (3, 60, 120, 4, 2, 60, 0.6, 10), + "gemini" => (5, 60, 120, 4, 2, 60, 0.6, 10), + _ 
=> (3, 60, 120, 4, 2, 60, 0.6, 10), // 默认值 + }; conn.execute( "INSERT OR IGNORE INTO proxy_config ( - app_type, listen_port, max_retries, + app_type, max_retries, streaming_first_byte_timeout, streaming_idle_timeout, non_streaming_timeout, circuit_failure_threshold, circuit_success_threshold, circuit_timeout_seconds, circuit_error_rate_threshold, circuit_min_requests - ) VALUES (?1, ?2, ?3, ?4, ?5, 600, ?6, ?7, ?8, ?9, ?10)", + ) VALUES (?1, ?2, ?3, ?4, 600, ?5, ?6, ?7, ?8, ?9)", rusqlite::params![ app_type, - listen_port, retries, fb_timeout, idle_timeout, @@ -423,11 +413,11 @@ impl Database { // claude: 更激进的重试和超时配置 conn.execute( "INSERT OR IGNORE INTO proxy_config ( - app_type, listen_port, max_retries, + app_type, max_retries, streaming_first_byte_timeout, streaming_idle_timeout, non_streaming_timeout, circuit_failure_threshold, circuit_success_threshold, circuit_timeout_seconds, circuit_error_rate_threshold, circuit_min_requests - ) VALUES ('claude', 15721, 6, 90, 180, 600, 8, 3, 90, 0.7, 15)", + ) VALUES ('claude', 6, 90, 180, 600, 8, 3, 90, 0.7, 15)", [], ) .map_err(|e| AppError::Database(e.to_string()))?; @@ -435,11 +425,11 @@ impl Database { // codex: 默认配置 conn.execute( "INSERT OR IGNORE INTO proxy_config ( - app_type, listen_port, max_retries, + app_type, max_retries, streaming_first_byte_timeout, streaming_idle_timeout, non_streaming_timeout, circuit_failure_threshold, circuit_success_threshold, circuit_timeout_seconds, circuit_error_rate_threshold, circuit_min_requests - ) VALUES ('codex', 15722, 3, 60, 120, 600, 4, 2, 60, 0.6, 10)", + ) VALUES ('codex', 3, 60, 120, 600, 4, 2, 60, 0.6, 10)", [], ) .map_err(|e| AppError::Database(e.to_string()))?; @@ -447,11 +437,11 @@ impl Database { // gemini: 稍高的重试次数 conn.execute( "INSERT OR IGNORE INTO proxy_config ( - app_type, listen_port, max_retries, + app_type, max_retries, streaming_first_byte_timeout, streaming_idle_timeout, non_streaming_timeout, circuit_failure_threshold, circuit_success_threshold, 
circuit_timeout_seconds, circuit_error_rate_threshold, circuit_min_requests - ) VALUES ('gemini', 15723, 5, 60, 120, 600, 4, 2, 60, 0.6, 10)", + ) VALUES ('gemini', 5, 60, 120, 600, 4, 2, 60, 0.6, 10)", [], ) .map_err(|e| AppError::Database(e.to_string()))?; @@ -508,14 +498,16 @@ impl Database { conn.execute( "UPDATE proxy_config SET listen_address = ?1, - max_retries = ?2, - enable_logging = ?3, - streaming_first_byte_timeout = ?4, - streaming_idle_timeout = ?5, - non_streaming_timeout = ?6, + listen_port = ?2, + max_retries = ?3, + enable_logging = ?4, + streaming_first_byte_timeout = ?5, + streaming_idle_timeout = ?6, + non_streaming_timeout = ?7, updated_at = datetime('now')", rusqlite::params![ config.listen_address, + config.listen_port as i32, config.max_retries as i32, if config.enable_logging { 1 } else { 0 }, config.streaming_first_byte_timeout as i32, @@ -528,6 +520,42 @@ impl Database { Ok(()) } + pub fn get_proxy_preferences(&self) -> Result { + let Some(raw) = self.get_setting(PROXY_PREFERENCES_KEY)? else { + return Ok(ProxyPreferences::default()); + }; + serde_json::from_str(&raw).map_err(|error| AppError::Json { + path: PROXY_PREFERENCES_KEY.to_string(), + source: error, + }) + } + + pub fn update_proxy_preferences(&self, preferences: &ProxyPreferences) -> Result<(), AppError> { + if preferences.apps.is_empty() { + self.delete_setting(PROXY_PREFERENCES_KEY)?; + return Ok(()); + } + let serialized = serde_json::to_string(preferences) + .map_err(|source| AppError::JsonSerialize { source })?; + self.set_setting(PROXY_PREFERENCES_KEY, &serialized) + } + + pub fn get_app_proxy_preferred_port(&self, app_type: &str) -> Result { + Ok(self + .get_proxy_preferences()? 
+ .apps + .get(app_type) + .and_then(|preference| preference.preferred_port) + .unwrap_or_else(|| default_app_preferred_port(app_type))) + } + + pub fn set_app_proxy_preferred_port(&self, app_type: &str, port: u16) -> Result<(), AppError> { + let mut preferences = self.get_proxy_preferences()?; + let entry = preferences.apps.entry(app_type.to_string()).or_default(); + entry.preferred_port = Some(port); + self.update_proxy_preferences(&preferences) + } + /// 设置 Live 接管状态(兼容旧版本,更新 enabled 字段) pub async fn set_live_takeover_active(&self, _active: bool) -> Result<(), AppError> { // 不再使用此字段,由 enabled 字段替代 @@ -932,6 +960,7 @@ impl Database { #[cfg(test)] mod tests { + use crate::database::dao::proxy::PROXY_PREFERENCES_KEY; use crate::database::Database; use crate::error::AppError; use crate::provider::Provider; @@ -1050,83 +1079,27 @@ mod tests { Ok(()) } - #[tokio::test] - async fn update_global_proxy_config_preserves_app_listen_ports() -> Result<(), AppError> { + #[test] + fn proxy_preferences_persist_preferred_ports_in_settings_kv() -> Result<(), AppError> { let db = Database::memory()?; - let mut codex = db.get_proxy_config_for_app("codex").await?; - codex.listen_port = 17022; - db.update_proxy_config_for_app(codex).await?; - let mut gemini = db.get_proxy_config_for_app("gemini").await?; - gemini.listen_port = 17023; - db.update_proxy_config_for_app(gemini).await?; - - let mut config = db.get_global_proxy_config().await?; - config.proxy_enabled = true; - config.listen_address = "127.0.0.2".to_string(); - config.listen_port = 18000; - db.update_global_proxy_config(config).await?; - assert_eq!( - db.get_proxy_config_for_app("claude").await?.listen_port, - 15721 - ); - assert_eq!( - db.get_proxy_config_for_app("codex").await?.listen_port, - 17022 - ); - assert_eq!( - db.get_proxy_config_for_app("gemini").await?.listen_port, - 17023 - ); - Ok(()) - } + db.set_app_proxy_preferred_port("codex", 17022)?; + db.set_app_proxy_preferred_port("gemini", 17023)?; - #[tokio::test] 
- async fn update_proxy_config_preserves_app_listen_ports() -> Result<(), AppError> { - let db = Database::memory()?; - let mut codex = db.get_proxy_config_for_app("codex").await?; - codex.listen_port = 17022; - db.update_proxy_config_for_app(codex).await?; - let mut gemini = db.get_proxy_config_for_app("gemini").await?; - gemini.listen_port = 17023; - db.update_proxy_config_for_app(gemini).await?; - - let mut config = db.get_proxy_config().await?; - config.listen_address = "127.0.0.2".to_string(); - config.listen_port = 18000; - db.update_proxy_config(config).await?; + let raw = db + .get_setting(PROXY_PREFERENCES_KEY)? + .expect("proxy preferences should be stored in settings"); + assert!(raw.contains("\"preferredPort\":17022")); + let preferences = db.get_proxy_preferences()?; assert_eq!( - db.get_proxy_config_for_app("claude").await?.listen_port, - 15721 - ); - assert_eq!( - db.get_proxy_config_for_app("codex").await?.listen_port, - 17022 - ); - assert_eq!( - db.get_proxy_config_for_app("gemini").await?.listen_port, - 17023 + preferences + .apps + .get("codex") + .and_then(|preference| preference.preferred_port), + Some(17022) ); - Ok(()) - } - - #[tokio::test] - async fn app_proxy_config_uses_distinct_default_ports() -> Result<(), AppError> { - let db = Database::memory()?; - assert_eq!( - db.get_proxy_config_for_app("claude").await?.listen_port, - 15721 - ); - assert_eq!( - db.get_proxy_config_for_app("codex").await?.listen_port, - 15722 - ); - assert_eq!( - db.get_proxy_config_for_app("gemini").await?.listen_port, - 15723 - ); Ok(()) } diff --git a/src-tauri/src/database/mod.rs b/src-tauri/src/database/mod.rs index 8843212a..9865f486 100644 --- a/src-tauri/src/database/mod.rs +++ b/src-tauri/src/database/mod.rs @@ -48,7 +48,7 @@ const DB_BACKUP_RETAIN: usize = 10; /// 当前 Schema 版本号 /// 每次修改表结构时递增,并在 schema.rs 中添加相应的迁移逻辑 -pub(crate) const SCHEMA_VERSION: i32 = 11; +pub(crate) const SCHEMA_VERSION: i32 = 10; /// 安全地序列化 JSON,避免 unwrap panic pub(crate) fn 
to_json_string(value: &T) -> Result { diff --git a/src-tauri/src/database/schema.rs b/src-tauri/src/database/schema.rs index e8c0aa73..39106426 100644 --- a/src-tauri/src/database/schema.rs +++ b/src-tauri/src/database/schema.rs @@ -136,29 +136,29 @@ impl Database { // - 旧表会在 apply_schema_migrations() 中迁移为三行结构后再插入。 if Self::has_column(conn, "proxy_config", "app_type")? { conn.execute( - "INSERT OR IGNORE INTO proxy_config (app_type, listen_port, max_retries, + "INSERT OR IGNORE INTO proxy_config (app_type, max_retries, streaming_first_byte_timeout, streaming_idle_timeout, non_streaming_timeout, circuit_failure_threshold, circuit_success_threshold, circuit_timeout_seconds, circuit_error_rate_threshold, circuit_min_requests) - VALUES ('claude', 15721, 6, 90, 180, 600, 8, 3, 90, 0.7, 15)", + VALUES ('claude', 6, 90, 180, 600, 8, 3, 90, 0.7, 15)", [], ) .map_err(|e| AppError::Database(e.to_string()))?; conn.execute( - "INSERT OR IGNORE INTO proxy_config (app_type, listen_port, max_retries, + "INSERT OR IGNORE INTO proxy_config (app_type, max_retries, streaming_first_byte_timeout, streaming_idle_timeout, non_streaming_timeout, circuit_failure_threshold, circuit_success_threshold, circuit_timeout_seconds, circuit_error_rate_threshold, circuit_min_requests) - VALUES ('codex', 15722, 3, 60, 120, 600, 4, 2, 60, 0.6, 10)", + VALUES ('codex', 3, 60, 120, 600, 4, 2, 60, 0.6, 10)", [], ) .map_err(|e| AppError::Database(e.to_string()))?; conn.execute( - "INSERT OR IGNORE INTO proxy_config (app_type, listen_port, max_retries, + "INSERT OR IGNORE INTO proxy_config (app_type, max_retries, streaming_first_byte_timeout, streaming_idle_timeout, non_streaming_timeout, circuit_failure_threshold, circuit_success_threshold, circuit_timeout_seconds, circuit_error_rate_threshold, circuit_min_requests) - VALUES ('gemini', 15723, 5, 60, 120, 600, 4, 2, 60, 0.6, 10)", + VALUES ('gemini', 5, 60, 120, 600, 4, 2, 60, 0.6, 10)", [], ) .map_err(|e| AppError::Database(e.to_string()))?; @@ -415,15 
+415,10 @@ impl Database { Self::set_user_version(conn, 9)?; } 9 => { - log::info!("迁移数据库从 v9 到 v10(Hermes Agent 支持)"); + log::info!("迁移数据库从 v9 到 v10(添加 Hermes Agent 支持)"); Self::migrate_v9_to_v10(conn)?; Self::set_user_version(conn, 10)?; } - 10 => { - log::info!("迁移数据库从 v10 到 v11(代理按应用默认端口)"); - Self::migrate_v10_to_v11(conn)?; - Self::set_user_version(conn, 11)?; - } _ => { return Err(AppError::Database(format!( "未知的数据库版本 {version},无法迁移到 {SCHEMA_VERSION}" @@ -1233,30 +1228,6 @@ impl Database { Ok(()) } - /// v10 -> v11 迁移:代理按应用默认端口 - fn migrate_v10_to_v11(conn: &Connection) -> Result<(), AppError> { - if Self::table_exists(conn, "proxy_config")? - && Self::has_column(conn, "proxy_config", "app_type")? - && Self::has_column(conn, "proxy_config", "listen_port")? - { - conn.execute( - "UPDATE proxy_config SET listen_port = 15722 - WHERE app_type = 'codex' AND listen_port = 15721", - [], - ) - .map_err(|e| AppError::Database(format!("迁移 Codex 代理端口失败: {e}")))?; - conn.execute( - "UPDATE proxy_config SET listen_port = 15723 - WHERE app_type = 'gemini' AND listen_port = 15721", - [], - ) - .map_err(|e| AppError::Database(format!("迁移 Gemini 代理端口失败: {e}")))?; - } - - log::info!("v10 -> v11 迁移完成:已设置按应用代理端口"); - Ok(()) - } - /// 插入默认模型定价数据 /// 格式: (model_id, display_name, input, output, cache_read, cache_creation) /// 注意: model_id 使用短横线格式(如 claude-haiku-4-5),与 API 返回的模型名称标准化后一致 diff --git a/src-tauri/src/proxy/types.rs b/src-tauri/src/proxy/types.rs index 27f8431d..e01dea4c 100644 --- a/src-tauri/src/proxy/types.rs +++ b/src-tauri/src/proxy/types.rs @@ -1,3 +1,5 @@ +use std::collections::BTreeMap; + use serde::{Deserialize, Serialize}; /// 代理服务器配置 @@ -191,9 +193,6 @@ pub struct AppProxyConfig { pub app_type: String, /// 该 app 代理启用开关 pub enabled: bool, - /// 该 app 监听端口 - #[serde(default = "default_app_listen_port")] - pub listen_port: u16, /// 该 app 自动故障转移开关 pub auto_failover_enabled: bool, /// 最大重试次数 @@ -216,8 +215,18 @@ pub struct AppProxyConfig { pub 
circuit_min_requests: u32, } -fn default_app_listen_port() -> u16 { - 15721 +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +#[serde(rename_all = "camelCase")] +pub struct ProxyPreferences { + #[serde(default)] + pub apps: BTreeMap, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +#[serde(rename_all = "camelCase")] +pub struct AppProxyPreference { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub preferred_port: Option, } /// 整流器配置 diff --git a/src-tauri/src/services/proxy.rs b/src-tauri/src/services/proxy.rs index 72c927e3..11100549 100644 --- a/src-tauri/src/services/proxy.rs +++ b/src-tauri/src/services/proxy.rs @@ -476,6 +476,20 @@ impl ProxyService { Ok(info) } + async fn runtime_config_for_app(&self, app_type: &AppType) -> Result { + let mut config = self.get_config().await.map_err(|e| e.to_string())?; + config.listen_port = self + .db + .get_app_proxy_preferred_port(app_type.as_str()) + .map_err(|error| { + format!( + "load proxy preference for {} failed: {error}", + app_type.as_str() + ) + })?; + Ok(config) + } + pub(crate) fn publish_runtime_session_if_needed( &self, info: &ProxyServerInfo, @@ -1263,7 +1277,7 @@ impl ProxyService { .await?; if !self.is_running().await { - let config = self.get_config().await.map_err(|e| e.to_string())?; + let config = self.runtime_config_for_app(app_type).await?; self.start_with_resolved_config_unlocked(config).await?; } @@ -1707,13 +1721,12 @@ impl ProxyService { app_type: &AppType, ) -> Result<(String, String), String> { let persisted = self.get_config().await.map_err(|e| e.to_string())?; - let app_proxy = self + let preferred_port = self .db - .get_proxy_config_for_app(app_type.as_str()) - .await + .get_app_proxy_preferred_port(app_type.as_str()) .map_err(|error| { format!( - "load proxy config for {} failed: {error}", + "load proxy preference for {} failed: {error}", app_type.as_str() ) })?; @@ -1727,7 +1740,7 @@ impl ProxyService { .as_ref() .map(|session| session.port) 
.filter(|port| *port != 0) - .unwrap_or(app_proxy.listen_port); + .unwrap_or(preferred_port); let connect_host = match listen_address.as_str() { "0.0.0.0" => "127.0.0.1".to_string(), @@ -2973,6 +2986,76 @@ mod tests { service.stop().await.expect("stop proxy runtime"); } + #[tokio::test] + #[serial] + async fn takeover_activation_uses_app_preferred_port_from_settings_kv() { + let temp_home = TempDir::new().expect("create temp home"); + let _env = TestHomeEnvGuard::set(temp_home.path()); + std::fs::create_dir_all( + get_claude_settings_path() + .parent() + .expect("claude settings parent dir"), + ) + .expect("create ~/.claude"); + write_json_file( + &get_claude_settings_path(), + &json!({ + "env": { + "ANTHROPIC_BASE_URL": "https://api.anthropic.com", + "ANTHROPIC_AUTH_TOKEN": "fresh-live-token" + } + }), + ) + .expect("seed claude live config"); + + let db = Arc::new(Database::memory().expect("create database")); + let service = ProxyService::new(db.clone()); + let provider = Provider::with_id( + "claude-provider".to_string(), + "Claude Provider".to_string(), + json!({ + "env": { + "ANTHROPIC_BASE_URL": "https://api.anthropic.com", + "ANTHROPIC_AUTH_TOKEN": "provider-token" + } + }), + None, + ); + db.save_provider("claude", &provider) + .expect("save claude provider"); + db.set_current_provider("claude", &provider.id) + .expect("set current claude provider"); + + let listener = tokio::net::TcpListener::bind(("127.0.0.1", 0)) + .await + .expect("reserve free local port"); + let preferred_port = listener + .local_addr() + .expect("read reserved listener address") + .port(); + drop(listener); + db.set_app_proxy_preferred_port("claude", preferred_port) + .expect("persist claude preferred proxy port"); + + service + .set_takeover_for_app("claude", true) + .await + .expect("enable claude takeover"); + + let status = service.get_status().await; + assert_eq!(status.port, preferred_port); + let live: Value = + read_json_file(&get_claude_settings_path()).expect("read claude 
live config"); + let expected_proxy_url = format!("http://127.0.0.1:{preferred_port}"); + assert_eq!( + live.pointer("/env/ANTHROPIC_BASE_URL") + .and_then(Value::as_str), + Some(expected_proxy_url.as_str()) + ); + + service.stop().await.expect("stop proxy runtime"); + } + #[tokio::test] #[serial] async fn takeover_activation_rejects_empty_queue_when_failover_enabled() { diff --git a/src-tauri/tests/proxy_claude_forwarder_alignment.rs b/src-tauri/tests/proxy_claude_forwarder_alignment.rs index 348b9e3d..5f90427a 100644 --- a/src-tauri/tests/proxy_claude_forwarder_alignment.rs +++ b/src-tauri/tests/proxy_claude_forwarder_alignment.rs @@ -563,11 +563,12 @@ async fn proxy_claude_auto_failover_uses_activated_queue_providers() { .await .expect("read claude app proxy config"); app_proxy.enabled = true; - app_proxy.listen_port = free_loopback_port(); app_proxy.auto_failover_enabled = true; db.update_proxy_config_for_app(app_proxy) .await .expect("enable auto failover"); + db.set_app_proxy_preferred_port("claude", free_loopback_port()) + .expect("set claude preferred proxy port"); let service = ProxyService::new(db.clone()); let mut config = service.get_config().await.expect("read proxy config"); diff --git a/src-tauri/tests/proxy_daemon.rs b/src-tauri/tests/proxy_daemon.rs index b7a40ceb..f2e2e491 100644 --- a/src-tauri/tests/proxy_daemon.rs +++ b/src-tauri/tests/proxy_daemon.rs @@ -755,23 +755,10 @@ fn seed_minimal_claude_provider(sandbox: &TestSandbox) { .db .set_current_provider("claude", &provider.id) .expect("set sandbox current claude provider"); - let runtime = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build() - .expect("create test runtime"); - runtime.block_on(async { - let mut config = state - .db - .get_proxy_config_for_app("claude") - .await - .expect("load sandbox claude proxy config"); - config.listen_port = listen_port; - state - .db - .update_proxy_config_for_app(config) - .await - .expect("update sandbox claude proxy port"); - }); + 
state + .db + .set_app_proxy_preferred_port("claude", listen_port) + .expect("update sandbox claude proxy preferred port"); let _ = sandbox; // tie lifetime so the sandbox outlives this seed } diff --git a/src-tauri/tests/proxy_service.rs b/src-tauri/tests/proxy_service.rs index 89e5b269..4c440988 100644 --- a/src-tauri/tests/proxy_service.rs +++ b/src-tauri/tests/proxy_service.rs @@ -94,14 +94,8 @@ fn load_runtime_session_worker_count(state: &AppState) -> usize { } async fn set_app_proxy_port(db: &Database, app_type: &str, port: u16) { - let mut config = db - .get_proxy_config_for_app(app_type) - .await - .unwrap_or_else(|_| panic!("get {app_type} proxy config")); - config.listen_port = port; - db.update_proxy_config_for_app(config) - .await - .unwrap_or_else(|_| panic!("update {app_type} proxy port")); + db.set_app_proxy_preferred_port(app_type, port) + .unwrap_or_else(|_| panic!("update {app_type} proxy preferred port")); } async fn set_claude_proxy_port(db: &Database, port: u16) { From fccfdf25ada58604c15a7d82f779bb3b94bc5777 Mon Sep 17 00:00:00 2001 From: saladday <1203511142@qq.com> Date: Thu, 21 May 2026 20:21:16 +0800 Subject: [PATCH 4/5] (fix) require status proof before stopping workers --- src-tauri/src/cli/commands/proxy.rs | 13 ++- src-tauri/src/services/proxy.rs | 145 +++------------------------- src-tauri/tests/proxy_service.rs | 20 ++-- 3 files changed, 32 insertions(+), 146 deletions(-) diff --git a/src-tauri/src/cli/commands/proxy.rs b/src-tauri/src/cli/commands/proxy.rs index 9ca02393..c78c6c78 100644 --- a/src-tauri/src/cli/commands/proxy.rs +++ b/src-tauri/src/cli/commands/proxy.rs @@ -176,11 +176,14 @@ fn serve_proxy( runtime.block_on(async move { let service = state.proxy_service.clone(); - if !takeovers.is_empty() && service.has_persisted_managed_sessions() { - return Err(AppError::Message( - "cannot run foreground proxy takeover while a daemon-managed proxy session is active; disable daemon-managed proxy routes first" - .to_string(), - )); 
+ if !takeovers.is_empty() { + let status = service.get_status().await; + if status.running && !status.active_workers.is_empty() { + return Err(AppError::Message( + "cannot run foreground proxy takeover while a daemon-managed proxy session is active; disable daemon-managed proxy routes first" + .to_string(), + )); + } } let base_config = service.get_config().await?; let effective_config = apply_overrides(&base_config, listen_address, listen_port)?; diff --git a/src-tauri/src/services/proxy.rs b/src-tauri/src/services/proxy.rs index 11100549..c05289e0 100644 --- a/src-tauri/src/services/proxy.rs +++ b/src-tauri/src/services/proxy.rs @@ -358,8 +358,7 @@ impl ProxyService { /// per-app takeover via the same code path the supervisor uses, takes the /// cross-process state-mutation guard around it (so a concurrent CLI /// invocation can't race the live-config restore), and clears the matching - /// daemon-managed runtime marker. If the daemon died ungracefully, this - /// also stops the orphaned worker before removing its persisted PID. + /// daemon-managed runtime marker. 
async fn local_disable_takeover(&self, app_type: &str) -> Result<(), String> { let app = Self::takeover_app_from_str(app_type)?; let _guard = crate::services::state_coordination::acquire_restore_mutation_guard().await?; @@ -374,14 +373,11 @@ impl ProxyService { } if let Some(session) = self.load_persisted_runtime_session_for_app(&app) { if session.kind.is_managed_external() { - let should_terminate = match Self::probe_external_proxy_status(&session).await { - ExternalProxyStatusProbe::Matched(_) => true, - ExternalProxyStatusProbe::Mismatched => false, - ExternalProxyStatusProbe::Unreachable => { - Self::has_managed_external_ownership_signal(&session) - } - }; - if Self::is_process_alive(session.pid) && should_terminate { + if matches!( + Self::probe_external_proxy_status(&session).await, + ExternalProxyStatusProbe::Matched(_) + ) && Self::is_process_alive(session.pid) + { Self::terminate_external_process(session.pid).await?; } } @@ -390,14 +386,6 @@ impl ProxyService { Ok(()) } - pub fn has_persisted_managed_sessions(&self) -> bool { - self.load_persisted_runtime_sessions() - .into_iter() - .any(|session| { - session.kind.is_managed_external() && Self::is_process_alive(session.pid) - }) - } - pub async fn set_managed_session_for_app( &self, app_type: &str, @@ -565,16 +553,11 @@ impl ProxyService { if let Some(session) = self.load_persisted_runtime_session() { if session.kind.is_managed_external() { - let probe = Self::probe_external_proxy_status(&session).await; - let should_terminate = match probe { - ExternalProxyStatusProbe::Matched(_) => true, - ExternalProxyStatusProbe::Mismatched => false, - ExternalProxyStatusProbe::Unreachable => { - Self::has_managed_external_ownership_signal(&session) - } - }; - - if Self::is_process_alive(session.pid) && should_terminate { + if matches!( + Self::probe_external_proxy_status(&session).await, + ExternalProxyStatusProbe::Matched(_) + ) && Self::is_process_alive(session.pid) + { 
Self::terminate_external_process(session.pid).await?; stopped_runtime = true; } @@ -634,38 +617,7 @@ impl ProxyService { stale_app_keys.push(session.app_type.clone()); } ExternalProxyStatusProbe::Unreachable => { - if Self::has_managed_external_ownership_signal(&session) { - let uptime_seconds = - chrono::DateTime::parse_from_rfc3339(&session.started_at) - .ok() - .map(|started_at| { - let started_at = started_at.with_timezone(&chrono::Utc); - (chrono::Utc::now() - started_at).num_seconds().max(0) - as u64 - }) - .unwrap_or(0); - workers.push(crate::proxy::types::ActiveWorker { - app_type: session - .app_type - .clone() - .unwrap_or_else(|| "proxy".to_string()), - address: session.address.clone(), - port: session.port, - pid: Some(session.pid), - }); - if primary_status.is_none() { - primary_status = Some(ProxyStatus { - running: true, - address: session.address.clone(), - port: session.port, - uptime_seconds, - managed_session_token: session.session_token.clone(), - ..ProxyStatus::default() - }); - } - } else { - stale_app_keys.push(session.app_type.clone()); - } + stale_app_keys.push(session.app_type.clone()); } } } @@ -2241,79 +2193,6 @@ impl ProxyService { } } - fn has_managed_external_ownership_signal(session: &PersistedProxyRuntimeSession) -> bool { - session.session_token.is_some() - && (Self::is_detached_session_leader(session.pid) - || Self::process_looks_like_cc_switch_proxy_worker(session.pid)) - } - - #[cfg(unix)] - fn process_looks_like_cc_switch_proxy_worker(pid: u32) -> bool { - if pid == 0 { - return false; - } - - #[cfg(target_os = "macos")] - { - let output = std::process::Command::new("ps") - .args(["-p", &pid.to_string(), "-o", "command="]) - .output(); - let Ok(output) = output else { - return false; - }; - if !output.status.success() { - return false; - } - let command = String::from_utf8_lossy(&output.stdout); - return command.contains("cc-switch") - && command.contains("proxy serve") - && command.contains("--listen-port"); - } - - 
#[cfg(target_os = "linux")] - { - let path = format!("/proc/{pid}/cmdline"); - let Ok(raw) = std::fs::read(path) else { - return false; - }; - let command = raw - .split(|byte| *byte == 0) - .filter(|part| !part.is_empty()) - .map(|part| String::from_utf8_lossy(part)) - .collect::>() - .join(" "); - return command.contains("cc-switch") - && command.contains("proxy serve") - && command.contains("--listen-port"); - } - - #[cfg(not(any(target_os = "macos", target_os = "linux")))] - { - false - } - } - - #[cfg(not(unix))] - fn process_looks_like_cc_switch_proxy_worker(_pid: u32) -> bool { - false - } - - #[cfg(unix)] - fn is_detached_session_leader(pid: u32) -> bool { - if pid == 0 { - return false; - } - - let sid = unsafe { libc::getsid(pid as i32) }; - let pgid = unsafe { libc::getpgid(pid as i32) }; - sid == pid as i32 && pgid == pid as i32 - } - - #[cfg(not(unix))] - fn is_detached_session_leader(_pid: u32) -> bool { - false - } - async fn managed_session_ready_info( &self, child_pid: u32, diff --git a/src-tauri/tests/proxy_service.rs b/src-tauri/tests/proxy_service.rs index 4c440988..b347364a 100644 --- a/src-tauri/tests/proxy_service.rs +++ b/src-tauri/tests/proxy_service.rs @@ -974,8 +974,7 @@ async fn managed_session_keeps_runtime_alive_while_another_supported_app_is_atta #[cfg(unix)] #[tokio::test] #[serial] -async fn managed_session_disable_last_app_terminates_external_process_even_when_status_probe_fails() -{ +async fn managed_session_disable_last_app_does_not_terminate_when_status_probe_fails() { let _guard = lock_test_mutex(); reset_test_fs(); let _home = ensure_test_home(); @@ -1020,25 +1019,28 @@ async fn managed_session_disable_last_app_terminates_external_process_even_when_ let status = state.proxy_service.get_status().await; assert!( - status.running, - "owned managed external markers should still report running when /status probe is unreachable" + !status.running, + "managed external markers should not report running when /status probe is 
unreachable" ); assert!( state .db .get_setting("proxy_runtime_session") .expect("read runtime session marker after unreachable get_status") - .is_some(), - "owned managed external marker should survive an unreachable /status probe so last-app disable can still stop it" + .is_none(), + "unreachable managed external markers should be cleared instead of treated as owned" ); state .proxy_service .set_managed_session_for_app("claude", false) .await - .expect("disable final managed app and stop runtime"); + .expect("disable final managed app and clear runtime marker"); - wait_for(Duration::from_secs(5), || !is_process_alive(runtime_pid)); + assert!( + is_process_alive(runtime_pid), + "unreachable status must not terminate a process based only on stale persisted metadata" + ); assert!( state @@ -1058,6 +1060,8 @@ async fn managed_session_disable_last_app_terminates_external_process_even_when_ !global.proxy_enabled, "stopping the last managed app should persist global proxy enabled=false" ); + + ensure_process_stopped(runtime_pid); } #[cfg(unix)] From 2077fa33a83e78e0397b65e15aed3e26f6b6849f Mon Sep 17 00:00:00 2001 From: saladday <1203511142@qq.com> Date: Thu, 21 May 2026 22:47:04 +0800 Subject: [PATCH 5/5] (fix) verify daemon worker before drop --- src-tauri/src/daemon/supervisor.rs | 10 ++++++ src-tauri/src/services/proxy.rs | 56 +++++++++++++++++++++++++++++- 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/src-tauri/src/daemon/supervisor.rs b/src-tauri/src/daemon/supervisor.rs index ae3b3180..ada3db8d 100644 --- a/src-tauri/src/daemon/supervisor.rs +++ b/src-tauri/src/daemon/supervisor.rs @@ -178,6 +178,16 @@ impl Supervisor { .stderr(Stdio::null()) .kill_on_drop(true); + #[cfg(unix)] + unsafe { + cmd.pre_exec(|| { + if libc::setsid() == -1 { + return Err(std::io::Error::last_os_error()); + } + Ok(()) + }); + } + let spawned = match cmd.spawn() { Ok(child) => child, Err(err) => { diff --git a/src-tauri/src/services/proxy.rs 
b/src-tauri/src/services/proxy.rs index c05289e0..de8d09e4 100644 --- a/src-tauri/src/services/proxy.rs +++ b/src-tauri/src/services/proxy.rs @@ -354,6 +354,56 @@ impl ProxyService { } } + async fn should_drop_takeover_via_daemon(&self, app_type: &AppType) -> Result { + let Some(session) = self.load_persisted_runtime_session_for_app(app_type) else { + self.remove_stale_daemon_socket_if_unreachable(); + return Ok(false); + }; + + if !session.kind.is_managed_external() { + return Ok(true); + } + + if !Self::is_process_alive(session.pid) { + self.clear_persisted_runtime_session_for_app(app_type)?; + return Ok(false); + } + + match Self::probe_external_proxy_status(&session).await { + ExternalProxyStatusProbe::Matched(_) => Ok(true), + ExternalProxyStatusProbe::Mismatched | ExternalProxyStatusProbe::Unreachable => { + self.clear_persisted_runtime_session_for_app(app_type)?; + Ok(false) + } + } + } + + #[cfg(unix)] + fn remove_stale_daemon_socket_if_unreachable(&self) { + use std::io::ErrorKind; + + let socket_path = crate::daemon::paths::socket_path(); + if !socket_path.exists() { + return; + } + + match std::os::unix::net::UnixStream::connect(&socket_path) { + Ok(_) => {} + Err(error) + if matches!( + error.kind(), + ErrorKind::ConnectionRefused | ErrorKind::NotFound + ) => + { + let _ = std::fs::remove_file(socket_path); + } + Err(_) => {} + } + } + + #[cfg(not(unix))] + fn remove_stale_daemon_socket_if_unreachable(&self) {} + /// Foreground-only fallback for when no daemon is reachable. Drops the /// per-app takeover via the same code path the supervisor uses, takes the /// cross-process state-mutation guard around it (so a concurrent CLI @@ -432,7 +482,11 @@ impl ProxyService { // Disable: route through the daemon when one is running so it stays // the sole writer of `proxy_runtime_session`. - self.daemon_drop_takeover(app_type_enum.as_str()).await + if self.should_drop_takeover_via_daemon(&app_type_enum).await? 
{ + self.daemon_drop_takeover(app_type_enum.as_str()).await + } else { + self.local_disable_takeover(app_type_enum.as_str()).await + } } async fn start_with_resolved_config_unlocked(