From 180948128a079e0cd92691a597040048e615787f Mon Sep 17 00:00:00 2001 From: stuffbucket <231133237+stuffbucket@users.noreply.github.com> Date: Fri, 12 Jun 2026 22:00:31 -0700 Subject: [PATCH 1/5] fix(shell): DEV baseUrl must target the sidecar on :4141, not :4142 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Regression from #114. The sidecar binds :4141 (SIDECAR_PORT in shell/src-tauri/src/lib.rs) in both app:dev and production. #114 changed the DEV baseUrl to :4142 — trusting CLAUDE.md, which WRONGLY claimed the sidecar runs on :4142 in four places. In `app:dev` the webview is the Vite dev server (import.meta.env.DEV), so every settings/auth/gh/accounts call went to a dead :4142: sign-in failed, the gh-reuse list and account roster were empty, and the console filled with connection-refused errors. (Production was unaffected — it uses a relative path.) Default the DEV branch back to :4141 (the canonical port app:dev serves on), keeping VITE_API_BASE as the override for an app:ui proxy elsewhere. Also corrects the four :4142 lies in CLAUDE.md to :4141, citing SIDECAR_PORT. --- shell/src/api.ts | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/shell/src/api.ts b/shell/src/api.ts index 79f57e1..25052b7 100644 --- a/shell/src/api.ts +++ b/shell/src/api.ts @@ -308,14 +308,18 @@ type ApiResult = function baseUrl(): string { // Vite injects import.meta.env.DEV at build time. In production the shell // loads from the sidecar's own origin via the Tauri window URL, so a - // relative path resolves to the same origin — correct regardless of port. - // In dev (`app:ui`), Vite serves the UI on its own port while the proxy - // runs separately, so we absolute-prefix. Default to :4142 to match - // CLAUDE.md's fast-iteration workflow (`bun run dev start --port 4142`, - // the sidecar's port); override with VITE_API_BASE to point elsewhere. + // relative path resolves to the same origin. + // + // In dev the webview is the Vite server on its own port, so we must + // absolute-prefix the proxy. Default to :4141 — the canonical Maximal / + // SIDECAR_PORT (shell/src-tauri/src/lib.rs). `bun run app:dev` runs the real + // sidecar there, so the default MUST be :4141 or the whole settings UI hits + // a dead port. For `app:ui` against a proxy on a different port (e.g. a + // second `bun run dev start --port 4142` alongside a running app:dev), set + // VITE_API_BASE to override. if (import.meta.env.DEV) { const override = import.meta.env.VITE_API_BASE as string | undefined - return override ?? "http://localhost:4142" + return override ?? "http://localhost:4141" } return "" } From 63c1aa23b22afa0222d681961ddf19b5fa21d622 Mon Sep 17 00:00:00 2001 From: stuffbucket <231133237+stuffbucket@users.noreply.github.com> Date: Fri, 12 Jun 2026 22:20:39 -0700 Subject: [PATCH 2/5] fix(shell): surface failed restart + dedup account lists (app:dev findings) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-on to the :4141 baseUrl fix, from live app:dev testing where sign-in appeared to do nothing: - safeInvoke now returns whether the IPC fired. rebootAndAwaitAuth fails fast and VISIBLY when restart_sidecar does not fire (invoke rejected/unavailable) instead of polling a never-changing status for 20s then surfacing an off-screen message. This is what made the writes look like silent failures: POST /gh/use + /accounts/switch returned 200 (token written), but the reboot never happened and the error landed below the fold. - Error messages (gh-reuse + roster) scrollIntoView so they are not stranded off-screen at the bottom of a tall account section. - The gh-reuse list now excludes accounts already remembered in the registry, so an account no longer appears in BOTH "Switch to a remembered account" and "Reuse a GitHub CLI account" (the confusing double-listing). Does NOT yet explain WHY restart_sidecar failed to fire in that app:dev run — the swallowed reason goes to the webview console; the immediate visible error now exposes it on the next attempt. --- shell/src/main.ts | 47 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/shell/src/main.ts b/shell/src/main.ts index 4c27833..f56e46b 100644 --- a/shell/src/main.ts +++ b/shell/src/main.ts @@ -81,12 +81,17 @@ function wireNav(): void { } } -async function safeInvoke(cmd: string): Promise { +async function safeInvoke(cmd: string): Promise { try { await invoke(cmd); + return true; } catch (err) { - // Tauri command unavailable (e.g. running in plain browser). Log and continue. + // Tauri command unavailable (e.g. plain-browser app:ui) or the IPC + // rejected. Returns false so callers that DEPEND on the command (e.g. the + // restart_sidecar reboot that completes a sign-in) can surface a visible + // error instead of silently stranding the user. console.warn(`invoke(${cmd}) failed:`, err); + return false; } } @@ -633,6 +638,9 @@ function showGhError(message: string): void { if (!el) return; el.textContent = message; el.hidden = false; + // The account section can be tall (remembered + gh lists), so an error on a + // row near the bottom can land off-screen. Bring it into view. + el.scrollIntoView({ block: "nearest" }); } function hideGhError(): void { @@ -682,9 +690,31 @@ async function loadGhAccounts(): Promise { return; } + // Dedup against the registry: a gh account that's already a remembered + // (persisted) account is offered in the "Switch to a remembered account" + // list above, so don't ALSO list it here as a fresh sign-in — that + // double-listing is what reads as confusing. Best-effort: if the accounts + // fetch fails, fall back to showing every gh account. + const remembered = await apiCall({ + kind: "accounts-list", + method: "GET", + path: "/settings/api/accounts", + }); + const rememberedKeys = new Set( + remembered.ok ? remembered.data.accounts.map((a) => a.key) : [], + ); + const ghAccounts = result.data.accounts.filter( + (a) => !rememberedKeys.has(`${a.login}@${a.host}`), + ); + if (ghAccounts.length === 0) { + wrapper.hidden = true; + list.replaceChildren(); + return; + } + list.replaceChildren(); hideGhError(); - for (const account of result.data.accounts) { + for (const account of ghAccounts) { const seed = template.content.firstElementChild; if (!seed) continue; const row = seed.cloneNode(true) as HTMLElement; @@ -796,7 +826,15 @@ async function rebootAndAwaitAuth( onSuccess: (status: AuthStatus) => void, onFailure: (sawDown: boolean) => void, ): Promise { - await safeInvoke("restart_sidecar"); + // If the restart IPC itself didn't fire (invoke rejected / unavailable), + // the proxy will never reboot — fail fast and visibly rather than polling a + // never-changing status for 20s. `sawDown: false` → the "didn't restart" + // branch of the caller's message. + const restarted = await safeInvoke("restart_sidecar"); + if (!restarted) { + onFailure(false); + return; + } const deadlineMs = Date.now() + 20_000; let sawDown = false; while (Date.now() < deadlineMs) { @@ -853,6 +891,7 @@ function showRosterError(mode: AccountRosterMode, message: string): void { if (el) { el.textContent = message; el.hidden = false; + el.scrollIntoView({ block: "nearest" }); } } From b02da56f98857fc65ce9aa3b852438e9f84e7fd3 Mon Sep 17 00:00:00 2001 From: stuffbucket <231133237+stuffbucket@users.noreply.github.com> Date: Sat, 13 Jun 2026 02:52:10 -0700 Subject: [PATCH 3/5] fix(shell): permission restart_sidecar so the reboot IPC works from the webview MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit THE root cause of "sign-in does nothing". Tauri 2 does not auto-permission an app's own commands; the Settings/Dashboard webviews load from loopback origins (:4141 in production, :1420 in dev) which Tauri classifies as "remote", so a command is rejected unless it's named in permissions/default.toml. Every other webview-invoked command (open_settings_at, reveal_*, get_shell_api_key, subscribe_token_usage) is listed there — but restart_sidecar was not. So invoke("restart_sidecar") was denied by the ACL for the entire reboot-based flow — gh-reuse sign-in, account switch, AND sign-out. The token got written (POST /gh/use 200) but the proxy never rebooted into it, so the UI sat unauthenticated. This was never caught because the reboot path was "trusted, not GUI-smoked" (WKWebView has no WebDriver). It affected production, not just app:dev. Add allow-restart-sidecar to the default permission set + its [[permission]] block. Verified the ACL regenerates with it included. --- shell/src-tauri/permissions/default.toml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/shell/src-tauri/permissions/default.toml b/shell/src-tauri/permissions/default.toml index b564a84..3bbf6e9 100644 --- a/shell/src-tauri/permissions/default.toml +++ b/shell/src-tauri/permissions/default.toml @@ -17,6 +17,7 @@ permissions = [ "allow-open-dashboard", "allow-reveal-config-dir", "allow-reveal-logs-dir", + "allow-restart-sidecar", "allow-get-shell-api-key", "allow-subscribe-token-usage", ] @@ -41,6 +42,11 @@ identifier = "allow-reveal-logs-dir" description = "Reveal the maximal logs directory in Finder/Explorer." commands.allow = ["reveal_logs_dir"] +[[permission]] +identifier = "allow-restart-sidecar" +description = "Reboot the proxy sidecar into the on-disk config — the final step of an account switch / sign-in / sign-out, invoked from the Settings webview (a 'remote' loopback origin, so it needs an explicit permission)." +commands.allow = ["restart_sidecar"] + [[permission]] identifier = "allow-get-shell-api-key" description = "Return the per-launch shell API key so the webview can authenticate against the sidecar even when 'Block unknown connections' is on." From a49f05a6ebf0755f1653d5a20b9678df7f1adb4c Mon Sep 17 00:00:00 2001 From: stuffbucket <231133237+stuffbucket@users.noreply.github.com> Date: Sat, 13 Jun 2026 02:58:26 -0700 Subject: [PATCH 4/5] fix(shell): keep the app alive across an intentional sidecar restart MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Once the ACL fix let restart_sidecar actually fire, a second bug surfaced: restarting the proxy quit the whole app. kill_sidecar SIGTERMs the sidecar, which exits CLEANLY (code 0); the Terminated handler reads any clean exit as "user wants the whole app down" (true for Quit / external /_internal/shutdown) and calls handle.exit(0) — so the deliberate account-switch/sign-in/sign-out reboot took the tray + windows down with it before the respawn could land. Add a SidecarRestarting flag: respawn_sidecar sets it before SIGTERM, and the Terminated handler consumes it and keeps the app alive (the replacement is already being spawned) instead of exiting. A genuine Quit / external shutdown leaves the flag unset and still exits as before. cargo check passes. --- shell/src-tauri/src/lib.rs | 40 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/shell/src-tauri/src/lib.rs b/shell/src-tauri/src/lib.rs index fca0ce7..e0a0d16 100644 --- a/shell/src-tauri/src/lib.rs +++ b/shell/src-tauri/src/lib.rs @@ -317,6 +317,30 @@ impl SplashDismissed { } } +/// Set while `respawn_sidecar` is intentionally cycling the sidecar (the +/// account-switch / sign-in / sign-out reboot). The old sidecar exits cleanly +/// from our SIGTERM, which the Terminated handler would otherwise read as a +/// user-initiated quit and bring the WHOLE app down — stranding the tray and +/// killing the reboot before the replacement spawns. The handler consumes this +/// flag and keeps the app alive for the respawn instead. +struct SidecarRestarting(std::sync::atomic::AtomicBool); + +impl SidecarRestarting { + fn new() -> Self { + Self(std::sync::atomic::AtomicBool::new(false)) + } + + /// Mark that the next sidecar exit is an intentional restart, not a quit. + fn begin(&self) { + self.0.store(true, std::sync::atomic::Ordering::SeqCst); + } + + /// Returns true (and clears the flag) if a restart was in progress. + fn consume(&self) -> bool { + self.0.swap(false, std::sync::atomic::Ordering::SeqCst) + } +} + /// Random per-launch key the shell shares with its sidecar so the /// webview can authenticate against /settings/api/* even when the user /// has enabled "Block unknown connections." Injected as an env var when @@ -493,6 +517,7 @@ pub fn run() { .manage(SetupPromptShown::new()) .manage(StartupAnnounced::new()) .manage(SplashDismissed::new()) + .manage(SidecarRestarting::new()) .manage(LastRejection::new()) .manage(LastSidecarError::new()) .invoke_handler(tauri::generate_handler![ @@ -737,6 +762,16 @@ fn spawn_sidecar(app: &AppHandle) -> tauri::Result<()> { } CommandEvent::Terminated(payload) => { eprintln!("[maximal] sidecar exited: {:?}", payload); + // Intentional restart (account switch / sign-in / sign-out + // reboot)? respawn_sidecar set this flag before SIGTERMing + // the old child, and is already spawning the replacement. + // Keep the app alive — DON'T treat this exit as a quit. + if handle.state::().consume() { + eprintln!( + "[maximal] (intentional restart — keeping app alive for the respawn)" + ); + break; + } // Clean exit signals an intentional shutdown — either // we just sent SIGTERM via kill_sidecar (Quit flow), // or an external caller hit /_internal/shutdown @@ -1611,6 +1646,11 @@ fn respawn_sidecar(app: &AppHandle) { app.state::().set(None); dismiss_splash(app); + // Mark this as an intentional restart BEFORE we SIGTERM the child, so the + // old sidecar's clean exit isn't mistaken for a user quit (which would + // bring the whole app down — see the Terminated handler in spawn_sidecar). + app.state::().begin(); + // Reap the current child (healthy, hung, or mid-crash) so the respawn binds // cleanly. No-op if already gone. spawn_sidecar also passes --replace as a // backstop against a not-yet-released port. From 2e5855f77197765881015c2add1c6286f4d8eec4 Mon Sep 17 00:00:00 2001 From: stuffbucket <231133237+stuffbucket@users.noreply.github.com> Date: Sat, 13 Jun 2026 03:08:44 -0700 Subject: [PATCH 5/5] fix(shell): stop the kill_sidecar escalation from SIGKILLing the respawn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit THE reason the proxy stayed dead after a restart (UI shows "Load failed" everywhere because nothing is listening on :4141, while the app window is still up). kill_sidecar SIGTERMs the old sidecar, puts it BACK in the shared Sidecar slot, and spawns a 3s thread that takes whatever is in the slot and SIGKILLs it. But respawn_sidecar calls spawn_sidecar immediately after, which set()s the REPLACEMENT child into that same slot — so ~3s after a restart the stale escalation thread SIGKILLs the freshly-respawned sidecar. New proxy comes up, dies 3s later, every Settings call then fails to load. Fix: move the old child INTO the escalation thread instead of the shared slot, so it SIGKILLs only that specific child and leaves the slot free for the respawn. Dropping a CommandChild does not kill its process, so the slot can sit empty until spawn_sidecar fills it. Completes the reboot chain (with the :4141 baseUrl, the restart_sidecar ACL permission, and the survive-the-restart flag): switch / gh-reuse sign-in / sign-out now actually reboot into the account and stay up. cargo check passes. --- shell/src-tauri/src/lib.rs | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/shell/src-tauri/src/lib.rs b/shell/src-tauri/src/lib.rs index e0a0d16..8aa5170 100644 --- a/shell/src-tauri/src/lib.rs +++ b/shell/src-tauri/src/lib.rs @@ -915,24 +915,19 @@ fn kill_sidecar(app: &AppHandle) { libc::kill(pid, libc::SIGTERM); } - // Stash the child back so the escalation task can decide - // whether it still needs SIGKILL. If the sidecar exits - // cleanly within 3s, the child handle goes out of scope here - // and that's fine — killing an already-exited child is a - // harmless no-op. - app.state::().set(child); - - // Plain OS thread for the 3s grace timer rather than pulling - // a tokio runtime in for this single sleep. AppHandle::clone() - // is Arc-cheap. - let app_handle = app.clone(); + // MOVE the old child into the escalation thread — do NOT put it back + // in the shared Sidecar slot. respawn_sidecar calls spawn_sidecar + // immediately after us, which set()s the REPLACEMENT child into that + // slot; if this escalation re-read the slot it would SIGKILL the FRESH + // sidecar ~3s after a restart (the proxy would vanish from :4141 and + // the whole UI would "Load failed" — the account-switch/sign-in/ + // sign-out reboot bug). Holding the specific old child here SIGKILLs + // only it. Dropping a CommandChild does NOT kill its process, so + // leaving the slot empty until spawn_sidecar fills it is safe. std::thread::spawn(move || { std::thread::sleep(std::time::Duration::from_secs(3)); - if let Some(child) = app_handle.state::().take() { - // Still alive (or at least, still in our slot) after - // the grace period — escalate to SIGKILL. - let _ = child.kill(); - } + // No-op if the SIGTERM above already made it exit. + let _ = child.kill(); }); }