From d934d9c257cf1eb2f6d72cedcba3558142a48af8 Mon Sep 17 00:00:00 2001
From: Marcelo Leal <marcelocardosoleal@gmail.com>
Date: Sat, 18 Apr 2026 18:49:33 -0300
Subject: [PATCH 1/6] fix(swarm): three deploy bugs found during production
 setup

1. Add ANTHROPIC_API_KEY to ALLOWED_VARS in claude-bridge.js
   The env var was silently filtered out, causing Claude Code to fall
   back to OAuth login on every session start instead of using the
   API key configured in the Providers page.

2. Fix orphaned session crash ("Session already exists")
   When a Claude process died without firing the PTY onExit event,
   the session remained in the bridge's in-memory Map as inactive.
   The next start attempt threw "already exists". Now detects dead
   sessions, cleans them up, and restarts normally.

3. Exclude dashboard/data/ and workspace/ from Docker build context
   Without these entries in .dockerignore, the local SQLite database
   (with hashed passwords) and workspace files were baked into the
   image. On first Swarm deploy, the volume was seeded from the image,
   making login impossible with any other credentials.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .dockerignore                                  |  2 ++
 dashboard/terminal-server/src/claude-bridge.js | 11 ++++++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/.dockerignore b/.dockerignore
index a4adbae1..f1c74a2b 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -3,6 +3,8 @@
 .env
 __pycache__/
 *.pyc
+dashboard/data/
+workspace/
 ADWs/logs/
 ADWs/__pycache__/
 .claude/agent-memory/
diff --git a/dashboard/terminal-server/src/claude-bridge.js b/dashboard/terminal-server/src/claude-bridge.js
index 8260f0b3..34c70aa6 100644
--- a/dashboard/terminal-server/src/claude-bridge.js
+++ b/dashboard/terminal-server/src/claude-bridge.js
@@ -15,6 +15,7 @@ class ClaudeBridge {
   _loadProviderConfig() {
     const ALLOWED_CLI = new Set(['claude', 'openclaude']);
     const ALLOWED_VARS = new Set([
+      'ANTHROPIC_API_KEY',
       'CLAUDE_CODE_USE_OPENAI', 'CLAUDE_CODE_USE_GEMINI',
       'CLAUDE_CODE_USE_BEDROCK', 'CLAUDE_CODE_USE_VERTEX',
       'OPENAI_BASE_URL', 'OPENAI_API_KEY', 'OPENAI_MODEL',
@@ -138,7 +139,15 @@ class ClaudeBridge {
 
   async startSession(sessionId, options = {}) {
     if (this.sessions.has(sessionId)) {
-      throw new Error(`Session ${sessionId} already exists`);
+      const existing = this.sessions.get(sessionId);
+      if (existing.active) {
+        throw new Error(`Session ${sessionId} already exists`);
+      }
+      // Orphaned dead session — clean up and restart
+      if (existing.process) {
+        try { existing.process.kill('SIGKILL'); } catch (_) {}
+      }
+      this.sessions.delete(sessionId);
     }
 
     const {

From 7da1ca1fe0b28bbbce24d02c629120068159fcc7 Mon Sep 17 00:00:00 2001
From: Marcelo Leal <marcelocardosoleal@gmail.com>
Date: Sat, 18 Apr 2026 19:28:20 -0300
Subject: [PATCH 2/6] fix(swarm): add claude-auth volume and fix docker-compose
 for dashboard
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add evonexus_claude_auth:/root/.claude to all three Swarm services
  (dashboard, telegram, scheduler) so Claude Code OAuth tokens persist
  across redeploys — avoids re-authentication on every deploy
- docker-compose.yml: use Dockerfile.swarm.dashboard, expose terminal
  port 32352, add claude-auth volume, fix config mount (remove :ro so
  providers.json can be written by the UI)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 docker-compose.yml           |  13 +--
 evonexus.portainer.stack.yml | 166 +++++++++++++++++++++++++++++++++++
 2 files changed, 174 insertions(+), 5 deletions(-)
 create mode 100644 evonexus.portainer.stack.yml

diff --git a/docker-compose.yml b/docker-compose.yml
index 1b01017c..7050e9aa 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -3,17 +3,19 @@ services:
   dashboard:
     build:
       context: .
-      dockerfile: Dockerfile.dashboard
+      dockerfile: Dockerfile.swarm.dashboard
     container_name: evonexus-dashboard
     ports:
-      - "${EVONEXUS_PORT:-8080}:8080"
+      - "8081:8080"
+      - "32352:32352"
     env_file: .env
     environment:
       - TZ=America/Sao_Paulo
       - EVONEXUS_PORT=8080
+      - TERMINAL_SERVER_PORT=32352
     volumes:
-      - ./.env:/workspace/.env:ro
-      - ./config:/workspace/config:ro
+      - claude-auth:/root/.claude
+      - ./config:/workspace/config
       - ./workspace:/workspace/workspace
       - ./dashboard/data:/workspace/dashboard/data
       - ./.claude/agents:/workspace/.claude/agents:ro
@@ -21,7 +23,7 @@ services:
       - ./.claude/commands:/workspace/.claude/commands:ro
       - ./.claude/templates:/workspace/.claude/templates:ro
       - ./memory:/workspace/memory:ro
-      - ./ADWs/logs:/workspace/ADWs/logs:ro
+      - ./ADWs/logs:/workspace/ADWs/logs
     restart: unless-stopped
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:8080/api/version"]
@@ -79,6 +81,7 @@ services:
       - manual
 
 volumes:
+  claude-auth:
   daily-logs:
   projects:
   community:
diff --git a/evonexus.portainer.stack.yml b/evonexus.portainer.stack.yml
new file mode 100644
index 00000000..fd641579
--- /dev/null
+++ b/evonexus.portainer.stack.yml
@@ -0,0 +1,166 @@
+version: "3.8"
+
+# ============================================================
+# EvoNexus - Stack de Produção (Docker Swarm / Portainer)
+#
+# Baseado na documentação oficial do projeto:
+# - README.swarm.md
+# - evonexus.stack.yml
+#
+# Ajustado para este servidor:
+# - Domínio: evonexus.advancedbot.com.br
+# - Rede Traefik: network_public
+# - Entrypoint Traefik: websecure
+# - Certresolver: letsencryptresolver
+#
+# Antes do deploy:
+# 1. Garanta que a rede `network_public` já exista no Swarm
+# 2. As imagens publicadas no Docker Hub usam a tag latest:
+#    - marcelolealhub/evo-nexus-dashboard:latest
+#    - marcelolealhub/evo-nexus-runtime:latest
+# ============================================================
+
+services:
+
+  evonexus_dashboard:
+    image: marcelolealhub/evo-nexus-dashboard:latest
+
+    volumes:
+      - evonexus_config:/workspace/config
+      - evonexus_workspace:/workspace/workspace
+      - evonexus_dashboard_data:/workspace/dashboard/data
+      - evonexus_memory:/workspace/memory
+      - evonexus_adw_logs:/workspace/ADWs/logs
+      - evonexus_agent_memory:/workspace/.claude/agent-memory
+      - evonexus_claude_auth:/root/.claude
+      - evonexus_codex_auth:/root/.codex
+
+    networks:
+      - network_public
+
+    environment:
+      - TZ=America/Sao_Paulo
+      - EVONEXUS_PORT=8080
+      - TERMINAL_SERVER_PORT=32352
+      - FORWARDED_ALLOW_IPS=*
+
+    deploy:
+      mode: replicated
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+      placement:
+        constraints:
+          - node.role == manager
+      resources:
+        limits:
+          cpus: "1"
+          memory: 1024M
+      labels:
+        - traefik.enable=true
+        - traefik.docker.network=network_public
+
+        - traefik.http.routers.evonexus.rule=Host(`evonexus.advancedbot.com.br`)
+        - traefik.http.routers.evonexus.entrypoints=websecure
+        - traefik.http.routers.evonexus.priority=1
+        - traefik.http.routers.evonexus.tls.certresolver=letsencryptresolver
+        - traefik.http.routers.evonexus.service=evonexus
+        - traefik.http.services.evonexus.loadbalancer.server.port=8080
+        - traefik.http.services.evonexus.loadbalancer.passHostHeader=true
+
+        - traefik.http.routers.evonexus-terminal.rule=Host(`evonexus.advancedbot.com.br`) && PathPrefix(`/terminal`)
+        - traefik.http.routers.evonexus-terminal.entrypoints=websecure
+        - traefik.http.routers.evonexus-terminal.priority=10
+        - traefik.http.routers.evonexus-terminal.tls.certresolver=letsencryptresolver
+        - traefik.http.routers.evonexus-terminal.service=evonexus-terminal
+        - traefik.http.routers.evonexus-terminal.middlewares=evonexus-terminal-strip
+        - traefik.http.middlewares.evonexus-terminal-strip.stripprefix.prefixes=/terminal
+        - traefik.http.services.evonexus-terminal.loadbalancer.server.port=32352
+        - traefik.http.services.evonexus-terminal.loadbalancer.passHostHeader=true
+
+  evonexus_telegram:
+    image: marcelolealhub/evo-nexus-runtime:latest
+    command:
+      - "claude"
+      - "--channels"
+      - "plugin:telegram@claude-plugins-official"
+      - "--dangerously-skip-permissions"
+
+    volumes:
+      - evonexus_config:/workspace/config
+      - evonexus_workspace:/workspace/workspace
+      - evonexus_memory:/workspace/memory
+      - evonexus_adw_logs:/workspace/ADWs/logs
+      - evonexus_agent_memory:/workspace/.claude/agent-memory
+      - evonexus_claude_auth:/root/.claude
+      - evonexus_codex_auth:/root/.codex
+
+    networks:
+      - network_public
+
+    environment:
+      - TZ=America/Sao_Paulo
+      - REQUIRE_ANTHROPIC_KEY=1
+
+    stdin_open: true
+    tty: true
+
+    deploy:
+      mode: replicated
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+      placement:
+        constraints:
+          - node.role == manager
+      resources:
+        limits:
+          cpus: "1"
+          memory: 1024M
+
+  evonexus_scheduler:
+    image: marcelolealhub/evo-nexus-runtime:latest
+    command: ["uv", "run", "python", "scheduler.py"]
+
+    volumes:
+      - evonexus_config:/workspace/config
+      - evonexus_workspace:/workspace/workspace
+      - evonexus_memory:/workspace/memory
+      - evonexus_adw_logs:/workspace/ADWs/logs
+      - evonexus_agent_memory:/workspace/.claude/agent-memory
+      - evonexus_claude_auth:/root/.claude
+      - evonexus_codex_auth:/root/.codex
+
+    networks:
+      - network_public
+
+    environment:
+      - TZ=America/Sao_Paulo
+      - REQUIRE_ANTHROPIC_KEY=1
+
+    deploy:
+      mode: replicated
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+      placement:
+        constraints:
+          - node.role == manager
+      resources:
+        limits:
+          cpus: "1"
+          memory: 1024M
+
+volumes:
+  evonexus_config:
+  evonexus_workspace:
+  evonexus_dashboard_data:
+  evonexus_memory:
+  evonexus_adw_logs:
+  evonexus_agent_memory:
+  evonexus_claude_auth:
+  evonexus_codex_auth:
+
+networks:
+  network_public:
+    external: true

From 61bd3a4c83164a227fafa99bcfe1b3278e5da88c Mon Sep 17 00:00:00 2001
From: Marcelo Leal <marcelocardosoleal@gmail.com>
Date: Sat, 18 Apr 2026 19:33:06 -0300
Subject: [PATCH 3/6] fix(swarm): add claude_auth volume to official stack
 template

Add evonexus_claude_auth:/root/.claude to all three services in
evonexus.stack.yml so Claude Code OAuth tokens persist across redeploys.
Same fix applied to evonexus.portainer.stack.yml in the previous commit.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 evonexus.stack.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/evonexus.stack.yml b/evonexus.stack.yml
index a329183b..ef0401c4 100644
--- a/evonexus.stack.yml
+++ b/evonexus.stack.yml
@@ -32,6 +32,7 @@ services:
       - evonexus_memory:/workspace/memory
       - evonexus_adw_logs:/workspace/ADWs/logs
       - evonexus_agent_memory:/workspace/.claude/agent-memory
+      - evonexus_claude_auth:/root/.claude
       - evonexus_codex_auth:/root/.codex
 
     networks:
@@ -96,6 +97,7 @@ services:
       - evonexus_memory:/workspace/memory
       - evonexus_adw_logs:/workspace/ADWs/logs
       - evonexus_agent_memory:/workspace/.claude/agent-memory
+      - evonexus_claude_auth:/root/.claude
       - evonexus_codex_auth:/root/.codex
 
     networks:
@@ -155,6 +157,7 @@ volumes:
   evonexus_memory:
   evonexus_adw_logs:
   evonexus_agent_memory:
+  evonexus_claude_auth:
   evonexus_codex_auth:
 
 networks:

From 1c858e6ece57ed82a19f255ed6387c072ec54afa Mon Sep 17 00:00:00 2001
From: Marcelo Leal <marcelocardosoleal@gmail.com>
Date: Sat, 18 Apr 2026 20:09:05 -0300
Subject: [PATCH 4/6] fix(dashboard): three more bugs found in production after
 F1.4 redeploy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bug 1 — Theme picker on every agent
Each agent runs in its own working directory, which Claude Code treats
as a separate project. Without a global theme set, the user is asked to
choose a theme on every single agent terminal. Pre-seed
/root/.claude/settings.json with theme + onboarding flags during
container startup so the first-run prompts are skipped. Only writes the
file if it doesn't exist (preserves user-chosen overrides).

Bug 2 — "Session already exists" error toast
The previous fix only cleaned up *inactive* orphans. The actual production
trigger is different: when a WebSocket reconnects through Traefik, the
frontend can re-send start_claude before learning the session is still
alive. The bridge's startSession then threw on a duplicate active session.
Make startSession idempotent: if the session is already active, return
the existing entry instead of throwing.

Bug 3 — Misleading error on duplicate start
Server.startClaude() responded with type:'error' "An agent is already
running" when the session was active. From the user's perspective this
looked like a failure even though everything was working. Send
type:'claude_started' instead so the frontend updates UI to "running"
and replays the buffer.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../terminal-server/src/claude-bridge.js      |  9 ++++++++-
 dashboard/terminal-server/src/server.js       |  6 +++++-
 start-dashboard.sh                            | 20 +++++++++++++++++++
 3 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/dashboard/terminal-server/src/claude-bridge.js b/dashboard/terminal-server/src/claude-bridge.js
index 34c70aa6..16079126 100644
--- a/dashboard/terminal-server/src/claude-bridge.js
+++ b/dashboard/terminal-server/src/claude-bridge.js
@@ -141,7 +141,14 @@ class ClaudeBridge {
     if (this.sessions.has(sessionId)) {
       const existing = this.sessions.get(sessionId);
       if (existing.active) {
-        throw new Error(`Session ${sessionId} already exists`);
+        // Idempotent: a duplicate startSession can arrive when the WebSocket
+        // reconnects through a reverse proxy (Traefik) and the frontend
+        // re-sends start_claude before learning the session is still alive.
+        // Returning the existing session instead of throwing prevents a
+        // confusing "Session already exists" toast on the user's terminal
+        // while keeping the original PTY intact.
+        console.log(`[bridge] startSession(${sessionId}) — already active, returning existing session`);
+        return existing;
       }
       // Orphaned dead session — clean up and restart
       if (existing.process) {
diff --git a/dashboard/terminal-server/src/server.js b/dashboard/terminal-server/src/server.js
index c3482e47..69a21846 100644
--- a/dashboard/terminal-server/src/server.js
+++ b/dashboard/terminal-server/src/server.js
@@ -778,7 +778,11 @@ class TerminalServer {
     if (!session) return;
 
     if (session.active) {
-      this.sendToWebSocket(wsInfo.ws, { type: 'error', message: 'An agent is already running in this session' });
+      // Frontend may re-send start_claude on WebSocket reconnect (common
+      // through reverse proxies like Traefik). The session is already
+      // running — replay the buffer and tell the client it's attached
+      // instead of surfacing a misleading error toast.
+      this.sendToWebSocket(wsInfo.ws, { type: 'claude_started', sessionId: wsInfo.claudeSessionId });
       return;
     }
 
diff --git a/start-dashboard.sh b/start-dashboard.sh
index 21f52614..a78bfc63 100755
--- a/start-dashboard.sh
+++ b/start-dashboard.sh
@@ -22,6 +22,26 @@ FLASK_PORT="${EVONEXUS_PORT:-8080}"
 
 echo "[start-dashboard] terminal-server on :${TERMINAL_PORT}, Flask on :${FLASK_PORT}"
 
+# ----------------------------------------------------------------------------
+# Pre-seed Claude Code global settings so the first-run theme/onboarding
+# prompts are skipped on every new agent terminal. Each agent runs in its
+# own working directory, which Claude Code treats as a separate project —
+# without this, the user has to pick a theme on every single agent.
+# Only writes the file if it doesn't already exist (preserves user choices).
+# ----------------------------------------------------------------------------
+mkdir -p /root/.claude
+if [ ! -f /root/.claude/settings.json ]; then
+    echo "[start-dashboard] seeding /root/.claude/settings.json with default theme"
+    cat > /root/.claude/settings.json <<'EOF'
+{
+  "theme": "dark",
+  "hasCompletedOnboarding": true,
+  "hasSeenWelcome": true,
+  "telemetry": false
+}
+EOF
+fi
+
 # Start terminal-server in the background
 node /workspace/dashboard/terminal-server/bin/server.js --port "${TERMINAL_PORT}" &
 TERMINAL_PID=$!

From bc06c5ee6c59ab96c2b8be3ab2075a83e7a35858 Mon Sep 17 00:00:00 2001
From: Marcelo Leal <marcelocardosoleal@gmail.com>
Date: Sat, 18 Apr 2026 22:19:22 -0300
Subject: [PATCH 5/6] fix(swarm): restore /root/.claude.json from backup on
 container start
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Claude Code stores its main config at /root/.claude.json — a SIBLING
of the /root/.claude/ directory, not inside it. The Swarm volume
mounts /root/.claude/ only, so .claude.json sits in the container's
writable layer and is wiped on every redeploy. Result: theme picker
and onboarding reappear on every release, even though the OAuth
tokens (in /root/.claude/) survive.

Claude Code itself writes timestamped backups into
/root/.claude/backups/ (which IS in the volume), so we just need to
restore the latest one on startup when the main file is missing. If
no backup exists either, seed a minimal config so first-run prompts
are skipped.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 start-dashboard.sh | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/start-dashboard.sh b/start-dashboard.sh
index a78bfc63..b56848bf 100755
--- a/start-dashboard.sh
+++ b/start-dashboard.sh
@@ -42,6 +42,39 @@ if [ ! -f /root/.claude/settings.json ]; then
 EOF
 fi
 
+# ----------------------------------------------------------------------------
+# Restore /root/.claude.json from the most recent backup when missing.
+#
+# Claude Code's main config (theme, OAuth tokens, per-project state) lives
+# at /root/.claude.json — a SIBLING of the /root/.claude/ directory, NOT
+# inside it. The Swarm volume mounts /root/.claude/, so /root/.claude.json
+# sits in the container's writable layer and is wiped on every redeploy.
+# Result: theme picker and onboarding reappear on every release.
+#
+# Claude Code itself writes timestamped backups into /root/.claude/backups/
+# (which IS in the volume). We just need to restore the latest on startup
+# if the main file is missing. If no backup exists either, seed a minimal
+# config so the first-run prompts are skipped.
+# ----------------------------------------------------------------------------
+if [ ! -f /root/.claude.json ]; then
+    latest_backup=$(ls -t /root/.claude/backups/.claude.json.backup.* 2>/dev/null | head -n1 || true)
+    if [ -n "${latest_backup:-}" ] && [ -f "${latest_backup}" ]; then
+        echo "[start-dashboard] restoring /root/.claude.json from ${latest_backup}"
+        cp "${latest_backup}" /root/.claude.json
+    else
+        echo "[start-dashboard] seeding minimal /root/.claude.json (no backup found)"
+        cat > /root/.claude.json <<'EOF'
+{
+  "theme": "dark",
+  "hasCompletedOnboarding": true,
+  "hasSeenWelcome": true,
+  "bypassPermissionsModeAccepted": true,
+  "telemetry": false
+}
+EOF
+    fi
+fi
+
 # Start terminal-server in the background
 node /workspace/dashboard/terminal-server/bin/server.js --port "${TERMINAL_PORT}" &
 TERMINAL_PID=$!

From ff1b7709d93c175acd4cbb044ae89446c88dde66 Mon Sep 17 00:00:00 2001
From: Marcelo Leal <marcelocardosoleal@gmail.com>
Date: Sat, 18 Apr 2026 23:21:11 -0300
Subject: [PATCH 6/6] fix(dashboard): copy .claude/ and docs/ into image
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Dockerfile only copied dashboard/backend/, social-auth/, scheduler.py
and the built frontend. .claude/ (agents, skills, commands, templates,
rules) and docs/ were never copied, so on a fresh deploy the backend's
WORKSPACE / ".claude" / "agents" path was empty. Result: /api/agents,
/api/skills, /api/commands and /api/templates all returned empty lists,
and the UI showed "No agents found — Add agent files to .claude/agents/
to get started" on every clean Swarm deploy.

Local development worked because uv runs the backend with cwd at the
repo root, where .claude/ and docs/ exist.

.claude/agent-memory and .claude/.env stay excluded by .dockerignore so
user data and secrets remain out of the image.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 Dockerfile.dashboard | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/Dockerfile.dashboard b/Dockerfile.dashboard
index e817bf07..523f16d0 100644
--- a/Dockerfile.dashboard
+++ b/Dockerfile.dashboard
@@ -40,6 +40,14 @@ COPY dashboard/backend/ dashboard/backend/
 COPY social-auth/ social-auth/
 COPY scheduler.py ./
 
+# Copy workspace assets the backend reads at runtime.
+# Without these, /api/agents, /api/skills, /api/commands etc. all return empty
+# and the UI shows "No agents found" / "No skills found" on a fresh deploy.
+# .claude/agent-memory and .claude/.env are excluded by .dockerignore so user
+# data and secrets stay out of the image.
+COPY .claude/ .claude/
+COPY docs/ docs/
+
 # Copy built frontend from stage 1
 COPY --from=frontend-build /frontend/dist dashboard/frontend/dist