Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion services/gastown/container/plugin/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,17 @@ export const GastownPlugin: Plugin = async ({ client }) => {
// Best-effort logging — never let telemetry failures break tool execution
async function log(level: 'info' | 'error', message: string) {
console.log(`${SERVICE} ${level}: ${message}`);
const townId = process.env.GASTOWN_TOWN_ID;

try {
await client.app.log({ body: { service: SERVICE, level, message } });
await client.app.log({
body: {
service: SERVICE,
level,
message,
...(townId ? { extra: { townId } } : {}),
},
});
} catch {
// Swallow — logging is non-critical
}
Expand Down
1 change: 1 addition & 0 deletions services/gastown/container/src/agent-runner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,7 @@ export async function runAgent(originalRequest: StartAgentRequest): Promise<Mana
await verifyGitCredentials(workdir, request.gitUrl, envVars);

log.info('agent.startup_phase', {
townId: request.townId,
agentId: request.agentId,
phase: 'git_done',
elapsedMs: Date.now() - t0,
Expand Down
4 changes: 4 additions & 0 deletions services/gastown/container/src/control-server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ app.post('/refresh-token', async c => {

const activeAgents = listAgents().filter(a => a.status === 'running' || a.status === 'starting');
log.info('refresh_token.received', {
townId: process.env.GASTOWN_TOWN_ID ?? null,
agentCount: activeAgents.length,
agentIds: activeAgents.map(a => a.agentId),
});
Expand All @@ -299,6 +300,7 @@ app.post('/refresh-token', async c => {
const results = await refreshTokenForAllAgents();
const successCount = results.filter(r => r.success).length;
log.info('refresh_token.completed', {
townId: process.env.GASTOWN_TOWN_ID ?? null,
agentCount: results.length,
successCount,
failureCount: results.length - successCount,
Expand Down Expand Up @@ -829,6 +831,7 @@ app.post('/agents/:agentId/pty', async c => {
const reuseAgent = getAgentStatus(agentId);
if (reuseAgent) {
log.info('agent.pty_connected', {
townId: reuseAgent.townId,
agentId,
containerUptimeMs: getUptime(),
agentUptimeMs: Date.now() - new Date(reuseAgent.startedAt).getTime(),
Expand Down Expand Up @@ -889,6 +892,7 @@ app.post('/agents/:agentId/pty', async c => {
);
if (createResp.ok) {
log.info('agent.pty_connected', {
townId: agent.townId,
agentId,
containerUptimeMs: getUptime(),
agentUptimeMs: Date.now() - new Date(agent.startedAt).getTime(),
Expand Down
1 change: 1 addition & 0 deletions services/gastown/container/src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ setInterval(() => {
});
} catch (err) {
log.warn('container.memory_usage_failed', {
townId: townIdForLogs(),
error: err instanceof Error ? err.message : String(err),
});
}
Expand Down
33 changes: 32 additions & 1 deletion services/gastown/container/src/process-manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ const IDLE_TIMER_IGNORE_EVENTS = new Set([

let nextPort = 4096;
const startTime = Date.now();
const TOWN_ID = process.env.GASTOWN_TOWN_ID ?? null;

// Set to true when drainAll() starts — prevents new agent starts and
// lets the drain loop nudge agents that transition to running mid-drain.
Expand Down Expand Up @@ -163,6 +164,7 @@ function markMayorReadyOnce(): void {
if (mayorReadyAt !== null) return;
mayorReadyAt = new Date().toISOString();
log.info('mayor.ready', {
townId: TOWN_ID,
containerUptimeMs: getUptime(),
mayorReadyAt,
});
Expand Down Expand Up @@ -405,6 +407,7 @@ async function saveDbSnapshot(
);
log.error('mayor.snapshot_failed', {
event: 'mayor.snapshot_failed',
townId,
agentId,
role,
durationMs: Date.now() - t0,
Expand All @@ -430,6 +433,7 @@ async function saveDbSnapshot(
console.warn(`${MANAGER_LOG} Failed to save DB snapshot for ${agentId}: ${resp.status}`);
log.error('mayor.snapshot_failed', {
event: 'mayor.snapshot_failed',
townId,
agentId,
role,
durationMs: Date.now() - t0,
Expand All @@ -444,6 +448,7 @@ async function saveDbSnapshot(
);
log.info('mayor.snapshot_saved', {
event: 'mayor.snapshot_saved',
townId,
agentId,
role,
durationMs: Date.now() - t0,
Expand All @@ -458,6 +463,7 @@ async function saveDbSnapshot(
console.warn(`${MANAGER_LOG} DB snapshot save failed for agent ${agentId}:`, err);
log.error('mayor.snapshot_failed', {
event: 'mayor.snapshot_failed',
townId,
agentId,
role,
durationMs: Date.now() - t0,
Expand Down Expand Up @@ -973,6 +979,7 @@ async function subscribeToEvents(
const exitAgent = () => {
if (agent.status !== 'running') return;
log.info('agent.exit', {
townId: agent.townId,
agentId: agent.agentId,
name: agent.name,
reason: 'completed',
Expand Down Expand Up @@ -1073,6 +1080,7 @@ async function subscribeToEvents(
} catch (err) {
if (!controller.signal.aborted) {
log.error('agent.stream_error', {
townId: agent.townId,
agentId: agent.agentId,
error: err instanceof Error ? err.message : String(err),
});
Expand Down Expand Up @@ -1197,6 +1205,7 @@ async function startAgentImpl(
}
const tDbDone = Date.now();
log.info('agent.startup_phase', {
townId: request.townId,
agentId: request.agentId,
phase: 'db_hydrated',
elapsedMs: tDbDone - t0,
Expand All @@ -1214,6 +1223,7 @@ async function startAgentImpl(
agent.serverPort = port;
const tSdkDone = Date.now();
log.info('agent.startup_phase', {
townId: request.townId,
agentId: request.agentId,
phase: 'sdk_ready',
elapsedMs: tSdkDone - t0,
Expand Down Expand Up @@ -1276,6 +1286,7 @@ async function startAgentImpl(
agent.sessionId = sessionId;
const tSessionDone = Date.now();
log.info('agent.startup_phase', {
townId: request.townId,
agentId: request.agentId,
phase: 'session_created',
elapsedMs: tSessionDone - t0,
Expand Down Expand Up @@ -1352,6 +1363,7 @@ async function startAgentImpl(
agent.messageCount = 1;

log.info('agent.start', {
townId: request.townId,
agentId: request.agentId,
role: request.role,
name: request.name,
Expand All @@ -1360,6 +1372,7 @@ async function startAgentImpl(
});

log.info('agent.startup_complete', {
townId: request.townId,
agentId: request.agentId,
totalMs: Date.now() - t0,
containerUptimeMs: getUptime(),
Expand Down Expand Up @@ -1462,14 +1475,20 @@ export async function stopAgent(agentId: string): Promise<void> {
}
} catch (err) {
log.warn('agent.stop_failed', {
townId: agent.townId,
agentId,
error: err instanceof Error ? err.message : String(err),
});
}

agent.status = 'exited';
agent.exitReason = 'stopped';
log.info('agent.exit', { agentId, reason: 'stopped', exitReason: 'stopped' });
log.info('agent.exit', {
townId: agent.townId,
agentId,
reason: 'stopped',
exitReason: 'stopped',
});
broadcastEvent(agentId, 'agent.exited', { reason: 'stopped' });
syncRegistry();

Expand Down Expand Up @@ -1504,6 +1523,7 @@ export async function sendMessage(agentId: string, prompt: string): Promise<void
});
} catch (err) {
log.error('agent.send_failed', {
townId: agent.townId,
agentId,
error: err instanceof Error ? err.message : String(err),
});
Expand Down Expand Up @@ -1793,6 +1813,7 @@ export async function refreshTokenForAllAgents(): Promise<
}
} catch (err) {
log.warn('refresh_token.session_list_failed', {
townId: agent.townId,
agentId: agent.agentId,
error: err instanceof Error ? err.message : String(err),
});
Expand All @@ -1810,6 +1831,7 @@ export async function refreshTokenForAllAgents(): Promise<
oldInstance.server.close();
} catch (err) {
log.warn('refresh_token.old_server_close_failed', {
townId: agent.townId,
agentId: agent.agentId,
error: err instanceof Error ? err.message : String(err),
});
Expand All @@ -1834,6 +1856,7 @@ export async function refreshTokenForAllAgents(): Promise<

const durationMs = Date.now() - t0;
log.info('refresh_token.agent_restarted', {
townId: agent.townId,
agentId: agent.agentId,
role: agent.role,
name: agent.name,
Expand Down Expand Up @@ -1880,6 +1903,7 @@ export async function refreshTokenForAllAgents(): Promise<
current.server.close();
} catch (closeErr) {
log.warn('refresh_token.fresh_close_failed', {
townId: agent.townId,
agentId: agent.agentId,
error: closeErr instanceof Error ? closeErr.message : String(closeErr),
});
Expand All @@ -1890,6 +1914,7 @@ export async function refreshTokenForAllAgents(): Promise<
agent.sessionId = oldSessionId;
}
log.warn('refresh_token.fresh_rolled_back', {
townId: agent.townId,
agentId: agent.agentId,
oldPort,
error: message,
Expand All @@ -1913,6 +1938,7 @@ export async function refreshTokenForAllAgents(): Promise<
if (pendingEnsure) {
const reapWorkdir = agent.workdir;
const reapAgentId = agent.agentId;
const reapTownId = agent.townId;
const reapOldInstance = oldInstance;
pendingEnsure.then(
({ port: orphanPort }) => {
Expand All @@ -1926,11 +1952,13 @@ export async function refreshTokenForAllAgents(): Promise<
orphan.server.close();
} catch (closeErr) {
log.warn('refresh_token.orphan_close_failed', {
townId: reapTownId,
agentId: reapAgentId,
error: closeErr instanceof Error ? closeErr.message : String(closeErr),
});
}
log.warn('refresh_token.orphan_reaped', {
townId: reapTownId,
agentId: reapAgentId,
orphanPort,
});
Expand All @@ -1942,6 +1970,7 @@ export async function refreshTokenForAllAgents(): Promise<
);
}
log.error('refresh_token.agent_restarted', {
townId: agent.townId,
agentId: agent.agentId,
role: agent.role,
name: agent.name,
Expand Down Expand Up @@ -2537,6 +2566,7 @@ export async function drainAll(): Promise<void> {
console.error(`${DRAIN_LOG} snapshot timeout/failure for ${agent.agentId}:`, err);
log.error('mayor.snapshot_failed', {
event: 'mayor.snapshot_failed',
townId: agent.townId,
agentId: agent.agentId,
role: agent.role,
error: err instanceof Error ? err.message : String(err),
Expand Down Expand Up @@ -2613,6 +2643,7 @@ export async function stopAll(): Promise<void> {
console.error(`[stop-all] snapshot timeout/failure for ${agent.agentId}:`, err);
log.error('mayor.snapshot_failed', {
event: 'mayor.snapshot_failed',
townId: agent.townId,
agentId: agent.agentId,
role: agent.role,
error: err instanceof Error ? err.message : String(err),
Expand Down
8 changes: 6 additions & 2 deletions services/gastown/container/src/token-refresh.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ export async function fetchFreshContainerToken(): Promise<string | null> {

if (!apiUrl || !townId || !currentToken) {
log.warn('token_refresh.skipped_missing_env', {
townId: townId ?? null,
hasApiUrl: !!apiUrl,
hasTownId: !!townId,
hasCurrentToken: !!currentToken,
Expand All @@ -75,6 +76,7 @@ export async function fetchFreshContainerToken(): Promise<string | null> {
if (!resp.ok) {
const text = await resp.text().catch(() => '');
log.warn('token_refresh.fetch_failed', {
townId,
status: resp.status,
durationMs: Date.now() - t0,
body: text.slice(0, 200),
Expand All @@ -87,14 +89,15 @@ export async function fetchFreshContainerToken(): Promise<string | null> {
? (body as { data?: { token?: unknown } }).data?.token
: undefined;
if (typeof token !== 'string' || token.length === 0) {
log.warn('token_refresh.invalid_response', { durationMs: Date.now() - t0 });
log.warn('token_refresh.invalid_response', { townId, durationMs: Date.now() - t0 });
return null;
}
process.env.GASTOWN_CONTAINER_TOKEN = token;
log.info('token_refresh.succeeded', { durationMs: Date.now() - t0 });
log.info('token_refresh.succeeded', { townId, durationMs: Date.now() - t0 });
return token;
} catch (err) {
log.warn('token_refresh.network_error', {
townId,
error: err instanceof Error ? err.message : String(err),
durationMs: Date.now() - t0,
});
Expand Down Expand Up @@ -124,6 +127,7 @@ export async function refreshTokenIfNearExpiry(thresholdMs = 30 * 60_000): Promi
return;
}
log.info('token_refresh.boot_near_expiry', {
townId: process.env.GASTOWN_TOWN_ID ?? null,
msUntilExpiry: msLeft,
thresholdMs,
});
Expand Down
3 changes: 2 additions & 1 deletion services/gastown/src/dos/Town.do.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3515,6 +3515,7 @@ export class TownDO extends DurableObject<Env> {
)
.catch(err =>
console.warn(`${TOWN_LOG} slingConvoy: createConvoyBranch failed (non-fatal)`, {
townId: this.townId,
error: err instanceof Error ? err.message : String(err),
})
);
Expand Down Expand Up @@ -3997,7 +3998,7 @@ export class TownDO extends DurableObject<Env> {
// ══════════════════════════════════════════════════════════════════

/**
 * Delegates to `this._alarm()` from inside a `withLogTags` callback; the
 * returned promise settles once `_alarm()` has been awaited.
 *
 * NOTE(review): this span is rendered diff output, not compilable code. The
 * hunk header (`-3997,7 +3998,7`) has equal line counts on both sides, so the
 * two `withLogTags` lines below are the removed and the added version of the
 * same statement. Presumably only the second exists in the real file —
 * confirm against the repository.
 */
async alarm(): Promise<void> {
// Pre-change form of the call: passes only `source`.
return withLogTags({ source: 'Town.do' }, async () => {
// Post-change form: same call with `townId` added under `tags`.
return withLogTags({ source: 'Town.do', tags: { townId: this.townId } }, async () => {
await this._alarm();
});
}
Expand Down
2 changes: 2 additions & 0 deletions services/gastown/src/dos/town/container-idle-stop.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ export async function stopContainerIfIdle(deps: IdleStopDeps): Promise<void> {
state = await stub.getState();
} catch (err) {
logger.warn('stopContainerIfIdle: getState() failed', {
townId,
error: err instanceof Error ? err.message : String(err),
});
return;
Expand All @@ -73,6 +74,7 @@ export async function stopContainerIfIdle(deps: IdleStopDeps): Promise<void> {
deps.writeEventFn({ event: 'container.idle_stop', townId, reason });
} catch (err) {
logger.warn('stopContainerIfIdle: stop() failed', {
townId,
error: err instanceof Error ? err.message : String(err),
});
deps.writeEventFn({
Expand Down
Loading