Skip to content

Commit 4c7f697

Browse files
committed
fix: SSH connection pool concurrency and behavioral issues
- Remove srcBaseDir from connection key: workspaces on same host now share health tracking and control socket multiplexing - Fix double markFailedByKey on timeout: add timedOut flag to prevent both timeout callback and on('close') from incrementing failures - Add HEALTHY_TTL_MS (5 min): stale healthy connections get re-probed when network may have silently degraded - Fix singleflighting test: actually test concurrent probes share one failure count instead of pre-marking healthy
1 parent 73eee61 commit 4c7f697

File tree

2 files changed

+36
-17
lines changed

2 files changed

+36
-17
lines changed

src/node/runtime/sshConnectionPool.test.ts

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,9 @@ describe("sshConnectionPool", () => {
5858
expect(getControlPath(config1)).not.toBe(getControlPath(config2));
5959
});
6060

61-
test("different srcBaseDirs produce different controlPaths", () => {
61+
test("different srcBaseDirs produce same controlPaths (connection shared)", () => {
62+
// srcBaseDir is intentionally excluded from connection key -
63+
// workspaces on the same host share health tracking and multiplexing
6264
const config1: SSHRuntimeConfig = {
6365
host: "test.com",
6466
srcBaseDir: "/work1",
@@ -68,7 +70,7 @@ describe("sshConnectionPool", () => {
6870
srcBaseDir: "/work2",
6971
};
7072

71-
expect(getControlPath(config1)).not.toBe(getControlPath(config2));
73+
expect(getControlPath(config1)).toBe(getControlPath(config2));
7274
});
7375

7476
test("controlPath is in tmpdir with expected format", () => {
@@ -262,22 +264,22 @@ describe("SSHConnectionPool", () => {
262264
test("concurrent acquireConnection calls share same probe", async () => {
263265
const pool = new SSHConnectionPool();
264266
const config: SSHRuntimeConfig = {
265-
host: "test.example.com",
267+
host: "nonexistent.invalid.host.test",
266268
srcBaseDir: "/work",
267269
};
268270

269-
// Mark healthy to avoid actual probe
270-
pool.markHealthy(config);
271-
272-
// Multiple concurrent calls should all succeed
273-
const results = await Promise.all([
274-
pool.acquireConnection(config),
275-
pool.acquireConnection(config),
276-
pool.acquireConnection(config),
271+
// All concurrent calls should share the same probe and get same result
272+
const results = await Promise.allSettled([
273+
pool.acquireConnection(config, 1000),
274+
pool.acquireConnection(config, 1000),
275+
pool.acquireConnection(config, 1000),
277276
]);
278277

279-
// All should resolve (no errors)
280-
expect(results).toHaveLength(3);
278+
// All should be rejected (connection fails)
279+
expect(results.every((r) => r.status === "rejected")).toBe(true);
280+
281+
// Only 1 failure should be recorded (not 3) - proves singleflighting worked
282+
expect(pool.getConnectionHealth(config)?.consecutiveFailures).toBe(1);
281283
});
282284
});
283285
});

src/node/runtime/sshConnectionPool.ts

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,12 @@ export interface ConnectionHealth {
5858
*/
5959
const BACKOFF_SCHEDULE = [1, 5, 10, 20, 40, 60];
6060

61+
/**
62+
* Time after which a "healthy" connection should be re-probed.
63+
* Prevents stale health state when network silently degrades.
64+
*/
65+
const HEALTHY_TTL_MS = 5 * 60 * 1000; // 5 minutes
66+
6167
/**
6268
* SSH Connection Pool
6369
*
@@ -91,10 +97,16 @@ export class SSHConnectionPool {
9197
);
9298
}
9399

94-
// Return immediately if known healthy
100+
// Return immediately if known healthy and not stale
95101
if (health?.status === "healthy") {
96-
log.debug(`SSH connection to ${config.host} is known healthy, skipping probe`);
97-
return;
102+
const age = Date.now() - (health.lastSuccess?.getTime() ?? 0);
103+
if (age < HEALTHY_TTL_MS) {
104+
log.debug(`SSH connection to ${config.host} is known healthy, skipping probe`);
105+
return;
106+
}
107+
log.debug(
108+
`SSH connection to ${config.host} health is stale (${Math.round(age / 1000)}s), re-probing`
109+
);
98110
}
99111

100112
// Check for inflight probe - singleflighting
@@ -243,7 +255,9 @@ export class SSHConnectionPool {
243255
stderr += data.toString();
244256
});
245257

258+
let timedOut = false;
246259
const timeout = setTimeout(() => {
260+
timedOut = true;
247261
proc.kill("SIGKILL");
248262
const error = "SSH probe timed out";
249263
this.markFailedByKey(key, error);
@@ -252,6 +266,7 @@ export class SSHConnectionPool {
252266

253267
proc.on("close", (code) => {
254268
clearTimeout(timeout);
269+
if (timedOut) return; // Already handled by timeout
255270

256271
if (code === 0) {
257272
this.markHealthyByKey(key);
@@ -304,11 +319,13 @@ export function getControlPath(config: SSHRuntimeConfig): string {
304319
* Includes local username to prevent cross-user socket collisions.
305320
*/
306321
function makeConnectionKey(config: SSHRuntimeConfig): string {
322+
// Note: srcBaseDir is intentionally excluded - connection identity is determined
323+
// by user + host + port + key. This allows health tracking and multiplexing
324+
// to be shared across workspaces on the same host.
307325
const parts = [
308326
os.userInfo().username, // Include local user to prevent cross-user collisions
309327
config.host,
310328
config.port?.toString() ?? "22",
311-
config.srcBaseDir,
312329
config.identityFile ?? "default",
313330
];
314331
return parts.join(":");

0 commit comments

Comments
 (0)