diff --git a/CHANGELOG.md b/CHANGELOG.md index 99a2da9..bc635a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.5.4] - 2026-04-12 + +### Added + +- **`ssh-mcp healthcheck` CLI subcommand** — built-in liveness probe that auto-detects transport (stdio vs streamable-http), respects auth env vars (`SSH_MCP_HTTP_TOKEN`, `SSH_MCP_HTTP_TOKEN_FILE`, `SSH_MCP_HTTP_AUTH=none`), and performs a real MCP `initialize` handshake in HTTP mode. Uses Python stdlib only, 3-second timeout, exits 0 healthy / 1 unhealthy. +- New module `src/ssh_mcp/healthcheck.py` with unit tests in `tests/test_healthcheck.py`. + +### Changed + +- **Dockerfile `HEALTHCHECK`** now uses the built-in `ssh-mcp healthcheck` subcommand instead of the old `python -c "import ssh_mcp"` liveness-only check. The new check performs a real MCP protocol handshake in HTTP mode, so a container marked "healthy" actually means the MCP tools respond correctly — not just that the Python package is importable. +- **`compose.yaml`** — removed the ~40-line inline Python healthcheck block from the commented `ssh-mcp-http` service template. Operators no longer need to embed credential-handling Python in their compose files. The Dockerfile's baked-in HEALTHCHECK handles both stdio and HTTP modes automatically. + +### Fixed + +- Eliminates the pain point where upgrading the healthcheck required editing every operator's compose.yaml. 
+ ## [0.5.3] - 2026-04-12 ### Security @@ -271,7 +287,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Tilde expansion for config file paths - Packaged for distribution via PyPI; installable with `uvx ssh-mcp` -[Unreleased]: https://github.com/blackaxgit/ssh-mcp/compare/v0.5.3...HEAD +[Unreleased]: https://github.com/blackaxgit/ssh-mcp/compare/v0.5.4...HEAD +[0.5.4]: https://github.com/blackaxgit/ssh-mcp/compare/v0.5.3...v0.5.4 [0.5.3]: https://github.com/blackaxgit/ssh-mcp/compare/v0.5.2...v0.5.3 [0.5.2]: https://github.com/blackaxgit/ssh-mcp/compare/v0.5.1...v0.5.2 [0.5.1]: https://github.com/blackaxgit/ssh-mcp/compare/v0.5.0...v0.5.1 diff --git a/Dockerfile b/Dockerfile index 623f4ef..40e6627 100644 --- a/Dockerfile +++ b/Dockerfile @@ -55,13 +55,16 @@ USER sshmcp # Override via SSH_MCP_HTTP_PORT env var and republish with -p. EXPOSE 8000 -# HEALTHCHECK: Python-based import check (slim image has no `ps`) -# Verifies the ssh_mcp package is importable — signals the runtime is healthy. -# For HTTP transport, operators may prefer a curl-based probe against -# http://127.0.0.1:${SSH_MCP_HTTP_PORT:-8000}/mcp but curl is not in the slim -# image, so the import check is the portable default. -HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 \ - CMD python -c "import ssh_mcp" || exit 1 +# HEALTHCHECK: use the built-in ``ssh-mcp healthcheck`` subcommand which +# auto-detects transport (stdio vs streamable-http) and performs a real +# MCP initialize handshake in HTTP mode. Reads the same env vars as the +# server itself (SSH_MCP_TRANSPORT, SSH_MCP_HTTP_PORT, SSH_MCP_HTTP_TOKEN, +# SSH_MCP_HTTP_TOKEN_FILE, SSH_MCP_HTTP_AUTH). +# +# start_period=10s covers startup for HTTP transport (FastMCP session +# manager init + uvicorn bind). interval=30s is standard. 
+HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \ + CMD ssh-mcp healthcheck # Entry point: invoke the console script installed by uv ENTRYPOINT ["ssh-mcp"] diff --git a/README.md b/README.md index e7ce6a4..438f7bf 100644 --- a/README.md +++ b/README.md @@ -213,6 +213,43 @@ Host: ssh-mcp.internal For stateful sessions (default), FastMCP maintains per-client context across requests. For stateless deployments behind a load balancer, set `SSH_MCP_HTTP_STATELESS=true` — each request is handled independently with no server-side session. +### Healthcheck + +The Docker image includes a built-in `ssh-mcp healthcheck` CLI subcommand that +Docker's `HEALTHCHECK` directive invokes automatically. No inline Python, no +`curl`, no manual compose surgery required. The subcommand: + +- Auto-detects the transport via `SSH_MCP_TRANSPORT`: + - **stdio mode**: verifies the package imports and, when `SSH_MCP_CONFIG` points to an existing file, that the config parses + - **http mode**: sends a real MCP `initialize` JSON-RPC POST and checks for any non-5xx response +- Reads the same auth env vars as the server (`SSH_MCP_HTTP_TOKEN`, `SSH_MCP_HTTP_TOKEN_FILE`, `SSH_MCP_HTTP_AUTH`) — never logs the token +- Exits 0 if healthy, 1 otherwise +- Uses Python stdlib only (no `curl`/`wget` dependency) +- 3-second socket timeout per probe (Docker's `--timeout=5s` is the hard limit) + +Run manually for debugging: + +```bash +docker exec ssh-mcp ssh-mcp healthcheck && echo "healthy" +``` + +Check current status: + +```bash +docker inspect ssh-mcp --format '{{.State.Health.Status}}' +``` + +To override the baked-in settings in your compose file: + +```yaml +healthcheck: + test: ["CMD", "ssh-mcp", "healthcheck"] + interval: 15s + timeout: 5s + retries: 3 + start_period: 10s +``` + ### Reverse proxy deployment (auth at the edge) If your reverse proxy (Caddy, nginx, Traefik, Envoy, Cloudflare Access, etc.) already authenticates requests before they reach ssh-mcp, you can disable the built-in bearer middleware with `SSH_MCP_HTTP_AUTH=none`. 
This mode is deliberately hard to enable on a public bind — you must also set a verbose acknowledgement env var: diff --git a/compose.yaml b/compose.yaml index 89bfdb7..41b5519 100644 --- a/compose.yaml +++ b/compose.yaml @@ -109,55 +109,15 @@ services: # env_file: # - .env.ssh-mcp # restart: unless-stopped - # # Healthcheck: a proper MCP ``initialize`` POST request. + # # Healthcheck: inherited from the Dockerfile ``HEALTHCHECK`` directive + # # which runs ``ssh-mcp healthcheck``. The subcommand auto-detects the + # # transport from SSH_MCP_TRANSPORT and performs a real MCP initialize + # # handshake in HTTP mode — no inline Python or curl needed. # # - # # We send the minimum valid JSON-RPC ``initialize`` payload with the - # # required Accept header and a valid bearer token. The healthcheck - # # passes on any 2xx/4xx response (the MCP server is processing - # # requests) and fails only on 5xx, timeout, or connection refused. - # # - # # Why we can't just use GET: - # # * MCP streamable HTTP rejects GET with 406 Not Acceptable unless - # # the client sends ``Accept: application/json, text/event-stream``. - # # * 406 is still a sign the server is alive, so technically any - # # non-5xx is "healthy" — but a real ``initialize`` POST gives - # # a much stronger signal that the MCP protocol is working. 
- # healthcheck: - # test: - # - CMD-SHELL - # - | - # python3 -c " - # import json, os, sys, urllib.request - # port = os.environ.get('SSH_MCP_HTTP_PORT', '8000') - # token = os.environ.get('SSH_MCP_HTTP_TOKEN', '') - # payload = json.dumps({ - # 'jsonrpc': '2.0', 'id': 1, 'method': 'initialize', - # 'params': { - # 'protocolVersion': '2025-03-26', - # 'capabilities': {}, - # 'clientInfo': {'name': 'healthcheck', 'version': '1'}, - # }, - # }).encode() - # req = urllib.request.Request( - # f'http://127.0.0.1:{port}/mcp', - # data=payload, - # method='POST', - # headers={ - # 'Authorization': f'Bearer {token}', - # 'Content-Type': 'application/json', - # 'Accept': 'application/json, text/event-stream', - # }, - # ) - # try: - # urllib.request.urlopen(req, timeout=3).read() - # sys.exit(0) - # except urllib.error.HTTPError as e: - # # Any non-5xx means the MCP app processed the request - # sys.exit(0 if e.code < 500 else 1) - # except Exception: - # sys.exit(1) - # " - # interval: 30s - # timeout: 5s - # retries: 3 - # start_period: 10s + # # To override (e.g. shorter interval), uncomment: + # # healthcheck: + # # test: ["CMD", "ssh-mcp", "healthcheck"] + # # interval: 15s + # # timeout: 5s + # # retries: 3 + # # start_period: 10s diff --git a/src/ssh_mcp/__init__.py b/src/ssh_mcp/__init__.py index 6c3aa6e..ac7da2f 100644 --- a/src/ssh_mcp/__init__.py +++ b/src/ssh_mcp/__init__.py @@ -1,3 +1,3 @@ """SSH MCP Server - Manage infrastructure via Claude Code.""" -__version__ = "0.5.3" +__version__ = "0.5.4" diff --git a/src/ssh_mcp/healthcheck.py b/src/ssh_mcp/healthcheck.py new file mode 100644 index 0000000..cd85c30 --- /dev/null +++ b/src/ssh_mcp/healthcheck.py @@ -0,0 +1,126 @@ +"""Liveness healthcheck for ssh-mcp Docker container. + +Invoked as ``ssh-mcp healthcheck`` from the Dockerfile HEALTHCHECK +directive. Exits 0 if the server is healthy, 1 otherwise. Prints a +single diagnostic line to stderr on failure (never logs the token). 
from __future__ import annotations

import json
import os
import sys
import urllib.error
import urllib.request
from pathlib import Path
from typing import NoReturn

# Liveness healthcheck for the ssh-mcp Docker container.
#
# Invoked as ``ssh-mcp healthcheck`` from the Dockerfile HEALTHCHECK
# directive. Exits 0 if the server is healthy, 1 otherwise. Prints a
# single diagnostic line to stderr on failure (never logs the token).
#
# Auto-detects transport via the ``SSH_MCP_TRANSPORT`` env var:
#   * ``stdio`` (default): import check + config file parse
#   * ``http`` / ``streamable-http``: MCP initialize POST handshake

HEALTHCHECK_TIMEOUT = 3  # seconds, passed to urlopen as the socket timeout

# Port probed when SSH_MCP_HTTP_PORT is unset or blank.
# NOTE(review): assumes the server applies the same default to a blank
# value -- confirm against ``_run_http`` in server.py.
DEFAULT_HTTP_PORT = 8000


def _load_token() -> str | None:
    """Read the bearer token from the environment or a token file.

    ``SSH_MCP_HTTP_TOKEN`` wins when it is non-blank; otherwise the file
    named by ``SSH_MCP_HTTP_TOKEN_FILE`` is read.

    Returns:
        The stripped token, or ``None`` when neither source yields a
        non-empty value or the token file cannot be read or decoded.
    """
    raw = os.environ.get("SSH_MCP_HTTP_TOKEN", "").strip()
    if raw:
        return raw
    token_file = os.environ.get("SSH_MCP_HTTP_TOKEN_FILE", "").strip()
    if not token_file:
        return None
    try:
        # Explicit encoding keeps the result independent of the container
        # locale. A file that is not valid text is treated like a missing
        # one instead of crashing the probe with a traceback.
        return Path(token_file).read_text(encoding="utf-8").strip() or None
    except (OSError, UnicodeDecodeError):
        return None


def _parse_port(raw: str) -> int | None:
    """Parse a TCP port number from an environment variable value.

    Args:
        raw: The raw ``SSH_MCP_HTTP_PORT`` value (may be blank).

    Returns:
        ``DEFAULT_HTTP_PORT`` for a blank value, the port as an ``int``
        when *raw* is 1-5 ASCII digits in the range 1..65535, else ``None``.
    """
    text = raw.strip()
    if not text:
        return DEFAULT_HTTP_PORT
    # ASCII digits only: anything else (``8000@host``, ``80/x``, unicode
    # digits) could change the host or path once interpolated into the URL.
    if len(text) > 5 or not (text.isascii() and text.isdigit()):
        return None
    port = int(text)
    return port if 1 <= port <= 65535 else None


def _check_stdio() -> tuple[bool, str]:
    """Verify the package imports and the config file parses.

    The config is only parsed when ``SSH_MCP_CONFIG`` is set and names an
    existing path; otherwise the import check alone decides the result.

    Returns:
        ``(ok, diagnostic)``.
    """
    try:
        import ssh_mcp  # noqa: F401
    except ImportError as e:
        return False, f"import failed: {e}"
    # Try to resolve and parse config if present
    config_path = os.environ.get("SSH_MCP_CONFIG", "")
    if config_path and Path(config_path).exists():
        try:
            from ssh_mcp.config import ServerRegistry

            ServerRegistry(config_path)
        except Exception as e:
            # Only the exception type is reported, not its message.
            return False, f"config parse failed: {type(e).__name__}"
    return True, "stdio healthy"


def _check_http() -> tuple[bool, str]:
    """Send an MCP ``initialize`` POST and verify the server responds.

    Any non-5xx status is considered healthy (including 401 if auth is
    misconfigured -- the server is clearly alive). An unusable
    ``SSH_MCP_HTTP_PORT`` is reported as unhealthy without sending a
    request, so the bearer token can never be sent to a host smuggled in
    through the port value.

    Returns:
        ``(ok, diagnostic)``. The diagnostic never contains the token or
        the raw port value.
    """
    port = _parse_port(os.environ.get("SSH_MCP_HTTP_PORT", ""))
    if port is None:
        return False, "invalid SSH_MCP_HTTP_PORT"

    auth_mode = os.environ.get("SSH_MCP_HTTP_AUTH", "bearer").strip().lower()
    token = _load_token() if auth_mode != "none" else None

    payload = json.dumps(
        {
            "jsonrpc": "2.0",
            "id": 1,
            "method": "initialize",
            "params": {
                "protocolVersion": "2025-03-26",
                "capabilities": {},
                "clientInfo": {"name": "ssh-mcp-healthcheck", "version": "1"},
            },
        }
    ).encode()

    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json, text/event-stream",
    }
    if token:
        headers["Authorization"] = f"Bearer {token}"

    url = f"http://127.0.0.1:{port}/mcp"
    req = urllib.request.Request(url, data=payload, method="POST", headers=headers)  # noqa: S310

    # NOTE(review): urlopen uses urllib's default opener; confirm that a
    # proxy env var (``http_proxy``) set in the container cannot reroute
    # this loopback request, or that ``no_proxy`` covers 127.0.0.1.
    # NOTE(review): in stateful mode every probe's ``initialize`` may leave
    # a server-side session open -- confirm the server expires them.
    try:
        # URL is always http://127.0.0.1:<port>/mcp -- scheme, host and path
        # are fixed and <port> was validated as an integer above, so B310
        # (permitted-schemes) doesn't apply.
        with urllib.request.urlopen(req, timeout=HEALTHCHECK_TIMEOUT) as resp:  # nosec B310  # noqa: S310
            return True, f"http {resp.status}"
    except urllib.error.HTTPError as e:
        # Any 4xx means the server is alive but the request was rejected
        # (wrong auth, wrong protocol version, etc.) -- still healthy.
        if e.code < 500:
            return True, f"http {e.code}"
        return False, f"http {e.code}"
    except urllib.error.URLError as e:
        return False, f"connect failed: {type(e.reason).__name__}"
    except Exception as e:
        # Top-level boundary: a probe must end in exit 0/1 with one
        # diagnostic line, never a traceback.
        return False, f"unexpected: {type(e).__name__}"


def run() -> NoReturn:
    """Entry point invoked by the ``ssh-mcp healthcheck`` CLI.

    Picks the probe from ``SSH_MCP_TRANSPORT`` (``http`` and
    ``streamable-http`` use the HTTP probe, anything else the stdio one).

    Raises:
        SystemExit: always -- code 0 when healthy, 1 when unhealthy (after
            printing one diagnostic line to stderr).
    """
    transport = os.environ.get("SSH_MCP_TRANSPORT", "stdio").strip().lower()
    if transport in ("http", "streamable-http"):
        ok, diag = _check_http()
    else:
        ok, diag = _check_stdio()

    if not ok:
        print(f"ssh-mcp healthcheck: UNHEALTHY ({diag})", file=sys.stderr)
        sys.exit(1)
    sys.exit(0)


if __name__ == "__main__":
    run()
from __future__ import annotations

import urllib.error
from pathlib import Path
from typing import Any, Callable
from unittest.mock import MagicMock, patch

import pytest

from ssh_mcp import healthcheck

# Tests for the ssh_mcp.healthcheck liveness probe.

# ---------------------------------------------------------------------------
# stdio mode
# ---------------------------------------------------------------------------


def test_stdio_healthy_without_config(monkeypatch: pytest.MonkeyPatch) -> None:
    """Package imports cleanly; no config path set -> healthy."""
    monkeypatch.delenv("SSH_MCP_CONFIG", raising=False)
    ok, diag = healthcheck._check_stdio()
    assert ok is True
    assert "healthy" in diag


def test_stdio_healthy_with_valid_config(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> None:
    """Valid TOML config file parses cleanly."""
    config = tmp_path / "servers.toml"
    # Empty registry (no [servers.*] entries) is still valid TOML.
    config.write_text("# empty registry\n")
    monkeypatch.setenv("SSH_MCP_CONFIG", str(config))
    ok, diag = healthcheck._check_stdio()
    assert ok is True, f"expected healthy, got: {diag}"


def test_stdio_unhealthy_on_malformed_config(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> None:
    """Malformed TOML -> unhealthy with diagnostic."""
    config = tmp_path / "servers.toml"
    config.write_text("this is [ not valid ::: toml")
    monkeypatch.setenv("SSH_MCP_CONFIG", str(config))
    ok, diag = healthcheck._check_stdio()
    assert ok is False
    assert "config parse failed" in diag


# ---------------------------------------------------------------------------
# http mode
# ---------------------------------------------------------------------------


def _mock_response(status: int = 200) -> MagicMock:
    """Build a context-manager mock that urlopen's ``with`` block returns."""
    resp = MagicMock()
    resp.status = status
    cm = MagicMock()
    cm.__enter__.return_value = resp
    cm.__exit__.return_value = False
    return cm


def _http_error(code: int, msg: str) -> urllib.error.HTTPError:
    """Build the ``HTTPError`` urlopen raises for a non-2xx response."""
    return urllib.error.HTTPError(
        url="http://127.0.0.1:8000/mcp",
        code=code,
        msg=msg,
        hdrs=None,  # type: ignore[arg-type]
        fp=None,
    )


def _recording_urlopen(captured: dict[str, str]) -> Callable[..., MagicMock]:
    """Return a urlopen stand-in that records request headers in *captured*."""

    def fake_urlopen(req: Any, timeout: float) -> MagicMock:
        captured.update(req.header_items())
        return _mock_response(200)

    return fake_urlopen


def _authorization_values(headers: dict[str, str]) -> list[str]:
    """Return every Authorization header value, ignoring key capitalization."""
    # urllib normalizes header capitalization, so compare case-insensitively.
    return [v for k, v in headers.items() if k.lower() == "authorization"]


def test_http_healthy_200(monkeypatch: pytest.MonkeyPatch) -> None:
    monkeypatch.setenv("SSH_MCP_HTTP_PORT", "8000")
    monkeypatch.setenv("SSH_MCP_HTTP_AUTH", "none")
    with patch("urllib.request.urlopen", return_value=_mock_response(200)):
        ok, diag = healthcheck._check_http()
    assert ok is True
    assert diag == "http 200"


def test_http_401_still_healthy(monkeypatch: pytest.MonkeyPatch) -> None:
    """401 means the server is alive but rejected our creds -> still healthy."""
    monkeypatch.setenv("SSH_MCP_HTTP_PORT", "8000")
    monkeypatch.setenv("SSH_MCP_HTTP_AUTH", "none")
    err = _http_error(401, "Unauthorized")
    with patch("urllib.request.urlopen", side_effect=err):
        ok, diag = healthcheck._check_http()
    assert ok is True
    assert diag == "http 401"


def test_http_500_unhealthy(monkeypatch: pytest.MonkeyPatch) -> None:
    monkeypatch.setenv("SSH_MCP_HTTP_PORT", "8000")
    monkeypatch.setenv("SSH_MCP_HTTP_AUTH", "none")
    err = _http_error(500, "Internal Server Error")
    with patch("urllib.request.urlopen", side_effect=err):
        ok, diag = healthcheck._check_http()
    assert ok is False
    assert diag == "http 500"


def test_http_connect_failure(monkeypatch: pytest.MonkeyPatch) -> None:
    monkeypatch.setenv("SSH_MCP_HTTP_PORT", "8000")
    monkeypatch.setenv("SSH_MCP_HTTP_AUTH", "none")
    err = urllib.error.URLError(ConnectionRefusedError("nope"))
    with patch("urllib.request.urlopen", side_effect=err):
        ok, diag = healthcheck._check_http()
    assert ok is False
    assert "connect failed" in diag
    assert "ConnectionRefusedError" in diag


def test_http_auth_none_omits_authorization_header(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With SSH_MCP_HTTP_AUTH=none, no Authorization header should be sent."""
    monkeypatch.setenv("SSH_MCP_HTTP_PORT", "8000")
    monkeypatch.setenv("SSH_MCP_HTTP_AUTH", "none")
    # Even if a token is set, it must be ignored.
    monkeypatch.setenv("SSH_MCP_HTTP_TOKEN", "should-not-be-sent")
    captured: dict[str, str] = {}

    with patch("urllib.request.urlopen", side_effect=_recording_urlopen(captured)):
        ok, _ = healthcheck._check_http()

    assert ok is True
    assert _authorization_values(captured) == []


def test_http_token_from_env(monkeypatch: pytest.MonkeyPatch) -> None:
    monkeypatch.setenv("SSH_MCP_HTTP_PORT", "8000")
    monkeypatch.setenv("SSH_MCP_HTTP_AUTH", "bearer")
    monkeypatch.setenv("SSH_MCP_HTTP_TOKEN", "secret-from-env")
    monkeypatch.delenv("SSH_MCP_HTTP_TOKEN_FILE", raising=False)
    captured: dict[str, str] = {}

    with patch("urllib.request.urlopen", side_effect=_recording_urlopen(captured)):
        ok, _ = healthcheck._check_http()

    assert ok is True
    assert _authorization_values(captured) == ["Bearer secret-from-env"]


def test_http_token_from_file(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> None:
    """When env token is unset, fall back to SSH_MCP_HTTP_TOKEN_FILE."""
    token_file = tmp_path / "token"
    token_file.write_text("secret-from-file\n")
    monkeypatch.setenv("SSH_MCP_HTTP_PORT", "8000")
    monkeypatch.setenv("SSH_MCP_HTTP_AUTH", "bearer")
    monkeypatch.delenv("SSH_MCP_HTTP_TOKEN", raising=False)
    monkeypatch.setenv("SSH_MCP_HTTP_TOKEN_FILE", str(token_file))
    captured: dict[str, str] = {}

    with patch("urllib.request.urlopen", side_effect=_recording_urlopen(captured)):
        ok, _ = healthcheck._check_http()

    assert ok is True
    assert _authorization_values(captured) == ["Bearer secret-from-file"]


# ---------------------------------------------------------------------------
# run() entry point
# ---------------------------------------------------------------------------


def test_run_exits_0_on_success(monkeypatch: pytest.MonkeyPatch) -> None:
    monkeypatch.setenv("SSH_MCP_TRANSPORT", "stdio")
    monkeypatch.delenv("SSH_MCP_CONFIG", raising=False)
    with pytest.raises(SystemExit) as exc_info:
        healthcheck.run()
    assert exc_info.value.code == 0


def test_run_exits_1_on_failure(monkeypatch: pytest.MonkeyPatch) -> None:
    monkeypatch.setenv("SSH_MCP_TRANSPORT", "http")
    monkeypatch.setenv("SSH_MCP_HTTP_PORT", "8000")
    monkeypatch.setenv("SSH_MCP_HTTP_AUTH", "none")
    err = urllib.error.URLError(ConnectionRefusedError("nope"))
    with (
        patch("urllib.request.urlopen", side_effect=err),
        pytest.raises(SystemExit) as exc_info,
    ):
        healthcheck.run()
    assert exc_info.value.code == 1


@pytest.mark.parametrize(
    "transport",
    ["http", "streamable-http", "  Streamable-HTTP  "],
)
def test_run_http_transport_aliases(
    monkeypatch: pytest.MonkeyPatch, transport: str
) -> None:
    """Both ``http`` and ``streamable-http`` should go through _check_http.

    The padded, mixed-case variant pins the strip/lower normalization that
    ``run`` applies to ``SSH_MCP_TRANSPORT``.
    """
    monkeypatch.setenv("SSH_MCP_TRANSPORT", transport)
    monkeypatch.setenv("SSH_MCP_HTTP_AUTH", "none")
    with (
        patch("ssh_mcp.healthcheck._check_http", return_value=(True, "http 200")) as m,
        pytest.raises(SystemExit) as exc_info,
    ):
        healthcheck.run()
    m.assert_called_once()
    assert exc_info.value.code == 0