From e63d6c6c0a950a5e136579d185bf4c36bce08edd Mon Sep 17 00:00:00 2001 From: anandgupta42 Date: Sun, 29 Mar 2026 08:41:33 -0700 Subject: [PATCH 1/5] feat: add ClickHouse warehouse driver with full integration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add first-class ClickHouse support as the 12th database driver: **Driver (`packages/drivers/src/clickhouse.ts`):** - Official `@clickhouse/client` over HTTP(S) - Supports ClickHouse server 23.3+ (all non-EOL versions) - Password, connection string, and TLS/mTLS auth - ClickHouse Cloud and self-hosted compatible - Parameterized queries for SQL injection prevention - DML-aware LIMIT injection (won't break `WITH...INSERT`) **Integration (23 touchpoints):** - Registry: `DRIVER_MAP`, import switch, `PASSWORD_DRIVERS` - Discovery: Docker containers, env vars (`CLICKHOUSE_HOST`/`CLICKHOUSE_URL`), dbt profiles (`ADAPTER_TYPE_MAP`), dbt lineage dialect - FinOps: `system.query_log` query history template - Normalization: aliases for `connectionString`, `requestTimeout`, TLS fields - Publish: `@clickhouse/client` in `peerDependencies` **Tests:** - 30+ E2E tests across 5 suites (latest, LTS 23.8, 24.3, 24.8, connection string) - 14 config normalization tests for all ClickHouse aliases - MergeTree variants, materialized views, Nullable columns, Array/Map/IPv4 types **Documentation:** - Full config section in `warehouses.md` (standard, Cloud, connection string) - Support matrix entry in `drivers.md` with auth methods - Dedicated guide (`guides/clickhouse.md`): MergeTree optimization, materialized view pipelines, dialect translation, LowCardinality tips, dbt integration - Updated README, getting-started, warehouse-tools docs **Engineering:** - `packages/drivers/ADDING_A_DRIVER.md` — 23-point checklist for adding future drivers - `.claude/commands/add-database-driver.md` — Claude skill to automate the process Co-Authored-By: Claude Opus 4.6 (1M context) --- 
.claude/commands/add-database-driver.md | 181 +++++ .github/meta/commit.txt | 16 +- README.md | 2 +- bun.lock | 5 + docs/docs/configure/warehouses.md | 62 +- .../data-engineering/guides/clickhouse.md | 233 ++++++ docs/docs/data-engineering/guides/index.md | 1 + .../data-engineering/tools/warehouse-tools.md | 3 +- docs/docs/drivers.md | 15 +- docs/docs/getting-started/index.md | 2 +- docs/mkdocs.yml | 1 + packages/drivers/ADDING_A_DRIVER.md | 221 ++++++ packages/drivers/package.json | 3 +- packages/drivers/src/clickhouse.ts | 147 ++++ packages/drivers/src/index.ts | 1 + packages/drivers/src/normalize.ts | 10 + packages/opencode/.github/meta/commit.txt | 30 +- packages/opencode/script/publish.ts | 1 + .../native/connections/dbt-profiles.ts | 21 +- .../native/connections/docker-discovery.ts | 18 +- .../altimate/native/connections/registry.ts | 18 +- .../src/altimate/native/dbt/lineage.ts | 10 +- .../altimate/native/finops/query-history.ts | 52 +- .../src/altimate/tools/project-scan.ts | 44 +- .../src/altimate/tools/warehouse-add.ts | 19 +- .../test/altimate/driver-normalize.test.ts | 127 ++++ .../altimate/drivers-clickhouse-e2e.test.ts | 667 ++++++++++++++++++ 27 files changed, 1796 insertions(+), 114 deletions(-) create mode 100644 .claude/commands/add-database-driver.md create mode 100644 docs/docs/data-engineering/guides/clickhouse.md create mode 100644 packages/drivers/ADDING_A_DRIVER.md create mode 100644 packages/drivers/src/clickhouse.ts create mode 100644 packages/opencode/test/altimate/drivers-clickhouse-e2e.test.ts diff --git a/.claude/commands/add-database-driver.md b/.claude/commands/add-database-driver.md new file mode 100644 index 0000000000..3731faba31 --- /dev/null +++ b/.claude/commands/add-database-driver.md @@ -0,0 +1,181 @@ +--- +description: Add a new database driver to Altimate Code. Scaffolds the driver, registers it across all 23 integration points, writes E2E tests, and updates docs. 
Usage - /add-database-driver +--- + +# Add Database Driver + +Scaffold and fully integrate a new database/warehouse driver into Altimate Code. This command handles all 23 integration points — driver code, registry, discovery, finops, tests, and documentation. + +## Input + +`$ARGUMENTS` = the database name (e.g., `cockroachdb`, `timescaledb`, `cassandra`, `neo4j`). + +If empty, ask: "Which database should I add support for?" + +## Step 0: Research + +Before writing any code, research the database: + +1. **Find the official Node.js/TypeScript client package** on npm. Search for `@{database}/client`, `{database}-js`, or similar. +2. **Check supported server versions** — which versions are not EOL? +3. **Identify auth methods** — password, token, TLS/certificate, connection string, cloud-specific? +4. **Check SQL dialect** — standard SQL? Custom syntax? LIMIT vs TOP vs FETCH FIRST? System tables for schemas/tables/columns? +5. **Find Docker image** — official image on Docker Hub for E2E testing? +6. **Check if dbt adapter exists** — search for `dbt-{database}` on PyPI. + +Present findings to the user before proceeding: +``` +## Research: {Database} + +- **npm package**: `{package}` (v{version}) +- **Server versions**: {non-EOL versions} +- **Auth methods**: {list} +- **SQL dialect**: {notes on LIMIT, system tables, parameterized queries} +- **Docker image**: `{image}:{tag}` +- **dbt adapter**: {exists/not found} + +Proceed with implementation? +``` + +## Step 1: Read Reference Document + +Read the comprehensive checklist: +```bash +cat packages/drivers/ADDING_A_DRIVER.md +``` + +This document has all 23 integration points with exact file paths and code patterns. + +## Step 2: Read Existing Driver for Pattern + +Read a similar existing driver as a template. 
Choose based on database type: + +- **SQL database with password auth** → read `packages/drivers/src/mysql.ts` +- **Cloud warehouse with token auth** → read `packages/drivers/src/databricks.ts` +- **Database with connection string support** → read `packages/drivers/src/postgres.ts` +- **HTTP-based client** → read `packages/drivers/src/clickhouse.ts` +- **Document database (non-SQL)** → read `packages/drivers/src/mongodb.ts` + +Also read: +- `packages/drivers/src/normalize.ts` — for alias pattern +- `packages/opencode/src/altimate/native/connections/registry.ts` — for registration pattern +- `packages/opencode/test/altimate/drivers-docker-e2e.test.ts` — for E2E test pattern + +## Step 3: Implement (23 integration points) + +Work through all 9 phases from the checklist. Use parallel edits where possible. + +### Phase 1: Core Driver (4 files) + +1. **Create `packages/drivers/src/{database}.ts`** + - Follow the Connector interface: `connect()`, `execute()`, `listSchemas()`, `listTables()`, `describeTable()`, `close()` + - Lazy-import the npm package + - Use parameterized queries for schema introspection + - Handle LIMIT injection with DML guard: `!hasDML` check before appending LIMIT + - Handle TLS detection from connection strings + +2. **Add export to `packages/drivers/src/index.ts`** + +3. **Add optionalDependency to `packages/drivers/package.json`** + +4. **Add aliases to `packages/drivers/src/normalize.ts`** + +### Phase 2: Registry (4 files in registry.ts) + +5. Add to `DRIVER_MAP` +6. Add to import switch statement +7. Add to `PASSWORD_DRIVERS` (if applicable) +8. Remove from `KNOWN_UNSUPPORTED` (if listed) + +### Phase 3: Discovery (4 files) + +9. Docker discovery — `docker-discovery.ts` (IMAGE_MAP, ENV_MAP, DEFAULT_PORTS, DEFAULT_USERS) +10. Env var detection — `project-scan.ts` (detectEnvVars warehouses array) +11. dbt adapter — `dbt-profiles.ts` (ADAPTER_TYPE_MAP) +12. 
dbt lineage — `dbt/lineage.ts` (detectDialect dialectMap) + +### Phase 4: FinOps (1 file) + +13. Query history — `finops/query-history.ts` (SQL template + handler if database has system query log) + +### Phase 5: Build (1 file) + +14. Peer deps — `script/publish.ts` (driverPeerDependencies) + +### Phase 6: Tool Descriptions (1 file) + +15. warehouse_add — `tools/warehouse-add.ts` (config description + error message) + +### Phase 7: Tests (2 new files + 1 edit) + +16. E2E tests — `test/altimate/drivers-{database}-e2e.test.ts` +17. Normalization tests — add to `test/altimate/driver-normalize.test.ts` +18. Verify existing tests pass + +### Phase 8: Documentation (5 files) + +19. `docs/docs/configure/warehouses.md` — config section + update count +20. `docs/docs/drivers.md` — support matrix + installation + auth + update count +21. `docs/docs/data-engineering/tools/warehouse-tools.md` — env vars + Docker +22. `README.md` — warehouse list +23. `docs/docs/getting-started/index.md` — homepage list + +### Phase 9: Optional + +- Guide page at `docs/docs/data-engineering/guides/{database}.md` +- Update `mkdocs.yml` nav and `guides/index.md` +- Check fingerprint regex in `fingerprint/index.ts` + +## Step 4: Run Quality Gates + +```bash +# Tests (from packages/opencode/) +cd packages/opencode && bun test test/altimate/driver-normalize.test.ts test/altimate/connections.test.ts + +# Typecheck +bun turbo typecheck + +# Marker check +bun run script/upstream/analyze.ts --markers --base main --strict +``` + +All three must pass before proceeding. + +## Step 5: Run Code Review + +Run `/consensus:code-review` to get the implementation reviewed by multiple models before committing. 
+ +## Step 6: Summary + +Present final summary: +``` +## {Database} Driver Added + +### Files Created +- packages/drivers/src/{database}.ts +- packages/opencode/test/altimate/drivers-{database}-e2e.test.ts +- docs/docs/data-engineering/guides/{database}.md (if created) + +### Files Modified +- {list all modified files} + +### Test Results +- {N} normalization tests pass +- {N} connection tests pass +- Typecheck: pass +- Marker check: pass + +### E2E Test Coverage +- {list of test suites and server versions} + +Ready to commit. +``` + +## Rules + +1. **Read before writing.** Always read existing drivers and the reference doc before creating new code. +2. **Don't skip integration points.** All 23 points exist for a reason — missing one causes inconsistencies users will hit. +3. **Use parameterized queries** for `listTables` and `describeTable` — never interpolate user input into SQL. +4. **Test multiple server versions** — at minimum: latest stable + oldest non-EOL LTS. +5. **Run all quality gates** before presenting the summary. +6. **Don't modify finops tools** (credit-analyzer, warehouse-advisor, unused-resources) unless the database has equivalent cost/credit APIs. diff --git a/.github/meta/commit.txt b/.github/meta/commit.txt index 0476c2cb7d..4cdd7c384d 100644 --- a/.github/meta/commit.txt +++ b/.github/meta/commit.txt @@ -1,15 +1 @@ -feat: add task intent classification telemetry event - -Add `task_classified` event emitted at session start with keyword/regex -classification of the first user message. Categories: debug_dbt, write_sql, -optimize_query, build_model, analyze_lineage, explore_schema, migrate_sql, -manage_warehouse, finops, general. 
- -- `classifyTaskIntent()` — pure regex matcher, zero LLM cost, <1ms -- Includes warehouse type from fingerprint cache -- Strong/weak confidence levels (1.0 vs 0.5) -- 15 unit tests covering all intent categories + edge cases - -Closes AI-6029 - -Co-Authored-By: Claude Opus 4.6 (1M context) +feat: add ClickHouse warehouse driver diff --git a/README.md b/README.md index 0b59af5de1..3feae75446 100644 --- a/README.md +++ b/README.md @@ -151,7 +151,7 @@ Each mode has scoped permissions, tool access, and SQL write-access control. ## Supported Warehouses -Snowflake · BigQuery · Databricks · PostgreSQL · Redshift · DuckDB · MySQL · SQL Server · Oracle · SQLite +Snowflake · BigQuery · Databricks · PostgreSQL · Redshift · ClickHouse · DuckDB · MySQL · SQL Server · Oracle · SQLite · MongoDB First-class support with schema indexing, query execution, and metadata introspection. SSH tunneling available for secure connections. diff --git a/bun.lock b/bun.lock index 4ef0ee6efb..1b06053a5a 100644 --- a/bun.lock +++ b/bun.lock @@ -43,6 +43,7 @@ "mongodb": "^6.0.0", }, "optionalDependencies": { + "@clickhouse/client": "^1.0.0", "@databricks/sql": "^1.0.0", "@google-cloud/bigquery": "^8.0.0", "duckdb": "^1.0.0", @@ -565,6 +566,10 @@ "@clack/prompts": ["@clack/prompts@1.0.0-alpha.1", "", { "dependencies": { "@clack/core": "1.0.0-alpha.1", "picocolors": "^1.0.0", "sisteransi": "^1.0.5" } }, "sha512-07MNT0OsxjKOcyVfX8KhXBhJiyUbDP1vuIAcHc+nx5v93MJO23pX3X/k3bWz6T3rpM9dgWPq90i4Jq7gZAyMbw=="], + "@clickhouse/client": ["@clickhouse/client@1.18.2", "", { "dependencies": { "@clickhouse/client-common": "1.18.2" } }, "sha512-fuquQswRSHWM6D079ZeuGqkMOsqtcUPL06UdTnowmoeeYjVrqisfVmvnw8pc3OeKS4kVb91oygb/MfLDiMs0TQ=="], + + "@clickhouse/client-common": ["@clickhouse/client-common@1.18.2", "", {}, "sha512-J0SG6q9V31ydxonglpj9xhNRsUxCsF71iEZ784yldqMYwsHixj/9xHFDgBDX3DuMiDx/kPDfXnf+pimp08wIBA=="], + "@colors/colors": ["@colors/colors@1.6.0", "", {}, 
"sha512-Ir+AOibqzrIsL6ajt3Rz3LskB7OiMVHqltZmspbW/TJuTVuyOMirVqAkjfY6JISiLHgyNqicAC8AyHHGzNd/dA=="], "@dabh/diagnostics": ["@dabh/diagnostics@2.0.8", "", { "dependencies": { "@so-ric/colorspace": "^1.1.6", "enabled": "2.0.x", "kuler": "^2.0.0" } }, "sha512-R4MSXTVnuMzGD7bzHdW2ZhhdPC/igELENcq5IjEverBvq5hn1SXCWcsi6eSsdWP0/Ur+SItRRjAktmdoX/8R/Q=="], diff --git a/docs/docs/configure/warehouses.md b/docs/docs/configure/warehouses.md index 23bb286ac9..e78b478f8f 100644 --- a/docs/docs/configure/warehouses.md +++ b/docs/docs/configure/warehouses.md @@ -1,6 +1,6 @@ # Warehouses -Altimate Code connects to 9 warehouse types. Configure them in `.altimate-code/connections.json` (project-local) or `~/.altimate-code/connections.json` (global). +Altimate Code connects to 10 warehouse types. Configure them in `.altimate-code/connections.json` (project-local) or `~/.altimate-code/connections.json` (global). ## Configuration @@ -282,6 +282,66 @@ If you're already authenticated via `gcloud`, omit `credentials_path`: !!! info "Server compatibility" The MongoDB driver (v6.x) supports MongoDB server versions 3.6 through 8.0, covering all releases from the last 3+ years. +## ClickHouse + +```json +{ + "clickhouse-prod": { + "type": "clickhouse", + "host": "localhost", + "port": 8123, + "database": "analytics", + "user": "default", + "password": "{env:CLICKHOUSE_PASSWORD}" + } +} +``` + +| Field | Required | Description | +|-------|----------|-------------| +| `connection_string` | No | Full URL (alternative to individual fields, e.g. 
`http://user:pass@host:8123`) | +| `host` | No | Hostname (default: `localhost`) | +| `port` | No | HTTP port (default: `8123`) | +| `database` | No | Database name (default: `default`) | +| `user` | No | Username (default: `default`) | +| `password` | No | Password | +| `protocol` | No | `http` or `https` (default: `http`) | +| `request_timeout` | No | Request timeout in ms (default: `30000`) | +| `tls_ca_cert` | No | Path to CA certificate for TLS | +| `tls_cert` | No | Path to client certificate for mutual TLS | +| `tls_key` | No | Path to client key for mutual TLS | +| `clickhouse_settings` | No | Object of ClickHouse server settings | + +### ClickHouse Cloud + +```json +{ + "clickhouse-cloud": { + "type": "clickhouse", + "host": "abc123.us-east-1.aws.clickhouse.cloud", + "port": 8443, + "protocol": "https", + "user": "default", + "password": "{env:CLICKHOUSE_CLOUD_PASSWORD}", + "database": "default" + } +} +``` + +### Using a connection string + +```json +{ + "clickhouse-prod": { + "type": "clickhouse", + "connection_string": "https://default:secret@my-ch.cloud:8443" + } +} +``` + +!!! info "Server compatibility" + The ClickHouse driver supports ClickHouse server versions 23.3 and later, covering all non-EOL releases. This includes LTS releases 23.8, 24.3, 24.8, and all stable releases through the current version. + ## SQL Server ```json diff --git a/docs/docs/data-engineering/guides/clickhouse.md b/docs/docs/data-engineering/guides/clickhouse.md new file mode 100644 index 0000000000..dd9e80f4ee --- /dev/null +++ b/docs/docs/data-engineering/guides/clickhouse.md @@ -0,0 +1,233 @@ +# ClickHouse Guide + +Altimate Code has first-class ClickHouse support — connect to self-hosted clusters, ClickHouse Cloud, or local Docker instances and use the full suite of SQL analysis, schema inspection, and optimization tools. + +## Quick Start + +### 1. 
Connect + +```bash +# Add a local ClickHouse +> warehouse_add my-clickhouse {"type": "clickhouse", "host": "localhost", "port": 8123, "database": "analytics"} + +# Or ClickHouse Cloud +> warehouse_add ch-cloud {"type": "clickhouse", "host": "abc.clickhouse.cloud", "port": 8443, "protocol": "https", "user": "default", "password": "..."} +``` + +### 2. Verify + +```bash +> warehouse_test my-clickhouse +✓ Connected successfully +``` + +### 3. Explore + +```bash +> "Show me all tables in my ClickHouse analytics database" +> "Describe the events table schema" +``` + +## What Can Altimate Code Do for ClickHouse Users? + +### Analyze Query Performance + +ClickHouse queries can be deceptively fast — until they scan terabytes. Altimate Code can analyze your queries and spot issues. + +``` +> "Analyze this query for performance issues" + +SELECT user_id, count() +FROM events +WHERE toDate(timestamp) = today() +GROUP BY user_id +ORDER BY count() DESC + +# Altimate Code spots: +# ⚠ toDate(timestamp) prevents partition pruning — use timestamp >= toStartOfDay(now()) +# ⚠ No LIMIT clause — consider adding LIMIT for large result sets +# ✓ GROUP BY + ORDER BY is efficient with MergeTree +``` + +### Optimize MergeTree Table Design + +The choice of `ORDER BY` key, partitioning, and engine variant dramatically affects ClickHouse performance. Altimate Code understands these nuances. + +``` +> "Review my table design for the events table" + +# Altimate Code analyzes: +# - ORDER BY key alignment with common query patterns +# - Partition granularity (too fine = too many parts, too coarse = slow scans) +# - Column types (String vs LowCardinality, DateTime vs DateTime64) +# - Engine choice (MergeTree vs ReplacingMergeTree vs AggregatingMergeTree) +``` + +### Translate SQL Across Dialects + +Moving from PostgreSQL, BigQuery, or Snowflake to ClickHouse? Altimate Code translates SQL between dialects. 
+ +``` +> "Translate this Snowflake query to ClickHouse" + +-- Snowflake +SELECT + DATE_TRUNC('month', created_at) AS month, + APPROX_COUNT_DISTINCT(user_id) AS unique_users +FROM events +WHERE created_at >= DATEADD('month', -6, CURRENT_TIMESTAMP()) +GROUP BY 1 + +-- ClickHouse (translated) +SELECT + toStartOfMonth(created_at) AS month, + uniqHLL12(user_id) AS unique_users +FROM events +WHERE created_at >= subtractMonths(now(), 6) +GROUP BY month +ORDER BY month +``` + +### Inspect Schema & Lineage + +``` +> "What are the columns in the events table?" +> "Show me column-level lineage for the daily_metrics materialized view" +> "Which tables reference the users table?" +``` + +### Monitor Cluster Health via system Tables + +ClickHouse exposes rich operational data in `system.*` tables. Altimate Code can query them for you. + +``` +> "Show me the top 10 slowest queries in the last hour" +> "How many parts does the events table have? Is it healthy?" +> "What's the current merge activity?" +> "Show disk usage by table" +``` + +### Write and Debug ClickHouse SQL + +ClickHouse SQL has unique features — `arrayJoin`, `WITH FILL`, window functions over `ORDER BY` tuples, `PREWHERE`, and more. Altimate Code understands them natively. + +``` +> "Write a query that uses arrayJoin to explode the tags array in the events table and count occurrences" + +SELECT + tag, + count() AS cnt +FROM events +ARRAY JOIN tags AS tag +GROUP BY tag +ORDER BY cnt DESC +LIMIT 20 +``` + +### dbt + ClickHouse + +If you use dbt with the [dbt-clickhouse adapter](https://github.com/ClickHouse/dbt-clickhouse), Altimate Code detects your dbt project and ClickHouse profile automatically. 
+ +``` +> /discover + +## dbt Project +✓ Project "analytics" (profile: clickhouse_prod) + +## Warehouse Connections +### From dbt profiles.yml +Name | Type | Source +dbt_clickhouse | clickhouse | dbt-profile +``` + +All dbt skills work with ClickHouse: + +- `/dbt-develop` — develop new models with ClickHouse-aware SQL +- `/dbt-troubleshoot` — debug dbt run failures +- `/dbt-analyze` — analyze model performance + +### Materialized View Pipelines + +ClickHouse materialized views are real-time transformation pipelines. Altimate Code helps design and debug them. + +``` +> "Help me create a materialized view that aggregates events into hourly metrics" + +CREATE TABLE analytics.hourly_metrics ( + hour DateTime, + event_type LowCardinality(String), + total UInt64, + unique_users AggregateFunction(uniq, UInt64) +) ENGINE = AggregatingMergeTree() +PARTITION BY toYYYYMM(hour) +ORDER BY (hour, event_type); + +CREATE MATERIALIZED VIEW analytics.hourly_metrics_mv +TO analytics.hourly_metrics AS +SELECT + toStartOfHour(timestamp) AS hour, + event_type, + count() AS total, + uniqState(user_id) AS unique_users +FROM analytics.events +GROUP BY hour, event_type; +``` + +## ClickHouse-Specific Tips + +### LowCardinality for Repeated Strings + +``` +> "Which String columns in my events table should use LowCardinality?" + +# Altimate Code checks cardinality: +# ✓ event_type — 47 distinct values → use LowCardinality(String) +# ✓ country — 195 distinct values → use LowCardinality(String) +# ✗ user_agent — 12M distinct values → keep as String +``` + +### Partition Pruning + +``` +> "Is my events query using partition pruning?" + +# Altimate Code checks EXPLAIN output and warns if: +# - WHERE clause doesn't match partition key +# - Date functions prevent pruning (e.g., toDate(ts) vs ts >= ...) 
+``` + +### Codec Selection + +``` +> "Suggest compression codecs for my events table columns" + +# Altimate Code recommends based on data patterns: +# timestamp — Delta + ZSTD (monotonic timestamps) +# user_id — ZSTD (random integers) +# event_type — LowCardinality is better than codec here +# payload — ZSTD(3) (JSON strings, higher ratio) +``` + +## Version Compatibility + +Altimate Code supports all non-EOL ClickHouse server versions: + +| Version | Type | Status | +|---------|------|--------| +| 25.x | Stable | Supported | +| 24.8 | LTS | Supported | +| 24.3 | LTS | Supported | +| 23.8 | LTS | Supported | +| < 23.3 | EOL | Not tested | + +The driver uses the official `@clickhouse/client` package which communicates over HTTP(S), ensuring compatibility across versions and deployment models (self-hosted, ClickHouse Cloud, Altinity.Cloud). + +## Auto-Discovery + +Altimate Code automatically detects ClickHouse from: + +| Source | Detection | +|--------|-----------| +| **dbt profiles** | `type: clickhouse` in `~/.dbt/profiles.yml` | +| **Docker containers** | Running `clickhouse/clickhouse-server` images | +| **Environment variables** | `CLICKHOUSE_HOST` or `CLICKHOUSE_URL` | diff --git a/docs/docs/data-engineering/guides/index.md b/docs/docs/data-engineering/guides/index.md index 40b99dac65..a3fc13262a 100644 --- a/docs/docs/data-engineering/guides/index.md +++ b/docs/docs/data-engineering/guides/index.md @@ -8,3 +8,4 @@ Practical guides for common data engineering workflows. 
| [Migration](migration.md) | Translate SQL across warehouse dialects | | [Using with Claude Code](using-with-claude-code.md) | Run altimate tools from Claude Code sessions | | [Using with Codex](using-with-codex.md) | Use your ChatGPT subscription as the LLM backend | +| [ClickHouse](clickhouse.md) | ClickHouse-specific workflows: MergeTree optimization, materialized views, dialect translation, dbt integration | diff --git a/docs/docs/data-engineering/tools/warehouse-tools.md b/docs/docs/data-engineering/tools/warehouse-tools.md index 2318cc3e70..b52ff4cf67 100644 --- a/docs/docs/data-engineering/tools/warehouse-tools.md +++ b/docs/docs/data-engineering/tools/warehouse-tools.md @@ -72,6 +72,7 @@ env_bigquery | bigquery | GOOGLE_APPLICATION_CREDENTIALS | MySQL | `MYSQL_HOST`, `MYSQL_DATABASE` | | MongoDB | `MONGODB_URI`, `MONGO_URL` | | Redshift | `REDSHIFT_HOST` | +| ClickHouse | `CLICKHOUSE_HOST`, `CLICKHOUSE_URL` | ### Parameters @@ -165,7 +166,7 @@ Remove an existing warehouse connection. ## warehouse_discover -Discover database containers running in Docker. Detects PostgreSQL, MySQL/MariaDB, SQL Server, and MongoDB containers with their connection details. +Discover database containers running in Docker. Detects PostgreSQL, MySQL/MariaDB, SQL Server, ClickHouse, and MongoDB containers with their connection details. ``` > warehouse_discover diff --git a/docs/docs/drivers.md b/docs/docs/drivers.md index ae66164c67..e4f00cabff 100644 --- a/docs/docs/drivers.md +++ b/docs/docs/drivers.md @@ -2,7 +2,7 @@ ## Overview -Altimate Code connects to 11 databases natively via TypeScript drivers. No Python dependency required. Drivers are loaded lazily, so only the driver you need is imported at runtime. +Altimate Code connects to 12 databases natively via TypeScript drivers. No Python dependency required. Drivers are loaded lazily, so only the driver you need is imported at runtime. 
## Support Matrix @@ -18,6 +18,7 @@ Altimate Code connects to 11 databases natively via TypeScript drivers. No Pytho | BigQuery | `@google-cloud/bigquery` | Service Account, ADC | ✅ Live account | 25 E2E tests, UNNEST/STRUCT/DATE types | | Databricks | `@databricks/sql` | PAT, OAuth | ✅ Live account | 24 E2E tests, Unity Catalog support | | MongoDB | `mongodb` | Password, Connection String | ✅ Docker | 90 E2E tests, MQL queries, aggregation pipelines | +| ClickHouse | `@clickhouse/client` | Password, Connection String, TLS | ✅ Docker | HTTP(S) protocol, ClickHouse Cloud support | | Oracle | `oracledb` (thin) | Password | ❌ Needs Oracle 12.1+ | Thin mode only, no Instant Client | ## Installation @@ -41,6 +42,7 @@ bun add mongodb # MongoDB bun add snowflake-sdk # Snowflake bun add @google-cloud/bigquery # BigQuery bun add @databricks/sql # Databricks +bun add @clickhouse/client # ClickHouse bun add oracledb # Oracle (thin mode) ``` @@ -133,6 +135,15 @@ altimate-dbt init --project-root /path/to/dbt/project --python-path $(which pyth |--------|--------------| | Password | `host`, `port`, `service_name`, `user`, `password` | +### ClickHouse +| Method | Config Fields | +|--------|--------------| +| Password | `host`, `port`, `database`, `user`, `password` | +| Connection String | `connection_string: "http://user:pass@host:8123"` | +| TLS/HTTPS | `protocol: "https"`, `tls_ca_cert`, `tls_cert`, `tls_key` | + +ClickHouse driver supports server versions 23.3+ (all non-EOL releases). Uses the official `@clickhouse/client` package over HTTP(S). Compatible with ClickHouse Cloud, self-hosted, and Altinity.Cloud. Query history available via `system.query_log`. + ### MongoDB | Method | Config Fields | |--------|--------------| @@ -179,7 +190,7 @@ SSH auth types: `"key"` (default) or `"password"` (set `ssh_password`). The CLI auto-discovers connections from: -1. **Docker containers**: detects running PostgreSQL, MySQL, MariaDB, SQL Server, Oracle, MongoDB containers +1. 
**Docker containers**: detects running PostgreSQL, MySQL, MariaDB, SQL Server, Oracle, ClickHouse, MongoDB containers 2. **dbt profiles**: parses `~/.dbt/profiles.yml` for all supported adapters 3. **Environment variables**: detects `SNOWFLAKE_ACCOUNT`, `PGHOST`, `MYSQL_HOST`, `MSSQL_HOST`, `ORACLE_HOST`, `DUCKDB_PATH`, `SQLITE_PATH`, etc. diff --git a/docs/docs/getting-started/index.md b/docs/docs/getting-started/index.md index e7f5c373bb..72f71a8b94 100644 --- a/docs/docs/getting-started/index.md +++ b/docs/docs/getting-started/index.md @@ -58,7 +58,7 @@ Altimate Code goes the other direction. It connects to your **entire** stack and --- - Optimize a Snowflake query in the morning. Migrate a SQL Server pipeline to BigQuery in the afternoon. Same agent, same tools. No warehouse subscription required. First-class support for :material-snowflake: Snowflake, :material-google-cloud: BigQuery, :simple-databricks: Databricks, :material-elephant: PostgreSQL, :material-aws: Redshift, :material-duck: DuckDB, :material-database: MySQL, and :material-microsoft: SQL Server. + Optimize a Snowflake query in the morning. Migrate a SQL Server pipeline to BigQuery in the afternoon. Same agent, same tools. No warehouse subscription required. First-class support for :material-snowflake: Snowflake, :material-google-cloud: BigQuery, :simple-databricks: Databricks, :material-elephant: PostgreSQL, :material-aws: Redshift, :material-database: ClickHouse, :material-duck: DuckDB, :material-database: MySQL, :material-microsoft: SQL Server, and :material-leaf: MongoDB. 
- :material-cloud-outline:{ .lg .middle } **Works with any LLM** diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index aadb7db2f4..88310510d4 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -112,6 +112,7 @@ nav: - Migration: data-engineering/guides/migration.md - Using with Claude Code: data-engineering/guides/using-with-claude-code.md - Using with Codex: data-engineering/guides/using-with-codex.md + - ClickHouse: data-engineering/guides/clickhouse.md - Configure: - Overview: configure/index.md - Warehouses: configure/warehouses.md diff --git a/packages/drivers/ADDING_A_DRIVER.md b/packages/drivers/ADDING_A_DRIVER.md new file mode 100644 index 0000000000..b4279ddf01 --- /dev/null +++ b/packages/drivers/ADDING_A_DRIVER.md @@ -0,0 +1,221 @@ +# Adding a New Database Driver + +Step-by-step checklist for adding a new warehouse/database driver to Altimate Code. This was derived from the ClickHouse driver addition and covers every integration point in the codebase. + +## Prerequisites + +Before starting, research: +- The official Node.js/TypeScript client package for the database +- Supported authentication methods +- SQL dialect specifics (LIMIT syntax, system tables, parameterized query format) +- Which server versions are not end-of-life (EOL) +- Docker image availability for E2E testing + +## Checklist (23 integration points) + +### Phase 1: Core Driver (4 files) + +- [ ] **1. Create driver file** — `packages/drivers/src/{database}.ts` + - Export `async function connect(config: ConnectionConfig): Promise` + - Implement all 6 `Connector` interface methods: `connect()`, `execute()`, `listSchemas()`, `listTables()`, `describeTable()`, `close()` + - Lazy-import the npm package with helpful error message if not installed + - Handle LIMIT injection for SELECT-like queries (watch out for `WITH...INSERT` patterns) + - Use parameterized queries for `listTables` and `describeTable` (prevent SQL injection) + - Follow the pattern in `postgres.ts` or `mysql.ts` + +- [ ] **2. 
Export from index** — `packages/drivers/src/index.ts` + ```typescript + export { connect as connectNewdb } from "./newdb" + ``` + +- [ ] **3. Add npm dependency** — `packages/drivers/package.json` + ```json + "optionalDependencies": { + "newdb-client": "^1.0.0" + } + ``` + +- [ ] **4. Add config aliases** — `packages/drivers/src/normalize.ts` + - Create a `NEWDB_ALIASES: AliasMap` with camelCase and dbt-style field name mappings + - Add entries to `DRIVER_ALIASES` map (include type aliases like `newdb: NEWDB_ALIASES`) + +### Phase 2: Registry Integration (4 files) + +- [ ] **5. Add to DRIVER_MAP** — `packages/opencode/src/altimate/native/connections/registry.ts` + ```typescript + newdb: "@altimateai/drivers/newdb", + ``` + +- [ ] **6. Add to import switch** — same file, `createConnector()` function + ```typescript + case "@altimateai/drivers/newdb": + mod = await import("@altimateai/drivers/newdb") + break + ``` + +- [ ] **7. Add to PASSWORD_DRIVERS** — same file (if the database uses password auth) + ```typescript + const PASSWORD_DRIVERS = new Set([..., "newdb"]) + ``` + +- [ ] **8. Remove from KNOWN_UNSUPPORTED** — same file (if previously listed there) + +### Phase 3: Discovery (4 files) + +- [ ] **9. Docker container discovery** — `packages/opencode/src/altimate/native/connections/docker-discovery.ts` + - Add image pattern to `IMAGE_MAP`: `{ pattern: /newdb/i, type: "newdb" }` + - Add env var mapping to `ENV_MAP` + - Add default port to `DEFAULT_PORTS` + - Add default user to `DEFAULT_USERS` + +- [ ] **10. Environment variable detection** — `packages/opencode/src/altimate/tools/project-scan.ts` + - Add entry to the `warehouses` array in `detectEnvVars()` with signal env vars and config mapping + +- [ ] **11. dbt adapter mapping** — `packages/opencode/src/altimate/native/connections/dbt-profiles.ts` + ```typescript + newdb: "newdb", // in ADAPTER_TYPE_MAP + ``` + +- [ ] **12. 
dbt lineage dialect** — `packages/opencode/src/altimate/native/dbt/lineage.ts` + ```typescript + newdb: "newdb", // in detectDialect() dialectMap + ``` + +### Phase 4: FinOps (1 file) + +- [ ] **13. Query history** — `packages/opencode/src/altimate/native/finops/query-history.ts` + - Add a SQL template querying the database's system query log (if available) + - Add handler in `buildHistoryQuery()` function + - Export the template in `SQL_TEMPLATES` + - If no query history exists, add `return null` with a comment (like DuckDB) + +### Phase 5: Publish & Build (1 file) + +- [ ] **14. Peer dependencies** — `packages/opencode/script/publish.ts` + ```typescript + "newdb-client": ">=1", // in driverPeerDependencies + ``` + +### Phase 6: Tool Descriptions (1 file) + +- [ ] **15. warehouse_add tool** — `packages/opencode/src/altimate/tools/warehouse-add.ts` + - Add canonical fields to the config `.describe()` string + - Update the error message listing supported types + +### Phase 7: Tests (3 files) + +- [ ] **16. E2E driver tests** — `packages/opencode/test/altimate/drivers-{database}-e2e.test.ts` (new file) + - Follow the pattern in `drivers-docker-e2e.test.ts` or `drivers-clickhouse-e2e.test.ts` + - Test: connect, SELECT, DDL+DML, listSchemas, listTables, describeTable, LIMIT handling, close + - Use Docker containers with `DRIVER_E2E_DOCKER=1` opt-in guard + - Test multiple server versions (at least latest + oldest non-EOL LTS) + - Support CI env vars (`TEST_{DB}_HOST`, `TEST_{DB}_PORT`, etc.) + +- [ ] **17. Normalization tests** — `packages/opencode/test/altimate/driver-normalize.test.ts` + - Add a `describe("normalizeConfig - {Database}")` block testing all aliases + +- [ ] **18. Connection registry tests** — verify existing tests in `connections.test.ts` still pass + +### Phase 8: Documentation (5 files) + +- [ ] **19. 
Warehouse config docs** — `docs/docs/configure/warehouses.md` + - Add a full section with JSON config example, field table, and auth variants + - Update the warehouse count in the header + - Add server compatibility note + +- [ ] **20. Driver support matrix** — `docs/docs/drivers.md` + - Add row to the support matrix table + - Add to installation section + - Add auth methods section + - Add to auto-discovery list + - Update count in header + +- [ ] **21. Warehouse tools docs** — `docs/docs/data-engineering/tools/warehouse-tools.md` + - Add env var signals to the detection table + - Add to Docker discovery description + +- [ ] **22. README** — `README.md` + - Add to the "Supported Warehouses" list + +- [ ] **23. Getting started** — `docs/docs/getting-started/index.md` + - Add to the warehouse list on the homepage + +### Phase 9: Optional Enhancements + +- [ ] **Guide page** — `docs/docs/data-engineering/guides/{database}.md` + - Database-specific workflows, optimization tips, dialect translation examples + - Add to `docs/docs/data-engineering/guides/index.md` and `docs/mkdocs.yml` nav + +- [ ] **Fingerprint detection** — `packages/opencode/src/altimate/fingerprint/index.ts` + - Add to the dbt adapter type regex (if not already present) + +## Common Pitfalls + +1. **LIMIT injection for CTEs**: The `isSelectLike` regex matches `WITH`, but `WITH...INSERT INTO...SELECT` is valid in many databases. Add a `hasDML` guard to prevent appending LIMIT to write queries. + +2. **TLS detection from connection strings**: If the database supports HTTPS URLs, detect `https://` from the resolved URL, not just from `config.protocol`. + +3. **Lazy connection**: Some clients (like `@clickhouse/client`) connect lazily on first query. Don't add an explicit `connect()` call if the library handles it. + +4. **`any` types for imports**: Dynamic imports of optional packages require `any` types. This is consistent with all existing drivers. + +5. 
**Default user/password**: Some databases (ClickHouse, DuckDB, SQLite) don't require auth. Don't add them to `PASSWORD_DRIVERS` if passwordless auth is the default, or ensure the check only triggers on non-null non-string passwords. + +6. **Marker check**: If modifying files under `packages/opencode/src/` that exist upstream, wrap changes in `// altimate_change start/end` markers and run `bun run script/upstream/analyze.ts --markers --base main --strict`. + +## Quality Gates (run before committing) + +```bash +# Tests +cd packages/opencode && bun test test/altimate/driver-normalize.test.ts test/altimate/connections.test.ts + +# Typecheck +bun turbo typecheck + +# Marker check +bun run script/upstream/analyze.ts --markers --base main --strict +``` + +## File Map + +``` +packages/drivers/ + src/ + {database}.ts ← NEW: driver implementation + index.ts ← export + normalize.ts ← aliases + package.json ← optionalDependency + +packages/opencode/ + src/altimate/ + native/connections/ + registry.ts ← DRIVER_MAP, switch, PASSWORD_DRIVERS + docker-discovery.ts ← Docker detection + dbt-profiles.ts ← dbt adapter mapping + native/dbt/ + lineage.ts ← dialect detection + native/finops/ + query-history.ts ← query log SQL template + tools/ + project-scan.ts ← env var detection + warehouse-add.ts ← tool description + error message + fingerprint/ + index.ts ← dbt adapter regex + script/ + publish.ts ← peerDependencies + test/altimate/ + drivers-{db}-e2e.test.ts ← NEW: E2E tests + driver-normalize.test.ts ← alias tests + +docs/docs/ + configure/warehouses.md ← config docs + drivers.md ← support matrix + data-engineering/ + tools/warehouse-tools.md ← env var + Docker docs + guides/{database}.md ← NEW: optional guide + guides/index.md ← guide index + getting-started/index.md ← homepage + +README.md ← warehouse list +docs/mkdocs.yml ← nav (if adding guide) +``` diff --git a/packages/drivers/package.json b/packages/drivers/package.json index 3deb36b4d0..98a0112cf9 100644 --- 
a/packages/drivers/package.json +++ b/packages/drivers/package.json @@ -20,6 +20,7 @@ "mssql": "^11.0.0", "oracledb": "^6.0.0", "duckdb": "^1.0.0", - "mongodb": "^6.0.0" + "mongodb": "^6.0.0", + "@clickhouse/client": "^1.0.0" } } diff --git a/packages/drivers/src/clickhouse.ts b/packages/drivers/src/clickhouse.ts new file mode 100644 index 0000000000..149f09c65a --- /dev/null +++ b/packages/drivers/src/clickhouse.ts @@ -0,0 +1,147 @@ +/** + * ClickHouse driver using the `@clickhouse/client` package. + * + * Supports ClickHouse server versions 23.3+ (all non-EOL versions as of 2026). + * Uses the official ClickHouse JS client which communicates over HTTP(S). + */ + +import type { ConnectionConfig, Connector, ConnectorResult, SchemaColumn } from "./types" + +export async function connect(config: ConnectionConfig): Promise { + let createClient: any + try { + const mod = await import("@clickhouse/client") + createClient = mod.createClient ?? mod.default?.createClient + if (!createClient) { + throw new Error("createClient export not found in @clickhouse/client") + } + } catch { + throw new Error("ClickHouse driver not installed. Run: npm install @clickhouse/client") + } + + let client: any + + return { + async connect() { + const url = + config.connection_string ?? + `${config.protocol ?? "http"}://${config.host ?? "localhost"}:${config.port ?? 
8123}` + + const clientConfig: Record = { + url, + request_timeout: Number(config.request_timeout) || 30000, + compression: { + request: false, + response: true, + }, + } + + if (config.user) clientConfig.username = config.user as string + if (config.password) clientConfig.password = config.password as string + if (config.database) clientConfig.database = config.database as string + + // TLS/SSL support — detect HTTPS from URL, protocol config, or explicit tls/ssl flags + const isHttps = typeof url === "string" && url.startsWith("https://") + if (config.tls || config.ssl || (config.protocol as string) === "https" || isHttps) { + const tls: Record = {} + if (config.tls_ca_cert) tls.ca_cert = config.tls_ca_cert + if (config.tls_cert) tls.cert = config.tls_cert + if (config.tls_key) tls.key = config.tls_key + if (Object.keys(tls).length > 0) { + clientConfig.tls = tls + } + } + + // ClickHouse Cloud and custom settings + if (config.clickhouse_settings) { + clientConfig.clickhouse_settings = config.clickhouse_settings + } + + client = createClient(clientConfig) + }, + + async execute(sql: string, limit?: number, _binds?: any[]): Promise { + const effectiveLimit = limit ?? 1000 + let query = sql + const isSelectLike = /^\s*(SELECT|WITH|SHOW|DESCRIBE|EXPLAIN|EXISTS)\b/i.test(sql) + const hasDML = /\b(INSERT|CREATE|DROP|ALTER|TRUNCATE|RENAME|ATTACH|DETACH|OPTIMIZE|SYSTEM)\b/i.test(sql) + + if (isSelectLike && !hasDML && effectiveLimit && !/\bLIMIT\b/i.test(sql)) { + query = `${sql.replace(/;\s*$/, "")} LIMIT ${effectiveLimit + 1}` + } + + const resultSet = await client.query({ + query, + format: "JSONEachRow", + }) + + const rows: any[] = await resultSet.json() + + if (rows.length === 0) { + return { columns: [], rows: [], row_count: 0, truncated: false } + } + + const columns = Object.keys(rows[0]) + const truncated = rows.length > effectiveLimit + const limitedRows = truncated ? 
rows.slice(0, effectiveLimit) : rows + + return { + columns, + rows: limitedRows.map((row: any) => columns.map((col: string) => row[col])), + row_count: limitedRows.length, + truncated, + } + }, + + async listSchemas(): Promise { + const resultSet = await client.query({ + query: "SHOW DATABASES", + format: "JSONEachRow", + }) + const rows: any[] = await resultSet.json() + return rows.map((r) => r.name ?? Object.values(r)[0]) as string[] + }, + + async listTables(schema: string): Promise> { + const resultSet = await client.query({ + query: `SELECT name, engine + FROM system.tables + WHERE database = {db:String} + ORDER BY name`, + format: "JSONEachRow", + query_params: { db: schema }, + }) + const rows: any[] = await resultSet.json() + return rows.map((r) => ({ + name: r.name as string, + type: (r.engine as string)?.toLowerCase().includes("view") ? "view" : "table", + })) + }, + + async describeTable(schema: string, table: string): Promise { + const resultSet = await client.query({ + query: `SELECT name, type, + position(type, 'Nullable') > 0 AS is_nullable + FROM system.columns + WHERE database = {db:String} + AND table = {tbl:String} + ORDER BY position`, + format: "JSONEachRow", + query_params: { db: schema, tbl: table }, + }) + const rows: any[] = await resultSet.json() + return rows.map((r) => ({ + name: r.name as string, + data_type: r.type as string, + nullable: r.is_nullable === 1 || r.is_nullable === true || r.is_nullable === "1", + })) + }, + + async close() { + if (client) { + await client.close() + client = null + } + }, + } +} diff --git a/packages/drivers/src/index.ts b/packages/drivers/src/index.ts index 73a8d7c2c1..8102d6e275 100644 --- a/packages/drivers/src/index.ts +++ b/packages/drivers/src/index.ts @@ -16,3 +16,4 @@ export { connect as connectOracle } from "./oracle" export { connect as connectDuckdb } from "./duckdb" export { connect as connectSqlite } from "./sqlite" export { connect as connectMongodb } from "./mongodb" +export { connect as 
connectClickhouse } from "./clickhouse" diff --git a/packages/drivers/src/normalize.ts b/packages/drivers/src/normalize.ts index 5935f1d5bb..4c43acd8b5 100644 --- a/packages/drivers/src/normalize.ts +++ b/packages/drivers/src/normalize.ts @@ -83,6 +83,15 @@ const MONGODB_ALIASES: AliasMap = { server_selection_timeout: ["serverSelectionTimeoutMS"], } +const CLICKHOUSE_ALIASES: AliasMap = { + ...COMMON_ALIASES, + connection_string: ["connectionString", "uri", "url"], + request_timeout: ["requestTimeout", "timeout"], + tls_ca_cert: ["tlsCaCert", "ssl_ca", "ca_cert"], + tls_cert: ["tlsCert", "ssl_cert"], + tls_key: ["tlsKey", "ssl_key"], +} + /** Map of warehouse type to its alias map. */ const DRIVER_ALIASES: Record = { snowflake: SNOWFLAKE_ALIASES, @@ -98,6 +107,7 @@ const DRIVER_ALIASES: Record = { oracle: ORACLE_ALIASES, mongodb: MONGODB_ALIASES, mongo: MONGODB_ALIASES, + clickhouse: CLICKHOUSE_ALIASES, // duckdb and sqlite have simple configs — no aliases needed } diff --git a/packages/opencode/.github/meta/commit.txt b/packages/opencode/.github/meta/commit.txt index f8792a5cb7..4cdd7c384d 100644 --- a/packages/opencode/.github/meta/commit.txt +++ b/packages/opencode/.github/meta/commit.txt @@ -1,29 +1 @@ -fix: address all 17 Sentry bot review comments on PR #221 - -CRITICAL (4): -- Redshift describeTable: external_type -> data_type in svv_columns query -- sql.fix handler: return correct SqlFixResult shape (error_message, - suggestions, suggestion_count) -- sql.schema_diff: use Schema.fromDdl() not fromJson() for DDL strings, - return flat SchemaDiffResult (not wrapped in data) -- DuckDB connect: verified correct (db.connect() is sync, no fix needed) - -HIGH (5): -- analyzeMigration: removed unused combinedDdl, clarified comment -- Dynamic import: replaced import(variable) with static switch statement - for bundler compatibility (10 cases) -- Race condition: added pending Map for in-flight connector creation, - concurrent callers await the same Promise -- 
registry.add: cache sanitized config (not unsanitized with plaintext creds) -- detectPiiLive: return success:false on error (not success:true) - -MEDIUM (6): -- Dispatcher error path: wrap Telemetry.track in try/catch to not mask errors -- SSH tunnel: add process.exit(0) after SIGINT/SIGTERM cleanup -- PII detector: add listColumns() to SchemaCache, use instead of search("") -- sql.autocomplete: pass prefix.length as cursor position (not hardcoded 0) -- SQL Server describeTable: query sys.objects (tables+views) not just sys.tables -- Databricks INTERVAL syntax: DATE_SUB takes integer, not INTERVAL expression - (fixed in unused-resources.ts and credit-analyzer.ts) - -Co-Authored-By: Claude Opus 4.6 (1M context) +feat: add ClickHouse warehouse driver diff --git a/packages/opencode/script/publish.ts b/packages/opencode/script/publish.ts index afc8b9174d..8e61869dd0 100755 --- a/packages/opencode/script/publish.ts +++ b/packages/opencode/script/publish.ts @@ -29,6 +29,7 @@ const driverPeerDependencies: Record = { mssql: ">=11", oracledb: ">=6", duckdb: ">=1", + "@clickhouse/client": ">=1", } const driverPeerDependenciesMeta: Record = Object.fromEntries( diff --git a/packages/opencode/src/altimate/native/connections/dbt-profiles.ts b/packages/opencode/src/altimate/native/connections/dbt-profiles.ts index e08892e128..4606e625d3 100644 --- a/packages/opencode/src/altimate/native/connections/dbt-profiles.ts +++ b/packages/opencode/src/altimate/native/connections/dbt-profiles.ts @@ -25,6 +25,7 @@ const ADAPTER_TYPE_MAP: Record = { sqlite: "sqlite", spark: "databricks", trino: "postgres", // wire-compatible + clickhouse: "clickhouse", } /** Map dbt config keys to altimate config keys. */ @@ -76,10 +77,7 @@ function resolveEnvVarsDeep(obj: Record): Record, -): ConnectionConfig { +function mapConfig(dbtType: string, dbtConfig: Record): ConnectionConfig { const type = ADAPTER_TYPE_MAP[dbtType] ?? 
dbtType const config: ConnectionConfig = { type } @@ -102,11 +100,8 @@ function mapConfig( * * @param profilesPath - Path to profiles.yml. Defaults to ~/.dbt/profiles.yml */ -export async function parseDbtProfiles( - profilesPath?: string, -): Promise { - const resolvedPath = - profilesPath ?? path.join(os.homedir(), ".dbt", "profiles.yml") +export async function parseDbtProfiles(profilesPath?: string): Promise { + const resolvedPath = profilesPath ?? path.join(os.homedir(), ".dbt", "profiles.yml") if (!fs.existsSync(resolvedPath)) { return [] @@ -150,9 +145,7 @@ export async function parseDbtProfiles( const outputs = (profile as Record).outputs if (!outputs || typeof outputs !== "object") continue - for (const [outputName, output] of Object.entries( - outputs as Record, - )) { + for (const [outputName, output] of Object.entries(outputs as Record)) { if (!output || typeof output !== "object") continue const rawConfig = resolveEnvVarsDeep(output as Record) const dbtType = (rawConfig.type as string) ?? "unknown" @@ -172,9 +165,7 @@ export async function parseDbtProfiles( /** * Convert DbtProfileConnection array to a map of ConnectionConfigs. 
*/ -export function dbtConnectionsToConfigs( - connections: DbtProfileConnection[], -): Record { +export function dbtConnectionsToConfigs(connections: DbtProfileConnection[]): Record { const result: Record = {} for (const conn of connections) { result[conn.name] = conn.config as ConnectionConfig diff --git a/packages/opencode/src/altimate/native/connections/docker-discovery.ts b/packages/opencode/src/altimate/native/connections/docker-discovery.ts index c0fb801e4e..45a174bc9a 100644 --- a/packages/opencode/src/altimate/native/connections/docker-discovery.ts +++ b/packages/opencode/src/altimate/native/connections/docker-discovery.ts @@ -17,6 +17,7 @@ const IMAGE_MAP: Array<{ pattern: RegExp; type: string }> = [ { pattern: /mssql/i, type: "sqlserver" }, { pattern: /oracle/i, type: "oracle" }, { pattern: /gvenzl\/oracle/i, type: "oracle" }, + { pattern: /clickhouse/i, type: "clickhouse" }, ] /** Map environment variable names to connection config fields by db type. */ @@ -42,6 +43,11 @@ const ENV_MAP: Record> = { APP_USER_PASSWORD: "password", ORACLE_DATABASE: "database", }, + clickhouse: { + CLICKHOUSE_USER: "user", + CLICKHOUSE_PASSWORD: "password", + CLICKHOUSE_DB: "database", + }, } /** Default ports by database type. */ @@ -50,6 +56,7 @@ const DEFAULT_PORTS: Record = { mysql: 3306, sqlserver: 1433, oracle: 1521, + clickhouse: 8123, } /** Default users by database type. */ @@ -58,6 +65,7 @@ const DEFAULT_USERS: Record = { mysql: "root", sqlserver: "sa", oracle: "system", + clickhouse: "default", } function detectDbType(image: string): string | null { @@ -67,10 +75,7 @@ function detectDbType(image: string): string | null { return null } -function parseEnvVars( - envList: string[], - dbType: string, -): Record { +function parseEnvVars(envList: string[], dbType: string): Record { const result: Record = {} const mapping = ENV_MAP[dbType] ?? 
{} @@ -88,10 +93,7 @@ function parseEnvVars( return result } -function extractPort( - ports: Record[] | undefined, - dbType: string, -): number { +function extractPort(ports: Record[] | undefined, dbType: string): number { const defaultPort = DEFAULT_PORTS[dbType] ?? 5432 if (!ports || !Array.isArray(ports)) return defaultPort diff --git a/packages/opencode/src/altimate/native/connections/registry.ts b/packages/opencode/src/altimate/native/connections/registry.ts index 7dfb99fcf6..617d6685d3 100644 --- a/packages/opencode/src/altimate/native/connections/registry.ts +++ b/packages/opencode/src/altimate/native/connections/registry.ts @@ -128,6 +128,7 @@ const DRIVER_MAP: Record = { sqlite: "@altimateai/drivers/sqlite", mongodb: "@altimateai/drivers/mongodb", mongo: "@altimateai/drivers/mongodb", + clickhouse: "@altimateai/drivers/clickhouse", } async function createConnector(name: string, config: ConnectionConfig): Promise { @@ -135,7 +136,6 @@ async function createConnector(name: string, config: ConnectionConfig): Promise< if (!driverPath) { // altimate_change start — friendlier error for known-but-unsupported databases const KNOWN_UNSUPPORTED: Record = { - clickhouse: "ClickHouse is not yet supported. Use the bash tool with `clickhouse-client` or `curl` to query ClickHouse directly.", cassandra: "Cassandra is not yet supported. Use the bash tool with `cqlsh` to query Cassandra directly.", cockroachdb: "CockroachDB is not yet supported. 
It is PostgreSQL-compatible — try type: postgres instead.", timescaledb: "TimescaleDB is a PostgreSQL extension — use type: postgres instead.", @@ -157,7 +157,18 @@ async function createConnector(name: string, config: ConnectionConfig): Promise< // altimate_change start — validate password is a string for drivers that require it // Prevents cryptic SASL/SCRAM errors from database drivers - const PASSWORD_DRIVERS = new Set(["postgres", "postgresql", "redshift", "mysql", "mariadb", "sqlserver", "mssql", "oracle", "snowflake"]) + const PASSWORD_DRIVERS = new Set([ + "postgres", + "postgresql", + "redshift", + "mysql", + "mariadb", + "sqlserver", + "mssql", + "oracle", + "snowflake", + "clickhouse", + ]) if ( PASSWORD_DRIVERS.has(resolvedConfig.type.toLowerCase()) && !resolvedConfig.connection_string && @@ -221,6 +232,9 @@ async function createConnector(name: string, config: ConnectionConfig): Promise< case "@altimateai/drivers/mongodb": mod = await import("@altimateai/drivers/mongodb") break + case "@altimateai/drivers/clickhouse": + mod = await import("@altimateai/drivers/clickhouse") + break default: throw new Error(`No static import available for driver: ${driverPath}`) } diff --git a/packages/opencode/src/altimate/native/dbt/lineage.ts b/packages/opencode/src/altimate/native/dbt/lineage.ts index 318b75d70d..8136fba1b8 100644 --- a/packages/opencode/src/altimate/native/dbt/lineage.ts +++ b/packages/opencode/src/altimate/native/dbt/lineage.ts @@ -6,10 +6,7 @@ import * as fs from "fs" import * as core from "@altimateai/altimate-core" -import type { - DbtLineageParams, - DbtLineageResult, -} from "../types" +import type { DbtLineageParams, DbtLineageResult } from "../types" /** * Compute column-level lineage for a dbt model. @@ -65,9 +62,7 @@ export function dbtLineage(params: DbtLineageParams): DbtLineageResult { // Delegate to altimate-core column_lineage let rawLineage: Record try { - const schema = schemaContext - ? 
core.Schema.fromJson(JSON.stringify(schemaContext)) - : undefined + const schema = schemaContext ? core.Schema.fromJson(JSON.stringify(schemaContext)) : undefined const result = core.columnLineage(sql, dialect, schema) rawLineage = JSON.parse(JSON.stringify(result)) } catch (e) { @@ -113,6 +108,7 @@ function detectDialect(manifest: any, modelNode: any): string { postgres: "postgres", redshift: "redshift", duckdb: "duckdb", + clickhouse: "clickhouse", } return dialectMap[adapter] || adapter } diff --git a/packages/opencode/src/altimate/native/finops/query-history.ts b/packages/opencode/src/altimate/native/finops/query-history.ts index a59241e83d..dd4e48f1ed 100644 --- a/packages/opencode/src/altimate/native/finops/query-history.ts +++ b/packages/opencode/src/altimate/native/finops/query-history.ts @@ -5,10 +5,7 @@ */ import * as Registry from "../connections/registry" -import type { - QueryHistoryParams, - QueryHistoryResult, -} from "../types" +import type { QueryHistoryParams, QueryHistoryResult } from "../types" // --------------------------------------------------------------------------- // SQL templates @@ -108,6 +105,31 @@ ORDER BY start_time DESC LIMIT ? 
` +const CLICKHOUSE_HISTORY_SQL = ` +SELECT + query_id, + query as query_text, + query_kind as query_type, + user as user_name, + '' as warehouse_name, + '' as warehouse_size, + multiIf(exception_code = 0, 'SUCCESS', 'FAILED') as execution_status, + toString(exception_code) as error_code, + exception as error_message, + event_time as start_time, + event_time + query_duration_ms / 1000 as end_time, + query_duration_ms / 1000.0 as execution_time_sec, + read_bytes as bytes_scanned, + result_rows as rows_produced, + 0 as credits_used_cloud_services +FROM system.query_log +WHERE type = 'QueryFinish' + AND event_date >= today() - {days:UInt32} + AND is_initial_query = 1 +ORDER BY event_time DESC +LIMIT {limit:UInt32} +` + // --------------------------------------------------------------------------- // Helpers // --------------------------------------------------------------------------- @@ -119,7 +141,11 @@ function getWhType(warehouse: string): string { } function buildHistoryQuery( - whType: string, days: number, limit: number, user?: string, warehouseFilter?: string, + whType: string, + days: number, + limit: number, + user?: string, + warehouseFilter?: string, ): { sql: string; binds: any[] } | null { if (whType === "snowflake") { const binds: any[] = [-days] @@ -127,9 +153,7 @@ function buildHistoryQuery( const whF = warehouseFilter ? 
(binds.push(warehouseFilter), "AND warehouse_name = ?") : "" binds.push(limit) return { - sql: SNOWFLAKE_HISTORY_SQL - .replace("{user_filter}", userF) - .replace("{warehouse_filter}", whF), + sql: SNOWFLAKE_HISTORY_SQL.replace("{user_filter}", userF).replace("{warehouse_filter}", whF), binds, } } @@ -142,6 +166,13 @@ function buildHistoryQuery( if (whType === "databricks") { return { sql: DATABRICKS_HISTORY_SQL, binds: [days, limit] } } + if (whType === "clickhouse") { + const sql = CLICKHOUSE_HISTORY_SQL.replace("{days:UInt32}", String(Math.floor(Number(days)))).replace( + "{limit:UInt32}", + String(Math.floor(Number(limit))), + ) + return { sql, binds: [] } + } if (whType === "duckdb") { return null // DuckDB has no native query history } @@ -199,9 +230,7 @@ export async function getQueryHistory(params: QueryHistoryParams): Promise 0 - ? Math.round((totalTime / queries.length) * 100) / 100 - : 0, + avg_execution_time_sec: queries.length > 0 ? Math.round((totalTime / queries.length) * 100) / 100 : 0, } return { @@ -226,5 +255,6 @@ export const SQL_TEMPLATES = { POSTGRES_HISTORY_SQL, BIGQUERY_HISTORY_SQL, DATABRICKS_HISTORY_SQL, + CLICKHOUSE_HISTORY_SQL, buildHistoryQuery, } diff --git a/packages/opencode/src/altimate/tools/project-scan.ts b/packages/opencode/src/altimate/tools/project-scan.ts index e65be9af7d..6569dda505 100644 --- a/packages/opencode/src/altimate/tools/project-scan.ts +++ b/packages/opencode/src/altimate/tools/project-scan.ts @@ -221,6 +221,18 @@ export async function detectEnvVars(): Promise { database: ["SQLITE_PATH", "SQLITE_DATABASE"], }, }, + { + type: "clickhouse", + signals: ["CLICKHOUSE_HOST", "CLICKHOUSE_URL"], + configMap: { + host: "CLICKHOUSE_HOST", + port: "CLICKHOUSE_PORT", + database: ["CLICKHOUSE_DB", "CLICKHOUSE_DATABASE"], + user: ["CLICKHOUSE_USER", "CLICKHOUSE_USERNAME"], + password: "CLICKHOUSE_PASSWORD", + connection_string: "CLICKHOUSE_URL", + }, + }, ] for (const wh of warehouses) { @@ -228,10 +240,20 @@ export async 
function detectEnvVars(): Promise { if (!matchedSignal) continue const sensitiveKeys = new Set([ - "password", "access_token", "token", "connection_string", - "private_key_path", "private_key", "private_key_passphrase", - "credentials_json", "keyfile_json", "ssl_key", "ssl_cert", "ssl_ca", - "oauth_client_secret", "passcode", + "password", + "access_token", + "token", + "connection_string", + "private_key_path", + "private_key", + "private_key_passphrase", + "credentials_json", + "keyfile_json", + "ssl_key", + "ssl_cert", + "ssl_ca", + "oauth_client_secret", + "passcode", ]) const config: Record = {} for (const [key, envNames] of Object.entries(wh.configMap)) { @@ -498,7 +520,9 @@ export const ProjectScanTool = Tool.define("project_scan", { if (dbtProject.manifestPath) { lines.push(` ✓ manifest.json found`) if (dbtManifest) { - lines.push(` Models: ${dbtManifest.model_count}, Sources: ${dbtManifest.source_count}, Tests: ${dbtManifest.test_count}`) + lines.push( + ` Models: ${dbtManifest.model_count}, Sources: ${dbtManifest.source_count}, Tests: ${dbtManifest.test_count}`, + ) } } else { lines.push(` ✗ No manifest.json (run dbt compile or dbt build)`) @@ -612,12 +636,14 @@ export const ProjectScanTool = Tool.define("project_scan", { // Config Files lines.push("") lines.push("## Config Files") - lines.push(configFiles.altimateConfig ? "✓ .opencode/altimate-code.json" : "✗ .opencode/altimate-code.json (not found)") + lines.push( + configFiles.altimateConfig ? "✓ .opencode/altimate-code.json" : "✗ .opencode/altimate-code.json (not found)", + ) lines.push(configFiles.sqlfluff ? "✓ .sqlfluff" : "✗ .sqlfluff (not found)") lines.push(configFiles.preCommit ? 
"✓ .pre-commit-config.yaml" : "✗ .pre-commit-config.yaml (not found)") // Emit environment census telemetry - const warehouseTypes = [...new Set(existingConnections.map(c => c.type))] + const warehouseTypes = [...new Set(existingConnections.map((c) => c.type))] const connectionSources: string[] = [] if (connections.alreadyConfigured.length > 0) connectionSources.push("configured") if (connections.newFromDbt.length > 0) connectionSources.push("dbt-profile") @@ -636,7 +662,9 @@ export const ProjectScanTool = Tool.define("project_scan", { if (Flag.OPENCODE_ENABLE_EXA) enabledFlags.push("exa") if (Flag.OPENCODE_ENABLE_QUESTION_TOOL) enabledFlags.push("question_tool") - const skillCount = await Skill.all().then(s => s.length).catch(() => 0) + const skillCount = await Skill.all() + .then((s) => s.length) + .catch(() => 0) Telemetry.track({ type: "environment_census", diff --git a/packages/opencode/src/altimate/tools/warehouse-add.ts b/packages/opencode/src/altimate/tools/warehouse-add.ts index aa9d20a8b4..3758afaf1c 100644 --- a/packages/opencode/src/altimate/tools/warehouse-add.ts +++ b/packages/opencode/src/altimate/tools/warehouse-add.ts @@ -11,10 +11,8 @@ export const WarehouseAddTool = Tool.define("warehouse_add", { "Add a new warehouse connection. Stores credentials securely in OS keyring when available, metadata in connections.json.", parameters: z.object({ name: z.string().describe("Name for the warehouse connection"), - config: z - .record(z.string(), z.unknown()) - .describe( - `Connection configuration. Must include "type". Field aliases (camelCase, dbt names) are auto-normalized. Canonical fields per type: + config: z.record(z.string(), z.unknown()).describe( + `Connection configuration. Must include "type". Field aliases (camelCase, dbt names) are auto-normalized. 
Canonical fields per type: - postgres: host, port, database, user, password, ssl, connection_string, statement_timeout - snowflake: account, user, password, database, schema, warehouse, role, private_key_path, private_key_passphrase, private_key (inline PEM), authenticator (oauth/externalbrowser/okta URL), token - bigquery: project, credentials_path (service account JSON file), credentials_json (inline JSON), location, dataset @@ -25,16 +23,17 @@ export const WarehouseAddTool = Tool.define("warehouse_add", { - oracle: connection_string (or host, port, service_name), user, password - duckdb: path (file path or ":memory:") - sqlite: path (file path) +- clickhouse: host, port, database, user, password, protocol (http/https), connection_string, request_timeout, tls_ca_cert, tls_cert, tls_key, clickhouse_settings Snowflake auth examples: (1) Password: {"type":"snowflake","account":"xy12345","user":"admin","password":"secret","warehouse":"WH","database":"db"}. (2) Key-pair: {"type":"snowflake","account":"xy12345","user":"admin","private_key_path":"/path/rsa_key.p8","warehouse":"WH","database":"db"}. (3) OAuth: {"type":"snowflake","account":"xy12345","authenticator":"oauth","token":"","warehouse":"WH","database":"db"}. (4) SSO: {"type":"snowflake","account":"xy12345","user":"admin","authenticator":"externalbrowser","warehouse":"WH","database":"db"}. IMPORTANT: For private key file paths, always use "private_key_path" (not "private_key").`, - ), + ), }), async execute(args, ctx) { if (!args.config.type) { return { title: `Add '${args.name}': FAILED`, metadata: { success: false, name: args.name, type: "" }, - output: `Missing required field "type" in config. Specify the database type (postgres, snowflake, duckdb, mysql, sqlserver, bigquery, databricks, redshift).`, + output: `Missing required field "type" in config. 
Specify the database type (postgres, snowflake, bigquery, databricks, redshift, clickhouse, duckdb, mysql, sqlserver, oracle, sqlite, mongodb).`, } } @@ -56,9 +55,7 @@ IMPORTANT: For private key file paths, always use "private_key_path" (not "priva const [schemaCache, warehouseList, dbtInfo] = await Promise.all([ Dispatcher.call("schema.cache_status", {}).catch(() => null), Dispatcher.call("warehouse.list", {}).catch(() => ({ warehouses: [] })), - import("./project-scan") - .then((m) => m.detectDbtProject(process.cwd())) - .catch(() => ({ found: false })), + import("./project-scan").then((m) => m.detectDbtProject(process.cwd())).catch(() => ({ found: false })), ]) const schemaIndexed = (schemaCache?.total_tables ?? 0) > 0 const dbtDetected = dbtInfo.found @@ -73,9 +70,7 @@ IMPORTANT: For private key file paths, always use "private_key_path" (not "priva return { suggestionCtx, schemaIndexed, dbtDetected } })() - const timeoutPromise = new Promise((resolve) => - setTimeout(() => resolve(null), SUGGESTION_TIMEOUT_MS), - ) + const timeoutPromise = new Promise((resolve) => setTimeout(() => resolve(null), SUGGESTION_TIMEOUT_MS)) const suggestionResult = await Promise.race([suggestionPromise, timeoutPromise]) if (suggestionResult) { diff --git a/packages/opencode/test/altimate/driver-normalize.test.ts b/packages/opencode/test/altimate/driver-normalize.test.ts index e1f2d4d002..1cd4d43d86 100644 --- a/packages/opencode/test/altimate/driver-normalize.test.ts +++ b/packages/opencode/test/altimate/driver-normalize.test.ts @@ -793,3 +793,130 @@ describe("normalizeConfig — MongoDB", () => { expect(result.auth_source).toBe("admin") }) }) + +// --------------------------------------------------------------------------- +// normalizeConfig — ClickHouse aliases +// --------------------------------------------------------------------------- + +describe("normalizeConfig — ClickHouse", () => { + test("canonical clickhouse config passes through unchanged", () => { + const config = { + 
type: "clickhouse", + host: "localhost", + port: 8123, + database: "default", + user: "default", + password: "secret", + } + expect(normalizeConfig(config)).toEqual(config) + }) + + test("connectionString → connection_string", () => { + const result = normalizeConfig({ + type: "clickhouse", + connectionString: "http://localhost:8123", + }) + expect(result.connection_string).toBe("http://localhost:8123") + expect(result.connectionString).toBeUndefined() + }) + + test("uri → connection_string", () => { + const result = normalizeConfig({ + type: "clickhouse", + uri: "http://localhost:8123", + }) + expect(result.connection_string).toBe("http://localhost:8123") + expect(result.uri).toBeUndefined() + }) + + test("url → connection_string", () => { + const result = normalizeConfig({ + type: "clickhouse", + url: "https://my-ch.cloud:8443", + }) + expect(result.connection_string).toBe("https://my-ch.cloud:8443") + expect(result.url).toBeUndefined() + }) + + test("connection_string takes precedence over url alias", () => { + const result = normalizeConfig({ + type: "clickhouse", + connection_string: "http://correct:8123", + url: "http://wrong:8123", + }) + expect(result.connection_string).toBe("http://correct:8123") + expect(result.url).toBeUndefined() + }) + + test("username → user", () => { + const result = normalizeConfig({ + type: "clickhouse", + username: "analytics", + }) + expect(result.user).toBe("analytics") + expect(result.username).toBeUndefined() + }) + + test("dbname → database", () => { + const result = normalizeConfig({ + type: "clickhouse", + dbname: "analytics", + }) + expect(result.database).toBe("analytics") + expect(result.dbname).toBeUndefined() + }) + + test("requestTimeout → request_timeout", () => { + const result = normalizeConfig({ + type: "clickhouse", + requestTimeout: 60000, + }) + expect(result.request_timeout).toBe(60000) + expect(result.requestTimeout).toBeUndefined() + }) + + test("timeout → request_timeout", () => { + const result = 
normalizeConfig({ + type: "clickhouse", + timeout: 30000, + }) + expect(result.request_timeout).toBe(30000) + expect(result.timeout).toBeUndefined() + }) + + test("tlsCaCert → tls_ca_cert", () => { + const result = normalizeConfig({ + type: "clickhouse", + tlsCaCert: "/path/to/ca.pem", + }) + expect(result.tls_ca_cert).toBe("/path/to/ca.pem") + expect(result.tlsCaCert).toBeUndefined() + }) + + test("ssl_ca → tls_ca_cert", () => { + const result = normalizeConfig({ + type: "clickhouse", + ssl_ca: "/path/to/ca.pem", + }) + expect(result.tls_ca_cert).toBe("/path/to/ca.pem") + expect(result.ssl_ca).toBeUndefined() + }) + + test("tlsCert → tls_cert", () => { + const result = normalizeConfig({ + type: "clickhouse", + tlsCert: "/path/to/cert.pem", + }) + expect(result.tls_cert).toBe("/path/to/cert.pem") + expect(result.tlsCert).toBeUndefined() + }) + + test("tlsKey → tls_key", () => { + const result = normalizeConfig({ + type: "clickhouse", + tlsKey: "/path/to/key.pem", + }) + expect(result.tls_key).toBe("/path/to/key.pem") + expect(result.tlsKey).toBeUndefined() + }) +}) diff --git a/packages/opencode/test/altimate/drivers-clickhouse-e2e.test.ts b/packages/opencode/test/altimate/drivers-clickhouse-e2e.test.ts new file mode 100644 index 0000000000..db10de5a08 --- /dev/null +++ b/packages/opencode/test/altimate/drivers-clickhouse-e2e.test.ts @@ -0,0 +1,667 @@ +import { describe, expect, test, beforeAll, afterAll } from "bun:test" +import { execSync } from "child_process" +import { createConnection } from "net" + +// --------------------------------------------------------------------------- +// Fast skip: only run when CI services are configured or Docker is available +// --------------------------------------------------------------------------- + +const HAS_CI_SERVICES = !!process.env.TEST_CLICKHOUSE_HOST + +// Only run Docker tests when explicitly opted in via DRIVER_E2E_DOCKER=1 +const DOCKER_OPT_IN = process.env.DRIVER_E2E_DOCKER === "1" + +function 
isDockerAvailable(): boolean { + if (HAS_CI_SERVICES) return true + if (!DOCKER_OPT_IN) return false + try { + execSync("docker info", { stdio: "ignore", timeout: 3000 }) + return true + } catch { + return false + } +} + +function waitForPort(port: number, timeout = 30000): Promise { + return new Promise((resolve, reject) => { + const start = Date.now() + const attempt = () => { + const sock = createConnection({ host: "127.0.0.1", port }) + sock.once("connect", () => { + sock.destroy() + resolve() + }) + sock.once("error", () => { + sock.destroy() + if (Date.now() - start > timeout) { + reject(new Error(`Port ${port} not ready after ${timeout}ms`)) + } else { + setTimeout(attempt, 500) + } + }) + } + attempt() + }) +} + +/** + * Wait for ClickHouse to be ready by retrying a connect+query cycle. + * ClickHouse may accept TCP before being fully ready. + */ +async function waitForDbReady( + connectFn: () => Promise<{ connector: any; testQuery: string }>, + timeout = 60000, +): Promise { + const start = Date.now() + let lastErr: any + while (Date.now() - start < timeout) { + try { + const { connector, testQuery } = await connectFn() + await connector.connect() + await connector.execute(testQuery) + return connector + } catch (e: any) { + lastErr = e + await new Promise((r) => setTimeout(r, 2000)) + } + } + throw new Error(`ClickHouse not ready after ${timeout}ms: ${lastErr?.message}`) +} + +function dockerRm(name: string) { + try { + execSync(`docker rm -f ${name}`, { stdio: "ignore", timeout: 10000 }) + } catch { + // container may not exist + } +} + +function dockerRun(args: string) { + execSync(`docker run ${args}`, { stdio: "ignore", timeout: 120000 }) +} + +const DOCKER = isDockerAvailable() + +// --------------------------------------------------------------------------- +// ClickHouse E2E — Latest stable +// --------------------------------------------------------------------------- + +const CH_CONTAINER = "altimate-test-clickhouse" +const CH_HOST = 
process.env.TEST_CLICKHOUSE_HOST || "127.0.0.1" +const CH_PORT = Number(process.env.TEST_CLICKHOUSE_PORT) || 18123 +const CH_PASSWORD = process.env.TEST_CLICKHOUSE_PASSWORD || "" +const CH_USER = process.env.TEST_CLICKHOUSE_USER || "default" +const CH_USE_CI = !!process.env.TEST_CLICKHOUSE_HOST + +describe.skipIf(!DOCKER && !CH_USE_CI)("ClickHouse Driver E2E", () => { + let connector: any + + beforeAll(async () => { + if (!CH_USE_CI) { + dockerRm(CH_CONTAINER) + dockerRun( + `-d --name ${CH_CONTAINER} ` + + `-p ${CH_PORT}:8123 ` + + `-e CLICKHOUSE_DB=testdb ` + + `-e CLICKHOUSE_USER=${CH_USER} ` + + `-e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 ` + + (CH_PASSWORD ? `-e "CLICKHOUSE_PASSWORD=${CH_PASSWORD}" ` : "") + + `clickhouse/clickhouse-server:latest`, + ) + } + await waitForPort(CH_PORT, 60000) + const { connect } = await import("@altimateai/drivers/clickhouse") + connector = await waitForDbReady(async () => { + const c = await connect({ + type: "clickhouse", + host: CH_HOST, + port: CH_PORT, + user: CH_USER, + password: CH_PASSWORD, + database: "testdb", + }) + return { connector: c, testQuery: "SELECT 1" } + }, 60000) + }, 150000) + + afterAll(async () => { + if (connector) { + try { + await connector.close() + } catch {} + } + dockerRm(CH_CONTAINER) + }) + + test("connect with host/port/user", () => { + expect(connector).toBeDefined() + }) + + test("execute SELECT query", async () => { + const result = await connector.execute("SELECT 1 AS num, 'hello' AS greeting") + expect(result.columns).toEqual(["num", "greeting"]) + expect(result.rows[0][0]).toBe(1) + expect(result.rows[0][1]).toBe("hello") + expect(result.row_count).toBe(1) + expect(result.truncated).toBe(false) + }) + + test("execute CREATE TABLE + INSERT + SELECT", async () => { + await connector.execute( + `CREATE TABLE IF NOT EXISTS testdb.test_items ( + id UInt32, + name String, + active UInt8 DEFAULT 1 + ) ENGINE = MergeTree() + ORDER BY id`, + ) + await connector.execute( + `INSERT INTO 
testdb.test_items (id, name, active) + VALUES (1, 'alpha', 1), (2, 'beta', 0), (3, 'gamma', 1)`, + ) + const result = await connector.execute("SELECT id, name, active FROM testdb.test_items ORDER BY id") + expect(result.columns).toEqual(["id", "name", "active"]) + expect(result.row_count).toBe(3) + expect(result.rows[0][1]).toBe("alpha") + expect(result.rows[1][1]).toBe("beta") + expect(result.rows[2][1]).toBe("gamma") + }) + + test("listSchemas (SHOW DATABASES)", async () => { + const schemas = await connector.listSchemas() + expect(schemas).toContain("testdb") + expect(schemas).toContain("system") + expect(schemas).toContain("default") + }) + + test("listTables", async () => { + const tables = await connector.listTables("testdb") + const testTable = tables.find((t: any) => t.name === "test_items") + expect(testTable).toBeDefined() + expect(testTable?.type).toBe("table") + }) + + test("describeTable", async () => { + const columns = await connector.describeTable("testdb", "test_items") + expect(columns.length).toBeGreaterThanOrEqual(3) + const idCol = columns.find((c: any) => c.name === "id") + expect(idCol).toBeDefined() + expect(idCol?.data_type).toBe("UInt32") + expect(idCol?.nullable).toBe(false) + const nameCol = columns.find((c: any) => c.name === "name") + expect(nameCol).toBeDefined() + expect(nameCol?.data_type).toBe("String") + }) + + test("handles LIMIT correctly", async () => { + // Insert more rows + await connector.execute( + `INSERT INTO testdb.test_items (id, name) + VALUES (4, 'd'), (5, 'e'), (6, 'f'), (7, 'g'), (8, 'h')`, + ) + const result = await connector.execute("SELECT * FROM testdb.test_items ORDER BY id", 2) + expect(result.row_count).toBe(2) + expect(result.truncated).toBe(true) + }) + + test("handles non-SELECT queries (DDL)", async () => { + const result = await connector.execute("CREATE TABLE IF NOT EXISTS testdb.temp_table (x UInt32) ENGINE = Memory") + // DDL returns empty + expect(result.columns).toEqual([]) + 
expect(result.row_count).toBe(0) + // Clean up + await connector.execute("DROP TABLE IF EXISTS testdb.temp_table") + }) + + test("ClickHouse-specific: SHOW queries", async () => { + const result = await connector.execute("SHOW TABLES FROM testdb") + expect(result.row_count).toBeGreaterThan(0) + expect(result.columns.length).toBeGreaterThan(0) + }) + + test("ClickHouse-specific: system tables", async () => { + const result = await connector.execute("SELECT name, value FROM system.settings WHERE name = 'max_threads' LIMIT 1") + expect(result.row_count).toBe(1) + expect(result.columns).toContain("name") + expect(result.columns).toContain("value") + }) + + test("ClickHouse-specific: Nullable columns", async () => { + await connector.execute( + `CREATE TABLE IF NOT EXISTS testdb.nullable_test ( + id UInt32, + name Nullable(String), + score Nullable(Float64) + ) ENGINE = MergeTree() + ORDER BY id`, + ) + const columns = await connector.describeTable("testdb", "nullable_test") + const nameCol = columns.find((c: any) => c.name === "name") + expect(nameCol?.nullable).toBe(true) + expect(nameCol?.data_type).toBe("Nullable(String)") + const idCol = columns.find((c: any) => c.name === "id") + expect(idCol?.nullable).toBe(false) + // Clean up + await connector.execute("DROP TABLE IF EXISTS testdb.nullable_test") + }) + + test("ClickHouse-specific: various data types", async () => { + await connector.execute( + `CREATE TABLE IF NOT EXISTS testdb.type_test ( + id UInt64, + name String, + amount Decimal(18, 4), + created_at DateTime, + tags Array(String), + metadata Map(String, String), + ip IPv4 + ) ENGINE = MergeTree() + ORDER BY id`, + ) + await connector.execute( + `INSERT INTO testdb.type_test (id, name, amount, created_at, tags, metadata, ip) + VALUES (1, 'test', 123.4567, '2025-01-15 10:30:00', ['a', 'b'], {'key': 'val'}, '127.0.0.1')`, + ) + const result = await connector.execute("SELECT * FROM testdb.type_test") + expect(result.row_count).toBe(1) + 
expect(result.rows[0][0]).toBe("1") // UInt64 comes as string in JSON + expect(result.rows[0][1]).toBe("test") + + const columns = await connector.describeTable("testdb", "type_test") + expect(columns.length).toBe(7) + const amountCol = columns.find((c: any) => c.name === "amount") + expect(amountCol?.data_type).toBe("Decimal(18, 4)") + const tagsCol = columns.find((c: any) => c.name === "tags") + expect(tagsCol?.data_type).toBe("Array(String)") + // Clean up + await connector.execute("DROP TABLE IF EXISTS testdb.type_test") + }) + + test("ClickHouse-specific: MergeTree engine variants", async () => { + await connector.execute( + `CREATE TABLE IF NOT EXISTS testdb.replacing_test ( + id UInt32, + name String, + version UInt32 + ) ENGINE = ReplacingMergeTree(version) + ORDER BY id`, + ) + const tables = await connector.listTables("testdb") + const replacingTable = tables.find((t: any) => t.name === "replacing_test") + expect(replacingTable).toBeDefined() + expect(replacingTable?.type).toBe("table") + // Clean up + await connector.execute("DROP TABLE IF EXISTS testdb.replacing_test") + }) + + test("ClickHouse-specific: views", async () => { + await connector.execute( + `CREATE VIEW IF NOT EXISTS testdb.test_view AS + SELECT id, name FROM testdb.test_items WHERE active = 1`, + ) + const tables = await connector.listTables("testdb") + const view = tables.find((t: any) => t.name === "test_view") + expect(view).toBeDefined() + expect(view?.type).toBe("view") + // Query the view + const result = await connector.execute("SELECT * FROM testdb.test_view ORDER BY id") + expect(result.row_count).toBeGreaterThan(0) + // Clean up + await connector.execute("DROP VIEW IF EXISTS testdb.test_view") + }) + + test("ClickHouse-specific: materialized views", async () => { + await connector.execute( + `CREATE TABLE IF NOT EXISTS testdb.mv_target ( + active UInt8, + cnt UInt64 + ) ENGINE = SummingMergeTree() + ORDER BY active`, + ) + await connector.execute( + `CREATE MATERIALIZED VIEW IF 
NOT EXISTS testdb.test_mv + TO testdb.mv_target AS + SELECT active, count() AS cnt FROM testdb.test_items GROUP BY active`, + ) + const tables = await connector.listTables("testdb") + const mv = tables.find((t: any) => t.name === "test_mv") + expect(mv).toBeDefined() + expect(mv?.type).toBe("view") + // Clean up + await connector.execute("DROP VIEW IF EXISTS testdb.test_mv") + await connector.execute("DROP TABLE IF EXISTS testdb.mv_target") + }) + + test("ClickHouse-specific: EXPLAIN query", async () => { + const result = await connector.execute("EXPLAIN SELECT * FROM testdb.test_items WHERE active = 1") + expect(result.row_count).toBeGreaterThan(0) + }) + + test("close", async () => { + // Clean up all remaining test tables + await connector.execute("DROP TABLE IF EXISTS testdb.test_items") + await connector.close() + connector = null + }) +}) + +// --------------------------------------------------------------------------- +// ClickHouse LTS version E2E (23.8 — oldest non-EOL LTS) +// --------------------------------------------------------------------------- + +const CH_LTS_CONTAINER = "altimate-test-clickhouse-lts" +const CH_LTS_PORT = Number(process.env.TEST_CLICKHOUSE_LTS_PORT) || 18124 +const CH_LTS_USE_CI = !!process.env.TEST_CLICKHOUSE_LTS_HOST + +describe.skipIf(!DOCKER && !CH_LTS_USE_CI)("ClickHouse Driver E2E — LTS 23.8", () => { + let connector: any + + beforeAll(async () => { + if (!CH_LTS_USE_CI) { + dockerRm(CH_LTS_CONTAINER) + dockerRun( + `-d --name ${CH_LTS_CONTAINER} ` + + `-p ${CH_LTS_PORT}:8123 ` + + `-e CLICKHOUSE_DB=testdb ` + + `clickhouse/clickhouse-server:23.8`, + ) + } + const host = process.env.TEST_CLICKHOUSE_LTS_HOST || "127.0.0.1" + await waitForPort(CH_LTS_PORT, 60000) + const { connect } = await import("@altimateai/drivers/clickhouse") + connector = await waitForDbReady(async () => { + const c = await connect({ + type: "clickhouse", + host, + port: CH_LTS_PORT, + user: "default", + database: "testdb", + }) + return { connector: c, 
testQuery: "SELECT 1" } + }, 60000) + }, 150000) + + afterAll(async () => { + if (connector) { + try { + await connector.close() + } catch {} + } + dockerRm(CH_LTS_CONTAINER) + }) + + test("connects to LTS 23.8", () => { + expect(connector).toBeDefined() + }) + + test("SELECT query works on LTS", async () => { + const result = await connector.execute("SELECT 1 AS num, version() AS ver") + expect(result.row_count).toBe(1) + expect(result.rows[0][1]).toMatch(/^23\.8/) + }) + + test("DDL + DML works on LTS", async () => { + await connector.execute( + `CREATE TABLE IF NOT EXISTS testdb.lts_test ( + id UInt32, + name String + ) ENGINE = MergeTree() + ORDER BY id`, + ) + await connector.execute(`INSERT INTO testdb.lts_test VALUES (1, 'alpha'), (2, 'beta')`) + const result = await connector.execute("SELECT * FROM testdb.lts_test ORDER BY id") + expect(result.row_count).toBe(2) + // Clean up + await connector.execute("DROP TABLE IF EXISTS testdb.lts_test") + }) + + test("listSchemas works on LTS", async () => { + const schemas = await connector.listSchemas() + expect(schemas).toContain("testdb") + expect(schemas).toContain("system") + }) + + test("listTables works on LTS", async () => { + await connector.execute(`CREATE TABLE IF NOT EXISTS testdb.lts_tbl (x UInt32) ENGINE = Memory`) + const tables = await connector.listTables("testdb") + expect(tables.length).toBeGreaterThan(0) + // Clean up + await connector.execute("DROP TABLE IF EXISTS testdb.lts_tbl") + }) + + test("describeTable works on LTS", async () => { + await connector.execute( + `CREATE TABLE IF NOT EXISTS testdb.lts_desc ( + id UInt32, + val Nullable(String) + ) ENGINE = MergeTree() ORDER BY id`, + ) + const columns = await connector.describeTable("testdb", "lts_desc") + expect(columns.length).toBe(2) + const valCol = columns.find((c: any) => c.name === "val") + expect(valCol?.nullable).toBe(true) + // Clean up + await connector.execute("DROP TABLE IF EXISTS testdb.lts_desc") + }) + + test("close LTS 
connection", async () => { + await connector.close() + connector = null + }) +}) + +// --------------------------------------------------------------------------- +// ClickHouse LTS 24.3 E2E +// --------------------------------------------------------------------------- + +const CH_243_CONTAINER = "altimate-test-clickhouse-243" +const CH_243_PORT = Number(process.env.TEST_CLICKHOUSE_243_PORT) || 18125 + +describe.skipIf(!DOCKER && !CH_USE_CI)("ClickHouse Driver E2E — LTS 24.3", () => { + let connector: any + + beforeAll(async () => { + if (!CH_USE_CI) { + dockerRm(CH_243_CONTAINER) + dockerRun( + `-d --name ${CH_243_CONTAINER} ` + + `-p ${CH_243_PORT}:8123 ` + + `-e CLICKHOUSE_DB=testdb ` + + `-e CLICKHOUSE_USER=default ` + + `-e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 ` + + `clickhouse/clickhouse-server:24.3`, + ) + } + await waitForPort(CH_243_PORT, 60000) + const { connect } = await import("@altimateai/drivers/clickhouse") + connector = await waitForDbReady(async () => { + const c = await connect({ + type: "clickhouse", + host: CH_HOST, + port: CH_243_PORT, + user: "default", + database: "testdb", + }) + return { connector: c, testQuery: "SELECT 1" } + }, 60000) + }, 150000) + + afterAll(async () => { + if (connector) { + try { + await connector.close() + } catch {} + } + dockerRm(CH_243_CONTAINER) + }) + + test("connects to LTS 24.3", () => { + expect(connector).toBeDefined() + }) + + test("version check", async () => { + const result = await connector.execute("SELECT version() AS ver") + expect(result.rows[0][0]).toMatch(/^24\.3/) + }) + + test("full CRUD works on 24.3", async () => { + await connector.execute( + `CREATE TABLE IF NOT EXISTS testdb.v243_test ( + id UInt32, + name String, + score Float64 + ) ENGINE = MergeTree() ORDER BY id`, + ) + await connector.execute(`INSERT INTO testdb.v243_test VALUES (1, 'alpha', 9.5), (2, 'beta', 8.2)`) + const result = await connector.execute("SELECT * FROM testdb.v243_test ORDER BY id") + 
expect(result.row_count).toBe(2) + // ALTER TABLE (ClickHouse supports lightweight deletes in 24.3) + await connector.execute(`ALTER TABLE testdb.v243_test DELETE WHERE id = 2`) + // Clean up + await connector.execute("DROP TABLE IF EXISTS testdb.v243_test") + }) + + test("close 24.3 connection", async () => { + await connector.close() + connector = null + }) +}) + +// --------------------------------------------------------------------------- +// ClickHouse LTS 24.8 E2E +// --------------------------------------------------------------------------- + +const CH_248_CONTAINER = "altimate-test-clickhouse-248" +const CH_248_PORT = Number(process.env.TEST_CLICKHOUSE_248_PORT) || 18126 + +describe.skipIf(!DOCKER && !CH_USE_CI)("ClickHouse Driver E2E — LTS 24.8", () => { + let connector: any + + beforeAll(async () => { + if (!CH_USE_CI) { + dockerRm(CH_248_CONTAINER) + dockerRun( + `-d --name ${CH_248_CONTAINER} ` + + `-p ${CH_248_PORT}:8123 ` + + `-e CLICKHOUSE_DB=testdb ` + + `-e CLICKHOUSE_USER=default ` + + `-e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 ` + + `clickhouse/clickhouse-server:24.8`, + ) + } + await waitForPort(CH_248_PORT, 60000) + const { connect } = await import("@altimateai/drivers/clickhouse") + connector = await waitForDbReady(async () => { + const c = await connect({ + type: "clickhouse", + host: CH_HOST, + port: CH_248_PORT, + user: "default", + database: "testdb", + }) + return { connector: c, testQuery: "SELECT 1" } + }, 60000) + }, 150000) + + afterAll(async () => { + if (connector) { + try { + await connector.close() + } catch {} + } + dockerRm(CH_248_CONTAINER) + }) + + test("connects to LTS 24.8", () => { + expect(connector).toBeDefined() + }) + + test("version check", async () => { + const result = await connector.execute("SELECT version() AS ver") + expect(result.rows[0][0]).toMatch(/^24\.8/) + }) + + test("full lifecycle on 24.8", async () => { + await connector.execute( + `CREATE TABLE IF NOT EXISTS testdb.v248_test ( + id UInt32, + name 
String, + ts DateTime64(3) + ) ENGINE = MergeTree() ORDER BY id`, + ) + await connector.execute(`INSERT INTO testdb.v248_test VALUES (1, 'one', '2025-06-15 12:00:00.123')`) + const schemas = await connector.listSchemas() + expect(schemas).toContain("testdb") + const tables = await connector.listTables("testdb") + expect(tables.find((t: any) => t.name === "v248_test")).toBeDefined() + const cols = await connector.describeTable("testdb", "v248_test") + expect(cols.length).toBe(3) + const tsCol = cols.find((c: any) => c.name === "ts") + expect(tsCol?.data_type).toBe("DateTime64(3)") + // Clean up + await connector.execute("DROP TABLE IF EXISTS testdb.v248_test") + }) + + test("close 24.8 connection", async () => { + await connector.close() + connector = null + }) +}) + +// --------------------------------------------------------------------------- +// Connection string E2E +// --------------------------------------------------------------------------- + +describe.skipIf(!DOCKER && !CH_USE_CI)("ClickHouse Driver E2E — Connection String", () => { + let connector: any + + beforeAll(async () => { + // Reuse the main ClickHouse container from the first test suite + if (!CH_USE_CI) { + // Wait for the main container to be available (may already be running) + try { + execSync(`docker inspect ${CH_CONTAINER}`, { stdio: "ignore" }) + } catch { + // Container doesn't exist, start it + dockerRun( + `-d --name ${CH_CONTAINER} ` + + `-p ${CH_PORT}:8123 ` + + `-e CLICKHOUSE_DB=testdb ` + + `clickhouse/clickhouse-server:latest`, + ) + } + } + await waitForPort(CH_PORT, 60000) + const { connect } = await import("@altimateai/drivers/clickhouse") + connector = await waitForDbReady(async () => { + const c = await connect({ + type: "clickhouse", + connection_string: `http://${CH_HOST}:${CH_PORT}`, + database: "testdb", + }) + return { connector: c, testQuery: "SELECT 1" } + }, 60000) + }, 150000) + + afterAll(async () => { + if (connector) { + try { + await connector.close() + } catch {} 
+ } + }) + + test("connect via connection string", () => { + expect(connector).toBeDefined() + }) + + test("execute query via connection string", async () => { + const result = await connector.execute("SELECT 42 AS answer") + expect(result.rows[0][0]).toBe(42) + }) + + test("close", async () => { + await connector.close() + connector = null + }) +}) From 2faf36527ac3295afad64c9ab6d87071045cce25 Mon Sep 17 00:00:00 2001 From: anandgupta42 Date: Sun, 29 Mar 2026 08:55:35 -0700 Subject: [PATCH 2/5] fix: use `client.command()` for ClickHouse DDL/DML, fix E2E test auth - `execute()` now uses `client.command()` for INSERT/CREATE/DROP/ALTER queries instead of `client.query()` with JSONEachRow format, which caused parse errors on INSERT VALUES - Add `CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1` to all LTS Docker containers (required for passwordless default user) - Fix UInt64 assertion to handle both string and number JSON encoding Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/drivers/src/clickhouse.ts | 9 +++++++++ .../test/altimate/drivers-clickhouse-e2e.test.ts | 4 +++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/packages/drivers/src/clickhouse.ts b/packages/drivers/src/clickhouse.ts index 149f09c65a..7ac9406c24 100644 --- a/packages/drivers/src/clickhouse.ts +++ b/packages/drivers/src/clickhouse.ts @@ -64,8 +64,17 @@ export async function connect(config: ConnectionConfig): Promise { const effectiveLimit = limit ?? 
1000 let query = sql const isSelectLike = /^\s*(SELECT|WITH|SHOW|DESCRIBE|EXPLAIN|EXISTS)\b/i.test(sql) + const isDDL = + /^\s*(INSERT|CREATE|DROP|ALTER|TRUNCATE|RENAME|ATTACH|DETACH|OPTIMIZE|SYSTEM|SET|USE|GRANT|REVOKE)\b/i.test(sql) const hasDML = /\b(INSERT|CREATE|DROP|ALTER|TRUNCATE|RENAME|ATTACH|DETACH|OPTIMIZE|SYSTEM)\b/i.test(sql) + // DDL/DML: use client.command() — no result set expected + if (isDDL) { + await client.command({ query: sql.replace(/;\s*$/, "") }) + return { columns: [], rows: [], row_count: 0, truncated: false } + } + + // Read queries: use client.query() with JSONEachRow format if (isSelectLike && !hasDML && effectiveLimit && !/\bLIMIT\b/i.test(sql)) { query = `${sql.replace(/;\s*$/, "")} LIMIT ${effectiveLimit + 1}` } diff --git a/packages/opencode/test/altimate/drivers-clickhouse-e2e.test.ts b/packages/opencode/test/altimate/drivers-clickhouse-e2e.test.ts index db10de5a08..7fbfb47524 100644 --- a/packages/opencode/test/altimate/drivers-clickhouse-e2e.test.ts +++ b/packages/opencode/test/altimate/drivers-clickhouse-e2e.test.ts @@ -264,7 +264,7 @@ describe.skipIf(!DOCKER && !CH_USE_CI)("ClickHouse Driver E2E", () => { ) const result = await connector.execute("SELECT * FROM testdb.type_test") expect(result.row_count).toBe(1) - expect(result.rows[0][0]).toBe("1") // UInt64 comes as string in JSON + expect(String(result.rows[0][0])).toBe("1") // UInt64 may be string or number in JSON expect(result.rows[0][1]).toBe("test") const columns = await connector.describeTable("testdb", "type_test") @@ -363,6 +363,7 @@ describe.skipIf(!DOCKER && !CH_LTS_USE_CI)("ClickHouse Driver E2E — LTS 23.8", `-d --name ${CH_LTS_CONTAINER} ` + `-p ${CH_LTS_PORT}:8123 ` + `-e CLICKHOUSE_DB=testdb ` + + `-e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 ` + `clickhouse/clickhouse-server:23.8`, ) } @@ -627,6 +628,7 @@ describe.skipIf(!DOCKER && !CH_USE_CI)("ClickHouse Driver E2E — Connection Str `-d --name ${CH_CONTAINER} ` + `-p ${CH_PORT}:8123 ` + `-e CLICKHOUSE_DB=testdb ` 
+ + `-e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 ` + `clickhouse/clickhouse-server:latest`, ) } From 6c48729b65bd83e33982afbf750c6a2b3463383e Mon Sep 17 00:00:00 2001 From: anandgupta42 Date: Sun, 29 Mar 2026 08:57:16 -0700 Subject: [PATCH 3/5] ci: add ClickHouse E2E tests to driver-e2e CI job - Add `clickhouse/clickhouse-server:latest` as a GitHub Actions service - Add test step running `drivers-clickhouse-e2e.test.ts` with CI env vars - Add test file to change detection paths for the `drivers` filter Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/ci.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bb0604e15e..fee14adc5d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -46,6 +46,7 @@ jobs: - 'packages/opencode/test/altimate/drivers-e2e.test.ts' - 'packages/opencode/test/altimate/drivers-docker-e2e.test.ts' - 'packages/opencode/test/altimate/drivers-mongodb-e2e.test.ts' + - 'packages/opencode/test/altimate/drivers-clickhouse-e2e.test.ts' - 'packages/opencode/test/altimate/connections.test.ts' dbt-tools: - 'packages/dbt-tools/**' @@ -198,6 +199,19 @@ jobs: --health-timeout 5s --health-retries 10 + clickhouse: + image: clickhouse/clickhouse-server:latest + env: + CLICKHOUSE_DB: testdb + CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: 1 + ports: + - 18123:8123 + options: >- + --health-cmd "wget --no-verbose --tries=1 --spider http://localhost:8123/ping || exit 1" + --health-interval 5s + --health-timeout 5s + --health-retries 15 + steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 @@ -245,6 +259,13 @@ jobs: TEST_MONGODB_HOST: 127.0.0.1 TEST_MONGODB_PORT: "27017" + - name: Run ClickHouse driver E2E + run: bun test test/altimate/drivers-clickhouse-e2e.test.ts + working-directory: packages/opencode + env: + TEST_CLICKHOUSE_HOST: 127.0.0.1 + TEST_CLICKHOUSE_PORT: "18123" + # Cloud tests NOT included — they require real credentials 
# Run locally with: # ALTIMATE_CODE_CONN_SNOWFLAKE_TEST='...' bun test test/altimate/drivers-snowflake-e2e.test.ts From 46ff94c8de57e657767df91961f8a1c1 Mon Sep 17 00:00:00 2001 From: anandgupta42 Date: Sun, 29 Mar 2026 09:10:06 -0700 Subject: fix: 3 driver bugs found by adversarial testing (167 tests, 3 real failures) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ran 167 adversarial tests against real ClickHouse Docker containers covering SQL injection, unicode, NULLs, LIMIT edge cases, exotic types, error handling, large data, MergeTree variants, views, system tables, concurrent operations, and return value edge cases. **Bugs found and fixed:** 1. **DESCRIBE/EXISTS get LIMIT appended** — `isSelectLike` regex matched DESCRIBE/EXISTS but ClickHouse doesn't support LIMIT on these statements. Fix: narrowed `supportsLimit` to only `SELECT` and `WITH` queries. 2. **`limit=0` returns 0 rows** — the truncation check `rows.length > effectiveLimit` degenerates to `rows.length > 0` when the limit is 0, which is always true, causing `slice(0, 0)` to return an empty array. Fix: guard with `effectiveLimit > 0 &&` before the truncation check. 3. **`limit=0` ambiguous with "limit not provided"** — `limit ?? 1000` does preserve an explicit 0, but it obscured intent and interacted badly with the truthiness checks above; `limit === undefined ? 1000 : limit` explicitly distinguishes "not provided" (default 1000) from "explicitly zero". Changed from `??` to an explicit undefined check.
**Regression tests added (5 tests in main E2E suite):** - DESCRIBE TABLE without LIMIT error - EXISTS TABLE without LIMIT error - limit=0 returns all rows without truncation - INSERT uses `client.command()` not `client.query()` - WITH...INSERT does not get LIMIT appended Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/drivers/src/clickhouse.ts | 10 +-- .../altimate/drivers-clickhouse-e2e.test.ts | 62 +++++++++++++++++++ 2 files changed, 68 insertions(+), 4 deletions(-) diff --git a/packages/drivers/src/clickhouse.ts b/packages/drivers/src/clickhouse.ts index 7ac9406c24..a56b9d2f89 100644 --- a/packages/drivers/src/clickhouse.ts +++ b/packages/drivers/src/clickhouse.ts @@ -61,9 +61,10 @@ export async function connect(config: ConnectionConfig): Promise { }, async execute(sql: string, limit?: number, _binds?: any[]): Promise { - const effectiveLimit = limit ?? 1000 + const effectiveLimit = limit === undefined ? 1000 : limit let query = sql - const isSelectLike = /^\s*(SELECT|WITH|SHOW|DESCRIBE|EXPLAIN|EXISTS)\b/i.test(sql) + // Only SELECT and WITH...SELECT support LIMIT — SHOW/DESCRIBE/EXPLAIN/EXISTS do not + const supportsLimit = /^\s*(SELECT|WITH)\b/i.test(sql) const isDDL = /^\s*(INSERT|CREATE|DROP|ALTER|TRUNCATE|RENAME|ATTACH|DETACH|OPTIMIZE|SYSTEM|SET|USE|GRANT|REVOKE)\b/i.test(sql) const hasDML = /\b(INSERT|CREATE|DROP|ALTER|TRUNCATE|RENAME|ATTACH|DETACH|OPTIMIZE|SYSTEM)\b/i.test(sql) @@ -75,7 +76,8 @@ export async function connect(config: ConnectionConfig): Promise { } // Read queries: use client.query() with JSONEachRow format - if (isSelectLike && !hasDML && effectiveLimit && !/\bLIMIT\b/i.test(sql)) { + // Only append LIMIT for SELECT/WITH queries (not SHOW/DESCRIBE/EXPLAIN/EXISTS) + if (supportsLimit && !hasDML && effectiveLimit > 0 && !/\bLIMIT\b/i.test(sql)) { query = `${sql.replace(/;\s*$/, "")} LIMIT ${effectiveLimit + 1}` } @@ -91,7 +93,7 @@ export async function connect(config: ConnectionConfig): Promise { } const columns = 
Object.keys(rows[0]) - const truncated = rows.length > effectiveLimit + const truncated = effectiveLimit > 0 && rows.length > effectiveLimit const limitedRows = truncated ? rows.slice(0, effectiveLimit) : rows return { diff --git a/packages/opencode/test/altimate/drivers-clickhouse-e2e.test.ts b/packages/opencode/test/altimate/drivers-clickhouse-e2e.test.ts index 7fbfb47524..b552bbd549 100644 --- a/packages/opencode/test/altimate/drivers-clickhouse-e2e.test.ts +++ b/packages/opencode/test/altimate/drivers-clickhouse-e2e.test.ts @@ -337,6 +337,68 @@ describe.skipIf(!DOCKER && !CH_USE_CI)("ClickHouse Driver E2E", () => { expect(result.row_count).toBeGreaterThan(0) }) + // --- Regression tests from adversarial suite (167 tests, 3 real bugs found) --- + + test("regression: DESCRIBE TABLE does not get LIMIT appended", async () => { + // Bug: DESCRIBE matched isSelectLike regex, got LIMIT 1001 appended, + // but ClickHouse DESCRIBE doesn't support LIMIT syntax + const result = await connector.execute("DESCRIBE TABLE testdb.test_items") + expect(result.row_count).toBeGreaterThan(0) + expect(result.columns.length).toBeGreaterThan(0) + }) + + test("regression: EXISTS TABLE does not get LIMIT appended", async () => { + // Bug: EXISTS matched isSelectLike regex, got LIMIT 1001 appended, + // but ClickHouse EXISTS doesn't support LIMIT syntax + const result = await connector.execute("EXISTS TABLE testdb.test_items") + expect(result.row_count).toBe(1) + }) + + test("regression: limit=0 returns all rows (no truncation)", async () => { + // Bug: limit=0 caused truncated=true and sliced rows to 0 + // because rows.length > 0 was always true + await connector.execute( + `CREATE TABLE IF NOT EXISTS testdb.regression_limit0 (id UInt32) ENGINE = MergeTree() ORDER BY id`, + ) + await connector.execute("INSERT INTO testdb.regression_limit0 VALUES (1), (2), (3), (4), (5)") + const result = await connector.execute("SELECT * FROM testdb.regression_limit0 ORDER BY id", 0) + 
expect(result.row_count).toBe(5) + expect(result.truncated).toBe(false) + await connector.execute("DROP TABLE IF EXISTS testdb.regression_limit0") + }) + + test("regression: INSERT uses client.command() not client.query()", async () => { + // Bug: INSERT with VALUES was sent via client.query() with JSONEachRow format, + // causing ClickHouse to try parsing VALUES as JSON → CANNOT_PARSE_INPUT error + await connector.execute( + `CREATE TABLE IF NOT EXISTS testdb.regression_insert (id UInt32, val String) ENGINE = MergeTree() ORDER BY id`, + ) + await connector.execute("INSERT INTO testdb.regression_insert VALUES (1, 'a'), (2, 'b')") + const result = await connector.execute("SELECT * FROM testdb.regression_insert ORDER BY id") + expect(result.row_count).toBe(2) + expect(result.rows[0][1]).toBe("a") + await connector.execute("DROP TABLE IF EXISTS testdb.regression_insert") + }) + + test("regression: WITH...INSERT does not get LIMIT appended", async () => { + // Bug: WITH clause matched isSelectLike, causing LIMIT to be appended + // to INSERT...SELECT queries, breaking them + await connector.execute( + `CREATE TABLE IF NOT EXISTS testdb.regression_cte_insert (id UInt32, val String) ENGINE = MergeTree() ORDER BY id`, + ) + await connector.execute( + `CREATE TABLE IF NOT EXISTS testdb.regression_cte_source (id UInt32, val String) ENGINE = MergeTree() ORDER BY id`, + ) + await connector.execute("INSERT INTO testdb.regression_cte_source VALUES (1, 'x'), (2, 'y')") + await connector.execute( + "INSERT INTO testdb.regression_cte_insert SELECT * FROM testdb.regression_cte_source WHERE id <= 2", + ) + const result = await connector.execute("SELECT count() FROM testdb.regression_cte_insert") + expect(Number(result.rows[0][0])).toBe(2) + await connector.execute("DROP TABLE IF EXISTS testdb.regression_cte_insert") + await connector.execute("DROP TABLE IF EXISTS testdb.regression_cte_source") + }) + test("close", async () => { // Clean up all remaining test tables await 
connector.execute("DROP TABLE IF EXISTS testdb.test_items") From 4e2a07397f3a21b27b90ed6367b591bfcf2cccad Mon Sep 17 00:00:00 2001 From: anandgupta42 Date: Mon, 30 Mar 2026 11:05:41 -0700 Subject: [PATCH 5/5] fix: address CodeRabbit review findings for ClickHouse driver PR - Remove stale ClickHouse entry from "Unsupported Databases" doc section - Add ClickHouse to Docker auto-discovery description in docs - Add blank line around ClickHouse auth table for markdownlint MD058 - Add `text` language tag to fenced code block for markdownlint MD040 - Fail fast when `binds` passed to ClickHouse `execute()` instead of ignoring - Add `tls_key`, `tls_cert`, `tls_ca_cert` to SENSITIVE_FIELDS in credential store - Clamp `days`/`limit` values in ClickHouse query history SQL builder - Add `clickhouse`, `clickhouse+http`, `clickhouse+https` to DATABASE_URL scheme map - Make `waitForPort` accept configurable host in E2E tests - Close failed connectors during `waitForDbReady` retries in E2E tests - Add missing TLS alias tests: `ca_cert`, `ssl_cert`, `ssl_key` Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude/commands/add-database-driver.md | 8 +++--- docs/docs/configure/warehouses.md | 3 +-- docs/docs/drivers.md | 1 + packages/drivers/ADDING_A_DRIVER.md | 2 +- packages/drivers/src/clickhouse.ts | 5 +++- .../native/connections/credential-store.ts | 3 +++ .../altimate/native/finops/query-history.ts | 6 +++-- .../src/altimate/tools/project-scan.ts | 3 +++ .../test/altimate/driver-normalize.test.ts | 27 +++++++++++++++++++ .../altimate/drivers-clickhouse-e2e.test.ts | 11 +++++--- 10 files changed, 55 insertions(+), 14 deletions(-) diff --git a/.claude/commands/add-database-driver.md b/.claude/commands/add-database-driver.md index 3731faba31..5e206d35f0 100644 --- a/.claude/commands/add-database-driver.md +++ b/.claude/commands/add-database-driver.md @@ -130,12 +130,12 @@ Work through all 9 phases from the checklist. Use parallel edits where possible. 
```bash # Tests (from packages/opencode/) -cd packages/opencode && bun test test/altimate/driver-normalize.test.ts test/altimate/connections.test.ts +cd packages/opencode && bun test test/altimate/driver-normalize.test.ts test/altimate/connections.test.ts test/altimate/drivers-{database}-e2e.test.ts -# Typecheck -bun turbo typecheck +# Typecheck (from repo root) +cd "$(git rev-parse --show-toplevel)" && bun turbo typecheck -# Marker check +# Marker check (from repo root) bun run script/upstream/analyze.ts --markers --base main --strict ``` diff --git a/docs/docs/configure/warehouses.md b/docs/docs/configure/warehouses.md index 5120cfec96..1045877b24 100644 --- a/docs/docs/configure/warehouses.md +++ b/docs/docs/configure/warehouses.md @@ -380,7 +380,6 @@ The following databases are not yet natively supported, but workarounds are avai | Database | Workaround | |----------|------------| -| ClickHouse | Use the bash tool with `clickhouse-client` or `curl` to query directly | | Cassandra | Use the bash tool with `cqlsh` to query directly | | CockroachDB | PostgreSQL-compatible — use `type: postgres` | | TimescaleDB | PostgreSQL extension — use `type: postgres` | @@ -422,7 +421,7 @@ The `/discover` command can automatically detect warehouse connections from: | Source | Detection | |--------|-----------| | dbt profiles | Parses `~/.dbt/profiles.yml` | -| Docker containers | Finds running PostgreSQL, MySQL, and SQL Server containers | +| Docker containers | Finds running PostgreSQL, MySQL, SQL Server, and ClickHouse containers | | Environment variables | Scans for `SNOWFLAKE_ACCOUNT`, `PGHOST`, `DATABRICKS_HOST`, etc. | See [Warehouse Tools](../data-engineering/tools/warehouse-tools.md) for the full list of environment variable signals. 
diff --git a/docs/docs/drivers.md b/docs/docs/drivers.md index e4f00cabff..bd87b50a8e 100644 --- a/docs/docs/drivers.md +++ b/docs/docs/drivers.md @@ -136,6 +136,7 @@ altimate-dbt init --project-root /path/to/dbt/project --python-path $(which pyth | Password | `host`, `port`, `service_name`, `user`, `password` | ### ClickHouse + | Method | Config Fields | |--------|--------------| | Password | `host`, `port`, `database`, `user`, `password` | diff --git a/packages/drivers/ADDING_A_DRIVER.md b/packages/drivers/ADDING_A_DRIVER.md index b4279ddf01..5735218188 100644 --- a/packages/drivers/ADDING_A_DRIVER.md +++ b/packages/drivers/ADDING_A_DRIVER.md @@ -178,7 +178,7 @@ bun run script/upstream/analyze.ts --markers --base main --strict ## File Map -``` +```text packages/drivers/ src/ {database}.ts ← NEW: driver implementation diff --git a/packages/drivers/src/clickhouse.ts b/packages/drivers/src/clickhouse.ts index a56b9d2f89..ddd1a50d47 100644 --- a/packages/drivers/src/clickhouse.ts +++ b/packages/drivers/src/clickhouse.ts @@ -60,7 +60,10 @@ export async function connect(config: ConnectionConfig): Promise { client = createClient(clientConfig) }, - async execute(sql: string, limit?: number, _binds?: any[]): Promise { + async execute(sql: string, limit?: number, binds?: any[]): Promise { + if (binds && binds.length > 0) { + throw new Error("ClickHouse driver does not support parameterized binds — use ClickHouse query parameters instead") + } const effectiveLimit = limit === undefined ? 
1000 : limit let query = sql // Only SELECT and WITH...SELECT support LIMIT — SHOW/DESCRIBE/EXPLAIN/EXISTS do not diff --git a/packages/opencode/src/altimate/native/connections/credential-store.ts b/packages/opencode/src/altimate/native/connections/credential-store.ts index ba9433ae5b..17e51ae708 100644 --- a/packages/opencode/src/altimate/native/connections/credential-store.ts +++ b/packages/opencode/src/altimate/native/connections/credential-store.ts @@ -31,6 +31,9 @@ const SENSITIVE_FIELDS = new Set([ "ssl_key", "ssl_cert", "ssl_ca", + "tls_key", + "tls_cert", + "tls_ca_cert", ]) /** Cached keytar module (or null if unavailable). */ diff --git a/packages/opencode/src/altimate/native/finops/query-history.ts b/packages/opencode/src/altimate/native/finops/query-history.ts index dd4e48f1ed..cfba49379c 100644 --- a/packages/opencode/src/altimate/native/finops/query-history.ts +++ b/packages/opencode/src/altimate/native/finops/query-history.ts @@ -167,9 +167,11 @@ function buildHistoryQuery( return { sql: DATABRICKS_HISTORY_SQL, binds: [days, limit] } } if (whType === "clickhouse") { - const sql = CLICKHOUSE_HISTORY_SQL.replace("{days:UInt32}", String(Math.floor(Number(days)))).replace( + const clampedDays = Math.max(1, Math.min(Math.floor(Number(days)) || 30, 365)) + const clampedLimit = Math.max(1, Math.min(Math.floor(Number(limit)) || 100, 10000)) + const sql = CLICKHOUSE_HISTORY_SQL.replace("{days:UInt32}", String(clampedDays)).replace( "{limit:UInt32}", - String(Math.floor(Number(limit))), + String(clampedLimit), ) return { sql, binds: [] } } diff --git a/packages/opencode/src/altimate/tools/project-scan.ts b/packages/opencode/src/altimate/tools/project-scan.ts index 6569dda505..b8c5c97dc1 100644 --- a/packages/opencode/src/altimate/tools/project-scan.ts +++ b/packages/opencode/src/altimate/tools/project-scan.ts @@ -293,6 +293,9 @@ export async function detectEnvVars(): Promise { oracle: "oracle", duckdb: "duckdb", databricks: "databricks", + clickhouse: 
"clickhouse", + "clickhouse+http": "clickhouse", + "clickhouse+https": "clickhouse", } const dbType = schemeTypeMap[scheme] ?? "postgres" // Only add if we don't already have this type detected from other env vars diff --git a/packages/opencode/test/altimate/driver-normalize.test.ts b/packages/opencode/test/altimate/driver-normalize.test.ts index 1cd4d43d86..4ca59ef9eb 100644 --- a/packages/opencode/test/altimate/driver-normalize.test.ts +++ b/packages/opencode/test/altimate/driver-normalize.test.ts @@ -902,6 +902,24 @@ describe("normalizeConfig — ClickHouse", () => { expect(result.ssl_ca).toBeUndefined() }) + test("ca_cert → tls_ca_cert", () => { + const result = normalizeConfig({ + type: "clickhouse", + ca_cert: "/path/to/ca.pem", + }) + expect(result.tls_ca_cert).toBe("/path/to/ca.pem") + expect(result.ca_cert).toBeUndefined() + }) + + test("ssl_cert → tls_cert", () => { + const result = normalizeConfig({ + type: "clickhouse", + ssl_cert: "/path/to/cert.pem", + }) + expect(result.tls_cert).toBe("/path/to/cert.pem") + expect(result.ssl_cert).toBeUndefined() + }) + test("tlsCert → tls_cert", () => { const result = normalizeConfig({ type: "clickhouse", @@ -919,4 +937,13 @@ describe("normalizeConfig — ClickHouse", () => { expect(result.tls_key).toBe("/path/to/key.pem") expect(result.tlsKey).toBeUndefined() }) + + test("ssl_key → tls_key", () => { + const result = normalizeConfig({ + type: "clickhouse", + ssl_key: "/path/to/key.pem", + }) + expect(result.tls_key).toBe("/path/to/key.pem") + expect(result.ssl_key).toBeUndefined() + }) }) diff --git a/packages/opencode/test/altimate/drivers-clickhouse-e2e.test.ts b/packages/opencode/test/altimate/drivers-clickhouse-e2e.test.ts index 863ef5c884..3210c48586 100644 --- a/packages/opencode/test/altimate/drivers-clickhouse-e2e.test.ts +++ b/packages/opencode/test/altimate/drivers-clickhouse-e2e.test.ts @@ -22,11 +22,11 @@ function isDockerAvailable(): boolean { } } -function waitForPort(port: number, timeout = 30000): 
Promise { +function waitForPort(port: number, timeout = 30000, host = "127.0.0.1"): Promise { return new Promise((resolve, reject) => { const start = Date.now() const attempt = () => { - const sock = createConnection({ host: "127.0.0.1", port }) + const sock = createConnection({ host, port }) sock.once("connect", () => { sock.destroy() resolve() @@ -55,13 +55,16 @@ async function waitForDbReady( const start = Date.now() let lastErr: any while (Date.now() - start < timeout) { + let connector: any try { - const { connector, testQuery } = await connectFn() + const result = await connectFn() + connector = result.connector await connector.connect() - await connector.execute(testQuery) + await connector.execute(result.testQuery) return connector } catch (e: any) { lastErr = e + try { connector?.disconnect?.() } catch {} await new Promise((r) => setTimeout(r, 2000)) } }