From e4f1c8376dfd78e2ad6162581b45d3ed0ddba6e1 Mon Sep 17 00:00:00 2001 From: Rich Hankins Date: Fri, 30 Jan 2026 00:20:10 -0800 Subject: [PATCH 1/8] Add URL parser module for detecting source type from URLs - Parse GitHub URLs (owner/repo, tree/branch, commit/sha) - Parse GitLab URLs (project path, subgroups, /-/tree/branch) - Parse Bitbucket URLs (workspace/repo, src/branch, branch/name) - Fallback to website source for unknown URLs - Extract default index names from URLs - Support self-hosted GitLab and Bitbucket instances - Export parseSourceUrl and ParsedUrl from @augmentcode/context-connectors/core - Add comprehensive unit tests (19 test cases) Agent-Id: agent-8394bd07-7a81-41d0-ac95-1ca62623e6fb --- src/core/index.ts | 3 + src/core/url-parser.test.ts | 163 ++++++++++++++++++++++++++++++++++ src/core/url-parser.ts | 171 ++++++++++++++++++++++++++++++++++++ 3 files changed, 337 insertions(+) create mode 100644 src/core/url-parser.test.ts create mode 100644 src/core/url-parser.ts diff --git a/src/core/index.ts b/src/core/index.ts index 61bbb28..7f360d7 100644 --- a/src/core/index.ts +++ b/src/core/index.ts @@ -26,3 +26,6 @@ export type { ClientProduct, MCPClientInfo } from "./utils.js"; export { Indexer } from "./indexer.js"; export type { IndexerConfig } from "./indexer.js"; +export { parseSourceUrl } from "./url-parser.js"; +export type { ParsedUrl } from "./url-parser.js"; + diff --git a/src/core/url-parser.test.ts b/src/core/url-parser.test.ts new file mode 100644 index 0000000..67e753a --- /dev/null +++ b/src/core/url-parser.test.ts @@ -0,0 +1,163 @@ +import { describe, it, expect } from "vitest"; +import { parseSourceUrl } from "./url-parser.js"; + +describe("parseSourceUrl", () => { + describe("GitHub URLs", () => { + it("parses basic github.com URL", () => { + const result = parseSourceUrl("https://github.com/owner/repo"); + expect(result.type).toBe("github"); + expect(result.config).toEqual({ owner: "owner", repo: "repo", ref: "HEAD" }); + 
expect(result.defaultIndexName).toBe("repo"); + }); + + it("parses GitHub URL with tree/branch", () => { + const result = parseSourceUrl("https://github.com/owner/repo/tree/main"); + expect(result.type).toBe("github"); + expect(result.config).toEqual({ owner: "owner", repo: "repo", ref: "main" }); + expect(result.defaultIndexName).toBe("repo"); + }); + + it("parses GitHub URL with tree/feature/branch (slashes in branch name)", () => { + const result = parseSourceUrl("https://github.com/owner/repo/tree/feature/branch"); + expect(result.type).toBe("github"); + expect(result.config).toEqual({ owner: "owner", repo: "repo", ref: "feature/branch" }); + expect(result.defaultIndexName).toBe("repo"); + }); + + it("parses GitHub URL with commit SHA", () => { + const result = parseSourceUrl("https://github.com/owner/repo/commit/abc123def456"); + expect(result.type).toBe("github"); + expect(result.config).toEqual({ owner: "owner", repo: "repo", ref: "abc123def456" }); + expect(result.defaultIndexName).toBe("repo"); + }); + + it("throws on invalid GitHub URL without repo", () => { + expect(() => parseSourceUrl("https://github.com/owner")).toThrow("Invalid GitHub URL"); + }); + }); + + describe("GitLab URLs", () => { + it("parses basic gitlab.com URL", () => { + const result = parseSourceUrl("https://gitlab.com/group/project"); + expect(result.type).toBe("gitlab"); + expect(result.config).toEqual({ projectId: "group/project", ref: "HEAD", baseUrl: undefined }); + expect(result.defaultIndexName).toBe("project"); + }); + + it("parses GitLab URL with subgroups", () => { + const result = parseSourceUrl("https://gitlab.com/group/subgroup/project"); + expect(result.type).toBe("gitlab"); + expect(result.config).toEqual({ + projectId: "group/subgroup/project", + ref: "HEAD", + baseUrl: undefined, + }); + expect(result.defaultIndexName).toBe("project"); + }); + + it("parses GitLab URL with /-/tree/branch", () => { + const result = 
parseSourceUrl("https://gitlab.com/group/project/-/tree/main"); + expect(result.type).toBe("gitlab"); + expect(result.config).toEqual({ projectId: "group/project", ref: "main", baseUrl: undefined }); + expect(result.defaultIndexName).toBe("project"); + }); + + it("parses GitLab URL with /-/tree/feature/branch", () => { + const result = parseSourceUrl("https://gitlab.com/group/project/-/tree/feature/branch"); + expect(result.type).toBe("gitlab"); + expect(result.config).toEqual({ + projectId: "group/project", + ref: "feature/branch", + baseUrl: undefined, + }); + }); + + it("parses self-hosted GitLab URL", () => { + const result = parseSourceUrl("https://gitlab.mycompany.com/team/project"); + expect(result.type).toBe("gitlab"); + expect(result.config).toEqual({ + projectId: "team/project", + ref: "HEAD", + baseUrl: "https://gitlab.mycompany.com", + }); + expect(result.defaultIndexName).toBe("project"); + }); + + it("throws on invalid GitLab URL", () => { + expect(() => parseSourceUrl("https://gitlab.com/group")).toThrow("Invalid GitLab URL"); + }); + }); + + describe("Bitbucket URLs", () => { + it("parses basic bitbucket.org URL", () => { + const result = parseSourceUrl("https://bitbucket.org/workspace/repo"); + expect(result.type).toBe("bitbucket"); + expect(result.config).toEqual({ + workspace: "workspace", + repo: "repo", + ref: "HEAD", + baseUrl: undefined, + }); + expect(result.defaultIndexName).toBe("repo"); + }); + + it("parses Bitbucket URL with /src/branch", () => { + const result = parseSourceUrl("https://bitbucket.org/workspace/repo/src/main"); + expect(result.type).toBe("bitbucket"); + expect(result.config).toEqual({ + workspace: "workspace", + repo: "repo", + ref: "main", + baseUrl: undefined, + }); + }); + + it("parses Bitbucket URL with /branch/feature", () => { + const result = parseSourceUrl("https://bitbucket.org/workspace/repo/branch/feature"); + expect(result.type).toBe("bitbucket"); + expect(result.config).toEqual({ + workspace: "workspace", + 
repo: "repo", + ref: "feature", + baseUrl: undefined, + }); + }); + + it("parses self-hosted Bitbucket URL", () => { + const result = parseSourceUrl("https://bitbucket.mycompany.com/workspace/repo"); + expect(result.type).toBe("bitbucket"); + expect(result.config).toEqual({ + workspace: "workspace", + repo: "repo", + ref: "HEAD", + baseUrl: "https://bitbucket.mycompany.com", + }); + }); + + it("throws on invalid Bitbucket URL", () => { + expect(() => parseSourceUrl("https://bitbucket.org/workspace")).toThrow("Invalid Bitbucket URL"); + }); + }); + + describe("Website URLs (fallback)", () => { + it("parses unknown URL as website", () => { + const result = parseSourceUrl("https://docs.example.com/api/v2"); + expect(result.type).toBe("website"); + expect(result.config).toEqual({ url: "https://docs.example.com/api/v2" }); + expect(result.defaultIndexName).toBe("docs.example.com"); + }); + + it("uses hostname as default index name for website", () => { + const result = parseSourceUrl("https://react.dev/learn/thinking-in-react"); + expect(result.type).toBe("website"); + expect(result.defaultIndexName).toBe("react.dev"); + }); + }); + + describe("Invalid URLs", () => { + it("throws on invalid URL format", () => { + expect(() => parseSourceUrl("not-a-url")).toThrow(); + }); + }); +}); + diff --git a/src/core/url-parser.ts b/src/core/url-parser.ts new file mode 100644 index 0000000..f2fe3b4 --- /dev/null +++ b/src/core/url-parser.ts @@ -0,0 +1,171 @@ +/** + * URL Parser - Parses source URLs to determine type and extract configuration + * + * @module core/url-parser + */ + +import type { GitHubSourceConfig } from "../sources/github.js"; +import type { GitLabSourceConfig } from "../sources/gitlab.js"; +import type { BitBucketSourceConfig } from "../sources/bitbucket.js"; +import type { WebsiteSourceConfig } from "../sources/website.js"; + +/** + * Result of parsing a source URL + */ +export interface ParsedUrl { + type: "github" | "gitlab" | "bitbucket" | "website"; + config: 
GitHubSourceConfig | GitLabSourceConfig | BitBucketSourceConfig | WebsiteSourceConfig; + defaultIndexName: string; +} + +/** + * Parse a source URL to determine the source type and extract configuration. + * + * @param urlString - The URL to parse + * @returns Parsed URL with type, config, and default index name + * @throws Error if the URL is invalid + * + * @example + * ```typescript + * const result = parseSourceUrl("https://github.com/owner/repo/tree/main"); + * // result.type === "github" + * // result.config === { owner: "owner", repo: "repo", ref: "main" } + * // result.defaultIndexName === "repo" + * ``` + */ +export function parseSourceUrl(urlString: string): ParsedUrl { + const url = new URL(urlString); + const hostname = url.hostname.toLowerCase(); + + // GitHub + if (hostname === "github.com") { + return parseGitHubUrl(url); + } + + // GitLab (gitlab.com or hostname contains "gitlab") + if (hostname === "gitlab.com" || hostname.includes("gitlab")) { + return parseGitLabUrl(url); + } + + // Bitbucket (bitbucket.org or hostname contains "bitbucket") + if (hostname === "bitbucket.org" || hostname.includes("bitbucket")) { + return parseBitBucketUrl(url); + } + + // Fallback to website + return { + type: "website", + config: { url: urlString }, + defaultIndexName: hostname, + }; +} + +/** + * Parse a GitHub URL + * Formats: + * - https://github.com/owner/repo + * - https://github.com/owner/repo/tree/branch + * - https://github.com/owner/repo/tree/feature/branch + * - https://github.com/owner/repo/commit/sha + */ +function parseGitHubUrl(url: URL): ParsedUrl { + const pathParts = url.pathname.split("/").filter(Boolean); + + if (pathParts.length < 2) { + throw new Error(`Invalid GitHub URL: ${url.href} - expected owner and repo in path`); + } + + const owner = pathParts[0]; + const repo = pathParts[1]; + let ref = "HEAD"; + + // Check for tree/branch or commit/sha patterns + if (pathParts.length >= 4) { + if (pathParts[2] === "tree" || pathParts[2] === 
"commit") { + // Join all remaining parts to handle branch names with slashes + ref = pathParts.slice(3).join("/"); + } + } + + return { + type: "github", + config: { owner, repo, ref }, + defaultIndexName: repo, + }; +} + +/** + * Parse a GitLab URL + * Formats: + * - https://gitlab.com/group/project + * - https://gitlab.com/group/subgroup/project + * - https://gitlab.com/group/project/-/tree/branch + */ +function parseGitLabUrl(url: URL): ParsedUrl { + const pathParts = url.pathname.split("/").filter(Boolean); + + if (pathParts.length < 2) { + throw new Error(`Invalid GitLab URL: ${url.href} - expected project path`); + } + + let ref = "HEAD"; + let projectParts = pathParts; + + // Check for /-/tree/branch pattern + const dashIndex = pathParts.indexOf("-"); + if (dashIndex !== -1) { + projectParts = pathParts.slice(0, dashIndex); + // After "-", expect "tree" or "commits" followed by ref + if (pathParts[dashIndex + 1] === "tree" || pathParts[dashIndex + 1] === "commits") { + ref = pathParts.slice(dashIndex + 2).join("/"); + } + } + + const projectId = projectParts.join("/"); + const projectName = projectParts[projectParts.length - 1]; + + // Handle self-hosted GitLab + const baseUrl = url.origin !== "https://gitlab.com" ? 
url.origin : undefined; + + return { + type: "gitlab", + config: { projectId, ref, baseUrl }, + defaultIndexName: projectName, + }; +} + +/** + * Parse a Bitbucket URL + * Formats: + * - https://bitbucket.org/workspace/repo + * - https://bitbucket.org/workspace/repo/src/branch + * - https://bitbucket.org/workspace/repo/branch/feature + */ +function parseBitBucketUrl(url: URL): ParsedUrl { + const pathParts = url.pathname.split("/").filter(Boolean); + + if (pathParts.length < 2) { + throw new Error(`Invalid Bitbucket URL: ${url.href} - expected workspace and repo in path`); + } + + const workspace = pathParts[0]; + const repo = pathParts[1]; + let ref = "HEAD"; + + // Check for /src/branch or /branch/name patterns + if (pathParts.length >= 4) { + if (pathParts[2] === "src" || pathParts[2] === "branch") { + ref = pathParts.slice(3).join("/"); + } + } + + // Handle self-hosted Bitbucket + const baseUrl = url.origin !== "https://bitbucket.org" ? url.origin : undefined; + + return { + type: "bitbucket", + config: { workspace, repo, ref, baseUrl }, + defaultIndexName: repo, + }; +} + From 0a60c2c272881266c455ea7c7636a6e4ebf2d49e Mon Sep 17 00:00:00 2001 From: Rich Hankins Date: Fri, 30 Jan 2026 00:22:51 -0800 Subject: [PATCH 2/8] Add URL-based indexing command to CLI Adds a `url` subcommand (`ctxc index url <url>`) that auto-detects source type (GitHub, GitLab, Bitbucket, or website) from the URL and creates the appropriate source.
Features: - Parses URL using parseSourceUrl() to determine source type - Supports --ref option to override URL-detected branch/tag - Supports -i/--index option to override default index name - Supports --store and --store-path options - Default index name derived from repo/project name - Graceful error handling for invalid URLs Agent-Id: agent-c9423996-94bb-4ab3-8311-ca0cc822da14 --- src/bin/cmd-index.ts | 66 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/src/bin/cmd-index.ts b/src/bin/cmd-index.ts index d56d060..cc26183 100644 --- a/src/bin/cmd-index.ts +++ b/src/bin/cmd-index.ts @@ -8,6 +8,7 @@ import { Source } from "../sources/types.js"; import { FilesystemStore } from "../stores/filesystem.js"; import { getS3Config } from "../stores/s3-config.js"; import { buildClientUserAgent } from "../core/utils.js"; +import { parseSourceUrl } from "../core/url-parser.js"; // Shared store options interface StoreOptions { @@ -206,9 +207,74 @@ websiteCommand.action(async (options) => { } }); +// URL-based indexing command (auto-detects source type) +const urlCommand = new Command("url") + .description("Index from a URL (auto-detects source type)") + .argument("", "URL of the repository or website to index") + .option("--ref ", "Branch, tag, or commit (overrides URL-detected ref)"); +addStoreOptions(urlCommand); +urlCommand.action(async (url: string, options) => { + try { + // Parse the URL to determine source type and config + const parsed = parseSourceUrl(url); + const indexKey = options.index || parsed.defaultIndexName; + + let source: Source; + + switch (parsed.type) { + case "github": { + const { GitHubSource } = await import("../sources/github.js"); + const config = parsed.config as import("../sources/github.js").GitHubSourceConfig; + source = new GitHubSource({ + ...config, + ref: options.ref || config.ref, + }); + break; + } + case "gitlab": { + const { GitLabSource } = await import("../sources/gitlab.js"); + const config = 
parsed.config as import("../sources/gitlab.js").GitLabSourceConfig; + source = new GitLabSource({ + ...config, + ref: options.ref || config.ref, + }); + break; + } + case "bitbucket": { + const { BitBucketSource } = await import("../sources/bitbucket.js"); + const config = parsed.config as import("../sources/bitbucket.js").BitBucketSourceConfig; + source = new BitBucketSource({ + ...config, + ref: options.ref || config.ref, + }); + break; + } + case "website": { + const { WebsiteSource } = await import("../sources/website.js"); + const config = parsed.config as import("../sources/website.js").WebsiteSourceConfig; + source = new WebsiteSource(config); + break; + } + default: + throw new Error(`Unknown source type: ${parsed.type}`); + } + + const store = await createStore(options); + await runIndex(source, store, indexKey, parsed.type); + } catch (error) { + if (error instanceof Error && error.message.includes("Invalid")) { + console.error(`Error parsing URL: ${error.message}`); + } else { + console.error("Indexing failed:", error); + } + process.exit(1); + } +}); + // Main index command export const indexCommand = new Command("index") .description("Index a data source") + .addCommand(urlCommand) .addCommand(githubCommand) .addCommand(gitlabCommand) .addCommand(bitbucketCommand) From 1f1933d6ab17179bdd223ecac8d612c3d830ad4a Mon Sep 17 00:00:00 2001 From: Rich Hankins Date: Fri, 30 Jan 2026 00:46:29 -0800 Subject: [PATCH 3/8] Allow direct URL syntax: ctxc index <url> without 'url' subcommand Adds pre-parse argument rewriting that auto-detects when a URL is passed directly to 'ctxc index' and transparently inserts the 'url' subcommand. Before: ctxc index url https://github.com/owner/repo After: ctxc index https://github.com/owner/repo Both syntaxes now work. Existing subcommands (github, gitlab, etc.) are unchanged and continue to work.
Agent-Id: agent-ce81a04d-72f2-4289-8eb7-c3074d7d8030 --- src/bin/index.ts | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/bin/index.ts b/src/bin/index.ts index b0c06f3..8636046 100644 --- a/src/bin/index.ts +++ b/src/bin/index.ts @@ -29,5 +29,18 @@ program.addCommand(searchCommand); program.addCommand(mcpCommand); program.addCommand(agentCommand); -program.parse(); +// Auto-detect URL mode: ctxc index -> ctxc index url +// This allows users to skip the 'url' subcommand when providing a URL directly +const indexIdx = process.argv.indexOf("index"); +if (indexIdx !== -1 && indexIdx + 1 < process.argv.length) { + const nextArg = process.argv[indexIdx + 1]; + const subcommands = ["url", "github", "gitlab", "bitbucket", "website"]; + if ( + nextArg.match(/^https?:\/\//) && + !subcommands.includes(nextArg) + ) { + process.argv.splice(indexIdx + 1, 0, "url"); + } +} +program.parse(); From ad138f4a9bac876060b7d2c10da5fcd467a62f78 Mon Sep 17 00:00:00 2001 From: Rich Hankins Date: Thu, 5 Feb 2026 12:32:56 -0800 Subject: [PATCH 4/8] Fix URL parser edge cases from PR review - Strip .git suffix from repo/project names (clone URLs now work) - Conservative self-hosted detection (hostname.startsWith instead of includes) - CLI: Reorder args so options before URL are handled correctly Fixes: 1. URLs like https://github.com/owner/repo.git now parse correctly 2. notgitlab.com no longer incorrectly matches as GitLab 3. `ctxc index -i name <url>` now works (options can be anywhere) Added 8 new tests for edge cases.
Agent-Id: agent-ce81a04d-72f2-4289-8eb7-c3074d7d8030 --- src/bin/index.ts | 23 +++++++++------ src/core/url-parser.test.ts | 57 +++++++++++++++++++++++++++++++++++++ src/core/url-parser.ts | 21 +++++++++++--- 3 files changed, 89 insertions(+), 12 deletions(-) diff --git a/src/bin/index.ts b/src/bin/index.ts index 8636046..bf0fda5 100644 --- a/src/bin/index.ts +++ b/src/bin/index.ts @@ -30,16 +30,23 @@ program.addCommand(mcpCommand); program.addCommand(agentCommand); // Auto-detect URL mode: ctxc index -> ctxc index url -// This allows users to skip the 'url' subcommand when providing a URL directly +// Scan for URL anywhere after 'index' to support: ctxc index -i name https://... const indexIdx = process.argv.indexOf("index"); -if (indexIdx !== -1 && indexIdx + 1 < process.argv.length) { - const nextArg = process.argv[indexIdx + 1]; +if (indexIdx !== -1) { const subcommands = ["url", "github", "gitlab", "bitbucket", "website"]; - if ( - nextArg.match(/^https?:\/\//) && - !subcommands.includes(nextArg) - ) { - process.argv.splice(indexIdx + 1, 0, "url"); + // Find first URL-like argument after 'index' + for (let i = indexIdx + 1; i < process.argv.length; i++) { + const arg = process.argv[i]; + // Stop if we hit a known subcommand + if (subcommands.includes(arg)) break; + // Found a URL - reorder args to put 'url' and the URL right after 'index' + if (arg.match(/^https?:\/\//)) { + // Remove the URL from its current position + process.argv.splice(i, 1); + // Insert 'url' right after 'index' + process.argv.splice(indexIdx + 1, 0, "url", arg); + break; + } } } diff --git a/src/core/url-parser.test.ts b/src/core/url-parser.test.ts index 67e753a..821dce7 100644 --- a/src/core/url-parser.test.ts +++ b/src/core/url-parser.test.ts @@ -161,3 +161,60 @@ describe("parseSourceUrl", () => { }); }); + +describe("Edge cases", () => { + describe(".git suffix handling", () => { + it("strips .git suffix from GitHub URLs", () => { + const result = 
parseSourceUrl("https://github.com/owner/repo.git"); + expect(result.type).toBe("github"); + expect(result.config).toEqual({ owner: "owner", repo: "repo", ref: "HEAD" }); + expect(result.defaultIndexName).toBe("repo"); + }); + + it("strips .git suffix from GitLab URLs", () => { + const result = parseSourceUrl("https://gitlab.com/group/project.git"); + expect(result.type).toBe("gitlab"); + expect(result.config).toEqual({ projectId: "group/project", ref: "HEAD", baseUrl: undefined }); + expect(result.defaultIndexName).toBe("project"); + }); + + it("strips .git suffix from Bitbucket URLs", () => { + const result = parseSourceUrl("https://bitbucket.org/workspace/repo.git"); + expect(result.type).toBe("bitbucket"); + expect(result.config).toEqual({ + workspace: "workspace", + repo: "repo", + ref: "HEAD", + baseUrl: undefined, + }); + expect(result.defaultIndexName).toBe("repo"); + }); + }); + + describe("Conservative self-hosted detection", () => { + it("detects gitlab.company.com as GitLab", () => { + const result = parseSourceUrl("https://gitlab.company.com/team/project"); + expect(result.type).toBe("gitlab"); + }); + + it("does NOT match notgitlab.com as GitLab", () => { + const result = parseSourceUrl("https://notgitlab.com/some/path"); + expect(result.type).toBe("website"); + }); + + it("does NOT match mygitlabserver.com as GitLab", () => { + const result = parseSourceUrl("https://mygitlabserver.com/some/path"); + expect(result.type).toBe("website"); + }); + + it("detects bitbucket.company.com as Bitbucket", () => { + const result = parseSourceUrl("https://bitbucket.company.com/workspace/repo"); + expect(result.type).toBe("bitbucket"); + }); + + it("does NOT match notbitbucket.org as Bitbucket", () => { + const result = parseSourceUrl("https://notbitbucket.org/some/path"); + expect(result.type).toBe("website"); + }); + }); +}); diff --git a/src/core/url-parser.ts b/src/core/url-parser.ts index f2fe3b4..8c59988 100644 --- a/src/core/url-parser.ts +++ 
b/src/core/url-parser.ts @@ -9,6 +9,14 @@ import type { GitLabSourceConfig } from "../sources/gitlab.js"; import type { BitBucketSourceConfig } from "../sources/bitbucket.js"; import type { WebsiteSourceConfig } from "../sources/website.js"; +/** + * Strip .git suffix from repo/project names + */ +function stripGitSuffix(name: string): string { + return name.endsWith(".git") ? name.slice(0, -4) : name; +} + + /** * Result of parsing a source URL */ @@ -43,12 +51,12 @@ export function parseSourceUrl(urlString: string): ParsedUrl { } // GitLab (gitlab.com or hostname contains "gitlab") - if (hostname === "gitlab.com" || hostname.includes("gitlab")) { + if (hostname === "gitlab.com" || hostname.startsWith("gitlab.")) { return parseGitLabUrl(url); } // Bitbucket (bitbucket.org or hostname contains "bitbucket") - if (hostname === "bitbucket.org" || hostname.includes("bitbucket")) { + if (hostname === "bitbucket.org" || hostname.startsWith("bitbucket.")) { return parseBitBucketUrl(url); } @@ -76,7 +84,7 @@ function parseGitHubUrl(url: URL): ParsedUrl { } const owner = pathParts[0]; - const repo = pathParts[1]; + const repo = stripGitSuffix(pathParts[1]); let ref = "HEAD"; // Check for tree/branch or commit/sha patterns @@ -121,6 +129,11 @@ function parseGitLabUrl(url: URL): ParsedUrl { } } + // Strip .git suffix from project name if present + const lastPart = projectParts[projectParts.length - 1]; + if (lastPart.endsWith(".git")) { + projectParts[projectParts.length - 1] = stripGitSuffix(lastPart); + } const projectId = projectParts.join("/"); const projectName = projectParts[projectParts.length - 1]; @@ -149,7 +162,7 @@ function parseBitBucketUrl(url: URL): ParsedUrl { } const workspace = pathParts[0]; - const repo = pathParts[1]; + const repo = stripGitSuffix(pathParts[1]); let ref = "HEAD"; // Check for /src/branch or /branch/name patterns From df1b35e2274a200899f043427b3141cccd031c71 Mon Sep 17 00:00:00 2001 From: Rich Hankins Date: Thu, 5 Feb 2026 12:52:13 -0800 
Subject: [PATCH 5/8] Simplify URL detection: only check first arg after 'index' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove argument reordering logic. URL must now appear immediately after 'index', consistent with how other subcommands work: ctxc index https://github.com/owner/repo -i name ✓ ctxc index -i name https://github.com/owner/repo ✗ (error) This is more predictable and matches CLI conventions. --- src/bin/index.ts | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/src/bin/index.ts b/src/bin/index.ts index bf0fda5..6834e6f 100644 --- a/src/bin/index.ts +++ b/src/bin/index.ts @@ -30,23 +30,13 @@ program.addCommand(mcpCommand); program.addCommand(agentCommand); // Auto-detect URL mode: ctxc index -> ctxc index url -// Scan for URL anywhere after 'index' to support: ctxc index -i name https://... +// URL must be the first argument after 'index' (like any subcommand) const indexIdx = process.argv.indexOf("index"); -if (indexIdx !== -1) { - const subcommands = ["url", "github", "gitlab", "bitbucket", "website"]; - // Find first URL-like argument after 'index' - for (let i = indexIdx + 1; i < process.argv.length; i++) { - const arg = process.argv[i]; - // Stop if we hit a known subcommand - if (subcommands.includes(arg)) break; - // Found a URL - reorder args to put 'url' and the URL right after 'index' - if (arg.match(/^https?:\/\//)) { - // Remove the URL from its current position - process.argv.splice(i, 1); - // Insert 'url' right after 'index' - process.argv.splice(indexIdx + 1, 0, "url", arg); - break; - } +if (indexIdx !== -1 && indexIdx + 1 < process.argv.length) { + const nextArg = process.argv[indexIdx + 1]; + if (nextArg.match(/^https?:\/\//)) { + // Insert 'url' before the URL + process.argv.splice(indexIdx + 1, 0, "url"); } } From 89e626899f16a4ae3caadfa44216260319a044cf Mon Sep 17 00:00:00 2001 From: Rich Hankins Date: Thu, 5 Feb 2026 13:04:39 -0800 Subject: 
[PATCH 6/8] Improve index command help: hide internal url subcommand, show usage examples --- src/bin/cmd-index.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/bin/cmd-index.ts b/src/bin/cmd-index.ts index cc26183..313c74c 100644 --- a/src/bin/cmd-index.ts +++ b/src/bin/cmd-index.ts @@ -209,7 +209,7 @@ websiteCommand.action(async (options) => { // URL-based indexing command (auto-detects source type) const urlCommand = new Command("url") - .description("Index from a URL (auto-detects source type)") + .description("Index from URL with auto-detection (used internally when URL is passed directly)") .argument("", "URL of the repository or website to index") .option("--ref ", "Branch, tag, or commit (overrides URL-detected ref)"); addStoreOptions(urlCommand); @@ -273,8 +273,9 @@ urlCommand.action(async (url: string, options) => { // Main index command export const indexCommand = new Command("index") - .description("Index a data source") - .addCommand(urlCommand) + .usage(" [options]\n ctxc index [options]") + .description("Index a data source\n\nExamples:\n ctxc index https://github.com/owner/repo\n ctxc index https://github.com/owner/repo -i myindex\n ctxc index github --owner x --repo y") + .addCommand(urlCommand, { hidden: true }) .addCommand(githubCommand) .addCommand(gitlabCommand) .addCommand(bitbucketCommand) From 711390a660c863ce1bff7088bf31d1e8ceca49c6 Mon Sep 17 00:00:00 2001 From: Rich Hankins Date: Thu, 5 Feb 2026 14:26:52 -0800 Subject: [PATCH 7/8] Fix index command help menu formatting Agent-Id: agent-f65941cf-2aac-4651-a905-32f3d8b9313d Linked-Note-Id: 26a5d7df-b154-45b3-9351-1698a06d4fd0 --- src/bin/cmd-index.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/bin/cmd-index.ts b/src/bin/cmd-index.ts index 313c74c..b3791a7 100644 --- a/src/bin/cmd-index.ts +++ b/src/bin/cmd-index.ts @@ -274,7 +274,12 @@ urlCommand.action(async (url: string, options) => { // Main index command export const 
indexCommand = new Command("index") .usage(" [options]\n ctxc index [options]") - .description("Index a data source\n\nExamples:\n ctxc index https://github.com/owner/repo\n ctxc index https://github.com/owner/repo -i myindex\n ctxc index github --owner x --repo y") + .description("Index a data source") + .addHelpText('after', ` +Examples: + ctxc index https://github.com/owner/repo + ctxc index https://github.com/owner/repo -i myindex + ctxc index github --owner x --repo y`) .addCommand(urlCommand, { hidden: true }) .addCommand(githubCommand) .addCommand(gitlabCommand) From 8ed221ebbf3e77e8d19562bdf8879f26023c5a43 Mon Sep 17 00:00:00 2001 From: Rich Hankins Date: Thu, 5 Feb 2026 14:37:26 -0800 Subject: [PATCH 8/8] fix: improve index command help menu formatting - Move examples from description to addHelpText() so they only appear in Usage: ctxc index [options] [command] Index a data source Options: -h, --help display help for command Commands: github [options] Index a GitHub repository gitlab [options] Index a GitLab project bitbucket [options] Index a Bitbucket repository website [options] Crawl and index a website help [command] display help for command, not in the main menu - Simplify usage line to instead of showing two separate usage patterns Agent-Id: agent-8cab8bce-f29f-48f1-8fc1-86167ff2398b --- src/bin/cmd-index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bin/cmd-index.ts b/src/bin/cmd-index.ts index b3791a7..0aa4ff1 100644 --- a/src/bin/cmd-index.ts +++ b/src/bin/cmd-index.ts @@ -273,7 +273,7 @@ urlCommand.action(async (url: string, options) => { // Main index command export const indexCommand = new Command("index") - .usage(" [options]\n ctxc index [options]") + .usage(" [options]") .description("Index a data source") .addHelpText('after', ` Examples: