diff --git a/cmd/mcpproxy/main.go b/cmd/mcpproxy/main.go index 2277120b..048420d1 100644 --- a/cmd/mcpproxy/main.go +++ b/cmd/mcpproxy/main.go @@ -41,8 +41,8 @@ import ( clioutput "github.com/smart-mcp-proxy/mcpproxy-go/internal/cli/output" "github.com/smart-mcp-proxy/mcpproxy-go/internal/config" - "github.com/smart-mcp-proxy/mcpproxy-go/internal/httpapi" "github.com/smart-mcp-proxy/mcpproxy-go/internal/experiments" + "github.com/smart-mcp-proxy/mcpproxy-go/internal/httpapi" "github.com/smart-mcp-proxy/mcpproxy-go/internal/logs" "github.com/smart-mcp-proxy/mcpproxy-go/internal/registries" "github.com/smart-mcp-proxy/mcpproxy-go/internal/server" @@ -476,8 +476,9 @@ func runServer(cmd *cobra.Command, _ []string) error { zap.String("log_level", cmdLogLevel), zap.Bool("log_to_file", cmdLogToFile)) - // Pass edition to httpapi for status endpoint + // Pass edition and version to internal packages httpapi.SetEdition(Edition) + server.SetMCPServerVersion(version) // Override other settings from command line cfg.DebugSearch = cmdDebugSearch diff --git a/cmd/mcpproxy/status_cmd.go b/cmd/mcpproxy/status_cmd.go index bc85f169..81df7ed8 100644 --- a/cmd/mcpproxy/status_cmd.go +++ b/cmd/mcpproxy/status_cmd.go @@ -21,19 +21,20 @@ import ( // StatusInfo holds the collected status data for display. 
type StatusInfo struct { - State string `json:"state"` - Edition string `json:"edition"` - ListenAddr string `json:"listen_addr"` - Uptime string `json:"uptime,omitempty"` - UptimeSeconds float64 `json:"uptime_seconds,omitempty"` - APIKey string `json:"api_key"` - WebUIURL string `json:"web_ui_url"` - RoutingMode string `json:"routing_mode"` - Servers *ServerCounts `json:"servers,omitempty"` - SocketPath string `json:"socket_path,omitempty"` - ConfigPath string `json:"config_path,omitempty"` - Version string `json:"version,omitempty"` - TeamsInfo *TeamsStatusInfo `json:"teams,omitempty"` + State string `json:"state"` + Edition string `json:"edition"` + ListenAddr string `json:"listen_addr"` + Uptime string `json:"uptime,omitempty"` + UptimeSeconds float64 `json:"uptime_seconds,omitempty"` + APIKey string `json:"api_key"` + WebUIURL string `json:"web_ui_url"` + RoutingMode string `json:"routing_mode"` + Endpoints map[string]string `json:"endpoints"` + Servers *ServerCounts `json:"servers,omitempty"` + SocketPath string `json:"socket_path,omitempty"` + ConfigPath string `json:"config_path,omitempty"` + Version string `json:"version,omitempty"` + TeamsInfo *TeamsStatusInfo `json:"teams,omitempty"` } // TeamsStatusInfo holds teams-specific status information. 
@@ -218,6 +219,9 @@ func collectStatusFromDaemon(cfg *config.Config, socketPath, configPath string) info.WebUIURL = statusBuildWebUIURL(info.ListenAddr, cfg.APIKey) } + // Build MCP endpoint URLs + info.Endpoints = statusBuildEndpoints(info.ListenAddr) + return info, nil } @@ -239,6 +243,7 @@ func collectStatusFromConfig(cfg *config.Config, socketPath, configPath string) APIKey: cfg.APIKey, WebUIURL: statusBuildWebUIURL(listenAddr, cfg.APIKey), RoutingMode: routingMode, + Endpoints: statusBuildEndpoints(listenAddr), ConfigPath: configPath, } @@ -273,6 +278,21 @@ func statusMaskAPIKey(apiKey string) string { return apiKey[:4] + "****" + apiKey[len(apiKey)-4:] } +// statusBuildEndpoints constructs the MCP endpoint URLs map. +func statusBuildEndpoints(listenAddr string) map[string]string { + addr := listenAddr + if strings.HasPrefix(addr, ":") { + addr = "127.0.0.1" + addr + } + base := "http://" + addr + return map[string]string{ + "default": base + "/mcp", + "retrieve_tools": base + "/mcp/call", + "direct": base + "/mcp/all", + "code_execution": base + "/mcp/code", + } +} + // statusBuildWebUIURL constructs the Web UI URL with embedded API key. 
func statusBuildWebUIURL(listenAddr, apiKey string) string { addr := listenAddr @@ -379,6 +399,23 @@ func printStatusTable(info *StatusInfo) { fmt.Printf(" %-12s %s\n", "Config:", info.ConfigPath) } + if info.Endpoints != nil { + fmt.Println() + fmt.Println("MCP Endpoints") + if v, ok := info.Endpoints["default"]; ok { + fmt.Printf(" %-16s %s (default, %s mode)\n", "/mcp", v, info.RoutingMode) + } + if v, ok := info.Endpoints["retrieve_tools"]; ok { + fmt.Printf(" %-16s %s (retrieve + call tools)\n", "/mcp/call", v) + } + if v, ok := info.Endpoints["direct"]; ok { + fmt.Printf(" %-16s %s (all tools, direct access)\n", "/mcp/all", v) + } + if v, ok := info.Endpoints["code_execution"]; ok { + fmt.Printf(" %-16s %s (code execution)\n", "/mcp/code", v) + } + } + if info.TeamsInfo != nil { fmt.Println() fmt.Println("Server Edition") diff --git a/docs/features/code-execution.md b/docs/features/code-execution.md index effb19b2..5f4cb116 100644 --- a/docs/features/code-execution.md +++ b/docs/features/code-execution.md @@ -233,7 +233,7 @@ const user: User = { name: input.username }; - TypeScript support uses type-stripping only (no type checking or semantic validation) - Valid JavaScript is also valid TypeScript - Transpilation errors return the `TRANSPILE_ERROR` error code with line/column information -- See [Code Execution Overview](../code_execution/overview.md#typescript-support) for comprehensive TypeScript documentation +- See `docs/code_execution/overview.md` in the repository for comprehensive TypeScript documentation ## Best Practices diff --git a/docs/features/routing-modes.md b/docs/features/routing-modes.md index ca1aff1d..e114ce4d 100644 --- a/docs/features/routing-modes.md +++ b/docs/features/routing-modes.md @@ -52,36 +52,51 @@ This means you can point different AI clients at different endpoints. 
For exampl ### retrieve_tools (Default) -The default mode uses BM25 full-text search to help AI agents discover relevant tools without exposing the entire tool catalog. This is the most token-efficient mode. +The default mode uses BM25 full-text search to help AI agents discover relevant tools without exposing the entire tool catalog. This approach — sometimes called "lazy tool loading" or "tool search" — is used by Anthropic's own MCP implementation and is the recommended pattern for large tool sets. + +**Endpoint:** `/mcp/call` **Tools exposed:** -- `retrieve_tools` -- Search for tools by natural language query -- `call_tool_read` -- Execute read-only tool calls -- `call_tool_write` -- Execute write tool calls -- `call_tool_destructive` -- Execute destructive tool calls -- `upstream_servers` -- Manage upstream servers (if management enabled) -- `code_execution` -- JavaScript orchestration (if enabled) +- `retrieve_tools` — Search for tools by natural language query +- `call_tool_read` — Execute read-only tool calls +- `call_tool_write` — Execute write tool calls +- `call_tool_destructive` — Execute destructive tool calls +- `read_cache` — Access paginated responses **How it works:** 1. AI agent calls `retrieve_tools` with a natural language query -2. MCPProxy returns matching tools ranked by BM25 relevance -3. AI agent calls the appropriate `call_tool_*` variant with the tool name +2. MCPProxy returns matching tools ranked by BM25 relevance, with `call_with` recommendations +3. 
AI agent calls the appropriate `call_tool_*` variant with the exact tool name from results + +**Pros:** +- Massive token savings: only matched tools are sent to the model, not the full catalog +- Scales to hundreds of tools across many servers without context window bloat +- Intent-based permission control (read/write/destructive variants) enables granular IDE approval flows +- Activity logging captures operation type and intent metadata for auditing + +**Cons:** +- Two-step workflow (search then call) adds one round-trip compared to direct mode +- BM25 search quality depends on tool descriptions — poorly described tools may not surface +- The AI agent must learn the retrieve-then-call pattern (most modern models handle this well) **When to use:** - You have many upstream servers with dozens or hundreds of tools -- Token usage is a concern (only tool metadata for matched tools is sent) -- You want intent-based permission control (read/write/destructive variants) +- Token usage is a concern (common with paid API usage) +- You want intent-based permission control in IDE auto-approve settings +- Production deployments where audit trails matter ### direct -Direct mode exposes every upstream tool directly to the AI agent. Each tool is named `serverName__toolName` (double underscore separator). +Direct mode exposes every upstream tool directly to the AI agent. Each tool appears in the standard MCP `tools/list` with a `serverName__toolName` name. This is the simplest mode and the closest to how individual MCP servers work natively. + +**Endpoint:** `/mcp/all` **Tools exposed:** - Every tool from every connected, enabled, non-quarantined upstream server - Named as `serverName__toolName` (e.g., `github__create_issue`, `filesystem__read_file`) **How it works:** -1. AI agent sees all available tools in the tools list +1. AI agent sees all available tools in `tools/list` 2. AI agent calls tools directly by their `serverName__toolName` name 3. 
MCPProxy routes the call to the correct upstream server @@ -94,31 +109,74 @@ Direct mode exposes every upstream tool directly to the AI agent. Each tool is n - Agent tokens with server restrictions are enforced (access denied if token lacks server access) - Permission levels are derived from tool annotations (read-only, destructive, etc.) +**Pros:** +- Zero learning curve: tools work exactly like native MCP tools +- Single round-trip: no search step needed, call any tool directly +- Maximum compatibility: works with any MCP client without special handling +- Tool annotations (readOnlyHint, destructiveHint) are preserved from upstream + +**Cons:** +- High token cost: all tool definitions are sent in every request context +- Does not scale well beyond ~50 tools (context window fills up, model accuracy degrades) +- No intent-based permission tiers (the model just calls tools) +- All tools visible upfront increases attack surface for prompt injection + **When to use:** -- You have a small number of upstream servers (fewer than 50 total tools) -- You want maximum simplicity and compatibility -- AI clients that work better with a flat tool list +- Small setups with fewer than 50 total tools +- Quick prototyping and testing +- AI clients that don't support the retrieve-then-call pattern +- CI/CD agents that know exactly which tools they need ### code_execution -Code execution mode is designed for multi-step orchestration workflows. It exposes the `code_execution` tool with an enhanced description that includes a catalog of all available upstream tools. +Code execution mode is designed for multi-step orchestration workflows. Instead of making separate tool calls for each step, the AI agent writes JavaScript or TypeScript code that chains multiple tool calls together in a single request. This is inspired by patterns from OpenAI's code interpreter and similar "tool-as-code" approaches. 
+ +**Endpoint:** `/mcp/code` **Tools exposed:** -- `code_execution` -- Execute JavaScript/TypeScript that orchestrates upstream tools -- `retrieve_tools` -- Search for tools (useful for discovery before writing code) +- `code_execution` — Execute JavaScript/TypeScript that orchestrates upstream tools (includes a catalog of all available tools in the description) +- `retrieve_tools` — Search for tools (instructs the agent to use `code_execution`, not `call_tool_*`) **How it works:** 1. AI agent sees the `code_execution` tool with a listing of all available upstream tools 2. AI agent writes JavaScript/TypeScript code that calls `call_tool(serverName, toolName, args)` -3. MCPProxy executes the code in a sandboxed VM, routing tool calls to upstream servers +3. MCPProxy executes the code in a sandboxed ES2020+ VM, routing tool calls to upstream servers +4. Results are returned as a single response + +**Pros:** +- Minimal round-trips: complex multi-step workflows execute in one request +- Full programming power: conditionals, loops, error handling, data transformation +- TypeScript support via type-stripping (auto-transpiled via esbuild; no type checking is performed) +- Sandboxed execution: no filesystem or network access, timeout enforcement +- Tool catalog in description means no separate search step needed + +**Cons:** +- Requires the AI model to write correct JavaScript/TypeScript code +- Debugging is harder: errors come from inside the sandbox, not from MCP tool calls +- Higher latency per request (VM startup + multiple sequential tool calls) +- Must be explicitly enabled (`"enable_code_execution": true`) +- Not all AI models are equally good at writing code for tool orchestration **When to use:** -- Workflows that require chaining 2+ tool calls together -- You want to minimize model round-trips -- Complex conditional logic or data transformation between tool calls +- Workflows that chain 2+ tool calls with data dependencies between them +- Batch operations (e.g., "for each repo, check CI status and create 
issue if failing") +- Complex conditional logic that would require many round-trips in other modes +- Data transformation pipelines (fetch from one tool, transform, send to another) **Note:** Code execution must be enabled in config (`"enable_code_execution": true`). If disabled, the `code_execution` tool appears but returns an error message directing the user to enable it. +## Choosing the Right Mode + +| Factor | retrieve_tools | direct | code_execution | +|--------|---------------|--------|----------------| +| **Token cost** | Low (only matched tools) | High (all tools) | Medium (catalog in description) | +| **Round-trips per task** | 2 (search + call) | 1 (direct call) | 1 (code handles all) | +| **Max practical tools** | 500+ | ~50 | 500+ | +| **Setup complexity** | None (default) | None | Requires enablement | +| **Model requirements** | Any modern LLM | Any LLM | Code-capable LLM | +| **Audit granularity** | High (intent metadata) | Medium (annotations) | Medium (code logged) | +| **IDE auto-approve** | Per-variant rules | Per-tool rules | Single rule | + ## Viewing Current Routing Mode ### CLI diff --git a/frontend/src/components/TopHeader.vue b/frontend/src/components/TopHeader.vue index 40937b77..640ef0a0 100644 --- a/frontend/src/components/TopHeader.vue +++ b/frontend/src/components/TopHeader.vue @@ -1,6 +1,6 @@ - + +