Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions cmd/mcpproxy/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ import (

clioutput "github.com/smart-mcp-proxy/mcpproxy-go/internal/cli/output"
"github.com/smart-mcp-proxy/mcpproxy-go/internal/config"
"github.com/smart-mcp-proxy/mcpproxy-go/internal/httpapi"
"github.com/smart-mcp-proxy/mcpproxy-go/internal/experiments"
"github.com/smart-mcp-proxy/mcpproxy-go/internal/httpapi"
"github.com/smart-mcp-proxy/mcpproxy-go/internal/logs"
"github.com/smart-mcp-proxy/mcpproxy-go/internal/registries"
"github.com/smart-mcp-proxy/mcpproxy-go/internal/server"
Expand Down Expand Up @@ -476,8 +476,9 @@ func runServer(cmd *cobra.Command, _ []string) error {
zap.String("log_level", cmdLogLevel),
zap.Bool("log_to_file", cmdLogToFile))

// Pass edition to httpapi for status endpoint
// Pass edition and version to internal packages
httpapi.SetEdition(Edition)
server.SetMCPServerVersion(version)

// Override other settings from command line
cfg.DebugSearch = cmdDebugSearch
Expand Down
63 changes: 50 additions & 13 deletions cmd/mcpproxy/status_cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,20 @@ import (

// StatusInfo holds the collected status data for display.
type StatusInfo struct {
State string `json:"state"`
Edition string `json:"edition"`
ListenAddr string `json:"listen_addr"`
Uptime string `json:"uptime,omitempty"`
UptimeSeconds float64 `json:"uptime_seconds,omitempty"`
APIKey string `json:"api_key"`
WebUIURL string `json:"web_ui_url"`
RoutingMode string `json:"routing_mode"`
Servers *ServerCounts `json:"servers,omitempty"`
SocketPath string `json:"socket_path,omitempty"`
ConfigPath string `json:"config_path,omitempty"`
Version string `json:"version,omitempty"`
TeamsInfo *TeamsStatusInfo `json:"teams,omitempty"`
State string `json:"state"`
Edition string `json:"edition"`
ListenAddr string `json:"listen_addr"`
Uptime string `json:"uptime,omitempty"`
UptimeSeconds float64 `json:"uptime_seconds,omitempty"`
APIKey string `json:"api_key"`
WebUIURL string `json:"web_ui_url"`
RoutingMode string `json:"routing_mode"`
Endpoints map[string]string `json:"endpoints"`
Servers *ServerCounts `json:"servers,omitempty"`
SocketPath string `json:"socket_path,omitempty"`
ConfigPath string `json:"config_path,omitempty"`
Version string `json:"version,omitempty"`
TeamsInfo *TeamsStatusInfo `json:"teams,omitempty"`
}

// TeamsStatusInfo holds teams-specific status information.
Expand Down Expand Up @@ -218,6 +219,9 @@ func collectStatusFromDaemon(cfg *config.Config, socketPath, configPath string)
info.WebUIURL = statusBuildWebUIURL(info.ListenAddr, cfg.APIKey)
}

// Build MCP endpoint URLs
info.Endpoints = statusBuildEndpoints(info.ListenAddr)

return info, nil
}

Expand All @@ -239,6 +243,7 @@ func collectStatusFromConfig(cfg *config.Config, socketPath, configPath string)
APIKey: cfg.APIKey,
WebUIURL: statusBuildWebUIURL(listenAddr, cfg.APIKey),
RoutingMode: routingMode,
Endpoints: statusBuildEndpoints(listenAddr),
ConfigPath: configPath,
}

Expand Down Expand Up @@ -273,6 +278,21 @@ func statusMaskAPIKey(apiKey string) string {
return apiKey[:4] + "****" + apiKey[len(apiKey)-4:]
}

// statusBuildEndpoints constructs the MCP endpoint URLs map.
//
// A listen address with no host part (e.g. ":8080") is normalized to
// loopback so the returned URLs are directly usable by local clients.
func statusBuildEndpoints(listenAddr string) map[string]string {
	host := listenAddr
	if strings.HasPrefix(host, ":") {
		host = "127.0.0.1" + host
	}
	// One entry per routing mode; each path maps to a distinct MCP endpoint.
	paths := map[string]string{
		"default":        "/mcp",
		"retrieve_tools": "/mcp/call",
		"direct":         "/mcp/all",
		"code_execution": "/mcp/code",
	}
	endpoints := make(map[string]string, len(paths))
	for name, p := range paths {
		endpoints[name] = "http://" + host + p
	}
	return endpoints
}

// statusBuildWebUIURL constructs the Web UI URL with embedded API key.
func statusBuildWebUIURL(listenAddr, apiKey string) string {
addr := listenAddr
Expand Down Expand Up @@ -379,6 +399,23 @@ func printStatusTable(info *StatusInfo) {
fmt.Printf(" %-12s %s\n", "Config:", info.ConfigPath)
}

if info.Endpoints != nil {
fmt.Println()
fmt.Println("MCP Endpoints")
if v, ok := info.Endpoints["default"]; ok {
fmt.Printf(" %-16s %s (default, %s mode)\n", "/mcp", v, info.RoutingMode)
}
if v, ok := info.Endpoints["retrieve_tools"]; ok {
fmt.Printf(" %-16s %s (retrieve + call tools)\n", "/mcp/call", v)
}
if v, ok := info.Endpoints["direct"]; ok {
fmt.Printf(" %-16s %s (all tools, direct access)\n", "/mcp/all", v)
}
if v, ok := info.Endpoints["code_execution"]; ok {
fmt.Printf(" %-16s %s (code execution)\n", "/mcp/code", v)
}
}

if info.TeamsInfo != nil {
fmt.Println()
fmt.Println("Server Edition")
Expand Down
2 changes: 1 addition & 1 deletion docs/features/code-execution.md
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ const user: User = { name: input.username };
- TypeScript support uses type-stripping only (no type checking or semantic validation)
- Valid JavaScript is also valid TypeScript
- Transpilation errors return the `TRANSPILE_ERROR` error code with line/column information
- See [Code Execution Overview](../code_execution/overview.md#typescript-support) for comprehensive TypeScript documentation
- See `docs/code_execution/overview.md` in the repository for comprehensive TypeScript documentation

## Best Practices

Expand Down
104 changes: 81 additions & 23 deletions docs/features/routing-modes.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,36 +52,51 @@ This means you can point different AI clients at different endpoints. For exampl

### retrieve_tools (Default)

The default mode uses BM25 full-text search to help AI agents discover relevant tools without exposing the entire tool catalog. This is the most token-efficient mode.
The default mode uses BM25 full-text search to help AI agents discover relevant tools without exposing the entire tool catalog. This approach — sometimes called "lazy tool loading" or "tool search" — is used by Anthropic's own MCP implementation and is the recommended pattern for large tool sets.

**Endpoint:** `/mcp/call`

**Tools exposed:**
- `retrieve_tools` -- Search for tools by natural language query
- `call_tool_read` -- Execute read-only tool calls
- `call_tool_write` -- Execute write tool calls
- `call_tool_destructive` -- Execute destructive tool calls
- `upstream_servers` -- Manage upstream servers (if management enabled)
- `code_execution` -- JavaScript orchestration (if enabled)
- `retrieve_tools` — Search for tools by natural language query
- `call_tool_read` — Execute read-only tool calls
- `call_tool_write` — Execute write tool calls
- `call_tool_destructive` — Execute destructive tool calls
- `read_cache` — Access paginated responses

**How it works:**
1. AI agent calls `retrieve_tools` with a natural language query
2. MCPProxy returns matching tools ranked by BM25 relevance
3. AI agent calls the appropriate `call_tool_*` variant with the tool name
2. MCPProxy returns matching tools ranked by BM25 relevance, with `call_with` recommendations
3. AI agent calls the appropriate `call_tool_*` variant with the exact tool name from results

**Pros:**
- Massive token savings: only matched tools are sent to the model, not the full catalog
- Scales to hundreds of tools across many servers without context window bloat
- Intent-based permission control (read/write/destructive variants) enables granular IDE approval flows
- Activity logging captures operation type and intent metadata for auditing

**Cons:**
- Two-step workflow (search then call) adds one round-trip compared to direct mode
- BM25 search quality depends on tool descriptions — poorly described tools may not surface
- The AI agent must learn the retrieve-then-call pattern (most modern models handle this well)

**When to use:**
- You have many upstream servers with dozens or hundreds of tools
- Token usage is a concern (only tool metadata for matched tools is sent)
- You want intent-based permission control (read/write/destructive variants)
- Token usage is a concern (common with paid API usage)
- You want intent-based permission control in IDE auto-approve settings
- Production deployments where audit trails matter

### direct

Direct mode exposes every upstream tool directly to the AI agent. Each tool is named `serverName__toolName` (double underscore separator).
Direct mode exposes every upstream tool directly to the AI agent. Each tool appears in the standard MCP `tools/list` with a `serverName__toolName` name. This is the simplest mode and the closest to how individual MCP servers work natively.

**Endpoint:** `/mcp/all`

**Tools exposed:**
- Every tool from every connected, enabled, non-quarantined upstream server
- Named as `serverName__toolName` (e.g., `github__create_issue`, `filesystem__read_file`)

**How it works:**
1. AI agent sees all available tools in the tools list
1. AI agent sees all available tools in `tools/list`
2. AI agent calls tools directly by their `serverName__toolName` name
3. MCPProxy routes the call to the correct upstream server

Expand All @@ -94,31 +109,74 @@ Direct mode exposes every upstream tool directly to the AI agent. Each tool is n
- Agent tokens with server restrictions are enforced (access denied if token lacks server access)
- Permission levels are derived from tool annotations (read-only, destructive, etc.)

**Pros:**
- Zero learning curve: tools work exactly like native MCP tools
- Single round-trip: no search step needed, call any tool directly
- Maximum compatibility: works with any MCP client without special handling
- Tool annotations (readOnlyHint, destructiveHint) are preserved from upstream

**Cons:**
- High token cost: all tool definitions are sent in every request context
- Does not scale well beyond ~50 tools (context window fills up, model accuracy degrades)
- No intent-based permission tiers (the model just calls tools)
- All tools visible upfront increases attack surface for prompt injection

**When to use:**
- You have a small number of upstream servers (fewer than 50 total tools)
- You want maximum simplicity and compatibility
- AI clients that work better with a flat tool list
- Small setups with fewer than 50 total tools
- Quick prototyping and testing
- AI clients that don't support the retrieve-then-call pattern
- CI/CD agents that know exactly which tools they need

### code_execution

Code execution mode is designed for multi-step orchestration workflows. It exposes the `code_execution` tool with an enhanced description that includes a catalog of all available upstream tools.
Code execution mode is designed for multi-step orchestration workflows. Instead of making separate tool calls for each step, the AI agent writes JavaScript or TypeScript code that chains multiple tool calls together in a single request. This is inspired by patterns from OpenAI's code interpreter and similar "tool-as-code" approaches.

**Endpoint:** `/mcp/code`

**Tools exposed:**
- `code_execution` -- Execute JavaScript/TypeScript that orchestrates upstream tools
- `retrieve_tools` -- Search for tools (useful for discovery before writing code)
- `code_execution` — Execute JavaScript/TypeScript that orchestrates upstream tools (includes a catalog of all available tools in the description)
- `retrieve_tools` — Search for tools (instructs to use `code_execution`, not `call_tool_*`)

**How it works:**
1. AI agent sees the `code_execution` tool with a listing of all available upstream tools
2. AI agent writes JavaScript/TypeScript code that calls `call_tool(serverName, toolName, args)`
3. MCPProxy executes the code in a sandboxed VM, routing tool calls to upstream servers
3. MCPProxy executes the code in a sandboxed ES2020+ VM, routing tool calls to upstream servers
4. Results are returned as a single response

**Pros:**
- Minimal round-trips: complex multi-step workflows execute in one request
- Full programming power: conditionals, loops, error handling, data transformation
- TypeScript support with type safety (auto-transpiled via esbuild)
- Sandboxed execution: no filesystem or network access, timeout enforcement
- Tool catalog in description means no separate search step needed

**Cons:**
- Requires the AI model to write correct JavaScript/TypeScript code
- Debugging is harder: errors come from inside the sandbox, not from MCP tool calls
- Higher latency per request (VM startup + multiple sequential tool calls)
- Must be explicitly enabled (`"enable_code_execution": true`)
- Not all AI models are equally good at writing code for tool orchestration

**When to use:**
- Workflows that require chaining 2+ tool calls together
- You want to minimize model round-trips
- Complex conditional logic or data transformation between tool calls
- Workflows that chain 2+ tool calls with data dependencies between them
- Batch operations (e.g., "for each repo, check CI status and create issue if failing")
- Complex conditional logic that would require many round-trips in other modes
- Data transformation pipelines (fetch from one tool, transform, send to another)

**Note:** Code execution must be enabled in config (`"enable_code_execution": true`). If disabled, the `code_execution` tool appears but returns an error message directing the user to enable it.

## Choosing the Right Mode

| Factor | retrieve_tools | direct | code_execution |
|--------|---------------|--------|----------------|
| **Token cost** | Low (only matched tools) | High (all tools) | Medium (catalog in description) |
| **Round-trips per task** | 2 (search + call) | 1 (direct call) | 1 (code handles all) |
| **Max practical tools** | 500+ | ~50 | 500+ |
| **Setup complexity** | None (default) | None | Requires enablement |
| **Model requirements** | Any modern LLM | Any LLM | Code-capable LLM |
| **Audit granularity** | High (intent metadata) | Medium (annotations) | Medium (code logged) |
| **IDE auto-approve** | Per-variant rules | Per-tool rules | Single rule |

## Viewing Current Routing Mode

### CLI
Expand Down
Loading
Loading