diff --git a/go.mod b/go.mod
index 40b87efb..f0d3f65f 100644
--- a/go.mod
+++ b/go.mod
@@ -4,6 +4,7 @@ go 1.25.5

 require (
 	github.com/Masterminds/semver/v3 v3.4.0
+	github.com/anthropics/anthropic-sdk-go v1.26.0
 	github.com/bmatcuk/doublestar/v4 v4.9.2
 	github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964
 	github.com/fatih/color v1.18.0
@@ -13,6 +14,7 @@ require (
 	github.com/jarcoal/httpmock v1.4.1
 	github.com/magefile/mage v1.15.0
 	github.com/modelcontextprotocol/go-sdk v1.3.1
+	github.com/openai/openai-go v1.12.0
 	github.com/ossf/osv-schema/bindings/go v0.0.0-20251230224438-88c48750ddae
 	github.com/r3labs/diff/v3 v3.0.2
 	github.com/smartystreets/goconvey v1.8.1
@@ -23,6 +25,7 @@ require (
 	github.com/xeipuuv/gojsonschema v1.2.0
 	golang.org/x/crypto v0.47.0
 	golang.org/x/mod v0.31.0
+	google.golang.org/genai v1.48.0
 	google.golang.org/protobuf v1.36.11
 	gopkg.in/yaml.v3 v3.0.1
 )
@@ -131,6 +134,7 @@ require (
 	github.com/googleapis/enterprise-certificate-proxy v0.3.6 // indirect
 	github.com/googleapis/gax-go/v2 v2.14.2 // indirect
 	github.com/gopherjs/gopherjs v1.17.2 // indirect
+	github.com/gorilla/websocket v1.5.3 // indirect
 	github.com/hhatto/gorst v0.0.0-20181029133204-ca9f730cac5b // indirect
 	github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
 	github.com/jdkato/prose v1.2.1 // indirect
@@ -186,6 +190,7 @@ require (
 	github.com/tidwall/jsonc v0.3.2 // indirect
 	github.com/tidwall/match v1.1.1 // indirect
 	github.com/tidwall/pretty v1.2.1 // indirect
+	github.com/tidwall/sjson v1.2.5 // indirect
 	github.com/tink-crypto/tink-go/v2 v2.4.0 // indirect
 	github.com/tklauser/go-sysconf v0.3.15 // indirect
 	github.com/tklauser/numcpus v0.10.0 // indirect
diff --git a/go.sum b/go.sum
index 72d0193b..25e82d7c 100644
--- a/go.sum
+++ b/go.sum
@@ -72,6 +72,8 @@ github.com/anchore/go-struct-converter v0.0.0-20250211213226-cce56d595160 h1:r8/
 github.com/anchore/go-struct-converter v0.0.0-20250211213226-cce56d595160/go.mod h1:rYqSE9HbjzpHTI74vwPvae4ZVYZd1lue2ta6xHPdblA=
 github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8=
 github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4=
+github.com/anthropics/anthropic-sdk-go v1.26.0 h1:oUTzFaUpAevfuELAP1sjL6CQJ9HHAfT7CoSYSac11PY=
+github.com/anthropics/anthropic-sdk-go v1.26.0/go.mod h1:qUKmaW+uuPB64iy1l+4kOSvaLqPXnHTTBKH6RVZ7q5Q=
 github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
 github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
 github.com/bmatcuk/doublestar/v4 v4.9.2 h1:b0mc6WyRSYLjzofB2v/0cuDUZ+MqoGyH3r0dVij35GI=
@@ -138,6 +140,8 @@ github.com/djherbis/times v1.6.0 h1:w2ctJ92J8fBvWPxugmXIv7Nz7Q3iDMKNx9v5ocVH20c=
 github.com/djherbis/times v1.6.0/go.mod h1:gOHeRAz2h+VJNZ5Gmc/o7iD9k4wW7NMVqieYCY99oc0=
 github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ=
 github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
+github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI=
+github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ=
 github.com/docker/cli v28.3.3+incompatible h1:fp9ZHAr1WWPGdIWBM1b3zLtgCF+83gRdVMTJsUeiyAo=
 github.com/docker/cli v28.3.3+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
 github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBirtxJnzDrHLEKxTAYk=
@@ -283,6 +287,8 @@ github.com/googleapis/gax-go/v2 v2.14.2 h1:eBLnkZ9635krYIPD+ag1USrOAI0Nr0QYF3+/3
 github.com/googleapis/gax-go/v2 v2.14.2/go.mod h1:ON64QhlJkhVtSqp4v1uaK92VyZ2gmvDQsweuyLV+8+w=
 github.com/gopherjs/gopherjs v1.17.2 h1:fQnZVsXk8uxXIStYb0N4bGk7jeyTalG/wsZjQ25dO0g=
 github.com/gopherjs/gopherjs v1.17.2/go.mod h1:pRRIvn/QzFLrKfvEz3qUuEhtE/zLCWfreZ6J5gM2i+k=
+github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
+github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
 github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
 github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1 h1:e9Rjr40Z98/clHv5Yg79Is0NtosR5LXRvdr7o/6NwbA=
 github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1/go.mod h1:tIxuGz/9mpox++sgp9fJjHO0+q1X9/UOWd798aAm22M=
@@ -373,6 +379,8 @@ github.com/neurosnap/sentences v1.0.6 h1:iBVUivNtlwGkYsJblWV8GGVFmXzZzak907Ci8aA
 github.com/neurosnap/sentences v1.0.6/go.mod h1:pg1IapvYpWCJJm/Etxeh0+gtMf1rI1STY9S7eUCPbDc=
 github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k=
 github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY=
+github.com/openai/openai-go v1.12.0 h1:NBQCnXzqOTv5wsgNC36PrFEiskGfO5wccfCWDo9S1U0=
+github.com/openai/openai-go v1.12.0/go.mod h1:g461MYGXEXBVdV5SaR/5tNzNbSfwTBBefwc+LlDCK0Y=
 github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
 github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
@@ -480,6 +488,7 @@ github.com/terminalstatic/go-xsd-validate v0.1.6 h1:TenYeQ3eY631qNi1/cTmLH/s2slH
 github.com/terminalstatic/go-xsd-validate v0.1.6/go.mod h1:18lsvYFofBflqCrvo1umpABZ99+GneNTw2kEEc8UPJw=
 github.com/thoas/go-funk v0.9.3 h1:7+nAEx3kn5ZJcnDm2Bh23N2yOtweO14bi//dvRtgLpw=
 github.com/thoas/go-funk v0.9.3/go.mod h1:+IWnUfUmFO1+WVYQWQtIJHeRRdaIyyYglZN7xzUPe4Q=
+github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
 github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
 github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
 github.com/tidwall/jsonc v0.3.2 h1:ZTKrmejRlAJYdn0kcaFqRAKlxxFIC21pYq8vLa4p2Wc=
@@ -673,6 +682,8 @@ google.golang.org/api v0.237.0 h1:MP7XVsGZesOsx3Q8WVa4sUdbrsTvDSOERd3Vh4xj/wc=
 google.golang.org/api v0.237.0/go.mod h1:cOVEm2TpdAGHL2z+UwyS+kmlGr3bVWQQ6sYEqkKje50=
 google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
 google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
+google.golang.org/genai v1.48.0 h1:1vb15G291wAjJJueisMDpUhssljhEdJU2t5qTidrVPs=
+google.golang.org/genai v1.48.0/go.mod h1:A3kkl0nyBjyFlNjgxIwKq70julKbIxpSxqKO5gw/gmk=
 google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
 google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
 google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
@@ -712,6 +723,7 @@ gopkg.in/neurosnap/sentences.v1 v1.0.7/go.mod h1:YlK+SN+fLQZj+kY3r8DkGDhDr91+S3J
 gopkg.in/warnings.v0 v0.1.2 h1:wFXVbFY8DY5/xOe1ECiWdKCzZlxgshcYVNkBHstARME=
 gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI=
 gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
 gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
diff --git a/pkg/llmclient/README.md b/pkg/llmclient/README.md
index 41cb7a84..0cd94df1 100644
--- a/pkg/llmclient/README.md
+++ b/pkg/llmclient/README.md
@@ -2,7 +2,7 @@

 LLM client package for code analysis.

-- **AgenticClient**: Provider-agnostic agentic client using [langchaingo](https://github.com/tmc/langchaingo). Gives the LLM tools to explore a repository and answer questions about code.
+- **AgenticClient**: Provider-agnostic agentic client using native SDK implementations for each provider. Gives the LLM tools to explore a repository and answer questions about code.

 ## AgenticClient
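A minimal usage sketch of the API after this change, assuming only the types visible in this diff (`NewAgenticClient`, `AgenticCallOptions`, `AnswerSchema`, and the import path implied by `pkg/llmclient`); the model name and environment variable are illustrative:

```go
package main

import (
	"context"
	"fmt"
	"log"
	"os"

	"github.com/grafana/plugin-validator/pkg/llmclient"
)

func main() {
	client, err := llmclient.NewAgenticClient(&llmclient.AgenticCallOptions{
		Provider: "google",
		Model:    "gemini-2.5-flash",
		APIKey:   os.Getenv("GEMINI_API_KEY"),
	})
	if err != nil {
		log.Fatal(err)
	}

	// CallLLM now takes a slice of questions instead of a single prompt;
	// answers come back in order, each tagged with its Question field.
	answers, err := client.CallLLM(context.Background(), []string{
		"Does this plugin access the filesystem?",
	}, "/path/to/repo")
	if err != nil {
		log.Fatal(err)
	}
	for _, a := range answers {
		fmt.Printf("%s -> short_answer=%v\n", a.Question, a.ShortAnswer)
	}
}
```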
diff --git a/pkg/llmclient/agentic_client.go b/pkg/llmclient/agentic_client.go
index cef19851..9a6cd7a4 100644
--- a/pkg/llmclient/agentic_client.go
+++ b/pkg/llmclient/agentic_client.go
@@ -6,44 +6,39 @@ import (
 	"fmt"
 	"time"

-	"github.com/tmc/langchaingo/llms"
-	"github.com/tmc/langchaingo/llms/anthropic"
-	"github.com/tmc/langchaingo/llms/googleai"
-	"github.com/tmc/langchaingo/llms/openai"
+	"github.com/grafana/plugin-validator/pkg/llmprovider"
+	"github.com/grafana/plugin-validator/pkg/llmprovider/anthropicprovider"
+	"github.com/grafana/plugin-validator/pkg/llmprovider/geminiprovider"
+	"github.com/grafana/plugin-validator/pkg/llmprovider/openaiprovider"
 )

 const (
-	maxToolCalls          = 100
-	maxLLMRetries         = 3
-	maxConsecutiveNoTools = 5
-	retryDelay            = 2 * time.Second
-)
+	maxToolCallsFirstQuestion = 60
+	maxToolCallsFollowUp      = 20
+	maxLLMRetries             = 3
+	maxConsecutiveNoTools     = 5
+	retryDelay                = 2 * time.Second

-// AnswerSchema represents the structured response from the agentic client
-type AnswerSchema struct {
-	Answer      string   `json:"answer"`
-	ShortAnswer bool     `json:"short_answer"`
-	Files       []string `json:"files,omitempty"`
-	CodeSnippet string   `json:"code_snippet,omitempty"`
-}
+	budgetNudgePrompt = `You have only %d tool calls remaining. Wrap up your investigation and call submit_answer now with whatever information you have gathered so far.`

-// AgenticCallOptions contains configuration for the agentic LLM call
-type AgenticCallOptions struct {
-	Model    string // e.g. "gemini-2.0-flash"
-	Provider string // "google", "anthropic", "openai"
-	APIKey   string
-}
+	useToolsReminderPrompt = `You are in non-interactive mode. You must start using your tools now to explore the repository. When you have enough information, use submit_answer to provide your answer.`
+
+	submitAnswerAloneError = `Error: submit_answer must be called alone. When you have an answer, call submit_answer as a single tool call without any other tools in the same response.`
+)

 // AgenticClient is an interface for agentic LLM interactions
 type AgenticClient interface {
-	CallLLM(ctx context.Context, prompt, repositoryPath string) ([]AnswerSchema, error)
+	CallLLM(ctx context.Context, questions []string, repositoryPath string) ([]AnswerSchema, error)
 }

 // agenticClientImpl implements AgenticClient
 type agenticClientImpl struct {
-	apiKey   string
-	model    string
-	provider string
+	apiKey       string
+	model        string
+	provider     string
+	tools        []llmprovider.Tool
+	systemPrompt string
+	executor     *toolExecutor
 }

 // NewAgenticClient creates a new AgenticClient with the given options
@@ -60,130 +55,205 @@ func NewAgenticClient(opts *AgenticCallOptions) (AgenticClient, error) {
 	if opts.Provider == "" {
 		return nil, fmt.Errorf("provider is required")
 	}
+
+	tools, err := resolveTools(opts)
+	if err != nil {
+		return nil, fmt.Errorf("resolving tools: %w", err)
+	}
+
 	return &agenticClientImpl{
-		apiKey:   opts.APIKey,
-		model:    opts.Model,
-		provider: opts.Provider,
+		apiKey:       opts.APIKey,
+		model:        opts.Model,
+		provider:     opts.Provider,
+		tools:        tools,
+		systemPrompt: buildSystemPrompt(opts.SystemPrompt, tools),
 	}, nil
 }

 // CallLLM executes an agentic loop with tools to answer questions about code.
-// The prompt may contain multiple questions, in which case the agent will call
-// submit_answer multiple times. All answers are collected and returned.
-func (c *agenticClientImpl) CallLLM(ctx context.Context, prompt, repositoryPath string) ([]AnswerSchema, error) {
+// Each question is processed sequentially, with follow-up questions benefiting
+// from the context accumulated by earlier questions.
+func (c *agenticClientImpl) CallLLM(
+	ctx context.Context,
+	questions []string,
+	repositoryPath string,
+) ([]AnswerSchema, error) {
+	if len(questions) == 0 {
+		return nil, fmt.Errorf("at least one question is required")
+	}
+
 	// Initialize LLM based on provider using the client's configured settings
 	opts := &AgenticCallOptions{
 		APIKey:   c.apiKey,
 		Model:    c.model,
 		Provider: c.provider,
 	}
-	llm, err := initLLM(ctx, opts)
+	provider, err := initProvider(ctx, opts)
 	if err != nil {
 		return nil, fmt.Errorf("failed to initialize LLM: %w", err)
 	}

-	// Build tools
-	tools := buildAgenticTools()
-
-	// Create tool executor
-	executor := newToolExecutor(repositoryPath)
-
-	// System prompt
-	systemPrompt := `You are a code analysis assistant. You have tools to explore code in a repository.
-
-AVAILABLE TOOLS:
-- list_directory: List files at a path. Use "." for root.
-- read_file: Read a file's contents. This is your primary tool for understanding code.
-- grep: Search for a pattern across files.
-- git: Run read-only git commands (log, show, diff, status, etc.)
-- submit_answer: Submit your final answer.
-
-STRATEGY:
-1. Use list_directory to see what files exist
-2. Use read_file to read the source code files
-3. Analyze the code to answer the question
-
-You can only use one tool at a time.
-IMPORTANT: You are in non-interactive mode. Start working and using your tools immediately.
-When ready, use submit_answer. For multiple questions, call submit_answer once per question.`
+	// Initialize executor for this repository
+	c.executor = newToolExecutor(repositoryPath)

-	// Build initial messages
-	messages := []llms.MessageContent{
-		llms.TextParts(llms.ChatMessageTypeSystem, systemPrompt),
-		llms.TextParts(llms.ChatMessageTypeHuman, prompt),
+	// Build initial messages with system prompt only (no user message yet)
+	messages := []llmprovider.Message{
+		llmprovider.TextMessage(llmprovider.RoleSystem, c.systemPrompt),
 	}

-	// Collect answers
-	var answers []AnswerSchema
-
-	// Agentic loop
-	toolCallsRemaining := maxToolCalls
-
 	// Print debug log file path before starting the loop
 	printDebugLogPath()
 	debugLog("\n\n\n")
 	debugLog("################################################################")
 	debugLog("# NEW CallLLM - provider=%s model=%s", c.provider, c.model)
 	debugLog("# repo=%s", repositoryPath)
-	debugLog("# prompt=%s", truncateString(prompt, 200))
+	debugLog("# questions=%d", len(questions))
 	debugLog("################################################################")

-	iteration := 0
+	// Collect answers
+	var answers []AnswerSchema
+
+	// Process each question sequentially
+	for questionIndex, question := range questions {
+		debugLog(
+			"\n========== Processing question %d/%d ==========",
+			questionIndex+1,
+			len(questions),
+		)
+		debugLog("Question: %s", truncateString(question, 200))
+
+		originalQuestion := question
+
+		// Determine budget for this question
+		toolsBudget := maxToolCallsFirstQuestion
+		if questionIndex > 0 {
+			toolsBudget = maxToolCallsFollowUp
+		}
+		debugLog("Budget: %d tool calls", toolsBudget)
+
+		// Add the question as a human message
+		messages = append(messages, llmprovider.TextMessage(llmprovider.RoleHuman, question))
+
+		// Run the question loop
+		updatedMessages, answer, err := c.runQuestionLoop(
+			ctx,
+			provider,
+			messages,
+			toolsBudget,
+			questionIndex,
+		)
+		messages = updatedMessages
+
+		if err != nil {
+			// Return partial results on error
+			debugLog("AgenticClient: question %d failed: %v", questionIndex+1, err)
+			if len(answers) > 0 {
+				debugLog("AgenticClient: returning %d partial answers", len(answers))
+				return answers, nil
+			}
+			return nil, err
+		}
+
+		if answer != nil {
+			// Set the question field
+			answer.Question = originalQuestion
+			answers = append(answers, *answer)
+			debugLog("AgenticClient: collected answer %d/%d", len(answers), len(questions))
+		} else {
+			// Budget exhausted without answer - stop processing further questions
+			debugLog("AgenticClient: question %d exhausted budget without answer, stopping", questionIndex+1)
+			if len(answers) > 0 {
+				debugLog("AgenticClient: returning %d partial answers", len(answers))
+				return answers, nil
+			}
+			return nil, fmt.Errorf("question %d exhausted budget without providing answer", questionIndex+1)
+		}
+	}
+
+	debugLog("AgenticClient: successfully answered all %d questions", len(questions))
+	return answers, nil
+}
+
+// runQuestionLoop runs the tool-calling loop for a single question.
+// Returns updated messages, the answer (or nil if budget exhausted), and error.
+func (c *agenticClientImpl) runQuestionLoop(
+	ctx context.Context,
+	provider llmprovider.Provider,
+	messages []llmprovider.Message,
+	toolsBudget int,
+	questionIndex int,
+) ([]llmprovider.Message, *AnswerSchema, error) {
+	toolCallsRemaining := toolsBudget
 	consecutiveNoTools := 0
+	iteration := 0
+
+	budgetNudged := false
+
 	for toolCallsRemaining > 0 {
 		iteration++
-		debugLog("========== AgenticClient: iteration %d ==========", iteration)
-		debugLog("AgenticClient: %d tool calls remaining, %d answers collected", toolCallsRemaining, len(answers))
+		debugLog("========== Question %d iteration %d ==========", questionIndex+1, iteration)
+		debugLog("AgenticClient: %d tool calls remaining", toolCallsRemaining)
+
+		if !budgetNudged && toolCallsRemaining <= 5 {
+			budgetNudged = true
+			debugLog("AgenticClient: nudging model about low budget")
+			messages = append(messages, llmprovider.TextMessage(
+				llmprovider.RoleHuman,
+				fmt.Sprintf(budgetNudgePrompt, toolCallsRemaining),
+			))
+		}

 		// Call LLM with retry logic
 		debugLog("AgenticClient: calling LLM...")
-		resp, err := callLLMWithRetry(ctx, llm, messages, tools)
+		resp, err := c.callLLMWithRetry(ctx, provider, messages)
 		if err != nil {
 			debugLog("AgenticClient: LLM call failed: %v", err)
-			return nil, fmt.Errorf("LLM call failed after %d retries: %w", maxLLMRetries, err)
+			return messages, nil, fmt.Errorf(
+				"LLM call failed after %d retries: %w",
+				maxLLMRetries,
+				err,
+			)
 		}

-		// resp.Choices contains the LLM's response options. Each choice has Content (text)
-		// and/or ToolCalls (function calls the model wants to make). Typically there's
-		// only one choice unless you request multiple completions.
 		if len(resp.Choices) == 0 {
 			debugLog("AgenticClient: no choices in response")
-			return nil, fmt.Errorf("no response from LLM")
+			return messages, nil, fmt.Errorf("no response from LLM")
 		}

-		// Use first choice. Google puts all tool calls in choices[0].ToolCalls.
-		// Anthropic creates a separate choice per content block (text or tool_use),
-		// but langchaingo's handleAIMessage only supports Parts[0] as either
-		// TextContent or ToolCall, so we process one choice at a time.
 		choice := resp.Choices[0]
-		debugLog("AgenticClient: received response with %d tool calls", len(choice.ToolCalls))
-		if choice.Content != "" {
-			debugLog("AgenticClient: AI message: %s", truncateString(choice.Content, 200))
+		debugLog("AgenticClient: choice - Content=%q, ToolCalls=%d, Thinking=%d",
+			truncateString(choice.Content, 200), len(choice.ToolCalls), len(choice.Thinking))
+		for j, t := range choice.Thinking {
+			debugLog("AgenticClient: thinking[%d]: text=%q sig=%v",
+				j, truncateString(t.Text, 150), t.Signature != "")
 		}

-		// If no tool calls, check if we have answers
+		// If no tool calls, check if we should nudge the agent
		if len(choice.ToolCalls) == 0 {
 			debugLog("AgenticClient: no tool calls in response")

-			// If we have collected answers, the agent is done
-			if len(answers) > 0 {
-				debugLog("AgenticClient: agent finished with %d answers", len(answers))
-				return answers, nil
-			}
-
 			consecutiveNoTools++
-			debugLog("AgenticClient: consecutive no-tool responses: %d/%d", consecutiveNoTools, maxConsecutiveNoTools)
+			debugLog(
+				"AgenticClient: consecutive no-tool responses: %d/%d",
+				consecutiveNoTools,
+				maxConsecutiveNoTools,
+			)
 			if consecutiveNoTools >= maxConsecutiveNoTools {
-				return nil, fmt.Errorf("agent failed to use tools after %d consecutive attempts", maxConsecutiveNoTools)
+				return messages, nil, fmt.Errorf(
+					"agent failed to use tools after %d consecutive attempts",
+					maxConsecutiveNoTools,
+				)
 			}

-			// No answers yet - add the AI response and remind to use tools
+			// Add the AI response and remind to use tools
 			if choice.Content != "" {
-				messages = append(messages, llms.TextParts(llms.ChatMessageTypeAI, choice.Content))
+				messages = append(messages, llmprovider.TextMessage(llmprovider.RoleAI, choice.Content))
 			}
-			debugLog("AgenticClient: no answers yet, reminding agent to use tools")
-			messages = append(messages, llms.TextParts(llms.ChatMessageTypeHuman,
-				"You are in non-interactive mode. You must start using your tools now to explore the repository. When you have enough information, use submit_answer to provide your answer."))
+			debugLog("AgenticClient: reminding agent to use tools")
+			messages = append(messages, llmprovider.TextMessage(
+				llmprovider.RoleHuman,
+				useToolsReminderPrompt,
+			))
 			toolCallsRemaining--
 			continue
 		}
@@ -191,89 +261,137 @@ When ready, use submit_answer. For multiple questions, call submit_answer once p
 		// Reset consecutive no-tool counter when tools are used
 		consecutiveNoTools = 0

-		// Build AI message with tool calls
-		aiMessage := llms.MessageContent{
-			Role: llms.ChatMessageTypeAI,
+		// Build the assistant message with all parts from the response:
+		// thinking blocks, text content, and tool calls.
+		var aiParts []llmprovider.Part
+		for _, t := range choice.Thinking {
+			aiParts = append(aiParts, t)
 		}
 		if choice.Content != "" {
-			aiMessage.Parts = append(aiMessage.Parts, llms.TextContent{Text: choice.Content})
+			aiParts = append(aiParts, llmprovider.TextPart{Text: choice.Content})
+		}
+		for _, tc := range choice.ToolCalls {
+			aiParts = append(aiParts, tc)
 		}
+		messages = append(messages, llmprovider.Message{
+			Role:  llmprovider.RoleAI,
+			Parts: aiParts,
+		})
+
+		// Validate submit_answer is called alone
+		hasSubmitAnswer := false
 		for _, toolCall := range choice.ToolCalls {
-			aiMessage.Parts = append(aiMessage.Parts, toolCall)
+			if toolCall.Name == "submit_answer" {
+				hasSubmitAnswer = true
+				break
+			}
+		}
+		if hasSubmitAnswer && len(choice.ToolCalls) > 1 {
+			debugLog("AgenticClient: submit_answer called with other tools - rejecting all")
+			var resultParts []llmprovider.Part
+			for _, toolCall := range choice.ToolCalls {
+				toolCallsRemaining--
+				resultParts = append(resultParts, llmprovider.ToolResultPart{
+					ToolCallID: toolCall.ID,
+					Name:       toolCall.Name,
+					Content:    submitAnswerAloneError,
+				})
+			}
+			messages = append(messages, llmprovider.Message{
+				Role:  llmprovider.RoleTool,
+				Parts: resultParts,
+			})
+			continue
 		}
-		messages = append(messages, aiMessage)

-		// Process tool calls
+		// Execute tool calls and collect results into a single tool message.
+		var resultParts []llmprovider.Part
+		var answer *AnswerSchema
 		for i, toolCall := range choice.ToolCalls {
 			toolCallsRemaining--
-			response, answer := processToolCall(toolCall, i, len(choice.ToolCalls), len(answers), executor)
-			messages = append(messages, response)
-			if answer != nil {
-				answers = append(answers, *answer)
+			response, ans := c.processToolCall(toolCall, i, len(choice.ToolCalls))
+			resultParts = append(resultParts, response.Parts...)
+			if ans != nil {
+				answer = ans
 			}
 		}
+		messages = append(messages, llmprovider.Message{
+			Role:  llmprovider.RoleTool,
+			Parts: resultParts,
+		})
+		if answer != nil {
+			debugLog("AgenticClient: received answer for question %d", questionIndex+1)
+			return messages, answer, nil
+		}
 	}

-	// If we collected some answers but ran out of tool calls, return what we have
-	if len(answers) > 0 {
-		debugLog("AgenticClient: ran out of tool calls, returning %d answers", len(answers))
-		return answers, nil
-	}
-
-	return nil, fmt.Errorf("exceeded maximum tool calls (%d), agent did not complete", maxToolCalls)
+	// Budget exhausted without answer
+	debugLog("AgenticClient: question %d exhausted budget", questionIndex+1)
+	return messages, nil, nil
 }

 // processToolCall processes a single tool call and returns the response message and optional answer
-func processToolCall(toolCall llms.ToolCall, index, total, currentAnswerCount int, executor *toolExecutor) (llms.MessageContent, *AnswerSchema) {
-	debugLog("AgenticClient: [%d/%d] executing tool: %s", index+1, total, toolCall.FunctionCall.Name)
-	debugLog("AgenticClient: tool args: %s", truncateString(toolCall.FunctionCall.Arguments, 500))
+func (c *agenticClientImpl) processToolCall(
+	toolCall llmprovider.ToolCallPart,
+	index, total int,
+) (llmprovider.Message, *AnswerSchema) {
+	debugLog(
+		"AgenticClient: [%d/%d] executing tool: %s",
+		index+1,
+		total,
+		toolCall.Name,
+	)
+	debugLog("AgenticClient: tool args: %s", truncateString(toolCall.Arguments, 500))

 	// Check for submit_answer
-	if toolCall.FunctionCall.Name == "submit_answer" {
+	if toolCall.Name == "submit_answer" {
 		var answer AnswerSchema
-		if err := json.Unmarshal([]byte(toolCall.FunctionCall.Arguments), &answer); err != nil {
+		if err := json.Unmarshal([]byte(toolCall.Arguments), &answer); err != nil {
 			debugLog("AgenticClient: failed to parse submit_answer: %v", err)
 			// Report parse error back to agent so it can retry
-			return llms.MessageContent{
-				Role: llms.ChatMessageTypeTool,
-				Parts: []llms.ContentPart{
-					llms.ToolCallResponse{
+			return llmprovider.Message{
+				Role: llmprovider.RoleTool,
+				Parts: []llmprovider.Part{
+					llmprovider.ToolResultPart{
 						ToolCallID: toolCall.ID,
-						Name:       toolCall.FunctionCall.Name,
-						Content:    fmt.Sprintf("Error parsing answer: %v. Please try again with valid JSON.", err),
+						Name:       toolCall.Name,
+						Content: fmt.Sprintf(
+							"Error parsing answer: %v. Please try again with valid JSON.",
+							err,
+						),
 					},
 				},
 			}, nil
 		}

-		debugLog("AgenticClient: received answer #%d: short_answer=%v, answer=%s",
-			currentAnswerCount+1, answer.ShortAnswer, truncateString(answer.Answer, 100))
+		debugLog("AgenticClient: received answer: short_answer=%v, answer=%s",
+			answer.ShortAnswer, truncateString(answer.Answer, 100))

 		// Return success response and the answer
-		return llms.MessageContent{
-			Role: llms.ChatMessageTypeTool,
-			Parts: []llms.ContentPart{
-				llms.ToolCallResponse{
+		return llmprovider.Message{
+			Role: llmprovider.RoleTool,
+			Parts: []llmprovider.Part{
+				llmprovider.ToolResultPart{
 					ToolCallID: toolCall.ID,
-					Name:       toolCall.FunctionCall.Name,
-					Content:    "Answer recorded successfully. If you have answered all questions, respond with a plain text message saying 'I am finished'. Otherwise, continue with the next question.",
+					Name:       toolCall.Name,
+					Content:    "Answer recorded successfully.",
 				},
 			},
 		}, &answer
 	}

 	// Execute other tools
-	result, err := executor.execute(toolCall.FunctionCall.Name, toolCall.FunctionCall.Arguments)
+	result, err := c.executor.execute(toolCall.Name, toolCall.Arguments)
 	if err != nil {
 		result = fmt.Sprintf("Error: %v", err)
 	}

 	debugLog("AgenticClient: tool result: %s", truncateString(result, 300))
-	return llms.MessageContent{
-		Role: llms.ChatMessageTypeTool,
-		Parts: []llms.ContentPart{
-			llms.ToolCallResponse{
+	return llmprovider.Message{
+		Role: llmprovider.RoleTool,
+		Parts: []llmprovider.Part{
+			llmprovider.ToolResultPart{
 				ToolCallID: toolCall.ID,
-				Name:       toolCall.FunctionCall.Name,
+				Name:       toolCall.Name,
 				Content:    result,
 			},
 		},
@@ -281,10 +399,14 @@ func processToolCall(toolCall llms.ToolCall, index, total, currentAnswerCount in
 }

 // callLLMWithRetry calls the LLM with retry logic for transient errors
-func callLLMWithRetry(ctx context.Context, llm llms.Model, messages []llms.MessageContent, tools []llms.Tool) (*llms.ContentResponse, error) {
+func (c *agenticClientImpl) callLLMWithRetry(
+	ctx context.Context,
+	provider llmprovider.Provider,
+	messages []llmprovider.Message,
+) (*llmprovider.Response, error) {
 	var lastErr error
 	for attempt := 1; attempt <= maxLLMRetries; attempt++ {
-		resp, err := llm.GenerateContent(ctx, messages, llms.WithTools(tools))
+		resp, err := provider.GenerateContent(ctx, messages, llmprovider.WithTools(c.tools))
 		if err == nil {
 			return resp, nil
 		}
@@ -307,26 +429,19 @@ func truncateString(s string, maxLen int) string {
 	return s[:maxLen] + "..."
 }

-// initLLM initializes the appropriate LLM based on provider
-func initLLM(ctx context.Context, opts *AgenticCallOptions) (llms.Model, error) {
+// initProvider creates the appropriate native provider for the given config.
+func initProvider(ctx context.Context, opts *AgenticCallOptions) (llmprovider.Provider, error) {
 	switch opts.Provider {
 	case "google":
-		return googleai.New(
-			ctx,
-			googleai.WithAPIKey(opts.APIKey),
-			googleai.WithDefaultModel(opts.Model),
-		)
+		return geminiprovider.New(ctx, opts.APIKey, opts.Model)
 	case "anthropic":
-		return anthropic.New(
-			anthropic.WithToken(opts.APIKey),
-			anthropic.WithModel(opts.Model),
-		)
+		return anthropicprovider.New(opts.APIKey, opts.Model)
 	case "openai":
-		return openai.New(
-			openai.WithToken(opts.APIKey),
-			openai.WithModel(opts.Model),
-		)
+		return openaiprovider.New(opts.APIKey, opts.Model)
 	default:
-		return nil, fmt.Errorf("unsupported provider: %s (supported: google, anthropic, openai)", opts.Provider)
+		return nil, fmt.Errorf(
+			"unsupported provider: %s (supported: google, anthropic, openai)",
+			opts.Provider,
+		)
 	}
 }
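The `submit_answer` arguments are unmarshalled directly into `AnswerSchema` in `processToolCall`. A minimal sketch of that mapping, using only the struct shown in this diff; the sample JSON payload is illustrative:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// AnswerSchema mirrors the struct defined in this diff (agentic_types.go).
type AnswerSchema struct {
	Question    string   `json:"question"`
	Answer      string   `json:"answer"`
	ShortAnswer bool     `json:"short_answer"`
	Files       []string `json:"files,omitempty"`
	CodeSnippet string   `json:"code_snippet,omitempty"`
}

func main() {
	// Illustrative arguments as a model might pass them to submit_answer.
	args := `{
		"answer": "The plugin reads dashboards.json via os.ReadFile.",
		"short_answer": true,
		"files": ["pkg/plugin/datasource.go"],
		"code_snippet": "data, err := os.ReadFile(path)"
	}`

	var a AnswerSchema
	if err := json.Unmarshal([]byte(args), &a); err != nil {
		// processToolCall reports parse errors back to the model so it can retry.
		fmt.Println("parse error:", err)
		return
	}
	fmt.Printf("short_answer=%v files=%v\n", a.ShortAnswer, a.Files)
}
```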
diff --git a/pkg/llmclient/agentic_client_integration_test.go b/pkg/llmclient/agentic_client_integration_test.go
index 549aa77a..abf72676 100644
--- a/pkg/llmclient/agentic_client_integration_test.go
+++ b/pkg/llmclient/agentic_client_integration_test.go
@@ -6,169 +6,202 @@ import (
 	"path/filepath"
 	"testing"

-	"github.com/grafana/plugin-validator/pkg/logme"
-	"github.com/grafana/plugin-validator/pkg/prettyprint"
 	"github.com/stretchr/testify/require"
 )

-func hasGeminiAPIKey() bool {
-	return os.Getenv("GEMINI_API_KEY") != ""
+type providerConfig struct {
+	name     string
+	provider string
+	model    string
+	envKey   string
 }

-func hasAnthropicAPIKey() bool {
-	return os.Getenv("ANTHROPIC_API_KEY") != ""
+var providers = []providerConfig{
+	{name: "Gemini", provider: "google", model: "gemini-2.5-flash", envKey: "GEMINI_API_KEY"},
+	{
+		name:     "Anthropic",
+		provider: "anthropic",
+		model:    "claude-haiku-4-5",
+		envKey:   "ANTHROPIC_API_KEY",
+	},
+	{name: "OpenAI", provider: "openai", model: "gpt-5-mini", envKey: "OPENAI_API_KEY"},
 }

-// TestAgenticClient_NoFilesystemAccess tests that the agent correctly identifies
-// when an application does NOT access the filesystem
-func TestAgenticClient_NoFilesystemAccess(t *testing.T) {
-	if !hasGeminiAPIKey() {
-		t.Skip("GEMINI_API_KEY not set, skipping agentic client integration test")
-	}
-
-	opts := &AgenticCallOptions{
-		Provider: "google",
-		Model:    "gemini-2.0-flash",
-		APIKey:   os.Getenv("GEMINI_API_KEY"),
-	}
-
-	client, err := NewAgenticClient(opts)
-	require.NoError(t, err)
-
-	testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access"))
-	require.NoError(t, err)
-
-	prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
-
-	answers, err := client.CallLLM(context.Background(), prompt, testDataPath)
-	logme.DebugFln("Agent answers:")
-	prettyprint.Print(answers)
-
-	require.NoError(t, err, "CallLLM should not return error")
-	require.Len(t, answers, 1, "Should return exactly one answer")
-
-	answer := answers[0]
-	require.NotEmpty(t, answer.Answer, "Answer field should be populated")
-	require.Equal(t, false, answer.ShortAnswer, "ShortAnswer should be false - this app does not access the filesystem")
-
-	t.Logf("Agent Answer: %s", answer.Answer)
-	t.Logf("Short Answer: %v", answer.ShortAnswer)
-	if len(answer.Files) > 0 {
-		t.Logf("Files: %v", answer.Files)
-	}
+func skipIfMissingKey(t *testing.T, p providerConfig) {
+	t.Helper()
+	if os.Getenv(p.envKey) == "" || os.Getenv("DEBUG") != "1" {
+		t.Skipf("%s not set or DEBUG!=1, skipping %s integration test", p.envKey, p.name)
+	}
 }

-// TestAgenticClient_FilesystemAccess tests that the agent correctly identifies
-// when an application DOES access the filesystem
-func TestAgenticClient_FilesystemAccess(t *testing.T) {
-	if !hasGeminiAPIKey() {
-		t.Skip("GEMINI_API_KEY not set, skipping agentic client integration test")
-	}
-
-	opts := &AgenticCallOptions{
-		Provider: "google",
-		Model:    "gemini-2.0-flash",
-		APIKey:   os.Getenv("GEMINI_API_KEY"),
-	}
-
-	client, err := NewAgenticClient(opts)
-	require.NoError(t, err)
-
-	testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access"))
-	require.NoError(t, err)
-
-	prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
-
-	answers, err := client.CallLLM(context.Background(), prompt, testDataPath)
-	logme.DebugFln("Agent answers:")
-	prettyprint.Print(answers)
-
-	require.NoError(t, err, "CallLLM should not return error")
-	require.Len(t, answers, 1, "Should return exactly one answer")
-
-	answer := answers[0]
-	require.NotEmpty(t, answer.Answer, "Answer field should be populated")
-	require.Equal(t, true, answer.ShortAnswer, "ShortAnswer should be true - this app accesses the filesystem via os.ReadFile")
-
-	t.Logf("Agent Answer: %s", answer.Answer)
-	t.Logf("Short Answer: %v", answer.ShortAnswer)
-	if len(answer.Files) > 0 {
-		t.Logf("Files: %v", answer.Files)
-	}
+func newClient(t *testing.T, p providerConfig) AgenticClient {
+	t.Helper()
+	client, err := NewAgenticClient(&AgenticCallOptions{
+		Provider: p.provider,
+		Model:    p.model,
+		APIKey:   os.Getenv(p.envKey),
+	})
+	require.NoError(t, err)
+	return client
 }

-// TestAgenticClient_NoFilesystemAccess_Anthropic tests the same scenario using Anthropic Claude
-func TestAgenticClient_NoFilesystemAccess_Anthropic(t *testing.T) {
-	if !hasAnthropicAPIKey() {
-		t.Skip("ANTHROPIC_API_KEY not set, skipping Anthropic agentic client integration test")
-	}
-
-	opts := &AgenticCallOptions{
-		Provider: "anthropic",
-		Model:    "claude-sonnet-4-5",
-		APIKey:   os.Getenv("ANTHROPIC_API_KEY"),
-	}
-
-	client, err := NewAgenticClient(opts)
-	require.NoError(t, err)
-
-	testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access"))
-	require.NoError(t, err)
-
-	prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
-
-	answers, err := client.CallLLM(context.Background(), prompt, testDataPath)
-	logme.DebugFln("Agent answers:")
-	prettyprint.Print(answers)
-
-	require.NoError(t, err, "CallLLM should not return error")
-	require.Len(t, answers, 1, "Should return exactly one answer")
-
-	answer := answers[0]
-	require.NotEmpty(t, answer.Answer, "Answer field should be populated")
-	require.Equal(t, false, answer.ShortAnswer, "ShortAnswer should be false - this app does not access the filesystem")
-
-	t.Logf("Agent Answer: %s", answer.Answer)
-	t.Logf("Short Answer: %v", answer.ShortAnswer)
-	if len(answer.Files) > 0 {
-		t.Logf("Files: %v", answer.Files)
-	}
+func TestAgenticClient_EmptyQuestions(t *testing.T) {
+	for _, p := range providers {
+		t.Run(p.name, func(t *testing.T) {
+			skipIfMissingKey(t, p)
+
+			client := newClient(t, p)
+
+			testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access"))
+			require.NoError(t, err)
+
+			answers, err := client.CallLLM(context.Background(), []string{}, testDataPath)
+			require.Error(t, err, "Empty questions should return error")
+			require.Contains(t, err.Error(), "at least one question is required")
+			require.Nil(t, answers)
+		})
+	}
 }

-// TestAgenticClient_FilesystemAccess_Anthropic tests the same scenario using Anthropic Claude
-func TestAgenticClient_FilesystemAccess_Anthropic(t *testing.T) {
-	if !hasAnthropicAPIKey() {
-		t.Skip("ANTHROPIC_API_KEY not set, skipping Anthropic agentic client integration test")
-	}
-
-	opts := &AgenticCallOptions{
-		Provider: "anthropic",
-		Model:    "claude-sonnet-4-5",
-		APIKey:   os.Getenv("ANTHROPIC_API_KEY"),
-	}
-
-	client, err := NewAgenticClient(opts)
-	require.NoError(t, err)
-
-	testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access"))
-	require.NoError(t, err)
-
-	prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
-
-	answers, err := client.CallLLM(context.Background(), prompt, testDataPath)
-	logme.DebugFln("Agent answers:")
-	prettyprint.Print(answers)
-
-	require.NoError(t, err, "CallLLM should not return error")
-	require.Len(t, answers, 1, "Should return exactly one answer")
-
-	answer := answers[0]
-	require.NotEmpty(t, answer.Answer, "Answer field should be populated")
-	require.Equal(t, true, answer.ShortAnswer, "ShortAnswer should be true - this app accesses the filesystem via os.ReadFile")
-
-	t.Logf("Agent Answer: %s", answer.Answer)
-	t.Logf("Short Answer: %v", answer.ShortAnswer)
-	if len(answer.Files) > 0 {
-		t.Logf("Files: %v", answer.Files)
-	}
+func TestAgenticClient_NoFilesystemAccess(t *testing.T) {
+	for _, p := range providers {
+		t.Run(p.name, func(t *testing.T) {
+			skipIfMissingKey(t, p)
+
+			client := newClient(t, p)
+
+			testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access"))
+			require.NoError(t, err)
+
+			prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
+
+			answers, err := client.CallLLM(context.Background(), []string{prompt}, testDataPath)
+
+			require.NoError(t, err, "CallLLM should not return error")
+			require.Len(t, answers, 1, "Should return exactly one answer")
+
+			answer := answers[0]
+			require.Equal(t, prompt, answer.Question, "Question field should match input question")
+			require.NotEmpty(t, answer.Answer, "Answer field should be populated")
+			require.Equal(t, false, answer.ShortAnswer,
+				"ShortAnswer should be false - this app does not access the filesystem")
+		})
+	}
+}
+
+func TestAgenticClient_FilesystemAccess(t *testing.T) {
+	for _, p := range providers {
+		t.Run(p.name, func(t *testing.T) {
+			skipIfMissingKey(t, p)
+
+			client := newClient(t, p)
+
+			testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access"))
+			require.NoError(t, err)
+
+			prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
+
+			answers, err := client.CallLLM(context.Background(), []string{prompt}, testDataPath)
+
+			require.NoError(t, err, "CallLLM should not return error")
+			require.Len(t, answers, 1, "Should return exactly one answer")
+
+			answer := answers[0]
+			require.Equal(t, prompt, answer.Question, "Question field should match input question")
+			require.NotEmpty(t, answer.Answer, "Answer field should be populated")
+			require.Equal(t, true, answer.ShortAnswer,
+				"ShortAnswer should be true - this app accesses the filesystem via os.ReadFile")
+		})
+	}
+}
+
+func TestAgenticClient_TwoQuestions(t *testing.T) {
+	for _, p := range providers {
+		t.Run(p.name, func(t *testing.T) {
+			skipIfMissingKey(t, p)
+
+			client := newClient(t, p)
+
+			testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access"))
+			require.NoError(t, err)
+
+			questions := []string{
+				"Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations.",
+				"Which specific files contain the filesystem operations and what operations do they perform?",
+			}
+
+			answers, err := client.CallLLM(context.Background(), questions, testDataPath)
+
+			require.NoError(t, err, "CallLLM should not return error")
+			require.Len(t, answers, 2, "Should return exactly two answers")
+
+			require.Equal(
+				t,
+				questions[0],
+				answers[0].Question,
+				"First answer's question should match",
+			)
+			require.NotEmpty(t, answers[0].Answer, "First answer should be populated")
+			require.Equal(t, true, answers[0].ShortAnswer,
+				"First answer should be true - app accesses filesystem")
+
+			require.Equal(
+				t,
+				questions[1],
+				answers[1].Question,
+				"Second answer's question should match",
+			)
+			require.NotEmpty(t, answers[1].Answer, "Second answer should be populated")
+		})
+	}
+}
+
+func TestAgenticClient_ThreeQuestions(t *testing.T) {
+	for _, p := range providers {
+		t.Run(p.name, func(t *testing.T) {
+			skipIfMissingKey(t, p)
+
+			client := newClient(t, p)
+
+			testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access"))
+			require.NoError(t, err)
+
+			questions := []string{
+				"Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations.",
+				"Which specific files contain the filesystem operations and what operations do they perform?",
+				"Does this application use any caching mechanisms? If so, describe how the cache works.",
+			}
+
+			answers, err := client.CallLLM(context.Background(), questions, testDataPath)
+
+			require.NoError(t, err, "CallLLM should not return error")
+			require.Len(t, answers, 3, "Should return exactly three answers")
+
+			require.Equal(
+				t,
+				questions[0],
+				answers[0].Question,
+				"First answer's question should match",
+			)
+			require.NotEmpty(t, answers[0].Answer, "First answer should be populated")
+			require.Equal(t, true, answers[0].ShortAnswer,
+				"First answer should be true - app accesses filesystem")
+
+			require.Equal(
+				t,
+				questions[1],
+				answers[1].Question,
+				"Second answer's question should match",
+			)
+			require.NotEmpty(t, answers[1].Answer, "Second answer should be populated")
+
+			require.Equal(
+				t,
+				questions[2],
+				answers[2].Question,
+				"Third answer's question should match",
+			)
+			require.NotEmpty(t, answers[2].Answer, "Third answer should be populated")
+		})
+	}
 }
diff --git a/pkg/llmclient/agentic_debug.go b/pkg/llmclient/agentic_debug.go
index bf6f5050..0b31075a 100644
--- a/pkg/llmclient/agentic_debug.go
+++ b/pkg/llmclient/agentic_debug.go
@@ -2,52 +2,19 @@ package llmclient

 import (
 	"fmt"
-	"io"
-	"log"
 	"os"
-	"path/filepath"
-	"sync"
-	"time"
-)

-var (
-	debugLogger *log.Logger
-	debugOnce   sync.Once
-	debugPath   string
+	"github.com/grafana/plugin-validator/pkg/logme"
 )

-func initDebugLogger() {
-	debugOnce.Do(func() {
-		debugVal := os.Getenv("DEBUG")
-		if debugVal != "1" && debugVal != "true" {
-			debugLogger = log.New(io.Discard, "", 0)
-			return
-		}
-
-		timestamp := time.Now().Format("20060102-150405")
-		debugPath = filepath.Join(os.TempDir(), fmt.Sprintf("validator-agentic-%s.log", timestamp))
-
-		f, err := os.OpenFile(debugPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
-		if err != nil {
-			fmt.Fprintf(os.Stderr, "AgenticClient: failed to create debug log file: %v\n", err)
-			debugLogger = log.New(io.Discard, "", 0)
-			return
-		}
-
-		debugLogger = log.New(f, "", log.Ltime|log.Lmicroseconds)
-	})
-}
-
-// debugLog writes a formatted message to the debug log file if DEBUG=1 or DEBUG=true
+// debugLog writes a formatted message to the LLM debug log file
 func debugLog(format string, args ...interface{}) {
-	initDebugLogger()
-	debugLogger.Printf(format, args...)
+	logme.LLMLog(format, args...)
 }

 // printDebugLogPath prints the debug log file path to stderr if debug is enabled
 func printDebugLogPath() {
-	initDebugLogger()
-	if debugPath != "" {
-		fmt.Fprintf(os.Stderr, "AgenticClient: debug log: %s\n", debugPath)
+	if p := logme.LLMLogPath(); p != "" {
+		fmt.Fprintf(os.Stderr, "AgenticClient: debug log: %s\n", p)
 	}
 }
diff --git a/pkg/llmclient/agentic_tools.go b/pkg/llmclient/agentic_tools.go
index dd0fd81c..71a380a0 100644
--- a/pkg/llmclient/agentic_tools.go
+++ b/pkg/llmclient/agentic_tools.go
@@ -12,7 +12,7 @@ import (
 	"strings"
 	"unicode/utf8"

-	"github.com/tmc/langchaingo/llms"
+	"github.com/grafana/plugin-validator/pkg/llmprovider"
 )

 const maxFileSize = 500 * 1024 // 500KB
@@ -46,109 +46,112 @@ var blockedGitFlags = []string{
 	"--run",
 }

-// buildAgenticTools returns the list of tools available to the agent
-func buildAgenticTools() []llms.Tool {
-	return []llms.Tool{
-		{
-			Type: "function",
-			Function: &llms.FunctionDefinition{
-				Name:        "read_file",
-				Description: "Read the contents of a file at the given path",
-				Parameters: map[string]interface{}{
-					"type": "object",
-					"properties": map[string]interface{}{
-						"path": map[string]interface{}{
-							"type":        "string",
-							"description": "The relative path to the file to read",
-						},
-					},
-					"required": []string{"path"},
-				},
-			},
-		},
+// toolRegistry maps AgenticTool names to their llmprovider.Tool definitions.
+var toolRegistry = map[AgenticTool]llmprovider.Tool{
+	ToolReadFile: {
+		Type: "function",
+		Function: &llmprovider.FunctionDef{
+			Name:        "read_file",
+			Description: "Read the contents of a file at the given path",
+			Parameters: map[string]interface{}{
+				"type": "object",
+				"properties": map[string]interface{}{
+					"path": map[string]interface{}{
+						"type":        "string",
+						"description": "The relative path to the file to read",
+					},
+				},
+				"required": []string{"path"},
+			},
+		},
+	},
-		{
-			Type: "function",
-			Function: &llms.FunctionDefinition{
-				Name:        "list_directory",
-				Description: "List files and directories at the given path",
-				Parameters: map[string]interface{}{
-					"type": "object",
-					"properties": map[string]interface{}{
-						"path": map[string]interface{}{
-							"type":        "string",
-							"description": "The relative path to the directory to list (use '.' for root)",
-						},
-					},
-					"required": []string{"path"},
-				},
-			},
-		},
+	ToolListDirectory: {
+		Type: "function",
+		Function: &llmprovider.FunctionDef{
+			Name:        "list_directory",
+			Description: "List files and directories at the given path",
+			Parameters: map[string]interface{}{
+				"type": "object",
+				"properties": map[string]interface{}{
+					"path": map[string]interface{}{
+						"type":        "string",
+						"description": "The relative path to the directory to list (use '.' for root)",
+					},
+				},
+				"required": []string{"path"},
+			},
+		},
+	},
-		{
-			Type: "function",
-			Function: &llms.FunctionDefinition{
-				Name:        "grep",
-				Description: "Search for a pattern in files. Returns matching lines with file names and line numbers.",
-				Parameters: map[string]interface{}{
-					"type": "object",
-					"properties": map[string]interface{}{
-						"pattern": map[string]interface{}{
-							"type":        "string",
-							"description": "The pattern to search for",
-						},
-						"path": map[string]interface{}{
-							"type":        "string",
-							"description": "Optional: directory or file to search in (defaults to '.')",
-						},
-					},
-					"required": []string{"pattern"},
-				},
-			},
-		},
+	ToolGrep: {
+		Type: "function",
+		Function: &llmprovider.FunctionDef{
+			Name:        "grep",
+			Description: "Search for a pattern in files. Returns matching lines with file names and line numbers.",
+			Parameters: map[string]interface{}{
+				"type": "object",
+				"properties": map[string]interface{}{
+					"pattern": map[string]interface{}{
+						"type":        "string",
+						"description": "The pattern to search for",
+					},
+					"path": map[string]interface{}{
+						"type":        "string",
+						"description": "Optional: directory or file to search in (defaults to '.')",
+					},
+				},
+				"required": []string{"pattern"},
+			},
+		},
+	},
-		{
-			Type: "function",
-			Function: &llms.FunctionDefinition{
-				Name:        "git",
-				Description: "Execute a git command. Only allowed commands: log, show, diff, status, ls-files, blame, rev-parse, cat-file, checkout, fetch, pull, branch, tag.",
-				Parameters: map[string]interface{}{
-					"type": "object",
-					"properties": map[string]interface{}{
-						"args": map[string]interface{}{
-							"type":        "string",
-							"description": "The git command arguments (e.g., 'log -n 5' or 'show HEAD')",
-						},
-					},
-					"required": []string{"args"},
-				},
-			},
-		},
+	ToolGit: {
+		Type: "function",
+		Function: &llmprovider.FunctionDef{
+			Name:        "git",
+			Description: "Execute a git command. Only allowed commands: log, show, diff, status, ls-files, blame, rev-parse, cat-file, checkout, fetch, pull, branch, tag.",
+			Parameters: map[string]interface{}{
+				"type": "object",
+				"properties": map[string]interface{}{
+					"args": map[string]interface{}{
+						"type":        "string",
+						"description": "The git command arguments (e.g., 'log -n 5' or 'show HEAD')",
+					},
+				},
+				"required": []string{"args"},
+			},
+		},
+	},
+}
-		{
-			Type: "function",
-			Function: &llms.FunctionDefinition{
-				Name:        "submit_answer",
-				Description: "Submit your final answer to the question. Use this when you have gathered enough information.",
-				Parameters: map[string]interface{}{
-					"type": "object",
-					"properties": map[string]interface{}{
-						"answer": map[string]interface{}{
-							"type":        "string",
-							"description": "Your detailed answer explaining your findings",
-						},
-						"short_answer": map[string]interface{}{
-							"type":        "boolean",
-							"description": "A boolean true/false answer to yes/no questions",
-						},
-						"files": map[string]interface{}{
-							"type":        "array",
-							"items":       map[string]interface{}{"type": "string"},
-							"description": "List of relevant files (optional)",
-						},
-						"code_snippet": map[string]interface{}{
-							"type":        "string",
-							"description": "A relevant code snippet (optional)",
-						},
-					},
-					"required": []string{"answer", "short_answer"},
-				},
-			},
-		},
-	}
-}
+
+// submitAnswerTool returns the submit_answer tool definition.
+func submitAnswerTool() llmprovider.Tool {
+	return llmprovider.Tool{
+		Type: "function",
+		Function: &llmprovider.FunctionDef{
+			Name:        "submit_answer",
+			Description: "Submit your final answer to the question. Use this when you have gathered enough information.",
+			Parameters: map[string]interface{}{
+				"type": "object",
+				"properties": map[string]interface{}{
+					"answer": map[string]interface{}{
+						"type":        "string",
+						"description": "Your detailed answer explaining your findings",
+					},
+					"short_answer": map[string]interface{}{
+						"type":        "boolean",
+						"description": "A boolean answer to the question: true means YES, false means NO. For example, if the question is 'Is the sky blue?' the short_answer is true. If the question is 'Is the sky green?' the short_answer is false.",
+					},
+					"files": map[string]interface{}{
+						"type":        "array",
+						"items":       map[string]interface{}{"type": "string"},
+						"description": "List of relevant files. Pass an empty array if not relevant.",
+					},
+					"code_snippet": map[string]interface{}{
+						"type":        "string",
+						"description": "A relevant code snippet. Pass an empty string if not relevant.",
+					},
+				},
+				"required":             []string{"answer", "short_answer", "files", "code_snippet"},
+				"additionalProperties": false,
+			},
+		},
+	}
+}
diff --git a/pkg/llmclient/agentic_types.go b/pkg/llmclient/agentic_types.go
new file mode 100644
index 00000000..8439a568
--- /dev/null
+++ b/pkg/llmclient/agentic_types.go
@@ -0,0 +1,118 @@
+package llmclient
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/grafana/plugin-validator/pkg/llmprovider"
+)
+
+const defaultSystemPromptIntro = `You are a code analysis assistant. You have tools to explore code in a repository.
+
+STRATEGY:
+1. Use list_directory to see what files exist
+2. Use read_file to read the source code files
+3. Analyze the code to answer the question`
+
+// AgenticTool identifies an exploration tool available to the agent.
+type AgenticTool string
+
+const (
+	ToolReadFile      AgenticTool = "read_file"
+	ToolListDirectory AgenticTool = "list_directory"
+	ToolGrep          AgenticTool = "grep"
+	ToolGit           AgenticTool = "git"
+)
+
+// ToolSet is a preset collection of tools.
+type ToolSet int
+
+const (
+	// DefaultTooling includes all exploration tools (read_file, list_directory,
+	// grep, git) plus submit_answer. This is the zero value.
+	DefaultTooling ToolSet = iota
+	// NoTools includes only submit_answer with no exploration tools.
+	NoTools
+)
+
+// AgenticCallOptions contains configuration for the agentic LLM call.
+type AgenticCallOptions struct {
+	Model    string // e.g. "gemini-2.5-flash"
+	Provider string // "google", "anthropic", "openai"
+	APIKey   string
+
+	// Tools selects specific exploration tools. When non-nil, takes precedence
+	// over ToolSet. submit_answer is always included regardless.
+	Tools []AgenticTool
+
+	// ToolSet selects a preset collection of tools. Used when Tools is nil.
+	// The zero value (DefaultTooling) includes all exploration tools.
+	ToolSet ToolSet
+
+	// SystemPrompt overrides the intro portion of the system prompt. The
+	// AVAILABLE TOOLS section is always auto-appended. When empty, a default
+	// intro is used.
+	SystemPrompt string
+}
+
+// AnswerSchema represents the structured response from the agentic client.
+type AnswerSchema struct {
+	Question    string   `json:"question"`
+	Answer      string   `json:"answer"`
+	ShortAnswer bool     `json:"short_answer"`
+	Files       []string `json:"files,omitempty"`
+	CodeSnippet string   `json:"code_snippet,omitempty"`
+}
+
+// defaultTools returns the full set of exploration tools.
+func defaultTools() []AgenticTool {
+	return []AgenticTool{ToolReadFile, ToolListDirectory, ToolGrep, ToolGit}
+}
+
+// resolveTools builds the final []llmprovider.Tool list from the options.
+// submit_answer is always appended.
+func resolveTools(opts *AgenticCallOptions) ([]llmprovider.Tool, error) {
+	var selected []AgenticTool
+	if opts.Tools != nil {
+		selected = opts.Tools
+	} else {
+		switch opts.ToolSet {
+		case DefaultTooling:
+			selected = defaultTools()
+		case NoTools:
+			// empty
+		default:
+			return nil, fmt.Errorf("unknown tool set: %d", opts.ToolSet)
+		}
+	}
+
+	tools := make([]llmprovider.Tool, 0, len(selected)+1)
+	for _, name := range selected {
+		def, ok := toolRegistry[name]
+		if !ok {
+			return nil, fmt.Errorf("unknown tool: %q", name)
+		}
+		tools = append(tools, def)
+	}
+	tools = append(tools, submitAnswerTool())
+	return tools, nil
+}
+
+// buildSystemPrompt composes the system prompt from an intro and the resolved tools.
+func buildSystemPrompt(intro string, tools []llmprovider.Tool) string {
+	if intro == "" {
+		intro = defaultSystemPromptIntro
+	}
+
+	var b strings.Builder
+	b.WriteString(intro)
+	b.WriteString("\n\nAVAILABLE TOOLS:\n")
+	for _, t := range tools {
+		if t.Function != nil {
+			fmt.Fprintf(&b, "- %s: %s\n", t.Function.Name, t.Function.Description)
+		}
+	}
+	b.WriteString("\nIMPORTANT: You are in non-interactive mode. No one will read your plain text answers; communicate only through tool calls.\nWhen you have gathered enough information, use submit_answer to provide your answer.")
+
+	return b.String()
+}
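A sketch of the new tool-selection options, using only the fields defined in `agentic_types.go` above; the provider, model, and environment variable are illustrative:

```go
package main

import (
	"log"
	"os"

	"github.com/grafana/plugin-validator/pkg/llmclient"
)

func main() {
	// Read-only exploration: the agent may read files and list directories,
	// but gets no grep or git. submit_answer is always appended by
	// resolveTools, so it is never listed explicitly.
	readOnly, err := llmclient.NewAgenticClient(&llmclient.AgenticCallOptions{
		Provider: "anthropic",
		Model:    "claude-haiku-4-5",
		APIKey:   os.Getenv("ANTHROPIC_API_KEY"),
		Tools: []llmclient.AgenticTool{
			llmclient.ToolReadFile,
			llmclient.ToolListDirectory,
		},
	})
	if err != nil {
		log.Fatal(err)
	}
	_ = readOnly

	// No exploration tools at all: the model can only call submit_answer.
	bare, err := llmclient.NewAgenticClient(&llmclient.AgenticCallOptions{
		Provider: "anthropic",
		Model:    "claude-haiku-4-5",
		APIKey:   os.Getenv("ANTHROPIC_API_KEY"),
		ToolSet:  llmclient.NoTools,
	})
	if err != nil {
		log.Fatal(err)
	}
	_ = bare
}
```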
+func extractSystemAndMessages(messages []llmprovider.Message) ([]anthropic.TextBlockParam, []anthropic.MessageParam) { + var system []anthropic.TextBlockParam + var result []anthropic.MessageParam + + for _, msg := range messages { + switch msg.Role { + case llmprovider.RoleSystem: + text := extractText(msg.Parts) + if text != "" { + system = append(system, anthropic.TextBlockParam{Text: text}) + } + + case llmprovider.RoleHuman: + blocks := toUserBlocks(msg.Parts) + if len(blocks) > 0 { + result = append(result, anthropic.NewUserMessage(blocks...)) + } + + case llmprovider.RoleAI: + blocks := toAssistantBlocks(msg.Parts) + if len(blocks) > 0 { + result = append(result, anthropic.NewAssistantMessage(blocks...)) + } + + case llmprovider.RoleTool: + // Anthropic sends tool results as user messages + blocks := toToolResultBlocks(msg.Parts) + if len(blocks) > 0 { + result = append(result, anthropic.NewUserMessage(blocks...)) + } + } + } + + return system, result +} + +func toUserBlocks(parts []llmprovider.Part) []anthropic.ContentBlockParamUnion { + var blocks []anthropic.ContentBlockParamUnion + for _, p := range parts { + switch v := p.(type) { + case llmprovider.TextPart: + blocks = append(blocks, anthropic.NewTextBlock(v.Text)) + case llmprovider.ToolResultPart: + blocks = append(blocks, anthropic.NewToolResultBlock(v.ToolCallID, v.Content, false)) + } + } + return blocks +} + +func toAssistantBlocks(parts []llmprovider.Part) []anthropic.ContentBlockParamUnion { + var blocks []anthropic.ContentBlockParamUnion + for _, p := range parts { + switch v := p.(type) { + case llmprovider.TextPart: + blocks = append(blocks, anthropic.NewTextBlock(v.Text)) + case llmprovider.ToolCallPart: + // Parse the arguments string back to any for the input field + var input any + if err := json.Unmarshal([]byte(v.Arguments), &input); err != nil { + input = map[string]any{} + } + blocks = append(blocks, anthropic.NewToolUseBlock(v.ID, input, v.Name)) + case llmprovider.ThinkingPart: + if v.Encrypted != "" { + blocks = append(blocks, anthropic.NewRedactedThinkingBlock(v.Encrypted)) + } else if v.Text != "" { + blocks = append(blocks, anthropic.NewThinkingBlock(v.Signature, v.Text)) + } + } + } + return blocks +} + +func toToolResultBlocks(parts []llmprovider.Part) []anthropic.ContentBlockParamUnion { + var blocks []anthropic.ContentBlockParamUnion + for _, p := range parts { + if tr, ok := p.(llmprovider.ToolResultPart); ok { + blocks = append(blocks, anthropic.NewToolResultBlock(tr.ToolCallID, tr.Content, false)) + } + } + return blocks +} + +func extractText(parts []llmprovider.Part) string { + var text string + for _, p := range parts { + if tp, ok := p.(llmprovider.TextPart); ok { + if text != "" { + text += "\n" + } + text += tp.Text + } + } + return text +} + +// --- Tool conversion --- + +func toAnthropicTools(tools []llmprovider.Tool) []anthropic.ToolUnionParam { + var result []anthropic.ToolUnionParam + for _, tool := range tools { + if tool.Function == nil { + continue + } + + param := anthropic.ToolParam{ + Name: tool.Function.Name, + Description: anthropic.String(tool.Function.Description), + } + + // Convert parameters to ToolInputSchemaParam + if tool.Function.Parameters != nil { + schema := toInputSchema(tool.Function.Parameters) + param.InputSchema = schema + } + + result = append(result, anthropic.ToolUnionParam{OfTool: ¶m}) + } + return result +} + +func toInputSchema(params any) anthropic.ToolInputSchemaParam { + schema := anthropic.ToolInputSchemaParam{} + + var m map[string]any + switch p 
+	case map[string]any:
+		m = p
+	default:
+		data, err := json.Marshal(p)
+		if err != nil {
+			return schema
+		}
+		if err := json.Unmarshal(data, &m); err != nil {
+			return schema
+		}
+	}
+
+	if props, ok := m["properties"]; ok {
+		schema.Properties = props
+	}
+	if req, ok := m["required"].([]any); ok {
+		for _, r := range req {
+			if s, ok := r.(string); ok {
+				schema.Required = append(schema.Required, s)
+			}
+		}
+	}
+
+	return schema
+}
+
+// --- Response conversion: anthropic → llmprovider ---
+
+func fromAnthropicResponse(resp *anthropic.Message) *llmprovider.Response {
+	choice := &llmprovider.Choice{
+		StopReason:     string(resp.StopReason),
+		GenerationInfo: make(map[string]any),
+	}
+
+	for _, block := range resp.Content {
+		switch v := block.AsAny().(type) {
+		case anthropic.TextBlock:
+			if choice.Content != "" {
+				choice.Content += "\n"
+			}
+			choice.Content += v.Text
+
+		case anthropic.ThinkingBlock:
+			logme.LLMLog("anthropic: thinking block (signature=%s, len=%d)", v.Signature[:min(20, len(v.Signature))], len(v.Thinking))
+			choice.Thinking = append(choice.Thinking, llmprovider.ThinkingPart{
+				Text:      v.Thinking,
+				Signature: v.Signature,
+			})
+
+		case anthropic.RedactedThinkingBlock:
+			logme.LLMLog("anthropic: redacted thinking block (data_len=%d)", len(v.Data))
+			choice.Thinking = append(choice.Thinking, llmprovider.ThinkingPart{
+				Encrypted: v.Data,
+			})
+
+		case anthropic.ToolUseBlock:
+			args := string(v.Input)
+			logme.LLMLog("anthropic: tool call: name=%s id=%s", v.Name, v.ID)
+			choice.ToolCalls = append(choice.ToolCalls, llmprovider.ToolCallPart{
+				ID:        v.ID,
+				Name:      v.Name,
+				Arguments: args,
+			})
+		}
+	}
+
+	choice.GenerationInfo["usage"] = map[string]any{
+		"input_tokens":  resp.Usage.InputTokens,
+		"output_tokens": resp.Usage.OutputTokens,
+	}
+
+	return &llmprovider.Response{
+		Choices: []*llmprovider.Choice{choice},
+	}
+}
+
+func min(a, b int) int {
+	if a < b {
+		return a
+	}
+	return b
+}
diff --git a/pkg/llmprovider/geminiprovider/client.go b/pkg/llmprovider/geminiprovider/client.go
new file mode 100644
index 00000000..f7ad596a
--- /dev/null
+++ b/pkg/llmprovider/geminiprovider/client.go
@@ -0,0 +1,380 @@
+// Package geminiprovider implements the llmprovider.Provider interface using
+// the Google GenAI SDK (google.golang.org/genai). It preserves
+// thought_signatures for Gemini 3.x models.
+package geminiprovider
+
+import (
+	"context"
+	"crypto/rand"
+	"encoding/hex"
+	"encoding/json"
+	"fmt"
+
+	"github.com/grafana/plugin-validator/pkg/llmprovider"
+	"github.com/grafana/plugin-validator/pkg/logme"
+	"google.golang.org/genai"
+)
+
+// Client implements llmprovider.Provider for Gemini via AI Studio.
+type Client struct {
+	client    *genai.Client
+	modelName string
+}
+
+// New creates a Gemini provider client using an AI Studio API key.
+func New(ctx context.Context, apiKey, modelName string) (*Client, error) {
+	if apiKey == "" {
+		return nil, fmt.Errorf("gemini: API key is required")
+	}
+	if modelName == "" {
+		return nil, fmt.Errorf("gemini: model name is required")
+	}
+
+	client, err := genai.NewClient(ctx, &genai.ClientConfig{
+		APIKey:  apiKey,
+		Backend: genai.BackendGeminiAPI,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("gemini: failed to create client: %w", err)
+	}
+
+	return &Client{client: client, modelName: modelName}, nil
+}
+
+// GenerateContent sends messages to Gemini and returns the response.
+// It preserves thought_signatures for Gemini 3.x compatibility.
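+//
+// The signature round trip matters in tool loops. A sketch of one turn,
+// assuming a single tool call and a placeholder tool output "out":
+//
+//	resp, _ := c.GenerateContent(ctx, msgs, llmprovider.WithTools(tools))
+//	tc := resp.Choices[0].ToolCalls[0] // carries ThoughtSignature
+//	msgs = append(msgs,
+//		llmprovider.Message{Role: llmprovider.RoleAI, Parts: []llmprovider.Part{tc}},
+//		llmprovider.Message{Role: llmprovider.RoleTool, Parts: []llmprovider.Part{
+//			llmprovider.ToolResultPart{ToolCallID: tc.ID, Name: tc.Name, Content: out},
+//		}},
+//	)
+//
+// toGenAIPart below writes the signature back verbatim; dropping it makes
+// Gemini 3.x reject the follow-up request with a 400.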
+func (c *Client) GenerateContent( + ctx context.Context, + messages []llmprovider.Message, + options ...llmprovider.CallOption, +) (*llmprovider.Response, error) { + opts := &llmprovider.CallOptions{} + for _, o := range options { + o(opts) + } + + // Extract system instruction from messages (Gemini handles it separately) + systemInstruction, conversationMessages := extractSystemMessage(messages) + + // Convert our messages to genai.Content + contents, err := toGenAIContents(conversationMessages) + if err != nil { + return nil, fmt.Errorf("gemini: failed to convert messages: %w", err) + } + + // Build config + config := buildConfig(opts, systemInstruction) + + // Call Gemini API + resp, err := c.client.Models.GenerateContent(ctx, c.modelName, contents, config) + if err != nil { + return nil, fmt.Errorf("gemini: API error: %w", err) + } + + // Convert response, preserving thought_signatures + return fromGenAIResponse(resp) +} + +// extractSystemMessage pulls the system instruction out of the message list. +// Gemini takes system instructions via config, not as a message role. +func extractSystemMessage(messages []llmprovider.Message) (string, []llmprovider.Message) { + var system string + var rest []llmprovider.Message + + for _, m := range messages { + if m.Role == llmprovider.RoleSystem { + // Concatenate all text parts from system messages + for _, p := range m.Parts { + if tp, ok := p.(llmprovider.TextPart); ok { + if system != "" { + system += "\n" + } + system += tp.Text + } + } + } else { + rest = append(rest, m) + } + } + + return system, rest +} + +// buildConfig creates the GenAI generation config from our options. +func buildConfig(opts *llmprovider.CallOptions, systemInstruction string) *genai.GenerateContentConfig { + config := &genai.GenerateContentConfig{} + + if systemInstruction != "" { + config.SystemInstruction = &genai.Content{ + Parts: []*genai.Part{genai.NewPartFromText(systemInstruction)}, + } + } + + if opts.Temperature > 0 { + t := float32(opts.Temperature) + config.Temperature = &t + } + if opts.MaxTokens > 0 { + config.MaxOutputTokens = int32(opts.MaxTokens) + } + if opts.TopP > 0 { + p := float32(opts.TopP) + config.TopP = &p + } + if opts.TopK > 0 { + k := float32(opts.TopK) + config.TopK = &k + } + if len(opts.StopWords) > 0 { + config.StopSequences = opts.StopWords + } + + // Convert tools + if len(opts.Tools) > 0 { + var declarations []*genai.FunctionDeclaration + for _, tool := range opts.Tools { + if tool.Function != nil { + decl := &genai.FunctionDeclaration{ + Name: tool.Function.Name, + Description: tool.Function.Description, + } + // Use ParametersJsonSchema for raw JSON schema passthrough + if tool.Function.Parameters != nil { + decl.ParametersJsonSchema = tool.Function.Parameters + } + declarations = append(declarations, decl) + } + } + if len(declarations) > 0 { + config.Tools = []*genai.Tool{{ + FunctionDeclarations: declarations, + }} + config.ToolConfig = &genai.ToolConfig{ + FunctionCallingConfig: &genai.FunctionCallingConfig{ + Mode: genai.FunctionCallingConfigModeAuto, + }, + } + } + } + + return config +} + +// --- Message conversion: llmprovider → genai --- + +func toGenAIContents(messages []llmprovider.Message) ([]*genai.Content, error) { + var contents []*genai.Content + + for _, msg := range messages { + content := &genai.Content{ + Role: toGenAIRole(msg.Role), + } + + for _, part := range msg.Parts { + genaiPart, err := toGenAIPart(part) + if err != nil { + return nil, err + } + if genaiPart != nil { + content.Parts = 
append(content.Parts, genaiPart)
+			}
+		}
+
+		if len(content.Parts) > 0 {
+			contents = append(contents, content)
+		}
+	}
+
+	return contents, nil
+}
+
+func toGenAIRole(role llmprovider.Role) string {
+	switch role {
+	case llmprovider.RoleHuman:
+		return "user"
+	case llmprovider.RoleAI:
+		return "model"
+	case llmprovider.RoleTool:
+		return "user"
+	default:
+		return "user"
+	}
+}
+
+func toGenAIPart(part llmprovider.Part) (*genai.Part, error) {
+	switch p := part.(type) {
+	case llmprovider.TextPart:
+		return genai.NewPartFromText(p.Text), nil
+
+	case llmprovider.ToolCallPart:
+		// Parse arguments from JSON string to map
+		var args map[string]any
+		if p.Arguments != "" {
+			if err := json.Unmarshal([]byte(p.Arguments), &args); err != nil {
+				return nil, fmt.Errorf("gemini: failed to unmarshal tool arguments for %q: %w", p.Name, err)
+			}
+		}
+
+		genaiPart := genai.NewPartFromFunctionCall(p.Name, args)
+		if p.ID != "" {
+			genaiPart.FunctionCall.ID = p.ID
+		}
+
+		// Echo back thought fields exactly as received from the API.
+		genaiPart.Thought = p.Thought
+		if p.ThoughtSignature != "" {
+			genaiPart.ThoughtSignature = []byte(p.ThoughtSignature)
+		}
+
+		return genaiPart, nil
+
+	case llmprovider.ToolResultPart:
+		// Convert response content to map
+		var responseMap map[string]any
+		if err := json.Unmarshal([]byte(p.Content), &responseMap); err != nil {
+			// If it's not JSON, wrap it
+			responseMap = map[string]any{"result": p.Content}
+		}
+
+		genaiPart := genai.NewPartFromFunctionResponse(p.Name, responseMap)
+		if p.ToolCallID != "" {
+			genaiPart.FunctionResponse.ID = p.ToolCallID
+		}
+
+		return genaiPart, nil
+
+	case llmprovider.ThinkingPart:
+		// Thinking parts from previous responses need to be echoed back
+		genaiPart := &genai.Part{
+			Text:    p.Text,
+			Thought: true,
+		}
+		if p.Signature != "" {
+			genaiPart.ThoughtSignature = []byte(p.Signature)
+		}
+		return genaiPart, nil
+
+	default:
+		return nil, fmt.Errorf("gemini: unsupported part type %T", part)
+	}
+}
+
+// --- Response conversion: genai → llmprovider ---
+
+func fromGenAIResponse(resp *genai.GenerateContentResponse) (*llmprovider.Response, error) {
+	if resp == nil {
+		return nil, fmt.Errorf("gemini: nil response")
+	}
+
+	result := &llmprovider.Response{
+		Choices: make([]*llmprovider.Choice, 0, len(resp.Candidates)),
+	}
+
+	for candidateIdx, candidate := range resp.Candidates {
+		choice := &llmprovider.Choice{
+			StopReason:     string(candidate.FinishReason),
+			GenerationInfo: make(map[string]any),
+		}
+
+		if candidate.Content == nil || len(candidate.Content.Parts) == 0 {
+			raw, _ := json.MarshalIndent(candidate, "", " ")
+			logme.LLMLog("gemini: candidate[%d] empty/nil content, raw candidate:\n%s", candidateIdx, string(raw))
+		}
+
+		if candidate.Content != nil {
+			for partIdx, part := range candidate.Content.Parts {
+				if part == nil {
+					continue
+				}
+
+				// Debug: log raw part fields so we can see exactly what the SDK returns
+				debugLogPart(partIdx, part)
+
+				// Thought/thinking parts
+				if part.Thought && part.FunctionCall == nil {
+					thinking := llmprovider.ThinkingPart{
+						Text: part.Text,
+					}
+					if len(part.ThoughtSignature) > 0 {
+						thinking.Signature = string(part.ThoughtSignature)
+					}
+					choice.Thinking = append(choice.Thinking, thinking)
+					continue
+				}
+
+				// Text content (non-thought); concatenate so earlier text
+				// parts are not dropped when a candidate has several
+				if part.Text != "" && part.FunctionCall == nil && part.FunctionResponse == nil {
+					if choice.Content != "" {
+						choice.Content += "\n"
+					}
+					choice.Content += part.Text
+				}
+
+				// Function calls
+				if part.FunctionCall != nil {
+					id := part.FunctionCall.ID
+					if id == "" {
+						id = generateCallID()
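+						// Gemini may omit function-call IDs, but downstream
+						// code matches tool results to calls by ID, so we
+						// synthesize one ("call_" + 16 random hex chars; see
+						// generateCallID below).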
logme.LLMLog("gemini: part[%d] FunctionCall has empty ID, generated: %s", partIdx, id) + } + tc := llmprovider.ToolCallPart{ + ID: id, + Name: part.FunctionCall.Name, + Thought: part.Thought, + } + + if part.FunctionCall.Args != nil { + argsJSON, err := json.Marshal(part.FunctionCall.Args) + if err != nil { + return nil, fmt.Errorf("gemini: failed to marshal function args: %w", err) + } + tc.Arguments = string(argsJSON) + } + + // CRITICAL: Capture thought_signature from function call parts + if len(part.ThoughtSignature) > 0 { + tc.ThoughtSignature = string(part.ThoughtSignature) + } + + choice.ToolCalls = append(choice.ToolCalls, tc) + } + } + } + + // Token usage + if resp.UsageMetadata != nil { + choice.GenerationInfo["usage"] = map[string]any{ + "prompt_tokens": resp.UsageMetadata.PromptTokenCount, + "completion_tokens": resp.UsageMetadata.CandidatesTokenCount, + "total_tokens": resp.UsageMetadata.TotalTokenCount, + "thoughts_tokens": resp.UsageMetadata.ThoughtsTokenCount, + } + } + + result.Choices = append(result.Choices, choice) + } + + return result, nil +} + +func generateCallID() string { + b := make([]byte, 8) + _, _ = rand.Read(b) + return "call_" + hex.EncodeToString(b) +} + +func debugLogPart(idx int, p *genai.Part) { + hasFuncCall := p.FunctionCall != nil + hasFuncResp := p.FunctionResponse != nil + textLen := len(p.Text) + sigLen := len(p.ThoughtSignature) + logme.LLMLog("gemini: part[%d] Thought=%v Text=%d bytes FuncCall=%v FuncResp=%v ThoughtSig=%d bytes", + idx, p.Thought, textLen, hasFuncCall, hasFuncResp, sigLen) + if p.Thought && textLen > 0 { + preview := p.Text + if len(preview) > 200 { + preview = preview[:200] + "..." + } + logme.LLMLog("gemini: part[%d] thinking preview: %s", idx, preview) + } + if hasFuncCall { + logme.LLMLog("gemini: part[%d] FunctionCall: name=%s id=%s", idx, p.FunctionCall.Name, p.FunctionCall.ID) + } +} diff --git a/pkg/llmprovider/openaiprovider/client.go b/pkg/llmprovider/openaiprovider/client.go new file mode 100644 index 00000000..4dd782cc --- /dev/null +++ b/pkg/llmprovider/openaiprovider/client.go @@ -0,0 +1,221 @@ +// Package openai implements the llmprovider.Provider interface using the +// official OpenAI Go SDK (github.com/openai/openai-go). +package openaiprovider + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/grafana/plugin-validator/pkg/llmprovider" + "github.com/grafana/plugin-validator/pkg/logme" + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + "github.com/openai/openai-go/shared" +) + +// Client implements llmprovider.Provider for OpenAI. +type Client struct { + client *openai.Client + modelName string +} + +// New creates an OpenAI provider client. +func New(apiKey, modelName string) (*Client, error) { + if apiKey == "" { + return nil, fmt.Errorf("openai: API key is required") + } + if modelName == "" { + return nil, fmt.Errorf("openai: model name is required") + } + + client := openai.NewClient(option.WithAPIKey(apiKey)) + + return &Client{client: &client, modelName: modelName}, nil +} + +// GenerateContent sends messages to OpenAI and returns the response. 
+func (c *Client) GenerateContent( + ctx context.Context, + messages []llmprovider.Message, + options ...llmprovider.CallOption, +) (*llmprovider.Response, error) { + opts := &llmprovider.CallOptions{} + for _, o := range options { + o(opts) + } + + params := openai.ChatCompletionNewParams{ + Model: shared.ChatModel(c.modelName), + Messages: toOpenAIMessages(messages), + } + + if opts.Temperature > 0 { + params.Temperature = openai.Float(opts.Temperature) + } + if opts.MaxTokens > 0 { + params.MaxCompletionTokens = openai.Int(int64(opts.MaxTokens)) + } + if opts.TopP > 0 { + params.TopP = openai.Float(opts.TopP) + } + if len(opts.StopWords) > 0 { + params.Stop = openai.ChatCompletionNewParamsStopUnion{ + OfStringArray: opts.StopWords, + } + } + + if len(opts.Tools) > 0 { + params.Tools = toOpenAITools(opts.Tools) + } + + resp, err := c.client.Chat.Completions.New(ctx, params) + if err != nil { + return nil, fmt.Errorf("openai: API error: %w", err) + } + + return fromOpenAIResponse(resp), nil +} + +// --- Message conversion: llmprovider → openai --- + +func toOpenAIMessages(messages []llmprovider.Message) []openai.ChatCompletionMessageParamUnion { + var result []openai.ChatCompletionMessageParamUnion + + for _, msg := range messages { + switch msg.Role { + case llmprovider.RoleSystem: + text := extractText(msg.Parts) + result = append(result, openai.SystemMessage(text)) + + case llmprovider.RoleHuman: + text := extractText(msg.Parts) + result = append(result, openai.UserMessage(text)) + + case llmprovider.RoleAI: + result = append(result, toAssistantMessage(msg)) + + case llmprovider.RoleTool: + for _, part := range msg.Parts { + if tr, ok := part.(llmprovider.ToolResultPart); ok { + result = append(result, openai.ToolMessage(tr.Content, tr.ToolCallID)) + } + } + } + } + + return result +} + +func toAssistantMessage(msg llmprovider.Message) openai.ChatCompletionMessageParamUnion { + text := extractText(msg.Parts) + + var toolCalls []openai.ChatCompletionMessageToolCallParam + for _, part := range msg.Parts { + if tc, ok := part.(llmprovider.ToolCallPart); ok { + toolCalls = append(toolCalls, openai.ChatCompletionMessageToolCallParam{ + ID: tc.ID, + Function: openai.ChatCompletionMessageToolCallFunctionParam{ + Name: tc.Name, + Arguments: tc.Arguments, + }, + }) + } + } + + asst := openai.ChatCompletionAssistantMessageParam{} + if text != "" { + asst.Content.OfString = openai.String(text) + } + if len(toolCalls) > 0 { + asst.ToolCalls = toolCalls + } + + return openai.ChatCompletionMessageParamUnion{OfAssistant: &asst} +} + +func extractText(parts []llmprovider.Part) string { + var text string + for _, p := range parts { + if tp, ok := p.(llmprovider.TextPart); ok { + if text != "" { + text += "\n" + } + text += tp.Text + } + } + return text +} + +// --- Tool conversion --- + +func toOpenAITools(tools []llmprovider.Tool) []openai.ChatCompletionToolParam { + var result []openai.ChatCompletionToolParam + for _, tool := range tools { + if tool.Function == nil { + continue + } + + param := openai.ChatCompletionToolParam{ + Function: shared.FunctionDefinitionParam{ + Name: tool.Function.Name, + Description: openai.String(tool.Function.Description), + }, + } + + // Convert parameters to FunctionParameters (map[string]any) + if tool.Function.Parameters != nil { + switch p := tool.Function.Parameters.(type) { + case map[string]any: + param.Function.Parameters = shared.FunctionParameters(p) + default: + // Marshal and unmarshal to get map[string]any + data, err := json.Marshal(p) + if err == nil { + 
var m map[string]any + if json.Unmarshal(data, &m) == nil { + param.Function.Parameters = shared.FunctionParameters(m) + } + } + } + } + + result = append(result, param) + } + return result +} + +// --- Response conversion: openai → llmprovider --- + +func fromOpenAIResponse(resp *openai.ChatCompletion) *llmprovider.Response { + result := &llmprovider.Response{ + Choices: make([]*llmprovider.Choice, 0, len(resp.Choices)), + } + + for _, c := range resp.Choices { + choice := &llmprovider.Choice{ + Content: c.Message.Content, + StopReason: c.FinishReason, + GenerationInfo: make(map[string]any), + } + + for _, tc := range c.Message.ToolCalls { + logme.LLMLog("openai: tool call: name=%s id=%s", tc.Function.Name, tc.ID) + choice.ToolCalls = append(choice.ToolCalls, llmprovider.ToolCallPart{ + ID: tc.ID, + Name: tc.Function.Name, + Arguments: tc.Function.Arguments, + }) + } + + choice.GenerationInfo["usage"] = map[string]any{ + "prompt_tokens": resp.Usage.PromptTokens, + "completion_tokens": resp.Usage.CompletionTokens, + "total_tokens": resp.Usage.TotalTokens, + } + + result.Choices = append(result.Choices, choice) + } + + return result +} diff --git a/pkg/llmprovider/types.go b/pkg/llmprovider/types.go new file mode 100644 index 00000000..d6a592e8 --- /dev/null +++ b/pkg/llmprovider/types.go @@ -0,0 +1,168 @@ +// Package llmprovider defines a unified interface for LLM providers. +// +// Each provider (Gemini, Anthropic, OpenAI) has a native SDK implementation +// that supports provider-specific features like Gemini thought_signatures, +// Anthropic thinking blocks, and OpenAI encrypted reasoning content. +package llmprovider + +import "context" + +// Provider is the core interface that all LLM provider clients implement. +type Provider interface { + GenerateContent(ctx context.Context, messages []Message, options ...CallOption) (*Response, error) +} + +// Role identifies the sender of a message. +type Role string + +const ( + RoleSystem Role = "system" + RoleHuman Role = "human" + RoleAI Role = "ai" + RoleTool Role = "tool" +) + +// Message is a single message in a conversation. +type Message struct { + Role Role + Parts []Part +} + +// Part is a piece of content within a message. +// Concrete types: TextPart, ToolCallPart, ToolResultPart, ThinkingPart. +type Part interface { + partMarker() +} + +// TextPart is plain text content. +type TextPart struct { + Text string +} + +func (TextPart) partMarker() {} + +// ToolCallPart represents a model's request to call a tool. +type ToolCallPart struct { + ID string + Name string + Arguments string // JSON string + + // Thought indicates whether this part was produced during model thinking. + // Must be echoed back exactly as received from the API. + Thought bool + + // ThoughtSignature is the opaque token Gemini 3.x attaches to function + // call parts. It must be echoed back in subsequent requests or the API + // returns a 400. Nil/empty means no signature was provided. + ThoughtSignature string +} + +func (ToolCallPart) partMarker() {} + +// ToolResultPart is the response from executing a tool. +type ToolResultPart struct { + ToolCallID string + Name string + Content string +} + +func (ToolResultPart) partMarker() {} + +// ThinkingPart holds reasoning/thinking content from the model. 
+// Different providers represent this differently: +// - Gemini: thought text + thought_signature +// - Anthropic: thinking block with signature, or redacted_thinking +// - OpenAI: encrypted reasoning content +type ThinkingPart struct { + Text string + Signature string // Gemini thought_signature or Anthropic thinking signature + Encrypted string // OpenAI encrypted_content or Anthropic redacted_thinking data +} + +func (ThinkingPart) partMarker() {} + +// Response is the result of a GenerateContent call. +type Response struct { + Choices []*Choice +} + +// Choice is a single response candidate. +type Choice struct { + // Content is the text content of the response. + Content string + + // StopReason is why the model stopped generating. + StopReason string + + // ToolCalls requested by the model. These preserve ThoughtSignature + // so they can be echoed back in the next request. + ToolCalls []ToolCallPart + + // Thinking contains reasoning/thinking content if the model produced any. + Thinking []ThinkingPart + + // GenerationInfo holds arbitrary provider-specific metadata (token + // counts, safety ratings, etc.). + GenerationInfo map[string]any +} + +// --- Call options --- + +// CallOption configures a GenerateContent call. +type CallOption func(*CallOptions) + +// CallOptions holds all configurable parameters for a GenerateContent call. +type CallOptions struct { + MaxTokens int + Temperature float64 + TopP float64 + TopK int + StopWords []string + Tools []Tool +} + +// Tool describes a tool the model can invoke. +type Tool struct { + Type string + Function *FunctionDef +} + +// FunctionDef describes a callable function. +type FunctionDef struct { + Name string + Description string + Parameters any // JSON Schema +} + +// --- Option helpers --- + +func WithMaxTokens(n int) CallOption { + return func(o *CallOptions) { o.MaxTokens = n } +} + +func WithTemperature(t float64) CallOption { + return func(o *CallOptions) { o.Temperature = t } +} + +func WithTopP(p float64) CallOption { + return func(o *CallOptions) { o.TopP = p } +} + +func WithTopK(k int) CallOption { + return func(o *CallOptions) { o.TopK = k } +} + +func WithStopWords(words []string) CallOption { + return func(o *CallOptions) { o.StopWords = words } +} + +func WithTools(tools []Tool) CallOption { + return func(o *CallOptions) { o.Tools = tools } +} + +// --- Convenience constructors --- + +// TextMessage creates a Message with a single text part. +func TextMessage(role Role, text string) Message { + return Message{Role: role, Parts: []Part{TextPart{Text: text}}} +} diff --git a/pkg/logme/logme.go b/pkg/logme/logme.go index fc6c8d49..ece3e121 100644 --- a/pkg/logme/logme.go +++ b/pkg/logme/logme.go @@ -2,11 +2,53 @@ package logme import ( "fmt" + "io" + "log" "os" + "path/filepath" + "sync" ) var isDebugMode = os.Getenv("DEBUG") == "1" +var ( + llmLogger *log.Logger + llmOnce sync.Once + llmPath string +) + +func initLLMLogger() { + llmOnce.Do(func() { + if !isDebugMode { + llmLogger = log.New(io.Discard, "", 0) + return + } + + llmPath = filepath.Join(os.TempDir(), "validator-llm.log") + f, err := os.OpenFile(llmPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + fmt.Fprintf(os.Stderr, "logme: failed to open LLM log file: %v\n", err) + llmLogger = log.New(io.Discard, "", 0) + return + } + + llmLogger = log.New(f, "", log.Ltime|log.Lmicroseconds) + }) +} + +// LLMLog writes a formatted message to the LLM debug log file in /tmp. +// Only active when DEBUG=1. 
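+//
+// Call sites look like (format and fields are illustrative, matching the
+// providers above):
+//
+//	logme.LLMLog("openai: tool call: name=%s id=%s", name, id)
+//
+// With DEBUG unset the logger writes to io.Discard, so calls are cheap
+// no-ops; otherwise lines are appended to
+// filepath.Join(os.TempDir(), "validator-llm.log").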
+func LLMLog(format string, args ...interface{}) { + initLLMLogger() + llmLogger.Printf(format, args...) +} + +// LLMLogPath returns the path to the LLM log file, or "" if not active. +func LLMLogPath() string { + initLLMLogger() + return llmPath +} + func DebugFln(msg string, args ...interface{}) { // check if ENV DEBUG is 1 if isDebugMode {