From 73b6e59ae2278b5078bb4fd9579b1b38f8af023e Mon Sep 17 00:00:00 2001 From: Esteban Beltran Date: Tue, 3 Mar 2026 18:02:15 +0100 Subject: [PATCH 01/10] Improve --- pkg/llmclient/agentic_client.go | 376 +++++++++++++----- .../agentic_client_integration_test.go | 253 ++++++------ pkg/llmclient/agentic_debug.go | 4 +- pkg/llmclient/agentic_tools.go | 2 +- 4 files changed, 428 insertions(+), 207 deletions(-) diff --git a/pkg/llmclient/agentic_client.go b/pkg/llmclient/agentic_client.go index cef19851..e41ab881 100644 --- a/pkg/llmclient/agentic_client.go +++ b/pkg/llmclient/agentic_client.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "strings" "time" "github.com/tmc/langchaingo/llms" @@ -13,14 +14,42 @@ import ( ) const ( - maxToolCalls = 100 - maxLLMRetries = 3 - maxConsecutiveNoTools = 5 - retryDelay = 2 * time.Second + maxToolCallsFirstQuestion = 60 + maxToolCallsFollowUp = 20 + maxLLMRetries = 3 + maxConsecutiveNoTools = 5 + retryDelay = 2 * time.Second + + systemPrompt = `You are a code analysis assistant. You have tools to explore code in a repository. + +AVAILABLE TOOLS: +- list_directory: List files at a path. Use "." for root. +- read_file: Read a file's contents. This is your primary tool for understanding code. +- grep: Search for a pattern across files. +- git: Run read-only git commands (log, show, diff, status, etc.) +- submit_answer: Submit your answers. + +STRATEGY: +1. Use list_directory to see what files exist +2. Use read_file to read the source code files +3. Analyze the code to answer the question + +You can only use one tool at a time. +IMPORTANT: You are in non-interactive mode. No one will read your text answers, only tools. +When you have gathered enough information, use submit_answer to provide your answer.` + + questionAppendPrompt = `Start by listing the files in the repository and exploring the contents.` + + budgetNudgePrompt = `You have only %d tool calls remaining. 
Wrap up your investigation and call submit_answer now with whatever information you have gathered so far.` + + useToolsReminderPrompt = `You are in non-interactive mode. You must start using your tools now to explore the repository. When you have enough information, use submit_answer to provide your answer.` + + submitAnswerAloneError = `Error: submit_answer must be called alone. When you have an answer, call submit_answer as a single tool call without any other tools in the same response.` ) // AnswerSchema represents the structured response from the agentic client type AnswerSchema struct { + Question string `json:"question"` Answer string `json:"answer"` ShortAnswer bool `json:"short_answer"` Files []string `json:"files,omitempty"` @@ -36,7 +65,7 @@ type AgenticCallOptions struct { // AgenticClient is an interface for agentic LLM interactions type AgenticClient interface { - CallLLM(ctx context.Context, prompt, repositoryPath string) ([]AnswerSchema, error) + CallLLM(ctx context.Context, questions []string, repositoryPath string) ([]AnswerSchema, error) } // agenticClientImpl implements AgenticClient @@ -68,9 +97,17 @@ func NewAgenticClient(opts *AgenticCallOptions) (AgenticClient, error) { } // CallLLM executes an agentic loop with tools to answer questions about code. -// The prompt may contain multiple questions, in which case the agent will call -// submit_answer multiple times. All answers are collected and returned. -func (c *agenticClientImpl) CallLLM(ctx context.Context, prompt, repositoryPath string) ([]AnswerSchema, error) { +// Each question is processed sequentially, with follow-up questions benefiting +// from the context accumulated by earlier questions. 
+func (c *agenticClientImpl) CallLLM( + ctx context.Context, + questions []string, + repositoryPath string, +) ([]AnswerSchema, error) { + if len(questions) == 0 { + return nil, fmt.Errorf("at least one question is required") + } + // Initialize LLM based on provider using the client's configured settings opts := &AgenticCallOptions{ APIKey: c.apiKey, @@ -88,102 +125,206 @@ func (c *agenticClientImpl) CallLLM(ctx context.Context, prompt, repositoryPath // Create tool executor executor := newToolExecutor(repositoryPath) - // System prompt - systemPrompt := `You are a code analysis assistant. You have tools to explore code in a repository. - -AVAILABLE TOOLS: -- list_directory: List files at a path. Use "." for root. -- read_file: Read a file's contents. This is your primary tool for understanding code. -- grep: Search for a pattern across files. -- git: Run read-only git commands (log, show, diff, status, etc.) -- submit_answer: Submit your final answer. - -STRATEGY: -1. Use list_directory to see what files exist -2. Use read_file to read the source code files -3. Analyze the code to answer the question - -You can only use one tool at a time. -IMPORTANT: You are in non-interactive mode. Start working and using your tools immediately. -When ready, use submit_answer. 
For multiple questions, call submit_answer once per question.` - - // Build initial messages + // Build initial messages with system prompt only (no user message yet) messages := []llms.MessageContent{ llms.TextParts(llms.ChatMessageTypeSystem, systemPrompt), - llms.TextParts(llms.ChatMessageTypeHuman, prompt), } - // Collect answers - var answers []AnswerSchema - - // Agentic loop - toolCallsRemaining := maxToolCalls - // Print debug log file path before starting the loop printDebugLogPath() debugLog("\n\n\n") debugLog("################################################################") debugLog("# NEW CallLLM - provider=%s model=%s", c.provider, c.model) debugLog("# repo=%s", repositoryPath) - debugLog("# prompt=%s", truncateString(prompt, 200)) + debugLog("# questions=%d", len(questions)) debugLog("################################################################") - iteration := 0 + // Collect answers + var answers []AnswerSchema + + // Process each question sequentially + for questionIndex, question := range questions { + debugLog( + "\n========== Processing question %d/%d ==========", + questionIndex+1, + len(questions), + ) + debugLog("Question: %s", truncateString(question, 200)) + + originalQuestion := question + question = fmt.Sprintf("%s\n\n%s", question, questionAppendPrompt) + + // Determine budget for this question + budget := maxToolCallsFirstQuestion + if questionIndex > 0 { + budget = maxToolCallsFollowUp + } + debugLog("Budget: %d tool calls", budget) + + // Add the question as a human message + messages = append(messages, llms.TextParts(llms.ChatMessageTypeHuman, question)) + + // Run the question loop + updatedMessages, answer, err := c.runQuestionLoop( + ctx, + llm, + messages, + tools, + executor, + budget, + questionIndex, + ) + messages = updatedMessages + + if err != nil { + // Return partial results on error + debugLog("AgenticClient: question %d failed: %v", questionIndex+1, err) + if len(answers) > 0 { + debugLog("AgenticClient: returning 
%d partial answers", len(answers)) + return answers, nil + } + return nil, err + } + + if answer != nil { + // Set the question field + answer.Question = originalQuestion + answers = append(answers, *answer) + debugLog("AgenticClient: collected answer %d/%d", len(answers), len(questions)) + } else { + // Budget exhausted without answer - stop processing further questions + debugLog("AgenticClient: question %d exhausted budget without answer, stopping", questionIndex+1) + if len(answers) > 0 { + debugLog("AgenticClient: returning %d partial answers", len(answers)) + return answers, nil + } + return nil, fmt.Errorf("question %d exhausted budget without providing answer", questionIndex+1) + } + } + + debugLog("AgenticClient: successfully answered all %d questions", len(questions)) + return answers, nil +} + +// runQuestionLoop runs the tool-calling loop for a single question. +// Returns updated messages, the answer (or nil if budget exhausted), and error. +func (c *agenticClientImpl) runQuestionLoop( + ctx context.Context, + llm llms.Model, + messages []llms.MessageContent, + tools []llms.Tool, + executor *toolExecutor, + budget int, + questionIndex int, +) ([]llms.MessageContent, *AnswerSchema, error) { + toolCallsRemaining := budget consecutiveNoTools := 0 + iteration := 0 + + budgetNudged := false + for toolCallsRemaining > 0 { iteration++ - debugLog("========== AgenticClient: iteration %d ==========", iteration) - debugLog("AgenticClient: %d tool calls remaining, %d answers collected", toolCallsRemaining, len(answers)) + debugLog("========== Question %d iteration %d ==========", questionIndex+1, iteration) + debugLog("AgenticClient: %d tool calls remaining", toolCallsRemaining) + + if !budgetNudged && toolCallsRemaining <= 5 { + budgetNudged = true + debugLog("AgenticClient: nudging model about low budget") + messages = append(messages, llms.TextParts( + llms.ChatMessageTypeHuman, + fmt.Sprintf(budgetNudgePrompt, toolCallsRemaining), + )) + } // Call LLM with 
retry logic debugLog("AgenticClient: calling LLM...") resp, err := callLLMWithRetry(ctx, llm, messages, tools) if err != nil { debugLog("AgenticClient: LLM call failed: %v", err) - return nil, fmt.Errorf("LLM call failed after %d retries: %w", maxLLMRetries, err) + return messages, nil, fmt.Errorf( + "LLM call failed after %d retries: %w", + maxLLMRetries, + err, + ) } - // resp.Choices contains the LLM's response options. Each choice has Content (text) - // and/or ToolCalls (function calls the model wants to make). Typically there's - // only one choice unless you request multiple completions. if len(resp.Choices) == 0 { debugLog("AgenticClient: no choices in response") - return nil, fmt.Errorf("no response from LLM") + return messages, nil, fmt.Errorf("no response from LLM") + } + + // Log raw response for debugging + debugLog("AgenticClient: received response with %d choices", len(resp.Choices)) + if choicesJSON, err := json.MarshalIndent(resp.Choices, "", " "); err == nil { + debugLog("Raw response Choices:\n%s", string(choicesJSON)) + } + + // Merge all choices into one (Anthropic returns text and tool calls as separate choices) + mergedChoice := llms.ContentChoice{} + var allToolCalls []llms.ToolCall + var contentParts []string + + for i, ch := range resp.Choices { + debugLog("AgenticClient: processing choice %d: Content=%q, ToolCalls=%d", + i, truncateString(ch.Content, 100), len(ch.ToolCalls)) + + if ch.Content != "" { + contentParts = append(contentParts, ch.Content) + } + if len(ch.ToolCalls) > 0 { + allToolCalls = append(allToolCalls, ch.ToolCalls...) + } + // Use StopReason from first non-empty one + if mergedChoice.StopReason == "" && ch.StopReason != "" { + mergedChoice.StopReason = ch.StopReason + } + // Use GenerationInfo from first choice + if i == 0 { + mergedChoice.GenerationInfo = ch.GenerationInfo + } } - // Use first choice. Google puts all tool calls in choices[0].ToolCalls. 
- // Anthropic creates a separate choice per content block (text or tool_use), - // but langchaingo's handleAIMessage only supports Parts[0] as either - // TextContent or ToolCall, so we process one choice at a time. - choice := resp.Choices[0] - debugLog("AgenticClient: received response with %d tool calls", len(choice.ToolCalls)) + // Build merged choice — join all content parts so nothing is lost + // when Anthropic returns multiple text blocks (e.g. thinking + response). + if len(contentParts) > 0 { + mergedChoice.Content = strings.Join(contentParts, "\n") + } + mergedChoice.ToolCalls = allToolCalls + + choice := mergedChoice + debugLog("AgenticClient: merged choice - Content=%q, ToolCalls=%d", + truncateString(choice.Content, 200), len(choice.ToolCalls)) if choice.Content != "" { debugLog("AgenticClient: AI message: %s", truncateString(choice.Content, 200)) } - // If no tool calls, check if we have answers + // If no tool calls, check if we should nudge the agent if len(choice.ToolCalls) == 0 { debugLog("AgenticClient: no tool calls in response") - // If we have collected answers, the agent is done - if len(answers) > 0 { - debugLog("AgenticClient: agent finished with %d answers", len(answers)) - return answers, nil - } - consecutiveNoTools++ - debugLog("AgenticClient: consecutive no-tool responses: %d/%d", consecutiveNoTools, maxConsecutiveNoTools) + debugLog( + "AgenticClient: consecutive no-tool responses: %d/%d", + consecutiveNoTools, + maxConsecutiveNoTools, + ) if consecutiveNoTools >= maxConsecutiveNoTools { - return nil, fmt.Errorf("agent failed to use tools after %d consecutive attempts", maxConsecutiveNoTools) + return messages, nil, fmt.Errorf( + "agent failed to use tools after %d consecutive attempts", + maxConsecutiveNoTools, + ) } - // No answers yet - add the AI response and remind to use tools + // Add the AI response and remind to use tools if choice.Content != "" { messages = append(messages, llms.TextParts(llms.ChatMessageTypeAI, 
choice.Content)) } - debugLog("AgenticClient: no answers yet, reminding agent to use tools") - messages = append(messages, llms.TextParts(llms.ChatMessageTypeHuman, - "You are in non-interactive mode. You must start using your tools now to explore the repository. When you have enough information, use submit_answer to provide your answer.")) + debugLog("AgenticClient: reminding agent to use tools") + messages = append(messages, llms.TextParts( + llms.ChatMessageTypeHuman, + useToolsReminderPrompt, + )) toolCallsRemaining-- continue } @@ -191,41 +332,85 @@ When ready, use submit_answer. For multiple questions, call submit_answer once p // Reset consecutive no-tool counter when tools are used consecutiveNoTools = 0 - // Build AI message with tool calls - aiMessage := llms.MessageContent{ - Role: llms.ChatMessageTypeAI, - } - if choice.Content != "" { - aiMessage.Parts = append(aiMessage.Parts, llms.TextContent{Text: choice.Content}) - } + // Validate submit_answer is called alone + hasSubmitAnswer := false for _, toolCall := range choice.ToolCalls { - aiMessage.Parts = append(aiMessage.Parts, toolCall) + if toolCall.FunctionCall.Name == "submit_answer" { + hasSubmitAnswer = true + break + } + } + if hasSubmitAnswer && len(choice.ToolCalls) > 1 { + debugLog("AgenticClient: submit_answer called with other tools - rejecting all") + // Add a single AI message with ALL tool calls so every + // tool_result below has a matching tool_use in the preceding + // assistant message. 
+ aiParts := make([]llms.ContentPart, len(choice.ToolCalls)) + for i, tc := range choice.ToolCalls { + aiParts[i] = tc + } + aiMessage := llms.MessageContent{ + Role: llms.ChatMessageTypeAI, + Parts: aiParts, + } + messages = append(messages, aiMessage) + for _, toolCall := range choice.ToolCalls { + toolCallsRemaining-- + errorResponse := llms.MessageContent{ + Role: llms.ChatMessageTypeTool, + Parts: []llms.ContentPart{ + llms.ToolCallResponse{ + ToolCallID: toolCall.ID, + Name: toolCall.FunctionCall.Name, + Content: submitAnswerAloneError, + }, + }, + } + messages = append(messages, errorResponse) + } + continue } - messages = append(messages, aiMessage) - // Process tool calls + // Process each tool call as a separate AI message + tool result pair. + // langchaingo's Anthropic handleAIMessage only serializes Parts[0], so + // putting all tool calls in one message loses everything after the first. + // Interleaving ensures each tool_use has a matching tool_result in the + // immediately following user message. 
for i, toolCall := range choice.ToolCalls { toolCallsRemaining-- - response, answer := processToolCall(toolCall, i, len(choice.ToolCalls), len(answers), executor) + + aiMessage := llms.MessageContent{ + Role: llms.ChatMessageTypeAI, + Parts: []llms.ContentPart{toolCall}, + } + messages = append(messages, aiMessage) + + response, answer := processToolCall(toolCall, i, len(choice.ToolCalls), executor) messages = append(messages, response) if answer != nil { - answers = append(answers, *answer) + debugLog("AgenticClient: received answer for question %d", questionIndex+1) + return messages, answer, nil } } } - // If we collected some answers but ran out of tool calls, return what we have - if len(answers) > 0 { - debugLog("AgenticClient: ran out of tool calls, returning %d answers", len(answers)) - return answers, nil - } - - return nil, fmt.Errorf("exceeded maximum tool calls (%d), agent did not complete", maxToolCalls) + // Budget exhausted without answer + debugLog("AgenticClient: question %d exhausted budget", questionIndex+1) + return messages, nil, nil } // processToolCall processes a single tool call and returns the response message and optional answer -func processToolCall(toolCall llms.ToolCall, index, total, currentAnswerCount int, executor *toolExecutor) (llms.MessageContent, *AnswerSchema) { - debugLog("AgenticClient: [%d/%d] executing tool: %s", index+1, total, toolCall.FunctionCall.Name) +func processToolCall( + toolCall llms.ToolCall, + index, total int, + executor *toolExecutor, +) (llms.MessageContent, *AnswerSchema) { + debugLog( + "AgenticClient: [%d/%d] executing tool: %s", + index+1, + total, + toolCall.FunctionCall.Name, + ) debugLog("AgenticClient: tool args: %s", truncateString(toolCall.FunctionCall.Arguments, 500)) // Check for submit_answer @@ -240,13 +425,16 @@ func processToolCall(toolCall llms.ToolCall, index, total, currentAnswerCount in llms.ToolCallResponse{ ToolCallID: toolCall.ID, Name: toolCall.FunctionCall.Name, - Content: 
fmt.Sprintf("Error parsing answer: %v. Please try again with valid JSON.", err), + Content: fmt.Sprintf( + "Error parsing answer: %v. Please try again with valid JSON.", + err, + ), }, }, }, nil } - debugLog("AgenticClient: received answer #%d: short_answer=%v, answer=%s", - currentAnswerCount+1, answer.ShortAnswer, truncateString(answer.Answer, 100)) + debugLog("AgenticClient: received answer: short_answer=%v, answer=%s", + answer.ShortAnswer, truncateString(answer.Answer, 100)) // Return success response and the answer return llms.MessageContent{ @@ -255,7 +443,7 @@ func processToolCall(toolCall llms.ToolCall, index, total, currentAnswerCount in llms.ToolCallResponse{ ToolCallID: toolCall.ID, Name: toolCall.FunctionCall.Name, - Content: "Answer recorded successfully. If you have answered all questions, respond with a plain text message saying 'I am finished'. Otherwise, continue with the next question.", + Content: "Answer recorded successfully.", }, }, }, &answer @@ -281,7 +469,12 @@ func processToolCall(toolCall llms.ToolCall, index, total, currentAnswerCount in } // callLLMWithRetry calls the LLM with retry logic for transient errors -func callLLMWithRetry(ctx context.Context, llm llms.Model, messages []llms.MessageContent, tools []llms.Tool) (*llms.ContentResponse, error) { +func callLLMWithRetry( + ctx context.Context, + llm llms.Model, + messages []llms.MessageContent, + tools []llms.Tool, +) (*llms.ContentResponse, error) { var lastErr error for attempt := 1; attempt <= maxLLMRetries; attempt++ { resp, err := llm.GenerateContent(ctx, messages, llms.WithTools(tools)) @@ -327,6 +520,9 @@ func initLLM(ctx context.Context, opts *AgenticCallOptions) (llms.Model, error) openai.WithModel(opts.Model), ) default: - return nil, fmt.Errorf("unsupported provider: %s (supported: google, anthropic, openai)", opts.Provider) + return nil, fmt.Errorf( + "unsupported provider: %s (supported: google, anthropic, openai)", + opts.Provider, + ) } } diff --git 
a/pkg/llmclient/agentic_client_integration_test.go b/pkg/llmclient/agentic_client_integration_test.go index 549aa77a..b80381dc 100644 --- a/pkg/llmclient/agentic_client_integration_test.go +++ b/pkg/llmclient/agentic_client_integration_test.go @@ -6,169 +6,196 @@ import ( "path/filepath" "testing" - "github.com/grafana/plugin-validator/pkg/logme" "github.com/grafana/plugin-validator/pkg/prettyprint" "github.com/stretchr/testify/require" ) -func hasGeminiAPIKey() bool { - return os.Getenv("GEMINI_API_KEY") != "" +type providerConfig struct { + name string + provider string + model string + envKey string } -func hasAnthropicAPIKey() bool { - return os.Getenv("ANTHROPIC_API_KEY") != "" +var providers = []providerConfig{ + {name: "Gemini", provider: "google", model: "gemini-2.5-flash", envKey: "GEMINI_API_KEY"}, + {name: "Anthropic", provider: "anthropic", model: "claude-haiku-4-5", envKey: "ANTHROPIC_API_KEY"}, + {name: "OpenAI", provider: "openai", model: "gpt-5-mini", envKey: "OPENAI_API_KEY"}, } -// TestAgenticClient_NoFilesystemAccess tests that the agent correctly identifies -// when an application does NOT access the filesystem -func TestAgenticClient_NoFilesystemAccess(t *testing.T) { - if !hasGeminiAPIKey() { - t.Skip("GEMINI_API_KEY not set, skipping agentic client integration test") +func skipIfMissingKey(t *testing.T, p providerConfig) { + t.Helper() + if os.Getenv(p.envKey) == "" || os.Getenv("DEBUG") != "1" { + t.Skipf("%s not set or DEBUG!=1, skipping %s integration test", p.envKey, p.name) } +} + +func newClient(t *testing.T, p providerConfig) AgenticClient { + t.Helper() + client, err := NewAgenticClient(&AgenticCallOptions{ + Provider: p.provider, + Model: p.model, + APIKey: os.Getenv(p.envKey), + }) + require.NoError(t, err) + return client +} + +func TestAgenticClient_EmptyQuestions(t *testing.T) { + for _, p := range providers { + t.Run(p.name, func(t *testing.T) { + skipIfMissingKey(t, p) + + client := newClient(t, p) - opts := 
&AgenticCallOptions{ - Provider: "google", - Model: "gemini-2.0-flash", - APIKey: os.Getenv("GEMINI_API_KEY"), + testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access")) + require.NoError(t, err) + + answers, err := client.CallLLM(context.Background(), []string{}, testDataPath) + require.Error(t, err, "Empty questions should return error") + require.Contains(t, err.Error(), "at least one question is required") + require.Nil(t, answers) + }) } +} - client, err := NewAgenticClient(opts) - require.NoError(t, err) +func TestAgenticClient_NoFilesystemAccess(t *testing.T) { + for _, p := range providers { + t.Run(p.name, func(t *testing.T) { + skipIfMissingKey(t, p) - testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access")) - require.NoError(t, err) + client := newClient(t, p) + + testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access")) + require.NoError(t, err) - prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations." + prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations." 
- answers, err := client.CallLLM(context.Background(), prompt, testDataPath) - logme.DebugFln("Agent answers:") - prettyprint.Print(answers) + answers, err := client.CallLLM(context.Background(), []string{prompt}, testDataPath) + prettyprint.Print(answers) - require.NoError(t, err, "CallLLM should not return error") - require.Len(t, answers, 1, "Should return exactly one answer") + require.NoError(t, err, "CallLLM should not return error") + require.Len(t, answers, 1, "Should return exactly one answer") - answer := answers[0] - require.NotEmpty(t, answer.Answer, "Answer field should be populated") - require.Equal(t, false, answer.ShortAnswer, "ShortAnswer should be false - this app does not access the filesystem") + answer := answers[0] + require.Equal(t, prompt, answer.Question, "Question field should match input question") + require.NotEmpty(t, answer.Answer, "Answer field should be populated") + require.Equal(t, false, answer.ShortAnswer, + "ShortAnswer should be false - this app does not access the filesystem") - t.Logf("Agent Answer: %s", answer.Answer) - t.Logf("Short Answer: %v", answer.ShortAnswer) - if len(answer.Files) > 0 { - t.Logf("Files: %v", answer.Files) + t.Logf("Agent Answer: %s", answer.Answer) + t.Logf("Short Answer: %v", answer.ShortAnswer) + if len(answer.Files) > 0 { + t.Logf("Files: %v", answer.Files) + } + }) } } -// TestAgenticClient_FilesystemAccess tests that the agent correctly identifies -// when an application DOES access the filesystem func TestAgenticClient_FilesystemAccess(t *testing.T) { - if !hasGeminiAPIKey() { - t.Skip("GEMINI_API_KEY not set, skipping agentic client integration test") - } + for _, p := range providers { + t.Run(p.name, func(t *testing.T) { + skipIfMissingKey(t, p) - opts := &AgenticCallOptions{ - Provider: "google", - Model: "gemini-2.0-flash", - APIKey: os.Getenv("GEMINI_API_KEY"), - } - - client, err := NewAgenticClient(opts) - require.NoError(t, err) + client := newClient(t, p) - testDataPath, err := 
filepath.Abs(filepath.Join("testdata", "fs_access")) - require.NoError(t, err) + testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access")) + require.NoError(t, err) - prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations." + prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations." - answers, err := client.CallLLM(context.Background(), prompt, testDataPath) - logme.DebugFln("Agent answers:") - prettyprint.Print(answers) + answers, err := client.CallLLM(context.Background(), []string{prompt}, testDataPath) + prettyprint.Print(answers) - require.NoError(t, err, "CallLLM should not return error") - require.Len(t, answers, 1, "Should return exactly one answer") + require.NoError(t, err, "CallLLM should not return error") + require.Len(t, answers, 1, "Should return exactly one answer") - answer := answers[0] - require.NotEmpty(t, answer.Answer, "Answer field should be populated") - require.Equal(t, true, answer.ShortAnswer, "ShortAnswer should be true - this app accesses the filesystem via os.ReadFile") + answer := answers[0] + require.Equal(t, prompt, answer.Question, "Question field should match input question") + require.NotEmpty(t, answer.Answer, "Answer field should be populated") + require.Equal(t, true, answer.ShortAnswer, + "ShortAnswer should be true - this app accesses the filesystem via os.ReadFile") - t.Logf("Agent Answer: %s", answer.Answer) - t.Logf("Short Answer: %v", answer.ShortAnswer) - if len(answer.Files) > 0 { - t.Logf("Files: %v", answer.Files) + t.Logf("Agent Answer: %s", answer.Answer) + t.Logf("Short Answer: %v", answer.ShortAnswer) + if len(answer.Files) > 0 { + t.Logf("Files: %v", answer.Files) + } + }) } } -// TestAgenticClient_NoFilesystemAccess_Anthropic tests the same scenario using Anthropic Claude -func 
TestAgenticClient_NoFilesystemAccess_Anthropic(t *testing.T) { - if !hasAnthropicAPIKey() { - t.Skip("ANTHROPIC_API_KEY not set, skipping Anthropic agentic client integration test") - } +func TestAgenticClient_TwoQuestions(t *testing.T) { + for _, p := range providers { + t.Run(p.name, func(t *testing.T) { + skipIfMissingKey(t, p) - opts := &AgenticCallOptions{ - Provider: "anthropic", - Model: "claude-sonnet-4-5", - APIKey: os.Getenv("ANTHROPIC_API_KEY"), - } + client := newClient(t, p) - client, err := NewAgenticClient(opts) - require.NoError(t, err) + testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access")) + require.NoError(t, err) - testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access")) - require.NoError(t, err) + questions := []string{ + "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations.", + "Which specific files contain the filesystem operations and what operations do they perform?", + } - prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations." 
+ answers, err := client.CallLLM(context.Background(), questions, testDataPath) + prettyprint.Print(answers) - answers, err := client.CallLLM(context.Background(), prompt, testDataPath) - logme.DebugFln("Agent answers:") - prettyprint.Print(answers) + require.NoError(t, err, "CallLLM should not return error") + require.Len(t, answers, 2, "Should return exactly two answers") - require.NoError(t, err, "CallLLM should not return error") - require.Len(t, answers, 1, "Should return exactly one answer") + require.Equal(t, questions[0], answers[0].Question, "First answer's question should match") + require.NotEmpty(t, answers[0].Answer, "First answer should be populated") + require.Equal(t, true, answers[0].ShortAnswer, + "First answer should be true - app accesses filesystem") - answer := answers[0] - require.NotEmpty(t, answer.Answer, "Answer field should be populated") - require.Equal(t, false, answer.ShortAnswer, "ShortAnswer should be false - this app does not access the filesystem") + require.Equal(t, questions[1], answers[1].Question, "Second answer's question should match") + require.NotEmpty(t, answers[1].Answer, "Second answer should be populated") - t.Logf("Agent Answer: %s", answer.Answer) - t.Logf("Short Answer: %v", answer.ShortAnswer) - if len(answer.Files) > 0 { - t.Logf("Files: %v", answer.Files) + t.Logf("Answer 1: %s", answers[0].Answer) + t.Logf("Answer 2: %s", answers[1].Answer) + }) } } -// TestAgenticClient_FilesystemAccess_Anthropic tests the same scenario using Anthropic Claude -func TestAgenticClient_FilesystemAccess_Anthropic(t *testing.T) { - if !hasAnthropicAPIKey() { - t.Skip("ANTHROPIC_API_KEY not set, skipping Anthropic agentic client integration test") - } +func TestAgenticClient_ThreeQuestions(t *testing.T) { + for _, p := range providers { + t.Run(p.name, func(t *testing.T) { + skipIfMissingKey(t, p) - opts := &AgenticCallOptions{ - Provider: "anthropic", - Model: "claude-sonnet-4-5", - APIKey: os.Getenv("ANTHROPIC_API_KEY"), - } + 
client := newClient(t, p) - client, err := NewAgenticClient(opts) - require.NoError(t, err) + testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access")) + require.NoError(t, err) - testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access")) - require.NoError(t, err) + questions := []string{ + "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations.", + "Which specific files contain the filesystem operations and what operations do they perform?", + "Does this application use any caching mechanisms? If so, describe how the cache works.", + } + + answers, err := client.CallLLM(context.Background(), questions, testDataPath) + prettyprint.Print(answers) - prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations." + require.NoError(t, err, "CallLLM should not return error") + require.Len(t, answers, 3, "Should return exactly three answers") - answers, err := client.CallLLM(context.Background(), prompt, testDataPath) - logme.DebugFln("Agent answers:") - prettyprint.Print(answers) + require.Equal(t, questions[0], answers[0].Question, "First answer's question should match") + require.NotEmpty(t, answers[0].Answer, "First answer should be populated") + require.Equal(t, true, answers[0].ShortAnswer, + "First answer should be true - app accesses filesystem") - require.NoError(t, err, "CallLLM should not return error") - require.Len(t, answers, 1, "Should return exactly one answer") + require.Equal(t, questions[1], answers[1].Question, "Second answer's question should match") + require.NotEmpty(t, answers[1].Answer, "Second answer should be populated") - answer := answers[0] - require.NotEmpty(t, answer.Answer, "Answer field should be populated") - require.Equal(t, true, answer.ShortAnswer, "ShortAnswer should be true - this app accesses the filesystem via os.ReadFile") + 
require.Equal(t, questions[2], answers[2].Question, "Third answer's question should match") + require.NotEmpty(t, answers[2].Answer, "Third answer should be populated") - t.Logf("Agent Answer: %s", answer.Answer) - t.Logf("Short Answer: %v", answer.ShortAnswer) - if len(answer.Files) > 0 { - t.Logf("Files: %v", answer.Files) + t.Logf("Answer 1: %s", answers[0].Answer) + t.Logf("Answer 2: %s", answers[1].Answer) + t.Logf("Answer 3: %s", answers[2].Answer) + }) } } diff --git a/pkg/llmclient/agentic_debug.go b/pkg/llmclient/agentic_debug.go index bf6f5050..7c93618d 100644 --- a/pkg/llmclient/agentic_debug.go +++ b/pkg/llmclient/agentic_debug.go @@ -7,7 +7,6 @@ import ( "os" "path/filepath" "sync" - "time" ) var ( @@ -24,8 +23,7 @@ func initDebugLogger() { return } - timestamp := time.Now().Format("20060102-150405") - debugPath = filepath.Join(os.TempDir(), fmt.Sprintf("validator-agentic-%s.log", timestamp)) + debugPath = filepath.Join(os.TempDir(), "validator-agentic.log") f, err := os.OpenFile(debugPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) if err != nil { diff --git a/pkg/llmclient/agentic_tools.go b/pkg/llmclient/agentic_tools.go index dd0fd81c..5afb5e6f 100644 --- a/pkg/llmclient/agentic_tools.go +++ b/pkg/llmclient/agentic_tools.go @@ -135,7 +135,7 @@ func buildAgenticTools() []llms.Tool { }, "short_answer": map[string]interface{}{ "type": "boolean", - "description": "A boolean true/false answer to yes/no questions", + "description": "A boolean answer to the question: true means YES, false means NO. For example, if the question is 'Is the sky blue?' the short_answer is true. If the question is 'Is the sky green?' 
the short_answer is false.", }, "files": map[string]interface{}{ "type": "array", From b8789906d4305f30f1e52347c1133916b0a6d9a4 Mon Sep 17 00:00:00 2001 From: Esteban Beltran Date: Tue, 3 Mar 2026 18:04:49 +0100 Subject: [PATCH 02/10] remove verbosity in tests --- .../agentic_client_integration_test.go | 24 ------------------- 1 file changed, 24 deletions(-) diff --git a/pkg/llmclient/agentic_client_integration_test.go b/pkg/llmclient/agentic_client_integration_test.go index b80381dc..8c4ea088 100644 --- a/pkg/llmclient/agentic_client_integration_test.go +++ b/pkg/llmclient/agentic_client_integration_test.go @@ -6,7 +6,6 @@ import ( "path/filepath" "testing" - "github.com/grafana/plugin-validator/pkg/prettyprint" "github.com/stretchr/testify/require" ) @@ -72,7 +71,6 @@ func TestAgenticClient_NoFilesystemAccess(t *testing.T) { prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations." answers, err := client.CallLLM(context.Background(), []string{prompt}, testDataPath) - prettyprint.Print(answers) require.NoError(t, err, "CallLLM should not return error") require.Len(t, answers, 1, "Should return exactly one answer") @@ -82,12 +80,6 @@ func TestAgenticClient_NoFilesystemAccess(t *testing.T) { require.NotEmpty(t, answer.Answer, "Answer field should be populated") require.Equal(t, false, answer.ShortAnswer, "ShortAnswer should be false - this app does not access the filesystem") - - t.Logf("Agent Answer: %s", answer.Answer) - t.Logf("Short Answer: %v", answer.ShortAnswer) - if len(answer.Files) > 0 { - t.Logf("Files: %v", answer.Files) - } }) } } @@ -105,7 +97,6 @@ func TestAgenticClient_FilesystemAccess(t *testing.T) { prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations." 
answers, err := client.CallLLM(context.Background(), []string{prompt}, testDataPath) - prettyprint.Print(answers) require.NoError(t, err, "CallLLM should not return error") require.Len(t, answers, 1, "Should return exactly one answer") @@ -115,12 +106,6 @@ func TestAgenticClient_FilesystemAccess(t *testing.T) { require.NotEmpty(t, answer.Answer, "Answer field should be populated") require.Equal(t, true, answer.ShortAnswer, "ShortAnswer should be true - this app accesses the filesystem via os.ReadFile") - - t.Logf("Agent Answer: %s", answer.Answer) - t.Logf("Short Answer: %v", answer.ShortAnswer) - if len(answer.Files) > 0 { - t.Logf("Files: %v", answer.Files) - } }) } } @@ -141,7 +126,6 @@ func TestAgenticClient_TwoQuestions(t *testing.T) { } answers, err := client.CallLLM(context.Background(), questions, testDataPath) - prettyprint.Print(answers) require.NoError(t, err, "CallLLM should not return error") require.Len(t, answers, 2, "Should return exactly two answers") @@ -153,9 +137,6 @@ func TestAgenticClient_TwoQuestions(t *testing.T) { require.Equal(t, questions[1], answers[1].Question, "Second answer's question should match") require.NotEmpty(t, answers[1].Answer, "Second answer should be populated") - - t.Logf("Answer 1: %s", answers[0].Answer) - t.Logf("Answer 2: %s", answers[1].Answer) }) } } @@ -177,7 +158,6 @@ func TestAgenticClient_ThreeQuestions(t *testing.T) { } answers, err := client.CallLLM(context.Background(), questions, testDataPath) - prettyprint.Print(answers) require.NoError(t, err, "CallLLM should not return error") require.Len(t, answers, 3, "Should return exactly three answers") @@ -192,10 +172,6 @@ func TestAgenticClient_ThreeQuestions(t *testing.T) { require.Equal(t, questions[2], answers[2].Question, "Third answer's question should match") require.NotEmpty(t, answers[2].Answer, "Third answer should be populated") - - t.Logf("Answer 1: %s", answers[0].Answer) - t.Logf("Answer 2: %s", answers[1].Answer) - t.Logf("Answer 3: %s", 
answers[2].Answer) }) } } From 8ee25498070174b6ef71064464dc662a69431bf8 Mon Sep 17 00:00:00 2001 From: Esteban Beltran Date: Tue, 3 Mar 2026 18:18:59 +0100 Subject: [PATCH 03/10] add strict toppenaio --- pkg/llmclient/agentic_tools.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pkg/llmclient/agentic_tools.go b/pkg/llmclient/agentic_tools.go index 5afb5e6f..6124bcfc 100644 --- a/pkg/llmclient/agentic_tools.go +++ b/pkg/llmclient/agentic_tools.go @@ -126,6 +126,7 @@ func buildAgenticTools() []llms.Tool { Function: &llms.FunctionDefinition{ Name: "submit_answer", Description: "Submit your final answer to the question. Use this when you have gathered enough information.", + Strict: true, Parameters: map[string]interface{}{ "type": "object", "properties": map[string]interface{}{ @@ -140,14 +141,15 @@ func buildAgenticTools() []llms.Tool { "files": map[string]interface{}{ "type": "array", "items": map[string]interface{}{"type": "string"}, - "description": "List of relevant files (optional)", + "description": "List of relevant files. Pass an empty array if not relevant.", }, "code_snippet": map[string]interface{}{ "type": "string", - "description": "A relevant code snippet (optional)", + "description": "A relevant code snippet. 
Pass an empty string if not relevant.", }, }, - "required": []string{"answer", "short_answer"}, + "required": []string{"answer", "short_answer", "files", "code_snippet"}, + "additionalProperties": false, }, }, }, From 77dfb068f48696bb2ecbe302ef50f59df5bedc48 Mon Sep 17 00:00:00 2001 From: Esteban Beltran Date: Wed, 4 Mar 2026 14:23:37 +0100 Subject: [PATCH 04/10] update --- docs/anthropic-choices-behavior.md | 70 ++++++++++++++++++ pkg/llmclient/agentic_client.go | 74 +++++++++---------- .../agentic_client_integration_test.go | 42 +++++++++-- 3 files changed, 143 insertions(+), 43 deletions(-) create mode 100644 docs/anthropic-choices-behavior.md diff --git a/docs/anthropic-choices-behavior.md b/docs/anthropic-choices-behavior.md new file mode 100644 index 00000000..a578e4c3 --- /dev/null +++ b/docs/anthropic-choices-behavior.md @@ -0,0 +1,70 @@ +# Anthropic Choices and Message Serialization in go-langchain + +## Overview +Anthropic's response structure and go-langchain's serialization behavior require special handling when building multi-turn conversations with tool use. + +## Response Structure (Anthropic → go-langchain) + +Anthropic API returns responses as an array of **content blocks**: +``` +[text_block, tool_use_block, tool_use_block, ...] +``` + +go-langchain converts each content block into a **separate ContentChoice**: +- `type: "text"` → `ContentChoice{Content: "...", ToolCalls: []}` +- `type: "tool_use"` → `ContentChoice{Content: "", ToolCalls: [{...}]}` +- `type: "thinking"` → `ContentChoice{Content: "", GenerationInfo: {...}}` + +**Key insight:** One Anthropic response can produce multiple Choices. For example: +- Response with text + 2 tool calls → 3 Choices +- Response with just text → 1 Choice + +## Serialization Constraint (go-langchain → Anthropic) + +The critical limitation is in `handleAIMessage()`: +```go +if toolCall, ok := msg.Parts[0].(llms.ToolCall); ok { + // Only Parts[0] is serialized! 
+} +``` + +**This means:** +- Only `Parts[0]` of a MessageContent is serialized back to Anthropic +- If you create `MessageContent{Parts: [toolCall1, toolCall2]}`, only `toolCall1` is sent +- Multiple ToolCalls in one message **will lose data** + +## Required Pattern: Interleaved Messages + +To work around this limitation, tool calls must be **interleaved** as separate messages: + +``` +AI message: Parts[toolCall1] +Tool message: Parts[toolResult1] +AI message: Parts[toolCall2] +Tool message: Parts[toolResult2] +``` + +Not: +``` +AI message: Parts[toolCall1, toolCall2] // toolCall2 would be lost! +Tool message: Parts[toolResult1, toolResult2] +``` + +## Why Merging Choices is Necessary + +When processing Anthropic's response: +1. Anthropic returns separate content blocks (potentially text + multiple tools) +2. go-langchain creates one Choice per block +3. We must merge these Choices to get the complete response +4. Then we must split them back into individual AI messages for serialization + +The merge preserves all information for processing, but the split ensures proper serialization. + +## Implementation Details in agentic_client.go + +The choice-merging code performs this merge: +- Collects all content parts from separate Choices +- Collects all ToolCalls from separate Choices +- Creates one merged view for processing + +Then later in the tool call processing, it **reverses** this by creating one AI message per ToolCall to avoid the serialization bug. diff --git a/pkg/llmclient/agentic_client.go b/pkg/llmclient/agentic_client.go index e41ab881..a8bac0d4 100644 --- a/pkg/llmclient/agentic_client.go +++ b/pkg/llmclient/agentic_client.go @@ -38,8 +38,6 @@ You can only use one tool at a time. IMPORTANT: You are in non-interactive mode. No one will read your text answers, only tools. 
When you have gathered enough information, use submit_answer to provide your answer.` - questionAppendPrompt = `Start by listing the files in the repository and exploring the contents.` - budgetNudgePrompt = `You have only %d tool calls remaining. Wrap up your investigation and call submit_answer now with whatever information you have gathered so far.` useToolsReminderPrompt = `You are in non-interactive mode. You must start using your tools now to explore the repository. When you have enough information, use submit_answer to provide your answer.` @@ -73,6 +71,8 @@ type agenticClientImpl struct { apiKey string model string provider string + tools []llms.Tool + executor *toolExecutor } // NewAgenticClient creates a new AgenticClient with the given options @@ -119,11 +119,9 @@ func (c *agenticClientImpl) CallLLM( return nil, fmt.Errorf("failed to initialize LLM: %w", err) } - // Build tools - tools := buildAgenticTools() - - // Create tool executor - executor := newToolExecutor(repositoryPath) + // Initialize tools and executor for this repository + c.tools = buildAgenticTools() + c.executor = newToolExecutor(repositoryPath) // Build initial messages with system prompt only (no user message yet) messages := []llms.MessageContent{ @@ -152,14 +150,13 @@ func (c *agenticClientImpl) CallLLM( debugLog("Question: %s", truncateString(question, 200)) originalQuestion := question - question = fmt.Sprintf("%s\n\n%s", question, questionAppendPrompt) // Determine budget for this question - budget := maxToolCallsFirstQuestion + toolsBudget := maxToolCallsFirstQuestion if questionIndex > 0 { - budget = maxToolCallsFollowUp + toolsBudget = maxToolCallsFollowUp } - debugLog("Budget: %d tool calls", budget) + debugLog("Budget: %d tool calls", toolsBudget) // Add the question as a human message messages = append(messages, llms.TextParts(llms.ChatMessageTypeHuman, question)) @@ -169,9 +166,7 @@ func (c *agenticClientImpl) CallLLM( ctx, llm, messages, - tools, - executor, - budget, + 
toolsBudget, questionIndex, ) messages = updatedMessages @@ -212,12 +207,10 @@ func (c *agenticClientImpl) runQuestionLoop( ctx context.Context, llm llms.Model, messages []llms.MessageContent, - tools []llms.Tool, - executor *toolExecutor, - budget int, + toolsBudget int, questionIndex int, ) ([]llms.MessageContent, *AnswerSchema, error) { - toolCallsRemaining := budget + toolCallsRemaining := toolsBudget consecutiveNoTools := 0 iteration := 0 @@ -239,7 +232,7 @@ func (c *agenticClientImpl) runQuestionLoop( // Call LLM with retry logic debugLog("AgenticClient: calling LLM...") - resp, err := callLLMWithRetry(ctx, llm, messages, tools) + resp, err := c.callLLMWithRetry(ctx, llm, messages) if err != nil { debugLog("AgenticClient: LLM call failed: %v", err) return messages, nil, fmt.Errorf( @@ -254,13 +247,18 @@ func (c *agenticClientImpl) runQuestionLoop( return messages, nil, fmt.Errorf("no response from LLM") } - // Log raw response for debugging - debugLog("AgenticClient: received response with %d choices", len(resp.Choices)) - if choicesJSON, err := json.MarshalIndent(resp.Choices, "", " "); err == nil { - debugLog("Raw response Choices:\n%s", string(choicesJSON)) - } - - // Merge all choices into one (Anthropic returns text and tool calls as separate choices) + // Merge all choices into one unified view for processing. + // + // Background: Anthropic's API returns separate content blocks (text, tool_use, thinking) + // which go-langchain converts into separate ContentChoice objects. For example, a response + // with text + 2 tool calls becomes 3 separate Choices. + // + // We merge them here to process the complete response, but later (around line 360) we must + // split them back into separate AI messages because go-langchain's handleAIMessage() only + // serializes Parts[0] when sending back to Anthropic. Putting multiple tool calls in one + // message would lose all but the first. 
+ // + // See docs/anthropic-choices-behavior.md for detailed explanation of this pattern. mergedChoice := llms.ContentChoice{} var allToolCalls []llms.ToolCall var contentParts []string @@ -372,10 +370,14 @@ func (c *agenticClientImpl) runQuestionLoop( } // Process each tool call as a separate AI message + tool result pair. - // langchaingo's Anthropic handleAIMessage only serializes Parts[0], so - // putting all tool calls in one message loses everything after the first. - // Interleaving ensures each tool_use has a matching tool_result in the - // immediately following user message. + // This is the "split" part of the merge-then-split pattern. + // + // Why: go-langchain's Anthropic handleAIMessage() only serializes Parts[0], so + // MessageContent{Parts: [toolCall1, toolCall2]} would lose toolCall2 when sent back. + // By creating one AI message per tool call, we ensure all tool calls are properly + // serialized. Each tool_use then has its matching tool_result in the following message. + // + // See docs/anthropic-choices-behavior.md for details on this serialization constraint. 
for i, toolCall := range choice.ToolCalls { toolCallsRemaining-- @@ -385,7 +387,7 @@ func (c *agenticClientImpl) runQuestionLoop( } messages = append(messages, aiMessage) - response, answer := processToolCall(toolCall, i, len(choice.ToolCalls), executor) + response, answer := c.processToolCall(toolCall, i, len(choice.ToolCalls)) messages = append(messages, response) if answer != nil { debugLog("AgenticClient: received answer for question %d", questionIndex+1) @@ -400,10 +402,9 @@ func (c *agenticClientImpl) runQuestionLoop( } // processToolCall processes a single tool call and returns the response message and optional answer -func processToolCall( +func (c *agenticClientImpl) processToolCall( toolCall llms.ToolCall, index, total int, - executor *toolExecutor, ) (llms.MessageContent, *AnswerSchema) { debugLog( "AgenticClient: [%d/%d] executing tool: %s", @@ -450,7 +451,7 @@ func processToolCall( } // Execute other tools - result, err := executor.execute(toolCall.FunctionCall.Name, toolCall.FunctionCall.Arguments) + result, err := c.executor.execute(toolCall.FunctionCall.Name, toolCall.FunctionCall.Arguments) if err != nil { result = fmt.Sprintf("Error: %v", err) } @@ -469,15 +470,14 @@ func processToolCall( } // callLLMWithRetry calls the LLM with retry logic for transient errors -func callLLMWithRetry( +func (c *agenticClientImpl) callLLMWithRetry( ctx context.Context, llm llms.Model, messages []llms.MessageContent, - tools []llms.Tool, ) (*llms.ContentResponse, error) { var lastErr error for attempt := 1; attempt <= maxLLMRetries; attempt++ { - resp, err := llm.GenerateContent(ctx, messages, llms.WithTools(tools)) + resp, err := llm.GenerateContent(ctx, messages, llms.WithTools(c.tools)) if err == nil { return resp, nil } diff --git a/pkg/llmclient/agentic_client_integration_test.go b/pkg/llmclient/agentic_client_integration_test.go index 8c4ea088..abf72676 100644 --- a/pkg/llmclient/agentic_client_integration_test.go +++ 
b/pkg/llmclient/agentic_client_integration_test.go @@ -18,7 +18,12 @@ type providerConfig struct { var providers = []providerConfig{ {name: "Gemini", provider: "google", model: "gemini-2.5-flash", envKey: "GEMINI_API_KEY"}, - {name: "Anthropic", provider: "anthropic", model: "claude-haiku-4-5", envKey: "ANTHROPIC_API_KEY"}, + { + name: "Anthropic", + provider: "anthropic", + model: "claude-haiku-4-5", + envKey: "ANTHROPIC_API_KEY", + }, {name: "OpenAI", provider: "openai", model: "gpt-5-mini", envKey: "OPENAI_API_KEY"}, } @@ -130,12 +135,22 @@ func TestAgenticClient_TwoQuestions(t *testing.T) { require.NoError(t, err, "CallLLM should not return error") require.Len(t, answers, 2, "Should return exactly two answers") - require.Equal(t, questions[0], answers[0].Question, "First answer's question should match") + require.Equal( + t, + questions[0], + answers[0].Question, + "First answer's question should match", + ) require.NotEmpty(t, answers[0].Answer, "First answer should be populated") require.Equal(t, true, answers[0].ShortAnswer, "First answer should be true - app accesses filesystem") - require.Equal(t, questions[1], answers[1].Question, "Second answer's question should match") + require.Equal( + t, + questions[1], + answers[1].Question, + "Second answer's question should match", + ) require.NotEmpty(t, answers[1].Answer, "Second answer should be populated") }) } @@ -162,15 +177,30 @@ func TestAgenticClient_ThreeQuestions(t *testing.T) { require.NoError(t, err, "CallLLM should not return error") require.Len(t, answers, 3, "Should return exactly three answers") - require.Equal(t, questions[0], answers[0].Question, "First answer's question should match") + require.Equal( + t, + questions[0], + answers[0].Question, + "First answer's question should match", + ) require.NotEmpty(t, answers[0].Answer, "First answer should be populated") require.Equal(t, true, answers[0].ShortAnswer, "First answer should be true - app accesses filesystem") - require.Equal(t, 
questions[1], answers[1].Question, "Second answer's question should match") + require.Equal( + t, + questions[1], + answers[1].Question, + "Second answer's question should match", + ) require.NotEmpty(t, answers[1].Answer, "Second answer should be populated") - require.Equal(t, questions[2], answers[2].Question, "Third answer's question should match") + require.Equal( + t, + questions[2], + answers[2].Question, + "Third answer's question should match", + ) require.NotEmpty(t, answers[2].Answer, "Third answer should be populated") }) } From 970e69dd3f86334750918fcc81df2829b5369f19 Mon Sep 17 00:00:00 2001 From: Esteban Beltran Date: Wed, 4 Mar 2026 15:46:40 +0100 Subject: [PATCH 05/10] wip ditch langchain --- go.mod | 2 + go.sum | 4 + pkg/llmclient/agentic_client.go | 148 ++++++----- pkg/llmclient/agentic_debug.go | 41 +-- pkg/llmclient/agentic_tools.go | 17 +- pkg/llmprovider/gemini/client.go | 380 +++++++++++++++++++++++++++ pkg/llmprovider/langchain.go | 250 ++++++++++++++++++ pkg/llmprovider/langchain_adapter.go | 69 +++++ pkg/llmprovider/types.go | 171 ++++++++++++ pkg/logme/logme.go | 42 +++ 10 files changed, 1010 insertions(+), 114 deletions(-) create mode 100644 pkg/llmprovider/gemini/client.go create mode 100644 pkg/llmprovider/langchain.go create mode 100644 pkg/llmprovider/langchain_adapter.go create mode 100644 pkg/llmprovider/types.go diff --git a/go.mod b/go.mod index de3a3278..50893c06 100644 --- a/go.mod +++ b/go.mod @@ -131,6 +131,7 @@ require ( github.com/googleapis/enterprise-certificate-proxy v0.3.6 // indirect github.com/googleapis/gax-go/v2 v2.14.2 // indirect github.com/gopherjs/gopherjs v1.17.2 // indirect + github.com/gorilla/websocket v1.5.3 // indirect github.com/hhatto/gorst v0.0.0-20181029133204-ca9f730cac5b // indirect github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect github.com/jdkato/prose v1.2.1 // indirect @@ -220,6 +221,7 @@ require ( golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect 
gonum.org/v1/gonum v0.16.0 // indirect google.golang.org/api v0.237.0 // indirect + google.golang.org/genai v1.48.0 // indirect google.golang.org/genproto v0.0.0-20250707201910-8d1bb00bc6a7 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20251124214823-79d6a2a48846 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20251213004720-97cd9d5aeac2 // indirect diff --git a/go.sum b/go.sum index 35c11975..72cea00a 100644 --- a/go.sum +++ b/go.sum @@ -283,6 +283,8 @@ github.com/googleapis/gax-go/v2 v2.14.2 h1:eBLnkZ9635krYIPD+ag1USrOAI0Nr0QYF3+/3 github.com/googleapis/gax-go/v2 v2.14.2/go.mod h1:ON64QhlJkhVtSqp4v1uaK92VyZ2gmvDQsweuyLV+8+w= github.com/gopherjs/gopherjs v1.17.2 h1:fQnZVsXk8uxXIStYb0N4bGk7jeyTalG/wsZjQ25dO0g= github.com/gopherjs/gopherjs v1.17.2/go.mod h1:pRRIvn/QzFLrKfvEz3qUuEhtE/zLCWfreZ6J5gM2i+k= +github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= +github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1 h1:e9Rjr40Z98/clHv5Yg79Is0NtosR5LXRvdr7o/6NwbA= github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1/go.mod h1:tIxuGz/9mpox++sgp9fJjHO0+q1X9/UOWd798aAm22M= @@ -669,6 +671,8 @@ google.golang.org/api v0.237.0 h1:MP7XVsGZesOsx3Q8WVa4sUdbrsTvDSOERd3Vh4xj/wc= google.golang.org/api v0.237.0/go.mod h1:cOVEm2TpdAGHL2z+UwyS+kmlGr3bVWQQ6sYEqkKje50= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/genai v1.48.0 h1:1vb15G291wAjJJueisMDpUhssljhEdJU2t5qTidrVPs= +google.golang.org/genai v1.48.0/go.mod h1:A3kkl0nyBjyFlNjgxIwKq70julKbIxpSxqKO5gw/gmk= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto 
v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= diff --git a/pkg/llmclient/agentic_client.go b/pkg/llmclient/agentic_client.go index a8bac0d4..3d1b8bb8 100644 --- a/pkg/llmclient/agentic_client.go +++ b/pkg/llmclient/agentic_client.go @@ -7,9 +7,9 @@ import ( "strings" "time" - "github.com/tmc/langchaingo/llms" + "github.com/grafana/plugin-validator/pkg/llmprovider" + "github.com/grafana/plugin-validator/pkg/llmprovider/gemini" "github.com/tmc/langchaingo/llms/anthropic" - "github.com/tmc/langchaingo/llms/googleai" "github.com/tmc/langchaingo/llms/openai" ) @@ -71,7 +71,7 @@ type agenticClientImpl struct { apiKey string model string provider string - tools []llms.Tool + tools []llmprovider.Tool executor *toolExecutor } @@ -114,7 +114,7 @@ func (c *agenticClientImpl) CallLLM( Model: c.model, Provider: c.provider, } - llm, err := initLLM(ctx, opts) + provider, err := initProvider(ctx, opts) if err != nil { return nil, fmt.Errorf("failed to initialize LLM: %w", err) } @@ -124,8 +124,8 @@ func (c *agenticClientImpl) CallLLM( c.executor = newToolExecutor(repositoryPath) // Build initial messages with system prompt only (no user message yet) - messages := []llms.MessageContent{ - llms.TextParts(llms.ChatMessageTypeSystem, systemPrompt), + messages := []llmprovider.Message{ + llmprovider.TextMessage(llmprovider.RoleSystem, systemPrompt), } // Print debug log file path before starting the loop @@ -159,12 +159,12 @@ func (c *agenticClientImpl) CallLLM( debugLog("Budget: %d tool calls", toolsBudget) // Add the question as a human message - messages = append(messages, llms.TextParts(llms.ChatMessageTypeHuman, question)) + messages = append(messages, llmprovider.TextMessage(llmprovider.RoleHuman, question)) // Run the question loop updatedMessages, answer, err := c.runQuestionLoop( ctx, - llm, + provider, messages, 
toolsBudget, questionIndex, @@ -205,11 +205,11 @@ func (c *agenticClientImpl) CallLLM( // Returns updated messages, the answer (or nil if budget exhausted), and error. func (c *agenticClientImpl) runQuestionLoop( ctx context.Context, - llm llms.Model, - messages []llms.MessageContent, + provider llmprovider.Provider, + messages []llmprovider.Message, toolsBudget int, questionIndex int, -) ([]llms.MessageContent, *AnswerSchema, error) { +) ([]llmprovider.Message, *AnswerSchema, error) { toolCallsRemaining := toolsBudget consecutiveNoTools := 0 iteration := 0 @@ -224,15 +224,15 @@ func (c *agenticClientImpl) runQuestionLoop( if !budgetNudged && toolCallsRemaining <= 5 { budgetNudged = true debugLog("AgenticClient: nudging model about low budget") - messages = append(messages, llms.TextParts( - llms.ChatMessageTypeHuman, + messages = append(messages, llmprovider.TextMessage( + llmprovider.RoleHuman, fmt.Sprintf(budgetNudgePrompt, toolCallsRemaining), )) } // Call LLM with retry logic debugLog("AgenticClient: calling LLM...") - resp, err := c.callLLMWithRetry(ctx, llm, messages) + resp, err := c.callLLMWithRetry(ctx, provider, messages) if err != nil { debugLog("AgenticClient: LLM call failed: %v", err) return messages, nil, fmt.Errorf( @@ -253,19 +253,23 @@ func (c *agenticClientImpl) runQuestionLoop( // which go-langchain converts into separate ContentChoice objects. For example, a response // with text + 2 tool calls becomes 3 separate Choices. // - // We merge them here to process the complete response, but later (around line 360) we must + // We merge them here to process the complete response, but later we must // split them back into separate AI messages because go-langchain's handleAIMessage() only // serializes Parts[0] when sending back to Anthropic. Putting multiple tool calls in one // message would lose all but the first. // // See docs/anthropic-choices-behavior.md for detailed explanation of this pattern. 
- mergedChoice := llms.ContentChoice{} - var allToolCalls []llms.ToolCall + mergedChoice := llmprovider.Choice{} + var allToolCalls []llmprovider.ToolCallPart var contentParts []string for i, ch := range resp.Choices { - debugLog("AgenticClient: processing choice %d: Content=%q, ToolCalls=%d", - i, truncateString(ch.Content, 100), len(ch.ToolCalls)) + debugLog("AgenticClient: processing choice %d: Content=%q, ToolCalls=%d, Thinking=%d", + i, truncateString(ch.Content, 100), len(ch.ToolCalls), len(ch.Thinking)) + for j, t := range ch.Thinking { + debugLog("AgenticClient: thinking[%d]: text=%q sig=%v", + j, truncateString(t.Text, 150), t.Signature != "") + } if ch.Content != "" { contentParts = append(contentParts, ch.Content) @@ -316,11 +320,11 @@ func (c *agenticClientImpl) runQuestionLoop( // Add the AI response and remind to use tools if choice.Content != "" { - messages = append(messages, llms.TextParts(llms.ChatMessageTypeAI, choice.Content)) + messages = append(messages, llmprovider.TextMessage(llmprovider.RoleAI, choice.Content)) } debugLog("AgenticClient: reminding agent to use tools") - messages = append(messages, llms.TextParts( - llms.ChatMessageTypeHuman, + messages = append(messages, llmprovider.TextMessage( + llmprovider.RoleHuman, useToolsReminderPrompt, )) toolCallsRemaining-- @@ -333,7 +337,7 @@ func (c *agenticClientImpl) runQuestionLoop( // Validate submit_answer is called alone hasSubmitAnswer := false for _, toolCall := range choice.ToolCalls { - if toolCall.FunctionCall.Name == "submit_answer" { + if toolCall.Name == "submit_answer" { hasSubmitAnswer = true break } @@ -343,23 +347,23 @@ func (c *agenticClientImpl) runQuestionLoop( // Add a single AI message with ALL tool calls so every // tool_result below has a matching tool_use in the preceding // assistant message. 
- aiParts := make([]llms.ContentPart, len(choice.ToolCalls)) + aiParts := make([]llmprovider.Part, len(choice.ToolCalls)) for i, tc := range choice.ToolCalls { aiParts[i] = tc } - aiMessage := llms.MessageContent{ - Role: llms.ChatMessageTypeAI, + aiMessage := llmprovider.Message{ + Role: llmprovider.RoleAI, Parts: aiParts, } messages = append(messages, aiMessage) for _, toolCall := range choice.ToolCalls { toolCallsRemaining-- - errorResponse := llms.MessageContent{ - Role: llms.ChatMessageTypeTool, - Parts: []llms.ContentPart{ - llms.ToolCallResponse{ + errorResponse := llmprovider.Message{ + Role: llmprovider.RoleTool, + Parts: []llmprovider.Part{ + llmprovider.ToolResultPart{ ToolCallID: toolCall.ID, - Name: toolCall.FunctionCall.Name, + Name: toolCall.Name, Content: submitAnswerAloneError, }, }, @@ -381,9 +385,9 @@ func (c *agenticClientImpl) runQuestionLoop( for i, toolCall := range choice.ToolCalls { toolCallsRemaining-- - aiMessage := llms.MessageContent{ - Role: llms.ChatMessageTypeAI, - Parts: []llms.ContentPart{toolCall}, + aiMessage := llmprovider.Message{ + Role: llmprovider.RoleAI, + Parts: []llmprovider.Part{toolCall}, } messages = append(messages, aiMessage) @@ -403,29 +407,29 @@ func (c *agenticClientImpl) runQuestionLoop( // processToolCall processes a single tool call and returns the response message and optional answer func (c *agenticClientImpl) processToolCall( - toolCall llms.ToolCall, + toolCall llmprovider.ToolCallPart, index, total int, -) (llms.MessageContent, *AnswerSchema) { +) (llmprovider.Message, *AnswerSchema) { debugLog( "AgenticClient: [%d/%d] executing tool: %s", index+1, total, - toolCall.FunctionCall.Name, + toolCall.Name, ) - debugLog("AgenticClient: tool args: %s", truncateString(toolCall.FunctionCall.Arguments, 500)) + debugLog("AgenticClient: tool args: %s", truncateString(toolCall.Arguments, 500)) // Check for submit_answer - if toolCall.FunctionCall.Name == "submit_answer" { + if toolCall.Name == "submit_answer" { var 
answer AnswerSchema - if err := json.Unmarshal([]byte(toolCall.FunctionCall.Arguments), &answer); err != nil { + if err := json.Unmarshal([]byte(toolCall.Arguments), &answer); err != nil { debugLog("AgenticClient: failed to parse submit_answer: %v", err) // Report parse error back to agent so it can retry - return llms.MessageContent{ - Role: llms.ChatMessageTypeTool, - Parts: []llms.ContentPart{ - llms.ToolCallResponse{ + return llmprovider.Message{ + Role: llmprovider.RoleTool, + Parts: []llmprovider.Part{ + llmprovider.ToolResultPart{ ToolCallID: toolCall.ID, - Name: toolCall.FunctionCall.Name, + Name: toolCall.Name, Content: fmt.Sprintf( "Error parsing answer: %v. Please try again with valid JSON.", err, @@ -438,12 +442,12 @@ func (c *agenticClientImpl) processToolCall( answer.ShortAnswer, truncateString(answer.Answer, 100)) // Return success response and the answer - return llms.MessageContent{ - Role: llms.ChatMessageTypeTool, - Parts: []llms.ContentPart{ - llms.ToolCallResponse{ + return llmprovider.Message{ + Role: llmprovider.RoleTool, + Parts: []llmprovider.Part{ + llmprovider.ToolResultPart{ ToolCallID: toolCall.ID, - Name: toolCall.FunctionCall.Name, + Name: toolCall.Name, Content: "Answer recorded successfully.", }, }, @@ -451,18 +455,18 @@ func (c *agenticClientImpl) processToolCall( } // Execute other tools - result, err := c.executor.execute(toolCall.FunctionCall.Name, toolCall.FunctionCall.Arguments) + result, err := c.executor.execute(toolCall.Name, toolCall.Arguments) if err != nil { result = fmt.Sprintf("Error: %v", err) } debugLog("AgenticClient: tool result: %s", truncateString(result, 300)) - return llms.MessageContent{ - Role: llms.ChatMessageTypeTool, - Parts: []llms.ContentPart{ - llms.ToolCallResponse{ + return llmprovider.Message{ + Role: llmprovider.RoleTool, + Parts: []llmprovider.Part{ + llmprovider.ToolResultPart{ ToolCallID: toolCall.ID, - Name: toolCall.FunctionCall.Name, + Name: toolCall.Name, Content: result, }, }, @@ -472,12 
+476,12 @@ func (c *agenticClientImpl) processToolCall( // callLLMWithRetry calls the LLM with retry logic for transient errors func (c *agenticClientImpl) callLLMWithRetry( ctx context.Context, - llm llms.Model, - messages []llms.MessageContent, -) (*llms.ContentResponse, error) { + provider llmprovider.Provider, + messages []llmprovider.Message, +) (*llmprovider.Response, error) { var lastErr error for attempt := 1; attempt <= maxLLMRetries; attempt++ { - resp, err := llm.GenerateContent(ctx, messages, llms.WithTools(c.tools)) + resp, err := provider.GenerateContent(ctx, messages, llmprovider.WithTools(c.tools)) if err == nil { return resp, nil } @@ -500,25 +504,31 @@ func truncateString(s string, maxLen int) string { return s[:maxLen] + "..." } -// initLLM initializes the appropriate LLM based on provider -func initLLM(ctx context.Context, opts *AgenticCallOptions) (llms.Model, error) { +// initProvider initializes the appropriate provider based on configuration. +// Gemini uses our native provider; Anthropic and OpenAI use langchain adapters +// until they are migrated. 
+func initProvider(ctx context.Context, opts *AgenticCallOptions) (llmprovider.Provider, error) { switch opts.Provider { case "google": - return googleai.New( - ctx, - googleai.WithAPIKey(opts.APIKey), - googleai.WithDefaultModel(opts.Model), - ) + return gemini.New(ctx, opts.APIKey, opts.Model) case "anthropic": - return anthropic.New( + llm, err := anthropic.New( anthropic.WithToken(opts.APIKey), anthropic.WithModel(opts.Model), ) + if err != nil { + return nil, err + } + return llmprovider.NewLangchainAdapter(llm), nil case "openai": - return openai.New( + llm, err := openai.New( openai.WithToken(opts.APIKey), openai.WithModel(opts.Model), ) + if err != nil { + return nil, err + } + return llmprovider.NewLangchainAdapter(llm), nil default: return nil, fmt.Errorf( "unsupported provider: %s (supported: google, anthropic, openai)", diff --git a/pkg/llmclient/agentic_debug.go b/pkg/llmclient/agentic_debug.go index 7c93618d..0b31075a 100644 --- a/pkg/llmclient/agentic_debug.go +++ b/pkg/llmclient/agentic_debug.go @@ -2,50 +2,19 @@ package llmclient import ( "fmt" - "io" - "log" "os" - "path/filepath" - "sync" -) -var ( - debugLogger *log.Logger - debugOnce sync.Once - debugPath string + "github.com/grafana/plugin-validator/pkg/logme" ) -func initDebugLogger() { - debugOnce.Do(func() { - debugVal := os.Getenv("DEBUG") - if debugVal != "1" && debugVal != "true" { - debugLogger = log.New(io.Discard, "", 0) - return - } - - debugPath = filepath.Join(os.TempDir(), "validator-agentic.log") - - f, err := os.OpenFile(debugPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) - if err != nil { - fmt.Fprintf(os.Stderr, "AgenticClient: failed to create debug log file: %v\n", err) - debugLogger = log.New(io.Discard, "", 0) - return - } - - debugLogger = log.New(f, "", log.Ltime|log.Lmicroseconds) - }) -} - -// debugLog writes a formatted message to the debug log file if DEBUG=1 or DEBUG=true +// debugLog writes a formatted message to the LLM debug log file func debugLog(format 
string, args ...interface{}) { - initDebugLogger() - debugLogger.Printf(format, args...) + logme.LLMLog(format, args...) } // printDebugLogPath prints the debug log file path to stderr if debug is enabled func printDebugLogPath() { - initDebugLogger() - if debugPath != "" { - fmt.Fprintf(os.Stderr, "AgenticClient: debug log: %s\n", debugPath) + if p := logme.LLMLogPath(); p != "" { + fmt.Fprintf(os.Stderr, "AgenticClient: debug log: %s\n", p) } } diff --git a/pkg/llmclient/agentic_tools.go b/pkg/llmclient/agentic_tools.go index 6124bcfc..c5b55f5c 100644 --- a/pkg/llmclient/agentic_tools.go +++ b/pkg/llmclient/agentic_tools.go @@ -12,7 +12,7 @@ import ( "strings" "unicode/utf8" - "github.com/tmc/langchaingo/llms" + "github.com/grafana/plugin-validator/pkg/llmprovider" ) const maxFileSize = 500 * 1024 // 500KB @@ -47,11 +47,11 @@ var blockedGitFlags = []string{ } // buildAgenticTools returns the list of tools available to the agent -func buildAgenticTools() []llms.Tool { - return []llms.Tool{ +func buildAgenticTools() []llmprovider.Tool { + return []llmprovider.Tool{ { Type: "function", - Function: &llms.FunctionDefinition{ + Function: &llmprovider.FunctionDef{ Name: "read_file", Description: "Read the contents of a file at the given path", Parameters: map[string]interface{}{ @@ -68,7 +68,7 @@ func buildAgenticTools() []llms.Tool { }, { Type: "function", - Function: &llms.FunctionDefinition{ + Function: &llmprovider.FunctionDef{ Name: "list_directory", Description: "List files and directories at the given path", Parameters: map[string]interface{}{ @@ -85,7 +85,7 @@ func buildAgenticTools() []llms.Tool { }, { Type: "function", - Function: &llms.FunctionDefinition{ + Function: &llmprovider.FunctionDef{ Name: "grep", Description: "Search for a pattern in files. 
Returns matching lines with file names and line numbers.", Parameters: map[string]interface{}{ @@ -106,7 +106,7 @@ func buildAgenticTools() []llms.Tool { }, { Type: "function", - Function: &llms.FunctionDefinition{ + Function: &llmprovider.FunctionDef{ Name: "git", Description: "Execute a git command. Only allowed commands: log, show, diff, status, ls-files, blame, rev-parse, cat-file, checkout, fetch, pull, branch, tag.", Parameters: map[string]interface{}{ @@ -123,10 +123,9 @@ func buildAgenticTools() []llms.Tool { }, { Type: "function", - Function: &llms.FunctionDefinition{ + Function: &llmprovider.FunctionDef{ Name: "submit_answer", Description: "Submit your final answer to the question. Use this when you have gathered enough information.", - Strict: true, Parameters: map[string]interface{}{ "type": "object", "properties": map[string]interface{}{ diff --git a/pkg/llmprovider/gemini/client.go b/pkg/llmprovider/gemini/client.go new file mode 100644 index 00000000..2ffa9f96 --- /dev/null +++ b/pkg/llmprovider/gemini/client.go @@ -0,0 +1,380 @@ +// Package gemini implements the llmprovider.Provider interface using the +// Google GenAI SDK (google.golang.org/genai). It properly preserves +// thought_signatures for Gemini 3.x models. +package gemini + +import ( + "context" + "crypto/rand" + "encoding/hex" + "encoding/json" + "fmt" + + "github.com/grafana/plugin-validator/pkg/llmprovider" + "github.com/grafana/plugin-validator/pkg/logme" + "google.golang.org/genai" +) + +// Client implements llmprovider.Provider for Gemini via AI Studio. +type Client struct { + client *genai.Client + modelName string +} + +// New creates a Gemini provider client using an AI Studio API key. 
+func New(ctx context.Context, apiKey, modelName string) (*Client, error) { + if apiKey == "" { + return nil, fmt.Errorf("gemini: API key is required") + } + if modelName == "" { + return nil, fmt.Errorf("gemini: model name is required") + } + + client, err := genai.NewClient(ctx, &genai.ClientConfig{ + APIKey: apiKey, + Backend: genai.BackendGeminiAPI, + }) + if err != nil { + return nil, fmt.Errorf("gemini: failed to create client: %w", err) + } + + return &Client{client: client, modelName: modelName}, nil +} + +// GenerateContent sends messages to Gemini and returns the response. +// It preserves thought_signatures for Gemini 3.x compatibility. +func (c *Client) GenerateContent( + ctx context.Context, + messages []llmprovider.Message, + options ...llmprovider.CallOption, +) (*llmprovider.Response, error) { + opts := &llmprovider.CallOptions{} + for _, o := range options { + o(opts) + } + + // Extract system instruction from messages (Gemini handles it separately) + systemInstruction, conversationMessages := extractSystemMessage(messages) + + // Convert our messages to genai.Content + contents, err := toGenAIContents(conversationMessages) + if err != nil { + return nil, fmt.Errorf("gemini: failed to convert messages: %w", err) + } + + // Build config + config := buildConfig(opts, systemInstruction) + + // Call Gemini API + resp, err := c.client.Models.GenerateContent(ctx, c.modelName, contents, config) + if err != nil { + return nil, fmt.Errorf("gemini: API error: %w", err) + } + + // Convert response, preserving thought_signatures + return fromGenAIResponse(resp) +} + +// extractSystemMessage pulls the system instruction out of the message list. +// Gemini takes system instructions via config, not as a message role. 
+func extractSystemMessage(messages []llmprovider.Message) (string, []llmprovider.Message) { + var system string + var rest []llmprovider.Message + + for _, m := range messages { + if m.Role == llmprovider.RoleSystem { + // Concatenate all text parts from system messages + for _, p := range m.Parts { + if tp, ok := p.(llmprovider.TextPart); ok { + if system != "" { + system += "\n" + } + system += tp.Text + } + } + } else { + rest = append(rest, m) + } + } + + return system, rest +} + +// buildConfig creates the GenAI generation config from our options. +func buildConfig(opts *llmprovider.CallOptions, systemInstruction string) *genai.GenerateContentConfig { + config := &genai.GenerateContentConfig{} + + if systemInstruction != "" { + config.SystemInstruction = &genai.Content{ + Parts: []*genai.Part{genai.NewPartFromText(systemInstruction)}, + } + } + + if opts.Temperature > 0 { + t := float32(opts.Temperature) + config.Temperature = &t + } + if opts.MaxTokens > 0 { + config.MaxOutputTokens = int32(opts.MaxTokens) + } + if opts.TopP > 0 { + p := float32(opts.TopP) + config.TopP = &p + } + if opts.TopK > 0 { + k := float32(opts.TopK) + config.TopK = &k + } + if len(opts.StopWords) > 0 { + config.StopSequences = opts.StopWords + } + + // Convert tools + if len(opts.Tools) > 0 { + var declarations []*genai.FunctionDeclaration + for _, tool := range opts.Tools { + if tool.Function != nil { + decl := &genai.FunctionDeclaration{ + Name: tool.Function.Name, + Description: tool.Function.Description, + } + // Use ParametersJsonSchema for raw JSON schema passthrough + if tool.Function.Parameters != nil { + decl.ParametersJsonSchema = tool.Function.Parameters + } + declarations = append(declarations, decl) + } + } + if len(declarations) > 0 { + config.Tools = []*genai.Tool{{ + FunctionDeclarations: declarations, + }} + config.ToolConfig = &genai.ToolConfig{ + FunctionCallingConfig: &genai.FunctionCallingConfig{ + Mode: genai.FunctionCallingConfigModeAuto, + }, + } + } + } + + 
return config +} + +// --- Message conversion: llmprovider → genai --- + +func toGenAIContents(messages []llmprovider.Message) ([]*genai.Content, error) { + var contents []*genai.Content + + for _, msg := range messages { + content := &genai.Content{ + Role: toGenAIRole(msg.Role), + } + + for _, part := range msg.Parts { + genaiPart, err := toGenAIPart(part) + if err != nil { + return nil, err + } + if genaiPart != nil { + content.Parts = append(content.Parts, genaiPart) + } + } + + if len(content.Parts) > 0 { + contents = append(contents, content) + } + } + + return contents, nil +} + +func toGenAIRole(role llmprovider.Role) string { + switch role { + case llmprovider.RoleHuman: + return "user" + case llmprovider.RoleAI: + return "model" + case llmprovider.RoleTool: + return "user" + default: + return "user" + } +} + +func toGenAIPart(part llmprovider.Part) (*genai.Part, error) { + switch p := part.(type) { + case llmprovider.TextPart: + return genai.NewPartFromText(p.Text), nil + + case llmprovider.ToolCallPart: + // Parse arguments from JSON string to map + var args map[string]any + if p.Arguments != "" { + if err := json.Unmarshal([]byte(p.Arguments), &args); err != nil { + return nil, fmt.Errorf("gemini: failed to unmarshal tool arguments for %q: %w", p.Name, err) + } + } + + genaiPart := genai.NewPartFromFunctionCall(p.Name, args) + if p.ID != "" { + genaiPart.FunctionCall.ID = p.ID + } + + // Echo back thought fields exactly as received from the API. 
+ genaiPart.Thought = p.Thought + if p.ThoughtSignature != "" { + genaiPart.ThoughtSignature = []byte(p.ThoughtSignature) + } + + return genaiPart, nil + + case llmprovider.ToolResultPart: + // Convert response content to map + var responseMap map[string]any + if err := json.Unmarshal([]byte(p.Content), &responseMap); err != nil { + // If it's not JSON, wrap it + responseMap = map[string]any{"result": p.Content} + } + + genaiPart := genai.NewPartFromFunctionResponse(p.Name, responseMap) + if p.ToolCallID != "" { + genaiPart.FunctionResponse.ID = p.ToolCallID + } + + return genaiPart, nil + + case llmprovider.ThinkingPart: + // Thinking parts from previous responses need to be echoed back + genaiPart := &genai.Part{ + Text: p.Text, + Thought: true, + } + if p.Signature != "" { + genaiPart.ThoughtSignature = []byte(p.Signature) + } + return genaiPart, nil + + default: + return nil, fmt.Errorf("gemini: unsupported part type %T", part) + } +} + +// --- Response conversion: genai → llmprovider --- + +func fromGenAIResponse(resp *genai.GenerateContentResponse) (*llmprovider.Response, error) { + if resp == nil { + return nil, fmt.Errorf("gemini: nil response") + } + + result := &llmprovider.Response{ + Choices: make([]*llmprovider.Choice, 0, len(resp.Candidates)), + } + + for candidateIdx, candidate := range resp.Candidates { + choice := &llmprovider.Choice{ + StopReason: string(candidate.FinishReason), + GenerationInfo: make(map[string]any), + } + + if candidate.Content == nil || len(candidate.Content.Parts) == 0 { + raw, _ := json.MarshalIndent(candidate, "", " ") + logme.LLMLog("gemini: candidate[%d] empty/nil content, raw candidate:\n%s", candidateIdx, string(raw)) + } + + if candidate.Content != nil { + for partIdx, part := range candidate.Content.Parts { + if part == nil { + continue + } + + // Debug: log raw part fields so we can see exactly what the SDK returns + debugLogPart(partIdx, part) + + // Thought/thinking parts + if part.Thought && part.FunctionCall == 
nil { + thinking := llmprovider.ThinkingPart{ + Text: part.Text, + } + if len(part.ThoughtSignature) > 0 { + thinking.Signature = string(part.ThoughtSignature) + } + choice.Thinking = append(choice.Thinking, thinking) + continue + } + + // Text content (non-thought) + if part.Text != "" && part.FunctionCall == nil && part.FunctionResponse == nil { + choice.Content = part.Text + } + + // Function calls + if part.FunctionCall != nil { + id := part.FunctionCall.ID + if id == "" { + id = generateCallID() + logme.LLMLog("gemini: part[%d] FunctionCall has empty ID, generated: %s", partIdx, id) + } + tc := llmprovider.ToolCallPart{ + ID: id, + Name: part.FunctionCall.Name, + Thought: part.Thought, + } + + if part.FunctionCall.Args != nil { + argsJSON, err := json.Marshal(part.FunctionCall.Args) + if err != nil { + return nil, fmt.Errorf("gemini: failed to marshal function args: %w", err) + } + tc.Arguments = string(argsJSON) + } + + // CRITICAL: Capture thought_signature from function call parts + if len(part.ThoughtSignature) > 0 { + tc.ThoughtSignature = string(part.ThoughtSignature) + } + + choice.ToolCalls = append(choice.ToolCalls, tc) + } + } + } + + // Token usage + if resp.UsageMetadata != nil { + choice.GenerationInfo["usage"] = map[string]any{ + "prompt_tokens": resp.UsageMetadata.PromptTokenCount, + "completion_tokens": resp.UsageMetadata.CandidatesTokenCount, + "total_tokens": resp.UsageMetadata.TotalTokenCount, + "thoughts_tokens": resp.UsageMetadata.ThoughtsTokenCount, + } + } + + result.Choices = append(result.Choices, choice) + } + + return result, nil +} + +func generateCallID() string { + b := make([]byte, 8) + _, _ = rand.Read(b) + return "call_" + hex.EncodeToString(b) +} + +func debugLogPart(idx int, p *genai.Part) { + hasFuncCall := p.FunctionCall != nil + hasFuncResp := p.FunctionResponse != nil + textLen := len(p.Text) + sigLen := len(p.ThoughtSignature) + logme.LLMLog("gemini: part[%d] Thought=%v Text=%d bytes FuncCall=%v FuncResp=%v ThoughtSig=%d 
bytes", + idx, p.Thought, textLen, hasFuncCall, hasFuncResp, sigLen) + if p.Thought && textLen > 0 { + preview := p.Text + if len(preview) > 200 { + preview = preview[:200] + "..." + } + logme.LLMLog("gemini: part[%d] thinking preview: %s", idx, preview) + } + if hasFuncCall { + logme.LLMLog("gemini: part[%d] FunctionCall: name=%s id=%s", idx, p.FunctionCall.Name, p.FunctionCall.ID) + } +} diff --git a/pkg/llmprovider/langchain.go b/pkg/llmprovider/langchain.go new file mode 100644 index 00000000..75324935 --- /dev/null +++ b/pkg/llmprovider/langchain.go @@ -0,0 +1,250 @@ +package llmprovider + +import "github.com/tmc/langchaingo/llms" + +// This file provides conversion between llmprovider types and langchain +// llms types. It exists only for the transition period while we migrate +// providers one at a time. Once all providers use llmprovider types +// directly, this file can be deleted. + +// --- Role conversion --- + +func RoleFromLangchain(r llms.ChatMessageType) Role { + switch r { + case llms.ChatMessageTypeSystem: + return RoleSystem + case llms.ChatMessageTypeHuman: + return RoleHuman + case llms.ChatMessageTypeAI: + return RoleAI + case llms.ChatMessageTypeTool: + return RoleTool + default: + return RoleHuman + } +} + +func (r Role) ToLangchain() llms.ChatMessageType { + switch r { + case RoleSystem: + return llms.ChatMessageTypeSystem + case RoleHuman: + return llms.ChatMessageTypeHuman + case RoleAI: + return llms.ChatMessageTypeAI + case RoleTool: + return llms.ChatMessageTypeTool + default: + return llms.ChatMessageTypeHuman + } +} + +// --- Message conversion --- + +func MessageFromLangchain(lc llms.MessageContent) Message { + msg := Message{ + Role: RoleFromLangchain(lc.Role), + Parts: make([]Part, 0, len(lc.Parts)), + } + for _, p := range lc.Parts { + msg.Parts = append(msg.Parts, PartFromLangchain(p)) + } + return msg +} + +func MessagesFromLangchain(lc []llms.MessageContent) []Message { + msgs := make([]Message, len(lc)) + for i, m := range lc 
{ + msgs[i] = MessageFromLangchain(m) + } + return msgs +} + +func (m Message) ToLangchain() llms.MessageContent { + lc := llms.MessageContent{ + Role: m.Role.ToLangchain(), + Parts: make([]llms.ContentPart, 0, len(m.Parts)), + } + for _, p := range m.Parts { + lc.Parts = append(lc.Parts, PartToLangchain(p)) + } + return lc +} + +func MessagesToLangchain(msgs []Message) []llms.MessageContent { + lc := make([]llms.MessageContent, len(msgs)) + for i, m := range msgs { + lc[i] = m.ToLangchain() + } + return lc +} + +// --- Part conversion --- + +func PartFromLangchain(p llms.ContentPart) Part { + switch v := p.(type) { + case llms.TextContent: + return TextPart{Text: v.Text} + case llms.ToolCall: + tc := ToolCallPart{ + ID: v.ID, + Name: v.FunctionCall.Name, + } + if v.FunctionCall != nil { + tc.Arguments = v.FunctionCall.Arguments + } + return tc + case llms.ToolCallResponse: + return ToolResultPart{ + ToolCallID: v.ToolCallID, + Name: v.Name, + Content: v.Content, + } + default: + // Unsupported part types become empty text + return TextPart{} + } +} + +func PartToLangchain(p Part) llms.ContentPart { + switch v := p.(type) { + case TextPart: + return llms.TextContent{Text: v.Text} + case ToolCallPart: + return llms.ToolCall{ + ID: v.ID, + Type: "function", + FunctionCall: &llms.FunctionCall{ + Name: v.Name, + Arguments: v.Arguments, + }, + } + case ToolResultPart: + return llms.ToolCallResponse{ + ToolCallID: v.ToolCallID, + Name: v.Name, + Content: v.Content, + } + case ThinkingPart: + // Langchain has no equivalent; drop thinking parts + return llms.TextContent{} + default: + return llms.TextContent{} + } +} + +// --- Response conversion --- + +func ResponseFromLangchain(lc *llms.ContentResponse) *Response { + if lc == nil { + return nil + } + resp := &Response{ + Choices: make([]*Choice, len(lc.Choices)), + } + for i, c := range lc.Choices { + resp.Choices[i] = ChoiceFromLangchain(c) + } + return resp +} + +func ChoiceFromLangchain(lc *llms.ContentChoice) *Choice 
{ + if lc == nil { + return nil + } + ch := &Choice{ + Content: lc.Content, + StopReason: lc.StopReason, + GenerationInfo: lc.GenerationInfo, + } + for _, tc := range lc.ToolCalls { + ch.ToolCalls = append(ch.ToolCalls, ToolCallPart{ + ID: tc.ID, + Name: tc.FunctionCall.Name, + Arguments: func() string { + if tc.FunctionCall != nil { + return tc.FunctionCall.Arguments + } + return "" + }(), + }) + } + return ch +} + +func (r *Response) ToLangchain() *llms.ContentResponse { + if r == nil { + return nil + } + lc := &llms.ContentResponse{ + Choices: make([]*llms.ContentChoice, len(r.Choices)), + } + for i, c := range r.Choices { + lc.Choices[i] = c.ToLangchain() + } + return lc +} + +func (c *Choice) ToLangchain() *llms.ContentChoice { + if c == nil { + return nil + } + lc := &llms.ContentChoice{ + Content: c.Content, + StopReason: c.StopReason, + GenerationInfo: c.GenerationInfo, + } + for _, tc := range c.ToolCalls { + lc.ToolCalls = append(lc.ToolCalls, llms.ToolCall{ + ID: tc.ID, + Type: "function", + FunctionCall: &llms.FunctionCall{ + Name: tc.Name, + Arguments: tc.Arguments, + }, + }) + } + return lc +} + +// --- Tool conversion --- + +func ToolFromLangchain(t llms.Tool) Tool { + tool := Tool{Type: t.Type} + if t.Function != nil { + tool.Function = &FunctionDef{ + Name: t.Function.Name, + Description: t.Function.Description, + Parameters: t.Function.Parameters, + } + } + return tool +} + +func ToolsFromLangchain(tools []llms.Tool) []Tool { + out := make([]Tool, len(tools)) + for i, t := range tools { + out[i] = ToolFromLangchain(t) + } + return out +} + +func (t Tool) ToLangchain() llms.Tool { + lc := llms.Tool{Type: t.Type} + if t.Function != nil { + lc.Function = &llms.FunctionDefinition{ + Name: t.Function.Name, + Description: t.Function.Description, + Parameters: t.Function.Parameters, + } + } + return lc +} + +func ToolsToLangchain(tools []Tool) []llms.Tool { + out := make([]llms.Tool, len(tools)) + for i, t := range tools { + out[i] = t.ToLangchain() + } 
+ return out +} diff --git a/pkg/llmprovider/langchain_adapter.go b/pkg/llmprovider/langchain_adapter.go new file mode 100644 index 00000000..20ae105c --- /dev/null +++ b/pkg/llmprovider/langchain_adapter.go @@ -0,0 +1,69 @@ +package llmprovider + +import ( + "context" + + "github.com/tmc/langchaingo/llms" +) + +// LangchainAdapter wraps an llms.Model to implement the Provider interface. +// This allows existing langchain-based providers (Anthropic, OpenAI) to be +// used alongside native providers during the migration. +type LangchainAdapter struct { + LLM llms.Model +} + +func NewLangchainAdapter(llm llms.Model) *LangchainAdapter { + return &LangchainAdapter{LLM: llm} +} + +func (a *LangchainAdapter) GenerateContent( + ctx context.Context, + messages []Message, + options ...CallOption, +) (*Response, error) { + // Convert our messages to langchain messages + lcMessages := MessagesToLangchain(messages) + + // Convert our options to langchain options + opts := &CallOptions{} + for _, o := range options { + o(opts) + } + lcOpts := toLangchainCallOptions(opts) + + // Call the langchain model + lcResp, err := a.LLM.GenerateContent(ctx, lcMessages, lcOpts...) + if err != nil { + return nil, err + } + + // Convert response back to our types + return ResponseFromLangchain(lcResp), nil +} + +// toLangchainCallOptions converts our CallOptions to langchain CallOptions. 
+func toLangchainCallOptions(opts *CallOptions) []llms.CallOption { + var lcOpts []llms.CallOption + + if opts.MaxTokens > 0 { + lcOpts = append(lcOpts, llms.WithMaxTokens(opts.MaxTokens)) + } + if opts.Temperature > 0 { + lcOpts = append(lcOpts, llms.WithTemperature(opts.Temperature)) + } + if opts.TopP > 0 { + lcOpts = append(lcOpts, llms.WithTopP(opts.TopP)) + } + if opts.TopK > 0 { + lcOpts = append(lcOpts, llms.WithTopK(opts.TopK)) + } + if len(opts.StopWords) > 0 { + lcOpts = append(lcOpts, llms.WithStopWords(opts.StopWords)) + } + if len(opts.Tools) > 0 { + lcOpts = append(lcOpts, llms.WithTools(ToolsToLangchain(opts.Tools))) + } + + return lcOpts +} diff --git a/pkg/llmprovider/types.go b/pkg/llmprovider/types.go new file mode 100644 index 00000000..e3b80056 --- /dev/null +++ b/pkg/llmprovider/types.go @@ -0,0 +1,171 @@ +// Package llmprovider defines a unified interface for LLM providers. +// +// These types mirror github.com/tmc/langchaingo/llms but add support for +// provider-specific features like Gemini thought_signatures, Anthropic +// thinking blocks, and OpenAI encrypted reasoning content. +// +// During migration, conversion helpers allow gradual adoption: one provider +// can be migrated at a time while the rest continue using langchain types. +package llmprovider + +import "context" + +// Provider is the core interface that all LLM provider clients implement. +type Provider interface { + GenerateContent(ctx context.Context, messages []Message, options ...CallOption) (*Response, error) +} + +// Role identifies the sender of a message. +type Role string + +const ( + RoleSystem Role = "system" + RoleHuman Role = "human" + RoleAI Role = "ai" + RoleTool Role = "tool" +) + +// Message is a single message in a conversation. +type Message struct { + Role Role + Parts []Part +} + +// Part is a piece of content within a message. +// Concrete types: TextPart, ToolCallPart, ToolResultPart, ThinkingPart. 
+type Part interface { + partMarker() +} + +// TextPart is plain text content. +type TextPart struct { + Text string +} + +func (TextPart) partMarker() {} + +// ToolCallPart represents a model's request to call a tool. +type ToolCallPart struct { + ID string + Name string + Arguments string // JSON string + + // Thought indicates whether this part was produced during model thinking. + // Must be echoed back exactly as received from the API. + Thought bool + + // ThoughtSignature is the opaque token Gemini 3.x attaches to function + // call parts. It must be echoed back in subsequent requests or the API + // returns a 400. Nil/empty means no signature was provided. + ThoughtSignature string +} + +func (ToolCallPart) partMarker() {} + +// ToolResultPart is the response from executing a tool. +type ToolResultPart struct { + ToolCallID string + Name string + Content string +} + +func (ToolResultPart) partMarker() {} + +// ThinkingPart holds reasoning/thinking content from the model. +// Different providers represent this differently: +// - Gemini: thought text + thought_signature +// - Anthropic: thinking block with signature, or redacted_thinking +// - OpenAI: encrypted reasoning content +type ThinkingPart struct { + Text string + Signature string // Gemini thought_signature or Anthropic thinking signature + Encrypted string // OpenAI encrypted_content or Anthropic redacted_thinking data +} + +func (ThinkingPart) partMarker() {} + +// Response is the result of a GenerateContent call. +type Response struct { + Choices []*Choice +} + +// Choice is a single response candidate. +type Choice struct { + // Content is the text content of the response. + Content string + + // StopReason is why the model stopped generating. + StopReason string + + // ToolCalls requested by the model. These preserve ThoughtSignature + // so they can be echoed back in the next request. + ToolCalls []ToolCallPart + + // Thinking contains reasoning/thinking content if the model produced any. 
+ Thinking []ThinkingPart + + // GenerationInfo holds arbitrary provider-specific metadata (token + // counts, safety ratings, etc.). + GenerationInfo map[string]any +} + +// --- Call options --- + +// CallOption configures a GenerateContent call. +type CallOption func(*CallOptions) + +// CallOptions holds all configurable parameters for a GenerateContent call. +type CallOptions struct { + MaxTokens int + Temperature float64 + TopP float64 + TopK int + StopWords []string + Tools []Tool +} + +// Tool describes a tool the model can invoke. +type Tool struct { + Type string + Function *FunctionDef +} + +// FunctionDef describes a callable function. +type FunctionDef struct { + Name string + Description string + Parameters any // JSON Schema +} + +// --- Option helpers --- + +func WithMaxTokens(n int) CallOption { + return func(o *CallOptions) { o.MaxTokens = n } +} + +func WithTemperature(t float64) CallOption { + return func(o *CallOptions) { o.Temperature = t } +} + +func WithTopP(p float64) CallOption { + return func(o *CallOptions) { o.TopP = p } +} + +func WithTopK(k int) CallOption { + return func(o *CallOptions) { o.TopK = k } +} + +func WithStopWords(words []string) CallOption { + return func(o *CallOptions) { o.StopWords = words } +} + +func WithTools(tools []Tool) CallOption { + return func(o *CallOptions) { o.Tools = tools } +} + +// --- Convenience constructors --- + +// TextMessage creates a Message with a single text part. 
+func TextMessage(role Role, text string) Message { + return Message{Role: role, Parts: []Part{TextPart{Text: text}}} +} diff --git a/pkg/logme/logme.go b/pkg/logme/logme.go index fc6c8d49..ece3e121 100644 --- a/pkg/logme/logme.go +++ b/pkg/logme/logme.go @@ -2,11 +2,53 @@ package logme import ( "fmt" + "io" + "log" "os" + "path/filepath" + "sync" ) var isDebugMode = os.Getenv("DEBUG") == "1" +var ( + llmLogger *log.Logger + llmOnce sync.Once + llmPath string +) + +func initLLMLogger() { + llmOnce.Do(func() { + if !isDebugMode { + llmLogger = log.New(io.Discard, "", 0) + return + } + + llmPath = filepath.Join(os.TempDir(), "validator-llm.log") + f, err := os.OpenFile(llmPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + fmt.Fprintf(os.Stderr, "logme: failed to open LLM log file: %v\n", err) + llmLogger = log.New(io.Discard, "", 0) + return + } + + llmLogger = log.New(f, "", log.Ltime|log.Lmicroseconds) + }) +} + +// LLMLog writes a formatted message to the LLM debug log file in /tmp. +// Only active when DEBUG=1. +func LLMLog(format string, args ...interface{}) { + initLLMLogger() + llmLogger.Printf(format, args...) +} + +// LLMLogPath returns the path to the LLM log file, or "" if not active. 
+func LLMLogPath() string { + initLLMLogger() + return llmPath +} + func DebugFln(msg string, args ...interface{}) { // check if ENV DEBUG is 1 if isDebugMode { From 4d4042660e10dafec59a75935f40f0813c73994a Mon Sep 17 00:00:00 2001 From: Esteban Beltran Date: Wed, 4 Mar 2026 16:38:57 +0100 Subject: [PATCH 06/10] remove langchain from agentic client --- go.mod | 3 + go.sum | 5 + pkg/llmclient/README.md | 2 +- pkg/llmclient/agentic_client.go | 50 +-- pkg/llmprovider/anthropicprovider/client.go | 297 ++++++++++++++++++ .../{gemini => geminiprovider}/client.go | 2 +- pkg/llmprovider/langchain.go | 250 --------------- pkg/llmprovider/langchain_adapter.go | 69 ---- pkg/llmprovider/openaiprovider/client.go | 221 +++++++++++++ pkg/llmprovider/types.go | 9 +- 10 files changed, 543 insertions(+), 365 deletions(-) create mode 100644 pkg/llmprovider/anthropicprovider/client.go rename pkg/llmprovider/{gemini => geminiprovider}/client.go (99%) delete mode 100644 pkg/llmprovider/langchain.go delete mode 100644 pkg/llmprovider/langchain_adapter.go create mode 100644 pkg/llmprovider/openaiprovider/client.go diff --git a/go.mod b/go.mod index 50893c06..566675b3 100644 --- a/go.mod +++ b/go.mod @@ -73,6 +73,7 @@ require ( github.com/ProtonMail/go-crypto v1.3.0 // indirect github.com/agext/levenshtein v1.2.3 // indirect github.com/anchore/go-lzo v0.1.0 // indirect + github.com/anthropics/anthropic-sdk-go v1.26.0 // indirect github.com/cloudflare/circl v1.6.1 // indirect github.com/compose-spec/compose-go/v2 v2.8.1 // indirect github.com/containerd/cgroups/v3 v3.0.5 // indirect @@ -155,6 +156,7 @@ require ( github.com/moby/sys/userns v0.1.0 // indirect github.com/montanaflynn/stats v0.7.1 // indirect github.com/ncruces/go-strftime v0.1.9 // indirect + github.com/openai/openai-go v1.12.0 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.1 // indirect github.com/opencontainers/runtime-spec v1.2.1 // indirect @@ -185,6 +187,7 @@ 
require ( github.com/tidwall/jsonc v0.3.2 // indirect github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.1 // indirect + github.com/tidwall/sjson v1.2.5 // indirect github.com/tink-crypto/tink-go/v2 v2.4.0 // indirect github.com/tklauser/go-sysconf v0.3.15 // indirect github.com/tklauser/numcpus v0.10.0 // indirect diff --git a/go.sum b/go.sum index 72cea00a..342e8060 100644 --- a/go.sum +++ b/go.sum @@ -72,6 +72,8 @@ github.com/anchore/go-struct-converter v0.0.0-20250211213226-cce56d595160 h1:r8/ github.com/anchore/go-struct-converter v0.0.0-20250211213226-cce56d595160/go.mod h1:rYqSE9HbjzpHTI74vwPvae4ZVYZd1lue2ta6xHPdblA= github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8= github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4= +github.com/anthropics/anthropic-sdk-go v1.26.0 h1:oUTzFaUpAevfuELAP1sjL6CQJ9HHAfT7CoSYSac11PY= +github.com/anthropics/anthropic-sdk-go v1.26.0/go.mod h1:qUKmaW+uuPB64iy1l+4kOSvaLqPXnHTTBKH6RVZ7q5Q= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/bmatcuk/doublestar/v4 v4.9.2 h1:b0mc6WyRSYLjzofB2v/0cuDUZ+MqoGyH3r0dVij35GI= @@ -375,6 +377,8 @@ github.com/neurosnap/sentences v1.0.6 h1:iBVUivNtlwGkYsJblWV8GGVFmXzZzak907Ci8aA github.com/neurosnap/sentences v1.0.6/go.mod h1:pg1IapvYpWCJJm/Etxeh0+gtMf1rI1STY9S7eUCPbDc= github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k= github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY= +github.com/openai/openai-go v1.12.0 h1:NBQCnXzqOTv5wsgNC36PrFEiskGfO5wccfCWDo9S1U0= +github.com/openai/openai-go v1.12.0/go.mod h1:g461MYGXEXBVdV5SaR/5tNzNbSfwTBBefwc+LlDCK0Y= github.com/opencontainers/go-digest v1.0.0 
h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040= @@ -478,6 +482,7 @@ github.com/terminalstatic/go-xsd-validate v0.1.6 h1:TenYeQ3eY631qNi1/cTmLH/s2slH github.com/terminalstatic/go-xsd-validate v0.1.6/go.mod h1:18lsvYFofBflqCrvo1umpABZ99+GneNTw2kEEc8UPJw= github.com/thoas/go-funk v0.9.3 h1:7+nAEx3kn5ZJcnDm2Bh23N2yOtweO14bi//dvRtgLpw= github.com/thoas/go-funk v0.9.3/go.mod h1:+IWnUfUmFO1+WVYQWQtIJHeRRdaIyyYglZN7xzUPe4Q= +github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/jsonc v0.3.2 h1:ZTKrmejRlAJYdn0kcaFqRAKlxxFIC21pYq8vLa4p2Wc= diff --git a/pkg/llmclient/README.md b/pkg/llmclient/README.md index 41cb7a84..0cd94df1 100644 --- a/pkg/llmclient/README.md +++ b/pkg/llmclient/README.md @@ -2,7 +2,7 @@ LLM client package for code analysis. -- **AgenticClient**: Provider-agnostic agentic client using [langchaingo](https://github.com/tmc/langchaingo). Gives the LLM tools to explore a repository and answer questions about code. +- **AgenticClient**: Provider-agnostic agentic client using native SDK implementations for each provider. Gives the LLM tools to explore a repository and answer questions about code. 
## AgenticClient diff --git a/pkg/llmclient/agentic_client.go b/pkg/llmclient/agentic_client.go index 3d1b8bb8..22f53202 100644 --- a/pkg/llmclient/agentic_client.go +++ b/pkg/llmclient/agentic_client.go @@ -8,9 +8,9 @@ import ( "time" "github.com/grafana/plugin-validator/pkg/llmprovider" - "github.com/grafana/plugin-validator/pkg/llmprovider/gemini" - "github.com/tmc/langchaingo/llms/anthropic" - "github.com/tmc/langchaingo/llms/openai" + "github.com/grafana/plugin-validator/pkg/llmprovider/anthropicprovider" + "github.com/grafana/plugin-validator/pkg/llmprovider/geminiprovider" + "github.com/grafana/plugin-validator/pkg/llmprovider/openaiprovider" ) const ( @@ -248,17 +248,8 @@ func (c *agenticClientImpl) runQuestionLoop( } // Merge all choices into one unified view for processing. - // - // Background: Anthropic's API returns separate content blocks (text, tool_use, thinking) - // which go-langchain converts into separate ContentChoice objects. For example, a response - // with text + 2 tool calls becomes 3 separate Choices. - // - // We merge them here to process the complete response, but later we must - // split them back into separate AI messages because go-langchain's handleAIMessage() only - // serializes Parts[0] when sending back to Anthropic. Putting multiple tool calls in one - // message would lose all but the first. - // - // See docs/anthropic-choices-behavior.md for detailed explanation of this pattern. + // Providers return a single Choice, but we merge defensively + // in case a provider returns multiple. mergedChoice := llmprovider.Choice{} var allToolCalls []llmprovider.ToolCallPart var contentParts []string @@ -376,12 +367,9 @@ func (c *agenticClientImpl) runQuestionLoop( // Process each tool call as a separate AI message + tool result pair. // This is the "split" part of the merge-then-split pattern. 
// - // Why: go-langchain's Anthropic handleAIMessage() only serializes Parts[0], so - // MessageContent{Parts: [toolCall1, toolCall2]} would lose toolCall2 when sent back. - // By creating one AI message per tool call, we ensure all tool calls are properly - // serialized. Each tool_use then has its matching tool_result in the following message. - // - // See docs/anthropic-choices-behavior.md for details on this serialization constraint. + // Create one AI message per tool call, each followed by its tool_result. + // This keeps the conversation in strict alternating assistant/user order + // as required by Anthropic's API. for i, toolCall := range choice.ToolCalls { toolCallsRemaining-- @@ -505,30 +493,16 @@ func truncateString(s string, maxLen int) string { } // initProvider initializes the appropriate provider based on configuration. -// Gemini uses our native provider; Anthropic and OpenAI use langchain adapters +// initProvider creates the appropriate native provider for the given config. // until they are migrated. 
func initProvider(ctx context.Context, opts *AgenticCallOptions) (llmprovider.Provider, error) { switch opts.Provider { case "google": - return gemini.New(ctx, opts.APIKey, opts.Model) + return geminiprovider.New(ctx, opts.APIKey, opts.Model) case "anthropic": - llm, err := anthropic.New( - anthropic.WithToken(opts.APIKey), - anthropic.WithModel(opts.Model), - ) - if err != nil { - return nil, err - } - return llmprovider.NewLangchainAdapter(llm), nil + return anthropicprovider.New(opts.APIKey, opts.Model) case "openai": - llm, err := openai.New( - openai.WithToken(opts.APIKey), - openai.WithModel(opts.Model), - ) - if err != nil { - return nil, err - } - return llmprovider.NewLangchainAdapter(llm), nil + return openaiprovider.New(opts.APIKey, opts.Model) default: return nil, fmt.Errorf( "unsupported provider: %s (supported: google, anthropic, openai)", diff --git a/pkg/llmprovider/anthropicprovider/client.go b/pkg/llmprovider/anthropicprovider/client.go new file mode 100644 index 00000000..5eeed004 --- /dev/null +++ b/pkg/llmprovider/anthropicprovider/client.go @@ -0,0 +1,297 @@ +// Package anthropicprovider implements the llmprovider.Provider interface +// using the official Anthropic Go SDK (github.com/anthropics/anthropic-sdk-go). +package anthropicprovider + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/anthropics/anthropic-sdk-go" + "github.com/anthropics/anthropic-sdk-go/option" + "github.com/grafana/plugin-validator/pkg/llmprovider" + "github.com/grafana/plugin-validator/pkg/logme" +) + +// Client implements llmprovider.Provider for Anthropic. +type Client struct { + client *anthropic.Client + modelName string +} + +// New creates an Anthropic provider client. 
+func New(apiKey, modelName string) (*Client, error) { + if apiKey == "" { + return nil, fmt.Errorf("anthropic: API key is required") + } + if modelName == "" { + return nil, fmt.Errorf("anthropic: model name is required") + } + + client := anthropic.NewClient(option.WithAPIKey(apiKey)) + + return &Client{client: &client, modelName: modelName}, nil +} + +// GenerateContent sends messages to Anthropic and returns the response. +func (c *Client) GenerateContent( + ctx context.Context, + messages []llmprovider.Message, + options ...llmprovider.CallOption, +) (*llmprovider.Response, error) { + opts := &llmprovider.CallOptions{} + for _, o := range options { + o(opts) + } + + system, msgs := extractSystemAndMessages(messages) + + maxTokens := int64(4096) + if opts.MaxTokens > 0 { + maxTokens = int64(opts.MaxTokens) + } + + params := anthropic.MessageNewParams{ + Model: anthropic.Model(c.modelName), + MaxTokens: maxTokens, + Messages: msgs, + } + + if len(system) > 0 { + params.System = system + } + + if opts.Temperature > 0 { + params.Temperature = anthropic.Float(opts.Temperature) + } + if opts.TopP > 0 { + params.TopP = anthropic.Float(opts.TopP) + } + if len(opts.StopWords) > 0 { + params.StopSequences = opts.StopWords + } + + if len(opts.Tools) > 0 { + params.Tools = toAnthropicTools(opts.Tools) + } + + resp, err := c.client.Messages.New(ctx, params) + if err != nil { + return nil, fmt.Errorf("anthropic: API error: %w", err) + } + + return fromAnthropicResponse(resp), nil +} + +// --- Message conversion: llmprovider → anthropic --- + +// extractSystemAndMessages separates system messages (which go in a top-level +// param) from conversation messages. 
+func extractSystemAndMessages(messages []llmprovider.Message) ([]anthropic.TextBlockParam, []anthropic.MessageParam) { + var system []anthropic.TextBlockParam + var result []anthropic.MessageParam + + for _, msg := range messages { + switch msg.Role { + case llmprovider.RoleSystem: + text := extractText(msg.Parts) + if text != "" { + system = append(system, anthropic.TextBlockParam{Text: text}) + } + + case llmprovider.RoleHuman: + blocks := toUserBlocks(msg.Parts) + if len(blocks) > 0 { + result = append(result, anthropic.NewUserMessage(blocks...)) + } + + case llmprovider.RoleAI: + blocks := toAssistantBlocks(msg.Parts) + if len(blocks) > 0 { + result = append(result, anthropic.NewAssistantMessage(blocks...)) + } + + case llmprovider.RoleTool: + // Anthropic sends tool results as user messages + blocks := toToolResultBlocks(msg.Parts) + if len(blocks) > 0 { + result = append(result, anthropic.NewUserMessage(blocks...)) + } + } + } + + return system, result +} + +func toUserBlocks(parts []llmprovider.Part) []anthropic.ContentBlockParamUnion { + var blocks []anthropic.ContentBlockParamUnion + for _, p := range parts { + switch v := p.(type) { + case llmprovider.TextPart: + blocks = append(blocks, anthropic.NewTextBlock(v.Text)) + case llmprovider.ToolResultPart: + blocks = append(blocks, anthropic.NewToolResultBlock(v.ToolCallID, v.Content, false)) + } + } + return blocks +} + +func toAssistantBlocks(parts []llmprovider.Part) []anthropic.ContentBlockParamUnion { + var blocks []anthropic.ContentBlockParamUnion + for _, p := range parts { + switch v := p.(type) { + case llmprovider.TextPart: + blocks = append(blocks, anthropic.NewTextBlock(v.Text)) + case llmprovider.ToolCallPart: + // Parse the arguments string back to any for the input field + var input any + if err := json.Unmarshal([]byte(v.Arguments), &input); err != nil { + input = map[string]any{} + } + blocks = append(blocks, anthropic.NewToolUseBlock(v.ID, input, v.Name)) + case llmprovider.ThinkingPart: + 
if v.Encrypted != "" { + blocks = append(blocks, anthropic.NewRedactedThinkingBlock(v.Encrypted)) + } else if v.Text != "" { + blocks = append(blocks, anthropic.NewThinkingBlock(v.Signature, v.Text)) + } + } + } + return blocks +} + +func toToolResultBlocks(parts []llmprovider.Part) []anthropic.ContentBlockParamUnion { + var blocks []anthropic.ContentBlockParamUnion + for _, p := range parts { + if tr, ok := p.(llmprovider.ToolResultPart); ok { + blocks = append(blocks, anthropic.NewToolResultBlock(tr.ToolCallID, tr.Content, false)) + } + } + return blocks +} + +func extractText(parts []llmprovider.Part) string { + var text string + for _, p := range parts { + if tp, ok := p.(llmprovider.TextPart); ok { + if text != "" { + text += "\n" + } + text += tp.Text + } + } + return text +} + +// --- Tool conversion --- + +func toAnthropicTools(tools []llmprovider.Tool) []anthropic.ToolUnionParam { + var result []anthropic.ToolUnionParam + for _, tool := range tools { + if tool.Function == nil { + continue + } + + param := anthropic.ToolParam{ + Name: tool.Function.Name, + Description: anthropic.String(tool.Function.Description), + } + + // Convert parameters to ToolInputSchemaParam + if tool.Function.Parameters != nil { + schema := toInputSchema(tool.Function.Parameters) + param.InputSchema = schema + } + + result = append(result, anthropic.ToolUnionParam{OfTool: ¶m}) + } + return result +} + +func toInputSchema(params any) anthropic.ToolInputSchemaParam { + schema := anthropic.ToolInputSchemaParam{} + + var m map[string]any + switch p := params.(type) { + case map[string]any: + m = p + default: + data, err := json.Marshal(p) + if err != nil { + return schema + } + if err := json.Unmarshal(data, &m); err != nil { + return schema + } + } + + if props, ok := m["properties"]; ok { + schema.Properties = props + } + if req, ok := m["required"].([]any); ok { + for _, r := range req { + if s, ok := r.(string); ok { + schema.Required = append(schema.Required, s) + } + } + } + + 
return schema +} + +// --- Response conversion: anthropic → llmprovider --- + +func fromAnthropicResponse(resp *anthropic.Message) *llmprovider.Response { + choice := &llmprovider.Choice{ + StopReason: string(resp.StopReason), + GenerationInfo: make(map[string]any), + } + + for _, block := range resp.Content { + switch v := block.AsAny().(type) { + case anthropic.TextBlock: + if choice.Content != "" { + choice.Content += "\n" + } + choice.Content += v.Text + + case anthropic.ThinkingBlock: + logme.LLMLog("anthropic: thinking block (signature=%s, len=%d)", v.Signature[:min(20, len(v.Signature))], len(v.Thinking)) + choice.Thinking = append(choice.Thinking, llmprovider.ThinkingPart{ + Text: v.Thinking, + Signature: v.Signature, + }) + + case anthropic.RedactedThinkingBlock: + logme.LLMLog("anthropic: redacted thinking block (data_len=%d)", len(v.Data)) + choice.Thinking = append(choice.Thinking, llmprovider.ThinkingPart{ + Encrypted: v.Data, + }) + + case anthropic.ToolUseBlock: + args := string(v.Input) + logme.LLMLog("anthropic: tool call: name=%s id=%s", v.Name, v.ID) + choice.ToolCalls = append(choice.ToolCalls, llmprovider.ToolCallPart{ + ID: v.ID, + Name: v.Name, + Arguments: args, + }) + } + } + + choice.GenerationInfo["usage"] = map[string]any{ + "input_tokens": resp.Usage.InputTokens, + "output_tokens": resp.Usage.OutputTokens, + } + + return &llmprovider.Response{ + Choices: []*llmprovider.Choice{choice}, + } +} + +func min(a, b int) int { + if a < b { + return a + } + return b +} diff --git a/pkg/llmprovider/gemini/client.go b/pkg/llmprovider/geminiprovider/client.go similarity index 99% rename from pkg/llmprovider/gemini/client.go rename to pkg/llmprovider/geminiprovider/client.go index 2ffa9f96..f7ad596a 100644 --- a/pkg/llmprovider/gemini/client.go +++ b/pkg/llmprovider/geminiprovider/client.go @@ -1,7 +1,7 @@ // Package gemini implements the llmprovider.Provider interface using the // Google GenAI SDK (google.golang.org/genai). 
It properly preserves // thought_signatures for Gemini 3.x models. -package gemini +package geminiprovider import ( "context" diff --git a/pkg/llmprovider/langchain.go b/pkg/llmprovider/langchain.go deleted file mode 100644 index 75324935..00000000 --- a/pkg/llmprovider/langchain.go +++ /dev/null @@ -1,250 +0,0 @@ -package llmprovider - -import "github.com/tmc/langchaingo/llms" - -// This file provides conversion between llmprovider types and langchain -// llms types. It exists only for the transition period while we migrate -// providers one at a time. Once all providers use llmprovider types -// directly, this file can be deleted. - -// --- Role conversion --- - -func RoleFromLangchain(r llms.ChatMessageType) Role { - switch r { - case llms.ChatMessageTypeSystem: - return RoleSystem - case llms.ChatMessageTypeHuman: - return RoleHuman - case llms.ChatMessageTypeAI: - return RoleAI - case llms.ChatMessageTypeTool: - return RoleTool - default: - return RoleHuman - } -} - -func (r Role) ToLangchain() llms.ChatMessageType { - switch r { - case RoleSystem: - return llms.ChatMessageTypeSystem - case RoleHuman: - return llms.ChatMessageTypeHuman - case RoleAI: - return llms.ChatMessageTypeAI - case RoleTool: - return llms.ChatMessageTypeTool - default: - return llms.ChatMessageTypeHuman - } -} - -// --- Message conversion --- - -func MessageFromLangchain(lc llms.MessageContent) Message { - msg := Message{ - Role: RoleFromLangchain(lc.Role), - Parts: make([]Part, 0, len(lc.Parts)), - } - for _, p := range lc.Parts { - msg.Parts = append(msg.Parts, PartFromLangchain(p)) - } - return msg -} - -func MessagesFromLangchain(lc []llms.MessageContent) []Message { - msgs := make([]Message, len(lc)) - for i, m := range lc { - msgs[i] = MessageFromLangchain(m) - } - return msgs -} - -func (m Message) ToLangchain() llms.MessageContent { - lc := llms.MessageContent{ - Role: m.Role.ToLangchain(), - Parts: make([]llms.ContentPart, 0, len(m.Parts)), - } - for _, p := range m.Parts { - 
lc.Parts = append(lc.Parts, PartToLangchain(p)) - } - return lc -} - -func MessagesToLangchain(msgs []Message) []llms.MessageContent { - lc := make([]llms.MessageContent, len(msgs)) - for i, m := range msgs { - lc[i] = m.ToLangchain() - } - return lc -} - -// --- Part conversion --- - -func PartFromLangchain(p llms.ContentPart) Part { - switch v := p.(type) { - case llms.TextContent: - return TextPart{Text: v.Text} - case llms.ToolCall: - tc := ToolCallPart{ - ID: v.ID, - Name: v.FunctionCall.Name, - } - if v.FunctionCall != nil { - tc.Arguments = v.FunctionCall.Arguments - } - return tc - case llms.ToolCallResponse: - return ToolResultPart{ - ToolCallID: v.ToolCallID, - Name: v.Name, - Content: v.Content, - } - default: - // Unsupported part types become empty text - return TextPart{} - } -} - -func PartToLangchain(p Part) llms.ContentPart { - switch v := p.(type) { - case TextPart: - return llms.TextContent{Text: v.Text} - case ToolCallPart: - return llms.ToolCall{ - ID: v.ID, - Type: "function", - FunctionCall: &llms.FunctionCall{ - Name: v.Name, - Arguments: v.Arguments, - }, - } - case ToolResultPart: - return llms.ToolCallResponse{ - ToolCallID: v.ToolCallID, - Name: v.Name, - Content: v.Content, - } - case ThinkingPart: - // Langchain has no equivalent; drop thinking parts - return llms.TextContent{} - default: - return llms.TextContent{} - } -} - -// --- Response conversion --- - -func ResponseFromLangchain(lc *llms.ContentResponse) *Response { - if lc == nil { - return nil - } - resp := &Response{ - Choices: make([]*Choice, len(lc.Choices)), - } - for i, c := range lc.Choices { - resp.Choices[i] = ChoiceFromLangchain(c) - } - return resp -} - -func ChoiceFromLangchain(lc *llms.ContentChoice) *Choice { - if lc == nil { - return nil - } - ch := &Choice{ - Content: lc.Content, - StopReason: lc.StopReason, - GenerationInfo: lc.GenerationInfo, - } - for _, tc := range lc.ToolCalls { - ch.ToolCalls = append(ch.ToolCalls, ToolCallPart{ - ID: tc.ID, - Name: 
tc.FunctionCall.Name, - Arguments: func() string { - if tc.FunctionCall != nil { - return tc.FunctionCall.Arguments - } - return "" - }(), - }) - } - return ch -} - -func (r *Response) ToLangchain() *llms.ContentResponse { - if r == nil { - return nil - } - lc := &llms.ContentResponse{ - Choices: make([]*llms.ContentChoice, len(r.Choices)), - } - for i, c := range r.Choices { - lc.Choices[i] = c.ToLangchain() - } - return lc -} - -func (c *Choice) ToLangchain() *llms.ContentChoice { - if c == nil { - return nil - } - lc := &llms.ContentChoice{ - Content: c.Content, - StopReason: c.StopReason, - GenerationInfo: c.GenerationInfo, - } - for _, tc := range c.ToolCalls { - lc.ToolCalls = append(lc.ToolCalls, llms.ToolCall{ - ID: tc.ID, - Type: "function", - FunctionCall: &llms.FunctionCall{ - Name: tc.Name, - Arguments: tc.Arguments, - }, - }) - } - return lc -} - -// --- Tool conversion --- - -func ToolFromLangchain(t llms.Tool) Tool { - tool := Tool{Type: t.Type} - if t.Function != nil { - tool.Function = &FunctionDef{ - Name: t.Function.Name, - Description: t.Function.Description, - Parameters: t.Function.Parameters, - } - } - return tool -} - -func ToolsFromLangchain(tools []llms.Tool) []Tool { - out := make([]Tool, len(tools)) - for i, t := range tools { - out[i] = ToolFromLangchain(t) - } - return out -} - -func (t Tool) ToLangchain() llms.Tool { - lc := llms.Tool{Type: t.Type} - if t.Function != nil { - lc.Function = &llms.FunctionDefinition{ - Name: t.Function.Name, - Description: t.Function.Description, - Parameters: t.Function.Parameters, - } - } - return lc -} - -func ToolsToLangchain(tools []Tool) []llms.Tool { - out := make([]llms.Tool, len(tools)) - for i, t := range tools { - out[i] = t.ToLangchain() - } - return out -} diff --git a/pkg/llmprovider/langchain_adapter.go b/pkg/llmprovider/langchain_adapter.go deleted file mode 100644 index 20ae105c..00000000 --- a/pkg/llmprovider/langchain_adapter.go +++ /dev/null @@ -1,69 +0,0 @@ -package llmprovider - 
-import ( - "context" - - "github.com/tmc/langchaingo/llms" -) - -// LangchainAdapter wraps an llms.Model to implement the Provider interface. -// This allows existing langchain-based providers (Anthropic, OpenAI) to be -// used alongside native providers during the migration. -type LangchainAdapter struct { - LLM llms.Model -} - -func NewLangchainAdapter(llm llms.Model) *LangchainAdapter { - return &LangchainAdapter{LLM: llm} -} - -func (a *LangchainAdapter) GenerateContent( - ctx context.Context, - messages []Message, - options ...CallOption, -) (*Response, error) { - // Convert our messages to langchain messages - lcMessages := MessagesToLangchain(messages) - - // Convert our options to langchain options - opts := &CallOptions{} - for _, o := range options { - o(opts) - } - lcOpts := toLangchainCallOptions(opts) - - // Call the langchain model - lcResp, err := a.LLM.GenerateContent(ctx, lcMessages, lcOpts...) - if err != nil { - return nil, err - } - - // Convert response back to our types - return ResponseFromLangchain(lcResp), nil -} - -// toLangchainCallOptions converts our CallOptions to langchain CallOptions. 
-func toLangchainCallOptions(opts *CallOptions) []llms.CallOption { - var lcOpts []llms.CallOption - - if opts.MaxTokens > 0 { - lcOpts = append(lcOpts, llms.WithMaxTokens(opts.MaxTokens)) - } - if opts.Temperature > 0 { - lcOpts = append(lcOpts, llms.WithTemperature(opts.Temperature)) - } - if opts.TopP > 0 { - lcOpts = append(lcOpts, llms.WithTopP(opts.TopP)) - } - if opts.TopK > 0 { - lcOpts = append(lcOpts, llms.WithTopK(opts.TopK)) - } - if len(opts.StopWords) > 0 { - lcOpts = append(lcOpts, llms.WithStopWords(opts.StopWords)) - } - if len(opts.Tools) > 0 { - lcOpts = append(lcOpts, llms.WithTools(ToolsToLangchain(opts.Tools))) - } - - return lcOpts -} diff --git a/pkg/llmprovider/openaiprovider/client.go b/pkg/llmprovider/openaiprovider/client.go new file mode 100644 index 00000000..4dd782cc --- /dev/null +++ b/pkg/llmprovider/openaiprovider/client.go @@ -0,0 +1,221 @@ +// Package openaiprovider implements the llmprovider.Provider interface using the +// official OpenAI Go SDK (github.com/openai/openai-go). +package openaiprovider + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/grafana/plugin-validator/pkg/llmprovider" + "github.com/grafana/plugin-validator/pkg/logme" + "github.com/openai/openai-go" + "github.com/openai/openai-go/option" + "github.com/openai/openai-go/shared" +) + +// Client implements llmprovider.Provider for OpenAI. +type Client struct { + client *openai.Client + modelName string +} + +// New creates an OpenAI provider client. +func New(apiKey, modelName string) (*Client, error) { + if apiKey == "" { + return nil, fmt.Errorf("openai: API key is required") + } + if modelName == "" { + return nil, fmt.Errorf("openai: model name is required") + } + + client := openai.NewClient(option.WithAPIKey(apiKey)) + + return &Client{client: &client, modelName: modelName}, nil +} + +// GenerateContent sends messages to OpenAI and returns the response.
+func (c *Client) GenerateContent( + ctx context.Context, + messages []llmprovider.Message, + options ...llmprovider.CallOption, +) (*llmprovider.Response, error) { + opts := &llmprovider.CallOptions{} + for _, o := range options { + o(opts) + } + + params := openai.ChatCompletionNewParams{ + Model: shared.ChatModel(c.modelName), + Messages: toOpenAIMessages(messages), + } + + if opts.Temperature > 0 { + params.Temperature = openai.Float(opts.Temperature) + } + if opts.MaxTokens > 0 { + params.MaxCompletionTokens = openai.Int(int64(opts.MaxTokens)) + } + if opts.TopP > 0 { + params.TopP = openai.Float(opts.TopP) + } + if len(opts.StopWords) > 0 { + params.Stop = openai.ChatCompletionNewParamsStopUnion{ + OfStringArray: opts.StopWords, + } + } + + if len(opts.Tools) > 0 { + params.Tools = toOpenAITools(opts.Tools) + } + + resp, err := c.client.Chat.Completions.New(ctx, params) + if err != nil { + return nil, fmt.Errorf("openai: API error: %w", err) + } + + return fromOpenAIResponse(resp), nil +} + +// --- Message conversion: llmprovider → openai --- + +func toOpenAIMessages(messages []llmprovider.Message) []openai.ChatCompletionMessageParamUnion { + var result []openai.ChatCompletionMessageParamUnion + + for _, msg := range messages { + switch msg.Role { + case llmprovider.RoleSystem: + text := extractText(msg.Parts) + result = append(result, openai.SystemMessage(text)) + + case llmprovider.RoleHuman: + text := extractText(msg.Parts) + result = append(result, openai.UserMessage(text)) + + case llmprovider.RoleAI: + result = append(result, toAssistantMessage(msg)) + + case llmprovider.RoleTool: + for _, part := range msg.Parts { + if tr, ok := part.(llmprovider.ToolResultPart); ok { + result = append(result, openai.ToolMessage(tr.Content, tr.ToolCallID)) + } + } + } + } + + return result +} + +func toAssistantMessage(msg llmprovider.Message) openai.ChatCompletionMessageParamUnion { + text := extractText(msg.Parts) + + var toolCalls 
[]openai.ChatCompletionMessageToolCallParam + for _, part := range msg.Parts { + if tc, ok := part.(llmprovider.ToolCallPart); ok { + toolCalls = append(toolCalls, openai.ChatCompletionMessageToolCallParam{ + ID: tc.ID, + Function: openai.ChatCompletionMessageToolCallFunctionParam{ + Name: tc.Name, + Arguments: tc.Arguments, + }, + }) + } + } + + asst := openai.ChatCompletionAssistantMessageParam{} + if text != "" { + asst.Content.OfString = openai.String(text) + } + if len(toolCalls) > 0 { + asst.ToolCalls = toolCalls + } + + return openai.ChatCompletionMessageParamUnion{OfAssistant: &asst} +} + +func extractText(parts []llmprovider.Part) string { + var text string + for _, p := range parts { + if tp, ok := p.(llmprovider.TextPart); ok { + if text != "" { + text += "\n" + } + text += tp.Text + } + } + return text +} + +// --- Tool conversion --- + +func toOpenAITools(tools []llmprovider.Tool) []openai.ChatCompletionToolParam { + var result []openai.ChatCompletionToolParam + for _, tool := range tools { + if tool.Function == nil { + continue + } + + param := openai.ChatCompletionToolParam{ + Function: shared.FunctionDefinitionParam{ + Name: tool.Function.Name, + Description: openai.String(tool.Function.Description), + }, + } + + // Convert parameters to FunctionParameters (map[string]any) + if tool.Function.Parameters != nil { + switch p := tool.Function.Parameters.(type) { + case map[string]any: + param.Function.Parameters = shared.FunctionParameters(p) + default: + // Marshal and unmarshal to get map[string]any + data, err := json.Marshal(p) + if err == nil { + var m map[string]any + if json.Unmarshal(data, &m) == nil { + param.Function.Parameters = shared.FunctionParameters(m) + } + } + } + } + + result = append(result, param) + } + return result +} + +// --- Response conversion: openai → llmprovider --- + +func fromOpenAIResponse(resp *openai.ChatCompletion) *llmprovider.Response { + result := &llmprovider.Response{ + Choices: make([]*llmprovider.Choice, 0, 
len(resp.Choices)), + } + + for _, c := range resp.Choices { + choice := &llmprovider.Choice{ + Content: c.Message.Content, + StopReason: c.FinishReason, + GenerationInfo: make(map[string]any), + } + + for _, tc := range c.Message.ToolCalls { + logme.LLMLog("openai: tool call: name=%s id=%s", tc.Function.Name, tc.ID) + choice.ToolCalls = append(choice.ToolCalls, llmprovider.ToolCallPart{ + ID: tc.ID, + Name: tc.Function.Name, + Arguments: tc.Function.Arguments, + }) + } + + choice.GenerationInfo["usage"] = map[string]any{ + "prompt_tokens": resp.Usage.PromptTokens, + "completion_tokens": resp.Usage.CompletionTokens, + "total_tokens": resp.Usage.TotalTokens, + } + + result.Choices = append(result.Choices, choice) + } + + return result +} diff --git a/pkg/llmprovider/types.go b/pkg/llmprovider/types.go index e3b80056..d6a592e8 100644 --- a/pkg/llmprovider/types.go +++ b/pkg/llmprovider/types.go @@ -1,11 +1,8 @@ // Package llmprovider defines a unified interface for LLM providers. // -// These types mirror github.com/tmc/langchaingo/llms but add support for -// provider-specific features like Gemini thought_signatures, Anthropic -// thinking blocks, and OpenAI encrypted reasoning content. -// -// During migration, conversion helpers allow gradual adoption: one provider -// can be migrated at a time while the rest continue using langchain types. +// Each provider (Gemini, Anthropic, OpenAI) has a native SDK implementation +// that supports provider-specific features like Gemini thought_signatures, +// Anthropic thinking blocks, and OpenAI encrypted reasoning content. 
package llmprovider import "context" From 300c064ba74d70c57cdfb80b050a44e169c44a91 Mon Sep 17 00:00:00 2001 From: Esteban Beltran Date: Wed, 4 Mar 2026 16:42:30 +0100 Subject: [PATCH 07/10] remve commetn --- pkg/llmclient/agentic_client.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/pkg/llmclient/agentic_client.go b/pkg/llmclient/agentic_client.go index 22f53202..eae435bd 100644 --- a/pkg/llmclient/agentic_client.go +++ b/pkg/llmclient/agentic_client.go @@ -492,9 +492,7 @@ func truncateString(s string, maxLen int) string { return s[:maxLen] + "..." } -// initProvider initializes the appropriate provider based on configuration. // initProvider creates the appropriate native provider for the given config. -// until they are migrated. func initProvider(ctx context.Context, opts *AgenticCallOptions) (llmprovider.Provider, error) { switch opts.Provider { case "google": From 901c4eb08656ee79af80723153fbed10ba5ce0dc Mon Sep 17 00:00:00 2001 From: Esteban Beltran Date: Wed, 4 Mar 2026 17:02:54 +0100 Subject: [PATCH 08/10] improve multi tool calling --- pkg/llmclient/agentic_client.go | 133 ++++++++++++-------------------- 1 file changed, 48 insertions(+), 85 deletions(-) diff --git a/pkg/llmclient/agentic_client.go b/pkg/llmclient/agentic_client.go index eae435bd..7c4b04dc 100644 --- a/pkg/llmclient/agentic_client.go +++ b/pkg/llmclient/agentic_client.go @@ -4,7 +4,6 @@ import ( "context" "encoding/json" "fmt" - "strings" "time" "github.com/grafana/plugin-validator/pkg/llmprovider" @@ -247,49 +246,12 @@ func (c *agenticClientImpl) runQuestionLoop( return messages, nil, fmt.Errorf("no response from LLM") } - // Merge all choices into one unified view for processing. - // Providers return a single Choice, but we merge defensively - // in case a provider returns multiple. 
- mergedChoice := llmprovider.Choice{} - var allToolCalls []llmprovider.ToolCallPart - var contentParts []string - - for i, ch := range resp.Choices { - debugLog("AgenticClient: processing choice %d: Content=%q, ToolCalls=%d, Thinking=%d", - i, truncateString(ch.Content, 100), len(ch.ToolCalls), len(ch.Thinking)) - for j, t := range ch.Thinking { - debugLog("AgenticClient: thinking[%d]: text=%q sig=%v", - j, truncateString(t.Text, 150), t.Signature != "") - } - - if ch.Content != "" { - contentParts = append(contentParts, ch.Content) - } - if len(ch.ToolCalls) > 0 { - allToolCalls = append(allToolCalls, ch.ToolCalls...) - } - // Use StopReason from first non-empty one - if mergedChoice.StopReason == "" && ch.StopReason != "" { - mergedChoice.StopReason = ch.StopReason - } - // Use GenerationInfo from first choice - if i == 0 { - mergedChoice.GenerationInfo = ch.GenerationInfo - } - } - - // Build merged choice — join all content parts so nothing is lost - // when Anthropic returns multiple text blocks (e.g. thinking + response). 
- if len(contentParts) > 0 { - mergedChoice.Content = strings.Join(contentParts, "\n") - } - mergedChoice.ToolCalls = allToolCalls - - choice := mergedChoice - debugLog("AgenticClient: merged choice - Content=%q, ToolCalls=%d", - truncateString(choice.Content, 200), len(choice.ToolCalls)) - if choice.Content != "" { - debugLog("AgenticClient: AI message: %s", truncateString(choice.Content, 200)) + choice := resp.Choices[0] + debugLog("AgenticClient: choice - Content=%q, ToolCalls=%d, Thinking=%d", + truncateString(choice.Content, 200), len(choice.ToolCalls), len(choice.Thinking)) + for j, t := range choice.Thinking { + debugLog("AgenticClient: thinking[%d]: text=%q sig=%v", + j, truncateString(t.Text, 150), t.Signature != "") } // If no tool calls, check if we should nudge the agent @@ -325,6 +287,23 @@ func (c *agenticClientImpl) runQuestionLoop( // Reset consecutive no-tool counter when tools are used consecutiveNoTools = 0 + // Build the assistant message with all parts from the response: + // thinking blocks, text content, and tool calls. + var aiParts []llmprovider.Part + for _, t := range choice.Thinking { + aiParts = append(aiParts, t) + } + if choice.Content != "" { + aiParts = append(aiParts, llmprovider.TextPart{Text: choice.Content}) + } + for _, tc := range choice.ToolCalls { + aiParts = append(aiParts, tc) + } + messages = append(messages, llmprovider.Message{ + Role: llmprovider.RoleAI, + Parts: aiParts, + }) + // Validate submit_answer is called alone hasSubmitAnswer := false for _, toolCall := range choice.ToolCalls { @@ -335,57 +314,41 @@ func (c *agenticClientImpl) runQuestionLoop( } if hasSubmitAnswer && len(choice.ToolCalls) > 1 { debugLog("AgenticClient: submit_answer called with other tools - rejecting all") - // Add a single AI message with ALL tool calls so every - // tool_result below has a matching tool_use in the preceding - // assistant message. 
- aiParts := make([]llmprovider.Part, len(choice.ToolCalls)) - for i, tc := range choice.ToolCalls { - aiParts[i] = tc - } - aiMessage := llmprovider.Message{ - Role: llmprovider.RoleAI, - Parts: aiParts, - } - messages = append(messages, aiMessage) + var resultParts []llmprovider.Part for _, toolCall := range choice.ToolCalls { toolCallsRemaining-- - errorResponse := llmprovider.Message{ - Role: llmprovider.RoleTool, - Parts: []llmprovider.Part{ - llmprovider.ToolResultPart{ - ToolCallID: toolCall.ID, - Name: toolCall.Name, - Content: submitAnswerAloneError, - }, - }, - } - messages = append(messages, errorResponse) + resultParts = append(resultParts, llmprovider.ToolResultPart{ + ToolCallID: toolCall.ID, + Name: toolCall.Name, + Content: submitAnswerAloneError, + }) } + messages = append(messages, llmprovider.Message{ + Role: llmprovider.RoleTool, + Parts: resultParts, + }) continue } - // Process each tool call as a separate AI message + tool result pair. - // This is the "split" part of the merge-then-split pattern. - // - // Create one AI message per tool call, each followed by its tool_result. - // This keeps the conversation in strict alternating assistant/user order - // as required by Anthropic's API. + // Execute tool calls and collect results into a single tool message. + var resultParts []llmprovider.Part + var answer *AnswerSchema for i, toolCall := range choice.ToolCalls { toolCallsRemaining-- - - aiMessage := llmprovider.Message{ - Role: llmprovider.RoleAI, - Parts: []llmprovider.Part{toolCall}, - } - messages = append(messages, aiMessage) - - response, answer := c.processToolCall(toolCall, i, len(choice.ToolCalls)) - messages = append(messages, response) - if answer != nil { - debugLog("AgenticClient: received answer for question %d", questionIndex+1) - return messages, answer, nil + response, ans := c.processToolCall(toolCall, i, len(choice.ToolCalls)) + resultParts = append(resultParts, response.Parts...) 
+ if ans != nil { + answer = ans } } + messages = append(messages, llmprovider.Message{ + Role: llmprovider.RoleTool, + Parts: resultParts, + }) + if answer != nil { + debugLog("AgenticClient: received answer for question %d", questionIndex+1) + return messages, answer, nil + } } // Budget exhausted without answer From 53adf156ca3b4c044381bf87cd597d5d750645e8 Mon Sep 17 00:00:00 2001 From: Esteban Beltran Date: Thu, 5 Mar 2026 09:19:34 +0100 Subject: [PATCH 09/10] pass tools on llm call --- pkg/llmclient/agentic_client.go | 64 ++++-------- pkg/llmclient/agentic_tools.go | 174 ++++++++++++++++---------------- pkg/llmclient/agentic_types.go | 118 ++++++++++++++++++++++ 3 files changed, 225 insertions(+), 131 deletions(-) create mode 100644 pkg/llmclient/agentic_types.go diff --git a/pkg/llmclient/agentic_client.go b/pkg/llmclient/agentic_client.go index 7c4b04dc..9a6cd7a4 100644 --- a/pkg/llmclient/agentic_client.go +++ b/pkg/llmclient/agentic_client.go @@ -19,24 +19,6 @@ const ( maxConsecutiveNoTools = 5 retryDelay = 2 * time.Second - systemPrompt = `You are a code analysis assistant. You have tools to explore code in a repository. - -AVAILABLE TOOLS: -- list_directory: List files at a path. Use "." for root. -- read_file: Read a file's contents. This is your primary tool for understanding code. -- grep: Search for a pattern across files. -- git: Run read-only git commands (log, show, diff, status, etc.) -- submit_answer: Submit your answers. - -STRATEGY: -1. Use list_directory to see what files exist -2. Use read_file to read the source code files -3. Analyze the code to answer the question - -You can only use one tool at a time. -IMPORTANT: You are in non-interactive mode. No one will read your text answers, only tools. -When you have gathered enough information, use submit_answer to provide your answer.` - budgetNudgePrompt = `You have only %d tool calls remaining. 
Wrap up your investigation and call submit_answer now with whatever information you have gathered so far.` useToolsReminderPrompt = `You are in non-interactive mode. You must start using your tools now to explore the repository. When you have enough information, use submit_answer to provide your answer.` @@ -44,22 +26,6 @@ When you have gathered enough information, use submit_answer to provide your ans submitAnswerAloneError = `Error: submit_answer must be called alone. When you have an answer, call submit_answer as a single tool call without any other tools in the same response.` ) -// AnswerSchema represents the structured response from the agentic client -type AnswerSchema struct { - Question string `json:"question"` - Answer string `json:"answer"` - ShortAnswer bool `json:"short_answer"` - Files []string `json:"files,omitempty"` - CodeSnippet string `json:"code_snippet,omitempty"` -} - -// AgenticCallOptions contains configuration for the agentic LLM call -type AgenticCallOptions struct { - Model string // e.g. 
"gemini-2.0-flash" - Provider string // "google", "anthropic", "openai" - APIKey string -} - // AgenticClient is an interface for agentic LLM interactions type AgenticClient interface { CallLLM(ctx context.Context, questions []string, repositoryPath string) ([]AnswerSchema, error) @@ -67,11 +33,12 @@ type AgenticClient interface { // agenticClientImpl implements AgenticClient type agenticClientImpl struct { - apiKey string - model string - provider string - tools []llmprovider.Tool - executor *toolExecutor + apiKey string + model string + provider string + tools []llmprovider.Tool + systemPrompt string + executor *toolExecutor } // NewAgenticClient creates a new AgenticClient with the given options @@ -88,10 +55,18 @@ func NewAgenticClient(opts *AgenticCallOptions) (AgenticClient, error) { if opts.Provider == "" { return nil, fmt.Errorf("provider is required") } + + tools, err := resolveTools(opts) + if err != nil { + return nil, fmt.Errorf("resolving tools: %w", err) + } + return &agenticClientImpl{ - apiKey: opts.APIKey, - model: opts.Model, - provider: opts.Provider, + apiKey: opts.APIKey, + model: opts.Model, + provider: opts.Provider, + tools: tools, + systemPrompt: buildSystemPrompt(opts.SystemPrompt, tools), }, nil } @@ -118,13 +93,12 @@ func (c *agenticClientImpl) CallLLM( return nil, fmt.Errorf("failed to initialize LLM: %w", err) } - // Initialize tools and executor for this repository - c.tools = buildAgenticTools() + // Initialize executor for this repository c.executor = newToolExecutor(repositoryPath) // Build initial messages with system prompt only (no user message yet) messages := []llmprovider.Message{ - llmprovider.TextMessage(llmprovider.RoleSystem, systemPrompt), + llmprovider.TextMessage(llmprovider.RoleSystem, c.systemPrompt), } // Print debug log file path before starting the loop diff --git a/pkg/llmclient/agentic_tools.go b/pkg/llmclient/agentic_tools.go index c5b55f5c..71a380a0 100644 --- a/pkg/llmclient/agentic_tools.go +++ 
b/pkg/llmclient/agentic_tools.go @@ -46,110 +46,112 @@ var blockedGitFlags = []string{ "--run", } -// buildAgenticTools returns the list of tools available to the agent -func buildAgenticTools() []llmprovider.Tool { - return []llmprovider.Tool{ - { - Type: "function", - Function: &llmprovider.FunctionDef{ - Name: "read_file", - Description: "Read the contents of a file at the given path", - Parameters: map[string]interface{}{ - "type": "object", - "properties": map[string]interface{}{ - "path": map[string]interface{}{ - "type": "string", - "description": "The relative path to the file to read", - }, +// toolRegistry maps AgenticTool names to their llmprovider.Tool definitions. +var toolRegistry = map[AgenticTool]llmprovider.Tool{ + ToolReadFile: { + Type: "function", + Function: &llmprovider.FunctionDef{ + Name: "read_file", + Description: "Read the contents of a file at the given path", + Parameters: map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "path": map[string]interface{}{ + "type": "string", + "description": "The relative path to the file to read", }, - "required": []string{"path"}, }, + "required": []string{"path"}, }, }, - { - Type: "function", - Function: &llmprovider.FunctionDef{ - Name: "list_directory", - Description: "List files and directories at the given path", - Parameters: map[string]interface{}{ - "type": "object", - "properties": map[string]interface{}{ - "path": map[string]interface{}{ - "type": "string", - "description": "The relative path to the directory to list (use '.' for root)", - }, + }, + ToolListDirectory: { + Type: "function", + Function: &llmprovider.FunctionDef{ + Name: "list_directory", + Description: "List files and directories at the given path", + Parameters: map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "path": map[string]interface{}{ + "type": "string", + "description": "The relative path to the directory to list (use '.' 
for root)", }, - "required": []string{"path"}, }, + "required": []string{"path"}, }, }, - { - Type: "function", - Function: &llmprovider.FunctionDef{ - Name: "grep", - Description: "Search for a pattern in files. Returns matching lines with file names and line numbers.", - Parameters: map[string]interface{}{ - "type": "object", - "properties": map[string]interface{}{ - "pattern": map[string]interface{}{ - "type": "string", - "description": "The pattern to search for", - }, - "path": map[string]interface{}{ - "type": "string", - "description": "Optional: directory or file to search in (defaults to '.')", - }, + }, + ToolGrep: { + Type: "function", + Function: &llmprovider.FunctionDef{ + Name: "grep", + Description: "Search for a pattern in files. Returns matching lines with file names and line numbers.", + Parameters: map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "pattern": map[string]interface{}{ + "type": "string", + "description": "The pattern to search for", + }, + "path": map[string]interface{}{ + "type": "string", + "description": "Optional: directory or file to search in (defaults to '.')", }, - "required": []string{"pattern"}, }, + "required": []string{"pattern"}, }, }, - { - Type: "function", - Function: &llmprovider.FunctionDef{ - Name: "git", - Description: "Execute a git command. Only allowed commands: log, show, diff, status, ls-files, blame, rev-parse, cat-file, checkout, fetch, pull, branch, tag.", - Parameters: map[string]interface{}{ - "type": "object", - "properties": map[string]interface{}{ - "args": map[string]interface{}{ - "type": "string", - "description": "The git command arguments (e.g., 'log -n 5' or 'show HEAD')", - }, + }, + ToolGit: { + Type: "function", + Function: &llmprovider.FunctionDef{ + Name: "git", + Description: "Execute a git command. 
Only allowed commands: log, show, diff, status, ls-files, blame, rev-parse, cat-file, checkout, fetch, pull, branch, tag.", + Parameters: map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "args": map[string]interface{}{ + "type": "string", + "description": "The git command arguments (e.g., 'log -n 5' or 'show HEAD')", }, - "required": []string{"args"}, }, + "required": []string{"args"}, }, }, - { - Type: "function", - Function: &llmprovider.FunctionDef{ - Name: "submit_answer", - Description: "Submit your final answer to the question. Use this when you have gathered enough information.", - Parameters: map[string]interface{}{ - "type": "object", - "properties": map[string]interface{}{ - "answer": map[string]interface{}{ - "type": "string", - "description": "Your detailed answer explaining your findings", - }, - "short_answer": map[string]interface{}{ - "type": "boolean", - "description": "A boolean answer to the question: true means YES, false means NO. For example, if the question is 'Is the sky blue?' the short_answer is true. If the question is 'Is the sky green?' the short_answer is false.", - }, - "files": map[string]interface{}{ - "type": "array", - "items": map[string]interface{}{"type": "string"}, - "description": "List of relevant files. Pass an empty array if not relevant.", - }, - "code_snippet": map[string]interface{}{ - "type": "string", - "description": "A relevant code snippet. Pass an empty string if not relevant.", - }, + }, +} + +// submitAnswerTool returns the submit_answer tool definition. +func submitAnswerTool() llmprovider.Tool { + return llmprovider.Tool{ + Type: "function", + Function: &llmprovider.FunctionDef{ + Name: "submit_answer", + Description: "Submit your final answer to the question. 
Use this when you have gathered enough information.", + Parameters: map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "answer": map[string]interface{}{ + "type": "string", + "description": "Your detailed answer explaining your findings", + }, + "short_answer": map[string]interface{}{ + "type": "boolean", + "description": "A boolean answer to the question: true means YES, false means NO. For example, if the question is 'Is the sky blue?' the short_answer is true. If the question is 'Is the sky green?' the short_answer is false.", + }, + "files": map[string]interface{}{ + "type": "array", + "items": map[string]interface{}{"type": "string"}, + "description": "List of relevant files. Pass an empty array if not relevant.", + }, + "code_snippet": map[string]interface{}{ + "type": "string", + "description": "A relevant code snippet. Pass an empty string if not relevant.", }, - "required": []string{"answer", "short_answer", "files", "code_snippet"}, - "additionalProperties": false, }, + "required": []string{"answer", "short_answer", "files", "code_snippet"}, + "additionalProperties": false, }, }, } diff --git a/pkg/llmclient/agentic_types.go b/pkg/llmclient/agentic_types.go new file mode 100644 index 00000000..8439a568 --- /dev/null +++ b/pkg/llmclient/agentic_types.go @@ -0,0 +1,118 @@ +package llmclient + +import ( + "fmt" + "strings" + + "github.com/grafana/plugin-validator/pkg/llmprovider" +) + +const defaultSystemPromptIntro = `You are a code analysis assistant. You have tools to explore code in a repository. + +STRATEGY: +1. Use list_directory to see what files exist +2. Use read_file to read the source code files +3. Analyze the code to answer the question` + +// AgenticTool identifies an exploration tool available to the agent. 
+type AgenticTool string + +const ( + ToolReadFile AgenticTool = "read_file" + ToolListDirectory AgenticTool = "list_directory" + ToolGrep AgenticTool = "grep" + ToolGit AgenticTool = "git" +) + +// ToolSet is a preset collection of tools. +type ToolSet int + +const ( + // DefaultTooling includes all exploration tools (read_file, list_directory, + // grep, git) plus submit_answer. This is the zero value. + DefaultTooling ToolSet = iota + // NoTools includes only submit_answer with no exploration tools. + NoTools +) + +// AgenticCallOptions contains configuration for the agentic LLM call. +type AgenticCallOptions struct { + Model string // e.g. "gemini-2.5-flash" + Provider string // "google", "anthropic", "openai" + APIKey string + + // Tools selects specific exploration tools. When non-nil, takes precedence + // over ToolSet. submit_answer is always included regardless. + Tools []AgenticTool + + // ToolSet selects a preset collection of tools. Used when Tools is nil. + // The zero value (DefaultTooling) includes all exploration tools. + ToolSet ToolSet + + // SystemPrompt overrides the intro portion of the system prompt. The + // AVAILABLE TOOLS section is always auto-appended. When empty, a default + // intro is used. + SystemPrompt string +} + +// AnswerSchema represents the structured response from the agentic client. +type AnswerSchema struct { + Question string `json:"question"` + Answer string `json:"answer"` + ShortAnswer bool `json:"short_answer"` + Files []string `json:"files,omitempty"` + CodeSnippet string `json:"code_snippet,omitempty"` +} + +// defaultTools returns the full set of exploration tools. +func defaultTools() []AgenticTool { + return []AgenticTool{ToolReadFile, ToolListDirectory, ToolGrep, ToolGit} +} + +// resolveTools builds the final []llmprovider.Tool list from the options. +// submit_answer is always appended. 
+func resolveTools(opts *AgenticCallOptions) ([]llmprovider.Tool, error) { + var selected []AgenticTool + if opts.Tools != nil { + selected = opts.Tools + } else { + switch opts.ToolSet { + case DefaultTooling: + selected = defaultTools() + case NoTools: + // empty + default: + return nil, fmt.Errorf("unknown tool set: %d", opts.ToolSet) + } + } + + tools := make([]llmprovider.Tool, 0, len(selected)+1) + for _, name := range selected { + def, ok := toolRegistry[name] + if !ok { + return nil, fmt.Errorf("unknown tool: %q", name) + } + tools = append(tools, def) + } + tools = append(tools, submitAnswerTool()) + return tools, nil +} + +// buildSystemPrompt composes the system prompt from an intro and the resolved tools. +func buildSystemPrompt(intro string, tools []llmprovider.Tool) string { + if intro == "" { + intro = defaultSystemPromptIntro + } + + var b strings.Builder + b.WriteString(intro) + b.WriteString("\n\nAVAILABLE TOOLS:\n") + for _, t := range tools { + if t.Function != nil { + fmt.Fprintf(&b, "- %s: %s\n", t.Function.Name, t.Function.Description) + } + } + b.WriteString("\nIMPORTANT: You are in non-interactive mode. 
No one will read your text answers, only tools.\nWhen you have gathered enough information, use submit_answer to provide your answer.") + + return b.String() +} From 041c571be8542658cc85efeda7872c0fc88c2ee3 Mon Sep 17 00:00:00 2001 From: Esteban Beltran Date: Thu, 5 Mar 2026 14:42:31 +0100 Subject: [PATCH 10/10] remove stale doc --- docs/anthropic-choices-behavior.md | 70 ------------------------------ 1 file changed, 70 deletions(-) delete mode 100644 docs/anthropic-choices-behavior.md diff --git a/docs/anthropic-choices-behavior.md b/docs/anthropic-choices-behavior.md deleted file mode 100644 index a578e4c3..00000000 --- a/docs/anthropic-choices-behavior.md +++ /dev/null @@ -1,70 +0,0 @@ -# Anthropic Choices and Message Serialization in go-langchain - -## Overview -Anthropic's response structure and go-langchain's serialization behavior require special handling when building multi-turn conversations with tool use. - -## Response Structure (Anthropic → go-langchain) - -Anthropic API returns responses as an array of **content blocks**: -``` -[text_block, tool_use_block, tool_use_block, ...] -``` - -go-langchain converts each content block into a **separate ContentChoice**: -- `type: "text"` → `ContentChoice{Content: "...", ToolCalls: []}` -- `type: "tool_use"` → `ContentChoice{Content: "", ToolCalls: [{...}]}` -- `type: "thinking"` → `ContentChoice{Content: "", GenerationInfo: {...}}` - -**Key insight:** One Anthropic response can produce multiple Choices. For example: -- Response with text + 2 tool calls → 3 Choices -- Response with just text → 1 Choice - -## Serialization Constraint (go-langchain → Anthropic) - -The critical limitation is in `handleAIMessage()`: -```go -if toolCall, ok := msg.Parts[0].(llms.ToolCall); ok { - // Only Parts[0] is serialized! 
-} -``` - -**This means:** -- Only `Parts[0]` of a MessageContent is serialized back to Anthropic -- If you create `MessageContent{Parts: [toolCall1, toolCall2]}`, only `toolCall1` is sent -- Multiple ToolCalls in one message **will lose data** - -## Required Pattern: Interleaved Messages - -To work around this limitation, tool calls must be **interleaved** as separate messages: - -``` -AI message: Parts[toolCall1] -Tool message: Parts[toolResult1] -AI message: Parts[toolCall2] -Tool message: Parts[toolResult2] -``` - -Not: -``` -AI message: Parts[toolCall1, toolCall2] // toolCall2 would be lost! -Tool message: Parts[toolResult1, toolResult2] -``` - -## Why Merging Choices is Necessary - -When processing Anthropic's response: -1. Anthropic returns separate content blocks (potentially text + multiple tools) -2. go-langchain creates one Choice per block -3. We must merge these Choices to get the complete response -4. Then we must split them back into individual AI messages for serialization - -The merge preserves all information for processing, but the split ensures proper serialization. - -## Implementation Details in agentic_client.go - -The choice-merging code performs this merge: -- Collects all content parts from separate Choices -- Collects all ToolCalls from separate Choices -- Creates one merged view for processing - -Then later in the tool call processing, it **reverses** this by creating one AI message per ToolCall to avoid the serialization bug.