From 73b6e59ae2278b5078bb4fd9579b1b38f8af023e Mon Sep 17 00:00:00 2001
From: Esteban Beltran <sergies@gmail.com>
Date: Tue, 3 Mar 2026 18:02:15 +0100
Subject: [PATCH 01/10] Improve agentic client: answer questions sequentially with per-question tool-call budgets

---
 pkg/llmclient/agentic_client.go               | 376 +++++++++++++-----
 .../agentic_client_integration_test.go        | 253 ++++++------
 pkg/llmclient/agentic_debug.go                |   4 +-
 pkg/llmclient/agentic_tools.go                |   2 +-
 4 files changed, 428 insertions(+), 207 deletions(-)

diff --git a/pkg/llmclient/agentic_client.go b/pkg/llmclient/agentic_client.go
index cef19851..e41ab881 100644
--- a/pkg/llmclient/agentic_client.go
+++ b/pkg/llmclient/agentic_client.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"strings"
 	"time"
 
 	"github.com/tmc/langchaingo/llms"
@@ -13,14 +14,42 @@ import (
 )
 
 const (
-	maxToolCalls            = 100
-	maxLLMRetries           = 3
-	maxConsecutiveNoTools   = 5
-	retryDelay              = 2 * time.Second
+	maxToolCallsFirstQuestion = 60
+	maxToolCallsFollowUp      = 20
+	maxLLMRetries             = 3
+	maxConsecutiveNoTools     = 5
+	retryDelay                = 2 * time.Second
+
+	systemPrompt = `You are a code analysis assistant. You have tools to explore code in a repository.
+
+AVAILABLE TOOLS:
+- list_directory: List files at a path. Use "." for root.
+- read_file: Read a file's contents. This is your primary tool for understanding code.
+- grep: Search for a pattern across files.
+- git: Run read-only git commands (log, show, diff, status, etc.)
+- submit_answer: Submit your answers.
+
+STRATEGY:
+1. Use list_directory to see what files exist
+2. Use read_file to read the source code files
+3. Analyze the code to answer the question
+
+You can only use one tool at a time.
+IMPORTANT: You are in non-interactive mode. No one will read your text answers, only tools.
+When you have gathered enough information, use submit_answer to provide your answer.`
+
+	questionAppendPrompt = `Start by listing the files in the repository and exploring the contents.`
+
+	budgetNudgePrompt = `You have only %d tool calls remaining. Wrap up your investigation and call submit_answer now with whatever information you have gathered so far.`
+
+	useToolsReminderPrompt = `You are in non-interactive mode. You must start using your tools now to explore the repository. When you have enough information, use submit_answer to provide your answer.`
+
+	submitAnswerAloneError = `Error: submit_answer must be called alone. When you have an answer, call submit_answer as a single tool call without any other tools in the same response.`
 )
 
 // AnswerSchema represents the structured response from the agentic client
 type AnswerSchema struct {
+	Question    string   `json:"question"`
 	Answer      string   `json:"answer"`
 	ShortAnswer bool     `json:"short_answer"`
 	Files       []string `json:"files,omitempty"`
@@ -36,7 +65,7 @@ type AgenticCallOptions struct {
 
 // AgenticClient is an interface for agentic LLM interactions
 type AgenticClient interface {
-	CallLLM(ctx context.Context, prompt, repositoryPath string) ([]AnswerSchema, error)
+	CallLLM(ctx context.Context, questions []string, repositoryPath string) ([]AnswerSchema, error)
 }
 
 // agenticClientImpl implements AgenticClient
@@ -68,9 +97,17 @@ func NewAgenticClient(opts *AgenticCallOptions) (AgenticClient, error) {
 }
 
 // CallLLM executes an agentic loop with tools to answer questions about code.
-// The prompt may contain multiple questions, in which case the agent will call
-// submit_answer multiple times. All answers are collected and returned.
-func (c *agenticClientImpl) CallLLM(ctx context.Context, prompt, repositoryPath string) ([]AnswerSchema, error) {
+// Each question is processed sequentially, with follow-up questions benefiting
+// from the context accumulated by earlier questions.
+func (c *agenticClientImpl) CallLLM(
+	ctx context.Context,
+	questions []string,
+	repositoryPath string,
+) ([]AnswerSchema, error) {
+	if len(questions) == 0 {
+		return nil, fmt.Errorf("at least one question is required")
+	}
+
 	// Initialize LLM based on provider using the client's configured settings
 	opts := &AgenticCallOptions{
 		APIKey:   c.apiKey,
@@ -88,102 +125,206 @@ func (c *agenticClientImpl) CallLLM(ctx context.Context, prompt, repositoryPath
 	// Create tool executor
 	executor := newToolExecutor(repositoryPath)
 
-	// System prompt
-	systemPrompt := `You are a code analysis assistant. You have tools to explore code in a repository.
-
-AVAILABLE TOOLS:
-- list_directory: List files at a path. Use "." for root.
-- read_file: Read a file's contents. This is your primary tool for understanding code.
-- grep: Search for a pattern across files.
-- git: Run read-only git commands (log, show, diff, status, etc.)
-- submit_answer: Submit your final answer.
-
-STRATEGY:
-1. Use list_directory to see what files exist
-2. Use read_file to read the source code files
-3. Analyze the code to answer the question
-
-You can only use one tool at a time.
-IMPORTANT: You are in non-interactive mode. Start working and using your tools immediately.
-When ready, use submit_answer. For multiple questions, call submit_answer once per question.`
-
-	// Build initial messages
+	// Build initial messages with system prompt only (no user message yet)
 	messages := []llms.MessageContent{
 		llms.TextParts(llms.ChatMessageTypeSystem, systemPrompt),
-		llms.TextParts(llms.ChatMessageTypeHuman, prompt),
 	}
 
-	// Collect answers
-	var answers []AnswerSchema
-
-	// Agentic loop
-	toolCallsRemaining := maxToolCalls
-
 	// Print debug log file path before starting the loop
 	printDebugLogPath()
 	debugLog("\n\n\n")
 	debugLog("################################################################")
 	debugLog("# NEW CallLLM - provider=%s model=%s", c.provider, c.model)
 	debugLog("# repo=%s", repositoryPath)
-	debugLog("# prompt=%s", truncateString(prompt, 200))
+	debugLog("# questions=%d", len(questions))
 	debugLog("################################################################")
 
-	iteration := 0
+	// Collect answers
+	var answers []AnswerSchema
+
+	// Process each question sequentially
+	for questionIndex, question := range questions {
+		debugLog(
+			"\n========== Processing question %d/%d ==========",
+			questionIndex+1,
+			len(questions),
+		)
+		debugLog("Question: %s", truncateString(question, 200))
+
+		originalQuestion := question
+		question = fmt.Sprintf("%s\n\n%s", question, questionAppendPrompt)
+
+		// Determine budget for this question
+		budget := maxToolCallsFirstQuestion
+		if questionIndex > 0 {
+			budget = maxToolCallsFollowUp
+		}
+		debugLog("Budget: %d tool calls", budget)
+
+		// Add the question as a human message
+		messages = append(messages, llms.TextParts(llms.ChatMessageTypeHuman, question))
+
+		// Run the question loop
+		updatedMessages, answer, err := c.runQuestionLoop(
+			ctx,
+			llm,
+			messages,
+			tools,
+			executor,
+			budget,
+			questionIndex,
+		)
+		messages = updatedMessages
+
+		if err != nil {
+			// Return partial results on error
+			debugLog("AgenticClient: question %d failed: %v", questionIndex+1, err)
+			if len(answers) > 0 {
+				debugLog("AgenticClient: returning %d partial answers", len(answers))
+				return answers, nil
+			}
+			return nil, err
+		}
+
+		if answer != nil {
+			// Set the question field
+			answer.Question = originalQuestion
+			answers = append(answers, *answer)
+			debugLog("AgenticClient: collected answer %d/%d", len(answers), len(questions))
+		} else {
+			// Budget exhausted without answer - stop processing further questions
+			debugLog("AgenticClient: question %d exhausted budget without answer, stopping", questionIndex+1)
+			if len(answers) > 0 {
+				debugLog("AgenticClient: returning %d partial answers", len(answers))
+				return answers, nil
+			}
+			return nil, fmt.Errorf("question %d exhausted budget without providing answer", questionIndex+1)
+		}
+	}
+
+	debugLog("AgenticClient: successfully answered all %d questions", len(questions))
+	return answers, nil
+}
+
+// runQuestionLoop runs the tool-calling loop for a single question.
+// Returns updated messages, the answer (or nil if budget exhausted), and error.
+func (c *agenticClientImpl) runQuestionLoop(
+	ctx context.Context,
+	llm llms.Model,
+	messages []llms.MessageContent,
+	tools []llms.Tool,
+	executor *toolExecutor,
+	budget int,
+	questionIndex int,
+) ([]llms.MessageContent, *AnswerSchema, error) {
+	toolCallsRemaining := budget
 	consecutiveNoTools := 0
+	iteration := 0
+
+	budgetNudged := false
+
 	for toolCallsRemaining > 0 {
 		iteration++
-		debugLog("========== AgenticClient: iteration %d ==========", iteration)
-		debugLog("AgenticClient: %d tool calls remaining, %d answers collected", toolCallsRemaining, len(answers))
+		debugLog("========== Question %d iteration %d ==========", questionIndex+1, iteration)
+		debugLog("AgenticClient: %d tool calls remaining", toolCallsRemaining)
+
+		if !budgetNudged && toolCallsRemaining <= 5 {
+			budgetNudged = true
+			debugLog("AgenticClient: nudging model about low budget")
+			messages = append(messages, llms.TextParts(
+				llms.ChatMessageTypeHuman,
+				fmt.Sprintf(budgetNudgePrompt, toolCallsRemaining),
+			))
+		}
 
 		// Call LLM with retry logic
 		debugLog("AgenticClient: calling LLM...")
 		resp, err := callLLMWithRetry(ctx, llm, messages, tools)
 		if err != nil {
 			debugLog("AgenticClient: LLM call failed: %v", err)
-			return nil, fmt.Errorf("LLM call failed after %d retries: %w", maxLLMRetries, err)
+			return messages, nil, fmt.Errorf(
+				"LLM call failed after %d retries: %w",
+				maxLLMRetries,
+				err,
+			)
 		}
 
-		// resp.Choices contains the LLM's response options. Each choice has Content (text)
-		// and/or ToolCalls (function calls the model wants to make). Typically there's
-		// only one choice unless you request multiple completions.
 		if len(resp.Choices) == 0 {
 			debugLog("AgenticClient: no choices in response")
-			return nil, fmt.Errorf("no response from LLM")
+			return messages, nil, fmt.Errorf("no response from LLM")
+		}
+
+		// Log raw response for debugging
+		debugLog("AgenticClient: received response with %d choices", len(resp.Choices))
+		if choicesJSON, err := json.MarshalIndent(resp.Choices, "", "  "); err == nil {
+			debugLog("Raw response Choices:\n%s", string(choicesJSON))
+		}
+
+		// Merge all choices into one (Anthropic returns text and tool calls as separate choices)
+		mergedChoice := llms.ContentChoice{}
+		var allToolCalls []llms.ToolCall
+		var contentParts []string
+
+		for i, ch := range resp.Choices {
+			debugLog("AgenticClient: processing choice %d: Content=%q, ToolCalls=%d",
+				i, truncateString(ch.Content, 100), len(ch.ToolCalls))
+
+			if ch.Content != "" {
+				contentParts = append(contentParts, ch.Content)
+			}
+			if len(ch.ToolCalls) > 0 {
+				allToolCalls = append(allToolCalls, ch.ToolCalls...)
+			}
+			// Use StopReason from first non-empty one
+			if mergedChoice.StopReason == "" && ch.StopReason != "" {
+				mergedChoice.StopReason = ch.StopReason
+			}
+			// Use GenerationInfo from first choice
+			if i == 0 {
+				mergedChoice.GenerationInfo = ch.GenerationInfo
+			}
 		}
 
-		// Use first choice. Google puts all tool calls in choices[0].ToolCalls.
-		// Anthropic creates a separate choice per content block (text or tool_use),
-		// but langchaingo's handleAIMessage only supports Parts[0] as either
-		// TextContent or ToolCall, so we process one choice at a time.
-		choice := resp.Choices[0]
-		debugLog("AgenticClient: received response with %d tool calls", len(choice.ToolCalls))
+		// Build merged choice — join all content parts so nothing is lost
+		// when Anthropic returns multiple text blocks (e.g. thinking + response).
+		if len(contentParts) > 0 {
+			mergedChoice.Content = strings.Join(contentParts, "\n")
+		}
+		mergedChoice.ToolCalls = allToolCalls
+
+		choice := mergedChoice
+		debugLog("AgenticClient: merged choice - Content=%q, ToolCalls=%d",
+			truncateString(choice.Content, 200), len(choice.ToolCalls))
 		if choice.Content != "" {
 			debugLog("AgenticClient: AI message: %s", truncateString(choice.Content, 200))
 		}
 
-		// If no tool calls, check if we have answers
+		// If no tool calls, check if we should nudge the agent
 		if len(choice.ToolCalls) == 0 {
 			debugLog("AgenticClient: no tool calls in response")
 
-			// If we have collected answers, the agent is done
-			if len(answers) > 0 {
-				debugLog("AgenticClient: agent finished with %d answers", len(answers))
-				return answers, nil
-			}
-
 			consecutiveNoTools++
-			debugLog("AgenticClient: consecutive no-tool responses: %d/%d", consecutiveNoTools, maxConsecutiveNoTools)
+			debugLog(
+				"AgenticClient: consecutive no-tool responses: %d/%d",
+				consecutiveNoTools,
+				maxConsecutiveNoTools,
+			)
 			if consecutiveNoTools >= maxConsecutiveNoTools {
-				return nil, fmt.Errorf("agent failed to use tools after %d consecutive attempts", maxConsecutiveNoTools)
+				return messages, nil, fmt.Errorf(
+					"agent failed to use tools after %d consecutive attempts",
+					maxConsecutiveNoTools,
+				)
 			}
 
-			// No answers yet - add the AI response and remind to use tools
+			// Add the AI response and remind to use tools
 			if choice.Content != "" {
 				messages = append(messages, llms.TextParts(llms.ChatMessageTypeAI, choice.Content))
 			}
-			debugLog("AgenticClient: no answers yet, reminding agent to use tools")
-			messages = append(messages, llms.TextParts(llms.ChatMessageTypeHuman,
-				"You are in non-interactive mode. You must start using your tools now to explore the repository. When you have enough information, use submit_answer to provide your answer."))
+			debugLog("AgenticClient: reminding agent to use tools")
+			messages = append(messages, llms.TextParts(
+				llms.ChatMessageTypeHuman,
+				useToolsReminderPrompt,
+			))
 			toolCallsRemaining--
 			continue
 		}
@@ -191,41 +332,85 @@ When ready, use submit_answer. For multiple questions, call submit_answer once p
 		// Reset consecutive no-tool counter when tools are used
 		consecutiveNoTools = 0
 
-		// Build AI message with tool calls
-		aiMessage := llms.MessageContent{
-			Role: llms.ChatMessageTypeAI,
-		}
-		if choice.Content != "" {
-			aiMessage.Parts = append(aiMessage.Parts, llms.TextContent{Text: choice.Content})
-		}
+		// Validate submit_answer is called alone
+		hasSubmitAnswer := false
 		for _, toolCall := range choice.ToolCalls {
-			aiMessage.Parts = append(aiMessage.Parts, toolCall)
+			if toolCall.FunctionCall.Name == "submit_answer" {
+				hasSubmitAnswer = true
+				break
+			}
+		}
+		if hasSubmitAnswer && len(choice.ToolCalls) > 1 {
+			debugLog("AgenticClient: submit_answer called with other tools - rejecting all")
+			// Add a single AI message with ALL tool calls so every
+			// tool_result below has a matching tool_use in the preceding
+			// assistant message.
+			aiParts := make([]llms.ContentPart, len(choice.ToolCalls))
+			for i, tc := range choice.ToolCalls {
+				aiParts[i] = tc
+			}
+			aiMessage := llms.MessageContent{
+				Role:  llms.ChatMessageTypeAI,
+				Parts: aiParts,
+			}
+			messages = append(messages, aiMessage)
+			for _, toolCall := range choice.ToolCalls {
+				toolCallsRemaining--
+				errorResponse := llms.MessageContent{
+					Role: llms.ChatMessageTypeTool,
+					Parts: []llms.ContentPart{
+						llms.ToolCallResponse{
+							ToolCallID: toolCall.ID,
+							Name:       toolCall.FunctionCall.Name,
+							Content:    submitAnswerAloneError,
+						},
+					},
+				}
+				messages = append(messages, errorResponse)
+			}
+			continue
 		}
-		messages = append(messages, aiMessage)
 
-		// Process tool calls
+		// Process each tool call as a separate AI message + tool result pair.
+		// langchaingo's Anthropic handleAIMessage only serializes Parts[0], so
+		// putting all tool calls in one message loses everything after the first.
+		// Interleaving ensures each tool_use has a matching tool_result in the
+		// immediately following user message.
 		for i, toolCall := range choice.ToolCalls {
 			toolCallsRemaining--
-			response, answer := processToolCall(toolCall, i, len(choice.ToolCalls), len(answers), executor)
+
+			aiMessage := llms.MessageContent{
+				Role:  llms.ChatMessageTypeAI,
+				Parts: []llms.ContentPart{toolCall},
+			}
+			messages = append(messages, aiMessage)
+
+			response, answer := processToolCall(toolCall, i, len(choice.ToolCalls), executor)
 			messages = append(messages, response)
 			if answer != nil {
-				answers = append(answers, *answer)
+				debugLog("AgenticClient: received answer for question %d", questionIndex+1)
+				return messages, answer, nil
 			}
 		}
 	}
 
-	// If we collected some answers but ran out of tool calls, return what we have
-	if len(answers) > 0 {
-		debugLog("AgenticClient: ran out of tool calls, returning %d answers", len(answers))
-		return answers, nil
-	}
-
-	return nil, fmt.Errorf("exceeded maximum tool calls (%d), agent did not complete", maxToolCalls)
+	// Budget exhausted without answer
+	debugLog("AgenticClient: question %d exhausted budget", questionIndex+1)
+	return messages, nil, nil
 }
 
 // processToolCall processes a single tool call and returns the response message and optional answer
-func processToolCall(toolCall llms.ToolCall, index, total, currentAnswerCount int, executor *toolExecutor) (llms.MessageContent, *AnswerSchema) {
-	debugLog("AgenticClient: [%d/%d] executing tool: %s", index+1, total, toolCall.FunctionCall.Name)
+func processToolCall(
+	toolCall llms.ToolCall,
+	index, total int,
+	executor *toolExecutor,
+) (llms.MessageContent, *AnswerSchema) {
+	debugLog(
+		"AgenticClient: [%d/%d] executing tool: %s",
+		index+1,
+		total,
+		toolCall.FunctionCall.Name,
+	)
 	debugLog("AgenticClient: tool args: %s", truncateString(toolCall.FunctionCall.Arguments, 500))
 
 	// Check for submit_answer
@@ -240,13 +425,16 @@ func processToolCall(toolCall llms.ToolCall, index, total, currentAnswerCount in
 					llms.ToolCallResponse{
 						ToolCallID: toolCall.ID,
 						Name:       toolCall.FunctionCall.Name,
-						Content:    fmt.Sprintf("Error parsing answer: %v. Please try again with valid JSON.", err),
+						Content: fmt.Sprintf(
+							"Error parsing answer: %v. Please try again with valid JSON.",
+							err,
+						),
 					},
 				},
 			}, nil
 		}
-		debugLog("AgenticClient: received answer #%d: short_answer=%v, answer=%s",
-			currentAnswerCount+1, answer.ShortAnswer, truncateString(answer.Answer, 100))
+		debugLog("AgenticClient: received answer: short_answer=%v, answer=%s",
+			answer.ShortAnswer, truncateString(answer.Answer, 100))
 
 		// Return success response and the answer
 		return llms.MessageContent{
@@ -255,7 +443,7 @@ func processToolCall(toolCall llms.ToolCall, index, total, currentAnswerCount in
 				llms.ToolCallResponse{
 					ToolCallID: toolCall.ID,
 					Name:       toolCall.FunctionCall.Name,
-					Content:    "Answer recorded successfully. If you have answered all questions, respond with a plain text message saying 'I am finished'. Otherwise, continue with the next question.",
+					Content:    "Answer recorded successfully.",
 				},
 			},
 		}, &answer
@@ -281,7 +469,12 @@ func processToolCall(toolCall llms.ToolCall, index, total, currentAnswerCount in
 }
 
 // callLLMWithRetry calls the LLM with retry logic for transient errors
-func callLLMWithRetry(ctx context.Context, llm llms.Model, messages []llms.MessageContent, tools []llms.Tool) (*llms.ContentResponse, error) {
+func callLLMWithRetry(
+	ctx context.Context,
+	llm llms.Model,
+	messages []llms.MessageContent,
+	tools []llms.Tool,
+) (*llms.ContentResponse, error) {
 	var lastErr error
 	for attempt := 1; attempt <= maxLLMRetries; attempt++ {
 		resp, err := llm.GenerateContent(ctx, messages, llms.WithTools(tools))
@@ -327,6 +520,9 @@ func initLLM(ctx context.Context, opts *AgenticCallOptions) (llms.Model, error)
 			openai.WithModel(opts.Model),
 		)
 	default:
-		return nil, fmt.Errorf("unsupported provider: %s (supported: google, anthropic, openai)", opts.Provider)
+		return nil, fmt.Errorf(
+			"unsupported provider: %s (supported: google, anthropic, openai)",
+			opts.Provider,
+		)
 	}
 }
diff --git a/pkg/llmclient/agentic_client_integration_test.go b/pkg/llmclient/agentic_client_integration_test.go
index 549aa77a..b80381dc 100644
--- a/pkg/llmclient/agentic_client_integration_test.go
+++ b/pkg/llmclient/agentic_client_integration_test.go
@@ -6,169 +6,196 @@ import (
 	"path/filepath"
 	"testing"
 
-	"github.com/grafana/plugin-validator/pkg/logme"
 	"github.com/grafana/plugin-validator/pkg/prettyprint"
 	"github.com/stretchr/testify/require"
 )
 
-func hasGeminiAPIKey() bool {
-	return os.Getenv("GEMINI_API_KEY") != ""
+type providerConfig struct {
+	name     string
+	provider string
+	model    string
+	envKey   string
 }
 
-func hasAnthropicAPIKey() bool {
-	return os.Getenv("ANTHROPIC_API_KEY") != ""
+var providers = []providerConfig{
+	{name: "Gemini", provider: "google", model: "gemini-2.5-flash", envKey: "GEMINI_API_KEY"},
+	{name: "Anthropic", provider: "anthropic", model: "claude-haiku-4-5", envKey: "ANTHROPIC_API_KEY"},
+	{name: "OpenAI", provider: "openai", model: "gpt-5-mini", envKey: "OPENAI_API_KEY"},
 }
 
-// TestAgenticClient_NoFilesystemAccess tests that the agent correctly identifies
-// when an application does NOT access the filesystem
-func TestAgenticClient_NoFilesystemAccess(t *testing.T) {
-	if !hasGeminiAPIKey() {
-		t.Skip("GEMINI_API_KEY not set, skipping agentic client integration test")
+func skipIfMissingKey(t *testing.T, p providerConfig) {
+	t.Helper()
+	if os.Getenv(p.envKey) == "" || os.Getenv("DEBUG") != "1" {
+		t.Skipf("%s not set or DEBUG!=1, skipping %s integration test", p.envKey, p.name)
 	}
+}
+
+func newClient(t *testing.T, p providerConfig) AgenticClient {
+	t.Helper()
+	client, err := NewAgenticClient(&AgenticCallOptions{
+		Provider: p.provider,
+		Model:    p.model,
+		APIKey:   os.Getenv(p.envKey),
+	})
+	require.NoError(t, err)
+	return client
+}
+
+func TestAgenticClient_EmptyQuestions(t *testing.T) {
+	for _, p := range providers {
+		t.Run(p.name, func(t *testing.T) {
+			skipIfMissingKey(t, p)
+
+			client := newClient(t, p)
 
-	opts := &AgenticCallOptions{
-		Provider: "google",
-		Model:    "gemini-2.0-flash",
-		APIKey:   os.Getenv("GEMINI_API_KEY"),
+			testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access"))
+			require.NoError(t, err)
+
+			answers, err := client.CallLLM(context.Background(), []string{}, testDataPath)
+			require.Error(t, err, "Empty questions should return error")
+			require.Contains(t, err.Error(), "at least one question is required")
+			require.Nil(t, answers)
+		})
 	}
+}
 
-	client, err := NewAgenticClient(opts)
-	require.NoError(t, err)
+func TestAgenticClient_NoFilesystemAccess(t *testing.T) {
+	for _, p := range providers {
+		t.Run(p.name, func(t *testing.T) {
+			skipIfMissingKey(t, p)
 
-	testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access"))
-	require.NoError(t, err)
+			client := newClient(t, p)
+
+			testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access"))
+			require.NoError(t, err)
 
-	prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
+			prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
 
-	answers, err := client.CallLLM(context.Background(), prompt, testDataPath)
-	logme.DebugFln("Agent answers:")
-	prettyprint.Print(answers)
+			answers, err := client.CallLLM(context.Background(), []string{prompt}, testDataPath)
+			prettyprint.Print(answers)
 
-	require.NoError(t, err, "CallLLM should not return error")
-	require.Len(t, answers, 1, "Should return exactly one answer")
+			require.NoError(t, err, "CallLLM should not return error")
+			require.Len(t, answers, 1, "Should return exactly one answer")
 
-	answer := answers[0]
-	require.NotEmpty(t, answer.Answer, "Answer field should be populated")
-	require.Equal(t, false, answer.ShortAnswer, "ShortAnswer should be false - this app does not access the filesystem")
+			answer := answers[0]
+			require.Equal(t, prompt, answer.Question, "Question field should match input question")
+			require.NotEmpty(t, answer.Answer, "Answer field should be populated")
+			require.Equal(t, false, answer.ShortAnswer,
+				"ShortAnswer should be false - this app does not access the filesystem")
 
-	t.Logf("Agent Answer: %s", answer.Answer)
-	t.Logf("Short Answer: %v", answer.ShortAnswer)
-	if len(answer.Files) > 0 {
-		t.Logf("Files: %v", answer.Files)
+			t.Logf("Agent Answer: %s", answer.Answer)
+			t.Logf("Short Answer: %v", answer.ShortAnswer)
+			if len(answer.Files) > 0 {
+				t.Logf("Files: %v", answer.Files)
+			}
+		})
 	}
 }
 
-// TestAgenticClient_FilesystemAccess tests that the agent correctly identifies
-// when an application DOES access the filesystem
 func TestAgenticClient_FilesystemAccess(t *testing.T) {
-	if !hasGeminiAPIKey() {
-		t.Skip("GEMINI_API_KEY not set, skipping agentic client integration test")
-	}
+	for _, p := range providers {
+		t.Run(p.name, func(t *testing.T) {
+			skipIfMissingKey(t, p)
 
-	opts := &AgenticCallOptions{
-		Provider: "google",
-		Model:    "gemini-2.0-flash",
-		APIKey:   os.Getenv("GEMINI_API_KEY"),
-	}
-
-	client, err := NewAgenticClient(opts)
-	require.NoError(t, err)
+			client := newClient(t, p)
 
-	testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access"))
-	require.NoError(t, err)
+			testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access"))
+			require.NoError(t, err)
 
-	prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
+			prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
 
-	answers, err := client.CallLLM(context.Background(), prompt, testDataPath)
-	logme.DebugFln("Agent answers:")
-	prettyprint.Print(answers)
+			answers, err := client.CallLLM(context.Background(), []string{prompt}, testDataPath)
+			prettyprint.Print(answers)
 
-	require.NoError(t, err, "CallLLM should not return error")
-	require.Len(t, answers, 1, "Should return exactly one answer")
+			require.NoError(t, err, "CallLLM should not return error")
+			require.Len(t, answers, 1, "Should return exactly one answer")
 
-	answer := answers[0]
-	require.NotEmpty(t, answer.Answer, "Answer field should be populated")
-	require.Equal(t, true, answer.ShortAnswer, "ShortAnswer should be true - this app accesses the filesystem via os.ReadFile")
+			answer := answers[0]
+			require.Equal(t, prompt, answer.Question, "Question field should match input question")
+			require.NotEmpty(t, answer.Answer, "Answer field should be populated")
+			require.Equal(t, true, answer.ShortAnswer,
+				"ShortAnswer should be true - this app accesses the filesystem via os.ReadFile")
 
-	t.Logf("Agent Answer: %s", answer.Answer)
-	t.Logf("Short Answer: %v", answer.ShortAnswer)
-	if len(answer.Files) > 0 {
-		t.Logf("Files: %v", answer.Files)
+			t.Logf("Agent Answer: %s", answer.Answer)
+			t.Logf("Short Answer: %v", answer.ShortAnswer)
+			if len(answer.Files) > 0 {
+				t.Logf("Files: %v", answer.Files)
+			}
+		})
 	}
 }
 
-// TestAgenticClient_NoFilesystemAccess_Anthropic tests the same scenario using Anthropic Claude
-func TestAgenticClient_NoFilesystemAccess_Anthropic(t *testing.T) {
-	if !hasAnthropicAPIKey() {
-		t.Skip("ANTHROPIC_API_KEY not set, skipping Anthropic agentic client integration test")
-	}
+func TestAgenticClient_TwoQuestions(t *testing.T) {
+	for _, p := range providers {
+		t.Run(p.name, func(t *testing.T) {
+			skipIfMissingKey(t, p)
 
-	opts := &AgenticCallOptions{
-		Provider: "anthropic",
-		Model:    "claude-sonnet-4-5",
-		APIKey:   os.Getenv("ANTHROPIC_API_KEY"),
-	}
+			client := newClient(t, p)
 
-	client, err := NewAgenticClient(opts)
-	require.NoError(t, err)
+			testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access"))
+			require.NoError(t, err)
 
-	testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access"))
-	require.NoError(t, err)
+			questions := []string{
+				"Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations.",
+				"Which specific files contain the filesystem operations and what operations do they perform?",
+			}
 
-	prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
+			answers, err := client.CallLLM(context.Background(), questions, testDataPath)
+			prettyprint.Print(answers)
 
-	answers, err := client.CallLLM(context.Background(), prompt, testDataPath)
-	logme.DebugFln("Agent answers:")
-	prettyprint.Print(answers)
+			require.NoError(t, err, "CallLLM should not return error")
+			require.Len(t, answers, 2, "Should return exactly two answers")
 
-	require.NoError(t, err, "CallLLM should not return error")
-	require.Len(t, answers, 1, "Should return exactly one answer")
+			require.Equal(t, questions[0], answers[0].Question, "First answer's question should match")
+			require.NotEmpty(t, answers[0].Answer, "First answer should be populated")
+			require.Equal(t, true, answers[0].ShortAnswer,
+				"First answer should be true - app accesses filesystem")
 
-	answer := answers[0]
-	require.NotEmpty(t, answer.Answer, "Answer field should be populated")
-	require.Equal(t, false, answer.ShortAnswer, "ShortAnswer should be false - this app does not access the filesystem")
+			require.Equal(t, questions[1], answers[1].Question, "Second answer's question should match")
+			require.NotEmpty(t, answers[1].Answer, "Second answer should be populated")
 
-	t.Logf("Agent Answer: %s", answer.Answer)
-	t.Logf("Short Answer: %v", answer.ShortAnswer)
-	if len(answer.Files) > 0 {
-		t.Logf("Files: %v", answer.Files)
+			t.Logf("Answer 1: %s", answers[0].Answer)
+			t.Logf("Answer 2: %s", answers[1].Answer)
+		})
 	}
 }
 
-// TestAgenticClient_FilesystemAccess_Anthropic tests the same scenario using Anthropic Claude
-func TestAgenticClient_FilesystemAccess_Anthropic(t *testing.T) {
-	if !hasAnthropicAPIKey() {
-		t.Skip("ANTHROPIC_API_KEY not set, skipping Anthropic agentic client integration test")
-	}
+func TestAgenticClient_ThreeQuestions(t *testing.T) {
+	for _, p := range providers {
+		t.Run(p.name, func(t *testing.T) {
+			skipIfMissingKey(t, p)
 
-	opts := &AgenticCallOptions{
-		Provider: "anthropic",
-		Model:    "claude-sonnet-4-5",
-		APIKey:   os.Getenv("ANTHROPIC_API_KEY"),
-	}
+			client := newClient(t, p)
 
-	client, err := NewAgenticClient(opts)
-	require.NoError(t, err)
+			testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access"))
+			require.NoError(t, err)
 
-	testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access"))
-	require.NoError(t, err)
+			questions := []string{
+				"Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations.",
+				"Which specific files contain the filesystem operations and what operations do they perform?",
+				"Does this application use any caching mechanisms? If so, describe how the cache works.",
+			}
+
+			answers, err := client.CallLLM(context.Background(), questions, testDataPath)
+			prettyprint.Print(answers)
 
-	prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
+			require.NoError(t, err, "CallLLM should not return error")
+			require.Len(t, answers, 3, "Should return exactly three answers")
 
-	answers, err := client.CallLLM(context.Background(), prompt, testDataPath)
-	logme.DebugFln("Agent answers:")
-	prettyprint.Print(answers)
+			require.Equal(t, questions[0], answers[0].Question, "First answer's question should match")
+			require.NotEmpty(t, answers[0].Answer, "First answer should be populated")
+			require.Equal(t, true, answers[0].ShortAnswer,
+				"First answer should be true - app accesses filesystem")
 
-	require.NoError(t, err, "CallLLM should not return error")
-	require.Len(t, answers, 1, "Should return exactly one answer")
+			require.Equal(t, questions[1], answers[1].Question, "Second answer's question should match")
+			require.NotEmpty(t, answers[1].Answer, "Second answer should be populated")
 
-	answer := answers[0]
-	require.NotEmpty(t, answer.Answer, "Answer field should be populated")
-	require.Equal(t, true, answer.ShortAnswer, "ShortAnswer should be true - this app accesses the filesystem via os.ReadFile")
+			require.Equal(t, questions[2], answers[2].Question, "Third answer's question should match")
+			require.NotEmpty(t, answers[2].Answer, "Third answer should be populated")
 
-	t.Logf("Agent Answer: %s", answer.Answer)
-	t.Logf("Short Answer: %v", answer.ShortAnswer)
-	if len(answer.Files) > 0 {
-		t.Logf("Files: %v", answer.Files)
+			t.Logf("Answer 1: %s", answers[0].Answer)
+			t.Logf("Answer 2: %s", answers[1].Answer)
+			t.Logf("Answer 3: %s", answers[2].Answer)
+		})
 	}
 }
diff --git a/pkg/llmclient/agentic_debug.go b/pkg/llmclient/agentic_debug.go
index bf6f5050..7c93618d 100644
--- a/pkg/llmclient/agentic_debug.go
+++ b/pkg/llmclient/agentic_debug.go
@@ -7,7 +7,6 @@ import (
 	"os"
 	"path/filepath"
 	"sync"
-	"time"
 )
 
 var (
@@ -24,8 +23,7 @@ func initDebugLogger() {
 			return
 		}
 
-		timestamp := time.Now().Format("20060102-150405")
-		debugPath = filepath.Join(os.TempDir(), fmt.Sprintf("validator-agentic-%s.log", timestamp))
+		debugPath = filepath.Join(os.TempDir(), "validator-agentic.log")
 
 		f, err := os.OpenFile(debugPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
 		if err != nil {
diff --git a/pkg/llmclient/agentic_tools.go b/pkg/llmclient/agentic_tools.go
index dd0fd81c..5afb5e6f 100644
--- a/pkg/llmclient/agentic_tools.go
+++ b/pkg/llmclient/agentic_tools.go
@@ -135,7 +135,7 @@ func buildAgenticTools() []llms.Tool {
 						},
 						"short_answer": map[string]interface{}{
 							"type":        "boolean",
-							"description": "A boolean true/false answer to yes/no questions",
+							"description": "A boolean answer to the question: true means YES, false means NO. For example, if the question is 'Is the sky blue?' the short_answer is true. If the question is 'Is the sky green?' the short_answer is false.",
 						},
 						"files": map[string]interface{}{
 							"type":        "array",

From b8789906d4305f30f1e52347c1133916b0a6d9a4 Mon Sep 17 00:00:00 2001
From: Esteban Beltran <sergies@gmail.com>
Date: Tue, 3 Mar 2026 18:04:49 +0100
Subject: [PATCH 02/10] remove verbosity in tests

---
 .../agentic_client_integration_test.go        | 24 -------------------
 1 file changed, 24 deletions(-)

diff --git a/pkg/llmclient/agentic_client_integration_test.go b/pkg/llmclient/agentic_client_integration_test.go
index b80381dc..8c4ea088 100644
--- a/pkg/llmclient/agentic_client_integration_test.go
+++ b/pkg/llmclient/agentic_client_integration_test.go
@@ -6,7 +6,6 @@ import (
 	"path/filepath"
 	"testing"
 
-	"github.com/grafana/plugin-validator/pkg/prettyprint"
 	"github.com/stretchr/testify/require"
 )
 
@@ -72,7 +71,6 @@ func TestAgenticClient_NoFilesystemAccess(t *testing.T) {
 			prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
 
 			answers, err := client.CallLLM(context.Background(), []string{prompt}, testDataPath)
-			prettyprint.Print(answers)
 
 			require.NoError(t, err, "CallLLM should not return error")
 			require.Len(t, answers, 1, "Should return exactly one answer")
@@ -82,12 +80,6 @@ func TestAgenticClient_NoFilesystemAccess(t *testing.T) {
 			require.NotEmpty(t, answer.Answer, "Answer field should be populated")
 			require.Equal(t, false, answer.ShortAnswer,
 				"ShortAnswer should be false - this app does not access the filesystem")
-
-			t.Logf("Agent Answer: %s", answer.Answer)
-			t.Logf("Short Answer: %v", answer.ShortAnswer)
-			if len(answer.Files) > 0 {
-				t.Logf("Files: %v", answer.Files)
-			}
 		})
 	}
 }
@@ -105,7 +97,6 @@ func TestAgenticClient_FilesystemAccess(t *testing.T) {
 			prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
 
 			answers, err := client.CallLLM(context.Background(), []string{prompt}, testDataPath)
-			prettyprint.Print(answers)
 
 			require.NoError(t, err, "CallLLM should not return error")
 			require.Len(t, answers, 1, "Should return exactly one answer")
@@ -115,12 +106,6 @@ func TestAgenticClient_FilesystemAccess(t *testing.T) {
 			require.NotEmpty(t, answer.Answer, "Answer field should be populated")
 			require.Equal(t, true, answer.ShortAnswer,
 				"ShortAnswer should be true - this app accesses the filesystem via os.ReadFile")
-
-			t.Logf("Agent Answer: %s", answer.Answer)
-			t.Logf("Short Answer: %v", answer.ShortAnswer)
-			if len(answer.Files) > 0 {
-				t.Logf("Files: %v", answer.Files)
-			}
 		})
 	}
 }
@@ -141,7 +126,6 @@ func TestAgenticClient_TwoQuestions(t *testing.T) {
 			}
 
 			answers, err := client.CallLLM(context.Background(), questions, testDataPath)
-			prettyprint.Print(answers)
 
 			require.NoError(t, err, "CallLLM should not return error")
 			require.Len(t, answers, 2, "Should return exactly two answers")
@@ -153,9 +137,6 @@ func TestAgenticClient_TwoQuestions(t *testing.T) {
 
 			require.Equal(t, questions[1], answers[1].Question, "Second answer's question should match")
 			require.NotEmpty(t, answers[1].Answer, "Second answer should be populated")
-
-			t.Logf("Answer 1: %s", answers[0].Answer)
-			t.Logf("Answer 2: %s", answers[1].Answer)
 		})
 	}
 }
@@ -177,7 +158,6 @@ func TestAgenticClient_ThreeQuestions(t *testing.T) {
 			}
 
 			answers, err := client.CallLLM(context.Background(), questions, testDataPath)
-			prettyprint.Print(answers)
 
 			require.NoError(t, err, "CallLLM should not return error")
 			require.Len(t, answers, 3, "Should return exactly three answers")
@@ -192,10 +172,6 @@ func TestAgenticClient_ThreeQuestions(t *testing.T) {
 
 			require.Equal(t, questions[2], answers[2].Question, "Third answer's question should match")
 			require.NotEmpty(t, answers[2].Answer, "Third answer should be populated")
-
-			t.Logf("Answer 1: %s", answers[0].Answer)
-			t.Logf("Answer 2: %s", answers[1].Answer)
-			t.Logf("Answer 3: %s", answers[2].Answer)
 		})
 	}
 }

From 8ee25498070174b6ef71064464dc662a69431bf8 Mon Sep 17 00:00:00 2001
From: Esteban Beltran <sergies@gmail.com>
Date: Tue, 3 Mar 2026 18:18:59 +0100
Subject: [PATCH 03/10] add strict to openai

---
 pkg/llmclient/agentic_tools.go | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pkg/llmclient/agentic_tools.go b/pkg/llmclient/agentic_tools.go
index 5afb5e6f..6124bcfc 100644
--- a/pkg/llmclient/agentic_tools.go
+++ b/pkg/llmclient/agentic_tools.go
@@ -126,6 +126,7 @@ func buildAgenticTools() []llms.Tool {
 			Function: &llms.FunctionDefinition{
 				Name:        "submit_answer",
 				Description: "Submit your final answer to the question. Use this when you have gathered enough information.",
+				Strict: true,
 				Parameters: map[string]interface{}{
 					"type": "object",
 					"properties": map[string]interface{}{
@@ -140,14 +141,15 @@ func buildAgenticTools() []llms.Tool {
 						"files": map[string]interface{}{
 							"type":        "array",
 							"items":       map[string]interface{}{"type": "string"},
-							"description": "List of relevant files (optional)",
+							"description": "List of relevant files. Pass an empty array if not relevant.",
 						},
 						"code_snippet": map[string]interface{}{
 							"type":        "string",
-							"description": "A relevant code snippet (optional)",
+							"description": "A relevant code snippet. Pass an empty string if not relevant.",
 						},
 					},
-					"required": []string{"answer", "short_answer"},
+					"required":             []string{"answer", "short_answer", "files", "code_snippet"},
+					"additionalProperties": false,
 				},
 			},
 		},

From 77dfb068f48696bb2ecbe302ef50f59df5bedc48 Mon Sep 17 00:00:00 2001
From: Esteban Beltran <sergies@gmail.com>
Date: Wed, 4 Mar 2026 14:23:37 +0100
Subject: [PATCH 04/10] update

---
 docs/anthropic-choices-behavior.md            | 70 ++++++++++++++++++
 pkg/llmclient/agentic_client.go               | 74 +++++++++----------
 .../agentic_client_integration_test.go        | 42 +++++++++--
 3 files changed, 143 insertions(+), 43 deletions(-)
 create mode 100644 docs/anthropic-choices-behavior.md

diff --git a/docs/anthropic-choices-behavior.md b/docs/anthropic-choices-behavior.md
new file mode 100644
index 00000000..a578e4c3
--- /dev/null
+++ b/docs/anthropic-choices-behavior.md
@@ -0,0 +1,70 @@
+# Anthropic Choices and Message Serialization in langchaingo
+
+## Overview
+Anthropic's response structure and langchaingo's serialization behavior require special handling when building multi-turn conversations with tool use.
+
+## Response Structure (Anthropic → langchaingo)
+
+Anthropic API returns responses as an array of **content blocks**:
+```
+[text_block, tool_use_block, tool_use_block, ...]
+```
+
+langchaingo converts each content block into a **separate ContentChoice**:
+- `type: "text"` → `ContentChoice{Content: "...", ToolCalls: []}`
+- `type: "tool_use"` → `ContentChoice{Content: "", ToolCalls: [{...}]}`
+- `type: "thinking"` → `ContentChoice{Content: "", GenerationInfo: {...}}`
+
+**Key insight:** One Anthropic response can produce multiple Choices. For example:
+- Response with text + 2 tool calls → 3 Choices
+- Response with just text → 1 Choice
+
+## Serialization Constraint (langchaingo → Anthropic)
+
+The critical limitation is in langchaingo's Anthropic `handleAIMessage()`:
+```go
+if toolCall, ok := msg.Parts[0].(llms.ToolCall); ok {
+    // Only Parts[0] is serialized!
+}
+```
+
+**This means:**
+- Only `Parts[0]` of a MessageContent is serialized back to Anthropic
+- If you create `MessageContent{Parts: [toolCall1, toolCall2]}`, only `toolCall1` is sent
+- Multiple ToolCalls in one message **will lose data**
+
+## Required Pattern: Interleaved Messages
+
+To work around this limitation, tool calls must be **interleaved** as separate messages:
+
+```
+AI message: Parts[toolCall1]
+Tool message: Parts[toolResult1]
+AI message: Parts[toolCall2]
+Tool message: Parts[toolResult2]
+```
+
+Not:
+```
+AI message: Parts[toolCall1, toolCall2]  // toolCall2 would be lost!
+Tool message: Parts[toolResult1, toolResult2]
+```
+
+## Why Merging Choices is Necessary
+
+When processing Anthropic's response:
+1. Anthropic returns separate content blocks (potentially text + multiple tools)
+2. langchaingo creates one Choice per block
+3. We must merge these Choices to get the complete response
+4. Then we must split them back into individual AI messages for serialization
+
+The merge preserves all information for processing, but the split ensures proper serialization.
+
+## Implementation Details in agentic_client.go
+
+The choice-merging code performs this merge:
+- Collects all content parts from separate Choices
+- Collects all ToolCalls from separate Choices
+- Creates one merged view for processing
+
+Then later in the tool call processing, it **reverses** this by creating one AI message per ToolCall to avoid the serialization bug.
diff --git a/pkg/llmclient/agentic_client.go b/pkg/llmclient/agentic_client.go
index e41ab881..a8bac0d4 100644
--- a/pkg/llmclient/agentic_client.go
+++ b/pkg/llmclient/agentic_client.go
@@ -38,8 +38,6 @@ You can only use one tool at a time.
 IMPORTANT: You are in non-interactive mode. No one will read your text answers, only tools.
 When you have gathered enough information, use submit_answer to provide your answer.`
 
-	questionAppendPrompt = `Start by listing the files in the repository and exploring the contents.`
-
 	budgetNudgePrompt = `You have only %d tool calls remaining. Wrap up your investigation and call submit_answer now with whatever information you have gathered so far.`
 
 	useToolsReminderPrompt = `You are in non-interactive mode. You must start using your tools now to explore the repository. When you have enough information, use submit_answer to provide your answer.`
@@ -73,6 +71,8 @@ type agenticClientImpl struct {
 	apiKey   string
 	model    string
 	provider string
+	tools    []llms.Tool
+	executor *toolExecutor
 }
 
 // NewAgenticClient creates a new AgenticClient with the given options
@@ -119,11 +119,9 @@ func (c *agenticClientImpl) CallLLM(
 		return nil, fmt.Errorf("failed to initialize LLM: %w", err)
 	}
 
-	// Build tools
-	tools := buildAgenticTools()
-
-	// Create tool executor
-	executor := newToolExecutor(repositoryPath)
+	// Initialize tools and executor for this repository
+	c.tools = buildAgenticTools()
+	c.executor = newToolExecutor(repositoryPath)
 
 	// Build initial messages with system prompt only (no user message yet)
 	messages := []llms.MessageContent{
@@ -152,14 +150,13 @@ func (c *agenticClientImpl) CallLLM(
 		debugLog("Question: %s", truncateString(question, 200))
 
 		originalQuestion := question
-		question = fmt.Sprintf("%s\n\n%s", question, questionAppendPrompt)
 
 		// Determine budget for this question
-		budget := maxToolCallsFirstQuestion
+		toolsBudget := maxToolCallsFirstQuestion
 		if questionIndex > 0 {
-			budget = maxToolCallsFollowUp
+			toolsBudget = maxToolCallsFollowUp
 		}
-		debugLog("Budget: %d tool calls", budget)
+		debugLog("Budget: %d tool calls", toolsBudget)
 
 		// Add the question as a human message
 		messages = append(messages, llms.TextParts(llms.ChatMessageTypeHuman, question))
@@ -169,9 +166,7 @@ func (c *agenticClientImpl) CallLLM(
 			ctx,
 			llm,
 			messages,
-			tools,
-			executor,
-			budget,
+			toolsBudget,
 			questionIndex,
 		)
 		messages = updatedMessages
@@ -212,12 +207,10 @@ func (c *agenticClientImpl) runQuestionLoop(
 	ctx context.Context,
 	llm llms.Model,
 	messages []llms.MessageContent,
-	tools []llms.Tool,
-	executor *toolExecutor,
-	budget int,
+	toolsBudget int,
 	questionIndex int,
 ) ([]llms.MessageContent, *AnswerSchema, error) {
-	toolCallsRemaining := budget
+	toolCallsRemaining := toolsBudget
 	consecutiveNoTools := 0
 	iteration := 0
 
@@ -239,7 +232,7 @@ func (c *agenticClientImpl) runQuestionLoop(
 
 		// Call LLM with retry logic
 		debugLog("AgenticClient: calling LLM...")
-		resp, err := callLLMWithRetry(ctx, llm, messages, tools)
+		resp, err := c.callLLMWithRetry(ctx, llm, messages)
 		if err != nil {
 			debugLog("AgenticClient: LLM call failed: %v", err)
 			return messages, nil, fmt.Errorf(
@@ -254,13 +247,18 @@ func (c *agenticClientImpl) runQuestionLoop(
 			return messages, nil, fmt.Errorf("no response from LLM")
 		}
 
-		// Log raw response for debugging
-		debugLog("AgenticClient: received response with %d choices", len(resp.Choices))
-		if choicesJSON, err := json.MarshalIndent(resp.Choices, "", "  "); err == nil {
-			debugLog("Raw response Choices:\n%s", string(choicesJSON))
-		}
-
-		// Merge all choices into one (Anthropic returns text and tool calls as separate choices)
+		// Merge all choices into one unified view for processing.
+		// 
+		// Background: Anthropic's API returns separate content blocks (text, tool_use, thinking)
+		// which go-langchain converts into separate ContentChoice objects. For example, a response
+		// with text + 2 tool calls becomes 3 separate Choices.
+		//
+		// We merge them here to process the complete response, but later (around line 360) we must
+		// split them back into separate AI messages because go-langchain's handleAIMessage() only
+		// serializes Parts[0] when sending back to Anthropic. Putting multiple tool calls in one
+		// message would lose all but the first.
+		//
+		// See docs/anthropic-choices-behavior.md for detailed explanation of this pattern.
 		mergedChoice := llms.ContentChoice{}
 		var allToolCalls []llms.ToolCall
 		var contentParts []string
@@ -372,10 +370,14 @@ func (c *agenticClientImpl) runQuestionLoop(
 		}
 
 		// Process each tool call as a separate AI message + tool result pair.
-		// langchaingo's Anthropic handleAIMessage only serializes Parts[0], so
-		// putting all tool calls in one message loses everything after the first.
-		// Interleaving ensures each tool_use has a matching tool_result in the
-		// immediately following user message.
+		// This is the "split" part of the merge-then-split pattern.
+		//
+		// Why: go-langchain's Anthropic handleAIMessage() only serializes Parts[0], so
+		// MessageContent{Parts: [toolCall1, toolCall2]} would lose toolCall2 when sent back.
+		// By creating one AI message per tool call, we ensure all tool calls are properly
+		// serialized. Each tool_use then has its matching tool_result in the following message.
+		//
+		// See docs/anthropic-choices-behavior.md for details on this serialization constraint.
 		for i, toolCall := range choice.ToolCalls {
 			toolCallsRemaining--
 
@@ -385,7 +387,7 @@ func (c *agenticClientImpl) runQuestionLoop(
 			}
 			messages = append(messages, aiMessage)
 
-			response, answer := processToolCall(toolCall, i, len(choice.ToolCalls), executor)
+			response, answer := c.processToolCall(toolCall, i, len(choice.ToolCalls))
 			messages = append(messages, response)
 			if answer != nil {
 				debugLog("AgenticClient: received answer for question %d", questionIndex+1)
@@ -400,10 +402,9 @@ func (c *agenticClientImpl) runQuestionLoop(
 }
 
 // processToolCall processes a single tool call and returns the response message and optional answer
-func processToolCall(
+func (c *agenticClientImpl) processToolCall(
 	toolCall llms.ToolCall,
 	index, total int,
-	executor *toolExecutor,
 ) (llms.MessageContent, *AnswerSchema) {
 	debugLog(
 		"AgenticClient: [%d/%d] executing tool: %s",
@@ -450,7 +451,7 @@ func processToolCall(
 	}
 
 	// Execute other tools
-	result, err := executor.execute(toolCall.FunctionCall.Name, toolCall.FunctionCall.Arguments)
+	result, err := c.executor.execute(toolCall.FunctionCall.Name, toolCall.FunctionCall.Arguments)
 	if err != nil {
 		result = fmt.Sprintf("Error: %v", err)
 	}
@@ -469,15 +470,14 @@ func processToolCall(
 }
 
 // callLLMWithRetry calls the LLM with retry logic for transient errors
-func callLLMWithRetry(
+func (c *agenticClientImpl) callLLMWithRetry(
 	ctx context.Context,
 	llm llms.Model,
 	messages []llms.MessageContent,
-	tools []llms.Tool,
 ) (*llms.ContentResponse, error) {
 	var lastErr error
 	for attempt := 1; attempt <= maxLLMRetries; attempt++ {
-		resp, err := llm.GenerateContent(ctx, messages, llms.WithTools(tools))
+		resp, err := llm.GenerateContent(ctx, messages, llms.WithTools(c.tools))
 		if err == nil {
 			return resp, nil
 		}
diff --git a/pkg/llmclient/agentic_client_integration_test.go b/pkg/llmclient/agentic_client_integration_test.go
index 8c4ea088..abf72676 100644
--- a/pkg/llmclient/agentic_client_integration_test.go
+++ b/pkg/llmclient/agentic_client_integration_test.go
@@ -18,7 +18,12 @@ type providerConfig struct {
 
 var providers = []providerConfig{
 	{name: "Gemini", provider: "google", model: "gemini-2.5-flash", envKey: "GEMINI_API_KEY"},
-	{name: "Anthropic", provider: "anthropic", model: "claude-haiku-4-5", envKey: "ANTHROPIC_API_KEY"},
+	{
+		name:     "Anthropic",
+		provider: "anthropic",
+		model:    "claude-haiku-4-5",
+		envKey:   "ANTHROPIC_API_KEY",
+	},
 	{name: "OpenAI", provider: "openai", model: "gpt-5-mini", envKey: "OPENAI_API_KEY"},
 }
 
@@ -130,12 +135,22 @@ func TestAgenticClient_TwoQuestions(t *testing.T) {
 			require.NoError(t, err, "CallLLM should not return error")
 			require.Len(t, answers, 2, "Should return exactly two answers")
 
-			require.Equal(t, questions[0], answers[0].Question, "First answer's question should match")
+			require.Equal(
+				t,
+				questions[0],
+				answers[0].Question,
+				"First answer's question should match",
+			)
 			require.NotEmpty(t, answers[0].Answer, "First answer should be populated")
 			require.Equal(t, true, answers[0].ShortAnswer,
 				"First answer should be true - app accesses filesystem")
 
-			require.Equal(t, questions[1], answers[1].Question, "Second answer's question should match")
+			require.Equal(
+				t,
+				questions[1],
+				answers[1].Question,
+				"Second answer's question should match",
+			)
 			require.NotEmpty(t, answers[1].Answer, "Second answer should be populated")
 		})
 	}
@@ -162,15 +177,30 @@ func TestAgenticClient_ThreeQuestions(t *testing.T) {
 			require.NoError(t, err, "CallLLM should not return error")
 			require.Len(t, answers, 3, "Should return exactly three answers")
 
-			require.Equal(t, questions[0], answers[0].Question, "First answer's question should match")
+			require.Equal(
+				t,
+				questions[0],
+				answers[0].Question,
+				"First answer's question should match",
+			)
 			require.NotEmpty(t, answers[0].Answer, "First answer should be populated")
 			require.Equal(t, true, answers[0].ShortAnswer,
 				"First answer should be true - app accesses filesystem")
 
-			require.Equal(t, questions[1], answers[1].Question, "Second answer's question should match")
+			require.Equal(
+				t,
+				questions[1],
+				answers[1].Question,
+				"Second answer's question should match",
+			)
 			require.NotEmpty(t, answers[1].Answer, "Second answer should be populated")
 
-			require.Equal(t, questions[2], answers[2].Question, "Third answer's question should match")
+			require.Equal(
+				t,
+				questions[2],
+				answers[2].Question,
+				"Third answer's question should match",
+			)
 			require.NotEmpty(t, answers[2].Answer, "Third answer should be populated")
 		})
 	}

From 970e69dd3f86334750918fcc81df2829b5369f19 Mon Sep 17 00:00:00 2001
From: Esteban Beltran <sergies@gmail.com>
Date: Wed, 4 Mar 2026 15:46:40 +0100
Subject: [PATCH 05/10] wip ditch langchain

---
 go.mod                               |   2 +
 go.sum                               |   4 +
 pkg/llmclient/agentic_client.go      | 148 ++++++-----
 pkg/llmclient/agentic_debug.go       |  41 +--
 pkg/llmclient/agentic_tools.go       |  17 +-
 pkg/llmprovider/gemini/client.go     | 380 +++++++++++++++++++++++++++
 pkg/llmprovider/langchain.go         | 250 ++++++++++++++++++
 pkg/llmprovider/langchain_adapter.go |  69 +++++
 pkg/llmprovider/types.go             | 171 ++++++++++++
 pkg/logme/logme.go                   |  42 +++
 10 files changed, 1010 insertions(+), 114 deletions(-)
 create mode 100644 pkg/llmprovider/gemini/client.go
 create mode 100644 pkg/llmprovider/langchain.go
 create mode 100644 pkg/llmprovider/langchain_adapter.go
 create mode 100644 pkg/llmprovider/types.go

diff --git a/go.mod b/go.mod
index de3a3278..50893c06 100644
--- a/go.mod
+++ b/go.mod
@@ -131,6 +131,7 @@ require (
 	github.com/googleapis/enterprise-certificate-proxy v0.3.6 // indirect
 	github.com/googleapis/gax-go/v2 v2.14.2 // indirect
 	github.com/gopherjs/gopherjs v1.17.2 // indirect
+	github.com/gorilla/websocket v1.5.3 // indirect
 	github.com/hhatto/gorst v0.0.0-20181029133204-ca9f730cac5b // indirect
 	github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
 	github.com/jdkato/prose v1.2.1 // indirect
@@ -220,6 +221,7 @@ require (
 	golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect
 	gonum.org/v1/gonum v0.16.0 // indirect
 	google.golang.org/api v0.237.0 // indirect
+	google.golang.org/genai v1.48.0 // indirect
 	google.golang.org/genproto v0.0.0-20250707201910-8d1bb00bc6a7 // indirect
 	google.golang.org/genproto/googleapis/api v0.0.0-20251124214823-79d6a2a48846 // indirect
 	google.golang.org/genproto/googleapis/rpc v0.0.0-20251213004720-97cd9d5aeac2 // indirect
diff --git a/go.sum b/go.sum
index 35c11975..72cea00a 100644
--- a/go.sum
+++ b/go.sum
@@ -283,6 +283,8 @@ github.com/googleapis/gax-go/v2 v2.14.2 h1:eBLnkZ9635krYIPD+ag1USrOAI0Nr0QYF3+/3
 github.com/googleapis/gax-go/v2 v2.14.2/go.mod h1:ON64QhlJkhVtSqp4v1uaK92VyZ2gmvDQsweuyLV+8+w=
 github.com/gopherjs/gopherjs v1.17.2 h1:fQnZVsXk8uxXIStYb0N4bGk7jeyTalG/wsZjQ25dO0g=
 github.com/gopherjs/gopherjs v1.17.2/go.mod h1:pRRIvn/QzFLrKfvEz3qUuEhtE/zLCWfreZ6J5gM2i+k=
+github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
+github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
 github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
 github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1 h1:e9Rjr40Z98/clHv5Yg79Is0NtosR5LXRvdr7o/6NwbA=
 github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1/go.mod h1:tIxuGz/9mpox++sgp9fJjHO0+q1X9/UOWd798aAm22M=
@@ -669,6 +671,8 @@ google.golang.org/api v0.237.0 h1:MP7XVsGZesOsx3Q8WVa4sUdbrsTvDSOERd3Vh4xj/wc=
 google.golang.org/api v0.237.0/go.mod h1:cOVEm2TpdAGHL2z+UwyS+kmlGr3bVWQQ6sYEqkKje50=
 google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
 google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
+google.golang.org/genai v1.48.0 h1:1vb15G291wAjJJueisMDpUhssljhEdJU2t5qTidrVPs=
+google.golang.org/genai v1.48.0/go.mod h1:A3kkl0nyBjyFlNjgxIwKq70julKbIxpSxqKO5gw/gmk=
 google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
 google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
 google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
diff --git a/pkg/llmclient/agentic_client.go b/pkg/llmclient/agentic_client.go
index a8bac0d4..3d1b8bb8 100644
--- a/pkg/llmclient/agentic_client.go
+++ b/pkg/llmclient/agentic_client.go
@@ -7,9 +7,9 @@ import (
 	"strings"
 	"time"
 
-	"github.com/tmc/langchaingo/llms"
+	"github.com/grafana/plugin-validator/pkg/llmprovider"
+	"github.com/grafana/plugin-validator/pkg/llmprovider/gemini"
 	"github.com/tmc/langchaingo/llms/anthropic"
-	"github.com/tmc/langchaingo/llms/googleai"
 	"github.com/tmc/langchaingo/llms/openai"
 )
 
@@ -71,7 +71,7 @@ type agenticClientImpl struct {
 	apiKey   string
 	model    string
 	provider string
-	tools    []llms.Tool
+	tools    []llmprovider.Tool
 	executor *toolExecutor
 }
 
@@ -114,7 +114,7 @@ func (c *agenticClientImpl) CallLLM(
 		Model:    c.model,
 		Provider: c.provider,
 	}
-	llm, err := initLLM(ctx, opts)
+	provider, err := initProvider(ctx, opts)
 	if err != nil {
 		return nil, fmt.Errorf("failed to initialize LLM: %w", err)
 	}
@@ -124,8 +124,8 @@ func (c *agenticClientImpl) CallLLM(
 	c.executor = newToolExecutor(repositoryPath)
 
 	// Build initial messages with system prompt only (no user message yet)
-	messages := []llms.MessageContent{
-		llms.TextParts(llms.ChatMessageTypeSystem, systemPrompt),
+	messages := []llmprovider.Message{
+		llmprovider.TextMessage(llmprovider.RoleSystem, systemPrompt),
 	}
 
 	// Print debug log file path before starting the loop
@@ -159,12 +159,12 @@ func (c *agenticClientImpl) CallLLM(
 		debugLog("Budget: %d tool calls", toolsBudget)
 
 		// Add the question as a human message
-		messages = append(messages, llms.TextParts(llms.ChatMessageTypeHuman, question))
+		messages = append(messages, llmprovider.TextMessage(llmprovider.RoleHuman, question))
 
 		// Run the question loop
 		updatedMessages, answer, err := c.runQuestionLoop(
 			ctx,
-			llm,
+			provider,
 			messages,
 			toolsBudget,
 			questionIndex,
@@ -205,11 +205,11 @@ func (c *agenticClientImpl) CallLLM(
 // Returns updated messages, the answer (or nil if budget exhausted), and error.
 func (c *agenticClientImpl) runQuestionLoop(
 	ctx context.Context,
-	llm llms.Model,
-	messages []llms.MessageContent,
+	provider llmprovider.Provider,
+	messages []llmprovider.Message,
 	toolsBudget int,
 	questionIndex int,
-) ([]llms.MessageContent, *AnswerSchema, error) {
+) ([]llmprovider.Message, *AnswerSchema, error) {
 	toolCallsRemaining := toolsBudget
 	consecutiveNoTools := 0
 	iteration := 0
@@ -224,15 +224,15 @@ func (c *agenticClientImpl) runQuestionLoop(
 		if !budgetNudged && toolCallsRemaining <= 5 {
 			budgetNudged = true
 			debugLog("AgenticClient: nudging model about low budget")
-			messages = append(messages, llms.TextParts(
-				llms.ChatMessageTypeHuman,
+			messages = append(messages, llmprovider.TextMessage(
+				llmprovider.RoleHuman,
 				fmt.Sprintf(budgetNudgePrompt, toolCallsRemaining),
 			))
 		}
 
 		// Call LLM with retry logic
 		debugLog("AgenticClient: calling LLM...")
-		resp, err := c.callLLMWithRetry(ctx, llm, messages)
+		resp, err := c.callLLMWithRetry(ctx, provider, messages)
 		if err != nil {
 			debugLog("AgenticClient: LLM call failed: %v", err)
 			return messages, nil, fmt.Errorf(
@@ -253,19 +253,23 @@ func (c *agenticClientImpl) runQuestionLoop(
 		// which go-langchain converts into separate ContentChoice objects. For example, a response
 		// with text + 2 tool calls becomes 3 separate Choices.
 		//
-		// We merge them here to process the complete response, but later (around line 360) we must
+		// We merge them here to process the complete response, but later we must
 		// split them back into separate AI messages because go-langchain's handleAIMessage() only
 		// serializes Parts[0] when sending back to Anthropic. Putting multiple tool calls in one
 		// message would lose all but the first.
 		//
 		// See docs/anthropic-choices-behavior.md for detailed explanation of this pattern.
-		mergedChoice := llms.ContentChoice{}
-		var allToolCalls []llms.ToolCall
+		mergedChoice := llmprovider.Choice{}
+		var allToolCalls []llmprovider.ToolCallPart
 		var contentParts []string
 
 		for i, ch := range resp.Choices {
-			debugLog("AgenticClient: processing choice %d: Content=%q, ToolCalls=%d",
-				i, truncateString(ch.Content, 100), len(ch.ToolCalls))
+			debugLog("AgenticClient: processing choice %d: Content=%q, ToolCalls=%d, Thinking=%d",
+				i, truncateString(ch.Content, 100), len(ch.ToolCalls), len(ch.Thinking))
+			for j, t := range ch.Thinking {
+				debugLog("AgenticClient:   thinking[%d]: text=%q sig=%v",
+					j, truncateString(t.Text, 150), t.Signature != "")
+			}
 
 			if ch.Content != "" {
 				contentParts = append(contentParts, ch.Content)
@@ -316,11 +320,11 @@ func (c *agenticClientImpl) runQuestionLoop(
 
 			// Add the AI response and remind to use tools
 			if choice.Content != "" {
-				messages = append(messages, llms.TextParts(llms.ChatMessageTypeAI, choice.Content))
+				messages = append(messages, llmprovider.TextMessage(llmprovider.RoleAI, choice.Content))
 			}
 			debugLog("AgenticClient: reminding agent to use tools")
-			messages = append(messages, llms.TextParts(
-				llms.ChatMessageTypeHuman,
+			messages = append(messages, llmprovider.TextMessage(
+				llmprovider.RoleHuman,
 				useToolsReminderPrompt,
 			))
 			toolCallsRemaining--
@@ -333,7 +337,7 @@ func (c *agenticClientImpl) runQuestionLoop(
 		// Validate submit_answer is called alone
 		hasSubmitAnswer := false
 		for _, toolCall := range choice.ToolCalls {
-			if toolCall.FunctionCall.Name == "submit_answer" {
+			if toolCall.Name == "submit_answer" {
 				hasSubmitAnswer = true
 				break
 			}
@@ -343,23 +347,23 @@ func (c *agenticClientImpl) runQuestionLoop(
 			// Add a single AI message with ALL tool calls so every
 			// tool_result below has a matching tool_use in the preceding
 			// assistant message.
-			aiParts := make([]llms.ContentPart, len(choice.ToolCalls))
+			aiParts := make([]llmprovider.Part, len(choice.ToolCalls))
 			for i, tc := range choice.ToolCalls {
 				aiParts[i] = tc
 			}
-			aiMessage := llms.MessageContent{
-				Role:  llms.ChatMessageTypeAI,
+			aiMessage := llmprovider.Message{
+				Role:  llmprovider.RoleAI,
 				Parts: aiParts,
 			}
 			messages = append(messages, aiMessage)
 			for _, toolCall := range choice.ToolCalls {
 				toolCallsRemaining--
-				errorResponse := llms.MessageContent{
-					Role: llms.ChatMessageTypeTool,
-					Parts: []llms.ContentPart{
-						llms.ToolCallResponse{
+				errorResponse := llmprovider.Message{
+					Role: llmprovider.RoleTool,
+					Parts: []llmprovider.Part{
+						llmprovider.ToolResultPart{
 							ToolCallID: toolCall.ID,
-							Name:       toolCall.FunctionCall.Name,
+							Name:       toolCall.Name,
 							Content:    submitAnswerAloneError,
 						},
 					},
@@ -381,9 +385,9 @@ func (c *agenticClientImpl) runQuestionLoop(
 		for i, toolCall := range choice.ToolCalls {
 			toolCallsRemaining--
 
-			aiMessage := llms.MessageContent{
-				Role:  llms.ChatMessageTypeAI,
-				Parts: []llms.ContentPart{toolCall},
+			aiMessage := llmprovider.Message{
+				Role:  llmprovider.RoleAI,
+				Parts: []llmprovider.Part{toolCall},
 			}
 			messages = append(messages, aiMessage)
 
@@ -403,29 +407,29 @@ func (c *agenticClientImpl) runQuestionLoop(
 
 // processToolCall processes a single tool call and returns the response message and optional answer
 func (c *agenticClientImpl) processToolCall(
-	toolCall llms.ToolCall,
+	toolCall llmprovider.ToolCallPart,
 	index, total int,
-) (llms.MessageContent, *AnswerSchema) {
+) (llmprovider.Message, *AnswerSchema) {
 	debugLog(
 		"AgenticClient: [%d/%d] executing tool: %s",
 		index+1,
 		total,
-		toolCall.FunctionCall.Name,
+		toolCall.Name,
 	)
-	debugLog("AgenticClient: tool args: %s", truncateString(toolCall.FunctionCall.Arguments, 500))
+	debugLog("AgenticClient: tool args: %s", truncateString(toolCall.Arguments, 500))
 
 	// Check for submit_answer
-	if toolCall.FunctionCall.Name == "submit_answer" {
+	if toolCall.Name == "submit_answer" {
 		var answer AnswerSchema
-		if err := json.Unmarshal([]byte(toolCall.FunctionCall.Arguments), &answer); err != nil {
+		if err := json.Unmarshal([]byte(toolCall.Arguments), &answer); err != nil {
 			debugLog("AgenticClient: failed to parse submit_answer: %v", err)
 			// Report parse error back to agent so it can retry
-			return llms.MessageContent{
-				Role: llms.ChatMessageTypeTool,
-				Parts: []llms.ContentPart{
-					llms.ToolCallResponse{
+			return llmprovider.Message{
+				Role: llmprovider.RoleTool,
+				Parts: []llmprovider.Part{
+					llmprovider.ToolResultPart{
 						ToolCallID: toolCall.ID,
-						Name:       toolCall.FunctionCall.Name,
+						Name:       toolCall.Name,
 						Content: fmt.Sprintf(
 							"Error parsing answer: %v. Please try again with valid JSON.",
 							err,
@@ -438,12 +442,12 @@ func (c *agenticClientImpl) processToolCall(
 			answer.ShortAnswer, truncateString(answer.Answer, 100))
 
 		// Return success response and the answer
-		return llms.MessageContent{
-			Role: llms.ChatMessageTypeTool,
-			Parts: []llms.ContentPart{
-				llms.ToolCallResponse{
+		return llmprovider.Message{
+			Role: llmprovider.RoleTool,
+			Parts: []llmprovider.Part{
+				llmprovider.ToolResultPart{
 					ToolCallID: toolCall.ID,
-					Name:       toolCall.FunctionCall.Name,
+					Name:       toolCall.Name,
 					Content:    "Answer recorded successfully.",
 				},
 			},
@@ -451,18 +455,18 @@ func (c *agenticClientImpl) processToolCall(
 	}
 
 	// Execute other tools
-	result, err := c.executor.execute(toolCall.FunctionCall.Name, toolCall.FunctionCall.Arguments)
+	result, err := c.executor.execute(toolCall.Name, toolCall.Arguments)
 	if err != nil {
 		result = fmt.Sprintf("Error: %v", err)
 	}
 	debugLog("AgenticClient: tool result: %s", truncateString(result, 300))
 
-	return llms.MessageContent{
-		Role: llms.ChatMessageTypeTool,
-		Parts: []llms.ContentPart{
-			llms.ToolCallResponse{
+	return llmprovider.Message{
+		Role: llmprovider.RoleTool,
+		Parts: []llmprovider.Part{
+			llmprovider.ToolResultPart{
 				ToolCallID: toolCall.ID,
-				Name:       toolCall.FunctionCall.Name,
+				Name:       toolCall.Name,
 				Content:    result,
 			},
 		},
@@ -472,12 +476,12 @@ func (c *agenticClientImpl) processToolCall(
 // callLLMWithRetry calls the LLM with retry logic for transient errors
 func (c *agenticClientImpl) callLLMWithRetry(
 	ctx context.Context,
-	llm llms.Model,
-	messages []llms.MessageContent,
-) (*llms.ContentResponse, error) {
+	provider llmprovider.Provider,
+	messages []llmprovider.Message,
+) (*llmprovider.Response, error) {
 	var lastErr error
 	for attempt := 1; attempt <= maxLLMRetries; attempt++ {
-		resp, err := llm.GenerateContent(ctx, messages, llms.WithTools(c.tools))
+		resp, err := provider.GenerateContent(ctx, messages, llmprovider.WithTools(c.tools))
 		if err == nil {
 			return resp, nil
 		}
@@ -500,25 +504,31 @@ func truncateString(s string, maxLen int) string {
 	return s[:maxLen] + "..."
 }
 
-// initLLM initializes the appropriate LLM based on provider
-func initLLM(ctx context.Context, opts *AgenticCallOptions) (llms.Model, error) {
+// initProvider initializes the appropriate provider based on configuration.
+// Gemini uses our native provider; Anthropic and OpenAI use langchain adapters
+// until they are migrated.
+func initProvider(ctx context.Context, opts *AgenticCallOptions) (llmprovider.Provider, error) {
 	switch opts.Provider {
 	case "google":
-		return googleai.New(
-			ctx,
-			googleai.WithAPIKey(opts.APIKey),
-			googleai.WithDefaultModel(opts.Model),
-		)
+		return gemini.New(ctx, opts.APIKey, opts.Model)
 	case "anthropic":
-		return anthropic.New(
+		llm, err := anthropic.New(
 			anthropic.WithToken(opts.APIKey),
 			anthropic.WithModel(opts.Model),
 		)
+		if err != nil {
+			return nil, err
+		}
+		return llmprovider.NewLangchainAdapter(llm), nil
 	case "openai":
-		return openai.New(
+		llm, err := openai.New(
 			openai.WithToken(opts.APIKey),
 			openai.WithModel(opts.Model),
 		)
+		if err != nil {
+			return nil, err
+		}
+		return llmprovider.NewLangchainAdapter(llm), nil
 	default:
 		return nil, fmt.Errorf(
 			"unsupported provider: %s (supported: google, anthropic, openai)",
diff --git a/pkg/llmclient/agentic_debug.go b/pkg/llmclient/agentic_debug.go
index 7c93618d..0b31075a 100644
--- a/pkg/llmclient/agentic_debug.go
+++ b/pkg/llmclient/agentic_debug.go
@@ -2,50 +2,19 @@ package llmclient
 
 import (
 	"fmt"
-	"io"
-	"log"
 	"os"
-	"path/filepath"
-	"sync"
-)
 
-var (
-	debugLogger *log.Logger
-	debugOnce   sync.Once
-	debugPath   string
+	"github.com/grafana/plugin-validator/pkg/logme"
 )
 
-func initDebugLogger() {
-	debugOnce.Do(func() {
-		debugVal := os.Getenv("DEBUG")
-		if debugVal != "1" && debugVal != "true" {
-			debugLogger = log.New(io.Discard, "", 0)
-			return
-		}
-
-		debugPath = filepath.Join(os.TempDir(), "validator-agentic.log")
-
-		f, err := os.OpenFile(debugPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
-		if err != nil {
-			fmt.Fprintf(os.Stderr, "AgenticClient: failed to create debug log file: %v\n", err)
-			debugLogger = log.New(io.Discard, "", 0)
-			return
-		}
-
-		debugLogger = log.New(f, "", log.Ltime|log.Lmicroseconds)
-	})
-}
-
-// debugLog writes a formatted message to the debug log file if DEBUG=1 or DEBUG=true
+// debugLog writes a formatted message to the LLM debug log file
 func debugLog(format string, args ...interface{}) {
-	initDebugLogger()
-	debugLogger.Printf(format, args...)
+	logme.LLMLog(format, args...)
 }
 
 // printDebugLogPath prints the debug log file path to stderr if debug is enabled
 func printDebugLogPath() {
-	initDebugLogger()
-	if debugPath != "" {
-		fmt.Fprintf(os.Stderr, "AgenticClient: debug log: %s\n", debugPath)
+	if p := logme.LLMLogPath(); p != "" {
+		fmt.Fprintf(os.Stderr, "AgenticClient: debug log: %s\n", p)
 	}
 }
diff --git a/pkg/llmclient/agentic_tools.go b/pkg/llmclient/agentic_tools.go
index 6124bcfc..c5b55f5c 100644
--- a/pkg/llmclient/agentic_tools.go
+++ b/pkg/llmclient/agentic_tools.go
@@ -12,7 +12,7 @@ import (
 	"strings"
 	"unicode/utf8"
 
-	"github.com/tmc/langchaingo/llms"
+	"github.com/grafana/plugin-validator/pkg/llmprovider"
 )
 
 const maxFileSize = 500 * 1024 // 500KB
@@ -47,11 +47,11 @@ var blockedGitFlags = []string{
 }
 
 // buildAgenticTools returns the list of tools available to the agent
-func buildAgenticTools() []llms.Tool {
-	return []llms.Tool{
+func buildAgenticTools() []llmprovider.Tool {
+	return []llmprovider.Tool{
 		{
 			Type: "function",
-			Function: &llms.FunctionDefinition{
+			Function: &llmprovider.FunctionDef{
 				Name:        "read_file",
 				Description: "Read the contents of a file at the given path",
 				Parameters: map[string]interface{}{
@@ -68,7 +68,7 @@ func buildAgenticTools() []llms.Tool {
 		},
 		{
 			Type: "function",
-			Function: &llms.FunctionDefinition{
+			Function: &llmprovider.FunctionDef{
 				Name:        "list_directory",
 				Description: "List files and directories at the given path",
 				Parameters: map[string]interface{}{
@@ -85,7 +85,7 @@ func buildAgenticTools() []llms.Tool {
 		},
 		{
 			Type: "function",
-			Function: &llms.FunctionDefinition{
+			Function: &llmprovider.FunctionDef{
 				Name:        "grep",
 				Description: "Search for a pattern in files. Returns matching lines with file names and line numbers.",
 				Parameters: map[string]interface{}{
@@ -106,7 +106,7 @@ func buildAgenticTools() []llms.Tool {
 		},
 		{
 			Type: "function",
-			Function: &llms.FunctionDefinition{
+			Function: &llmprovider.FunctionDef{
 				Name:        "git",
 				Description: "Execute a git command. Only allowed commands: log, show, diff, status, ls-files, blame, rev-parse, cat-file, checkout, fetch, pull, branch, tag.",
 				Parameters: map[string]interface{}{
@@ -123,10 +123,9 @@ func buildAgenticTools() []llms.Tool {
 		},
 		{
 			Type: "function",
-			Function: &llms.FunctionDefinition{
+			Function: &llmprovider.FunctionDef{
 				Name:        "submit_answer",
 				Description: "Submit your final answer to the question. Use this when you have gathered enough information.",
-				Strict: true,
 				Parameters: map[string]interface{}{
 					"type": "object",
 					"properties": map[string]interface{}{
diff --git a/pkg/llmprovider/gemini/client.go b/pkg/llmprovider/gemini/client.go
new file mode 100644
index 00000000..2ffa9f96
--- /dev/null
+++ b/pkg/llmprovider/gemini/client.go
@@ -0,0 +1,380 @@
+// Package gemini implements the llmprovider.Provider interface using the
+// Google GenAI SDK (google.golang.org/genai).  It properly preserves
+// thought_signatures for Gemini 3.x models.
+package gemini
+
+import (
+	"context"
+	"crypto/rand"
+	"encoding/hex"
+	"encoding/json"
+	"fmt"
+
+	"github.com/grafana/plugin-validator/pkg/llmprovider"
+	"github.com/grafana/plugin-validator/pkg/logme"
+	"google.golang.org/genai"
+)
+
+// Client implements llmprovider.Provider for Gemini via AI Studio.
+type Client struct {
+	client    *genai.Client
+	modelName string
+}
+
+// New creates a Gemini provider client using an AI Studio API key.
+func New(ctx context.Context, apiKey, modelName string) (*Client, error) {
+	if apiKey == "" {
+		return nil, fmt.Errorf("gemini: API key is required")
+	}
+	if modelName == "" {
+		return nil, fmt.Errorf("gemini: model name is required")
+	}
+
+	client, err := genai.NewClient(ctx, &genai.ClientConfig{
+		APIKey:  apiKey,
+		Backend: genai.BackendGeminiAPI,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("gemini: failed to create client: %w", err)
+	}
+
+	return &Client{client: client, modelName: modelName}, nil
+}
+
+// GenerateContent sends messages to Gemini and returns the response.
+// It preserves thought_signatures for Gemini 3.x compatibility.
+func (c *Client) GenerateContent(
+	ctx context.Context,
+	messages []llmprovider.Message,
+	options ...llmprovider.CallOption,
+) (*llmprovider.Response, error) {
+	opts := &llmprovider.CallOptions{}
+	for _, o := range options {
+		o(opts)
+	}
+
+	// Extract system instruction from messages (Gemini handles it separately)
+	systemInstruction, conversationMessages := extractSystemMessage(messages)
+
+	// Convert our messages to genai.Content
+	contents, err := toGenAIContents(conversationMessages)
+	if err != nil {
+		return nil, fmt.Errorf("gemini: failed to convert messages: %w", err)
+	}
+
+	// Build config
+	config := buildConfig(opts, systemInstruction)
+
+	// Call Gemini API
+	resp, err := c.client.Models.GenerateContent(ctx, c.modelName, contents, config)
+	if err != nil {
+		return nil, fmt.Errorf("gemini: API error: %w", err)
+	}
+
+	// Convert response, preserving thought_signatures
+	return fromGenAIResponse(resp)
+}
+
+// extractSystemMessage pulls the system instruction out of the message list.
+// Gemini takes system instructions via config, not as a message role.
+func extractSystemMessage(messages []llmprovider.Message) (string, []llmprovider.Message) {
+	var system string
+	var rest []llmprovider.Message
+
+	for _, m := range messages {
+		if m.Role == llmprovider.RoleSystem {
+			// Concatenate all text parts from system messages
+			for _, p := range m.Parts {
+				if tp, ok := p.(llmprovider.TextPart); ok {
+					if system != "" {
+						system += "\n"
+					}
+					system += tp.Text
+				}
+			}
+		} else {
+			rest = append(rest, m)
+		}
+	}
+
+	return system, rest
+}
+
+// buildConfig creates the GenAI generation config from our options.
+func buildConfig(opts *llmprovider.CallOptions, systemInstruction string) *genai.GenerateContentConfig {
+	config := &genai.GenerateContentConfig{}
+
+	if systemInstruction != "" {
+		config.SystemInstruction = &genai.Content{
+			Parts: []*genai.Part{genai.NewPartFromText(systemInstruction)},
+		}
+	}
+
+	if opts.Temperature > 0 {
+		t := float32(opts.Temperature)
+		config.Temperature = &t
+	}
+	if opts.MaxTokens > 0 {
+		config.MaxOutputTokens = int32(opts.MaxTokens)
+	}
+	if opts.TopP > 0 {
+		p := float32(opts.TopP)
+		config.TopP = &p
+	}
+	if opts.TopK > 0 {
+		k := float32(opts.TopK)
+		config.TopK = &k
+	}
+	if len(opts.StopWords) > 0 {
+		config.StopSequences = opts.StopWords
+	}
+
+	// Convert tools
+	if len(opts.Tools) > 0 {
+		var declarations []*genai.FunctionDeclaration
+		for _, tool := range opts.Tools {
+			if tool.Function != nil {
+				decl := &genai.FunctionDeclaration{
+					Name:        tool.Function.Name,
+					Description: tool.Function.Description,
+				}
+				// Use ParametersJsonSchema for raw JSON schema passthrough
+				if tool.Function.Parameters != nil {
+					decl.ParametersJsonSchema = tool.Function.Parameters
+				}
+				declarations = append(declarations, decl)
+			}
+		}
+		if len(declarations) > 0 {
+			config.Tools = []*genai.Tool{{
+				FunctionDeclarations: declarations,
+			}}
+			config.ToolConfig = &genai.ToolConfig{
+				FunctionCallingConfig: &genai.FunctionCallingConfig{
+					Mode: genai.FunctionCallingConfigModeAuto,
+				},
+			}
+		}
+	}
+
+	return config
+}
+
+// --- Message conversion: llmprovider → genai ---
+
+func toGenAIContents(messages []llmprovider.Message) ([]*genai.Content, error) {
+	var contents []*genai.Content
+
+	for _, msg := range messages {
+		content := &genai.Content{
+			Role: toGenAIRole(msg.Role),
+		}
+
+		for _, part := range msg.Parts {
+			genaiPart, err := toGenAIPart(part)
+			if err != nil {
+				return nil, err
+			}
+			if genaiPart != nil {
+				content.Parts = append(content.Parts, genaiPart)
+			}
+		}
+
+		if len(content.Parts) > 0 {
+			contents = append(contents, content)
+		}
+	}
+
+	return contents, nil
+}
+
+func toGenAIRole(role llmprovider.Role) string {
+	switch role {
+	case llmprovider.RoleHuman:
+		return "user"
+	case llmprovider.RoleAI:
+		return "model"
+	case llmprovider.RoleTool:
+		return "user"
+	default:
+		return "user"
+	}
+}
+
+// toGenAIPart converts one llmprovider part into its genai equivalent. It
+// returns an error for malformed tool-call arguments or unknown part types.
+func toGenAIPart(part llmprovider.Part) (*genai.Part, error) {
+	switch p := part.(type) {
+	case llmprovider.TextPart:
+		return genai.NewPartFromText(p.Text), nil
+
+	case llmprovider.ToolCallPart:
+		// Parse arguments from JSON string to map
+		var args map[string]any
+		if p.Arguments != "" {
+			if err := json.Unmarshal([]byte(p.Arguments), &args); err != nil {
+				return nil, fmt.Errorf("gemini: failed to unmarshal tool arguments for %q: %w", p.Name, err)
+			}
+		}
+
+		genaiPart := genai.NewPartFromFunctionCall(p.Name, args)
+		if p.ID != "" {
+			genaiPart.FunctionCall.ID = p.ID
+		}
+
+		// Echo back thought fields exactly as received from the API.
+		genaiPart.Thought = p.Thought
+		if p.ThoughtSignature != "" {
+			genaiPart.ThoughtSignature = []byte(p.ThoughtSignature)
+		}
+		return genaiPart, nil
+
+	case llmprovider.ToolResultPart:
+		// The function response is a map; wrap anything that is not a JSON object
+		var responseMap map[string]any
+		if err := json.Unmarshal([]byte(p.Content), &responseMap); err != nil || responseMap == nil {
+			// Plain text, JSON scalars/arrays (error) and JSON null (nil map) land here
+			responseMap = map[string]any{"result": p.Content}
+		}
+
+		genaiPart := genai.NewPartFromFunctionResponse(p.Name, responseMap)
+		if p.ToolCallID != "" {
+			genaiPart.FunctionResponse.ID = p.ToolCallID
+		}
+		return genaiPart, nil
+
+	case llmprovider.ThinkingPart:
+		// Thinking parts from previous responses need to be echoed back
+		genaiPart := &genai.Part{
+			Text:    p.Text,
+			Thought: true,
+		}
+		if p.Signature != "" {
+			genaiPart.ThoughtSignature = []byte(p.Signature)
+		}
+		return genaiPart, nil
+
+	default:
+		return nil, fmt.Errorf("gemini: unsupported part type %T", part)
+	}
+}
+
+// --- Response conversion: genai → llmprovider ---
+
+// fromGenAIResponse converts a genai response into llmprovider types, keeping
+// thought signatures on thinking and tool-call parts so they can be echoed back.
+func fromGenAIResponse(resp *genai.GenerateContentResponse) (*llmprovider.Response, error) {
+	if resp == nil {
+		return nil, fmt.Errorf("gemini: nil response")
+	}
+
+	result := &llmprovider.Response{
+		Choices: make([]*llmprovider.Choice, 0, len(resp.Candidates)),
+	}
+
+	for candidateIdx, candidate := range resp.Candidates {
+		choice := &llmprovider.Choice{
+			StopReason:     string(candidate.FinishReason),
+			GenerationInfo: make(map[string]any),
+		}
+
+		if candidate.Content == nil || len(candidate.Content.Parts) == 0 {
+			raw, _ := json.MarshalIndent(candidate, "", "  ")
+			logme.LLMLog("gemini: candidate[%d] empty/nil content, raw candidate:\n%s", candidateIdx, string(raw))
+		}
+
+		if candidate.Content != nil {
+			for partIdx, part := range candidate.Content.Parts {
+				if part == nil {
+					continue
+				}
+
+				// Debug: log raw part fields so we can see exactly what the SDK returns
+				debugLogPart(partIdx, part)
+
+				// Thought/thinking parts
+				if part.Thought && part.FunctionCall == nil {
+					thinking := llmprovider.ThinkingPart{
+						Text:      part.Text,
+						Signature: string(part.ThoughtSignature), // "" when the part is unsigned
+					}
+					choice.Thinking = append(choice.Thinking, thinking)
+					continue
+				}
+
+				// Text content (non-thought); append so multi-part text is not lost
+				if part.Text != "" && part.FunctionCall == nil && part.FunctionResponse == nil {
+					choice.Content += part.Text
+				}
+
+				// Function calls
+				if part.FunctionCall != nil {
+					id := part.FunctionCall.ID
+					if id == "" {
+						id = generateCallID()
+						logme.LLMLog("gemini: part[%d] FunctionCall has empty ID, generated: %s", partIdx, id)
+					}
+					tc := llmprovider.ToolCallPart{
+						ID:      id,
+						Name:    part.FunctionCall.Name,
+						Thought: part.Thought,
+					}
+
+					if part.FunctionCall.Args != nil {
+						argsJSON, err := json.Marshal(part.FunctionCall.Args)
+						if err != nil {
+							return nil, fmt.Errorf("gemini: failed to marshal function args: %w", err)
+						}
+						tc.Arguments = string(argsJSON)
+					}
+
+					// CRITICAL: Capture thought_signature from function call parts
+					if len(part.ThoughtSignature) > 0 {
+						tc.ThoughtSignature = string(part.ThoughtSignature)
+					}
+
+					choice.ToolCalls = append(choice.ToolCalls, tc)
+				}
+			}
+		}
+
+		// Token usage
+		if resp.UsageMetadata != nil {
+			choice.GenerationInfo["usage"] = map[string]any{
+				"prompt_tokens":     resp.UsageMetadata.PromptTokenCount,
+				"completion_tokens": resp.UsageMetadata.CandidatesTokenCount,
+				"total_tokens":      resp.UsageMetadata.TotalTokenCount,
+				"thoughts_tokens":   resp.UsageMetadata.ThoughtsTokenCount,
+			}
+		}
+
+		result.Choices = append(result.Choices, choice)
+	}
+
+	return result, nil
+}
+
+func generateCallID() string {
+	b := make([]byte, 8)
+	_, _ = rand.Read(b) // error explicitly discarded; b is zero-initialised by make above
+	return "call_" + hex.EncodeToString(b)
+}
+
+func debugLogPart(idx int, p *genai.Part) {
+	hasFuncCall := p.FunctionCall != nil
+	hasFuncResp := p.FunctionResponse != nil
+	textLen := len(p.Text)
+	sigLen := len(p.ThoughtSignature)
+	logme.LLMLog("gemini: part[%d] Thought=%v Text=%d bytes FuncCall=%v FuncResp=%v ThoughtSig=%d bytes",
+		idx, p.Thought, textLen, hasFuncCall, hasFuncResp, sigLen)
+	if p.Thought && textLen > 0 {
+		preview := p.Text
+		if len(preview) > 200 {
+			preview = preview[:200] + "..."
+		}
+		logme.LLMLog("gemini: part[%d] thinking preview: %s", idx, preview)
+	}
+	if hasFuncCall {
+		logme.LLMLog("gemini: part[%d] FunctionCall: name=%s id=%s", idx, p.FunctionCall.Name, p.FunctionCall.ID)
+	}
+}
diff --git a/pkg/llmprovider/langchain.go b/pkg/llmprovider/langchain.go
new file mode 100644
index 00000000..75324935
--- /dev/null
+++ b/pkg/llmprovider/langchain.go
@@ -0,0 +1,250 @@
+package llmprovider
+
+import "github.com/tmc/langchaingo/llms"
+
+// This file provides conversion between llmprovider types and langchain
+// llms types. It exists only for the transition period while we migrate
+// providers one at a time. Once all providers use llmprovider types
+// directly, this file can be deleted.
+
+// --- Role conversion ---
+
+func RoleFromLangchain(r llms.ChatMessageType) Role {
+	switch r {
+	case llms.ChatMessageTypeSystem:
+		return RoleSystem
+	case llms.ChatMessageTypeHuman:
+		return RoleHuman
+	case llms.ChatMessageTypeAI:
+		return RoleAI
+	case llms.ChatMessageTypeTool:
+		return RoleTool
+	default:
+		return RoleHuman
+	}
+}
+
+func (r Role) ToLangchain() llms.ChatMessageType {
+	switch r {
+	case RoleSystem:
+		return llms.ChatMessageTypeSystem
+	case RoleHuman:
+		return llms.ChatMessageTypeHuman
+	case RoleAI:
+		return llms.ChatMessageTypeAI
+	case RoleTool:
+		return llms.ChatMessageTypeTool
+	default:
+		return llms.ChatMessageTypeHuman
+	}
+}
+
+// --- Message conversion ---
+
+func MessageFromLangchain(lc llms.MessageContent) Message {
+	msg := Message{
+		Role:  RoleFromLangchain(lc.Role),
+		Parts: make([]Part, 0, len(lc.Parts)),
+	}
+	for _, p := range lc.Parts {
+		msg.Parts = append(msg.Parts, PartFromLangchain(p))
+	}
+	return msg
+}
+
+func MessagesFromLangchain(lc []llms.MessageContent) []Message {
+	msgs := make([]Message, len(lc))
+	for i, m := range lc {
+		msgs[i] = MessageFromLangchain(m)
+	}
+	return msgs
+}
+
+func (m Message) ToLangchain() llms.MessageContent {
+	lc := llms.MessageContent{
+		Role:  m.Role.ToLangchain(),
+		Parts: make([]llms.ContentPart, 0, len(m.Parts)),
+	}
+	for _, p := range m.Parts {
+		lc.Parts = append(lc.Parts, PartToLangchain(p))
+	}
+	return lc
+}
+
+func MessagesToLangchain(msgs []Message) []llms.MessageContent {
+	lc := make([]llms.MessageContent, len(msgs))
+	for i, m := range msgs {
+		lc[i] = m.ToLangchain()
+	}
+	return lc
+}
+
+// --- Part conversion ---
+
+func PartFromLangchain(p llms.ContentPart) Part {
+	switch v := p.(type) {
+	case llms.TextContent:
+		return TextPart{Text: v.Text}
+	case llms.ToolCall:
+		tc := ToolCallPart{
+			ID:   v.ID,
+			Name: v.FunctionCall.Name,
+		}
+		if v.FunctionCall != nil {
+			tc.Arguments = v.FunctionCall.Arguments
+		}
+		return tc
+	case llms.ToolCallResponse:
+		return ToolResultPart{
+			ToolCallID: v.ToolCallID,
+			Name:       v.Name,
+			Content:    v.Content,
+		}
+	default:
+		// Unsupported part types become empty text
+		return TextPart{}
+	}
+}
+
+func PartToLangchain(p Part) llms.ContentPart {
+	switch v := p.(type) {
+	case TextPart:
+		return llms.TextContent{Text: v.Text}
+	case ToolCallPart:
+		return llms.ToolCall{
+			ID:   v.ID,
+			Type: "function",
+			FunctionCall: &llms.FunctionCall{
+				Name:      v.Name,
+				Arguments: v.Arguments,
+			},
+		}
+	case ToolResultPart:
+		return llms.ToolCallResponse{
+			ToolCallID: v.ToolCallID,
+			Name:       v.Name,
+			Content:    v.Content,
+		}
+	case ThinkingPart:
+		// Langchain has no equivalent; drop thinking parts
+		return llms.TextContent{}
+	default:
+		return llms.TextContent{}
+	}
+}
+
+// --- Response conversion ---
+
+func ResponseFromLangchain(lc *llms.ContentResponse) *Response {
+	if lc == nil {
+		return nil
+	}
+	resp := &Response{
+		Choices: make([]*Choice, len(lc.Choices)),
+	}
+	for i, c := range lc.Choices {
+		resp.Choices[i] = ChoiceFromLangchain(c)
+	}
+	return resp
+}
+
+func ChoiceFromLangchain(lc *llms.ContentChoice) *Choice {
+	if lc == nil {
+		return nil
+	}
+	ch := &Choice{
+		Content:        lc.Content,
+		StopReason:     lc.StopReason,
+		GenerationInfo: lc.GenerationInfo,
+	}
+	for _, tc := range lc.ToolCalls {
+		ch.ToolCalls = append(ch.ToolCalls, ToolCallPart{
+			ID:   tc.ID,
+			Name: tc.FunctionCall.Name,
+			Arguments: func() string {
+				if tc.FunctionCall != nil {
+					return tc.FunctionCall.Arguments
+				}
+				return ""
+			}(),
+		})
+	}
+	return ch
+}
+
+func (r *Response) ToLangchain() *llms.ContentResponse {
+	if r == nil {
+		return nil
+	}
+	lc := &llms.ContentResponse{
+		Choices: make([]*llms.ContentChoice, len(r.Choices)),
+	}
+	for i, c := range r.Choices {
+		lc.Choices[i] = c.ToLangchain()
+	}
+	return lc
+}
+
+func (c *Choice) ToLangchain() *llms.ContentChoice {
+	if c == nil {
+		return nil
+	}
+	lc := &llms.ContentChoice{
+		Content:        c.Content,
+		StopReason:     c.StopReason,
+		GenerationInfo: c.GenerationInfo,
+	}
+	for _, tc := range c.ToolCalls {
+		lc.ToolCalls = append(lc.ToolCalls, llms.ToolCall{
+			ID:   tc.ID,
+			Type: "function",
+			FunctionCall: &llms.FunctionCall{
+				Name:      tc.Name,
+				Arguments: tc.Arguments,
+			},
+		})
+	}
+	return lc
+}
+
+// --- Tool conversion ---
+
+func ToolFromLangchain(t llms.Tool) Tool {
+	tool := Tool{Type: t.Type}
+	if t.Function != nil {
+		tool.Function = &FunctionDef{
+			Name:        t.Function.Name,
+			Description: t.Function.Description,
+			Parameters:  t.Function.Parameters,
+		}
+	}
+	return tool
+}
+
+func ToolsFromLangchain(tools []llms.Tool) []Tool {
+	out := make([]Tool, len(tools))
+	for i, t := range tools {
+		out[i] = ToolFromLangchain(t)
+	}
+	return out
+}
+
+func (t Tool) ToLangchain() llms.Tool {
+	lc := llms.Tool{Type: t.Type}
+	if t.Function != nil {
+		lc.Function = &llms.FunctionDefinition{
+			Name:        t.Function.Name,
+			Description: t.Function.Description,
+			Parameters:  t.Function.Parameters,
+		}
+	}
+	return lc
+}
+
+func ToolsToLangchain(tools []Tool) []llms.Tool {
+	out := make([]llms.Tool, len(tools))
+	for i, t := range tools {
+		out[i] = t.ToLangchain()
+	}
+	return out
+}
diff --git a/pkg/llmprovider/langchain_adapter.go b/pkg/llmprovider/langchain_adapter.go
new file mode 100644
index 00000000..20ae105c
--- /dev/null
+++ b/pkg/llmprovider/langchain_adapter.go
@@ -0,0 +1,69 @@
+package llmprovider
+
+import (
+	"context"
+
+	"github.com/tmc/langchaingo/llms"
+)
+
+// LangchainAdapter wraps an llms.Model to implement the Provider interface.
+// This allows existing langchain-based providers (Anthropic, OpenAI) to be
+// used alongside native providers during the migration.
+type LangchainAdapter struct {
+	LLM llms.Model
+}
+
+func NewLangchainAdapter(llm llms.Model) *LangchainAdapter {
+	return &LangchainAdapter{LLM: llm}
+}
+
+func (a *LangchainAdapter) GenerateContent(
+	ctx context.Context,
+	messages []Message,
+	options ...CallOption,
+) (*Response, error) {
+	// Convert our messages to langchain messages
+	lcMessages := MessagesToLangchain(messages)
+
+	// Convert our options to langchain options
+	opts := &CallOptions{}
+	for _, o := range options {
+		o(opts)
+	}
+	lcOpts := toLangchainCallOptions(opts)
+
+	// Call the langchain model
+	lcResp, err := a.LLM.GenerateContent(ctx, lcMessages, lcOpts...)
+	if err != nil {
+		return nil, err
+	}
+
+	// Convert response back to our types
+	return ResponseFromLangchain(lcResp), nil
+}
+
+// toLangchainCallOptions converts our CallOptions to langchain CallOptions.
+func toLangchainCallOptions(opts *CallOptions) []llms.CallOption {
+	var lcOpts []llms.CallOption
+
+	if opts.MaxTokens > 0 {
+		lcOpts = append(lcOpts, llms.WithMaxTokens(opts.MaxTokens))
+	}
+	if opts.Temperature > 0 {
+		lcOpts = append(lcOpts, llms.WithTemperature(opts.Temperature))
+	}
+	if opts.TopP > 0 {
+		lcOpts = append(lcOpts, llms.WithTopP(opts.TopP))
+	}
+	if opts.TopK > 0 {
+		lcOpts = append(lcOpts, llms.WithTopK(opts.TopK))
+	}
+	if len(opts.StopWords) > 0 {
+		lcOpts = append(lcOpts, llms.WithStopWords(opts.StopWords))
+	}
+	if len(opts.Tools) > 0 {
+		lcOpts = append(lcOpts, llms.WithTools(ToolsToLangchain(opts.Tools)))
+	}
+
+	return lcOpts
+}
diff --git a/pkg/llmprovider/types.go b/pkg/llmprovider/types.go
new file mode 100644
index 00000000..e3b80056
--- /dev/null
+++ b/pkg/llmprovider/types.go
@@ -0,0 +1,171 @@
+// Package llmprovider defines a unified interface for LLM providers.
+//
+// These types mirror github.com/tmc/langchaingo/llms but add support for
+// provider-specific features like Gemini thought_signatures, Anthropic
+// thinking blocks, and OpenAI encrypted reasoning content.
+//
+// During migration, conversion helpers allow gradual adoption: one provider
+// can be migrated at a time while the rest continue using langchain types.
+package llmprovider
+
+import "context"
+
+// Provider is the core interface that all LLM provider clients implement.
+type Provider interface {
+	GenerateContent(ctx context.Context, messages []Message, options ...CallOption) (*Response, error)
+}
+
+// Role identifies the sender of a message.
+type Role string
+
+const (
+	RoleSystem Role = "system"
+	RoleHuman  Role = "human"
+	RoleAI     Role = "ai"
+	RoleTool   Role = "tool"
+)
+
+// Message is a single message in a conversation.
+type Message struct {
+	Role  Role
+	Parts []Part
+}
+
+// Part is a piece of content within a message.
+// Concrete types: TextPart, ToolCallPart, ToolResultPart, ThinkingPart.
+type Part interface {
+	partMarker()
+}
+
+// TextPart is plain text content.
+type TextPart struct {
+	Text string
+}
+
+func (TextPart) partMarker() {}
+
+// ToolCallPart represents a model's request to call a tool.
+type ToolCallPart struct {
+	ID        string
+	Name      string
+	Arguments string // JSON string
+
+	// Thought indicates whether this part was produced during model thinking.
+	// Must be echoed back exactly as received from the API.
+	Thought bool
+
+	// ThoughtSignature is the opaque token Gemini 3.x attaches to function
+	// call parts.  It must be echoed back in subsequent requests or the API
+	// returns a 400.  An empty string means no signature was provided.
+	ThoughtSignature string
+}
+
+func (ToolCallPart) partMarker() {}
+
+// ToolResultPart is the response from executing a tool.
+type ToolResultPart struct {
+	ToolCallID string
+	Name       string
+	Content    string
+}
+
+func (ToolResultPart) partMarker() {}
+
+// ThinkingPart holds reasoning/thinking content from the model.
+// Different providers represent this differently:
+//   - Gemini: thought text + thought_signature
+//   - Anthropic: thinking block with signature, or redacted_thinking
+//   - OpenAI: encrypted reasoning content
+type ThinkingPart struct {
+	Text      string
+	Signature string // Gemini thought_signature or Anthropic thinking signature
+	Encrypted string // OpenAI encrypted_content or Anthropic redacted_thinking data
+}
+
+func (ThinkingPart) partMarker() {}
+
+// Response is the result of a GenerateContent call.
+type Response struct {
+	Choices []*Choice
+}
+
+// Choice is a single response candidate.
+type Choice struct {
+	// Content is the text content of the response.
+	Content string
+
+	// StopReason is why the model stopped generating.
+	StopReason string
+
+	// ToolCalls requested by the model. These preserve ThoughtSignature
+	// so they can be echoed back in the next request.
+	ToolCalls []ToolCallPart
+
+	// Thinking contains reasoning/thinking content if the model produced any.
+	Thinking []ThinkingPart
+
+	// GenerationInfo holds arbitrary provider-specific metadata (token
+	// counts, safety ratings, etc.).
+	GenerationInfo map[string]any
+}
+
+// --- Call options ---
+
+// CallOption configures a GenerateContent call.
+type CallOption func(*CallOptions)
+
+// CallOptions holds all configurable parameters for a GenerateContent call.
+type CallOptions struct {
+	MaxTokens   int
+	Temperature float64
+	TopP        float64
+	TopK        int
+	StopWords   []string
+	Tools       []Tool
+}
+
+// Tool describes a tool the model can invoke.
+type Tool struct {
+	Type     string
+	Function *FunctionDef
+}
+
+// FunctionDef describes a callable function.
+type FunctionDef struct {
+	Name        string
+	Description string
+	Parameters  any // JSON Schema
+}
+
+// --- Option helpers ---
+
+func WithMaxTokens(n int) CallOption {
+	return func(o *CallOptions) { o.MaxTokens = n }
+}
+
+func WithTemperature(t float64) CallOption {
+	return func(o *CallOptions) { o.Temperature = t }
+}
+
+func WithTopP(p float64) CallOption {
+	return func(o *CallOptions) { o.TopP = p }
+}
+
+func WithTopK(k int) CallOption {
+	return func(o *CallOptions) { o.TopK = k }
+}
+
+func WithStopWords(words []string) CallOption {
+	return func(o *CallOptions) { o.StopWords = words }
+}
+
+func WithTools(tools []Tool) CallOption {
+	return func(o *CallOptions) { o.Tools = tools }
+}
+
+// --- Convenience constructors ---
+
+// TextMessage creates a Message with a single text part.
+func TextMessage(role Role, text string) Message {
+	return Message{Role: role, Parts: []Part{TextPart{Text: text}}}
+}
diff --git a/pkg/logme/logme.go b/pkg/logme/logme.go
index fc6c8d49..ece3e121 100644
--- a/pkg/logme/logme.go
+++ b/pkg/logme/logme.go
@@ -2,11 +2,53 @@ package logme
 
 import (
 	"fmt"
+	"io"
+	"log"
 	"os"
+	"path/filepath"
+	"sync"
 )
 
 var isDebugMode = os.Getenv("DEBUG") == "1"
 
+var (
+	llmLogger *log.Logger
+	llmOnce   sync.Once
+	llmPath   string
+)
+
+// initLLMLogger sets up llmLogger exactly once. Output is discarded unless
+// DEBUG=1 and the log file could be opened; llmPath is set only in that case.
+func initLLMLogger() {
+	llmOnce.Do(func() {
+		llmLogger = log.New(io.Discard, "", 0) // default: drop everything
+		if !isDebugMode {
+			return
+		}
+		path := filepath.Join(os.TempDir(), "validator-llm.log")
+		f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "logme: failed to open LLM log file: %v\n", err)
+			return
+		}
+		llmPath = path // publish the path only once the file is actually open
+		llmLogger = log.New(f, "", log.Ltime|log.Lmicroseconds)
+	})
+}
+
+// LLMLog writes a formatted message to the LLM debug log file in os.TempDir().
+// Output is discarded unless DEBUG=1 and the log file could be opened.
+func LLMLog(format string, args ...interface{}) {
+	initLLMLogger()
+	llmLogger.Printf(format, args...)
+}
+
+// LLMLogPath returns the path to the LLM log file, or "" if not active.
+func LLMLogPath() string {
+	initLLMLogger()
+	return llmPath
+}
+
 func DebugFln(msg string, args ...interface{}) {
 	// check if ENV DEBUG is 1
 	if isDebugMode {

From 4d4042660e10dafec59a75935f40f0813c73994a Mon Sep 17 00:00:00 2001
From: Esteban Beltran <sergies@gmail.com>
Date: Wed, 4 Mar 2026 16:38:57 +0100
Subject: [PATCH 06/10] remove langchain from agentic client

---
 go.mod                                        |   3 +
 go.sum                                        |   5 +
 pkg/llmclient/README.md                       |   2 +-
 pkg/llmclient/agentic_client.go               |  50 +--
 pkg/llmprovider/anthropicprovider/client.go   | 297 ++++++++++++++++++
 .../{gemini => geminiprovider}/client.go      |   2 +-
 pkg/llmprovider/langchain.go                  | 250 ---------------
 pkg/llmprovider/langchain_adapter.go          |  69 ----
 pkg/llmprovider/openaiprovider/client.go      | 221 +++++++++++++
 pkg/llmprovider/types.go                      |   9 +-
 10 files changed, 543 insertions(+), 365 deletions(-)
 create mode 100644 pkg/llmprovider/anthropicprovider/client.go
 rename pkg/llmprovider/{gemini => geminiprovider}/client.go (99%)
 delete mode 100644 pkg/llmprovider/langchain.go
 delete mode 100644 pkg/llmprovider/langchain_adapter.go
 create mode 100644 pkg/llmprovider/openaiprovider/client.go

diff --git a/go.mod b/go.mod
index 50893c06..566675b3 100644
--- a/go.mod
+++ b/go.mod
@@ -73,6 +73,7 @@ require (
 	github.com/ProtonMail/go-crypto v1.3.0 // indirect
 	github.com/agext/levenshtein v1.2.3 // indirect
 	github.com/anchore/go-lzo v0.1.0 // indirect
+	github.com/anthropics/anthropic-sdk-go v1.26.0 // indirect
 	github.com/cloudflare/circl v1.6.1 // indirect
 	github.com/compose-spec/compose-go/v2 v2.8.1 // indirect
 	github.com/containerd/cgroups/v3 v3.0.5 // indirect
@@ -155,6 +156,7 @@ require (
 	github.com/moby/sys/userns v0.1.0 // indirect
 	github.com/montanaflynn/stats v0.7.1 // indirect
 	github.com/ncruces/go-strftime v0.1.9 // indirect
+	github.com/openai/openai-go v1.12.0 // indirect
 	github.com/opencontainers/go-digest v1.0.0 // indirect
 	github.com/opencontainers/image-spec v1.1.1 // indirect
 	github.com/opencontainers/runtime-spec v1.2.1 // indirect
@@ -185,6 +187,7 @@ require (
 	github.com/tidwall/jsonc v0.3.2 // indirect
 	github.com/tidwall/match v1.1.1 // indirect
 	github.com/tidwall/pretty v1.2.1 // indirect
+	github.com/tidwall/sjson v1.2.5 // indirect
 	github.com/tink-crypto/tink-go/v2 v2.4.0 // indirect
 	github.com/tklauser/go-sysconf v0.3.15 // indirect
 	github.com/tklauser/numcpus v0.10.0 // indirect
diff --git a/go.sum b/go.sum
index 72cea00a..342e8060 100644
--- a/go.sum
+++ b/go.sum
@@ -72,6 +72,8 @@ github.com/anchore/go-struct-converter v0.0.0-20250211213226-cce56d595160 h1:r8/
 github.com/anchore/go-struct-converter v0.0.0-20250211213226-cce56d595160/go.mod h1:rYqSE9HbjzpHTI74vwPvae4ZVYZd1lue2ta6xHPdblA=
 github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8=
 github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4=
+github.com/anthropics/anthropic-sdk-go v1.26.0 h1:oUTzFaUpAevfuELAP1sjL6CQJ9HHAfT7CoSYSac11PY=
+github.com/anthropics/anthropic-sdk-go v1.26.0/go.mod h1:qUKmaW+uuPB64iy1l+4kOSvaLqPXnHTTBKH6RVZ7q5Q=
 github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
 github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
 github.com/bmatcuk/doublestar/v4 v4.9.2 h1:b0mc6WyRSYLjzofB2v/0cuDUZ+MqoGyH3r0dVij35GI=
@@ -375,6 +377,8 @@ github.com/neurosnap/sentences v1.0.6 h1:iBVUivNtlwGkYsJblWV8GGVFmXzZzak907Ci8aA
 github.com/neurosnap/sentences v1.0.6/go.mod h1:pg1IapvYpWCJJm/Etxeh0+gtMf1rI1STY9S7eUCPbDc=
 github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k=
 github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY=
+github.com/openai/openai-go v1.12.0 h1:NBQCnXzqOTv5wsgNC36PrFEiskGfO5wccfCWDo9S1U0=
+github.com/openai/openai-go v1.12.0/go.mod h1:g461MYGXEXBVdV5SaR/5tNzNbSfwTBBefwc+LlDCK0Y=
 github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
 github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
@@ -478,6 +482,7 @@ github.com/terminalstatic/go-xsd-validate v0.1.6 h1:TenYeQ3eY631qNi1/cTmLH/s2slH
 github.com/terminalstatic/go-xsd-validate v0.1.6/go.mod h1:18lsvYFofBflqCrvo1umpABZ99+GneNTw2kEEc8UPJw=
 github.com/thoas/go-funk v0.9.3 h1:7+nAEx3kn5ZJcnDm2Bh23N2yOtweO14bi//dvRtgLpw=
 github.com/thoas/go-funk v0.9.3/go.mod h1:+IWnUfUmFO1+WVYQWQtIJHeRRdaIyyYglZN7xzUPe4Q=
+github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
 github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
 github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
 github.com/tidwall/jsonc v0.3.2 h1:ZTKrmejRlAJYdn0kcaFqRAKlxxFIC21pYq8vLa4p2Wc=
diff --git a/pkg/llmclient/README.md b/pkg/llmclient/README.md
index 41cb7a84..0cd94df1 100644
--- a/pkg/llmclient/README.md
+++ b/pkg/llmclient/README.md
@@ -2,7 +2,7 @@
 
 LLM client package for code analysis.
 
-- **AgenticClient**: Provider-agnostic agentic client using [langchaingo](https://github.com/tmc/langchaingo). Gives the LLM tools to explore a repository and answer questions about code.
+- **AgenticClient**: Provider-agnostic agentic client using native SDK implementations for each provider. Gives the LLM tools to explore a repository and answer questions about code.
 
 ## AgenticClient
 
diff --git a/pkg/llmclient/agentic_client.go b/pkg/llmclient/agentic_client.go
index 3d1b8bb8..22f53202 100644
--- a/pkg/llmclient/agentic_client.go
+++ b/pkg/llmclient/agentic_client.go
@@ -8,9 +8,9 @@ import (
 	"time"
 
 	"github.com/grafana/plugin-validator/pkg/llmprovider"
-	"github.com/grafana/plugin-validator/pkg/llmprovider/gemini"
-	"github.com/tmc/langchaingo/llms/anthropic"
-	"github.com/tmc/langchaingo/llms/openai"
+	"github.com/grafana/plugin-validator/pkg/llmprovider/anthropicprovider"
+	"github.com/grafana/plugin-validator/pkg/llmprovider/geminiprovider"
+	"github.com/grafana/plugin-validator/pkg/llmprovider/openaiprovider"
 )
 
 const (
@@ -248,17 +248,8 @@ func (c *agenticClientImpl) runQuestionLoop(
 		}
 
 		// Merge all choices into one unified view for processing.
-		// 
-		// Background: Anthropic's API returns separate content blocks (text, tool_use, thinking)
-		// which go-langchain converts into separate ContentChoice objects. For example, a response
-		// with text + 2 tool calls becomes 3 separate Choices.
-		//
-		// We merge them here to process the complete response, but later we must
-		// split them back into separate AI messages because go-langchain's handleAIMessage() only
-		// serializes Parts[0] when sending back to Anthropic. Putting multiple tool calls in one
-		// message would lose all but the first.
-		//
-		// See docs/anthropic-choices-behavior.md for detailed explanation of this pattern.
+		// Providers return a single Choice, but we merge defensively
+		// in case a provider returns multiple.
 		mergedChoice := llmprovider.Choice{}
 		var allToolCalls []llmprovider.ToolCallPart
 		var contentParts []string
@@ -376,12 +367,9 @@ func (c *agenticClientImpl) runQuestionLoop(
 		// Process each tool call as a separate AI message + tool result pair.
 		// This is the "split" part of the merge-then-split pattern.
 		//
-		// Why: go-langchain's Anthropic handleAIMessage() only serializes Parts[0], so
-		// MessageContent{Parts: [toolCall1, toolCall2]} would lose toolCall2 when sent back.
-		// By creating one AI message per tool call, we ensure all tool calls are properly
-		// serialized. Each tool_use then has its matching tool_result in the following message.
-		//
-		// See docs/anthropic-choices-behavior.md for details on this serialization constraint.
+		// Create one AI message per tool call, each followed by its tool_result.
+		// This keeps the conversation in strict alternating assistant/user order
+		// as required by Anthropic's API.
 		for i, toolCall := range choice.ToolCalls {
 			toolCallsRemaining--
 
@@ -505,30 +493,16 @@ func truncateString(s string, maxLen int) string {
 }
 
 // initProvider initializes the appropriate provider based on configuration.
-// Gemini uses our native provider; Anthropic and OpenAI use langchain adapters
+// initProvider creates the appropriate native provider for the given config.
 // until they are migrated.
 func initProvider(ctx context.Context, opts *AgenticCallOptions) (llmprovider.Provider, error) {
 	switch opts.Provider {
 	case "google":
-		return gemini.New(ctx, opts.APIKey, opts.Model)
+		return geminiprovider.New(ctx, opts.APIKey, opts.Model)
 	case "anthropic":
-		llm, err := anthropic.New(
-			anthropic.WithToken(opts.APIKey),
-			anthropic.WithModel(opts.Model),
-		)
-		if err != nil {
-			return nil, err
-		}
-		return llmprovider.NewLangchainAdapter(llm), nil
+		return anthropicprovider.New(opts.APIKey, opts.Model)
 	case "openai":
-		llm, err := openai.New(
-			openai.WithToken(opts.APIKey),
-			openai.WithModel(opts.Model),
-		)
-		if err != nil {
-			return nil, err
-		}
-		return llmprovider.NewLangchainAdapter(llm), nil
+		return openaiprovider.New(opts.APIKey, opts.Model)
 	default:
 		return nil, fmt.Errorf(
 			"unsupported provider: %s (supported: google, anthropic, openai)",
diff --git a/pkg/llmprovider/anthropicprovider/client.go b/pkg/llmprovider/anthropicprovider/client.go
new file mode 100644
index 00000000..5eeed004
--- /dev/null
+++ b/pkg/llmprovider/anthropicprovider/client.go
@@ -0,0 +1,297 @@
+// Package anthropicprovider implements the llmprovider.Provider interface
+// using the official Anthropic Go SDK (github.com/anthropics/anthropic-sdk-go).
+package anthropicprovider
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+
+	"github.com/anthropics/anthropic-sdk-go"
+	"github.com/anthropics/anthropic-sdk-go/option"
+	"github.com/grafana/plugin-validator/pkg/llmprovider"
+	"github.com/grafana/plugin-validator/pkg/logme"
+)
+
+// Client implements llmprovider.Provider for Anthropic.
+type Client struct {
+	client    *anthropic.Client
+	modelName string
+}
+
+// New creates an Anthropic provider client.
+func New(apiKey, modelName string) (*Client, error) {
+	if apiKey == "" {
+		return nil, fmt.Errorf("anthropic: API key is required")
+	}
+	if modelName == "" {
+		return nil, fmt.Errorf("anthropic: model name is required")
+	}
+
+	client := anthropic.NewClient(option.WithAPIKey(apiKey))
+
+	return &Client{client: &client, modelName: modelName}, nil
+}
+
+// GenerateContent sends messages to Anthropic and returns the response.
+func (c *Client) GenerateContent(
+	ctx context.Context,
+	messages []llmprovider.Message,
+	options ...llmprovider.CallOption,
+) (*llmprovider.Response, error) {
+	opts := &llmprovider.CallOptions{}
+	for _, o := range options {
+		o(opts)
+	}
+
+	system, msgs := extractSystemAndMessages(messages)
+
+	maxTokens := int64(4096)
+	if opts.MaxTokens > 0 {
+		maxTokens = int64(opts.MaxTokens)
+	}
+
+	params := anthropic.MessageNewParams{
+		Model:     anthropic.Model(c.modelName),
+		MaxTokens: maxTokens,
+		Messages:  msgs,
+	}
+
+	if len(system) > 0 {
+		params.System = system
+	}
+
+	if opts.Temperature > 0 {
+		params.Temperature = anthropic.Float(opts.Temperature)
+	}
+	if opts.TopP > 0 {
+		params.TopP = anthropic.Float(opts.TopP)
+	}
+	if len(opts.StopWords) > 0 {
+		params.StopSequences = opts.StopWords
+	}
+
+	if len(opts.Tools) > 0 {
+		params.Tools = toAnthropicTools(opts.Tools)
+	}
+
+	resp, err := c.client.Messages.New(ctx, params)
+	if err != nil {
+		return nil, fmt.Errorf("anthropic: API error: %w", err)
+	}
+
+	return fromAnthropicResponse(resp), nil
+}
+
+// --- Message conversion: llmprovider → anthropic ---
+
+// extractSystemAndMessages separates system messages (which go in a top-level
+// param) from conversation messages.
+func extractSystemAndMessages(messages []llmprovider.Message) ([]anthropic.TextBlockParam, []anthropic.MessageParam) {
+	var system []anthropic.TextBlockParam
+	var result []anthropic.MessageParam
+
+	for _, msg := range messages {
+		switch msg.Role {
+		case llmprovider.RoleSystem:
+			text := extractText(msg.Parts)
+			if text != "" {
+				system = append(system, anthropic.TextBlockParam{Text: text})
+			}
+
+		case llmprovider.RoleHuman:
+			blocks := toUserBlocks(msg.Parts)
+			if len(blocks) > 0 {
+				result = append(result, anthropic.NewUserMessage(blocks...))
+			}
+
+		case llmprovider.RoleAI:
+			blocks := toAssistantBlocks(msg.Parts)
+			if len(blocks) > 0 {
+				result = append(result, anthropic.NewAssistantMessage(blocks...))
+			}
+
+		case llmprovider.RoleTool:
+			// Anthropic expects tool results to be sent back as user messages
+			blocks := toToolResultBlocks(msg.Parts)
+			if len(blocks) > 0 {
+				result = append(result, anthropic.NewUserMessage(blocks...))
+			}
+		}
+	}
+
+	return system, result
+}
+
+func toUserBlocks(parts []llmprovider.Part) []anthropic.ContentBlockParamUnion {
+	var blocks []anthropic.ContentBlockParamUnion
+	for _, p := range parts {
+		switch v := p.(type) {
+		case llmprovider.TextPart:
+			blocks = append(blocks, anthropic.NewTextBlock(v.Text))
+		case llmprovider.ToolResultPart:
+			blocks = append(blocks, anthropic.NewToolResultBlock(v.ToolCallID, v.Content, false))
+		}
+	}
+	return blocks
+}
+
+func toAssistantBlocks(parts []llmprovider.Part) []anthropic.ContentBlockParamUnion {
+	var blocks []anthropic.ContentBlockParamUnion
+	for _, p := range parts {
+		switch v := p.(type) {
+		case llmprovider.TextPart:
+			blocks = append(blocks, anthropic.NewTextBlock(v.Text))
+		case llmprovider.ToolCallPart:
+			// Parse the arguments string back to any for the input field
+			var input any
+			if err := json.Unmarshal([]byte(v.Arguments), &input); err != nil {
+				input = map[string]any{}
+			}
+			blocks = append(blocks, anthropic.NewToolUseBlock(v.ID, input, v.Name))
+		case llmprovider.ThinkingPart:
+			if v.Encrypted != "" {
+				blocks = append(blocks, anthropic.NewRedactedThinkingBlock(v.Encrypted))
+			} else if v.Text != "" {
+				blocks = append(blocks, anthropic.NewThinkingBlock(v.Signature, v.Text))
+			}
+		}
+	}
+	return blocks
+}
+
+func toToolResultBlocks(parts []llmprovider.Part) []anthropic.ContentBlockParamUnion {
+	var blocks []anthropic.ContentBlockParamUnion
+	for _, p := range parts {
+		if tr, ok := p.(llmprovider.ToolResultPart); ok {
+			blocks = append(blocks, anthropic.NewToolResultBlock(tr.ToolCallID, tr.Content, false))
+		}
+	}
+	return blocks
+}
+
+func extractText(parts []llmprovider.Part) string {
+	var text string
+	for _, p := range parts {
+		if tp, ok := p.(llmprovider.TextPart); ok {
+			if text != "" {
+				text += "\n"
+			}
+			text += tp.Text
+		}
+	}
+	return text
+}
+
+// --- Tool conversion ---
+
+func toAnthropicTools(tools []llmprovider.Tool) []anthropic.ToolUnionParam {
+	var result []anthropic.ToolUnionParam
+	for _, tool := range tools {
+		if tool.Function == nil {
+			continue
+		}
+
+		param := anthropic.ToolParam{
+			Name:        tool.Function.Name,
+			Description: anthropic.String(tool.Function.Description),
+		}
+
+		// Convert parameters to ToolInputSchemaParam
+		if tool.Function.Parameters != nil {
+			schema := toInputSchema(tool.Function.Parameters)
+			param.InputSchema = schema
+		}
+
+		result = append(result, anthropic.ToolUnionParam{OfTool: &param})
+	}
+	return result
+}
+
+func toInputSchema(params any) anthropic.ToolInputSchemaParam {
+	schema := anthropic.ToolInputSchemaParam{}
+
+	var m map[string]any
+	switch p := params.(type) {
+	case map[string]any:
+		m = p
+	default:
+		data, err := json.Marshal(p)
+		if err != nil {
+			return schema
+		}
+		if err := json.Unmarshal(data, &m); err != nil {
+			return schema
+		}
+	}
+
+	if props, ok := m["properties"]; ok {
+		schema.Properties = props
+	}
+	if req, ok := m["required"].([]any); ok {
+		for _, r := range req {
+			if s, ok := r.(string); ok {
+				schema.Required = append(schema.Required, s)
+			}
+		}
+	}
+
+	return schema
+}
+
+// --- Response conversion: anthropic → llmprovider ---
+
+func fromAnthropicResponse(resp *anthropic.Message) *llmprovider.Response {
+	choice := &llmprovider.Choice{
+		StopReason:     string(resp.StopReason),
+		GenerationInfo: make(map[string]any),
+	}
+
+	for _, block := range resp.Content {
+		switch v := block.AsAny().(type) {
+		case anthropic.TextBlock:
+			if choice.Content != "" {
+				choice.Content += "\n"
+			}
+			choice.Content += v.Text
+
+		case anthropic.ThinkingBlock:
+			logme.LLMLog("anthropic: thinking block (signature=%s, len=%d)", v.Signature[:min(20, len(v.Signature))], len(v.Thinking))
+			choice.Thinking = append(choice.Thinking, llmprovider.ThinkingPart{
+				Text:      v.Thinking,
+				Signature: v.Signature,
+			})
+
+		case anthropic.RedactedThinkingBlock:
+			logme.LLMLog("anthropic: redacted thinking block (data_len=%d)", len(v.Data))
+			choice.Thinking = append(choice.Thinking, llmprovider.ThinkingPart{
+				Encrypted: v.Data,
+			})
+
+		case anthropic.ToolUseBlock:
+			args := string(v.Input)
+			logme.LLMLog("anthropic: tool call: name=%s id=%s", v.Name, v.ID)
+			choice.ToolCalls = append(choice.ToolCalls, llmprovider.ToolCallPart{
+				ID:        v.ID,
+				Name:      v.Name,
+				Arguments: args,
+			})
+		}
+	}
+
+	choice.GenerationInfo["usage"] = map[string]any{
+		"input_tokens":  resp.Usage.InputTokens,
+		"output_tokens": resp.Usage.OutputTokens,
+	}
+
+	return &llmprovider.Response{
+		Choices: []*llmprovider.Choice{choice},
+	}
+}
+
+func min(a, b int) int {
+	if a < b {
+		return a
+	}
+	return b
+}
diff --git a/pkg/llmprovider/gemini/client.go b/pkg/llmprovider/geminiprovider/client.go
similarity index 99%
rename from pkg/llmprovider/gemini/client.go
rename to pkg/llmprovider/geminiprovider/client.go
index 2ffa9f96..f7ad596a 100644
--- a/pkg/llmprovider/gemini/client.go
+++ b/pkg/llmprovider/geminiprovider/client.go
@@ -1,7 +1,7 @@
 // Package gemini implements the llmprovider.Provider interface using the
 // Google GenAI SDK (google.golang.org/genai).  It properly preserves
 // thought_signatures for Gemini 3.x models.
-package gemini
+package geminiprovider
 
 import (
 	"context"
diff --git a/pkg/llmprovider/langchain.go b/pkg/llmprovider/langchain.go
deleted file mode 100644
index 75324935..00000000
--- a/pkg/llmprovider/langchain.go
+++ /dev/null
@@ -1,250 +0,0 @@
-package llmprovider
-
-import "github.com/tmc/langchaingo/llms"
-
-// This file provides conversion between llmprovider types and langchain
-// llms types. It exists only for the transition period while we migrate
-// providers one at a time. Once all providers use llmprovider types
-// directly, this file can be deleted.
-
-// --- Role conversion ---
-
-func RoleFromLangchain(r llms.ChatMessageType) Role {
-	switch r {
-	case llms.ChatMessageTypeSystem:
-		return RoleSystem
-	case llms.ChatMessageTypeHuman:
-		return RoleHuman
-	case llms.ChatMessageTypeAI:
-		return RoleAI
-	case llms.ChatMessageTypeTool:
-		return RoleTool
-	default:
-		return RoleHuman
-	}
-}
-
-func (r Role) ToLangchain() llms.ChatMessageType {
-	switch r {
-	case RoleSystem:
-		return llms.ChatMessageTypeSystem
-	case RoleHuman:
-		return llms.ChatMessageTypeHuman
-	case RoleAI:
-		return llms.ChatMessageTypeAI
-	case RoleTool:
-		return llms.ChatMessageTypeTool
-	default:
-		return llms.ChatMessageTypeHuman
-	}
-}
-
-// --- Message conversion ---
-
-func MessageFromLangchain(lc llms.MessageContent) Message {
-	msg := Message{
-		Role:  RoleFromLangchain(lc.Role),
-		Parts: make([]Part, 0, len(lc.Parts)),
-	}
-	for _, p := range lc.Parts {
-		msg.Parts = append(msg.Parts, PartFromLangchain(p))
-	}
-	return msg
-}
-
-func MessagesFromLangchain(lc []llms.MessageContent) []Message {
-	msgs := make([]Message, len(lc))
-	for i, m := range lc {
-		msgs[i] = MessageFromLangchain(m)
-	}
-	return msgs
-}
-
-func (m Message) ToLangchain() llms.MessageContent {
-	lc := llms.MessageContent{
-		Role:  m.Role.ToLangchain(),
-		Parts: make([]llms.ContentPart, 0, len(m.Parts)),
-	}
-	for _, p := range m.Parts {
-		lc.Parts = append(lc.Parts, PartToLangchain(p))
-	}
-	return lc
-}
-
-func MessagesToLangchain(msgs []Message) []llms.MessageContent {
-	lc := make([]llms.MessageContent, len(msgs))
-	for i, m := range msgs {
-		lc[i] = m.ToLangchain()
-	}
-	return lc
-}
-
-// --- Part conversion ---
-
-func PartFromLangchain(p llms.ContentPart) Part {
-	switch v := p.(type) {
-	case llms.TextContent:
-		return TextPart{Text: v.Text}
-	case llms.ToolCall:
-		tc := ToolCallPart{
-			ID:   v.ID,
-			Name: v.FunctionCall.Name,
-		}
-		if v.FunctionCall != nil {
-			tc.Arguments = v.FunctionCall.Arguments
-		}
-		return tc
-	case llms.ToolCallResponse:
-		return ToolResultPart{
-			ToolCallID: v.ToolCallID,
-			Name:       v.Name,
-			Content:    v.Content,
-		}
-	default:
-		// Unsupported part types become empty text
-		return TextPart{}
-	}
-}
-
-func PartToLangchain(p Part) llms.ContentPart {
-	switch v := p.(type) {
-	case TextPart:
-		return llms.TextContent{Text: v.Text}
-	case ToolCallPart:
-		return llms.ToolCall{
-			ID:   v.ID,
-			Type: "function",
-			FunctionCall: &llms.FunctionCall{
-				Name:      v.Name,
-				Arguments: v.Arguments,
-			},
-		}
-	case ToolResultPart:
-		return llms.ToolCallResponse{
-			ToolCallID: v.ToolCallID,
-			Name:       v.Name,
-			Content:    v.Content,
-		}
-	case ThinkingPart:
-		// Langchain has no equivalent; drop thinking parts
-		return llms.TextContent{}
-	default:
-		return llms.TextContent{}
-	}
-}
-
-// --- Response conversion ---
-
-func ResponseFromLangchain(lc *llms.ContentResponse) *Response {
-	if lc == nil {
-		return nil
-	}
-	resp := &Response{
-		Choices: make([]*Choice, len(lc.Choices)),
-	}
-	for i, c := range lc.Choices {
-		resp.Choices[i] = ChoiceFromLangchain(c)
-	}
-	return resp
-}
-
-func ChoiceFromLangchain(lc *llms.ContentChoice) *Choice {
-	if lc == nil {
-		return nil
-	}
-	ch := &Choice{
-		Content:        lc.Content,
-		StopReason:     lc.StopReason,
-		GenerationInfo: lc.GenerationInfo,
-	}
-	for _, tc := range lc.ToolCalls {
-		ch.ToolCalls = append(ch.ToolCalls, ToolCallPart{
-			ID:   tc.ID,
-			Name: tc.FunctionCall.Name,
-			Arguments: func() string {
-				if tc.FunctionCall != nil {
-					return tc.FunctionCall.Arguments
-				}
-				return ""
-			}(),
-		})
-	}
-	return ch
-}
-
-func (r *Response) ToLangchain() *llms.ContentResponse {
-	if r == nil {
-		return nil
-	}
-	lc := &llms.ContentResponse{
-		Choices: make([]*llms.ContentChoice, len(r.Choices)),
-	}
-	for i, c := range r.Choices {
-		lc.Choices[i] = c.ToLangchain()
-	}
-	return lc
-}
-
-func (c *Choice) ToLangchain() *llms.ContentChoice {
-	if c == nil {
-		return nil
-	}
-	lc := &llms.ContentChoice{
-		Content:        c.Content,
-		StopReason:     c.StopReason,
-		GenerationInfo: c.GenerationInfo,
-	}
-	for _, tc := range c.ToolCalls {
-		lc.ToolCalls = append(lc.ToolCalls, llms.ToolCall{
-			ID:   tc.ID,
-			Type: "function",
-			FunctionCall: &llms.FunctionCall{
-				Name:      tc.Name,
-				Arguments: tc.Arguments,
-			},
-		})
-	}
-	return lc
-}
-
-// --- Tool conversion ---
-
-func ToolFromLangchain(t llms.Tool) Tool {
-	tool := Tool{Type: t.Type}
-	if t.Function != nil {
-		tool.Function = &FunctionDef{
-			Name:        t.Function.Name,
-			Description: t.Function.Description,
-			Parameters:  t.Function.Parameters,
-		}
-	}
-	return tool
-}
-
-func ToolsFromLangchain(tools []llms.Tool) []Tool {
-	out := make([]Tool, len(tools))
-	for i, t := range tools {
-		out[i] = ToolFromLangchain(t)
-	}
-	return out
-}
-
-func (t Tool) ToLangchain() llms.Tool {
-	lc := llms.Tool{Type: t.Type}
-	if t.Function != nil {
-		lc.Function = &llms.FunctionDefinition{
-			Name:        t.Function.Name,
-			Description: t.Function.Description,
-			Parameters:  t.Function.Parameters,
-		}
-	}
-	return lc
-}
-
-func ToolsToLangchain(tools []Tool) []llms.Tool {
-	out := make([]llms.Tool, len(tools))
-	for i, t := range tools {
-		out[i] = t.ToLangchain()
-	}
-	return out
-}
diff --git a/pkg/llmprovider/langchain_adapter.go b/pkg/llmprovider/langchain_adapter.go
deleted file mode 100644
index 20ae105c..00000000
--- a/pkg/llmprovider/langchain_adapter.go
+++ /dev/null
@@ -1,69 +0,0 @@
-package llmprovider
-
-import (
-	"context"
-
-	"github.com/tmc/langchaingo/llms"
-)
-
-// LangchainAdapter wraps an llms.Model to implement the Provider interface.
-// This allows existing langchain-based providers (Anthropic, OpenAI) to be
-// used alongside native providers during the migration.
-type LangchainAdapter struct {
-	LLM llms.Model
-}
-
-func NewLangchainAdapter(llm llms.Model) *LangchainAdapter {
-	return &LangchainAdapter{LLM: llm}
-}
-
-func (a *LangchainAdapter) GenerateContent(
-	ctx context.Context,
-	messages []Message,
-	options ...CallOption,
-) (*Response, error) {
-	// Convert our messages to langchain messages
-	lcMessages := MessagesToLangchain(messages)
-
-	// Convert our options to langchain options
-	opts := &CallOptions{}
-	for _, o := range options {
-		o(opts)
-	}
-	lcOpts := toLangchainCallOptions(opts)
-
-	// Call the langchain model
-	lcResp, err := a.LLM.GenerateContent(ctx, lcMessages, lcOpts...)
-	if err != nil {
-		return nil, err
-	}
-
-	// Convert response back to our types
-	return ResponseFromLangchain(lcResp), nil
-}
-
-// toLangchainCallOptions converts our CallOptions to langchain CallOptions.
-func toLangchainCallOptions(opts *CallOptions) []llms.CallOption {
-	var lcOpts []llms.CallOption
-
-	if opts.MaxTokens > 0 {
-		lcOpts = append(lcOpts, llms.WithMaxTokens(opts.MaxTokens))
-	}
-	if opts.Temperature > 0 {
-		lcOpts = append(lcOpts, llms.WithTemperature(opts.Temperature))
-	}
-	if opts.TopP > 0 {
-		lcOpts = append(lcOpts, llms.WithTopP(opts.TopP))
-	}
-	if opts.TopK > 0 {
-		lcOpts = append(lcOpts, llms.WithTopK(opts.TopK))
-	}
-	if len(opts.StopWords) > 0 {
-		lcOpts = append(lcOpts, llms.WithStopWords(opts.StopWords))
-	}
-	if len(opts.Tools) > 0 {
-		lcOpts = append(lcOpts, llms.WithTools(ToolsToLangchain(opts.Tools)))
-	}
-
-	return lcOpts
-}
diff --git a/pkg/llmprovider/openaiprovider/client.go b/pkg/llmprovider/openaiprovider/client.go
new file mode 100644
index 00000000..4dd782cc
--- /dev/null
+++ b/pkg/llmprovider/openaiprovider/client.go
@@ -0,0 +1,221 @@
+// Package openaiprovider implements the llmprovider.Provider interface using the
+// official OpenAI Go SDK (github.com/openai/openai-go).
+package openaiprovider
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+
+	"github.com/grafana/plugin-validator/pkg/llmprovider"
+	"github.com/grafana/plugin-validator/pkg/logme"
+	"github.com/openai/openai-go"
+	"github.com/openai/openai-go/option"
+	"github.com/openai/openai-go/shared"
+)
+
+// Client implements llmprovider.Provider for OpenAI.
+type Client struct {
+	client    *openai.Client
+	modelName string
+}
+
+// New creates an OpenAI provider client.
+func New(apiKey, modelName string) (*Client, error) {
+	if apiKey == "" {
+		return nil, fmt.Errorf("openai: API key is required")
+	}
+	if modelName == "" {
+		return nil, fmt.Errorf("openai: model name is required")
+	}
+
+	client := openai.NewClient(option.WithAPIKey(apiKey))
+
+	return &Client{client: &client, modelName: modelName}, nil
+}
+
+// GenerateContent sends messages to OpenAI and returns the response.
+func (c *Client) GenerateContent(
+	ctx context.Context,
+	messages []llmprovider.Message,
+	options ...llmprovider.CallOption,
+) (*llmprovider.Response, error) {
+	opts := &llmprovider.CallOptions{}
+	for _, o := range options {
+		o(opts)
+	}
+
+	params := openai.ChatCompletionNewParams{
+		Model:    shared.ChatModel(c.modelName),
+		Messages: toOpenAIMessages(messages),
+	}
+
+	if opts.Temperature > 0 {
+		params.Temperature = openai.Float(opts.Temperature)
+	}
+	if opts.MaxTokens > 0 {
+		params.MaxCompletionTokens = openai.Int(int64(opts.MaxTokens))
+	}
+	if opts.TopP > 0 {
+		params.TopP = openai.Float(opts.TopP)
+	}
+	if len(opts.StopWords) > 0 {
+		params.Stop = openai.ChatCompletionNewParamsStopUnion{
+			OfStringArray: opts.StopWords,
+		}
+	}
+
+	if len(opts.Tools) > 0 {
+		params.Tools = toOpenAITools(opts.Tools)
+	}
+
+	resp, err := c.client.Chat.Completions.New(ctx, params)
+	if err != nil {
+		return nil, fmt.Errorf("openai: API error: %w", err)
+	}
+
+	return fromOpenAIResponse(resp), nil
+}
+
+// --- Message conversion: llmprovider → openai ---
+
+func toOpenAIMessages(messages []llmprovider.Message) []openai.ChatCompletionMessageParamUnion {
+	var result []openai.ChatCompletionMessageParamUnion
+
+	for _, msg := range messages {
+		switch msg.Role {
+		case llmprovider.RoleSystem:
+			text := extractText(msg.Parts)
+			result = append(result, openai.SystemMessage(text))
+
+		case llmprovider.RoleHuman:
+			text := extractText(msg.Parts)
+			result = append(result, openai.UserMessage(text))
+
+		case llmprovider.RoleAI:
+			result = append(result, toAssistantMessage(msg))
+
+		case llmprovider.RoleTool:
+			for _, part := range msg.Parts {
+				if tr, ok := part.(llmprovider.ToolResultPart); ok {
+					result = append(result, openai.ToolMessage(tr.Content, tr.ToolCallID))
+				}
+			}
+		}
+	}
+
+	return result
+}
+
+func toAssistantMessage(msg llmprovider.Message) openai.ChatCompletionMessageParamUnion {
+	text := extractText(msg.Parts)
+
+	var toolCalls []openai.ChatCompletionMessageToolCallParam
+	for _, part := range msg.Parts {
+		if tc, ok := part.(llmprovider.ToolCallPart); ok {
+			toolCalls = append(toolCalls, openai.ChatCompletionMessageToolCallParam{
+				ID: tc.ID,
+				Function: openai.ChatCompletionMessageToolCallFunctionParam{
+					Name:      tc.Name,
+					Arguments: tc.Arguments,
+				},
+			})
+		}
+	}
+
+	asst := openai.ChatCompletionAssistantMessageParam{}
+	if text != "" {
+		asst.Content.OfString = openai.String(text)
+	}
+	if len(toolCalls) > 0 {
+		asst.ToolCalls = toolCalls
+	}
+
+	return openai.ChatCompletionMessageParamUnion{OfAssistant: &asst}
+}
+
+func extractText(parts []llmprovider.Part) string {
+	var text string
+	for _, p := range parts {
+		if tp, ok := p.(llmprovider.TextPart); ok {
+			if text != "" {
+				text += "\n"
+			}
+			text += tp.Text
+		}
+	}
+	return text
+}
+
+// --- Tool conversion ---
+
+func toOpenAITools(tools []llmprovider.Tool) []openai.ChatCompletionToolParam {
+	var result []openai.ChatCompletionToolParam
+	for _, tool := range tools {
+		if tool.Function == nil {
+			continue
+		}
+
+		param := openai.ChatCompletionToolParam{
+			Function: shared.FunctionDefinitionParam{
+				Name:        tool.Function.Name,
+				Description: openai.String(tool.Function.Description),
+			},
+		}
+
+		// Convert parameters to FunctionParameters (map[string]any)
+		if tool.Function.Parameters != nil {
+			switch p := tool.Function.Parameters.(type) {
+			case map[string]any:
+				param.Function.Parameters = shared.FunctionParameters(p)
+			default:
+				// Marshal and unmarshal to get map[string]any
+				data, err := json.Marshal(p)
+				if err == nil {
+					var m map[string]any
+					if json.Unmarshal(data, &m) == nil {
+						param.Function.Parameters = shared.FunctionParameters(m)
+					}
+				}
+			}
+		}
+
+		result = append(result, param)
+	}
+	return result
+}
+
+// --- Response conversion: openai → llmprovider ---
+
+func fromOpenAIResponse(resp *openai.ChatCompletion) *llmprovider.Response {
+	result := &llmprovider.Response{
+		Choices: make([]*llmprovider.Choice, 0, len(resp.Choices)),
+	}
+
+	for _, c := range resp.Choices {
+		choice := &llmprovider.Choice{
+			Content:        c.Message.Content,
+			StopReason:     c.FinishReason,
+			GenerationInfo: make(map[string]any),
+		}
+
+		for _, tc := range c.Message.ToolCalls {
+			logme.LLMLog("openai: tool call: name=%s id=%s", tc.Function.Name, tc.ID)
+			choice.ToolCalls = append(choice.ToolCalls, llmprovider.ToolCallPart{
+				ID:        tc.ID,
+				Name:      tc.Function.Name,
+				Arguments: tc.Function.Arguments,
+			})
+		}
+
+		choice.GenerationInfo["usage"] = map[string]any{
+			"prompt_tokens":     resp.Usage.PromptTokens,
+			"completion_tokens": resp.Usage.CompletionTokens,
+			"total_tokens":      resp.Usage.TotalTokens,
+		}
+
+		result.Choices = append(result.Choices, choice)
+	}
+
+	return result
+}
diff --git a/pkg/llmprovider/types.go b/pkg/llmprovider/types.go
index e3b80056..d6a592e8 100644
--- a/pkg/llmprovider/types.go
+++ b/pkg/llmprovider/types.go
@@ -1,11 +1,8 @@
 // Package llmprovider defines a unified interface for LLM providers.
 //
-// These types mirror github.com/tmc/langchaingo/llms but add support for
-// provider-specific features like Gemini thought_signatures, Anthropic
-// thinking blocks, and OpenAI encrypted reasoning content.
-//
-// During migration, conversion helpers allow gradual adoption: one provider
-// can be migrated at a time while the rest continue using langchain types.
+// Each provider (Gemini, Anthropic, OpenAI) has a native SDK implementation
+// that supports provider-specific features like Gemini thought_signatures,
+// Anthropic thinking blocks, and OpenAI encrypted reasoning content.
 package llmprovider
 
 import "context"

From 300c064ba74d70c57cdfb80b050a44e169c44a91 Mon Sep 17 00:00:00 2001
From: Esteban Beltran <sergies@gmail.com>
Date: Wed, 4 Mar 2026 16:42:30 +0100
Subject: [PATCH 07/10] remove comment

---
 pkg/llmclient/agentic_client.go | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pkg/llmclient/agentic_client.go b/pkg/llmclient/agentic_client.go
index 22f53202..eae435bd 100644
--- a/pkg/llmclient/agentic_client.go
+++ b/pkg/llmclient/agentic_client.go
@@ -492,9 +492,7 @@ func truncateString(s string, maxLen int) string {
 	return s[:maxLen] + "..."
 }
 
-// initProvider initializes the appropriate provider based on configuration.
 // initProvider creates the appropriate native provider for the given config.
-// until they are migrated.
 func initProvider(ctx context.Context, opts *AgenticCallOptions) (llmprovider.Provider, error) {
 	switch opts.Provider {
 	case "google":

From 901c4eb08656ee79af80723153fbed10ba5ce0dc Mon Sep 17 00:00:00 2001
From: Esteban Beltran <sergies@gmail.com>
Date: Wed, 4 Mar 2026 17:02:54 +0100
Subject: [PATCH 08/10] improve multi tool calling

---
 pkg/llmclient/agentic_client.go | 133 ++++++++++++--------------------
 1 file changed, 48 insertions(+), 85 deletions(-)

diff --git a/pkg/llmclient/agentic_client.go b/pkg/llmclient/agentic_client.go
index eae435bd..7c4b04dc 100644
--- a/pkg/llmclient/agentic_client.go
+++ b/pkg/llmclient/agentic_client.go
@@ -4,7 +4,6 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
-	"strings"
 	"time"
 
 	"github.com/grafana/plugin-validator/pkg/llmprovider"
@@ -247,49 +246,12 @@ func (c *agenticClientImpl) runQuestionLoop(
 			return messages, nil, fmt.Errorf("no response from LLM")
 		}
 
-		// Merge all choices into one unified view for processing.
-		// Providers return a single Choice, but we merge defensively
-		// in case a provider returns multiple.
-		mergedChoice := llmprovider.Choice{}
-		var allToolCalls []llmprovider.ToolCallPart
-		var contentParts []string
-
-		for i, ch := range resp.Choices {
-			debugLog("AgenticClient: processing choice %d: Content=%q, ToolCalls=%d, Thinking=%d",
-				i, truncateString(ch.Content, 100), len(ch.ToolCalls), len(ch.Thinking))
-			for j, t := range ch.Thinking {
-				debugLog("AgenticClient:   thinking[%d]: text=%q sig=%v",
-					j, truncateString(t.Text, 150), t.Signature != "")
-			}
-
-			if ch.Content != "" {
-				contentParts = append(contentParts, ch.Content)
-			}
-			if len(ch.ToolCalls) > 0 {
-				allToolCalls = append(allToolCalls, ch.ToolCalls...)
-			}
-			// Use StopReason from first non-empty one
-			if mergedChoice.StopReason == "" && ch.StopReason != "" {
-				mergedChoice.StopReason = ch.StopReason
-			}
-			// Use GenerationInfo from first choice
-			if i == 0 {
-				mergedChoice.GenerationInfo = ch.GenerationInfo
-			}
-		}
-
-		// Build merged choice — join all content parts so nothing is lost
-		// when Anthropic returns multiple text blocks (e.g. thinking + response).
-		if len(contentParts) > 0 {
-			mergedChoice.Content = strings.Join(contentParts, "\n")
-		}
-		mergedChoice.ToolCalls = allToolCalls
-
-		choice := mergedChoice
-		debugLog("AgenticClient: merged choice - Content=%q, ToolCalls=%d",
-			truncateString(choice.Content, 200), len(choice.ToolCalls))
-		if choice.Content != "" {
-			debugLog("AgenticClient: AI message: %s", truncateString(choice.Content, 200))
+		choice := resp.Choices[0]
+		debugLog("AgenticClient: choice - Content=%q, ToolCalls=%d, Thinking=%d",
+			truncateString(choice.Content, 200), len(choice.ToolCalls), len(choice.Thinking))
+		for j, t := range choice.Thinking {
+			debugLog("AgenticClient:   thinking[%d]: text=%q sig=%v",
+				j, truncateString(t.Text, 150), t.Signature != "")
 		}
 
 		// If no tool calls, check if we should nudge the agent
@@ -325,6 +287,23 @@ func (c *agenticClientImpl) runQuestionLoop(
 		// Reset consecutive no-tool counter when tools are used
 		consecutiveNoTools = 0
 
+		// Build the assistant message with all parts from the response:
+		// thinking blocks, text content, and tool calls.
+		var aiParts []llmprovider.Part
+		for _, t := range choice.Thinking {
+			aiParts = append(aiParts, t)
+		}
+		if choice.Content != "" {
+			aiParts = append(aiParts, llmprovider.TextPart{Text: choice.Content})
+		}
+		for _, tc := range choice.ToolCalls {
+			aiParts = append(aiParts, tc)
+		}
+		messages = append(messages, llmprovider.Message{
+			Role:  llmprovider.RoleAI,
+			Parts: aiParts,
+		})
+
 		// Validate submit_answer is called alone
 		hasSubmitAnswer := false
 		for _, toolCall := range choice.ToolCalls {
@@ -335,57 +314,41 @@ func (c *agenticClientImpl) runQuestionLoop(
 		}
 		if hasSubmitAnswer && len(choice.ToolCalls) > 1 {
 			debugLog("AgenticClient: submit_answer called with other tools - rejecting all")
-			// Add a single AI message with ALL tool calls so every
-			// tool_result below has a matching tool_use in the preceding
-			// assistant message.
-			aiParts := make([]llmprovider.Part, len(choice.ToolCalls))
-			for i, tc := range choice.ToolCalls {
-				aiParts[i] = tc
-			}
-			aiMessage := llmprovider.Message{
-				Role:  llmprovider.RoleAI,
-				Parts: aiParts,
-			}
-			messages = append(messages, aiMessage)
+			var resultParts []llmprovider.Part
 			for _, toolCall := range choice.ToolCalls {
 				toolCallsRemaining--
-				errorResponse := llmprovider.Message{
-					Role: llmprovider.RoleTool,
-					Parts: []llmprovider.Part{
-						llmprovider.ToolResultPart{
-							ToolCallID: toolCall.ID,
-							Name:       toolCall.Name,
-							Content:    submitAnswerAloneError,
-						},
-					},
-				}
-				messages = append(messages, errorResponse)
+				resultParts = append(resultParts, llmprovider.ToolResultPart{
+					ToolCallID: toolCall.ID,
+					Name:       toolCall.Name,
+					Content:    submitAnswerAloneError,
+				})
 			}
+			messages = append(messages, llmprovider.Message{
+				Role:  llmprovider.RoleTool,
+				Parts: resultParts,
+			})
 			continue
 		}
 
-		// Process each tool call as a separate AI message + tool result pair.
-		// This is the "split" part of the merge-then-split pattern.
-		//
-		// Create one AI message per tool call, each followed by its tool_result.
-		// This keeps the conversation in strict alternating assistant/user order
-		// as required by Anthropic's API.
+		// Execute tool calls and collect results into a single tool message.
+		var resultParts []llmprovider.Part
+		var answer *AnswerSchema
 		for i, toolCall := range choice.ToolCalls {
 			toolCallsRemaining--
-
-			aiMessage := llmprovider.Message{
-				Role:  llmprovider.RoleAI,
-				Parts: []llmprovider.Part{toolCall},
-			}
-			messages = append(messages, aiMessage)
-
-			response, answer := c.processToolCall(toolCall, i, len(choice.ToolCalls))
-			messages = append(messages, response)
-			if answer != nil {
-				debugLog("AgenticClient: received answer for question %d", questionIndex+1)
-				return messages, answer, nil
+			response, ans := c.processToolCall(toolCall, i, len(choice.ToolCalls))
+			resultParts = append(resultParts, response.Parts...)
+			if ans != nil {
+				answer = ans
 			}
 		}
+		messages = append(messages, llmprovider.Message{
+			Role:  llmprovider.RoleTool,
+			Parts: resultParts,
+		})
+		if answer != nil {
+			debugLog("AgenticClient: received answer for question %d", questionIndex+1)
+			return messages, answer, nil
+		}
 	}
 
 	// Budget exhausted without answer

From 53adf156ca3b4c044381bf87cd597d5d750645e8 Mon Sep 17 00:00:00 2001
From: Esteban Beltran <sergies@gmail.com>
Date: Thu, 5 Mar 2026 09:19:34 +0100
Subject: [PATCH 09/10] pass tools on llm call

---
 pkg/llmclient/agentic_client.go |  64 ++++--------
 pkg/llmclient/agentic_tools.go  | 174 ++++++++++++++++----------------
 pkg/llmclient/agentic_types.go  | 118 ++++++++++++++++++++++
 3 files changed, 225 insertions(+), 131 deletions(-)
 create mode 100644 pkg/llmclient/agentic_types.go

diff --git a/pkg/llmclient/agentic_client.go b/pkg/llmclient/agentic_client.go
index 7c4b04dc..9a6cd7a4 100644
--- a/pkg/llmclient/agentic_client.go
+++ b/pkg/llmclient/agentic_client.go
@@ -19,24 +19,6 @@ const (
 	maxConsecutiveNoTools     = 5
 	retryDelay                = 2 * time.Second
 
-	systemPrompt = `You are a code analysis assistant. You have tools to explore code in a repository.
-
-AVAILABLE TOOLS:
-- list_directory: List files at a path. Use "." for root.
-- read_file: Read a file's contents. This is your primary tool for understanding code.
-- grep: Search for a pattern across files.
-- git: Run read-only git commands (log, show, diff, status, etc.)
-- submit_answer: Submit your answers.
-
-STRATEGY:
-1. Use list_directory to see what files exist
-2. Use read_file to read the source code files
-3. Analyze the code to answer the question
-
-You can only use one tool at a time.
-IMPORTANT: You are in non-interactive mode. No one will read your text answers, only tools.
-When you have gathered enough information, use submit_answer to provide your answer.`
-
 	budgetNudgePrompt = `You have only %d tool calls remaining. Wrap up your investigation and call submit_answer now with whatever information you have gathered so far.`
 
 	useToolsReminderPrompt = `You are in non-interactive mode. You must start using your tools now to explore the repository. When you have enough information, use submit_answer to provide your answer.`
@@ -44,22 +26,6 @@ When you have gathered enough information, use submit_answer to provide your ans
 	submitAnswerAloneError = `Error: submit_answer must be called alone. When you have an answer, call submit_answer as a single tool call without any other tools in the same response.`
 )
 
-// AnswerSchema represents the structured response from the agentic client
-type AnswerSchema struct {
-	Question    string   `json:"question"`
-	Answer      string   `json:"answer"`
-	ShortAnswer bool     `json:"short_answer"`
-	Files       []string `json:"files,omitempty"`
-	CodeSnippet string   `json:"code_snippet,omitempty"`
-}
-
-// AgenticCallOptions contains configuration for the agentic LLM call
-type AgenticCallOptions struct {
-	Model    string // e.g. "gemini-2.0-flash"
-	Provider string // "google", "anthropic", "openai"
-	APIKey   string
-}
-
 // AgenticClient is an interface for agentic LLM interactions
 type AgenticClient interface {
 	CallLLM(ctx context.Context, questions []string, repositoryPath string) ([]AnswerSchema, error)
@@ -67,11 +33,12 @@ type AgenticClient interface {
 
 // agenticClientImpl implements AgenticClient
 type agenticClientImpl struct {
-	apiKey   string
-	model    string
-	provider string
-	tools    []llmprovider.Tool
-	executor *toolExecutor
+	apiKey       string
+	model        string
+	provider     string
+	tools        []llmprovider.Tool
+	systemPrompt string
+	executor     *toolExecutor
 }
 
 // NewAgenticClient creates a new AgenticClient with the given options
@@ -88,10 +55,18 @@ func NewAgenticClient(opts *AgenticCallOptions) (AgenticClient, error) {
 	if opts.Provider == "" {
 		return nil, fmt.Errorf("provider is required")
 	}
+
+	tools, err := resolveTools(opts)
+	if err != nil {
+		return nil, fmt.Errorf("resolving tools: %w", err)
+	}
+
 	return &agenticClientImpl{
-		apiKey:   opts.APIKey,
-		model:    opts.Model,
-		provider: opts.Provider,
+		apiKey:       opts.APIKey,
+		model:        opts.Model,
+		provider:     opts.Provider,
+		tools:        tools,
+		systemPrompt: buildSystemPrompt(opts.SystemPrompt, tools),
 	}, nil
 }
 
@@ -118,13 +93,12 @@ func (c *agenticClientImpl) CallLLM(
 		return nil, fmt.Errorf("failed to initialize LLM: %w", err)
 	}
 
-	// Initialize tools and executor for this repository
-	c.tools = buildAgenticTools()
+	// Initialize executor for this repository
 	c.executor = newToolExecutor(repositoryPath)
 
 	// Build initial messages with system prompt only (no user message yet)
 	messages := []llmprovider.Message{
-		llmprovider.TextMessage(llmprovider.RoleSystem, systemPrompt),
+		llmprovider.TextMessage(llmprovider.RoleSystem, c.systemPrompt),
 	}
 
 	// Print debug log file path before starting the loop
diff --git a/pkg/llmclient/agentic_tools.go b/pkg/llmclient/agentic_tools.go
index c5b55f5c..71a380a0 100644
--- a/pkg/llmclient/agentic_tools.go
+++ b/pkg/llmclient/agentic_tools.go
@@ -46,110 +46,112 @@ var blockedGitFlags = []string{
 	"--run",
 }
 
-// buildAgenticTools returns the list of tools available to the agent
-func buildAgenticTools() []llmprovider.Tool {
-	return []llmprovider.Tool{
-		{
-			Type: "function",
-			Function: &llmprovider.FunctionDef{
-				Name:        "read_file",
-				Description: "Read the contents of a file at the given path",
-				Parameters: map[string]interface{}{
-					"type": "object",
-					"properties": map[string]interface{}{
-						"path": map[string]interface{}{
-							"type":        "string",
-							"description": "The relative path to the file to read",
-						},
+// toolRegistry maps AgenticTool names to their llmprovider.Tool definitions.
+var toolRegistry = map[AgenticTool]llmprovider.Tool{
+	ToolReadFile: {
+		Type: "function",
+		Function: &llmprovider.FunctionDef{
+			Name:        "read_file",
+			Description: "Read the contents of a file at the given path",
+			Parameters: map[string]interface{}{
+				"type": "object",
+				"properties": map[string]interface{}{
+					"path": map[string]interface{}{
+						"type":        "string",
+						"description": "The relative path to the file to read",
 					},
-					"required": []string{"path"},
 				},
+				"required": []string{"path"},
 			},
 		},
-		{
-			Type: "function",
-			Function: &llmprovider.FunctionDef{
-				Name:        "list_directory",
-				Description: "List files and directories at the given path",
-				Parameters: map[string]interface{}{
-					"type": "object",
-					"properties": map[string]interface{}{
-						"path": map[string]interface{}{
-							"type":        "string",
-							"description": "The relative path to the directory to list (use '.' for root)",
-						},
+	},
+	ToolListDirectory: {
+		Type: "function",
+		Function: &llmprovider.FunctionDef{
+			Name:        "list_directory",
+			Description: "List files and directories at the given path",
+			Parameters: map[string]interface{}{
+				"type": "object",
+				"properties": map[string]interface{}{
+					"path": map[string]interface{}{
+						"type":        "string",
+						"description": "The relative path to the directory to list (use '.' for root)",
 					},
-					"required": []string{"path"},
 				},
+				"required": []string{"path"},
 			},
 		},
-		{
-			Type: "function",
-			Function: &llmprovider.FunctionDef{
-				Name:        "grep",
-				Description: "Search for a pattern in files. Returns matching lines with file names and line numbers.",
-				Parameters: map[string]interface{}{
-					"type": "object",
-					"properties": map[string]interface{}{
-						"pattern": map[string]interface{}{
-							"type":        "string",
-							"description": "The pattern to search for",
-						},
-						"path": map[string]interface{}{
-							"type":        "string",
-							"description": "Optional: directory or file to search in (defaults to '.')",
-						},
+	},
+	ToolGrep: {
+		Type: "function",
+		Function: &llmprovider.FunctionDef{
+			Name:        "grep",
+			Description: "Search for a pattern in files. Returns matching lines with file names and line numbers.",
+			Parameters: map[string]interface{}{
+				"type": "object",
+				"properties": map[string]interface{}{
+					"pattern": map[string]interface{}{
+						"type":        "string",
+						"description": "The pattern to search for",
+					},
+					"path": map[string]interface{}{
+						"type":        "string",
+						"description": "Optional: directory or file to search in (defaults to '.')",
 					},
-					"required": []string{"pattern"},
 				},
+				"required": []string{"pattern"},
 			},
 		},
-		{
-			Type: "function",
-			Function: &llmprovider.FunctionDef{
-				Name:        "git",
-				Description: "Execute a git command. Only allowed commands: log, show, diff, status, ls-files, blame, rev-parse, cat-file, checkout, fetch, pull, branch, tag.",
-				Parameters: map[string]interface{}{
-					"type": "object",
-					"properties": map[string]interface{}{
-						"args": map[string]interface{}{
-							"type":        "string",
-							"description": "The git command arguments (e.g., 'log -n 5' or 'show HEAD')",
-						},
+	},
+	ToolGit: {
+		Type: "function",
+		Function: &llmprovider.FunctionDef{
+			Name:        "git",
+			Description: "Execute a git command. Only allowed commands: log, show, diff, status, ls-files, blame, rev-parse, cat-file, checkout, fetch, pull, branch, tag.",
+			Parameters: map[string]interface{}{
+				"type": "object",
+				"properties": map[string]interface{}{
+					"args": map[string]interface{}{
+						"type":        "string",
+						"description": "The git command arguments (e.g., 'log -n 5' or 'show HEAD')",
 					},
-					"required": []string{"args"},
 				},
+				"required": []string{"args"},
 			},
 		},
-		{
-			Type: "function",
-			Function: &llmprovider.FunctionDef{
-				Name:        "submit_answer",
-				Description: "Submit your final answer to the question. Use this when you have gathered enough information.",
-				Parameters: map[string]interface{}{
-					"type": "object",
-					"properties": map[string]interface{}{
-						"answer": map[string]interface{}{
-							"type":        "string",
-							"description": "Your detailed answer explaining your findings",
-						},
-						"short_answer": map[string]interface{}{
-							"type":        "boolean",
-							"description": "A boolean answer to the question: true means YES, false means NO. For example, if the question is 'Is the sky blue?' the short_answer is true. If the question is 'Is the sky green?' the short_answer is false.",
-						},
-						"files": map[string]interface{}{
-							"type":        "array",
-							"items":       map[string]interface{}{"type": "string"},
-							"description": "List of relevant files. Pass an empty array if not relevant.",
-						},
-						"code_snippet": map[string]interface{}{
-							"type":        "string",
-							"description": "A relevant code snippet. Pass an empty string if not relevant.",
-						},
+	},
+}
+
+// submitAnswerTool returns the submit_answer tool definition.
+func submitAnswerTool() llmprovider.Tool {
+	return llmprovider.Tool{
+		Type: "function",
+		Function: &llmprovider.FunctionDef{
+			Name:        "submit_answer",
+			Description: "Submit your final answer to the question. Use this when you have gathered enough information.",
+			Parameters: map[string]interface{}{
+				"type": "object",
+				"properties": map[string]interface{}{
+					"answer": map[string]interface{}{
+						"type":        "string",
+						"description": "Your detailed answer explaining your findings",
+					},
+					"short_answer": map[string]interface{}{
+						"type":        "boolean",
+						"description": "A boolean answer to the question: true means YES, false means NO. For example, if the question is 'Is the sky blue?' the short_answer is true. If the question is 'Is the sky green?' the short_answer is false.",
+					},
+					"files": map[string]interface{}{
+						"type":        "array",
+						"items":       map[string]interface{}{"type": "string"},
+						"description": "List of relevant files. Pass an empty array if not relevant.",
+					},
+					"code_snippet": map[string]interface{}{
+						"type":        "string",
+						"description": "A relevant code snippet. Pass an empty string if not relevant.",
 					},
-					"required":             []string{"answer", "short_answer", "files", "code_snippet"},
-					"additionalProperties": false,
 				},
+				"required":             []string{"answer", "short_answer", "files", "code_snippet"},
+				"additionalProperties": false,
 			},
 		},
 	}
diff --git a/pkg/llmclient/agentic_types.go b/pkg/llmclient/agentic_types.go
new file mode 100644
index 00000000..8439a568
--- /dev/null
+++ b/pkg/llmclient/agentic_types.go
@@ -0,0 +1,118 @@
+package llmclient
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/grafana/plugin-validator/pkg/llmprovider"
+)
+
+const defaultSystemPromptIntro = `You are a code analysis assistant. You have tools to explore code in a repository.
+
+STRATEGY:
+1. Use list_directory to see what files exist
+2. Use read_file to read the source code files
+3. Analyze the code to answer the question` // intro used by buildSystemPrompt when none is supplied
+
+// AgenticTool names an exploration tool available to the agent; each value is a key of toolRegistry.
+type AgenticTool string
+
+const (
+	ToolReadFile      AgenticTool = "read_file"      // read the contents of a file
+	ToolListDirectory AgenticTool = "list_directory" // list files and directories at a path
+	ToolGrep          AgenticTool = "grep"           // search for a pattern in files
+	ToolGit           AgenticTool = "git"            // run one of the allowed git commands
+)
+
+// ToolSet is a preset collection of tools, consulted only when AgenticCallOptions.Tools is nil.
+type ToolSet int
+
+const (
+	// DefaultTooling includes all exploration tools (read_file, list_directory,
+	// grep, git) plus submit_answer. This is the zero value.
+	DefaultTooling ToolSet = iota
+	// NoTools includes only submit_answer with no exploration tools.
+	NoTools
+)
+
+// AgenticCallOptions configures an agentic client; it is consumed by NewAgenticClient.
+type AgenticCallOptions struct {
+	Model    string // e.g. "gemini-2.5-flash"
+	Provider string // "google", "anthropic", "openai"
+	APIKey   string
+
+	// Tools selects specific exploration tools. When non-nil (even if empty), it
+	// takes precedence over ToolSet. submit_answer is always included regardless.
+	Tools []AgenticTool
+
+	// ToolSet selects a preset collection of tools. Used when Tools is nil.
+	// The zero value (DefaultTooling) includes all exploration tools.
+	ToolSet ToolSet
+
+	// SystemPrompt overrides the intro portion of the system prompt. The
+	// AVAILABLE TOOLS section and the non-interactive reminder are always
+	// auto-appended. When empty, a default intro is used.
+	SystemPrompt string
+}
+
+// AnswerSchema is the structured answer returned by the agentic client; its JSON names match the submit_answer arguments.
+type AnswerSchema struct {
+	Question    string   `json:"question"`               // not a submit_answer argument; presumably set by the client — TODO confirm
+	Answer      string   `json:"answer"`                 // detailed answer explaining the findings
+	ShortAnswer bool     `json:"short_answer"`           // true means YES, false means NO
+	Files       []string `json:"files,omitempty"`        // relevant files; may be empty
+	CodeSnippet string   `json:"code_snippet,omitempty"` // relevant code snippet; may be empty
+}
+
+// defaultTools returns the exploration tools selected by the DefaultTooling preset, in listing order.
+func defaultTools() []AgenticTool {
+	return []AgenticTool{ToolReadFile, ToolListDirectory, ToolGrep, ToolGit}
+}
+
+// resolveTools builds the final []llmprovider.Tool list from the options.
+// submit_answer is always appended.
+func resolveTools(opts *AgenticCallOptions) ([]llmprovider.Tool, error) {
+	var selected []AgenticTool
+	if opts.Tools != nil {
+		selected = opts.Tools
+	} else {
+		switch opts.ToolSet {
+		case DefaultTooling:
+			selected = defaultTools()
+		case NoTools:
+			// empty
+		default:
+			return nil, fmt.Errorf("unknown tool set: %d", opts.ToolSet)
+		}
+	}
+
+	tools := make([]llmprovider.Tool, 0, len(selected)+1)
+	for _, name := range selected {
+		def, ok := toolRegistry[name]
+		if !ok {
+			return nil, fmt.Errorf("unknown tool: %q", name)
+		}
+		tools = append(tools, def)
+	}
+	tools = append(tools, submitAnswerTool())
+	return tools, nil
+}
+
+// buildSystemPrompt composes the system prompt: intro (or the default intro when empty), an
+// AVAILABLE TOOLS list of "- name: description" lines, then the non-interactive reminder.
+func buildSystemPrompt(intro string, tools []llmprovider.Tool) string {
+	var sb strings.Builder
+	if intro != "" {
+		sb.WriteString(intro)
+	} else {
+		sb.WriteString(defaultSystemPromptIntro)
+	}
+	sb.WriteString("\n\nAVAILABLE TOOLS:\n")
+	for i := range tools {
+		if fn := tools[i].Function; fn != nil { // entries without a function definition are not listed
+			sb.WriteString("- " + fn.Name + ": " + fn.Description + "\n")
+		}
+	}
+	sb.WriteString("\nIMPORTANT: You are in non-interactive mode. No one will read your text answers, only tools.\nWhen you have gathered enough information, use submit_answer to provide your answer.")
+	return sb.String()
+}

From 041c571be8542658cc85efeda7872c0fc88c2ee3 Mon Sep 17 00:00:00 2001
From: Esteban Beltran <sergies@gmail.com>
Date: Thu, 5 Mar 2026 14:42:31 +0100
Subject: [PATCH 10/10] remove stale doc

---
 docs/anthropic-choices-behavior.md | 70 ------------------------------
 1 file changed, 70 deletions(-)
 delete mode 100644 docs/anthropic-choices-behavior.md

diff --git a/docs/anthropic-choices-behavior.md b/docs/anthropic-choices-behavior.md
deleted file mode 100644
index a578e4c3..00000000
--- a/docs/anthropic-choices-behavior.md
+++ /dev/null
@@ -1,70 +0,0 @@
-# Anthropic Choices and Message Serialization in go-langchain
-
-## Overview
-Anthropic's response structure and go-langchain's serialization behavior require special handling when building multi-turn conversations with tool use.
-
-## Response Structure (Anthropic → go-langchain)
-
-Anthropic API returns responses as an array of **content blocks**:
-```
-[text_block, tool_use_block, tool_use_block, ...]
-```
-
-go-langchain converts each content block into a **separate ContentChoice**:
-- `type: "text"` → `ContentChoice{Content: "...", ToolCalls: []}`
-- `type: "tool_use"` → `ContentChoice{Content: "", ToolCalls: [{...}]}`
-- `type: "thinking"` → `ContentChoice{Content: "", GenerationInfo: {...}}`
-
-**Key insight:** One Anthropic response can produce multiple Choices. For example:
-- Response with text + 2 tool calls → 3 Choices
-- Response with just text → 1 Choice
-
-## Serialization Constraint (go-langchain → Anthropic)
-
-The critical limitation is in `handleAIMessage()`:
-```go
-if toolCall, ok := msg.Parts[0].(llms.ToolCall); ok {
-    // Only Parts[0] is serialized!
-}
-```
-
-**This means:**
-- Only `Parts[0]` of a MessageContent is serialized back to Anthropic
-- If you create `MessageContent{Parts: [toolCall1, toolCall2]}`, only `toolCall1` is sent
-- Multiple ToolCalls in one message **will lose data**
-
-## Required Pattern: Interleaved Messages
-
-To work around this limitation, tool calls must be **interleaved** as separate messages:
-
-```
-AI message: Parts[toolCall1]
-Tool message: Parts[toolResult1]
-AI message: Parts[toolCall2]
-Tool message: Parts[toolResult2]
-```
-
-Not:
-```
-AI message: Parts[toolCall1, toolCall2]  // toolCall2 would be lost!
-Tool message: Parts[toolResult1, toolResult2]
-```
-
-## Why Merging Choices is Necessary
-
-When processing Anthropic's response:
-1. Anthropic returns separate content blocks (potentially text + multiple tools)
-2. go-langchain creates one Choice per block
-3. We must merge these Choices to get the complete response
-4. Then we must split them back into individual AI messages for serialization
-
-The merge preserves all information for processing, but the split ensures proper serialization.
-
-## Implementation Details in agentic_client.go
-
-The choice-merging code performs this merge:
-- Collects all content parts from separate Choices
-- Collects all ToolCalls from separate Choices
-- Creates one merged view for processing
-
-Then later in the tool call processing, it **reverses** this by creating one AI message per ToolCall to avoid the serialization bug.