Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 51 additions & 5 deletions Docs/LLMSYSTEM.md
Original file line number Diff line number Diff line change
Expand Up @@ -662,20 +662,56 @@ This macro system ensures dynamic, contextual content that adapts to your curren

The LLMEngine provides several events for real-time updates:

### Structured Streaming Events (Recommended)

The new channel-aware events provide richer information and cleanly separate different types of inference content:

```csharp
// Full prompt generation
LLMEngine.OnFullPromptReady += (sender, prompt) =>
// Channel-aware streaming — receives typed segments (Text, Thinking, ToolCall, etc.)
LLMEngine.OnInferenceSegment += (sender, segment) =>
{
Console.WriteLine($"Generated prompt: {prompt}");
switch (segment.Channel)
{
case InferenceChannel.Text:
Console.Write(segment.Text); // Normal visible response text
break;
case InferenceChannel.Thinking:
// Chain-of-thought / thinking block content (hidden from users)
break;
case InferenceChannel.ToolCall when segment.IsComplete:
Console.WriteLine("LLM requested a tool call");
break;
}
};

// Streaming text generation
// Structured completion — receives the full result with separated channels
LLMEngine.OnInferenceCompleted += (sender, result) =>
{
Console.WriteLine($"\nResponse: {result.Response}");

if (result.ThinkingContent != null)
Console.WriteLine($"[Thinking: {result.ThinkingContent}]");

Console.WriteLine($"Finish reason: {result.FinishReason}");

// Log only the visible response to history
LLMEngine.History.LogMessage(AuthorRole.Assistant, result.Response,
LLMEngine.User, LLMEngine.Bot);
};
```

### Legacy Streaming Events

The following events are still supported for backward compatibility but are deprecated. Migrate to the structured events above for new code.

```csharp
// [Obsolete] Streaming text generation — fires for every token
LLMEngine.OnInferenceStreamed += (sender, token) =>
{
Console.Write(token); // Real-time text output
};

// Generation completion
// [Obsolete] Generation completion — returns the full raw response string
LLMEngine.OnInferenceEnded += (sender, fullResponse) =>
{
Console.WriteLine($"\nGeneration complete: {fullResponse}");
Expand All @@ -684,6 +720,16 @@ LLMEngine.OnInferenceEnded += (sender, fullResponse) =>
LLMEngine.History.LogMessage(AuthorRole.Assistant, fullResponse,
LLMEngine.User, LLMEngine.Bot);
};
```

### Other Events

```csharp
// Full prompt generation
LLMEngine.OnFullPromptReady += (sender, prompt) =>
{
Console.WriteLine($"Generated prompt: {prompt}");
};

// Quick inference (non-streaming) completion
LLMEngine.OnQuickInferenceEnded += (sender, response) =>
Expand Down
120 changes: 120 additions & 0 deletions LLM/InferenceStream.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
using System;
using System.Collections.Generic;

namespace LetheAISharp.LLM
{
/// <summary>
/// Identifies the content channel of an inference segment.
/// </summary>
public enum InferenceChannel
{
/// <summary> Normal visible text response. </summary>
Text,
/// <summary> Chain-of-thought / thinking block content. </summary>
Thinking,
/// <summary> The LLM is requesting a tool/function call. </summary>
ToolCall,
/// <summary> Result being fed back after tool execution. </summary>
ToolResult,
/// <summary> Error or system-level message. </summary>
System
}

/// <summary>
/// A single typed chunk emitted during streaming inference.
/// </summary>
public class InferenceSegment
{
/// <summary> What kind of content this segment carries. </summary>
public InferenceChannel Channel { get; init; }

/// <summary> The text delta (populated for Text and Thinking channels). </summary>
public string? Text { get; init; }

/// <summary> Tool call data (populated for ToolCall channel). </summary>
public ToolCallInfo? ToolCall { get; init; }

/// <summary> Tool result data (populated for ToolResult channel). </summary>
public ToolResultInfo? ToolResult { get; init; }

/// <summary> Whether this is the final chunk in its channel. </summary>
public bool IsComplete { get; init; }
}

/// <summary>
/// Data for an LLM-requested tool/function call.
/// </summary>
public class ToolCallInfo
{
/// <summary> Unique identifier for this call. </summary>
public string CallId { get; init; } = string.Empty;

/// <summary> Name of the function the LLM wants to invoke. </summary>
public string FunctionName { get; init; } = string.Empty;

/// <summary> Raw JSON arguments string. </summary>
public string ArgumentsJson { get; init; } = string.Empty;
}

/// <summary>
/// Data for the result returned after a tool call.
/// </summary>
public class ToolResultInfo
{
/// <summary> Identifier matching the originating <see cref="ToolCallInfo.CallId"/>. </summary>
public string CallId { get; init; } = string.Empty;

/// <summary> Name of the function that was invoked. </summary>
public string FunctionName { get; init; } = string.Empty;

/// <summary> Whether the tool execution succeeded. </summary>
public bool Success { get; init; }

/// <summary> Raw JSON result string. </summary>
public string ResultJson { get; init; } = string.Empty;

/// <summary> Error description if <see cref="Success"/> is false. </summary>
public string? Error { get; init; }
}

/// <summary>
/// The final structured result of a complete inference cycle.
/// </summary>
public class InferenceResult
{
/// <summary> The final visible text response (thinking blocks removed). </summary>
public string Response { get; init; } = string.Empty;

/// <summary> The thinking/CoT block content, if any. </summary>
public string? ThinkingContent { get; init; }

/// <summary> All tool calls made during this inference cycle. </summary>
public List<ToolCallRecord> ToolCalls { get; init; } = [];

/// <summary> The finish reason reported by the backend (e.g. "stop", "length", "tool_calls"). </summary>
public string? FinishReason { get; init; }
}

/// <summary>
/// A complete record of a single tool call and its result.
/// </summary>
public class ToolCallRecord
{
/// <summary> Unique identifier for this call. </summary>
public string CallId { get; init; } = string.Empty;

/// <summary> Name of the function that was invoked. </summary>
public string FunctionName { get; init; } = string.Empty;

/// <summary> Raw JSON arguments string. </summary>
public string ArgumentsJson { get; init; } = string.Empty;

/// <summary> Raw JSON result string. </summary>
public string ResultJson { get; init; } = string.Empty;

/// <summary> Whether the tool execution succeeded. </summary>
public bool Success { get; init; }

/// <summary> How long the tool execution took. </summary>
public System.TimeSpan Duration { get; init; }
}
}
60 changes: 60 additions & 0 deletions LLM/LLMEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -73,14 +73,21 @@ public static int MaxContextLength {
/// <summary> Called when this library has generated the full prompt, returns full prompt </summary>
public static event EventHandler<string>? OnFullPromptReady;
/// <summary> Called during inference each time the LLM outputs a new token, returns the generated token </summary>
[Obsolete("Use OnInferenceSegment for channel-aware streaming. This event only receives Text channel content.")]
public static event EventHandler<string>? OnInferenceStreamed;
/// <summary> Called once the inference has ended, returns the full string </summary>
[Obsolete("Use OnInferenceCompleted for structured results including thinking content and tool calls.")]
public static event EventHandler<string>? OnInferenceEnded;
/// <summary> Called when the system changes states (no init, busy, ready) </summary>
public static event EventHandler<SystemStatus>? OnStatusChanged;
/// <summary> Called when the bot persona is changed, returns the new bot (sender is always null) </summary>
public static event EventHandler<BasePersona>? OnBotChanged;

/// <summary> Called during inference with typed, channel-tagged segments. Provides richer information than OnInferenceStreamed. </summary>
public static event EventHandler<InferenceSegment>? OnInferenceSegment;
/// <summary> Called when a complete inference cycle finishes. Provides structured results including thinking content and tool call records. </summary>
public static event EventHandler<InferenceResult>? OnInferenceCompleted;

/// <summary> Set to true if the backend supports text-to-speech </summary>
public static bool SupportsTTS => Client?.SupportsTTS ?? false;

Expand All @@ -97,9 +104,13 @@ public static int MaxContextLength {

private static void RaiseOnFullPromptReady(string fullprompt) => OnFullPromptReady?.Invoke(Bot, fullprompt);
private static void RaiseOnStatusChange(SystemStatus newStatus) => OnStatusChanged?.Invoke(Bot, newStatus);
#pragma warning disable CS0618 // backward-compat raise helpers for obsolete events
private static void RaiseOnInferenceStreamed(string addedString) => OnInferenceStreamed?.Invoke(Bot, addedString);
private static void RaiseOnInferenceEnded(string fullString) => OnInferenceEnded?.Invoke(Bot, fullString);
#pragma warning restore CS0618
private static void RaiseOnQuickInferenceEnded(string fullprompt) => OnQuickInferenceEnded?.Invoke(Bot, fullprompt);
private static void RaiseInferenceSegment(InferenceSegment segment) => OnInferenceSegment?.Invoke(Bot, segment);
private static void RaiseInferenceCompleted(InferenceResult result) => OnInferenceCompleted?.Invoke(Bot, result);

/// <summary> List of loaded plugins </summary>
public static List<IContextPlugin> ContextPlugins { get; set; } = [];
Expand Down Expand Up @@ -161,6 +172,9 @@ public static InstructFormat Instruct {

private static SystemStatus status = SystemStatus.NotInit;
private static string StreamingTextProgress = string.Empty;
private static InferenceChannel _currentChannel = InferenceChannel.Text;
private static readonly StringBuilder _thinkingBuffer = new();
private static readonly StringBuilder _textBuffer = new();
private static InstructFormat instruct = new()
{
AddNamesToPrompt = false,
Expand Down Expand Up @@ -396,6 +410,7 @@ public static async Task RerollLastMessage()
if (Status == SystemStatus.Busy)
return;
Status = SystemStatus.Busy;
ResetStreamingState();
StreamingTextProgress = Instruct.GetThinkPrefill();
if (Instruct.PrefillThinking && !string.IsNullOrEmpty(Instruct.ThinkingStart))
{
Expand Down Expand Up @@ -474,6 +489,7 @@ public static async Task SimpleQueryStreaming(object chatlog, CancellationToken

using var _ = await AcquireModelSlotAsync(ctx).ConfigureAwait(false);
Status = SystemStatus.Busy;
ResetStreamingState();
await Client.GenerateTextStreaming(chatlog).ConfigureAwait(false);
}

Expand Down Expand Up @@ -660,14 +676,57 @@ private static void Client_StreamingMessageReceived(object? sender, LLMTokenStre
}
Status = SystemStatus.Ready;
RaiseOnInferenceEnded(response);

// Build structured result for new event
var thinkingContent = _thinkingBuffer.Length > 0 ? _thinkingBuffer.ToString().Trim() : null;
var textResponse = Instruct.IsThinkFormat ? response.RemoveThinkingBlocks() : response;
var inferenceResult = new InferenceResult
{
Response = textResponse,
ThinkingContent = thinkingContent,
ToolCalls = [],
FinishReason = e.FinishReason
};
if (e.FinishReason == "tool_calls")
{
RaiseInferenceSegment(new InferenceSegment { Channel = InferenceChannel.ToolCall, IsComplete = true });
}
RaiseInferenceCompleted(inferenceResult);
}
else
{
StreamingTextProgress += e.Token;

// Detect channel transitions based on thinking delimiters
if (Instruct.IsThinkFormat)
{
_currentChannel = Instruct.IsThinkingPrompt(StreamingTextProgress)
? InferenceChannel.Thinking
: InferenceChannel.Text;
}

// Route token to the appropriate content buffer
if (_currentChannel == InferenceChannel.Thinking)
_thinkingBuffer.Append(e.Token);
else
_textBuffer.Append(e.Token);

RaiseInferenceSegment(new InferenceSegment { Channel = _currentChannel, Text = e.Token, IsComplete = false });
RaiseOnInferenceStreamed(e.Token);
}
}

/// <summary>
/// Resets all per-generation streaming state. Must be called before each new generation.
/// </summary>
private static void ResetStreamingState()
{
_currentChannel = InferenceChannel.Text;
_thinkingBuffer.Clear();
_textBuffer.Clear();
StreamingTextProgress = string.Empty;
}

/// <summary>
/// Change the current bot persona.
/// </summary>
Expand Down Expand Up @@ -887,6 +946,7 @@ private static async Task StartGeneration(SingleMessage message)

var genparams = await GenerateFullPrompt(message, pluginmessage).ConfigureAwait(false);

ResetStreamingState();
StreamingTextProgress = Instruct.GetThinkPrefill();
if (Instruct.PrefillThinking && !string.IsNullOrEmpty(Instruct.ThinkingStart))
{
Expand Down