Merged
28 changes: 18 additions & 10 deletions lib/crewai/src/crewai/llm.py
@@ -925,11 +925,12 @@ def _handle_streaming_response(
except Exception as e:
logging.debug(f"Error checking for tool calls: {e}")

# Track token usage and log callbacks if available in streaming mode
if usage_info:
self._track_token_usage_internal(usage_info)
self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)

if not tool_calls or not available_functions:
# Track token usage and log callbacks if available in streaming mode
if usage_info:
self._track_token_usage_internal(usage_info)
self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)

if response_model and self.is_litellm:
instructor_instance = InternalInstructor(
@@ -962,12 +963,7 @@ def _handle_streaming_response(
if tool_result is not None:
return tool_result

# --- 10) Track token usage and log callbacks if available in streaming mode
if usage_info:
self._track_token_usage_internal(usage_info)
self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)

# --- 11) Emit completion event and return response
# --- 10) Emit completion event and return response
self._handle_emit_call_events(
response=full_response,
call_type=LLMCallType.LLM_CALL,
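These two hunks are one logical change: in the streaming path, token-usage tracking and the streaming callbacks move out of the `if not tool_calls` branch and out of the old step 10 near the end of the method, and now run once, before any tool-call handling. Previously, a streaming call that executed a tool and returned its result early never recorded its usage. A simplified sketch of the reordered tail of `_handle_streaming_response` (the tool-execution helper name is hypothetical; the other names match the diff):

# Sketch only: the real method body is much longer.
if usage_info:  # runs unconditionally now, so tool-call paths are counted too
    self._track_token_usage_internal(usage_info)
    self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)

if not tool_calls or not available_functions:
    ...  # plain-text path: optional response_model handling, then return

tool_result = self._execute_stream_tool_calls(...)  # hypothetical helper name
if tool_result is not None:
    return tool_result  # old code returned here without ever tracking usage

# --- 10) Emit completion event and return response
self._handle_emit_call_events(
    response=full_response,
    call_type=LLMCallType.LLM_CALL,
)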
@@ -1148,6 +1144,10 @@ def _handle_non_streaming_response(
if response_model:
params["response_model"] = response_model
response = litellm.completion(**params)

if hasattr(response,"usage") and not isinstance(response.usage, type) and response.usage:
usage_info = response.usage
self._track_token_usage_internal(usage_info)

except ContextWindowExceededError as e:
# Convert litellm's context window error to our own exception type
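The new guard here (mirrored in the async path in the next hunk) is dense: `hasattr` protects against response objects with no usage attribute, `not isinstance(response.usage, type)` skips the case where `usage` is a class rather than an instance (as can happen with bare mocks or stubbed responses), and the final truthiness check skips empty usage. A minimal sketch of the predicate in isolation, with hypothetical stand-in objects:

from types import SimpleNamespace

def should_track(response) -> bool:
    # Mirrors the guard added in both non-streaming paths of the diff.
    return (
        hasattr(response, "usage")
        and not isinstance(response.usage, type)
        and bool(response.usage)
    )

assert not should_track(object())                     # no usage attribute at all
assert not should_track(SimpleNamespace(usage=None))  # usage present but falsy
assert not should_track(SimpleNamespace(usage=dict))  # usage is a class, not an instance
assert should_track(SimpleNamespace(usage=SimpleNamespace(total_tokens=30)))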
@@ -1273,6 +1273,10 @@ async def _ahandle_non_streaming_response(
params["response_model"] = response_model
response = await litellm.acompletion(**params)

if hasattr(response,"usage") and not isinstance(response.usage, type) and response.usage:
usage_info = response.usage
self._track_token_usage_internal(usage_info)

except ContextWindowExceededError as e:
raise LLMContextLengthExceededError(str(e)) from e

@@ -1359,6 +1363,7 @@ async def _ahandle_streaming_response(
"""
full_response = ""
chunk_count = 0

usage_info = None

accumulated_tool_args: defaultdict[int, AccumulatedToolArgs] = defaultdict(
@@ -1444,6 +1449,9 @@ async def _ahandle_streaming_response(
end_time=0,
)

if usage_info:
self._track_token_usage_internal(usage_info)

if accumulated_tool_args and available_functions:
# Convert accumulated tool args to ChatCompletionDeltaToolCall objects
tool_calls_list: list[ChatCompletionDeltaToolCall] = [
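The async streaming path gets the same treatment: usage is now tracked before the accumulated tool arguments are converted and executed. How `usage_info` gets populated is outside this hunk; assuming litellm follows the OpenAI convention of attaching a usage object to the final streamed chunk when `stream_options={"include_usage": True}` is requested, a minimal capture loop looks like this (a sketch, not the method's actual chunk loop):

import litellm

async def stream_and_capture_usage(messages: list[dict]) -> tuple[str, object | None]:
    # Accumulate streamed text and grab usage from whichever chunk carries it.
    full_response, usage_info = "", None
    stream = await litellm.acompletion(
        model="gpt-4o-mini",
        messages=messages,
        stream=True,
        stream_options={"include_usage": True},  # assumption: OpenAI-style usage chunk
    )
    async for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            full_response += chunk.choices[0].delta.content
        if getattr(chunk, "usage", None):  # typically only the final chunk carries this
            usage_info = chunk.usage
    return full_response, usage_info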
@@ -0,0 +1,113 @@
interactions:
- request:
body: '{"messages":[{"role":"user","content":"Tell me a joke."}],"model":"gpt-4o-mini","stop":[]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '90'
content-type:
- application/json
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- async:asyncio
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 2.14.0
x-stainless-raw-response:
- 'true'
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.11.14
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-CvErx9mbnUKFHKkhPChO93eUzKJqy\",\n \"object\":
\"chat.completion\",\n \"created\": 1767757889,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"Why did the scarecrow win an award?
\\n\\nBecause he was outstanding in his field!\",\n \"refusal\": null,\n
\ \"annotations\": []\n },\n \"logprobs\": null,\n \"finish_reason\":
\"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": 12,\n \"completion_tokens\":
18,\n \"total_tokens\": 30,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_29330a9688\"\n}\n"
headers:
Access-Control-Expose-Headers:
- ACCESS-CONTROL-XXX
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Wed, 07 Jan 2026 03:51:29 GMT
Server:
- cloudflare
Set-Cookie:
- SET-COOKIE-XXX
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
content-length:
- '887'
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '466'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-envoy-upstream-service-time:
- '483'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1
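This cassette and the next record the same "Tell me a joke." request, one through the async client (`x-stainless-async: async:asyncio`) and one through the sync client, so the usage block above (prompt_tokens 12, completion_tokens 18, total_tokens 30) can be replayed offline and asserted against. A sketch of how a vcrpy cassette like this is typically replayed; the cassette path and the dummy API key are placeholders, since the diff does not show the accompanying test:

import vcr
from openai import OpenAI

replay = vcr.VCR(record_mode="none")  # replay only; never touches the network

with replay.use_cassette("path/to/this_cassette.yaml"):  # placeholder path
    client = OpenAI(api_key="test-key")  # not validated during replay
    resp = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Tell me a joke."}],
        stop=[],
    )
    # These numbers come from the recorded usage block above.
    assert resp.usage.prompt_tokens == 12
    assert resp.usage.total_tokens == 30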
@@ -0,0 +1,113 @@
interactions:
- request:
body: '{"messages":[{"role":"user","content":"Tell me a joke."}],"model":"gpt-4o-mini","stop":[]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '90'
content-type:
- application/json
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 2.14.0
x-stainless-raw-response:
- 'true'
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.11.14
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-CugAsv9iAHdiGddGDHcZWEp7ZV7cB\",\n \"object\":
\"chat.completion\",\n \"created\": 1767624522,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"Why don't skeletons fight each other?
\\n\\nThey don't have the guts!\",\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 12,\n \"completion_tokens\":
15,\n \"total_tokens\": 27,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_29330a9688\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Mon, 05 Jan 2026 14:48:43 GMT
Server:
- cloudflare
Set-Cookie:
- SET-COOKIE-XXX
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
content-length:
- '874'
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '424'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-envoy-upstream-service-time:
- '1017'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1