truefoundry · harshiv-26 · Apr 29, 2026
diff --git a/providers/azure-open-ai/aoai-sora-2025-02-28.yaml b/providers/azure-open-ai/aoai-sora-2025-02-28.yaml
@@ -1,6 +1,8 @@
 costs:
     - output_cost_per_second: 0.1
       region: "*"
+deprecationDate: "2026-02-28"
+isDeprecated: true
 messages:
     options: []
 modalities:
@@ -15,13 +17,13 @@ removeParams:
     - temperature
     - stop
     - top_p
-    - "n"
+    - n
     - response_format
     - stream
     - tool_choice
     - parallel_tool_calls
 sources:
     - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
-status: preview
+status: retired
 supportedModes:
     - video
diff --git a/providers/azure-open-ai/aoai-sora.yaml b/providers/azure-open-ai/aoai-sora.yaml
@@ -4,10 +4,12 @@ costs:
 modalities:
     input:
         - text
+        - image
     output:
         - video
 mode: video
 model: aoai-sora
+provisioning: serverless
 removeParams:
     - max_tokens
     - temperature
@@ -20,6 +22,7 @@ removeParams:
     - parallel_tool_calls
 sources:
     - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
+    - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/video-generation
     - https://azure.microsoft.com/en-us/products/ai-services/video-generation
 status: preview
 supportedModes:

diff --git a/providers/azure-open-ai/azure-tts.yaml b/providers/azure-open-ai/azure-tts.yaml
@@ -8,6 +8,7 @@ modalities:
         - audio
 mode: text_to_speech
 model: azure-tts
+provisioning: serverless
 removeParams:
     - max_tokens
     - temperature
@@ -18,5 +19,6 @@ removeParams:
     - stream
     - tool_choice
     - parallel_tool_calls
+status: active
 supportedModes:
     - text_to_speech
diff --git a/providers/azure-open-ai/codex-mini-2025-05-16.yaml b/providers/azure-open-ai/codex-mini-2025-05-16.yaml
@@ -21,7 +21,7 @@ modalities:
         - pdf
     output:
         - text
-mode: chat
+mode: responses
 model: codex-mini-2025-05-16
 params:
     - defaultValue: 100000
@@ -31,6 +31,7 @@ params:
     - defaultValue: medium
       key: reasoning_effort
       type: string
+provisioning: serverless
 removeParams:
     - max_tokens
     - temperature
@@ -39,8 +40,10 @@ removeParams:
     - stop
     - parallel_tool_calls
 sources:
-    - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
+    - https://learn.microsoft.com/en-us/azure/foundry/openai/concepts/model-retirement-schedule
+    - https://learn.microsoft.com/en-us/azure/foundry/foundry-models/concepts/models-sold-directly-by-azure
+    - https://learn.microsoft.com/en-us/azure/foundry/openai/how-to/codex
 status: active
 supportedModes:
-    - chat
+    - responses
 thinking: true
diff --git a/providers/azure-open-ai/command-r-plus.yaml b/providers/azure-open-ai/command-r-plus.yaml
@@ -1,9 +1,11 @@
 costs:
-    - input_cost_per_token: 0.000003
-      output_cost_per_token: 0.000015
+    - input_cost_per_token: 3e-6
+      output_cost_per_token: 1.5e-5
       region: "*"
+deprecationDate: "2025-09-15"
 features:
     - function_calling
+isDeprecated: true
 limits:
     context_window: 128000
     max_input_tokens: 128000
@@ -18,5 +20,6 @@ mode: chat
 model: command-r-plus
 sources:
     - https://docs.cohere.com/docs/command-r-plus
+status: deprecated
 supportedModes:
     - chat
diff --git a/providers/azure-open-ai/computer-use-preview-2025-04-15.yaml b/providers/azure-open-ai/computer-use-preview-2025-04-15.yaml
@@ -26,6 +26,9 @@ params:
       key: max_tokens
       maxValue: 1024
       minValue: 1
+provisioning: serverless
+sources:
+    - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/computer-use
 status: preview
 supportedModes:
     - chat

diff --git a/providers/azure-open-ai/container.yaml b/providers/azure-open-ai/container.yaml
@@ -1,5 +1,15 @@
 mode: chat
 model: container
+provisioning: serverless
+sources:
+    - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/responses
 status: active
 supportedModes:
     - chat
+
+# Note: "container" is not an LLM but the Azure OpenAI Code Interpreter sandbox
+# session SKU (used by the Assistants/Agents/Responses APIs). The LiteLLM
+# entry only exposes `code_interpreter_cost_per_session`, which has no
+# corresponding field in the #Cost schema, so cost data is omitted.
+# Pricing reference: ~$0.033 per session in US/AU regions
+# (see https://azure.microsoft.com/en-us/pricing/details/azure-openai/).
diff --git a/providers/azure-open-ai/gpt-35-turbo-instruct-0914.yaml b/providers/azure-open-ai/gpt-35-turbo-instruct-0914.yaml
@@ -14,6 +14,7 @@ modalities:
         - text
 mode: completion
 model: gpt-35-turbo-instruct-0914
+provisioning: serverless
 removeParams:
     - tool_choice
     - parallel_tool_calls

diff --git a/providers/azure-open-ai/gpt-35-turbo-instruct.yaml b/providers/azure-open-ai/gpt-35-turbo-instruct.yaml
@@ -18,6 +18,7 @@ params:
       key: max_tokens
       maxValue: 4097
       minValue: 1
+provisioning: serverless
 removeParams:
     - tool_choice
     - parallel_tool_calls

diff --git a/providers/azure-open-ai/gpt-4-1106-Preview.yaml b/providers/azure-open-ai/gpt-4-1106-Preview.yaml
@@ -5,6 +5,7 @@ costs:
 features:
     - function_calling
     - parallel_function_calling
+    - tool_choice
 limits:
     context_window: 128000
     max_input_tokens: 128000
@@ -17,8 +18,11 @@ modalities:
         - text
 mode: chat
 model: gpt-4-1106-Preview
+provisioning: serverless
 sources:
     - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/legacy-models
+    - https://learn.microsoft.com/en-us/azure/foundry/openai/concepts/retired-models
+    - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
 status: active
 supportedModes:
     - chat
diff --git a/providers/azure-open-ai/gpt-4-turbo-2024-04-09.yaml b/providers/azure-open-ai/gpt-4-turbo-2024-04-09.yaml
@@ -1,22 +1,24 @@
 costs:
-    - input_cost_per_token: 0.00001
-      output_cost_per_token: 0.00003
+    - input_cost_per_token: 1e-5
+      output_cost_per_token: 3e-5
       region: global
-    - input_cost_per_token: 0.000011
-      output_cost_per_token: 0.000033
+    - input_cost_per_token: 1.1e-5
+      output_cost_per_token: 3.3e-5
       region: datazone_us
-    - input_cost_per_token: 0.000011
-      output_cost_per_token: 0.000033
+    - input_cost_per_token: 1.1e-5
+      output_cost_per_token: 3.3e-5
       region: datazone_eu
-    - input_cost_per_token: 0.00001
-      output_cost_per_token: 0.00003
+    - input_cost_per_token: 1e-5
+      output_cost_per_token: 3e-5
       region: "*"
+deprecationDate: "2025-01-25"
 features:
     - function_calling
     - parallel_function_calling
     - tool_choice
     - structured_output
     - json_output
+isDeprecated: true
 limits:
     context_window: 128000
     max_input_tokens: 128000
@@ -32,6 +34,6 @@ mode: chat
 model: gpt-4-turbo-2024-04-09
 sources:
     - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/legacy-models
-status: active
+status: retired
 supportedModes:
     - chat
diff --git a/providers/azure-open-ai/gpt-4-turbo-jp.yaml b/providers/azure-open-ai/gpt-4-turbo-jp.yaml
@@ -21,7 +21,9 @@ modalities:
         - text
 mode: chat
 model: gpt-4-turbo-jp
+provisioning: serverless
 sources:
     - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/legacy-models
+status: active
 supportedModes:
     - chat
diff --git a/providers/azure-open-ai/gpt-4-turbo.yaml b/providers/azure-open-ai/gpt-4-turbo.yaml
@@ -18,7 +18,10 @@ modalities:
         - text
 mode: chat
 model: gpt-4-turbo
+provisioning: serverless
 sources:
-    - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/legacy-models
+    - https://learn.microsoft.com/en-us/azure/foundry/openai/concepts/model-retirement-schedule
+    - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/retired-models
+status: active
 supportedModes:
     - chat
diff --git a/providers/azure-open-ai/gpt-4-vision-preview.yaml b/providers/azure-open-ai/gpt-4-vision-preview.yaml
@@ -17,6 +17,7 @@ modalities:
         - text
 mode: chat
 model: gpt-4-vision-preview
+provisioning: serverless
 sources:
     - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/legacy-models
 status: active

diff --git a/providers/azure-open-ai/gpt-4.1-2025-04-14-text.yaml b/providers/azure-open-ai/gpt-4.1-2025-04-14-text.yaml
@@ -1,11 +1,17 @@
 costs:
+    - cache_read_input_token_cost: 5.5e-7
+      input_cost_per_token: 0.0000022
+      input_cost_per_token_batches: 0.0000011
+      output_cost_per_token: 0.0000088
+      output_cost_per_token_batches: 0.0000044
+      region: datazone_us
     - cache_read_input_token_cost: 5e-7
       input_cost_per_token: 0.000002
       input_cost_per_token_batches: 0.000001
       output_cost_per_token: 0.000008
       output_cost_per_token_batches: 0.000004
       region: "*"
-deprecationDate: "2026-10-14"
+deprecationDate: "2026-11-04"
 features:
     - function_calling
     - parallel_function_calling
@@ -30,8 +36,10 @@ params:
       maxValue: 32768
     - key: max_completion_tokens
       maxValue: 32768
+provisioning: serverless
 sources:
     - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
+    - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/retired-models
 status: active
 supportedModes:
     - chat
diff --git a/providers/azure-open-ai/gpt-4.1-2025-04-14.yaml b/providers/azure-open-ai/gpt-4.1-2025-04-14.yaml
@@ -35,6 +35,7 @@ model: gpt-4.1-2025-04-14
 params:
     - key: max_tokens
       maxValue: 32768
+provisioning: serverless
 sources:
     - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
 status: active

diff --git a/providers/azure-open-ai/gpt-4.1-mini-2025-04-14.yaml b/providers/azure-open-ai/gpt-4.1-mini-2025-04-14.yaml
@@ -35,8 +35,11 @@ model: gpt-4.1-mini-2025-04-14
 params:
     - key: max_tokens
       maxValue: 32768
+provisioning: serverless
 sources:
     - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
+    - https://learn.microsoft.com/en-us/azure/foundry/openai/concepts/model-retirement-schedule
 status: active
 supportedModes:
     - chat
+    - responses
diff --git a/providers/azure-open-ai/gpt-4.1-mini.yaml b/providers/azure-open-ai/gpt-4.1-mini.yaml
@@ -37,10 +37,13 @@ params:
       key: max_completion_tokens
       maxValue: 32768
       minValue: 1
+provisioning: serverless
 removeParams:
     - max_tokens
 sources:
     - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
+    - https://learn.microsoft.com/en-us/azure/foundry/openai/concepts/model-retirement-schedule
 status: active
 supportedModes:
     - chat
+    - responses
diff --git a/providers/azure-open-ai/gpt-4.1-nano-2025-04-14.yaml b/providers/azure-open-ai/gpt-4.1-nano-2025-04-14.yaml
@@ -37,7 +37,9 @@ params:
       key: max_tokens
       maxValue: 32768
       minValue: 1
+provisioning: serverless
 sources:
+    - https://learn.microsoft.com/en-us/azure/foundry/openai/concepts/model-retirement-schedule
     - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
     - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/model-retirements
 status: active

diff --git a/providers/azure-open-ai/gpt-4.1-nano.yaml b/providers/azure-open-ai/gpt-4.1-nano.yaml
@@ -11,7 +11,7 @@ costs:
       output_cost_per_token: 4e-7
       output_cost_per_token_batches: 2e-7
       region: "*"
-deprecationDate: "2026-10-14"
+deprecationDate: "2026-11-04"
 features:
     - function_calling
     - parallel_function_calling
@@ -40,6 +40,7 @@ params:
     - defaultValue: null
       key: response_format
       type: string
+provisioning: serverless
 removeParams:
     - max_tokens
 sources:

diff --git a/providers/azure-open-ai/gpt-4.1.yaml b/providers/azure-open-ai/gpt-4.1.yaml
@@ -17,7 +17,7 @@ costs:
       output_cost_per_token: 0.0000088
       output_cost_per_token_batches: 0.0000044
       region: datazone_eu
-deprecationDate: "2026-10-14"
+deprecationDate: "2026-11-04"
 features:
     - function_calling
     - parallel_function_calling
@@ -45,10 +45,12 @@ params:
     - defaultValue: null
       key: response_format
       type: string
+provisioning: serverless
 removeParams:
     - max_tokens
 sources:
     - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
 status: active
 supportedModes:
     - chat
+    - responses
diff --git a/providers/azure-open-ai/gpt-4o-2024-11-20.yaml b/providers/azure-open-ai/gpt-4o-2024-11-20.yaml
@@ -3,6 +3,10 @@ costs:
       input_cost_per_token: 0.0000025
       output_cost_per_token: 0.00001
       region: global
+    - cache_creation_input_token_cost: 0.00000138
+      input_cost_per_token: 0.00000275
+      output_cost_per_token: 0.000011
+      region: datazone_us
     - cache_creation_input_token_cost: 0.00000138
       input_cost_per_token: 0.00000275
       output_cost_per_token: 0.000011
@@ -31,6 +35,7 @@ model: gpt-4o-2024-11-20
 params:
     - key: max_tokens
       maxValue: 16384
+provisioning: serverless
 sources:
     - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
 status: active

diff --git a/providers/azure-open-ai/gpt-4o-audio-mai.yaml b/providers/azure-open-ai/gpt-4o-audio-mai.yaml
@@ -28,6 +28,7 @@ params:
       key: max_tokens
       maxValue: 16384
       minValue: 1
+provisioning: serverless
 sources:
     - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
 status: active

diff --git a/providers/azure-open-ai/gpt-4o-audio-preview-2024-10-01.yaml b/providers/azure-open-ai/gpt-4o-audio-preview-2024-10-01.yaml
@@ -32,6 +32,7 @@ modalities:
         - audio
 mode: chat
 model: gpt-4o-audio-preview-2024-10-01
+provisioning: serverless
 sources:
     - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
     - https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/legacy-models