Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions providers/azure-open-ai/aoai-sora-2025-02-28.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
costs:
- output_cost_per_second: 0.1
region: "*"
deprecationDate: "2026-02-28"
isDeprecated: true
messages:
options: []
modalities:
Expand All @@ -15,13 +17,13 @@ removeParams:
- temperature
- stop
- top_p
- "n"
- n
- response_format
- stream
- tool_choice
- parallel_tool_calls
sources:
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
status: preview
status: retired
supportedModes:
- video
3 changes: 3 additions & 0 deletions providers/azure-open-ai/aoai-sora.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ costs:
modalities:
input:
- text
- image
output:
- video
mode: video
model: aoai-sora
provisioning: serverless
removeParams:
- max_tokens
- temperature
Expand All @@ -20,6 +22,7 @@ removeParams:
- parallel_tool_calls
sources:
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/video-generation
- https://azure.microsoft.com/en-us/products/ai-services/video-generation
status: preview
supportedModes:
Expand Down
2 changes: 2 additions & 0 deletions providers/azure-open-ai/azure-tts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ modalities:
- audio
mode: text_to_speech
model: azure-tts
provisioning: serverless
removeParams:
- max_tokens
- temperature
Expand All @@ -18,5 +19,6 @@ removeParams:
- stream
- tool_choice
- parallel_tool_calls
status: active
supportedModes:
- text_to_speech
9 changes: 6 additions & 3 deletions providers/azure-open-ai/codex-mini-2025-05-16.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ modalities:
- pdf
output:
- text
mode: chat
mode: responses
model: codex-mini-2025-05-16
params:
- defaultValue: 100000
Expand All @@ -31,6 +31,7 @@ params:
- defaultValue: medium
key: reasoning_effort
type: string
provisioning: serverless
removeParams:
- max_tokens
- temperature
Expand All @@ -39,8 +40,10 @@ removeParams:
- stop
- parallel_tool_calls
sources:
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
- https://learn.microsoft.com/en-us/azure/foundry/openai/concepts/model-retirement-schedule
- https://learn.microsoft.com/en-us/azure/foundry/foundry-models/concepts/models-sold-directly-by-azure
- https://learn.microsoft.com/en-us/azure/foundry/openai/how-to/codex
status: active
supportedModes:
- chat
- responses
thinking: true
7 changes: 5 additions & 2 deletions providers/azure-open-ai/command-r-plus.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
costs:
- input_cost_per_token: 0.000003
output_cost_per_token: 0.000015
- input_cost_per_token: 3e-6
output_cost_per_token: 1.5e-5
region: "*"
deprecationDate: "2025-09-15"
features:
- function_calling
isDeprecated: true
limits:
context_window: 128000
max_input_tokens: 128000
Expand All @@ -18,5 +20,6 @@ mode: chat
model: command-r-plus
sources:
- https://docs.cohere.com/docs/command-r-plus
status: deprecated
supportedModes:
- chat
3 changes: 3 additions & 0 deletions providers/azure-open-ai/computer-use-preview-2025-04-15.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ params:
key: max_tokens
maxValue: 1024
minValue: 1
provisioning: serverless
sources:
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/computer-use
status: preview
supportedModes:
- chat
Expand Down
10 changes: 10 additions & 0 deletions providers/azure-open-ai/container.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
mode: chat
model: container
provisioning: serverless
sources:
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/responses
status: active
supportedModes:
- chat

# Note: "container" is not an LLM but the Azure OpenAI Code Interpreter sandbox
# session SKU (used by the Assistants/Agents/Responses APIs). The LiteLLM
# entry only exposes `code_interpreter_cost_per_session` which has no
# corresponding field in the #Cost schema, so cost data is omitted.
# Pricing reference: ~$0.033 per session in US/AU regions
# (see https://azure.microsoft.com/en-us/pricing/details/azure-openai/).
1 change: 1 addition & 0 deletions providers/azure-open-ai/gpt-35-turbo-instruct-0914.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ modalities:
- text
mode: completion
model: gpt-35-turbo-instruct-0914
provisioning: serverless
removeParams:
- tool_choice
- parallel_tool_calls
Expand Down
1 change: 1 addition & 0 deletions providers/azure-open-ai/gpt-35-turbo-instruct.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ params:
key: max_tokens
maxValue: 4097
minValue: 1
provisioning: serverless
removeParams:
- tool_choice
- parallel_tool_calls
Expand Down
4 changes: 4 additions & 0 deletions providers/azure-open-ai/gpt-4-1106-Preview.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ costs:
features:
- function_calling
- parallel_function_calling
- tool_choice
limits:
context_window: 128000
max_input_tokens: 128000
Expand All @@ -17,8 +18,11 @@ modalities:
- text
mode: chat
model: gpt-4-1106-Preview
provisioning: serverless
sources:
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/legacy-models
- https://learn.microsoft.com/en-us/azure/foundry/openai/concepts/retired-models
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
status: active
supportedModes:
- chat
20 changes: 11 additions & 9 deletions providers/azure-open-ai/gpt-4-turbo-2024-04-09.yaml
Original file line number Diff line number Diff line change
@@ -1,22 +1,24 @@
costs:
- input_cost_per_token: 0.00001
output_cost_per_token: 0.00003
- input_cost_per_token: 1e-5
output_cost_per_token: 3e-5
region: global
- input_cost_per_token: 0.000011
output_cost_per_token: 0.000033
- input_cost_per_token: 1.1e-5
output_cost_per_token: 3.3e-5
region: datazone_us
- input_cost_per_token: 0.000011
output_cost_per_token: 0.000033
- input_cost_per_token: 1.1e-5
output_cost_per_token: 3.3e-5
region: datazone_eu
- input_cost_per_token: 0.00001
output_cost_per_token: 0.00003
- input_cost_per_token: 1e-5
output_cost_per_token: 3e-5
region: "*"
deprecationDate: "2025-01-25"
features:
- function_calling
- parallel_function_calling
- tool_choice
- structured_output
- json_output
isDeprecated: true
limits:
context_window: 128000
max_input_tokens: 128000
Expand All @@ -32,6 +34,6 @@ mode: chat
model: gpt-4-turbo-2024-04-09
sources:
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/legacy-models
status: active
status: retired
supportedModes:
- chat
2 changes: 2 additions & 0 deletions providers/azure-open-ai/gpt-4-turbo-jp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ modalities:
- text
mode: chat
model: gpt-4-turbo-jp
provisioning: serverless
sources:
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/legacy-models
status: active
supportedModes:
- chat
5 changes: 4 additions & 1 deletion providers/azure-open-ai/gpt-4-turbo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ modalities:
- text
mode: chat
model: gpt-4-turbo
provisioning: serverless
sources:
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/legacy-models
- https://learn.microsoft.com/en-us/azure/foundry/openai/concepts/model-retirement-schedule
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/retired-models
status: active
supportedModes:
- chat
1 change: 1 addition & 0 deletions providers/azure-open-ai/gpt-4-vision-preview.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ modalities:
- text
mode: chat
model: gpt-4-vision-preview
provisioning: serverless
sources:
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/legacy-models
status: active
Expand Down
10 changes: 9 additions & 1 deletion providers/azure-open-ai/gpt-4.1-2025-04-14-text.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
costs:
- cache_read_input_token_cost: 5.5e-7
input_cost_per_token: 0.0000022
input_cost_per_token_batches: 0.0000011
output_cost_per_token: 0.0000088
output_cost_per_token_batches: 0.0000044
region: datazone_us
- cache_read_input_token_cost: 5e-7
input_cost_per_token: 0.000002
input_cost_per_token_batches: 0.000001
output_cost_per_token: 0.000008
output_cost_per_token_batches: 0.000004
region: "*"
deprecationDate: "2026-10-14"
deprecationDate: "2026-11-04"
features:
- function_calling
- parallel_function_calling
Expand All @@ -30,8 +36,10 @@ params:
maxValue: 32768
- key: max_completion_tokens
maxValue: 32768
provisioning: serverless
sources:
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/retired-models
status: active
supportedModes:
- chat
1 change: 1 addition & 0 deletions providers/azure-open-ai/gpt-4.1-2025-04-14.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ model: gpt-4.1-2025-04-14
params:
- key: max_tokens
maxValue: 32768
provisioning: serverless
sources:
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
status: active
Expand Down
3 changes: 3 additions & 0 deletions providers/azure-open-ai/gpt-4.1-mini-2025-04-14.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,11 @@ model: gpt-4.1-mini-2025-04-14
params:
- key: max_tokens
maxValue: 32768
provisioning: serverless
sources:
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
- https://learn.microsoft.com/en-us/azure/foundry/openai/concepts/model-retirement-schedule
status: active
supportedModes:
- chat
- responses
3 changes: 3 additions & 0 deletions providers/azure-open-ai/gpt-4.1-mini.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,13 @@ params:
key: max_completion_tokens
maxValue: 32768
minValue: 1
provisioning: serverless
removeParams:
- max_tokens
sources:
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
- https://learn.microsoft.com/en-us/azure/foundry/openai/concepts/model-retirement-schedule
status: active
supportedModes:
- chat
- responses
2 changes: 2 additions & 0 deletions providers/azure-open-ai/gpt-4.1-nano-2025-04-14.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ params:
key: max_tokens
maxValue: 32768
minValue: 1
provisioning: serverless
sources:
- https://learn.microsoft.com/en-us/azure/foundry/openai/concepts/model-retirement-schedule
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/model-retirements
status: active
Expand Down
3 changes: 2 additions & 1 deletion providers/azure-open-ai/gpt-4.1-nano.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ costs:
output_cost_per_token: 4e-7
output_cost_per_token_batches: 2e-7
region: "*"
deprecationDate: "2026-10-14"
deprecationDate: "2026-11-04"
features:
- function_calling
- parallel_function_calling
Expand Down Expand Up @@ -40,6 +40,7 @@ params:
- defaultValue: null
key: response_format
type: string
provisioning: serverless
removeParams:
- max_tokens
sources:
Expand Down
4 changes: 3 additions & 1 deletion providers/azure-open-ai/gpt-4.1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ costs:
output_cost_per_token: 0.0000088
output_cost_per_token_batches: 0.0000044
region: datazone_eu
deprecationDate: "2026-10-14"
deprecationDate: "2026-11-04"
features:
- function_calling
- parallel_function_calling
Expand Down Expand Up @@ -45,10 +45,12 @@ params:
- defaultValue: null
key: response_format
type: string
provisioning: serverless
removeParams:
- max_tokens
sources:
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
status: active
supportedModes:
- chat
- responses
5 changes: 5 additions & 0 deletions providers/azure-open-ai/gpt-4o-2024-11-20.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ costs:
input_cost_per_token: 0.0000025
output_cost_per_token: 0.00001
region: global
- cache_creation_input_token_cost: 0.00000138
input_cost_per_token: 0.00000275
output_cost_per_token: 0.000011
region: datazone_us
- cache_creation_input_token_cost: 0.00000138
input_cost_per_token: 0.00000275
output_cost_per_token: 0.000011
Expand Down Expand Up @@ -31,6 +35,7 @@ model: gpt-4o-2024-11-20
params:
- key: max_tokens
maxValue: 16384
provisioning: serverless
sources:
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
status: active
Expand Down
1 change: 1 addition & 0 deletions providers/azure-open-ai/gpt-4o-audio-mai.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ params:
key: max_tokens
maxValue: 16384
minValue: 1
provisioning: serverless
sources:
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
status: active
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ modalities:
- audio
mode: chat
model: gpt-4o-audio-preview-2024-10-01
provisioning: serverless
sources:
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models
- https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/legacy-models
Expand Down
Loading
Loading