From 031b7d738ecaec555edfd2eba4a99b1203c353f9 Mon Sep 17 00:00:00 2001 From: Jonathan Irwin Date: Fri, 1 May 2026 22:32:21 -0400 Subject: [PATCH 1/2] chore: remove provider and region from inline TOML examples Strip `provider` and `region` lines from inline cerebrium.toml snippets across example/migration/partner pages, plus drop them from the toml-reference hardware table and its inline example. Co-Authored-By: Claude Opus 4.7 (1M context) --- migrations/replicate.mdx | 2 -- partner-services/deepgram.mdx | 1 - partner-services/rime.mdx | 1 - scaling/graceful-termination.mdx | 2 -- toml-reference/toml-reference.mdx | 16 ++++++---------- v4/examples/aiVoiceAgents.mdx | 2 -- ...ploy-an-llm-with-tensorrtllm-tritonserver.mdx | 2 -- v4/examples/gpt-oss.mdx | 2 -- v4/examples/high-throughput-embeddings.mdx | 1 - v4/examples/mistral-vllm.mdx | 2 -- v4/examples/realtime-voice-agents.mdx | 2 -- v4/examples/sdxl.mdx | 2 -- v4/examples/streaming-falcon-7B.mdx | 2 -- v4/examples/transcribe-whisper.mdx | 2 -- v4/examples/twilio-voice-agent.mdx | 2 -- 15 files changed, 6 insertions(+), 35 deletions(-) diff --git a/migrations/replicate.mdx b/migrations/replicate.mdx index 31d31d3..cc6dedb 100644 --- a/migrations/replicate.mdx +++ b/migrations/replicate.mdx @@ -33,8 +33,6 @@ shell_commands = [ ] [cerebrium.hardware] -region = "us-east-1" -provider = "aws" compute = "AMPERE_A10" cpu = 2 memory = 12.0 diff --git a/partner-services/deepgram.mdx b/partner-services/deepgram.mdx index 7414477..407d81c 100644 --- a/partner-services/deepgram.mdx +++ b/partner-services/deepgram.mdx @@ -280,7 +280,6 @@ disable_auth = true [cerebrium.hardware] cpu = 4 -region = "us-east-1" memory = 32 compute = "AMPERE_A10" gpu_count = 1 diff --git a/partner-services/rime.mdx b/partner-services/rime.mdx index a8277bc..f04ad41 100644 --- a/partner-services/rime.mdx +++ b/partner-services/rime.mdx @@ -36,7 +36,6 @@ cpu = 4 memory = 30 compute = "AMPERE_A10" gpu_count = 1 -region = "us-east-1" [cerebrium.scaling] min_replicas = 1 diff --git a/scaling/graceful-termination.mdx b/scaling/graceful-termination.mdx index 6c1a64f..b5ab2b0 100644 --- a/scaling/graceful-termination.mdx +++ b/scaling/graceful-termination.mdx @@ -135,8 +135,6 @@ cpu = 1 memory = 1.0 compute = "CPU" gpu_count = 0 -provider = "aws" -region = "us-east-1" [cerebrium.scaling] min_replicas = 0 diff --git a/toml-reference/toml-reference.mdx b/toml-reference/toml-reference.mdx index a91b172..e593a3d 100644 --- a/toml-reference/toml-reference.mdx +++ b/toml-reference/toml-reference.mdx @@ -110,14 +110,12 @@ The `[cerebrium.runtime.custom]` section configures custom web servers and runti The `[cerebrium.hardware]` section defines compute resources. -| Option | Type | Default | Description | -| --------- | ------- | ----------- | ------------------------------------ | -| cpu | float | required | Number of CPU cores | -| memory | float | required | Memory allocation in GB | -| compute | string | "CPU" | Compute type (CPU, AMPERE_A10, etc.) | -| gpu_count | integer | 0 | Number of GPUs | -| provider | string | "aws" | Cloud provider | -| region | string | "us-east-1" | Deployment region | +| Option | Type | Default | Description | +| --------- | ------- | -------- | ------------------------------------ | +| cpu | float | required | Number of CPU cores | +| memory | float | required | Memory allocation in GB | +| compute | string | "CPU" | Compute type (CPU, AMPERE_A10, etc.) | +| gpu_count | integer | 0 | Number of GPUs | Memory refers to RAM, not GPU VRAM. Ensure sufficient memory for your @@ -233,8 +231,6 @@ cpu = 4 memory = 16.0 compute = "AMPERE_A10" gpu_count = 1 -provider = "aws" -region = "us-east-1" [cerebrium.scaling] min_replicas = 0 diff --git a/v4/examples/aiVoiceAgents.mdx b/v4/examples/aiVoiceAgents.mdx index da74ec2..cf6ebe6 100644 --- a/v4/examples/aiVoiceAgents.mdx +++ b/v4/examples/aiVoiceAgents.mdx @@ -176,8 +176,6 @@ include = ["./*", "main.py", "cerebrium.toml"] exclude = ["./example_exclude"] [cerebrium.hardware] -region = "us-east-1" -provider = "aws" compute = "CPU" cpu = 6 memory = 18.0 diff --git a/v4/examples/deploy-an-llm-with-tensorrtllm-tritonserver.mdx b/v4/examples/deploy-an-llm-with-tensorrtllm-tritonserver.mdx index ed62226..2211e34 100644 --- a/v4/examples/deploy-an-llm-with-tensorrtllm-tritonserver.mdx +++ b/v4/examples/deploy-an-llm-with-tensorrtllm-tritonserver.mdx @@ -401,8 +401,6 @@ cpu = 4.0 memory = 40.0 compute = "AMPERE_A10" gpu_count = 1 -provider = "aws" -region = "us-east-1" [cerebrium.scaling] min_replicas = 0 diff --git a/v4/examples/gpt-oss.mdx b/v4/examples/gpt-oss.mdx index 47406a7..0270e5d 100644 --- a/v4/examples/gpt-oss.mdx +++ b/v4/examples/gpt-oss.mdx @@ -46,8 +46,6 @@ pre_build_commands = [ cpu = 8.0 memory = 18.0 compute = "HOPPER_H100" -provider = "aws" -region = "us-east-1" [cerebrium.scaling] min_replicas = 0 diff --git a/v4/examples/high-throughput-embeddings.mdx b/v4/examples/high-throughput-embeddings.mdx index 3172952..a66def9 100644 --- a/v4/examples/high-throughput-embeddings.mdx +++ b/v4/examples/high-throughput-embeddings.mdx @@ -51,7 +51,6 @@ Autoscaling criteria vary by hardware type and model selection. Define them in t cpu = 6.0 memory = 12.0 compute = "AMPERE_A10" -region = "us-east-1" [cerebrium.scaling] min_replicas = 0 diff --git a/v4/examples/mistral-vllm.mdx b/v4/examples/mistral-vllm.mdx index 9a154c8..df1f5ef 100644 --- a/v4/examples/mistral-vllm.mdx +++ b/v4/examples/mistral-vllm.mdx @@ -121,8 +121,6 @@ exclude = ["./example_exclude"] docker_base_image_url = "nvidia/cuda:12.1.1-runtime-ubuntu22.04" [cerebrium.hardware] -region = "us-east-1" -provider = "aws" compute = "AMPERE_A10" cpu = 2 memory = 16.0 diff --git a/v4/examples/realtime-voice-agents.mdx b/v4/examples/realtime-voice-agents.mdx index dc30d38..2b53326 100644 --- a/v4/examples/realtime-voice-agents.mdx +++ b/v4/examples/realtime-voice-agents.mdx @@ -181,8 +181,6 @@ include = ["./*", "main.py", "cerebrium.toml"] exclude = ["./example_exclude"] [cerebrium.hardware] -region = "us-east-1" -provider = "aws" compute = "CPU" cpu = 6 memory = 12.0 diff --git a/v4/examples/sdxl.mdx b/v4/examples/sdxl.mdx index 1a6a586..4a05677 100644 --- a/v4/examples/sdxl.mdx +++ b/v4/examples/sdxl.mdx @@ -34,8 +34,6 @@ include = ["./*", "main.py", "cerebrium.toml"] exclude = ["./.*", "./__*"] [cerebrium.hardware] -region = "us-east-1" -provider = "aws" compute = "AMPERE_A10" cpu = 2 memory = 16.0 diff --git a/v4/examples/streaming-falcon-7B.mdx b/v4/examples/streaming-falcon-7B.mdx index 9dde91b..1b7b912 100644 --- a/v4/examples/streaming-falcon-7B.mdx +++ b/v4/examples/streaming-falcon-7B.mdx @@ -146,8 +146,6 @@ exclude = ["./example_exclude"] docker_base_image_url = "nvidia/cuda:12.1.1-runtime-ubuntu22.04" [cerebrium.hardware] -region = "us-east-1" -provider = "aws" compute = "AMPERE_A10" cpu = 2 memory = 16.0 diff --git a/v4/examples/transcribe-whisper.mdx b/v4/examples/transcribe-whisper.mdx index ad4186f..01d7b60 100644 --- a/v4/examples/transcribe-whisper.mdx +++ b/v4/examples/transcribe-whisper.mdx @@ -114,8 +114,6 @@ exclude = ["./example_exclude"] docker_base_image_url = "nvidia/cuda:12.1.1-runtime-ubuntu22.04" [cerebrium.hardware] -region = "us-east-1" -provider = "aws" compute = "AMPERE_A10" cpu = 3 memory = 12.0 diff --git a/v4/examples/twilio-voice-agent.mdx b/v4/examples/twilio-voice-agent.mdx index 61eab6e..c1db243 100644 --- a/v4/examples/twilio-voice-agent.mdx +++ b/v4/examples/twilio-voice-agent.mdx @@ -264,8 +264,6 @@ Update `cerebrium.toml` with the following: ``` [cerebrium.hardware] -region = "us-east-1" -provider = "aws" compute = "CPU" cpu = 10 memory = 8.0 From 7d2ea0dc2b85031dfe10308480d84a37a1b2452e Mon Sep 17 00:00:00 2001 From: Jonathan Irwin Date: Mon, 4 May 2026 16:49:00 -0400 Subject: [PATCH 2/2] docs: keep provider and region in toml-reference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revert the toml-reference changes — only inline TOML in example pages should be stripped. Co-Authored-By: Claude Opus 4.7 (1M context) --- toml-reference/toml-reference.mdx | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/toml-reference/toml-reference.mdx b/toml-reference/toml-reference.mdx index e593a3d..a91b172 100644 --- a/toml-reference/toml-reference.mdx +++ b/toml-reference/toml-reference.mdx @@ -110,12 +110,14 @@ The `[cerebrium.runtime.custom]` section configures custom web servers and runti The `[cerebrium.hardware]` section defines compute resources. -| Option | Type | Default | Description | -| --------- | ------- | -------- | ------------------------------------ | -| cpu | float | required | Number of CPU cores | -| memory | float | required | Memory allocation in GB | -| compute | string | "CPU" | Compute type (CPU, AMPERE_A10, etc.) | -| gpu_count | integer | 0 | Number of GPUs | +| Option | Type | Default | Description | +| --------- | ------- | ----------- | ------------------------------------ | +| cpu | float | required | Number of CPU cores | +| memory | float | required | Memory allocation in GB | +| compute | string | "CPU" | Compute type (CPU, AMPERE_A10, etc.) | +| gpu_count | integer | 0 | Number of GPUs | +| provider | string | "aws" | Cloud provider | +| region | string | "us-east-1" | Deployment region | Memory refers to RAM, not GPU VRAM. Ensure sufficient memory for your @@ -231,6 +233,8 @@ cpu = 4 memory = 16.0 compute = "AMPERE_A10" gpu_count = 1 +provider = "aws" +region = "us-east-1" [cerebrium.scaling] min_replicas = 0