From 031b7d738ecaec555edfd2eba4a99b1203c353f9 Mon Sep 17 00:00:00 2001
From: Jonathan Irwin <jonoirwinrsa@gmail.com>
Date: Fri, 1 May 2026 22:32:21 -0400
Subject: [PATCH 1/2] chore: remove provider and region from inline TOML
 examples

Strip `provider` and `region` lines from inline cerebrium.toml
snippets across the example, migration, partner-service, and scaling
pages, and drop them from the toml-reference hardware table and its
inline example.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 migrations/replicate.mdx                         |  2 --
 partner-services/deepgram.mdx                    |  1 -
 partner-services/rime.mdx                        |  1 -
 scaling/graceful-termination.mdx                 |  2 --
 toml-reference/toml-reference.mdx                | 16 ++++++----------
 v4/examples/aiVoiceAgents.mdx                    |  2 --
 ...ploy-an-llm-with-tensorrtllm-tritonserver.mdx |  2 --
 v4/examples/gpt-oss.mdx                          |  2 --
 v4/examples/high-throughput-embeddings.mdx       |  1 -
 v4/examples/mistral-vllm.mdx                     |  2 --
 v4/examples/realtime-voice-agents.mdx            |  2 --
 v4/examples/sdxl.mdx                             |  2 --
 v4/examples/streaming-falcon-7B.mdx              |  2 --
 v4/examples/transcribe-whisper.mdx               |  2 --
 v4/examples/twilio-voice-agent.mdx               |  2 --
 15 files changed, 6 insertions(+), 35 deletions(-)

diff --git a/migrations/replicate.mdx b/migrations/replicate.mdx
index 31d31d3..cc6dedb 100644
--- a/migrations/replicate.mdx
+++ b/migrations/replicate.mdx
@@ -33,8 +33,6 @@ shell_commands = [
 ]
 
 [cerebrium.hardware]
-region = "us-east-1"
-provider = "aws"
 compute = "AMPERE_A10"
 cpu = 2
 memory = 12.0
diff --git a/partner-services/deepgram.mdx b/partner-services/deepgram.mdx
index 7414477..407d81c 100644
--- a/partner-services/deepgram.mdx
+++ b/partner-services/deepgram.mdx
@@ -280,7 +280,6 @@ disable_auth = true
 
 [cerebrium.hardware]
 cpu = 4
-region = "us-east-1"
 memory = 32
 compute = "AMPERE_A10"
 gpu_count = 1
diff --git a/partner-services/rime.mdx b/partner-services/rime.mdx
index a8277bc..f04ad41 100644
--- a/partner-services/rime.mdx
+++ b/partner-services/rime.mdx
@@ -36,7 +36,6 @@ cpu = 4
 memory = 30
 compute = "AMPERE_A10"
 gpu_count = 1
-region = "us-east-1"
 
 [cerebrium.scaling]
 min_replicas = 1
diff --git a/scaling/graceful-termination.mdx b/scaling/graceful-termination.mdx
index 6c1a64f..b5ab2b0 100644
--- a/scaling/graceful-termination.mdx
+++ b/scaling/graceful-termination.mdx
@@ -135,8 +135,6 @@ cpu = 1
 memory = 1.0
 compute = "CPU"
 gpu_count = 0
-provider = "aws"
-region = "us-east-1"
 
 [cerebrium.scaling]
 min_replicas = 0
diff --git a/toml-reference/toml-reference.mdx b/toml-reference/toml-reference.mdx
index a91b172..e593a3d 100644
--- a/toml-reference/toml-reference.mdx
+++ b/toml-reference/toml-reference.mdx
@@ -110,14 +110,12 @@ The `[cerebrium.runtime.custom]` section configures custom web servers and runti
 
 The `[cerebrium.hardware]` section defines compute resources.
 
-| Option    | Type    | Default     | Description                          |
-| --------- | ------- | ----------- | ------------------------------------ |
-| cpu       | float   | required    | Number of CPU cores                  |
-| memory    | float   | required    | Memory allocation in GB              |
-| compute   | string  | "CPU"       | Compute type (CPU, AMPERE_A10, etc.) |
-| gpu_count | integer | 0           | Number of GPUs                       |
-| provider  | string  | "aws"       | Cloud provider                       |
-| region    | string  | "us-east-1" | Deployment region                    |
+| Option    | Type    | Default  | Description                          |
+| --------- | ------- | -------- | ------------------------------------ |
+| cpu       | float   | required | Number of CPU cores                  |
+| memory    | float   | required | Memory allocation in GB              |
+| compute   | string  | "CPU"    | Compute type (CPU, AMPERE_A10, etc.) |
+| gpu_count | integer | 0        | Number of GPUs                       |
 
 <Warning>
   Memory refers to RAM, not GPU VRAM. Ensure sufficient memory for your
@@ -233,8 +231,6 @@ cpu = 4
 memory = 16.0
 compute = "AMPERE_A10"
 gpu_count = 1
-provider = "aws"
-region = "us-east-1"
 
 [cerebrium.scaling]
 min_replicas = 0
diff --git a/v4/examples/aiVoiceAgents.mdx b/v4/examples/aiVoiceAgents.mdx
index da74ec2..cf6ebe6 100644
--- a/v4/examples/aiVoiceAgents.mdx
+++ b/v4/examples/aiVoiceAgents.mdx
@@ -176,8 +176,6 @@ include = ["./*", "main.py", "cerebrium.toml"]
 exclude = ["./example_exclude"]
 
 [cerebrium.hardware]
-region = "us-east-1"
-provider = "aws"
 compute = "CPU"
 cpu = 6
 memory = 18.0
diff --git a/v4/examples/deploy-an-llm-with-tensorrtllm-tritonserver.mdx b/v4/examples/deploy-an-llm-with-tensorrtllm-tritonserver.mdx
index ed62226..2211e34 100644
--- a/v4/examples/deploy-an-llm-with-tensorrtllm-tritonserver.mdx
+++ b/v4/examples/deploy-an-llm-with-tensorrtllm-tritonserver.mdx
@@ -401,8 +401,6 @@ cpu = 4.0
 memory = 40.0
 compute = "AMPERE_A10"
 gpu_count = 1
-provider = "aws"
-region = "us-east-1"
 
 [cerebrium.scaling]
 min_replicas = 0
diff --git a/v4/examples/gpt-oss.mdx b/v4/examples/gpt-oss.mdx
index 47406a7..0270e5d 100644
--- a/v4/examples/gpt-oss.mdx
+++ b/v4/examples/gpt-oss.mdx
@@ -46,8 +46,6 @@ pre_build_commands = [
 cpu = 8.0
 memory = 18.0
 compute = "HOPPER_H100"
-provider = "aws"
-region = "us-east-1"
 
 [cerebrium.scaling]
 min_replicas = 0
diff --git a/v4/examples/high-throughput-embeddings.mdx b/v4/examples/high-throughput-embeddings.mdx
index 3172952..a66def9 100644
--- a/v4/examples/high-throughput-embeddings.mdx
+++ b/v4/examples/high-throughput-embeddings.mdx
@@ -51,7 +51,6 @@ Autoscaling criteria vary by hardware type and model selection. Define them in t
 cpu = 6.0
 memory = 12.0
 compute = "AMPERE_A10"
-region = "us-east-1"
 
 [cerebrium.scaling]
 min_replicas = 0
diff --git a/v4/examples/mistral-vllm.mdx b/v4/examples/mistral-vllm.mdx
index 9a154c8..df1f5ef 100644
--- a/v4/examples/mistral-vllm.mdx
+++ b/v4/examples/mistral-vllm.mdx
@@ -121,8 +121,6 @@ exclude = ["./example_exclude"]
 docker_base_image_url = "nvidia/cuda:12.1.1-runtime-ubuntu22.04"
 
 [cerebrium.hardware]
-region = "us-east-1"
-provider = "aws"
 compute = "AMPERE_A10"
 cpu = 2
 memory = 16.0
diff --git a/v4/examples/realtime-voice-agents.mdx b/v4/examples/realtime-voice-agents.mdx
index dc30d38..2b53326 100644
--- a/v4/examples/realtime-voice-agents.mdx
+++ b/v4/examples/realtime-voice-agents.mdx
@@ -181,8 +181,6 @@ include = ["./*", "main.py", "cerebrium.toml"]
 exclude = ["./example_exclude"]
 
 [cerebrium.hardware]
-region = "us-east-1"
-provider = "aws"
 compute = "CPU"
 cpu = 6
 memory = 12.0
diff --git a/v4/examples/sdxl.mdx b/v4/examples/sdxl.mdx
index 1a6a586..4a05677 100644
--- a/v4/examples/sdxl.mdx
+++ b/v4/examples/sdxl.mdx
@@ -34,8 +34,6 @@ include = ["./*", "main.py", "cerebrium.toml"]
 exclude = ["./.*", "./__*"]
 
 [cerebrium.hardware]
-region = "us-east-1"
-provider = "aws"
 compute = "AMPERE_A10"
 cpu = 2
 memory = 16.0
diff --git a/v4/examples/streaming-falcon-7B.mdx b/v4/examples/streaming-falcon-7B.mdx
index 9dde91b..1b7b912 100644
--- a/v4/examples/streaming-falcon-7B.mdx
+++ b/v4/examples/streaming-falcon-7B.mdx
@@ -146,8 +146,6 @@ exclude = ["./example_exclude"]
 docker_base_image_url = "nvidia/cuda:12.1.1-runtime-ubuntu22.04"
 
 [cerebrium.hardware]
-region = "us-east-1"
-provider = "aws"
 compute = "AMPERE_A10"
 cpu = 2
 memory = 16.0
diff --git a/v4/examples/transcribe-whisper.mdx b/v4/examples/transcribe-whisper.mdx
index ad4186f..01d7b60 100644
--- a/v4/examples/transcribe-whisper.mdx
+++ b/v4/examples/transcribe-whisper.mdx
@@ -114,8 +114,6 @@ exclude = ["./example_exclude"]
 docker_base_image_url = "nvidia/cuda:12.1.1-runtime-ubuntu22.04"
 
 [cerebrium.hardware]
-region = "us-east-1"
-provider = "aws"
 compute = "AMPERE_A10"
 cpu = 3
 memory = 12.0
diff --git a/v4/examples/twilio-voice-agent.mdx b/v4/examples/twilio-voice-agent.mdx
index 61eab6e..c1db243 100644
--- a/v4/examples/twilio-voice-agent.mdx
+++ b/v4/examples/twilio-voice-agent.mdx
@@ -264,8 +264,6 @@ Update `cerebrium.toml` with the following:
 
 ```
 [cerebrium.hardware]
-region = "us-east-1"
-provider = "aws"
 compute = "CPU"
 cpu = 10
 memory = 8.0

From 7d2ea0dc2b85031dfe10308480d84a37a1b2452e Mon Sep 17 00:00:00 2001
From: Jonathan Irwin <jonoirwinrsa@gmail.com>
Date: Mon, 4 May 2026 16:49:00 -0400
Subject: [PATCH 2/2] docs: keep provider and region in toml-reference
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

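Revert the toml-reference.mdx changes: restore the `provider` and
`region` rows in the `[cerebrium.hardware]` options table and the
matching lines in its inline example. Only the inline TOML snippets
in the other pages should be stripped.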

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 toml-reference/toml-reference.mdx | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/toml-reference/toml-reference.mdx b/toml-reference/toml-reference.mdx
index e593a3d..a91b172 100644
--- a/toml-reference/toml-reference.mdx
+++ b/toml-reference/toml-reference.mdx
@@ -110,12 +110,14 @@ The `[cerebrium.runtime.custom]` section configures custom web servers and runti
 
 The `[cerebrium.hardware]` section defines compute resources.
 
-| Option    | Type    | Default  | Description                          |
-| --------- | ------- | -------- | ------------------------------------ |
-| cpu       | float   | required | Number of CPU cores                  |
-| memory    | float   | required | Memory allocation in GB              |
-| compute   | string  | "CPU"    | Compute type (CPU, AMPERE_A10, etc.) |
-| gpu_count | integer | 0        | Number of GPUs                       |
+| Option    | Type    | Default     | Description                          |
+| --------- | ------- | ----------- | ------------------------------------ |
+| cpu       | float   | required    | Number of CPU cores                  |
+| memory    | float   | required    | Memory allocation in GB              |
+| compute   | string  | "CPU"       | Compute type (CPU, AMPERE_A10, etc.) |
+| gpu_count | integer | 0           | Number of GPUs                       |
+| provider  | string  | "aws"       | Cloud provider                       |
+| region    | string  | "us-east-1" | Deployment region                    |
 
 <Warning>
   Memory refers to RAM, not GPU VRAM. Ensure sufficient memory for your
@@ -231,6 +233,8 @@ cpu = 4
 memory = 16.0
 compute = "AMPERE_A10"
 gpu_count = 1
+provider = "aws"
+region = "us-east-1"
 
 [cerebrium.scaling]
 min_replicas = 0