XyLearningProgramming · XyLearningProgramming · Feb 19, 2026 · Feb 19, 2026
diff --git a/deploy/helm/values.yaml b/deploy/helm/values.yaml
@@ -79,12 +79,11 @@ env: {}
 
 # Resource requests and limits for the container.
 # See https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
-# Tuned for Qwen3-0.6B-Q4_K_M (484 MB) on 1-CPU / 1 GB VPS nodes.
-# Previous values for Q8_0 (805 MB): limits cpu=3/mem=800Mi, requests cpu=50m/mem=32Mi
+# Tuned for Qwen3-0.6B-Q4_K_M (484 MB model) plus the n_ctx=8192 KV cache (~448 MB), ~932 MB total, on 1-CPU / 1 GB VPS nodes.
 resources:
   limits:
     cpu: 1
-    memory: 700Mi
+    memory: 1Gi
   requests:
     cpu: 200m
     memory: 600Mi

diff --git a/slm_server/config.py b/slm_server/config.py
@@ -62,7 +62,7 @@ class Settings(BaseSettings):
         description="Owner label for /models list. Set SLM_MODEL_OWNER to override.",
     )
     n_ctx: int = Field(
-        4096, description="Maximum context window (input + generated tokens)."
+        8192, description="Maximum context window (input + generated tokens)."
     )
     n_threads: int = Field(
         2, description="Number of OpenMP threads llama‑cpp will spawn."