Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions deploy/helm/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,11 @@ env: {}

# Resource requests and limits for the container.
# See https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
# Tuned for Qwen3-0.6B-Q4_K_M (484 MB) on 1-CPU / 1 GB VPS nodes.
# Previous values for Q8_0 (805 MB): limits cpu=3/mem=800Mi, requests cpu=50m/mem=32Mi
# Tuned for Qwen3-0.6B-Q4_K_M model weights (484 MB) plus the KV cache at n_ctx=8192 (~448 MB) on 1-CPU / 1 GB VPS nodes.
resources:
limits:
cpu: 1
memory: 700Mi
memory: 1Gi
requests:
cpu: 200m
memory: 600Mi
Expand Down
2 changes: 1 addition & 1 deletion slm_server/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class Settings(BaseSettings):
description="Owner label for /models list. Set SLM_MODEL_OWNER to override.",
)
n_ctx: int = Field(
4096, description="Maximum context window (input + generated tokens)."
8192, description="Maximum context window (input + generated tokens)."
)
n_threads: int = Field(
2, description="Number of OpenMP threads llama‑cpp will spawn."
Expand Down