File tree Expand file tree Collapse file tree 2 files changed +3
-4
lines changed
Expand file tree Collapse file tree 2 files changed +3
-4
lines changed Original file line number Diff line number Diff line change @@ -79,12 +79,11 @@ env: {}
7979
8080# Resource requests and limits for the container.
8181# See https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
82- # Tuned for Qwen3-0.6B-Q4_K_M (484 MB) on 1-CPU / 1 GB VPS nodes.
83- # Previous values for Q8_0 (805 MB): limits cpu=3/mem=800Mi, requests cpu=50m/mem=32Mi
82+ # Tuned for Qwen3-0.6B-Q4_K_M (484 MB) + n_ctx=8192 KV cache (~448 MB) on 1-CPU / 1 GB VPS nodes.
8483resources :
8584 limits :
8685 cpu : 1
87- memory : 700Mi
86+ memory : 1Gi
8887 requests :
8988 cpu : 200m
9089 memory : 600Mi
Original file line number Diff line number Diff line change @@ -62,7 +62,7 @@ class Settings(BaseSettings):
6262 description = "Owner label for /models list. Set SLM_MODEL_OWNER to override." ,
6363 )
6464 n_ctx : int = Field (
65- 4096 , description = "Maximum context window (input + generated tokens)."
65+ 8192 , description = "Maximum context window (input + generated tokens)."
6666 )
6767 n_threads : int = Field (
6868 2 , description = "Number of OpenMP threads llama‑cpp will spawn."
You can’t perform that action at this time.
0 commit comments