@@ -27,34 +27,38 @@ ollama:
2727 - nomic-embed-text
2828
2929image :
30- tag : 0.17.7
30+ repository : intelanalytics/ipex-llm-inference-cpp-xpu
31+ tag : 2.3.0-SNAPSHOT
32+
33+ extraArgs :
34+ - /bin/bash
35+ - -c
36+ - |
37+ mkdir -p /llm/ollama
38+ cd /llm/ollama
39+ init-ollama
40+ exec ./ollama serve
3141
3242extraEnv :
33- # --- GPU settings for ARC ---
34- - name : OLLAMA_VULKAN
35- value : " 0"
43+ # --- GPU settings for ARC (Level Zero/SYCL via IPEX) ---
3644 - name : OLLAMA_NUM_GPU
3745 value : " 999"
38- - name : GGML_VK_DISABLE_COOPMAT # prevents cooperative matrix corruption on Intel Arc
39- value : " 1"
40- - name : GGML_VK_DISABLE_COOPMAT2
46+ - name : ZES_ENABLE_SYSMAN
4147 value : " 1"
4248
4349 # --- Memory ---
4450 - name : OLLAMA_CONTEXT_LENGTH
4551 value : " 16384"
46- - name : OLLAMA_KV_CACHE_TYPE
47- value : " "
4852 - name : OLLAMA_FLASH_ATTENTION
4953 value : " 0"
5054 - name : OLLAMA_GPU_OVERHEAD
51- value : " 536870912" # reserve 512MB — prevents edge-case OOM evictions
55+ value : " 536870912"
5256
5357 # --- Scheduling ---
5458 - name : OLLAMA_KEEP_ALIVE
5559 value : " 5m"
5660 - name : OLLAMA_MAX_LOADED_MODELS
57- value : " 1" # prevents VRAM thrashing between model switches
61+ value : " 1"
5862 - name : OLLAMA_NUM_PARALLEL
5963 value : " 1"
6064
@@ -69,6 +73,19 @@ persistentVolume:
6973 existingClaim : ollama-models-hostpath
7074
7175extraObjects :
76+ - apiVersion : v1
77+ kind : PersistentVolumeClaim
78+ metadata :
79+ name : ollama-models-hostpath
80+ namespace : ollama
81+ spec :
82+ accessModes :
83+ - ReadWriteMany
84+ resources :
85+ requests :
86+ storage : 200Gi
87+ volumeName : ollama-models-hostpath
88+ storageClassName : " "
7289 - apiVersion : v1
7390 kind : PersistentVolume
7491 metadata :
@@ -88,17 +105,4 @@ extraObjects:
88105 - key : intel.feature.node.kubernetes.io/gpu
89106 operator : In
90107 values :
91- - " true"
92- - apiVersion : v1
93- kind : PersistentVolumeClaim
94- metadata :
95- name : ollama-models-hostpath
96- namespace : ollama
97- spec :
98- accessModes :
99- - ReadWriteMany
100- resources :
101- requests :
102- storage : 200Gi
103- volumeName : ollama-models-hostpath
104- storageClassName : " "
108+ - " true"
0 commit comments