Skip to content

Commit 32cfdce

Browse files
committed
feat: switch from the stock Ollama image to the IPEX-LLM Ollama image for Intel GPU support
1 parent fe477cf commit 32cfdce

1 file changed

Lines changed: 29 additions & 25 deletions

File tree

kubernetes/overrides/ollama/values.yaml

Lines changed: 29 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -27,34 +27,38 @@ ollama:
2727
- nomic-embed-text
2828

2929
image:
30-
tag: 0.17.7
30+
repository: intelanalytics/ipex-llm-inference-cpp-xpu
31+
tag: 2.3.0-SNAPSHOT
32+
33+
extraArgs:
34+
- /bin/bash
35+
- -c
36+
- |
37+
mkdir -p /llm/ollama
38+
cd /llm/ollama
39+
init-ollama
40+
exec ./ollama serve
3141
3242
extraEnv:
33-
# --- GPU settings for ARC ---
34-
- name: OLLAMA_VULKAN
35-
value: "0"
43+
# --- GPU settings for ARC (Level Zero/SYCL via IPEX) ---
3644
- name: OLLAMA_NUM_GPU
3745
value: "999"
38-
- name: GGML_VK_DISABLE_COOPMAT # prevents cooperative matrix corruption on Intel Arc
39-
value: "1"
40-
- name: GGML_VK_DISABLE_COOPMAT2
46+
- name: ZES_ENABLE_SYSMAN
4147
value: "1"
4248

4349
# --- Memory ---
4450
- name: OLLAMA_CONTEXT_LENGTH
4551
value: "16384"
46-
- name: OLLAMA_KV_CACHE_TYPE
47-
value: ""
4852
- name: OLLAMA_FLASH_ATTENTION
4953
value: "0"
5054
- name: OLLAMA_GPU_OVERHEAD
51-
value: "536870912" # reserve 512MB — prevents edge-case OOM evictions
55+
value: "536870912"
5256

5357
# --- Scheduling ---
5458
- name: OLLAMA_KEEP_ALIVE
5559
value: "5m"
5660
- name: OLLAMA_MAX_LOADED_MODELS
57-
value: "1" # prevents VRAM thrashing between model switches
61+
value: "1"
5862
- name: OLLAMA_NUM_PARALLEL
5963
value: "1"
6064

@@ -69,6 +73,19 @@ persistentVolume:
6973
existingClaim: ollama-models-hostpath
7074

7175
extraObjects:
76+
- apiVersion: v1
77+
kind: PersistentVolumeClaim
78+
metadata:
79+
name: ollama-models-hostpath
80+
namespace: ollama
81+
spec:
82+
accessModes:
83+
- ReadWriteMany
84+
resources:
85+
requests:
86+
storage: 200Gi
87+
volumeName: ollama-models-hostpath
88+
storageClassName: ""
7289
- apiVersion: v1
7390
kind: PersistentVolume
7491
metadata:
@@ -88,17 +105,4 @@ extraObjects:
88105
- key: intel.feature.node.kubernetes.io/gpu
89106
operator: In
90107
values:
91-
- "true"
92-
- apiVersion: v1
93-
kind: PersistentVolumeClaim
94-
metadata:
95-
name: ollama-models-hostpath
96-
namespace: ollama
97-
spec:
98-
accessModes:
99-
- ReadWriteMany
100-
resources:
101-
requests:
102-
storage: 200Gi
103-
volumeName: ollama-models-hostpath
104-
storageClassName: ""
108+
- "true"

0 commit comments

Comments
 (0)