Merge pull request #14 from XyLearningProgramming/bugfix/model-download

XyLearningProgramming · web-flow · commit 8d33666accd6 · 2026-02-23T10:55:40.000+08:00
Bugfix/model download
diff --git a/.github/actions/helm-deploy/action.yml b/.github/actions/helm-deploy/action.yml
@@ -24,6 +24,10 @@ inputs:
     description: 'Kubernetes namespace'
     required: true
     default: 'backend'
+  helm_set_files:
+    description: 'Comma-separated --set-file args, e.g. "configMapData.download=scripts/download.sh"'
+    required: false
+    default: ''
   github_token:
     description: 'GitHub token for Helm installation'
     required: true
@@ -92,11 +96,23 @@ runs:
         ${{ steps.parse_env.outputs.helm_env_values }}
         EOF
         
+        # Build --set-file flags from the comma-separated helm_set_files input.
+        # A bash array keeps every key=path pair as a single argument, so paths
+        # containing spaces or glob characters reach helm unmodified.
+        SET_FILE_FLAGS=()
+        IFS=',' read -ra PAIRS <<< "${{ inputs.helm_set_files }}"
+        for pair in "${PAIRS[@]}"; do
+          pair="${pair#"${pair%%[![:space:]]*}"}"   # trim leading whitespace
+          pair="${pair%"${pair##*[![:space:]]}"}"   # trim trailing whitespace
+          if [ -n "$pair" ]; then SET_FILE_FLAGS+=(--set-file "$pair"); fi
+        done
+
         # Deploy using Helm (values file won't be logged due to file redirection)
         helm upgrade --install slm-server ./deploy/helm \
           --namespace ${{ inputs.namespace }} \
           --create-namespace \
           --values "$temp_values" \
+          "${SET_FILE_FLAGS[@]}" \
           --wait \
           --timeout 10m
         
diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml
@@ -83,5 +83,6 @@ jobs:
         helm_values_env: ${{ secrets.HELM_VALUES_ENV }}
         helm_values_persistence_hostpath: ${{ secrets.HELM_VALUES_PERSISTENCE_HOSTPATH }}
         helm_values_persistence_nodename: ${{ secrets.HELM_VALUES_PERSISTENCE_NODENAME }}
+        helm_set_files: 'configMapData.download=scripts/download.sh'  # <chart value>=<file path>, forwarded to helm as --set-file
         namespace: ${{ env.NAMESPACE }}
         github_token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
@@ -29,5 +29,6 @@ jobs:
         helm_values_env: ${{ secrets.HELM_VALUES_ENV }}
         helm_values_persistence_hostpath: ${{ secrets.HELM_VALUES_PERSISTENCE_HOSTPATH }}
         helm_values_persistence_nodename: ${{ secrets.HELM_VALUES_PERSISTENCE_NODENAME }}
+        helm_set_files: 'configMapData.download=scripts/download.sh'  # <chart value>=<file path>, forwarded to helm as --set-file
         namespace: ${{ env.NAMESPACE }}
         github_token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/deploy/helm/templates/configmap.yaml b/deploy/helm/templates/configmap.yaml
@@ -1,9 +1,13 @@
+{{- if .Values.configMapData }}
 apiVersion: v1
 kind: ConfigMap
 metadata:
   name: {{ include "slm-server.fullname" . }}-scripts
   labels:
     {{- include "slm-server.labels" . | nindent 4 }}
 data:
-  download.sh: |-
-    {{- .Files.Get "scripts/download.sh" | nindent 4 }}
+  {{- /*
+    toYaml escapes each entry, so any script content renders as valid YAML.
+  */}}
+  {{- toYaml .Values.configMapData | nindent 2 }}
+{{- end }}
diff --git a/deploy/helm/templates/deployment.yaml b/deploy/helm/templates/deployment.yaml
@@ -19,15 +19,10 @@ spec:
       affinity:
         {{- toYaml . | nindent 8 }}
       {{- end }}
+      {{- with .Values.initContainers }}{{/* passed through tpl so values may hold template expressions */}}
       initContainers:
-        - name: download-model
-          image: busybox:1.28
-          command: ["sh", "-c", "/scripts/download.sh"]
-          volumeMounts:
-            - name: models-storage
-              mountPath: {{ .Values.persistence.mountPath }}
-            - name: scripts
-              mountPath: /scripts
+        {{- tpl (toYaml .) $ | nindent 8 }}
+      {{- end }}
       containers:
         - name: {{ .Chart.Name }}
           image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
@@ -67,20 +62,13 @@ spec:
           {{- end }}
           resources:
             {{- toYaml .Values.resources | nindent 12 }}
+          {{- with .Values.volumeMounts }}{{/* passed through tpl so values may hold template expressions */}}
           volumeMounts:
-            - name: models-storage
-              mountPath: {{ .Values.persistence.mountPath }}
-      volumes:
-        - name: models-storage
-          {{- if .Values.persistence.enabled }}
-          persistentVolumeClaim:
-            claimName: {{ include "slm-server.fullname" . }}
-          {{- else }}
-          emptyDir: {}
+            {{- tpl (toYaml .) $ | nindent 12 }}
           {{- end }}
-        - name: scripts
-          configMap:
-            name: {{ include "slm-server.fullname" . }}-scripts
-            defaultMode: 0755
+      {{- with .Values.volumes }}
+      volumes:
+        {{- tpl (toYaml .) $ | nindent 8 }}
+      {{- end }}
   strategy:
     {{- toYaml .Values.strategy | nindent 4 }}
diff --git a/deploy/helm/values.yaml b/deploy/helm/values.yaml
@@ -128,5 +128,41 @@ probes:
     successThreshold: 1
     failureThreshold: 5
 
+# Content is injected at deploy time via: --set-file configMapData.download=scripts/download.sh
+configMapData: {}
+
+# Volumes and volumeMounts rendered via tpl so template expressions work.
+# NOTE(review): the models-storage volume always references the PVC; override
+# `volumes` (e.g. with an emptyDir) when persistence is disabled.
+volumes:
+  - name: models-storage
+    persistentVolumeClaim:
+      claimName: '{{ include "slm-server.fullname" . }}'
+  - name: scripts
+    configMap:
+      name: '{{ include "slm-server.fullname" . }}-scripts'
+      # Decimal form of octal 0755 (rwxr-xr-x): a plain 0755 is read as octal by
+      # YAML 1.1 parsers but as decimal 755 by YAML 1.2 parsers.
+      defaultMode: 493
+      items:
+        - key: download
+          path: download.sh
+
+volumeMounts:
+  - name: models-storage
+    mountPath: /app/models
+
+initContainers:
+  - name: download-model
+    # Pinned tag: ':latest' makes deployments non-reproducible and triggers a
+    # registry check on every pod start.
+    image: curlimages/curl:8.11.1
+    command: ["sh", "-c", "MODEL_DIR=/app/models sh /scripts/download.sh"]
+    volumeMounts:
+      - name: models-storage
+        mountPath: /app/models
+      - name: scripts
+        mountPath: /scripts
+
 strategy:
   type: Recreate
diff --git a/scripts/download.sh b/scripts/download.sh
@@ -1,65 +1,45 @@
-#!/bin/bash
+#!/bin/sh
+#
+# Download model files for slm-server.
+#
+# When run inside the init container, MODEL_DIR is set by the caller
+# (the initContainers command in the Helm values). For local use it
+# defaults to ../models relative to this script.
 
-set -ex
+set -e
 
-# Get the absolute path of the directory where the script is located
-SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)
-
-# Original (official Qwen repo, Q8_0 only):
-#   https://huggingface.co/Qwen/Qwen3-0.6B-GGUF  ->  Qwen3-0.6B-Q8_0.gguf
-# Switched to second-state community repo for Q4_K_M quantization.
-# See README.md "Model Choice" section for rationale.
-REPO_URL="https://huggingface.co/second-state/Qwen3-0.6B-GGUF"
-# Set model directory relative to the script's location
-MODEL_DIR="$SCRIPT_DIR/../models"
-
-# Create the directory if it doesn't exist
+MODEL_DIR="${MODEL_DIR:-$(cd -- "$(dirname "$0")" && pwd)/../models}"
 mkdir -p "$MODEL_DIR"
 
-# --- Files to download ---
-FILES_TO_DOWNLOAD=(
-    "Qwen3-0.6B-Q4_K_M.gguf"
-    # Previous default: "Qwen3-0.6B-Q8_0.gguf" (805 MB, from Qwen/Qwen3-0.6B-GGUF)
-)
-
-echo "Downloading Qwen3-0.6B-GGUF model and params files..."
-
-for file in "${FILES_TO_DOWNLOAD[@]}"; do
-    if [ -f "$MODEL_DIR/$file" ]; then
-        echo "$file already exists, skipping download."
-    else
-        echo "Downloading $file..."
-        wget -P "$MODEL_DIR" "$REPO_URL/resolve/main/$file" || {
-            echo "Failed to download $file with wget, trying curl..."
-            curl -L -o "$MODEL_DIR/$file" "$REPO_URL/resolve/main/$file"
-        }
-    fi
-done
-
-# --- Embedding model: all-MiniLM-L6-v2 (ONNX, quantized UINT8 for AVX2) ---
-EMBEDDING_REPO_URL="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
-EMBEDDING_MODEL_DIR="$MODEL_DIR/all-MiniLM-L6-v2"
-
-mkdir -p "$EMBEDDING_MODEL_DIR/onnx"
-
-EMBEDDING_FILES=(
-    "onnx/model_quint8_avx2.onnx"
-    "tokenizer.json"
-)
-
-echo "Downloading all-MiniLM-L6-v2 ONNX embedding model..."
-
-for file in "${EMBEDDING_FILES[@]}"; do
-    dest="$EMBEDDING_MODEL_DIR/$file"
-    if [ -f "$dest" ]; then
-        echo "$file already exists, skipping download."
-    else
-        echo "Downloading $file..."
-        wget -O "$dest" "$EMBEDDING_REPO_URL/resolve/main/$file" || {
-            echo "Failed to download $file with wget, trying curl..."
-            curl -L -o "$dest" "$EMBEDDING_REPO_URL/resolve/main/$file"
-        }
-    fi
-done
-
-echo "Download process complete! Files are in $MODEL_DIR"
+# --- Chat LLM: Qwen3-0.6B (Q4_K_M quantisation from second-state) ---
+GGUF_REPO="https://huggingface.co/second-state/Qwen3-0.6B-GGUF"
+GGUF_FILE="Qwen3-0.6B-Q4_K_M.gguf"
+
+# --- Embedding: all-MiniLM-L6-v2 (ONNX, quantized UINT8 for AVX2) ---
+EMBED_REPO="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
+EMBED_DIR="$MODEL_DIR/all-MiniLM-L6-v2"
+ONNX_FILE="model_quint8_avx2.onnx"
+
+# fetch URL DEST
+# Download URL to DEST unless DEST already exists. The transfer goes to
+# DEST.part and is renamed only after curl succeeds, so an interrupted
+# download is never mistaken for a complete file on the next run.
+fetch() {
+  url="$1"
+  dest="$2"
+  name="${dest##*/}"
+  if [ -f "$dest" ]; then
+    echo "$name already exists, skipping."
+    return 0
+  fi
+  echo "Downloading $name ..."
+  curl -fSL -o "$dest.part" "$url"
+  mv "$dest.part" "$dest"
+}
+
+fetch "$GGUF_REPO/resolve/main/$GGUF_FILE" "$MODEL_DIR/$GGUF_FILE"
+mkdir -p "$EMBED_DIR/onnx"
+fetch "$EMBED_REPO/resolve/main/tokenizer.json" "$EMBED_DIR/tokenizer.json"
+fetch "$EMBED_REPO/resolve/main/onnx/$ONNX_FILE" "$EMBED_DIR/onnx/$ONNX_FILE"
+
+echo "Download complete. Files are in $MODEL_DIR"