|
1 | | -#!/bin/bash |
| 1 | +#!/bin/sh |
| 2 | +# |
| 3 | +# Download model files for slm-server. |
| 4 | +# |
| 5 | +# When run inside the init container, MODEL_DIR is set by the caller |
| 6 | +# (the Helm-rendered configmap). For local use it defaults to |
| 7 | +# ../models relative to this script. |
2 | 8 |
|
3 | | -set -ex |
| 9 | +set -e |
4 | 10 |
|
5 | | -# Get the absolute path of the directory where the script is located |
6 | | -SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd) |
7 | | - |
8 | | -# Original (official Qwen repo, Q8_0 only): |
9 | | -# https://huggingface.co/Qwen/Qwen3-0.6B-GGUF -> Qwen3-0.6B-Q8_0.gguf |
10 | | -# Switched to second-state community repo for Q4_K_M quantization. |
11 | | -# See README.md "Model Choice" section for rationale. |
12 | | -REPO_URL="https://huggingface.co/second-state/Qwen3-0.6B-GGUF" |
13 | | -# Set model directory relative to the script's location |
14 | | -MODEL_DIR="$SCRIPT_DIR/../models" |
15 | | - |
16 | | -# Create the directory if it doesn't exist |
| 11 | +MODEL_DIR="${MODEL_DIR:-$(cd -- "$(dirname "$0")" && pwd)/../models}" |
17 | 12 | mkdir -p "$MODEL_DIR" |
18 | 13 |
|
19 | | -# --- Files to download --- |
20 | | -FILES_TO_DOWNLOAD=( |
21 | | - "Qwen3-0.6B-Q4_K_M.gguf" |
22 | | - # Previous default: "Qwen3-0.6B-Q8_0.gguf" (805 MB, from Qwen/Qwen3-0.6B-GGUF) |
23 | | -) |
24 | | - |
25 | | -echo "Downloading Qwen3-0.6B-GGUF model and params files..." |
26 | | - |
27 | | -for file in "${FILES_TO_DOWNLOAD[@]}"; do |
28 | | - if [ -f "$MODEL_DIR/$file" ]; then |
29 | | - echo "$file already exists, skipping download." |
30 | | - else |
31 | | - echo "Downloading $file..." |
32 | | - wget -P "$MODEL_DIR" "$REPO_URL/resolve/main/$file" || { |
33 | | - echo "Failed to download $file with wget, trying curl..." |
34 | | - curl -L -o "$MODEL_DIR/$file" "$REPO_URL/resolve/main/$file" |
35 | | - } |
36 | | - fi |
37 | | -done |
38 | | - |
39 | | -# --- Embedding model: all-MiniLM-L6-v2 (ONNX, quantized UINT8 for AVX2) --- |
40 | | -EMBEDDING_REPO_URL="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2" |
41 | | -EMBEDDING_MODEL_DIR="$MODEL_DIR/all-MiniLM-L6-v2" |
42 | | - |
43 | | -mkdir -p "$EMBEDDING_MODEL_DIR/onnx" |
44 | | - |
45 | | -EMBEDDING_FILES=( |
46 | | - "onnx/model_quint8_avx2.onnx" |
47 | | - "tokenizer.json" |
48 | | -) |
49 | | - |
50 | | -echo "Downloading all-MiniLM-L6-v2 ONNX embedding model..." |
51 | | - |
52 | | -for file in "${EMBEDDING_FILES[@]}"; do |
53 | | - dest="$EMBEDDING_MODEL_DIR/$file" |
54 | | - if [ -f "$dest" ]; then |
55 | | - echo "$file already exists, skipping download." |
56 | | - else |
57 | | - echo "Downloading $file..." |
58 | | - wget -O "$dest" "$EMBEDDING_REPO_URL/resolve/main/$file" || { |
59 | | - echo "Failed to download $file with wget, trying curl..." |
60 | | - curl -L -o "$dest" "$EMBEDDING_REPO_URL/resolve/main/$file" |
61 | | - } |
62 | | - fi |
63 | | -done |
64 | | - |
65 | | -echo "Download process complete! Files are in $MODEL_DIR" |
| 14 | +# fetch URL DEST |
| 15 | +# Download URL to DEST unless DEST is already present. curl writes to |
| 16 | +# "$DEST.part" and the file is renamed into place only after a successful |
| 17 | +# transfer, so an interrupted download is not mistaken for a complete |
| 18 | +# file by the existence check on the next run. Returns non-zero on |
| 19 | +# failure, which aborts the script under `set -e`. |
| 20 | +fetch() { |
| 21 | +  fetch_url=$1 |
| 22 | +  fetch_dest=$2 |
| 23 | +  if [ -f "$fetch_dest" ]; then |
| 24 | +    echo "${fetch_dest##*/} already exists, skipping." |
| 25 | +    return 0 |
| 26 | +  fi |
| 27 | +  echo "Downloading ${fetch_dest##*/} ..." |
| 28 | +  if ! curl -fSL -o "$fetch_dest.part" "$fetch_url"; then |
| 29 | +    # Do not leave a truncated file behind for a later run to trip over. |
| 30 | +    rm -f -- "$fetch_dest.part" |
| 31 | +    return 1 |
| 32 | +  fi |
| 33 | +  mv -- "$fetch_dest.part" "$fetch_dest" |
| 34 | +} |
| 35 | + |
| 36 | +# --- Chat LLM: Qwen3-0.6B (Q4_K_M quantisation from second-state) --- |
| 37 | +GGUF_REPO="https://huggingface.co/second-state/Qwen3-0.6B-GGUF" |
| 38 | +GGUF_FILE="Qwen3-0.6B-Q4_K_M.gguf" |
| 39 | +fetch "$GGUF_REPO/resolve/main/$GGUF_FILE" "$MODEL_DIR/$GGUF_FILE" |
| 40 | + |
| 41 | +# --- Embedding: all-MiniLM-L6-v2 (ONNX, quantized UINT8 for AVX2) --- |
| 42 | +EMBED_REPO="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2" |
| 43 | +EMBED_DIR="$MODEL_DIR/all-MiniLM-L6-v2" |
| 44 | +ONNX_FILE="model_quint8_avx2.onnx" |
| 45 | +mkdir -p "$EMBED_DIR/onnx" |
| 46 | +fetch "$EMBED_REPO/resolve/main/tokenizer.json" "$EMBED_DIR/tokenizer.json" |
| 47 | +fetch "$EMBED_REPO/resolve/main/onnx/$ONNX_FILE" "$EMBED_DIR/onnx/$ONNX_FILE" |
| 48 | + |
| 49 | +echo "Download complete. Files are in $MODEL_DIR" |
0 commit comments