Skip to content

Commit bd1c799

Browse files
🐛 trying to fix tag error
2 parents 6cef2d2 + 91cc1bd commit bd1c799

File tree

13 files changed

+811
-39
lines changed

13 files changed

+811
-39
lines changed

.github/actions/helm-deploy/action.yml

Lines changed: 41 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,47 +24,62 @@ inputs:
2424
description: 'Kubernetes namespace'
2525
required: true
2626
default: 'backend'
27+
github_token:
28+
description: 'GitHub token for Helm installation'
29+
required: true
2730

2831
runs:
2932
using: 'composite'
30-
steps:
31-
- name: Set up Kubernetes config
32-
shell: bash
33-
run: |
34-
mkdir -p $HOME/.kube
35-
echo "${{ inputs.kube_config_data }}" | base64 -d > $HOME/.kube/config
36-
chmod 600 $HOME/.kube/config
37-
33+
steps:
3834
- name: Install Helm
3935
uses: azure/setup-helm@v3
4036
with:
4137
version: 'latest'
38+
env:
39+
GITHUB_TOKEN: ${{ inputs.github_token }}
4240

41+
- name: Set up Kubernetes config
42+
shell: bash
43+
run: |
44+
mkdir -p $HOME/.kube
45+
echo "${{ inputs.kube_config_data }}" > $HOME/.kube/config
46+
chmod 600 $HOME/.kube/config
47+
4348
- name: Parse environment variables
4449
id: parse_env
4550
shell: bash
4651
run: |
4752
if [ -n "${{ inputs.helm_values_env }}" ]; then
48-
echo "helm_env_values<<EOF" >> $GITHUB_OUTPUT
53+
# Create temporary file to avoid exposing secrets in logs
54+
temp_file=$(mktemp)
4955
echo "${{ inputs.helm_values_env }}" | while IFS='=' read -r key value; do
5056
# Skip commented lines and empty lines
5157
if [[ "$key" =~ ^#.*$ ]] || [ -z "$key" ]; then
5258
continue
5359
fi
5460
if [ -n "$key" ] && [ -n "$value" ]; then
55-
echo " $key: \"$value\""
61+
echo "::add-mask::$value"
62+
echo " $key: \"$value\"" >> "$temp_file"
5663
fi
57-
done >> $GITHUB_OUTPUT
64+
done
65+
66+
# Output the parsed values without exposing them in logs
67+
echo "helm_env_values<<EOF" >> $GITHUB_OUTPUT
68+
cat "$temp_file" >> $GITHUB_OUTPUT
5869
echo "EOF" >> $GITHUB_OUTPUT
70+
rm "$temp_file"
5971
else
6072
echo "helm_env_values=" >> $GITHUB_OUTPUT
6173
fi
62-
74+
6375
- name: Deploy with Helm
6476
shell: bash
6577
run: |
66-
# Create temporary values file
67-
cat > /tmp/override-values.yaml << EOF
78+
# Create temporary values file with restricted permissions
79+
temp_values=$(mktemp)
80+
chmod 600 "$temp_values"
81+
82+
cat > "$temp_values" << EOF
6883
image:
6984
repository: ${{ inputs.registry_repository }}
7085
tag: "${{ inputs.image_tag }}"
@@ -77,13 +92,23 @@ runs:
7792
${{ steps.parse_env.outputs.helm_env_values }}
7893
EOF
7994
80-
# Deploy using Helm
95+
# Deploy using Helm (values file won't be logged due to file redirection)
8196
helm upgrade --install slm-server ./deploy/helm \
8297
--namespace ${{ inputs.namespace }} \
8398
--create-namespace \
84-
--values /tmp/override-values.yaml \
99+
--values "$temp_values" \
85100
--wait \
86101
--timeout 10m
102+
103+
# Clean up temporary file
104+
rm "$temp_values"
105+
106+
- name: Cleanup on cancellation
107+
if: cancelled()
108+
shell: bash
109+
run: |
110+
echo "Workflow cancelled, attempting helm rollback..."
111+
helm rollback slm-server 0 -n ${{ inputs.namespace }} --wait --timeout 5m || true
87112
88113
- name: Verify deployment
89114
shell: bash

.github/workflows/cd.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,3 +84,4 @@ jobs:
8484
helm_values_persistence_hostpath: ${{ secrets.HELM_VALUES_PERSISTENCE_HOSTPATH }}
8585
helm_values_persistence_nodename: ${{ secrets.HELM_VALUES_PERSISTENCE_NODENAME }}
8686
namespace: ${{ env.NAMESPACE }}
87+
github_token: ${{ secrets.GITHUB_TOKEN }}

.github/workflows/ci.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,10 @@ jobs:
3636
uv run pytest tests/ --ignore=tests/e2e/ --cov=slm_server --cov-report=xml --cov-report=term-missing
3737
3838
- name: Upload coverage to Codecov
39-
uses: codecov/codecov-action@v4
39+
uses: codecov/codecov-action@v5
4040
with:
4141
file: ./coverage.xml
4242
flags: unittests
43-
name: codecov-umbrella
4443
fail_ci_if_error: false
4544
token: ${{ secrets.CODECOV_TOKEN }}
4645
slug: XyLearningProgramming/slm_server

.github/workflows/deploy.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,4 @@ jobs:
3030
helm_values_persistence_hostpath: ${{ secrets.HELM_VALUES_PERSISTENCE_HOSTPATH }}
3131
helm_values_persistence_nodename: ${{ secrets.HELM_VALUES_PERSISTENCE_NODENAME }}
3232
namespace: ${{ env.NAMESPACE }}
33+
github_token: ${{ secrets.GITHUB_TOKEN }}

README.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
1-
# 🤖 SLM Server
1+
# Small-Language-Model Server
22

33
[![CI Pipeline](https://github.com/XyLearningProgramming/slm_server/actions/workflows/ci.yml/badge.svg)](https://github.com/XyLearningProgramming/slm_server/actions/workflows/ci.yml)
44
[![codecov](https://codecov.io/gh/XyLearningProgramming/slm_server/branch/main/graph/badge.svg)](https://codecov.io/gh/XyLearningProgramming/slm_server)
55
[![Docker](https://img.shields.io/badge/docker-ready-blue.svg)](https://hub.docker.com/r/x3huang/slm_server)
66
[![License](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
77

8-
> 🚀 **Production-ready FastAPI model server** for small language models with OpenAI-compatible API, built-in observability, and enterprise-grade deployment tools.
8+
🚀 A lightweight model server that serves small language models (default: `Qwen3-0.6B-GGUF`) as a **thin wrapper** around `llama-cpp`, exposing the OpenAI-compatible `/chat/completions` API. The core logic is fewer than 100 lines, in `./slm_server/app.py`!
99

10-
A light model server that serves small language models (default: `Qwen3-0.6B-GGUF`) using `llama-cpp` via the OpenAI-compatible `/chat/completions` API. Designed for resource-constrained environments with comprehensive monitoring and deployment automation.
10+
> This is still a WIP project. Issues and pull requests are welcome. I mainly use this repo to deploy an SLM as part of the backend on my own site [x3huang.dev](https://x3huang.dev/) while trying my best to keep this repo model-agnostic.
1111
1212
## ✨ Features
1313

14+
![Thin wrapper around llama cpp](./docs/20250712_slm_img1.jpg)
15+
1416
- 🔌 **OpenAI-compatible API** - Drop-in replacement with `/chat/completions` endpoint and streaming support
1517
- **Llama.cpp integration** - High-performance inference optimized for limited CPU and memory resources
1618
- 📊 **Production observability** - Built-in logging, Prometheus metrics, and OpenTelemetry tracing (all configurable)
@@ -50,7 +52,7 @@ docker run -p 8000:8000 -v $(pwd)/models:/app/models slm_server
5052
### Test the API
5153

5254
```bash
53-
curl -X POST http://localhost:8000/chat/completions \
55+
curl -X POST http://localhost:8000/api/v1/chat/completions \
5456
-H "Content-Type: application/json" \
5557
-d '{
5658
"model": "qwen",

deploy/helm/templates/NOTES.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,16 @@
66
{{- end }}
77
{{- end }}
88
{{- else if contains "NodePort" .Values.service.type }}
9-
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "slm_server.fullname" . }})
9+
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "slm-server.fullname" . }})
1010
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
1111
echo http://$NODE_IP:$NODE_PORT
1212
{{- else if contains "LoadBalancer" .Values.service.type }}
1313
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
14-
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "slm_server.fullname" . }}'
15-
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "slm_server.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
14+
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "slm-server.fullname" . }}'
15+
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "slm-server.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
1616
echo http://$SERVICE_IP:{{ .Values.service.port }}
1717
{{- else if contains "ClusterIP" .Values.service.type }}
18-
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "slm_server.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
18+
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "slm-server.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
1919
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
2020
echo "Visit http://127.0.0.1:8080 to use your application"
2121
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT

deploy/helm/templates/hpa.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@
22
apiVersion: autoscaling/v2
33
kind: HorizontalPodAutoscaler
44
metadata:
5-
name: {{ include "slm_server.fullname" . }}
5+
name: {{ include "slm-server.fullname" . }}
66
labels:
7-
{{- include "slm_server.labels" . | nindent 4 }}
7+
{{- include "slm-server.labels" . | nindent 4 }}
88
spec:
99
scaleTargetRef:
1010
apiVersion: apps/v1
1111
kind: Deployment
12-
name: {{ include "slm_server.fullname" . }}
12+
name: {{ include "slm-server.fullname" . }}
1313
minReplicas: {{ .Values.autoscaling.minReplicas }}
1414
maxReplicas: {{ .Values.autoscaling.maxReplicas }}
1515
metrics:

deploy/helm/templates/ingress.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
apiVersion: networking.k8s.io/v1
33
kind: Ingress
44
metadata:
5-
name: {{ include "slm_server.fullname" . }}
5+
name: {{ include "slm-server.fullname" . }}
66
labels:
7-
{{- include "slm_server.labels" . | nindent 4 }}
7+
{{- include "slm-server.labels" . | nindent 4 }}
88
{{- with .Values.ingress.annotations }}
99
annotations:
1010
{{- toYaml . | nindent 4 }}
@@ -35,7 +35,7 @@ spec:
3535
{{- end }}
3636
backend:
3737
service:
38-
name: {{ include "slm_server.fullname" $ }}
38+
name: {{ include "slm-server.fullname" $ }}
3939
port:
4040
number: {{ $.Values.service.port }}
4141
{{- end }}

deploy/helm/templates/pv.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ spec:
1010
storage: {{ .Values.persistence.size }}
1111
accessModes:
1212
- {{ .Values.persistence.accessMode }}
13-
hostPath:
13+
local:
1414
path: {{ .Values.persistence.hostPath }}
1515
nodeAffinity:
1616
required:
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
apiVersion: v1
22
kind: Pod
33
metadata:
4-
name: "{{ include "slm_server.fullname" . }}-test-connection"
4+
name: "{{ include "slm-server.fullname" . }}-test-connection"
55
labels:
6-
{{- include "slm_server.labels" . | nindent 4 }}
6+
{{- include "slm-server.labels" . | nindent 4 }}
77
annotations:
88
"helm.sh/hook": test
99
spec:
1010
containers:
1111
- name: wget
1212
image: busybox
1313
command: ['wget']
14-
args: ['{{ include "slm_server.fullname" . }}:{{ .Values.service.port }}']
14+
args: ['{{ include "slm-server.fullname" . }}:{{ .Values.service.port }}']
1515
restartPolicy: Never

0 commit comments

Comments
 (0)