Skip to content

Commit b9991ea

Browse files
committed
run model in container
1 parent 03661ad commit b9991ea

3 files changed

Lines changed: 44 additions & 0 deletions

File tree

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,4 @@ Build the native agent using the following command.
1515
mvn -Pnative -DskipTests package
1616
```
1717

Start the application and access the chat UI at `http://localhost:8080/webjars/chat-agent-ui/index.html`.

compose.yaml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
services:
  # llama.cpp server exposing an OpenAI-compatible API on host port 8180.
  # Waits for the model file to be downloaded before starting.
  model-runner:
    image: ghcr.io/ggml-org/llama.cpp:server
    volumes:
      - model-files:/models
    command:
      - "--host"
      - "0.0.0.0"
      - "--port"
      - "8080"
      - "-n"  # max tokens to generate per request
      - "512"
      - "-m"  # model file to load
      - "/models/Qwen3-0.6B-Q8_0.gguf"
    ports:
      - "8180:8080"  # quoted: unquoted host:container pairs can hit YAML 1.1 sexagesimal parsing
    depends_on:
      model-downloader:
        condition: service_completed_successfully

  # One-shot job: downloads the GGUF model into the shared volume, then exits.
  model-downloader:
    image: ghcr.io/alexcheng1982/model-downloader
    restart: "no"  # quoted: bare `no` parses as boolean false in YAML 1.1
    volumes:
      - model-files:/models
    command:
      - "hf"
      - "download"
      - "unsloth/Qwen3-0.6B-GGUF"
      - "Qwen3-0.6B-Q8_0.gguf"
      - "--local-dir"
      - "/models"

volumes:
  # Shared named volume: written by model-downloader, read by model-runner.
  model-files:
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
# Spring AI configuration: point the OpenAI-compatible client at the local
# llama.cpp server started via compose.yaml (host port 8180).
spring:
  ai:
    openai:
      base-url: http://localhost:8180  # kebab-case for consistency with api-key (Spring relaxed binding accepts both)
      api-key: local  # llama.cpp does not validate the key, but the client requires a non-empty value
      chat:
        options:
          temperature: 0.0  # deterministic output for the chat agent

0 commit comments

Comments
 (0)