23 changes: 23 additions & 0 deletions .github/workflows/tests.yml
@@ -0,0 +1,23 @@
name: Tests

on:
pull_request:
branches: [main]
push:
branches: [main]

jobs:
test:
runs-on: self-hosted # Requires a self-hosted runner with a GPU and model weights.
# Register one at: Settings -> Actions -> Runners -> New self-hosted runner.

steps:
- uses: actions/checkout@v4

- name: Install package
run: uv sync --extra dev

- name: Run tests
run: uv run pytest -v
env:
LLM_MODEL_CACHE: ${{ secrets.LLM_MODEL_CACHE }}
51 changes: 42 additions & 9 deletions README.md
@@ -1,13 +1,21 @@
# llm-server
Host an LLM as a non-blocking HTTP server with a simple interface for managing and querying models. Built on [FastAPI](https://fastapi.tiangolo.com/) and [uvicorn](https://www.uvicorn.org/), it wraps the [large-language-model](https://github.com/EricApgar/large-language-model) library to expose LLM inference over a REST API.
Host an LLM on a non-blocking HTTP server with a simple interface for managing and querying models. This lets you run the model on LLM-capable hardware and access it over the network from devices that can't run an LLM locally.

Built on [FastAPI](https://fastapi.tiangolo.com/) and [uvicorn](https://www.uvicorn.org/), it wraps the [large-language-model](https://github.com/EricApgar/large-language-model) library to expose LLM inference over a REST API.

A user-friendly GUI is included (but **not** required) for general use.

<div align="center">
<img src="assets/LLM Server GUI.png" alt="LLM Server GUI" width="400"/>
</div>

# Setup
See [Releases](https://github.com/EricApgar/llm-server/releases) to install from a wheel file.

See ```pyproject.toml``` for the required Python version and dependencies.

## Optional Dependencies
This library uses optional dependencies for additional features. See the ```pyproject.toml``` for the list of optional library tags. To install with the GUI support, use the ```gui``` tag.
This library uses optional dependencies for additional features. See the ```pyproject.toml``` for the list of optional library tags. To install with GUI support, use the ```gui``` tag.

## Library
Install this repo as a library into another project.
@@ -60,8 +68,9 @@ pip install -e ".[gui]"
Running an LLM requires an NVIDIA GPU with ideally a large number of TOPS. See the [large-language-model](https://github.com/EricApgar/large-language-model) repo for hardware details and GPU driver setup.

# Usage
Create a server, register a model with a tag, load it, and start serving.
Create a server, register a model with a tag, load it, and start serving. Then send a request and receive a response.

## Server
```
import llm_server

@@ -75,22 +84,46 @@ server.start() # Non-blocking.
server.stop()
```
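
The hunk above is collapsed; for reference, a fuller server-side sketch assembled from the calls used in ```tests/test_server.py``` (the model name comes from the tests; the cache path is a placeholder):
```
import llm_server

server = llm_server.Server()
server.set_host(ip_address='127.0.0.1', port=8001)
server.add_model(tag='gpt', name='openai/gpt-oss-20b')
server.load_model(tag='gpt', location='<path to model cache dir>')
server.start()  # Non-blocking.

# ... serve requests over HTTP ...

server.stop()
```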

## Requester
```
import requests

URL = 'http://127.0.0.1:8001/ask'
details = {...} # See request body examples below.

response = requests.post(URL, json=details, timeout=15)
data = response.json()
print(data['text'])
```
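
The response JSON mirrors the ```Response``` model in ```src/llm_server/schemas.py```; the generated text is returned in the ```text``` field.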

# API Endpoints
| Method | Endpoint | Description |
|-|-|-|
| GET | `/` | Health check — returns `"Running."` |
| GET | `/get-models` | List all registered models and their tags |
| GET | `/ask-test` | Send a test prompt (`"Tell me a joke."`) to the first available model |
| POST | `/ask` | Send a prompt to a specific model |
| GET | `/` | Health check — returns `"Running."`. |
| GET | `/get-models` | List all available hosted models and their tags. |
| GET | `/ask-test` | Send a test prompt (`"Tell me a joke."`) to the first available model. |
| POST | `/ask` | Send a prompt to a specific model. |
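
A quick smoke test of the GET endpoints with ```requests``` (a minimal sketch, assuming a server started on port 8001 as in the Usage section):
```
import requests

BASE = 'http://127.0.0.1:8001'  # Assumed host/port from the Usage section.

print(requests.get(f'{BASE}/', timeout=5).json())            # 'Running.'
print(requests.get(f'{BASE}/get-models', timeout=5).json())  # Hosted models and tags.
```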

### POST /ask — Request Body

#### Text Example
```
{
"tag": "gpt",
"prompt": "Tell me a joke.",
"images": [],
"max_tokens": 64,
"temperature": 0.9
}
```
`prompt` may also be a conversation dict (see [llm-conversation](https://github.com/EricApgar/llm-conversation)). `images` accepts base64-encoded PNG strings for multimodal models.
* `prompt` may also be a Conversation-formatted dict, i.e. the output of ```llm_conversation.Conversation.to_dict()``` (see [llm-conversation](https://github.com/EricApgar/llm-conversation) and the sketch below).
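
A minimal end-to-end sketch of a Conversation prompt, based on ```scripts/client_test.py``` (assumes a running server hosting a model tagged ```gpt```):
```
import requests
from llm_conversation import Conversation

c = Conversation()
c.set_overall_prompt(text='Pretend to be a person named John Doe.')
c.add_context(text='Your favorite color is onyx.')
c.add_response(role='user', text="What's your name and favorite color?")

details = {'tag': 'gpt', 'prompt': c.to_dict(), 'max_tokens': 64}
response = requests.post('http://127.0.0.1:8001/ask', json=details, timeout=15)
print(response.json()['text'])
```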

#### Image Example
```
{
"tag": "Phi4",
"prompt": "Describe the image.",
"images": [llm_server.encode_image(<path to image>)],
"max_tokens": 64,
}
```
* The ```temperature``` arg is currently not supported for Phi-4-multimodal-instruct.
* ```images``` accepts base64-encoded PNG strings for multimodal models; the encoder is provided by ```llm_server``` (see the sketch below).
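
Putting the pieces together for an image request (a sketch based on ```scripts/client_test.py```; the tag and image path are placeholders for your own setup):
```
import requests
from PIL import Image as PillowImage

import llm_server

image_b64 = llm_server.encode_image(image=PillowImage.open('<path to image>'))
details = {
    'tag': 'Phi4',
    'prompt': 'Describe the image.',
    'images': [image_b64],
    'max_tokens': 64}
response = requests.post('http://127.0.0.1:8001/ask', json=details, timeout=60)
print(response.json()['text'])
```
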
Binary file added assets/LLM Server GUI.png
6 changes: 5 additions & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "llm_server" # The pip install <name>.
version = "0.1.0"
version = "0.2.0"
description = "Template for repos that are intended to be packaged libraries."
readme = "README.md"
authors = [{ name = "Eric Apgar" }]
@@ -17,6 +17,10 @@ dependencies = [
gui = [
"nicegui>=3.8.0",
]
dev = [
"pytest>=8.0.0",
"requests>=2.32.0",
]

[tool.uv.sources]
llm = { git = "https://github.com/EricApgar/large-language-model" }
43 changes: 20 additions & 23 deletions scripts/client_test.py
@@ -1,45 +1,42 @@
import json
import requests
from io import BytesIO
import base64

from PIL import Image as PillowImage
from llm_conversation import Conversation

from llm_server.gui_app import run_gui
from llm_server.helper.helper import encode_image


def pil_to_api_b64(pil_image: PillowImage.Image) -> str:
buffer = BytesIO()
pil_image.save(buffer, format='PNG')
# TODO: Point to endpoint of hosted llm_server.Server().
URL = 'http://127.0.0.1:8001/ask'

return base64.b64encode(buffer.getvalue()).decode('ascii')


URL = 'https://127.0.0.1:8000/ask'

# images = [pil_to_api_b64(PillowImage.open('<path to image>'))]
# TODO: Edit as needed.
images = [encode_image(image=PillowImage.open(r'/home/eric/Desktop/monkey.png'))]

# TODO: Edit as needed.
c = Conversation()
c.set_overall_prompt(text='Pretend to be a person named John Doe.')
c.add_context(text='Your favorite color is onyx.')
c.add_response(role='user', text="What's your name and favorite color?")
prompt = c.to_dict()

REQUEST_DETAILS = {
# TODO: Edit as needed.
REQUEST_DETAILS_TEXT = {
'tag': 'GPT',
'prompt': prompt, #'Name a primary color.',
# 'images': images,
'max_tokens': 64,
'temperature': 0.9,}
'prompt': prompt,
'temperature': 0.9}

REQUEST_DETAILS_IMAGE = {
'tag': 'Phi-4',
'prompt': 'Describe the image.',
'images': images}


def main() -> None:

try:
response = requests.post(URL, json=REQUEST_DETAILS, timeout=15)
response = requests.post(URL, json=REQUEST_DETAILS_TEXT, timeout=15)
data = response.json()
print(json.dumps(data, indent=4))
print(data['text'])

except Exception as e:
raise(e)
@@ -49,6 +46,6 @@ def main() -> None:

if __name__ in {'__main__', '__mp_main__'}:

run_gui()

# main()
# TODO: Make sure a server instance (llm_server.Server() or
# llm_server.server_gui()) is running before running main().
main()
20 changes: 13 additions & 7 deletions src/llm_server/__init__.py
@@ -1,27 +1,33 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING


__all__ = [
'Server',
'server_gui',
'run_gui',
'encode_image',
]


if TYPE_CHECKING:
from .server import Server
from .gui_app import run_gui as server_gui
from .gui_app import run_gui as run_gui
from .helper.helper import encode_image


def __getattr__(name: str):
if name == 'Server':
from .server import Server
globals()[name] = Server
return Server
elif name == 'server_gui':
from .gui_app import run_gui as server_gui
globals()[name] = server_gui
return server_gui
elif name == 'run_gui':
from .gui_app import run_gui
globals()[name] = run_gui
return run_gui
elif name == 'encode_image':
from .helper.helper import encode_image
globals()[name] = encode_image
return encode_image
else:
raise AttributeError(f'module {__name__!r} has no attribute {name!r}')

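This is the module-level ```__getattr__``` lazy-import pattern (PEP 562): heavy submodules are imported only on first attribute access and then cached in ```globals()```, so later lookups bypass ```__getattr__``` entirely. For example:
```
import llm_server          # Fast: no heavy dependencies are imported yet.
llm_server.encode_image    # First access runs __getattr__ and imports the helper.
llm_server.encode_image    # Now cached in globals(); __getattr__ is not called again.
```
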
3 changes: 3 additions & 0 deletions src/llm_server/backend.py
@@ -110,6 +110,9 @@ def ask(self, details: Request) -> str:
else:
del input_args['images']

# Remove unset (None) fields so defaults are used downstream.
input_args = {k: v for k, v in input_args.items() if v is not None}

if isinstance(input_args['prompt'], dict):
c = Conversation()
c.from_dict(data=input_args['prompt'])
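A small illustration of the new filter with hypothetical values:
```
input_args = {'prompt': 'Hi.', 'images': None, 'max_tokens': 64, 'temperature': None}
input_args = {k: v for k, v in input_args.items() if v is not None}
# -> {'prompt': 'Hi.', 'max_tokens': 64}; unset fields fall back to model defaults.
```
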
1 change: 0 additions & 1 deletion src/llm_server/gui_app.py
@@ -1,7 +1,6 @@
import queue
import weakref
import re
from dataclasses import dataclass
import socket

from nicegui import ui, run
16 changes: 16 additions & 0 deletions src/llm_server/helper/helper.py
@@ -1,5 +1,21 @@
from io import BytesIO
import base64
from typing import Optional, overload

from PIL import Image as PillowImage


def encode_image(image: PillowImage.Image) -> str:
"""
Encode a PIL image as a base64 string for API transport.
"""

buffer = BytesIO()
image.save(buffer, format='PNG')
encoded_output = base64.b64encode(buffer.getvalue()).decode('ascii')

return encoded_output


class Endpoint:

2 changes: 1 addition & 1 deletion src/llm_server/schemas.py
Expand Up @@ -6,7 +6,7 @@ class Request(BaseModel):
prompt: str | dict = Field(..., description='LLM prompt or conversation (llm-conversation.to_dict).')
images: list[str] | None = Field(default=None, description='Each image should be base64-encoded image bytes.', examples=[None])
max_tokens: int = Field(default=256, description='Max token count for generation.')
temperature: float = Field(default=0.9, description='Temperature of generated response.')
temperature: float | None = Field(default=None, description='Temperature of generated response.')
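# Note: with default=None, an unset temperature is stripped by the None-filter
# added in backend.ask(), so the underlying model's default applies (needed for
# Phi-4-multimodal-instruct, which does not support the temperature arg).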


class Response(BaseModel):
65 changes: 65 additions & 0 deletions tests/test_server.py
@@ -0,0 +1,65 @@
"""
Tests for llm_server.Server.

These tests can load a real model when hosting the server, so running the full
suite requires local model weights. Sync the environment first, then pass
LLM_MODEL_CACHE inline when invoking pytest so it only exists for that one
command and does not persist in your shell:

uv sync --extra dev
LLM_MODEL_CACHE=<path to model cache dir> pytest

Tests are skipped automatically if LLM_MODEL_CACHE is not set.

Make sure the virtual environment is active.
"""

import os
import time

import pytest
import requests

import llm_server


MODEL_CACHE = os.environ.get('LLM_MODEL_CACHE')


@pytest.fixture(scope='module')
def server():
s = llm_server.Server()
s.set_host(ip_address='127.0.0.1', port=8001)
s.start()
time.sleep(1) # Allow uvicorn to finish starting.
yield s
s.stop()


@pytest.fixture(scope='module')
def server_with_model():
if not MODEL_CACHE:
pytest.skip('LLM_MODEL_CACHE environment variable not set.')
s = llm_server.Server()
s.set_host(ip_address='127.0.0.1', port=8002)
s.add_model(tag='gpt', name='openai/gpt-oss-20b')
s.load_model(tag='gpt', location=MODEL_CACHE)
s.start()
time.sleep(1) # Allow uvicorn to finish starting.
yield s
s.stop()


def test_server_running(server):
response = requests.get('http://127.0.0.1:8001/', timeout=5)
assert response.json() == 'Running.'


def test_ask(server_with_model):
response = requests.post(
'http://127.0.0.1:8002/ask',
json={'tag': 'gpt', 'prompt': 'Name a primary color.'},
timeout=60)
data = response.json()
assert isinstance(data['text'], str)
assert len(data['text']) > 0