diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 0000000..0769b6f
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,23 @@
+name: Tests
+
+on:
+ pull_request:
+ branches: [main]
+ push:
+ branches: [main]
+
+jobs:
+ test:
+ runs-on: self-hosted # Requires a self-hosted runner with a GPU and model weights.
+ # Register one at: Settings -> Actions -> Runners -> New self-hosted runner.
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Install package
+ run: uv sync --extra dev
+
+ - name: Run tests
+ run: uv run pytest -v
+ env:
+ LLM_MODEL_CACHE: ${{ secrets.LLM_MODEL_CACHE }}
diff --git a/README.md b/README.md
index 901051d..54d5338 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,13 @@
# llm-server
-Host an LLM as a non-blocking HTTP server with a simple interface for managing and querying models. Built on [FastAPI](https://fastapi.tiangolo.com/) and [uvicorn](https://www.uvicorn.org/), it wraps the [large-language-model](https://github.com/EricApgar/large-language-model) library to expose LLM inference over a REST API.
+Host an LLM on a non-blocking HTTP server with a simple interface for managing and querying models. This lets you run the model on LLM-capable hardware and query it over the network from devices that can't run an LLM locally.
+
+Built on [FastAPI](https://fastapi.tiangolo.com/) and [uvicorn](https://www.uvicorn.org/), it wraps the [large-language-model](https://github.com/EricApgar/large-language-model) library to expose LLM inference over a REST API.
+
+A user-friendly GUI is included (but **not** required) for general use.
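+
+The GUI is launched with ```llm_server.run_gui()``` and requires installing with the ```gui``` tag (see Optional Dependencies below).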
+
+![LLM Server GUI](assets/LLM%20Server%20GUI.png)
+

+
# Setup
See [Releases](https://github.com/EricApgar/llm-server/releases) to install from wheel file.
@@ -7,7 +15,7 @@ See [Releases](https://github.com/EricApgar/llm-server/releases) to install from
See ```pyproject.toml``` for required Python version and dependencies.
## Optional Dependencies
-This library uses optional dependencies for additional features. See the ```pyproject.toml``` for the list of optional library tags. To install with the GUI support, use the ```gui``` tag.
+This library uses optional dependencies for additional features. See the ```pyproject.toml``` for the list of optional library tags. To install with GUI support, use the ```gui``` tag.
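+
+For example, to include both GUI and test support in a local editable install: ```pip install -e ".[gui,dev]"```.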
## Library
Install this repo as a library into another project.
@@ -60,8 +68,9 @@ pip install -e ".[gui]"
Running an LLM requires an NVIDIA GPU with ideally a large number of TOPS. See the [large-language-model](https://github.com/EricApgar/large-language-model) repo for hardware details and GPU driver setup.
# Usage
-Create a server, register a model with a tag, load it, and start serving.
+Create a server, register a model with a tag, load it, and start serving. Then send a request and receive a response.
+## Server
```
import llm_server
@@ -75,22 +84,46 @@ server.start() # Non-blocking.
server.stop()
```
+## Requester
+```
+import requests
+
+URL = 'http://127.0.0.1:8001/ask'
+details = {...} # See request body examples below.
+
+response = requests.post(URL, json=details, timeout=15)
+data = response.json()
+print(data['text'])
+```
+
# API Endpoints
| Method | Endpoint | Description |
|-|-|-|
-| GET | `/` | Health check — returns `"Running."` |
-| GET | `/get-models` | List all registered models and their tags |
-| GET | `/ask-test` | Send a test prompt (`"Tell me a joke."`) to the first available model |
-| POST | `/ask` | Send a prompt to a specific model |
+| GET | `/` | Health check — returns `"Running."`. |
+| GET | `/get-models` | List all available hosted models and their tags. |
+| GET | `/ask-test` | Send a test prompt (`"Tell me a joke."`) to the first available model. |
+| POST | `/ask` | Send a prompt to a specific model. |
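+
+For a quick check from Python, a minimal sketch (assumes the server from the Usage section is running locally on port 8001):
+
+```
+import requests
+
+BASE = 'http://127.0.0.1:8001'
+
+print(requests.get(f'{BASE}/', timeout=5).json())            # Health check: "Running."
+print(requests.get(f'{BASE}/get-models', timeout=5).json())  # Hosted models and their tags.
+```
+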
### POST /ask — Request Body
+
+#### Text Example
```
{
"tag": "gpt",
"prompt": "Tell me a joke.",
- "images": [],
"max_tokens": 64,
"temperature": 0.9
}
```
-`prompt` may also be a conversation dict (see [llm-conversation](https://github.com/EricApgar/llm-conversation)). `images` accepts base64-encoded PNG strings for multimodal models.
+* `prompt` may also be a Conversation-formatted dict (see [llm-conversation](https://github.com/EricApgar/llm-conversation)); see the sketch after this list.
+ * ```llm_conversation.Conversation.to_dict()```
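+
+A minimal sketch of sending a Conversation as the prompt (assumes the server from the Usage section is running and a model is hosted under the ```gpt``` tag):
+
+```
+import requests
+from llm_conversation import Conversation
+
+c = Conversation()
+c.set_overall_prompt(text='Pretend to be a person named John Doe.')
+c.add_response(role='user', text='What is your name?')
+
+details = {'tag': 'gpt', 'prompt': c.to_dict(), 'max_tokens': 64}
+response = requests.post('http://127.0.0.1:8001/ask', json=details, timeout=15)
+print(response.json()['text'])
+```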
+
+#### Image Example
+```
+{
+ "tag": "Phi4",
+ "prompt": "Describe the image.",
+    "images": [llm_server.encode_image(image=...)],
+    "max_tokens": 64
+}
+```
+* The ```temperature``` arg is currently not supported for Phi-4-multimodal-instruct.
+* ```images``` accepts base64-encoded PNG strings for multimodal models. The encoder is provided by ```llm_server```; a short sketch follows below.
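+
+A minimal sketch of building the image request above (the image path is a placeholder; assumes Pillow is installed and a multimodal model is hosted under the ```Phi4``` tag):
+
+```
+import llm_server
+from PIL import Image as PillowImage
+
+pil_image = PillowImage.open('path/to/image.png')  # Placeholder path.
+
+details = {
+    'tag': 'Phi4',
+    'prompt': 'Describe the image.',
+    'images': [llm_server.encode_image(image=pil_image)],
+    'max_tokens': 64}
+# POST details to /ask as shown in the Requester section.
+```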
\ No newline at end of file
diff --git a/assets/LLM Server GUI.png b/assets/LLM Server GUI.png
new file mode 100644
index 0000000..afa2e1a
Binary files /dev/null and b/assets/LLM Server GUI.png differ
diff --git a/pyproject.toml b/pyproject.toml
index ff0d341..dbc72d2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "llm_server" # The pip install .
-version = "0.1.0"
+version = "0.2.0"
description = "Template for repos that are intended to be packaged libraries."
readme = "README.md"
authors = [{ name = "Eric Apgar" }]
@@ -17,6 +17,10 @@ dependencies = [
gui = [
"nicegui>=3.8.0",
]
+dev = [
+ "pytest>=8.0.0",
+ "requests>=2.32.0",
+]
[tool.uv.sources]
llm = { git = "https://github.com/EricApgar/large-language-model" }
diff --git a/scripts/client_test.py b/scripts/client_test.py
index 099c13a..9e10d87 100644
--- a/scripts/client_test.py
+++ b/scripts/client_test.py
@@ -1,45 +1,42 @@
-import json
import requests
-from io import BytesIO
-import base64
from PIL import Image as PillowImage
from llm_conversation import Conversation
-from llm_server.gui_app import run_gui
+from llm_server.helper.helper import encode_image
-def pil_to_api_b64(pil_image: PillowImage.Image) -> str:
- buffer = BytesIO()
- pil_image.save(buffer, format='PNG')
+# TODO: Point to the /ask endpoint of the hosted llm_server.Server().
+URL = 'http://127.0.0.1:8001/ask'
- return base64.b64encode(buffer.getvalue()).decode('ascii')
-
-
-URL = 'https://127.0.0.1:8000/ask'
-
-# images = [pil_to_api_b64(PillowImage.open(''))]
+# TODO: Edit as needed.
+images = [encode_image(image=PillowImage.open(r'/home/eric/Desktop/monkey.png'))]
+# TODO: Edit as needed.
c = Conversation()
c.set_overall_prompt(text='Pretend to be a person named John Doe.')
c.add_context(text='Your favorite color is onyx.')
c.add_response(role='user', text='Whats your name and favorite color?')
prompt = c.to_dict()
-REQUEST_DETAILS = {
+# TODO: Edit as needed.
+REQUEST_DETAILS_TEXT = {
'tag': 'GPT',
- 'prompt': prompt, #'Name a primary color.',
- # 'images': images,
- 'max_tokens': 64,
- 'temperature': 0.9,}
+ 'prompt': prompt,
+ 'temperature': .9}
+
+REQUEST_DETAILS_IMAGE = {
+ 'tag': 'Phi-4',
+ 'prompt': 'Describe the image.',
+ 'images': images}
def main() -> None:
try:
- response = requests.post(URL, json=REQUEST_DETAILS, timeout=15)
+ response = requests.post(URL, json=REQUEST_DETAILS_TEXT, timeout=15)
data = response.json()
- print(json.dumps(data, indent=4))
+ print(data['text'])
except Exception as e:
raise(e)
@@ -49,6 +46,6 @@ def main() -> None:
if __name__ in {'__main__', '__mp_main__'}:
- run_gui()
-
- # main()
+ # TODO: Make sure a server instance (llm_server.Server() or
+    # llm_server.run_gui()) is running before running main().
+ main()
diff --git a/src/llm_server/__init__.py b/src/llm_server/__init__.py
index 85fb36b..ca2afd0 100644
--- a/src/llm_server/__init__.py
+++ b/src/llm_server/__init__.py
@@ -1,16 +1,18 @@
from __future__ import annotations
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING
__all__ = [
'Server',
- 'server_gui',
+ 'run_gui',
+ 'encode_image',
]
if TYPE_CHECKING:
from .server import Server
- from .gui_app import run_gui as server_gui
+ from .gui_app import run_gui as run_gui
+ from .helper.helper import encode_image
def __getattr__(name: str):
@@ -18,10 +20,14 @@ def __getattr__(name: str):
from .server import Server
globals()[name] = Server
return Server
- elif name == 'server_gui':
- from .gui_app import run_gui as server_gui
- globals()[name] = server_gui
- return server_gui
+ elif name == 'run_gui':
+ from .gui_app import run_gui
+ globals()[name] = run_gui
+ return run_gui
+ elif name == 'encode_image':
+ from .helper.helper import encode_image
+ globals()[name] = encode_image
+ return encode_image
else:
raise AttributeError(f'module {__name__!r} has no attribute {name!r}')
diff --git a/src/llm_server/backend.py b/src/llm_server/backend.py
index 6cf002f..82e514e 100644
--- a/src/llm_server/backend.py
+++ b/src/llm_server/backend.py
@@ -110,6 +110,9 @@ def ask(self, details: Request) -> str:
else:
del input_args['images']
+ # Remove unset (None) fields so defaults are used downstream.
+ input_args = {k: v for k, v in input_args.items() if v is not None}
+
if isinstance(input_args['prompt'], dict):
c = Conversation()
c.from_dict(data=input_args['prompt'])
diff --git a/src/llm_server/gui_app.py b/src/llm_server/gui_app.py
index 53ef34c..9e7308c 100644
--- a/src/llm_server/gui_app.py
+++ b/src/llm_server/gui_app.py
@@ -1,7 +1,6 @@
import queue
import weakref
import re
-from dataclasses import dataclass
import socket
from nicegui import ui, run
diff --git a/src/llm_server/helper/helper.py b/src/llm_server/helper/helper.py
index 7b2b53c..026645a 100644
--- a/src/llm_server/helper/helper.py
+++ b/src/llm_server/helper/helper.py
@@ -1,5 +1,21 @@
+from io import BytesIO
+import base64
from typing import Optional, overload
+from PIL import Image as PillowImage
+
+
+def encode_image(image: PillowImage.Image) -> str:
+ """
+ Encode a PIL image as a base64 string for API transport.
+ """
+
+ buffer = BytesIO()
+ image.save(buffer, format='PNG')
+ encoded_output = base64.b64encode(buffer.getvalue()).decode('ascii')
+
+ return encoded_output
+
class Endpoint:
diff --git a/src/llm_server/schemas.py b/src/llm_server/schemas.py
index 3b12b28..3b5405d 100644
--- a/src/llm_server/schemas.py
+++ b/src/llm_server/schemas.py
@@ -6,7 +6,7 @@ class Request(BaseModel):
prompt: str | dict = Field(..., description='LLM prompt or conversation (llm-conversation.to_dict).')
images: list[str] | None = Field(default=None, description='Each image should be base64-encoded image bytes.', examples=[None])
max_tokens: int = Field(default=256, description='Max token count for generation.')
- temperature: float = Field(default=0.9, description='Temperature of generated response.')
+ temperature: float | None = Field(default=None, description='Temperature of generated response.')
class Response(BaseModel):
diff --git a/tests/test_server.py b/tests/test_server.py
new file mode 100644
index 0000000..4db6d9c
--- /dev/null
+++ b/tests/test_server.py
@@ -0,0 +1,65 @@
+"""
+Tests for llm_server.Server.
+
+These tests host a real server and can load a real model, so a full test run
+requires local model weights. Sync the environment first, then pass
+LLM_MODEL_CACHE inline when invoking pytest so it only exists for that one
+command and does not persist in your shell:
+
+    uv sync --extra dev
+    LLM_MODEL_CACHE=/path/to/model/cache pytest
+
+Tests are skipped automatically if LLM_MODEL_CACHE is not set.
+
+Make sure the virtual environment is active.
+"""
+
+import os
+import time
+
+import pytest
+import requests
+
+import llm_server
+
+
+MODEL_CACHE = os.environ.get('LLM_MODEL_CACHE')
+
+
+@pytest.fixture(scope='module')
+def server():
+ s = llm_server.Server()
+ s.set_host(ip_address='127.0.0.1', port=8001)
+ s.start()
+ time.sleep(1) # Allow uvicorn to finish starting.
+ yield s
+ s.stop()
+
+
+@pytest.fixture(scope='module')
+def server_with_model():
+ if not MODEL_CACHE:
+ pytest.skip('LLM_MODEL_CACHE environment variable not set.')
+ s = llm_server.Server()
+ s.set_host(ip_address='127.0.0.1', port=8002)
+ s.add_model(tag='gpt', name='openai/gpt-oss-20b')
+ s.load_model(tag='gpt', location=MODEL_CACHE)
+ s.start()
+ time.sleep(1) # Allow uvicorn to finish starting.
+ yield s
+ s.stop()
+
+
+def test_server_running(server):
+ response = requests.get('http://127.0.0.1:8001/', timeout=5)
+ assert response.json() == 'Running.'
+
+
+def test_ask(server_with_model):
+ response = requests.post(
+ 'http://127.0.0.1:8002/ask',
+ json={'tag': 'gpt', 'prompt': 'Name a primary color.'},
+ timeout=60)
+ data = response.json()
+ assert isinstance(data['text'], str)
+ assert len(data['text']) > 0
diff --git a/uv.lock b/uv.lock
index 2b62547..45941f9 100644
--- a/uv.lock
+++ b/uv.lock
@@ -445,11 +445,10 @@ wheels = [
[[package]]
name = "llm"
-version = "0.4.2"
-source = { git = "https://github.com/EricApgar/large-language-model#275714b6a3c52eb7ce436e7707a30b29de5ff162" }
+version = "0.4.3"
+source = { git = "https://github.com/EricApgar/large-language-model#4f85188e9b30b91caf7bdc64e53bbf4e905e8e43" }
dependencies = [
{ name = "llm-conversation" },
- { name = "pytest" },
{ name = "sentence-transformers" },
{ name = "torch" },
{ name = "torchvision" },
@@ -473,7 +472,7 @@ source = { git = "https://github.com/EricApgar/llm-conversation#ce9d59af41f4a018
[[package]]
name = "llm-server"
-version = "0.1.0"
+version = "0.2.0"
source = { editable = "." }
dependencies = [
{ name = "fastapi" },
@@ -482,6 +481,10 @@ dependencies = [
]
[package.optional-dependencies]
+dev = [
+ { name = "pytest" },
+ { name = "requests" },
+]
gui = [
{ name = "nicegui" },
]
@@ -491,9 +494,11 @@ requires-dist = [
{ name = "fastapi", specifier = ">=0.135.1" },
{ name = "llm", extras = ["all"], git = "https://github.com/EricApgar/large-language-model" },
{ name = "nicegui", marker = "extra == 'gui'", specifier = ">=3.8.0" },
+ { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" },
+ { name = "requests", marker = "extra == 'dev'", specifier = ">=2.32.0" },
{ name = "uvicorn", specifier = ">=0.41.0" },
]
-provides-extras = ["gui"]
+provides-extras = ["gui", "dev"]
[[package]]
name = "markdown2"
@@ -1295,9 +1300,9 @@ dependencies = [
{ name = "torch" },
]
wheels = [
- { url = "https://download.pytorch.org/whl/cu130/torchvision-0.25.0%2Bcu130-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3646e6c8fa5066da392d0ff13002cc683301386fe1933f8f1432fc5292e5d288" },
- { url = "https://download.pytorch.org/whl/cu130/torchvision-0.25.0%2Bcu130-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c2f5e38f0cd57a2796e4503c0f13365deba01dbc167ef820f0beec7ca96f5f2e" },
- { url = "https://download.pytorch.org/whl/cu130/torchvision-0.25.0%2Bcu130-cp312-cp312-win_amd64.whl", hash = "sha256:7dd245ee7df0ceb00125e57615de31ca7232bf046143c2c3fe7a3b321bb50958" },
+ { url = "https://download-r2.pytorch.org/whl/cu130/torchvision-0.25.0%2Bcu130-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3646e6c8fa5066da392d0ff13002cc683301386fe1933f8f1432fc5292e5d288" },
+ { url = "https://download-r2.pytorch.org/whl/cu130/torchvision-0.25.0%2Bcu130-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c2f5e38f0cd57a2796e4503c0f13365deba01dbc167ef820f0beec7ca96f5f2e" },
+ { url = "https://download-r2.pytorch.org/whl/cu130/torchvision-0.25.0%2Bcu130-cp312-cp312-win_amd64.whl", hash = "sha256:7dd245ee7df0ceb00125e57615de31ca7232bf046143c2c3fe7a3b321bb50958" },
]
[[package]]