8 changes: 8 additions & 0 deletions .env.example
@@ -5,6 +5,10 @@ VERSION=<app_version>

HOST=<host>
PORT=<port>
FASTAPI_HOST_PORT=<fastapi_host_port>
FASTAPI_CONTAINER_PORT=<fastapi_container_port>
REDIS_HOST_PORT=<redis_host_port>
REDIS_CONTAINER_PORT=<redis_container_port>

DATABASE_URL=postgresql://<db_user>:<db_password>@<db_host>:5432/<db_name>?sslmode=disable
SECRET_KEY=<secret_key>
@@ -44,6 +48,10 @@ OLLAMA_RETRY_BACKOFF_SECONDS=<ollama_retry_backoff_seconds>
OLLAMA_SYSTEM_PROMPT=<ollama_system_prompt>
LLM_PROMPT_MAX_LENGTH=<llm_prompt_max_length>
FEATURE_SPEC_HISTORY_DEFAULT_LIMIT=<feature_spec_history_default_limit>
CELERY_BROKER_URL=<celery_broker_url>
CELERY_RESULT_BACKEND=<celery_result_backend>
CELERY_TASK_MAX_RETRIES=<celery_task_max_retries>
CELERY_TASK_RETRY_BASE_SECONDS=<celery_task_retry_base_seconds>

TEST_DB_HOST=<test_db_host>
TEST_DB_PORT=<test_db_port>
127 changes: 108 additions & 19 deletions README.md
@@ -2,7 +2,11 @@

# Specification Generator API

Production-ready FastAPI backend for authentication, LLM-powered specification generation, health checks, and secure API middleware baseline.
Production-ready FastAPI backend with async LLM generation via Celery + Redis, JWT auth, and Docker Compose orchestration.

<p align="center">
<img alt="FastAPI" src="https://fastapi.tiangolo.com/img/logo-margin/logo-teal.png" width="220"/>
</p>

<p align="center">
<img alt="python" src="https://img.shields.io/badge/Python-3.10-3776AB?logo=python&logoColor=white"/>
@@ -19,7 +23,9 @@ Production-ready FastAPI backend for authentication, LLM-powered specification g

- JWT authentication based on fastapi-users
- User registration and user management endpoints
- Feature specification generation endpoints powered by Ollama
- Async feature specification generation with Celery tasks
- Redis as Celery broker/result backend
- Ollama integration for LLM responses
- Readiness and health probes for runtime checks
- Alembic database migrations
- Security middleware baseline:
@@ -38,29 +44,40 @@ Production-ready FastAPI backend for authentication, LLM-powered specification g
- SQLAlchemy 2.0
- Alembic
- fastapi-users
- Celery
- Redis
- PostgreSQL (via DATABASE_URL)
- Ollama (LLM provider)
- Pytest

## Architecture (Async Flow)

1. Client calls `POST /api/v1/feature-spec/generate`.
2. API stores a run row and enqueues a Celery task in Redis.
3. API returns immediately with a `task_id` and `processing` status.
4. A Celery worker calls Ollama and persists the success/error result in the DB.
5. Client polls `GET /api/v1/feature-spec/tasks/{task_id}` for the result.
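
A minimal client-side sketch of this flow (illustrative only; assumes `httpx` is installed, that the feature-spec endpoints require a bearer token from the auth endpoints, and that terminal task states follow Celery's conventions):

```python
import time

import httpx

BASE_URL = "http://localhost:8005"  # default FASTAPI_HOST_PORT mapping
TOKEN = "<jwt_access_token>"  # placeholder; obtain via the auth login endpoint
HEADERS = {"Authorization": f"Bearer {TOKEN}"}

# 1) Submit the generation request; the API answers immediately.
submit = httpx.post(
    f"{BASE_URL}/api/v1/feature-spec/generate",
    json={"feature_idea": "payment for premium posts"},
    headers=HEADERS,
)
task_id = submit.json()["task_id"]

# 2) Poll the task endpoint until the worker reports a terminal state.
while True:
    status = httpx.get(
        f"{BASE_URL}/api/v1/feature-spec/tasks/{task_id}", headers=HEADERS
    ).json()
    if status["status"] in ("SUCCESS", "FAILURE"):
        print(status)
        break
    time.sleep(2)
```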

## Project Structure

- app/main.py: FastAPI app bootstrap and router registration
- app/core/: settings, database wiring, startup/lifespan, bootstrap logic
- app/api/: health, readiness, OpenAPI customization
- app/middlewares/: security middleware composition and implementations
- app/modules/auth/: auth domain (models, schemas, dependencies, router)
- app/modules/feature_spec/: feature spec API, schemas, prompts, providers
- app/modules/feature_spec/: API layer + application services for feature spec
- app/infrastructure/: Celery app, task workers, Ollama client
- app/scripts/: utility scripts (admin and prompt/model bootstrap)
- alembic/: migration config and versions
- docker-compose.yml: containerized app run
- docker-compose.yml: app + celery-worker + redis + ollama

## Setup Guide

### Prerequisites

- Python 3.10+
- PostgreSQL database
- Optional: Docker + Docker Compose (recommended for VPS)
- Docker + Docker Compose (recommended)

### 1) Configure environment

@@ -70,6 +87,8 @@ Required minimum:

- DATABASE_URL
- SECRET_KEY
- CELERY_BROKER_URL
- CELERY_RESULT_BACKEND

Recommended auth bootstrap values:

@@ -81,10 +100,20 @@ LLM values:

- OLLAMA_BASE_URL
- OLLAMA_MODEL
- OLLAMA_TIMEOUT

Compose ports (host -> container):

- FASTAPI_HOST_PORT=8005
- FASTAPI_CONTAINER_PORT=8001
- REDIS_HOST_PORT=6380
- REDIS_CONTAINER_PORT=6379

For Docker Compose in this project, use:

- OLLAMA_BASE_URL=http://ollama:11434
- CELERY_BROKER_URL=redis://redis:6379/0
- CELERY_RESULT_BACKEND=redis://redis:6379/1

### 2A) Run locally

@@ -102,7 +131,11 @@ pip install -r requirements-dev.txt
python -m alembic upgrade head
python -m app.scripts.bootstrap_admin
python -m app.scripts.bootstrap_prompt_template
python -m uvicorn app.main:app --host 0.0.0.0 --port 8000
# terminal 1: api
python -m uvicorn app.main:app --host 0.0.0.0 --port 8005

# terminal 2: worker
celery -A app.infrastructure.celery_app:celery_app worker --loglevel=INFO
```

### 2B) Run with Docker
@@ -119,28 +152,32 @@ Notes:
- prompt template bootstrap script (`python -m app.scripts.bootstrap_prompt_template`)
- Ollama model bootstrap (`python -m app.scripts.ensure_ollama_model`)
- uvicorn app startup
- The Celery worker runs in a dedicated container (`celery-worker`).
- Redis runs only in Docker Compose and is reached internally via the service name `redis`.
- On first deploy, startup may take longer while the configured `OLLAMA_MODEL` is downloaded.
- The FastAPI container reaches Ollama via the internal Docker network URL http://ollama:11434.

Verify Ollama API:
Verify services:

```bash
curl http://localhost:11434/api/generate -d '{
  "model": "mistral",
  "prompt": "hello",
  "stream": false
}'
docker compose ps
docker compose logs -f app
docker compose logs -f celery-worker
```

## API Docs

When server is running locally:
When the server is running locally (custom port):

- Swagger UI: http://localhost:8000/docs
- ReDoc: http://localhost:8000/redoc (pinned ReDoc 2.x script)
- OpenAPI JSON: http://localhost:8000/openapi.json
- Swagger UI: http://localhost:8005/docs
- ReDoc: http://localhost:8005/redoc (pinned ReDoc 2.x script)
- OpenAPI JSON: http://localhost:8005/openapi.json

For Docker Compose deployment, use port 8001 instead of 8000.
For Docker Compose deployment (default host mapping):

- Swagger UI: http://localhost:8005/docs
- ReDoc: http://localhost:8005/redoc
- OpenAPI JSON: http://localhost:8005/openapi.json

## API Endpoints

@@ -175,16 +212,56 @@ Login note:
### Feature Spec

- POST /api/v1/feature-spec/generate
- GET /api/v1/feature-spec/tasks/{task_id}
- GET /api/v1/feature-spec/history?limit=10

Request body example for generation:
Generate request:

```json
{
"feature_idea": "payment for premium posts"
}
```

Generate response:

```json
{
"task_id": "3b44daff-1e83-4328-925f-62c22a9163d2",
"status": "processing"
}
```

Task status response examples:

```json
{
"task_id": "3b44daff-1e83-4328-925f-62c22a9163d2",
"status": "PENDING"
}
```

```json
{
"task_id": "3b44daff-1e83-4328-925f-62c22a9163d2",
"status": "SUCCESS",
"result": {
"run_id": 10,
"status": "success",
"feature_idea": "payment for premium posts",
"feature_summary": {
"user_stories": [],
"acceptance_criteria": [],
"db_models_and_api_endpoints": {
"db_models": [],
"api_endpoints": []
},
"risk_assessment": []
}
}
}
```

## Quality Checks

Run linter:
@@ -225,8 +302,20 @@ If app cannot connect to DB:
- Verify DATABASE_URL
- Verify DB network access and sslmode if needed

If LLM requests fail:
If Celery tasks stay in `PENDING`:

- Check worker is healthy: `docker compose ps`
- Check worker logs: `docker compose logs -f celery-worker`
- Verify Redis URLs in `.env` point to the `redis` service inside the Docker network
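
To rule out broker connectivity, a quick check from the app environment (a sketch; assumes the `redis` Python package, which Celery's Redis transport depends on, is importable):

```python
import redis  # redis-py, installed as part of Celery's Redis transport

from app.core.settings import settings

# Ping the exact broker URL the app is configured with.
conn = redis.Redis.from_url(settings.CELERY_BROKER_URL)
print(conn.ping())  # True means the broker is reachable
```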

If LLM requests fail or timeout:

- Verify OLLAMA_BASE_URL
- Ensure Ollama is running and the model is available
- For Docker deployment, ensure OLLAMA_BASE_URL is http://ollama:11434
- Increase `OLLAMA_TIMEOUT` for long generations
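
A quick reachability check against Ollama (a sketch; Ollama's `GET /api/tags` lists locally available models, so it also shows whether `OLLAMA_MODEL` has been pulled):

```python
import httpx

from app.core.settings import settings

# A 200 response confirms Ollama is up; the names show which models are pulled.
resp = httpx.get(f"{settings.OLLAMA_BASE_URL.rstrip('/')}/api/tags", timeout=5)
resp.raise_for_status()
print([model["name"] for model in resp.json().get("models", [])])
```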

If compose prints `variable is not set` for random token-like names:

- Your `.env` likely has `$` inside secret values
- Escape `$` as `$$` in `.env` values used by Docker Compose
7 changes: 6 additions & 1 deletion app/core/settings.py
@@ -68,7 +68,7 @@ class Settings(BaseSettings):

    OLLAMA_BASE_URL: str = "http://localhost:11434"
    OLLAMA_MODEL: str = "mistral"
    OLLAMA_TIMEOUT: int = 120
    OLLAMA_TIMEOUT: int = 180
    OLLAMA_CONNECT_TIMEOUT: int = 10
    OLLAMA_MAX_RETRIES: int = 2
    OLLAMA_RETRY_BACKOFF_SECONDS: float = 1.0
@@ -81,6 +81,11 @@
    LLM_PROMPT_MAX_LENGTH: int = 8000
    FEATURE_SPEC_HISTORY_DEFAULT_LIMIT: int = 10

    CELERY_BROKER_URL: str = "redis://redis:6379/0"
    CELERY_RESULT_BACKEND: str = "redis://redis:6379/1"
    CELERY_TASK_MAX_RETRIES: int = 3
    CELERY_TASK_RETRY_BASE_SECONDS: int = 2
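    # Assumed retry semantics (illustrative): workers retry a failed task up to
    # CELERY_TASK_MAX_RETRIES times, waiting roughly
    # CELERY_TASK_RETRY_BASE_SECONDS * 2 ** attempt seconds between tries.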

    model_config = SettingsConfigDict(
        env_file=".env",
        case_sensitive=True,
1 change: 1 addition & 0 deletions app/infrastructure/__init__.py
@@ -0,0 +1 @@
"""Infrastructure layer modules."""
21 changes: 21 additions & 0 deletions app/infrastructure/celery_app.py
@@ -0,0 +1,21 @@
from celery import Celery

from app.core.settings import settings


celery_app = Celery(
    "specification_generator",
    broker=settings.CELERY_BROKER_URL,
    backend=settings.CELERY_RESULT_BACKEND,
    include=["app.infrastructure.tasks.feature_spec_tasks"],
)

celery_app.conf.update(
    broker_connection_retry_on_startup=True,
    task_track_started=True,
    task_serializer="json",
    result_serializer="json",
    accept_content=["json"],
    timezone="UTC",
    enable_utc=True,
)
39 changes: 39 additions & 0 deletions app/infrastructure/ollama_client.py
@@ -0,0 +1,39 @@
import httpx

from app.core.settings import settings


class OllamaSyncClient:
    def __init__(self) -> None:
        self._base_url = settings.OLLAMA_BASE_URL.rstrip("/")
        self._model = settings.OLLAMA_MODEL
        self._timeout = httpx.Timeout(
            float(settings.OLLAMA_TIMEOUT),
            connect=float(settings.OLLAMA_CONNECT_TIMEOUT),
        )
        self._system_prompt = settings.OLLAMA_SYSTEM_PROMPT

    def _build_payload(self, user_prompt: str) -> dict:
        messages = []
        if self._system_prompt.strip():
            messages.append({"role": "system", "content": self._system_prompt})
        messages.append({"role": "user", "content": user_prompt})
        return {
            "model": self._model,
            "stream": False,
            "format": "json",
            "messages": messages,
        }

    def generate(self, user_prompt: str) -> str:
        url = f"{self._base_url}/api/chat"
        payload = self._build_payload(user_prompt)
        with httpx.Client(timeout=self._timeout) as client:
            response = client.post(url, json=payload, timeout=self._timeout)
            response.raise_for_status()
            data = response.json()
            content = data.get("message", {}).get("content", "")
            return content if isinstance(content, str) else str(content)


ollama_sync_client = OllamaSyncClient()
1 change: 1 addition & 0 deletions app/infrastructure/tasks/__init__.py
@@ -0,0 +1 @@
"""Celery task modules."""