12 changes: 12 additions & 0 deletions .env.example
@@ -1,12 +1,24 @@
LAI_ENV=local
LAI_HF_TOKEN=
LAI_OPENAI_API_KEY=
LAI_ANTHROPIC_API_KEY=
LAI_GEMINI_API_KEY=
LAI_MODEL_CATALOG=configs/models/catalog.yaml
LAI_ROUTING_POLICY=configs/routing/policies.yaml
LAI_PROMPT_ROOT=configs/prompts
LAI_HUGGINGFACE_CACHE_DIR=data/cache/huggingface
LAI_AIRLLM_SHARDS_DIR=data/models/airllm-shards
LAI_RAW_MODELS_DIR=data/models/raw
LAI_ARTIFACTS_DIR=data/artifacts
LAI_STATE_DIR=data/state
LAI_DATABASE_PATH=data/state/lai.db
LAI_LOGS_DIR=logs
LAI_ALLOW_OVERNIGHT_JOBS=true
LAI_ENABLE_GPU=true
LAI_MAX_ROUTER_TOKENS=2048
LAI_DEFAULT_TIMEOUT_SECONDS=120
LAI_DEFAULT_MAX_OUTPUT_TOKENS=1024
LAI_DEFAULT_TEMPERATURE=0.2
LAI_QUEUE_POLL_INTERVAL_SECONDS=5
LAI_WORKER_IDLE_SLEEP_SECONDS=2.0
LAI_MAX_RETRY_ATTEMPTS=1
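The variables above can be loaded into a small settings object at startup. A minimal standard-library sketch, not the project's actual config code (the `Settings` class and the subset of fields shown are illustrative; defaults mirror `.env.example`):

```python
import os
from dataclasses import dataclass


@dataclass
class Settings:
    """Illustrative holder for a subset of the LAI_* variables above."""
    env: str
    database_path: str
    max_router_tokens: int
    default_temperature: float
    allow_overnight_jobs: bool


def load_settings() -> Settings:
    # Read each LAI_* variable, falling back to the .env.example defaults.
    return Settings(
        env=os.environ.get("LAI_ENV", "local"),
        database_path=os.environ.get("LAI_DATABASE_PATH", "data/state/lai.db"),
        max_router_tokens=int(os.environ.get("LAI_MAX_ROUTER_TOKENS", "2048")),
        default_temperature=float(os.environ.get("LAI_DEFAULT_TEMPERATURE", "0.2")),
        allow_overnight_jobs=os.environ.get("LAI_ALLOW_OVERNIGHT_JOBS", "true").lower() == "true",
    )
```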
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -21,7 +21,7 @@ jobs:
- name: Install package
run: |
python -m pip install --upgrade pip
python -m pip install -e .[dev]
python -m pip install -e .[dev,api]

- name: Ruff
run: ruff check .
4 changes: 4 additions & 0 deletions .gitignore
@@ -23,5 +23,9 @@ build/
data/cache/
data/models/
data/artifacts/
data/state/*
!data/state/.gitignore
evals/results/*
!evals/results/.gitignore
logs/*
!logs/.gitignore
143 changes: 135 additions & 8 deletions README.md
@@ -78,18 +78,145 @@ cd LAI
py -3.11 -m venv .venv
.venv\Scripts\Activate.ps1
python -m pip install --upgrade pip
python -m pip install -e .[dev]
python -m pip install -e .[dev,api]
Copy-Item .env.example .env
python -m lai.cli doctor
```

To add the large-model runtime later:
To add the local heavy-model and provider backends later:

```powershell
python -m pip install -e .[dev,api]
python -m pip install airllm
python -m pip install -e .[local,providers]
```

## Current commands

```powershell
python -m lai.cli doctor
python -m lai.cli models list
python -m lai.cli models check
python -m lai.cli workstation validate
python -m lai.cli workstation validate --profile airllm
python -m lai.cli route explain "Summarize this note."
python -m lai.cli run "Create a detailed implementation strategy."
python -m lai.cli jobs list
python -m lai.cli jobs stale
python -m lai.cli jobs recover
python -m lai.cli jobs replay <job-id> --queue-mode queued
python -m lai.cli worker run --once
python -m lai.cli worker run --max-jobs 3
python -m lai.cli worker run --until-idle --max-idle-cycles 1
python -m lai.cli worker status
python -m lai.cli worker serve --poll-interval 10
python -m lai.cli worker stop
python -m lai.cli smoke providers
python -m lai.cli smoke providers --live
python -m lai.cli smoke local --live --include-airllm
python -m lai.cli smoke latest
lai-worker run --poll-interval 10
lai-worker stop
python -m lai.cli eval route --no-save
```

## Current API

After installing the `api` extra:

```powershell
uvicorn lai.api.app:create_api --factory --reload
```

Available endpoints:

- `GET /`
- `GET /dashboard`
- `GET /health`
- `GET /models`
- `GET /workstation/readiness`
- `GET /smoke/latest`
- `GET /smoke/results`
- `GET /smoke/results/{result_id}`
- `POST /smoke/run`
- `GET /worker/status`
- `POST /route/explain`
- `POST /jobs`
- `GET /jobs`
- `GET /jobs/stale`
- `GET /jobs/{job_id}`
- `GET /jobs/{job_id}/timeline`
- `GET /jobs/{job_id}/execution`
- `POST /jobs/recover`
- `POST /jobs/{job_id}/replay`
- `GET /jobs/{job_id}/artifacts`
- `GET /jobs/{job_id}/artifacts/{artifact_id}`
- `POST /jobs/{job_id}/cancel`
- `POST /worker/run`
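For a quick scripted check of the queue endpoints, here is a minimal standard-library sketch; the `{"prompt": ...}` body is an assumed shape for `POST /jobs`, not a documented schema:

```python
import json
from urllib import request

BASE = "http://127.0.0.1:8000"  # default uvicorn host/port


def build_job_request(prompt: str) -> request.Request:
    """Build a POST /jobs request; the {"prompt": ...} body is an assumption."""
    return request.Request(
        f"{BASE}/jobs",
        data=json.dumps({"prompt": prompt}).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )


def submit_job(prompt: str) -> dict:
    """Send the request to a running API and return the parsed JSON response."""
    with request.urlopen(build_job_request(prompt)) as resp:
        return json.loads(resp.read())
```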

The API now also serves a read-mostly dashboard at `/dashboard` with:

- live model health and workstation readiness for heavy local execution
- route explanation and recent job inspection
- stage telemetry and provider execution summaries
- artifact/trace browsing and replay actions
- persisted worker monitoring
- saved smoke diagnostics with history drill-down and per-check metadata/output previews
- stale-job detection/recovery for interrupted runs
- bounded queue worker controls, including an until-idle drain path
- live provider progress phases for long-running execution stages

## Dedicated worker service

For always-on local queue processing, run the dedicated worker service instead of manually
triggering bounded worker batches:

```powershell
lai-worker run --poll-interval 10
```

The service:

- acquires a lock at `data/state/worker-service.lock`
- watches for a graceful stop signal at `data/state/worker-service.stop`
- writes service logs to `logs/worker-service.log`
- recovers interrupted running jobs on startup when a stale lock is explicitly replaced
- auto-recovers stale running jobs during daemon cycles using the configured recovery timeout
- keeps the persisted worker state fresh for the CLI, API, and dashboard
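The lock and stop-signal contract above can be sketched as a small supervision loop. This is an illustrative reconstruction, not the service's actual code; `work_once` stands in for the real queue-draining call:

```python
import time
from pathlib import Path

LOCK_FILE = Path("data/state/worker-service.lock")
STOP_FILE = Path("data/state/worker-service.stop")


def serve(poll_interval: float, work_once) -> None:
    """Run until the stop file appears, calling work_once() each cycle."""
    LOCK_FILE.parent.mkdir(parents=True, exist_ok=True)
    # Simplistic lock marker; a real service would also record a PID and
    # refuse to start (or replace a stale lock) if one already exists.
    LOCK_FILE.write_text(str(time.time()), encoding="utf-8")
    try:
        while not STOP_FILE.exists():
            work_once()
            time.sleep(poll_interval)
    finally:
        STOP_FILE.unlink(missing_ok=True)  # consume the stop signal
        LOCK_FILE.unlink(missing_ok=True)  # release the lock
```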

To stop it gracefully:

```powershell
lai-worker stop
```

## Provider smoke diagnostics

Use readiness-only smoke checks to verify credentials, optional dependencies, and provider health
without making live requests:

```powershell
python -m lai.cli smoke providers
python -m lai.cli smoke local
```

Use live mode only when you want a small real prompt to be executed:

```powershell
python -m lai.cli smoke providers --live
python -m lai.cli smoke local --live --include-airllm
```

Smoke results are saved under `evals/results/smoke/` by default.

## Workstation readiness

Validate the local machine before you depend on the heavy local path:

```powershell
python -m lai.cli workstation validate
python -m lai.cli workstation validate --profile airllm
```

This surfaces Python compatibility, local package readiness, Hugging Face credentials,
disk headroom, RAM posture, GPU availability, and AirLLM-specific readiness in one report.
For step-by-step remediation and overnight run guidance, see
`docs/setup/airllm-runbook.md`.

## Initial GitHub rules encoded in this repo

- Pull request template and issue forms for consistent planning.
@@ -100,10 +227,10 @@ python -m pip install airllm

## Near-term priorities

1. Implement the model registry and routing engine under `src/lai/`.
2. Add the first AirLLM runtime adapter and smoke-test workflows.
3. Introduce an API surface in `apps/api`.
4. Add evaluation scenarios that compare small-model routing against large-model final execution.
1. Expand eval scenarios and richer reviewer/final-output refinement.
2. Add operator-facing controls for smoke result retention and cleanup over time.
3. Add worker-service lock inspection and supervised restart tooling.
4. Add partial-output capture where provider SDKs expose safe non-streaming response chunks.

## References

19 changes: 18 additions & 1 deletion apps/api/README.md
@@ -1,3 +1,20 @@
# API App

This folder is reserved for the future control-plane API. It will expose request submission, job status, artifact retrieval, and model availability endpoints.
The first API surface lives in `src/lai/api/app.py` and mirrors the CLI-first orchestration core.

Current endpoints:

- `GET /health`
- `GET /models`
- `POST /route/explain`
- `POST /jobs`
- `GET /jobs`
- `GET /jobs/{job_id}`
- `POST /jobs/{job_id}/cancel`

Run it after installing the `api` extra:

```powershell
python -m pip install -e .[dev,api]
uvicorn lai.api.app:create_api --factory --reload
```
24 changes: 23 additions & 1 deletion apps/web/README.md
@@ -1,3 +1,25 @@
# Web App

This folder is reserved for the future dashboard that will surface routing decisions, queue state, artifacts, and model health.
The first dashboard is now served directly by the FastAPI app at `/dashboard`.

Current dashboard capabilities:

- live health and catalog summary
- workstation readiness summary for heavy local and AirLLM execution paths
- route explanation form
- job submission and recent queue inspection
- persisted stage telemetry timeline for planner, executor, and reviewer flow
- provider execution summaries with duration, live progress phases, output preview, and artifact linkage per stage
- artifact and trace browsing for persisted jobs
- job replay controls for inline and queued reruns
- stale-job detection and one-click queue recovery for interrupted long-running jobs
- persisted worker monitoring with heartbeat, current job, queue depth, and latest daemon recovery summary
- service-aware worker monitoring with daemon lock and stop-signal visibility
- saved smoke diagnostics for provider and local readiness, with recent-run history browsing
- per-check smoke metadata and live-output preview inspection from saved results
- bounded live worker controls for processing queued jobs, including queue drain until idle
- model health cards
- job output inspector

The implementation intentionally stays lightweight for now by using static assets served from
the API package instead of a separate frontend build pipeline.
21 changes: 20 additions & 1 deletion apps/worker/README.md
@@ -1,3 +1,22 @@
# Worker App

This folder is reserved for long-running local or remote workers that execute model jobs, including AirLLM-backed heavy inference tasks.
This area now maps to the dedicated local worker service surface used for always-on queue
processing.

Primary entrypoints:

- `lai-worker run --poll-interval 10`
- `lai-worker stop`
- `python -m lai.cli worker serve --poll-interval 10`
- `python -m lai.cli worker stop`

Service runtime contract:

- lock file: `data/state/worker-service.lock`
- stop signal: `data/state/worker-service.stop`
- service log: `logs/worker-service.log`
- persisted worker state: `data/state/lai.db`

The worker service repeatedly drains queued jobs until idle, sleeps for the configured poll
interval, then checks again. This keeps the platform ready for overnight work without requiring
manual `worker run` commands.
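That drain-then-sleep cycle can be sketched as follows; `claim_next_job`, `run_job`, and `should_stop` are hypothetical helpers standing in for the real queue API:

```python
import time


def drain_until_idle(claim_next_job, run_job) -> int:
    """Process queued jobs until the queue is empty; return how many ran."""
    processed = 0
    while (job := claim_next_job()) is not None:
        run_job(job)
        processed += 1
    return processed


def serve_forever(claim_next_job, run_job, poll_interval: float, should_stop) -> None:
    """Drain the queue, sleep for the poll interval, then check again."""
    while not should_stop():
        drain_until_idle(claim_next_job, run_job)
        time.sleep(poll_interval)
```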
39 changes: 39 additions & 0 deletions configs/models/catalog.yaml
@@ -52,3 +52,42 @@ models:
allow_layer_sharding: true
allow_prefetching: true
allow_cpu_fallback: true

- id: openai-general
role: executor
runtime: openai
model: gpt-5.4-mini
context_window: 128000
capabilities:
- planning
- summarization
- critique
- validation
- deep-reasoning
- long-form-generation

- id: anthropic-general
role: executor
runtime: anthropic
model: claude-sonnet-4-20250514
context_window: 200000
capabilities:
- planning
- summarization
- critique
- validation
- deep-reasoning
- long-form-generation

- id: gemini-general
role: executor
runtime: gemini
model: gemini-2.5-flash
context_window: 1000000
capabilities:
- planning
- summarization
- critique
- validation
- deep-reasoning
- long-form-generation
4 changes: 2 additions & 2 deletions configs/routing/policies.yaml
@@ -42,5 +42,5 @@ tiers:
fallbacks:
when_gpu_unavailable:
planner_model_id: router-small
executor_model_id: verifier-medium
reviewer_model_id: verifier-medium
executor_model_id: openai-general
reviewer_model_id: openai-general
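The fallback block above amounts to a conditional stage-to-model mapping. An illustrative sketch of how it might be applied; only the `when_gpu_unavailable` ids below mirror this file, while the `default` tier shape is an assumption:

```python
# Illustrative policy shape; the "default" tier here is an assumption, the
# when_gpu_unavailable ids mirror configs/routing/policies.yaml.
POLICY = {
    "default": {
        "planner_model_id": "router-small",
        "executor_model_id": "local-heavy",
        "reviewer_model_id": "verifier-medium",
    },
    "fallbacks": {
        "when_gpu_unavailable": {
            "planner_model_id": "router-small",
            "executor_model_id": "openai-general",
            "reviewer_model_id": "openai-general",
        }
    },
}


def resolve_stage_models(policy: dict, gpu_available: bool) -> dict:
    """Choose planner/executor/reviewer model ids, applying the GPU fallback."""
    chosen = dict(policy["default"])
    if not gpu_available:
        chosen.update(policy["fallbacks"]["when_gpu_unavailable"])
    return chosen
```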
2 changes: 2 additions & 0 deletions data/state/.gitignore
@@ -0,0 +1,2 @@
*
!.gitignore