Proxy-Pointer · tasuke-pochira · May 27, 2026 · May 28, 2026
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,12 @@
+.git
+.github
+.pytest_cache
+.venv
+venv
+__pycache__
+*.pyc
+.env
+runtime
+**/data/index
+**/data/trees
+**/data/output
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -0,0 +1,23 @@
+name: tests
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+
+jobs:
+  pytest:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: astral-sh/setup-uv@v5
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Install
+        run: uv sync --all-extras --group test
+      - name: Check whitespace
+        run: git diff --check
+      - name: Run tests
+        run: uv run --all-extras --group test pytest -q
diff --git a/DocComparator/.env.example b/DocComparator/.env.example
@@ -18,3 +18,6 @@ LLAMA_CLOUD_API_KEY=your_llama_cloud_api_key_here
 
 # Optional: bounded parallel LLM comparisons per selected Doc 1 section
 # DC_COMPARE_CONCURRENCY=3
+
+# Optional: maximum number of selected Doc 1 sections processed in parallel
+# DC_SECTION_CONCURRENCY=2
diff --git a/DocComparator/README.md b/DocComparator/README.md
@@ -178,6 +178,7 @@ All configuration is centralized in `src/pprag_doc_comparator/config.py`. Overri
 | `DC_EMBEDDING_BATCH_SIZE` | `20`            | Number of chunks embedded per Gemini request during indexing |
 | `DC_EMBEDDING_BATCH_DELAY` | `1`            | Seconds to wait between embedding batches during indexing |
 | `DC_COMPARE_CONCURRENCY` | `3`              | Maximum parallel LLM section comparisons per selected Doc 1 section |
+| `DC_SECTION_CONCURRENCY` | `2`              | Maximum selected Doc 1 sections cross-retrieved and compared in parallel |
 
 ### Indexing Throughput
 
@@ -194,6 +195,8 @@ DocComparator compares each selected Doc 1 section against up to
 bounded concurrency controlled by `DC_COMPARE_CONCURRENCY`, while preserving
 input order in the final report. Lower the value if you hit LLM rate limits;
 increase it if your quota allows more parallel requests.
+The selected-section pipeline can also process multiple Doc 1 sections concurrently,
+up to the `DC_SECTION_CONCURRENCY` limit; output ordering remains deterministic.
 
 ---
 

diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,21 @@
+FROM python:3.11-slim
+
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1
+
+WORKDIR /app
+
+RUN pip install --no-cache-dir uv
+
+COPY pyproject.toml uv.lock README.md ./
+COPY src ./src
+COPY Text-Only ./Text-Only
+COPY MultiModal ./MultiModal
+COPY DocComparator ./DocComparator
+
+RUN uv sync --all-extras --group test
+
+EXPOSE 8501
+
+CMD ["uv", "run", "pprag", "doctor"]
diff --git a/README.md b/README.md
@@ -152,6 +152,32 @@ All include sample data so you can clone, build the index, and start exploring i
 
 ---
 
+## Runtime Checks and Deployment
+
+Run a local readiness check without loading optional application stacks:
+
+```bash
+pprag doctor
+pprag doctor --json
+```
+
+Run local runtime evaluations for cache and concurrency behavior:
+
+```bash
+pprag eval runtime
+```
+
+Container scaffolding is included for hosted pilots:
+
+```bash
+docker compose up doc-comparator
+```
+
+Use `deploy/production.env.example` as a template for private runtime settings.
+Do not commit real secrets.
+
+---
+
 ## Author
 **Partha Sarkar**
 

diff --git a/deploy/production.env.example b/deploy/production.env.example
@@ -0,0 +1,22 @@
+# Copy to a private env file before use. Do not commit real secrets.
+GOOGLE_API_KEY=
+LLAMA_CLOUD_API_KEY=
+
+# Runtime roots
+PPRAG_PROJECT_ROOT=/app
+
+# Explicit trust is required before loading locally generated FAISS metadata.
+PPRAG_TRUST_LOCAL_FAISS=1
+DC_TRUST_FAISS_INDEX=1
+PP_TRUST_FAISS_INDEX=1
+
+# Metadata-only audit log. Prompts and document text are not written by default.
+PPRAG_AUDIT_LOG=/app/runtime/audit/pprag.jsonl
+
+# Throughput controls
+PP_EMBEDDING_BATCH_SIZE=20
+PP_EMBEDDING_BATCH_DELAY=1
+DC_EMBEDDING_BATCH_SIZE=20
+DC_EMBEDDING_BATCH_DELAY=1
+DC_COMPARE_CONCURRENCY=3
+DC_SECTION_CONCURRENCY=2
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -0,0 +1,22 @@
+services:
+  doc-comparator:
+    build: .
+    command: uv run pprag compare serve --server.address 0.0.0.0 --server.port 8501
+    env_file:
+      - ./deploy/production.env.example
+    ports:
+      - "8501:8501"
+    volumes:
+      - ./DocComparator/data:/app/DocComparator/data
+      - ./runtime/audit:/app/runtime/audit
+
+  multimodal:
+    build: .
+    command: uv run pprag multimodal serve --server.address 0.0.0.0 --server.port 8502
+    env_file:
+      - ./deploy/production.env.example
+    ports:
+      - "8502:8502"
+    volumes:
+      - ./MultiModal/data:/app/MultiModal/data
+      - ./runtime/audit:/app/runtime/audit
diff --git a/src/pprag/audit.py b/src/pprag/audit.py
@@ -0,0 +1,41 @@
+"""Optional JSONL audit-event logging for enterprise deployments."""
+from __future__ import annotations
+
+import json
+import os
+import time
+import uuid
+from pathlib import Path
+from typing import Any
+
+
+def audit_log_path() -> str | None:
+    """Return the audit log path from PPRAG_AUDIT_LOG, else DC_AUDIT_LOG when that is unset or empty."""
+    return os.environ.get("PPRAG_AUDIT_LOG") or os.environ.get("DC_AUDIT_LOG")
+
+
+def new_event_id() -> str:
+    """Generate a 16-character hex id (the leading half of a random UUID4) for correlating events."""
+    return str(uuid.uuid4()).replace("-", "")[:16]
+
+
+def write_audit_event(event_type: str, **fields: Any) -> None:
+    """Append one JSON line describing a metadata-only event to the audit log.
+
+    Does nothing when no audit log path is configured. Pass ids, counts,
+    timings, ratings, and status values only -- never prompts or document text.
+    Keyword fields are merged last, so they override the generated defaults.
+    """
+    configured = audit_log_path()
+    if not configured:
+        return
+    record = {
+        "event_id": new_event_id(),
+        "event_type": event_type,
+        "timestamp": time.time(),
+    }
+    record.update(fields)
+    target = Path(configured).expanduser()
+    target.parent.mkdir(parents=True, exist_ok=True)
+    with target.open("a", encoding="utf-8") as sink:
+        sink.write(json.dumps(record, sort_keys=True, default=str) + "\n")
diff --git a/src/pprag/cli.py b/src/pprag/cli.py
@@ -99,6 +99,12 @@ def build_parser() -> argparse.ArgumentParser:
     compare_serve = compare_sub.add_parser("serve", help="Start the DocComparator Streamlit UI")
     compare_serve.add_argument("args", nargs=argparse.REMAINDER)
 
+    doctor = subparsers.add_parser("doctor", help="Check local runtime readiness")
+    doctor.add_argument("args", nargs=argparse.REMAINDER)
+
+    eval_parser = subparsers.add_parser("eval", help="Run local runtime evaluations")
+    eval_parser.add_argument("args", nargs=argparse.REMAINDER)
+
     return parser
 
 
@@ -159,6 +165,12 @@ def main(argv: Sequence[str] | None = None) -> int:
             if args.command in ("ui", "serve"):
                 return _run_streamlit("compare", "pprag_doc_comparator", args.args)
 
+        if args.modality == "doctor":
+            return _run_module("full", "pprag", "doctor", args.args)
+
+        if args.modality == "eval":
+            return _run_module("full", "pprag", "eval", args.args)
+
     except MissingExtraError as exc:
         parser.exit(2, f"{exc}\n")
 

diff --git a/src/pprag/config_validation.py b/src/pprag/config_validation.py
@@ -0,0 +1,81 @@
+"""Runtime configuration checks for local and hosted deployments."""
+from __future__ import annotations
+
+import importlib.util
+import os
+from pathlib import Path
+from typing import Any
+
+
+def env_status(name: str, *, required: bool = False) -> dict[str, Any]:
+    """Report whether an environment variable is set, without exposing its value.
+
+    An empty value counts as absent; ``ok`` is False only when a required
+    variable is missing.
+    """
+    is_set = bool(os.environ.get(name))
+    status: dict[str, Any] = {"name": name, "required": required, "present": is_set}
+    status["ok"] = is_set or not required
+    return status
+
+
+def dependency_status(import_name: str, *, required: bool = False) -> dict[str, Any]:
+    """Return whether a dependency is importable; a missing parent package counts as absent."""
+    try:  # find_spec imports a dotted name's parent and raises if that parent is missing
+        present = importlib.util.find_spec(import_name) is not None
+    except ImportError:
+        present = False
+    status = {"name": import_name, "required": required, "present": present}
+    status["ok"] = present or not required
+    return status
+
+
+def path_status(path: str | Path, *, create: bool = False) -> dict[str, Any]:
+    """Describe a runtime path: whether it exists and whether it is writable.
+
+    With ``create=True`` the directory is first created on a best-effort basis.
+    For a missing path, ``writable`` reflects the parent; ``ok`` needs both flags.
+    """
+    target = Path(path).expanduser()
+    if create:
+        try:
+            target.mkdir(parents=True, exist_ok=True)
+        except OSError:
+            pass  # best effort: the flags below report the outcome
+    found = target.exists()
+    can_write = os.access(target if found else target.parent, os.W_OK)
+    report = {"path": str(target), "exists": found, "writable": can_write}
+    return {**report, "ok": found and can_write}
+
+
+def validate_runtime(root: str | Path = ".") -> dict[str, Any]:
+    """Run lightweight path, environment, and dependency checks for a deployment root."""
+    base = Path(root).resolve()
+    data_dirs = ("Text-Only", "MultiModal", "DocComparator")
+    env_names = (
+        "GOOGLE_API_KEY",
+        "LLAMA_CLOUD_API_KEY",
+        "PPRAG_TRUST_LOCAL_FAISS",
+        "PPRAG_AUDIT_LOG",
+        "DC_AUDIT_LOG",
+    )
+    modules = (
+        "google.generativeai",
+        "langchain_community",
+        "faiss",
+        "streamlit",
+        "llama_cloud",
+    )
+    # Everything is queried with create=False / required=False: no directory is
+    # created here, and variables and modules are all treated as optional.
+    paths = [path_status(base / name / "data", create=False) for name in data_dirs]
+    env = [env_status(name, required=False) for name in env_names]
+    dependencies = [dependency_status(name, required=False) for name in modules]
+    checks = [*paths, *env, *dependencies]
+    return {
+        "root": str(base),
+        "ok": all(entry["ok"] for entry in checks if "ok" in entry),
+        "paths": paths,
+        "environment": env,
+        "dependencies": dependencies,
+    }
diff --git a/src/pprag/doctor.py b/src/pprag/doctor.py
@@ -0,0 +1,34 @@
+"""CLI runtime checks for Proxy-Pointer deployments."""
+from __future__ import annotations
+
+import argparse
+import json
+from typing import Sequence
+
+from pprag.config_validation import validate_runtime
+
+
+def main(argv: Sequence[str] | None = None) -> int:
+    """Run the runtime checks and print them; return 0 when all pass, otherwise 1."""
+    cli = argparse.ArgumentParser(description="Check local Proxy-Pointer runtime readiness.")
+    cli.add_argument("--root", default=".", help="Repository or deployment root to inspect.")
+    cli.add_argument("--json", action="store_true", help="Print machine-readable JSON.")
+    options = cli.parse_args(argv)
+
+    report = validate_runtime(options.root)
+    exit_code = 0 if report["ok"] else 1
+    if options.json:
+        print(json.dumps(report, indent=2, sort_keys=True))
+        return exit_code
+    print(f"Runtime check: {'OK' if report['ok'] else 'WARN'}")
+    print(f"Root: {report['root']}")
+    for section in ("paths", "environment", "dependencies"):
+        print(f"\n{section}:")
+        for entry in report[section]:
+            label = entry.get("name") or entry.get("path")
+            print(f"  [{'ok' if entry.get('ok') else 'check'}] {label}")
+    return exit_code
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())