-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path.env.example
More file actions
147 lines (127 loc) · 6.49 KB
/
.env.example
File metadata and controls
147 lines (127 loc) · 6.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# ============================================================
# Powerbrain – Environment Variables
# Copy to .env and adjust: cp .env.example .env
#
# All secrets are stored exclusively in ./secrets/*.txt
# and mounted as Docker Secrets (/run/secrets/*).
#
# Secrets (NOT in .env, but in secrets/*.txt):
# pg_password.txt – PostgreSQL password
# vault_hmac_secret.txt – HMAC secret for Vault
# forgejo_token.txt – Forgejo API token
# github_pat.txt – GitHub PAT (AI Proxy)
# anthropic_api_key.txt – Anthropic API Key (AI Proxy)
# mcp_auth_token.txt – Token for pb-proxy -> mcp-server auth
# ingestion_auth_token.txt – Service token for ingestion API (B-50)
# ============================================================
# ── Forgejo (existing instance) ─────────────────────────────
# URL only — token is in secrets/forgejo_token.txt
FORGEJO_URL=http://forgejo.local:3000
# ── Reranker ────────────────────────────────────────────────
# Backend: powerbrain (built-in Cross-Encoder), tei (HuggingFace TEI), cohere
RERANKER_BACKEND=powerbrain
RERANKER_ENABLED=true
# Built-in reranker (RERANKER_BACKEND=powerbrain):
# Requires: docker compose --profile local-reranker up
# Models:
# cross-encoder/ms-marco-MiniLM-L-6-v2 (fast, EN)
# cross-encoder/ms-marco-MiniLM-L-12-v2 (more accurate, EN)
# BAAI/bge-reranker-v2-m3 (multilingual DE+EN)
RERANKER_MODEL=cross-encoder/ms-marco-MiniLM-L-6-v2
# RERANKER_URL=http://reranker:8082 # default for built-in
# Remote reranker (RERANKER_BACKEND=tei or cohere):
# No local-reranker profile needed — talks to external service directly.
# RERANKER_BACKEND=tei
# RERANKER_URL=https://ai.example.com # or https://api.cohere.com
# RERANKER_API_KEY= # required for cohere, optional for TEI
# RERANKER_MODEL=my-rerank-model # required for cohere
#
# GDPR: External backends (cohere, remote TEI) send document content outside
# your infrastructure. Ensure compliance with your data processing agreements.
# See docs/gdpr-external-ai-services.md
# ── Authentication ──────────────────────────────────────────
# Authentication (true = require API keys, false = allow unauthenticated access)
AUTH_REQUIRED=true
# ── Rate Limiting ───────────────────────────────────────────
# Toggle rate limiting on/off
RATE_LIMIT_ENABLED=true
# Requests per minute per agent, by role
RATE_LIMIT_ANALYST=60
RATE_LIMIT_DEVELOPER=120
RATE_LIMIT_ADMIN=300
# ── Summarization ───────────────────────────────────────────
SUMMARIZATION_ENABLED=true
# ── Context Layers (L0/L1/L2) ─────────────────────────────
# Pre-computed document abstracts (L0) and overviews (L1) at ingestion time
# Disable to skip L0/L1 generation (only L2 chunks stored)
LAYER_GENERATION_ENABLED=true
# ── LLM / Embedding Provider ──────────────────────────────
# Separate URLs for embedding and LLM (summarization) providers.
# Supports: Ollama (>=0.1.24), vLLM, HuggingFace TEI, infinity, OpenAI.
# Falls back to OLLAMA_URL (http://ollama:11434) if not set.
# EMBEDDING_PROVIDER_URL=http://ollama:11434
# EMBEDDING_MODEL=nomic-embed-text
# EMBEDDING_API_KEY=
# LLM_PROVIDER_URL=http://ollama:11434
# LLM_MODEL=qwen2.5:3b
# LLM_API_KEY=
# ── Summarization Pool (optional, decoupled) ──────────────
# By default, MCP summarization reuses LLM_* above. Override these to
# route the in-pipeline summary call to its own endpoint / model so it
# never contends with the pb-proxy agent loop on a shared Ollama slot.
# Common topologies:
# 1) Sidecar split — second local Ollama with a smaller model:
# docker compose --profile local-llm --profile summary-llm up -d
# docker exec pb-ollama-summary ollama pull qwen2.5:1.5b
# SUMMARIZATION_PROVIDER_URL=http://ollama-summary:11434
# SUMMARIZATION_MODEL=qwen2.5:1.5b
#   2) Hosted summary — keep agent loop local, summarize via OpenAI:
# SUMMARIZATION_PROVIDER_URL=https://api.openai.com
# SUMMARIZATION_MODEL=gpt-4o-mini
# SUMMARIZATION_API_KEY=sk-...
# See docs/plans/2026-04-20-separate-summary-llm-pool.md.
# SUMMARIZATION_PROVIDER_URL=
# SUMMARIZATION_MODEL=
# SUMMARIZATION_API_KEY=
# SUMMARIZATION_TIMEOUT=15
# ── GPU Stack (optional, docker compose --profile gpu) ────
# VLLM_MODEL=llava-hf/llava-1.5-7b-hf
# TEI_MODEL=nomic-ai/nomic-embed-text-v1
# HF_TOKEN=
# ── OPAL — Real-Time Policy Sync (optional, docker compose --profile opal) ──
# Watches a git repo for policy changes and pushes them to OPA in real-time.
# OPAL_POLICY_REPO_URL=http://forgejo.local:3000/pb-org/pb-policies
# OPAL_POLICY_REPO_BRANCH=main
# OPAL_POLL_INTERVAL=30
# ── TLS (optional, activate with: docker compose --profile tls up) ──
# DOMAIN=kb.example.com
# ── AI Provider Proxy (optional, docker compose --profile proxy) ──
# PROXY_PORT=8090
# TOOL_REFRESH_INTERVAL=60
# MAX_ITERATIONS=10
# TOOL_CALL_TIMEOUT=30
# REQUEST_TIMEOUT=120
# FAIL_MODE=closed
# OPENAI_API_KEY=sk-...
# ANTHROPIC_API_KEY=sk-ant-...
# ── pb-worker (maintenance jobs) ───────────────────────────
# AUDIT_RETENTION_DAYS=365
# PENDING_REVIEW_GRACE_MINUTES=0
# WORKER_LOG_LEVEL=INFO
# ── Performance Tuning (T1) ────────────────────────────────
# Embedding cache (in-process LRU, per service)
# EMBEDDING_CACHE_SIZE=2048
# EMBEDDING_CACHE_TTL=3600
# EMBEDDING_CACHE_ENABLED=true
# OPA access policy result cache (MCP server only)
# OPA_CACHE_TTL=60
# OPA_CACHE_ENABLED=true
# PostgreSQL connection pool sizes (all services)
# PG_POOL_MIN=2
# PG_POOL_MAX=10
# ── Telemetry and Metrics ─────────────────────────────────
# OpenTelemetry tracing (sends spans to Tempo via OTLP gRPC)
# OTEL_ENABLED=true
# OTLP_ENDPOINT=http://tempo:4317
# Include per-request _telemetry block in search/chat responses
# TELEMETRY_IN_RESPONSE=true