# CodeGraph Configuration Example
# Copy this file to .env and update the values for your environment

# ============================================================================
# Storage and Installation Configuration (LEGACY: FAISS + RocksDB only)
# ============================================================================

# Storage path for CodeGraph data (indexes, cache, etc.)
# CODEGRAPH_DOMAIN=example.com

# ============================================================================
# Embedding Provider Configuration (Local)
# ============================================================================
# Provider options: "onnx", "ollama", "openai", "jina", or "lmstudio"
# CODEGRAPH_EMBEDDING_PROVIDER=ollama

# ONNX: Specify model path (or leave empty for auto-detection from HuggingFace cache)
# CODEGRAPH_LOCAL_MODEL=/path/to/your/onnx/model

# Ollama: Specify embedding model name
# CODEGRAPH_EMBEDDING_MODEL=all-minilm:latest
# CODEGRAPH_OLLAMA_URL=http://localhost:11434
# CODEGRAPH_EMBEDDING_MODEL=qwen3-embedding:4b
# CODEGRAPH_EMBEDDING_DIMENSION=2560 # Important: must match the actual output dimensionality of the embedding model you use!
# CODEGRAPH_MAX_CHUNK_TOKENS=28000 # Important: keep this below your embedding model's context window. qwen3-embedding:4b has a 32K-token window, so chunk at 28K; all-minilm has a 512-token window, so chunk at e.g. 256 tokens.
# CodeGraph uses its semantic chunker by default.
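# Example: a complete local Ollama embedding setup (a sketch; assumes you have
# pulled qwen3-embedding:4b, whose 2560-dim output and 32K context are stated above):
# CODEGRAPH_EMBEDDING_PROVIDER=ollama
# CODEGRAPH_OLLAMA_URL=http://localhost:11434
# CODEGRAPH_EMBEDDING_MODEL=qwen3-embedding:4b
# CODEGRAPH_EMBEDDING_DIMENSION=2560
# CODEGRAPH_MAX_CHUNK_TOKENS=28000
# To verify the dimension your model actually emits (assumes Ollama's /api/embeddings endpoint and jq; the count must equal CODEGRAPH_EMBEDDING_DIMENSION):
# curl -s http://localhost:11434/api/embeddings -d '{"model":"qwen3-embedding:4b","prompt":"test"}' | jq '.embedding | length'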

# LM Studio: Best for MLX + Flash Attention 2 (recommended on macOS)
# Default: jina-embeddings-v4 (2048 or 1024 dimensions)
# CODEGRAPH_LMSTUDIO_URL=http://localhost:1234
# CODEGRAPH_EMBEDDING_DIMENSION=2048

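# To confirm what LM Studio actually serves before indexing (a sketch; assumes
# LM Studio's OpenAI-compatible /v1/embeddings route at CODEGRAPH_LMSTUDIO_URL and jq):
# curl -s http://localhost:1234/v1/embeddings -H 'Content-Type: application/json' -d '{"model":"jina-embeddings-v4","input":"test"}' | jq '.data[0].embedding | length'
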
# ============================================================================
# Reranking Provider Configuration (Local)
# ============================================================================
# CODEGRAPH_RERANKING_PROVIDER=lmstudio
# CODEGRAPH_RERANKING_MODEL=jina-reranker-v3
# CODEGRAPH_RERANKING_CANDIDATES=512 # candidates fetched by semantic search (pipeline: semantic search > 512 candidates > rerank > top 10)

# ============================================================================
# Embedding Provider Configuration (Cloud)
# ============================================================================
# OpenAI: Model name (API key configured below in Security section)
# CODEGRAPH_EMBEDDING_MODEL=text-embedding-3-small
# CODEGRAPH_EMBEDDING_DIMENSION=1536 # Important: must match the model's output dimensionality (1536 for -small, 3072 for -large)

# Batch size for embedding generation (applies to all providers)
# CODEGRAPH_EMBEDDING_BATCH_SIZE=32 # Default: 32, valid range: 1-64. Ollama and LM Studio throttle throughput on larger batches, so even when you have the memory, batches of e.g. 1024 gain little over 64.

# Jina AI: Cloud embeddings with reranking (requires JINA_API_KEY)
# CODEGRAPH_EMBEDDING_PROVIDER=jina
# JINA_EMBEDDING_DIMENSION=2048 # supports 1024, 512, 256, but only 1024 has a pre-defined column and HNSW index in the SurrealDB schema
# JINA_API_KEY=your-jina-api-key-here
# JINA_MAX_TEXTS=512 # Leverage the Jina API batch functionality: max 512 documents of 8192 tokens each. Remember to set --max-concurrent 1 when indexing.
# JINA_MAX_TOKENS=7000 # Substitute for CODEGRAPH_MAX_TOKENS
# JINA_API_BASE=https://api.jina.ai/v1
# JINA_API_TASK=code.passage # used when embedding data; code.query is used when searching
# JINA_TASK=code.passage # Alternative name for JINA_API_TASK
# JINA_LATE_CHUNKING=false # Leverage Jina AI's advanced long-context chunking for more accurate embeddings
# JINA_TRUNCATE=true # truncate texts and embeddings if over the dimension limit
# JINA_REQUEST_DELAY_MS=600 # small delay to avoid throttling the API when batching
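# Smoke test for your key (a sketch; assumes the OpenAI-style /v1/embeddings route implied by JINA_API_BASE above):
# curl -s https://api.jina.ai/v1/embeddings -H "Authorization: Bearer $JINA_API_KEY" -H 'Content-Type: application/json' -d '{"model":"jina-embeddings-v4","task":"code.passage","input":["test"]}'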

# Jina Reranking Configuration
# JINA_ENABLE_RERANKING=true
# JINA_RERANKING_ENABLED=true # Alternative name
# JINA_RERANKING_MODEL=jina-reranker-v3
# JINA_RERANKING_TOP_N=10 # results kept after reranking (pipeline: semantic search > 512 candidates > rerank > top 10)
# CODEGRAPH_RERANKING_CANDIDATES=512 # candidates fetched by semantic search before reranking

# Jina Relationship Embeddings Configuration. You don't really need to touch these; CODEGRAPH_EMBEDDING_BATCH_SIZE is usually enough.
# JINA_REL_BATCH_SIZE=50
# JINA_REL_MAX_TEXTS=50

# Jina Batching for Large Indexing Operations
# CODEGRAPH_JINA_BATCH_SIZE=2000
# CODEGRAPH_JINA_BATCH_MINUTES=9.0 # used by the codegraph estimate command; how long one batch took on average when indexing the CodeGraph codebase with Jina
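# Usage sketch (hypothetical invocations; only the estimate command and the
# --max-concurrent flag are mentioned in this file, exact arguments may differ):
# codegraph estimate                     # time estimate derived from CODEGRAPH_JINA_BATCH_SIZE/MINUTES
# codegraph index . --max-concurrent 1   # index with Jina batching, one request in flight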

# ============================================================================
# Dual-Mode Search Configuration
# ============================================================================
# SURREALDB_PASSWORD=root # Alternative name
#
# Important: the HNSW index dimension must match the embedding provider
# - Jina: variable Matryoshka dimensions depending on the model; 2048 dims configured here
# - OpenAI: Small 1536 dimensions, Large 3072 dimensions
# - Local ONNX: typically 384 (e.g. Qdrant all-MiniLM ONNX)
# - Local Ollama: qwen3-embedding 0.6b/4b/8b = 1024/2560/4096, embeddinggemma: 768, all-minilm: 384
# - 384 (all-minilm:latest)
# - 768 (embeddinggemma:latest)
# - 1024 (qwen3-embedding:0.6b)
# - 1536 (text-embedding-3-small)
# - 2048 (jina-embeddings-v4)
# - 2560 (qwen3-embedding:4b)
# - 3072 (text-embedding-3-large)
# - 4096 (qwen3-embedding:8b)
# For pure speed use ONNX or Ollama all-minilm (~10 min to index the whole CodeGraph codebase)
# For considerably better retrieval switch to qwen3-embedding:0.6b, or to the code-aware 4b/8b versions for even better accuracy
# Scale the qwen3-embedding model size on Ollama according to how critical accuracy is
# To enhance local accuracy enable CODEGRAPH_RERANKING_PROVIDER=lmstudio and e.g. CODEGRAPH_RERANKING_MODEL=qwen-reranker-3:0.6b
# For true SOTA use the jina provider with jina-embeddings-v4 and jina-reranker-v3; indexing takes longer but works better
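# Example of a matched pair, taken from the dimension table above:
# CODEGRAPH_EMBEDDING_PROVIDER=ollama
# CODEGRAPH_EMBEDDING_MODEL=qwen3-embedding:0.6b
# CODEGRAPH_EMBEDDING_DIMENSION=1024 # must equal the HNSW index dimension in the SurrealDB schema
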
# Set to enable local LLM insights generation

# LM Studio with DeepSeek Coder v2 Lite Instruct (or whatever fits in your VRAM)
# Supported LLM provider options: "lmstudio", "openai", "anthropic", or "ollama"
# Superior MLX support, Flash Attention 2, KV-cache and distillation model support on macOS
# CODEGRAPH_LLM_PROVIDER=lmstudio
# LLM_PROVIDER=lmstudio # Alternative name
# CODEGRAPH_MODEL=lmstudio-community/DeepSeek-Coder-V2-Lite-Instruct-GGUF/DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf
# CODEGRAPH_LMSTUDIO_URL=http://localhost:1234
# CODEGRAPH_CONTEXT_WINDOW=32000 # The model's context window directly affects the quality and depth of results from the agentic_ MCP-server tools
# CODEGRAPH_TEMPERATURE=0.1
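# Quick smoke test of the LLM endpoint (a sketch; assumes LM Studio's OpenAI-compatible /v1/chat/completions route at CODEGRAPH_LMSTUDIO_URL; the model id is illustrative, use whatever your LM Studio server lists):
# curl -s http://localhost:1234/v1/chat/completions -H 'Content-Type: application/json' -d '{"model":"deepseek-coder-v2-lite-instruct","messages":[{"role":"user","content":"ping"}],"max_tokens":8}'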
# Ollama (alternative)
# LLM model (e.g., "qwen3:4b", "Kimi-K2-Instruct")
# CODEGRAPH_MODEL=qwen3:4b
# CODEGRAPH_OLLAMA_URL=http://localhost:11434
# CODEGRAPH_CONTEXT_WINDOW=252000 # Max for Ollama depends on the model; the context window directly affects the quality and depth of results from the agentic_ MCP-server tools

# Anthropic (cloud - 200K/1M tokens)
# CODEGRAPH_LLM_PROVIDER=anthropic

# OpenAI (cloud - 200K/400K tokens)
# CODEGRAPH_LLM_PROVIDER=openai
# CODEGRAPH_MODEL=gpt-5.1-codex-mini
# OPENAI_API_KEY=sk-your-key-here
# OPENAI_ORG_ID=your_fabulous_org
# CODEGRAPH_CONTEXT_WINDOW=200000 # up to 400K tokens for gpt-5.1 and gpt-5.1-codex
# CODEGRAPH_REASONING_BUDGET=medium

# xAI (cloud - 252K/2M context window, $0.50-$1.50/M tokens!)
# CODEGRAPH_REASONING_BUDGET=high

# MCP-server code-insights agent max output tokens (uses CODEGRAPH_MODEL)
# MCP_CODE_AGENT_MAX_OUTPUT_TOKENS=52000 # e.g. Claude Code hard-caps MCP output at 64K tokens and usually crashes if an MCP server produces such a large output

# ============================================================================
# Performance & Caching Configuration (LEGACY: FAISS + RocksDB only)
# ============================================================================

# Performance mode (affects optimization settings)
# CODEGRAPH_SYMBOL_DB_BATCH_SIZE=1000 # Batch size for database writes

# ============================================================================
# Server Configuration (LEGACY: REST API, use NAPI instead)
# ============================================================================

# Server host and port (for HTTP/REST API)
# CODEGRAPH_CMD="cargo run -p codegraph-mcp --bin codegraph --"

# ============================================================================
# Security Configuration (LEGACY: REST API)
# ============================================================================

# JWT Authentication
# JWT_SECRET=replace_with_secure_random_secret_minimum_32_characters_long
# JWT_EXPIRY_HOURS=24

# API Key Configuration
# API_KEY_PREFIX=cgk

# Server Configuration
# HOST=127.0.0.1
# PORT=8080
# ENVIRONMENT=development

# TLS/HTTPS Configuration (for production)
# TLS_CERT_PATH=/path/to/certificate.pem
# REDIS_URL=redis://localhost:6379

# Rate Limiting
# RATE_LIMIT_ANONYMOUS=60
# RATE_LIMIT_USER=1000
# RATE_LIMIT_PREMIUM=5000
# RATE_LIMIT_ADMIN=10000

# Security Settings
# MAX_REQUEST_SIZE=10485760 # 10MB
# SESSION_TIMEOUT_HOURS=24
# PASSWORD_MIN_LENGTH=12

# Logging (see RUST_LOG above for CodeGraph core logging)
# LOG_LEVEL=info # For application-level logging
# SECURITY_LOG_LEVEL=warn
# LOG_FORMAT=json

# Monitoring
# METRICS_ENABLED=true
# PROMETHEUS_PORT=9090

# External Services
# SENTRY_DSN=https://your-sentry-dsn
# ANALYTICS_KEY=your_analytics_key

# Development/Testing Only
# DEV_MODE=true
# DISABLE_AUTH=false # Never set to true in production!
# ENABLE_DEBUG_ENDPOINTS=false

# ============================================================================
# MCP Server Configuration (when using --transport http)
# ============================================================================

# Host address to bind HTTP server (default: 127.0.0.1)
# Use 0.0.0.0 to allow external connections
CODEGRAPH_HTTP_HOST=127.0.0.1

# Port for HTTP server (default: 3003)
CODEGRAPH_HTTP_PORT=3003

# SSE keep-alive interval in seconds (default: 15)
# Prevents proxy timeouts for long-running agentic operations
CODEGRAPH_HTTP_KEEP_ALIVE=15

# MCP test transport (for test_agentic_mcp.py); select it by simply running codegraph start http --port xxxx or codegraph start stdio
# MCP_TRANSPORT=http # Use HTTP/SSE transport (requires a running server)
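# Startup examples (verbs taken from the comment above; no other flags are assumed):
# codegraph start http --port 3003   # HTTP/SSE transport on CODEGRAPH_HTTP_PORT
# codegraph start stdio              # stdio transport, spawned directly by the MCP client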