# Tara Code Configuration File
# Copy this to ~/.taracode/config.yaml

# =============================================================================
# SINGLE HOST CONFIGURATION (Legacy - still fully supported)
# =============================================================================

# Ollama Server Host (required for single-host mode)
# The base URL of your Ollama server
host: http://localhost:11434

# API Key (optional)
# Leave empty or omit for local servers without authentication
key: ""

# Model name (optional)
# The model is auto-detected from your Ollama server via the /v1/models endpoint.
# Only specify this if auto-detection fails or you want a specific model.
# model: gemma3:27b
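#
# For example, a remote server that requires authentication could look like
# this (the URL and key below are illustrative placeholders):
#
# host: https://llm.example.com:11434
# key: your-api-key-here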

# =============================================================================
# MULTI-HOST CONFIGURATION (v2.0)
# =============================================================================
# Configure multiple LLM hosts with fallback support and per-agent assignment.
# When using multi-host mode, the 'host' field above is ignored.
#
# hosts:
#   # Primary GPU server - high-performance models
#   primary:
#     url: http://gpu-server:11434
#     vendor: ollama      # optional: ollama, vllm, llama.cpp (auto-detected if omitted)
#     # api_key: ""       # optional: API key if required
#     models: [gemma3:27b, qwen2.5-coder:32b]   # optional: available models
#     timeout: 30s        # connection timeout
#     priority: 1         # lower = higher priority
#
#   # Local fallback - lightweight models
#   local:
#     url: http://localhost:11434
#     fallback: primary   # use 'primary' if this host is unavailable
#     priority: 2
#
# # Which host to use by default
# default_host: primary
#

# =============================================================================
# PER-AGENT HOST ASSIGNMENT
# =============================================================================
# Assign specific hosts to agents in the agents section of config:
#
# agents:
#   planner:
#     model: gemma3:12b
#     host: local      # Use local for quick planning
#
#   coder:
#     model: qwen2.5-coder:32b
#     host: primary    # Use powerful GPU for code generation
#
#   reviewer:
#     model: llama3.2:3b
#     host: local      # Use lightweight model for reviews
#
# Run /hosts in the REPL to see host status and health.
# =============================================================================

# =============================================================================
# MODEL GENERATION OPTIONS (v2.0.4)
# =============================================================================
# Control how the LLM generates responses. These apply to the main chat.
# Agents inherit top_p and num_predict as defaults (agents have their own temperature).
model:
  # Sampling randomness: 0.0 = deterministic, 2.0 = maximum randomness (default: 0.7)
  temperature: 0.7

  # Nucleus sampling: only consider tokens within this cumulative probability (default: 0.9)
  # Lower = more focused, higher = more diverse
  top_p: 0.9

  # Maximum tokens per response, 0 = model default / no limit (default: 0)
  # Useful for controlling response length and context window usage
  num_predict: 0
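
# For instance, a near-deterministic profile for reproducible runs could look
# like this (illustrative values, not a recommendation):
#
# model:
#   temperature: 0.0
#   top_p: 1.0
#   num_predict: 2048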

# Search Configuration
search:
  # Primary search provider: "brave", "duckduckgo", or "searxng"
  # If brave_api_key is set and primary is not specified, Brave will be used automatically
  primary: duckduckgo

  # Fallback provider when primary fails
  fallback: searxng

  # Brave Search API key (optional)
  # Get your API key from: https://brave.com/search/api/
  # When configured, Brave becomes the default primary search provider
  # brave_api_key: your-brave-api-key-here

  # Custom SearXNG instance URL (optional)
  # searxng_instance: https://your-searxng-instance.com

  # Search timeout (default: 10s)
  timeout: 10s

  # Number of retries before falling back (default: 1)
  retry_count: 1
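
# For example, to make Brave the primary provider with DuckDuckGo as the
# fallback (the key below is a placeholder):
#
# search:
#   primary: brave
#   fallback: duckduckgo
#   brave_api_key: your-brave-api-key-here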

# Context budget display
# Shows token usage in the prompt, e.g. [5.0k/32k]
show_context_budget: true
max_context_tokens: 32768

# Edit preview mode
# Shows a diff before applying edit_file operations
preview_edits: true
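# Threshold for triggering previews (0 appears to mean preview every edit)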
preview_threshold: 0

# Security configuration
security:
  # Default severity filter for security scans (empty = all severities)
  # Examples: "HIGH,CRITICAL" or "MEDIUM,HIGH,CRITICAL"
  default_severity: ""
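  # For example, to surface only high- and critical-severity findings:
  # default_severity: "HIGH,CRITICAL"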

# MCP (Model Context Protocol) Configuration
# MCP enables integration with external tools and services via MCP servers.
# Servers are spawned as subprocesses and communicate via JSON-RPC over stdio.
mcp:
  # Enable or disable MCP support (default: true)
  enabled: true

  # List of MCP server configurations
  servers:
    # GitHub MCP Server - provides GitHub integration tools
    # - name: github
    #   command: npx
    #   args: ["-y", "@modelcontextprotocol/server-github"]
    #   env:
    #     GITHUB_TOKEN: "${GITHUB_TOKEN}"  # Uses env var expansion
    #   auto_connect: false                # Set true to connect on startup
    #   timeout: 30s                       # Connection timeout

    # Brave Search MCP Server - provides web search capabilities
    # - name: brave-search
    #   command: npx
    #   args: ["-y", "@modelcontextprotocol/server-brave-search"]
    #   env:
    #     BRAVE_API_KEY: "${BRAVE_API_KEY}"

    # Filesystem MCP Server - provides filesystem access
    # - name: filesystem
    #   command: npx
    #   args: ["-y", "@modelcontextprotocol/server-filesystem", "/path/to/allowed/directory"]

    # PostgreSQL MCP Server - provides database access
    # - name: postgres
    #   command: npx
    #   args: ["-y", "@modelcontextprotocol/server-postgres"]
    #   env:
    #     POSTGRES_CONNECTION_STRING: "${DATABASE_URL}"
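
    # Any stdio-based MCP server can be added the same way. The entry below is
    # a hypothetical sketch - the name, command path, and args are illustrative:
    # - name: my-tools
    #   command: /usr/local/bin/my-mcp-server
    #   args: ["--stdio"]
    #   auto_connect: false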

# Memory (Persistent Project Knowledge) Configuration
memory:
  # Enable or disable memory feature (default: true)
  enabled: true

  # Maximum number of memories per project (default: 500)
  max_memories: 500

  # Maximum tokens to inject into prompt (default: 2000)
  max_context_tokens: 2000

  # Auto-cleanup memories not used in N days (default: 90)
  retention_days: 90

  # Detect and suggest memories from conversation (default: true)
  auto_capture: true

# Context Management (v2.0.2)
context:
  # Maximum lines per tool output (default: 500, 0 = unlimited)
  # Prevents large tool outputs from consuming the context window
  max_tool_output_lines: 500

  # Maximum characters per tool output (default: 15000, 0 = unlimited)
  max_tool_output_chars: 15000

  # Maximum tool call iterations per message (default: 10)
  # Limits how many consecutive tool calls the AI can make per user message
  max_tool_iterations: 10

  # Auto-compact conversation when context is filling up (default: true)
  # Uses LLM to summarize older messages, keeping recent ones intact
  compaction_enabled: true

  # Trigger compaction at this fraction of max_context_tokens (default: 0.75)
  # 0.75 = compact when 75% of context window is used
  compaction_threshold: 0.75

  # Number of recent message pairs to keep during compaction (default: 4)
  # These messages are never summarized
  compaction_keep_recent: 4
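
  # For example, with max_context_tokens: 32768 and compaction_threshold: 0.75,
  # compaction kicks in once roughly 24,576 tokens are in use; the 4 most recent
  # message pairs are kept verbatim while older ones are summarized.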

# Upgrade (Auto-update) Configuration
upgrade:
  # Check for updates on startup (default: true)
  # Set to false to disable automatic update checks
  auto_check: true

  # Automatically install updates without prompting (default: false)
  # When true, updates are installed immediately without confirmation
  # Use with caution - keeping this false is recommended
  auto_upgrade: false

  # Show changelog when an update is available (default: true)
  show_changelog: true