-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathcli_engineer.toml
More file actions
244 lines (198 loc) · 6.88 KB
/
cli_engineer.toml
File metadata and controls
244 lines (198 loc) · 6.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
# cli_engineer Configuration File
#
# API keys are stored in environment variables:
# - OPENROUTER_API_KEY for OpenRouter
# - GEMINI_API_KEY for Gemini
# - OPENAI_API_KEY for OpenAI
# - ANTHROPIC_API_KEY for Anthropic
# - Ollama runs locally and doesn't require an API key
# Execution settings
[execution]
max_iterations = 5

# Code execution settings
enable_code_execution = true
# Commands the agent is permitted to execute.
allowed_commands = [
    "ls", "ps", "pwd", "echo", "cat", "head", "tail", "wc", "grep", "find",
    "which", "whereis", "mkdir", "touch", "cp", "mv", "chmod", "du", "df",
    "free", "uptime", "date", "time",
    "git log", "git diff", "git status", "git show",
    # "cargo fmt" is the actual formatting subcommand ("cargo format" does not exist)
    "cargo build", "cargo test", "cargo run", "cargo check", "cargo update", "cargo fmt",
    "python", "python3", "uv",
    "npm", "npm test", "npm run", "yarn", "node",
    "make", "cmake", "gcc", "clang",
    "javac", "java", "mvn", "gradle",
    "go", "dotnet", "php", "ruby", "bundle", "rake",
    "docker", "docker-compose",
    "curl", "wget", "ping",
]
# UI settings
[ui]
colorful = true
progress_bars = true
metrics = true
# Output target — "terminal" is the interactive renderer; other accepted
# values are not visible from this file — TODO confirm against the app docs
output_format = "terminal"
# Context settings
[context]
max_tokens = 100000
# Presumably the fraction of max_tokens at which context compression
# triggers — NOTE(review): verify semantics against the consuming code
compression_threshold = 0.6
cache_enabled = true
# AI PROVIDERS - First, set enabled = true on the provider you want to use.
# Then, uncomment its model that you want to use!

# OPENAI MODELS (require OPENAI_API_KEY):
[ai_providers.openai]
enabled = false
# Written as a float: every other provider uses float temperatures (0.2, 0.7),
# and strict TOML deserializers distinguish integer 1 from float 1.0
temperature = 1.0
model = "gpt-4.1" # Flagship GPT model for coding tasks
cost_per_1m_input_tokens = 2.00
cost_per_1m_output_tokens = 8.00
max_tokens = 1_047_576
# model = "o4-mini" # Faster, more affordable reasoning model
# cost_per_1m_input_tokens = 1.10
# cost_per_1m_output_tokens = 4.40
# max_tokens = 128000
# model = "o3" # Most powerful reasoning model
# cost_per_1m_input_tokens = 10.00
# cost_per_1m_output_tokens = 40.00
# max_tokens = 128000
# CLAUDE MODELS (requires ANTHROPIC_API_KEY):
[ai_providers.anthropic]
enabled = false
# Float for type consistency with the other providers' temperature values
temperature = 1.0
model = "claude-sonnet-4-0" # Claude 4 with extended thinking support
cost_per_1m_input_tokens = 3.00
cost_per_1m_output_tokens = 15.00
max_tokens = 200000
# model = "claude-opus-4-0" # Most capable model
# cost_per_1m_input_tokens = 15.00
# cost_per_1m_output_tokens = 75.00
# max_tokens = 200000
# COST-EFFECTIVE CLOUD OPTIONS (require OPENROUTER_API_KEY)
[ai_providers.openrouter]
enabled = false
# Float for type consistency with the other providers' temperature values
temperature = 1.0
# Active model first; commented alternatives follow (key order is not
# semantic in TOML, but active settings should lead for readability)
model = "google/gemini-2.5-pro" # First-place positioning on the LMArena leaderboard
cost_per_1m_input_tokens = 1.25
cost_per_1m_output_tokens = 10.00
max_tokens = 1_047_576
# model = "deepseek/deepseek-r1-0528-qwen3-8b" # Advanced reasoning in small, affordable model
# cost_per_1m_input_tokens = 0.06
# cost_per_1m_output_tokens = 0.09
# max_tokens = 65536
# model = "inception/mercury-coder-small-beta" # Lightning-fast, diffusion coding model
# cost_per_1m_input_tokens = 0.25
# cost_per_1m_output_tokens = 1.00
# max_tokens = 32768
# model = "qwen/qwen3-235b-a22b" # Powerful, affordable reasoning model
# cost_per_1m_input_tokens = 0.13
# cost_per_1m_output_tokens = 0.85
# max_tokens = 41000
# model = "microsoft/phi-4-reasoning-plus" # Efficient general purpose
# cost_per_1m_input_tokens = 0.07
# cost_per_1m_output_tokens = 0.35
# max_tokens = 33000
# GEMINI MODELS (requires GEMINI_API_KEY):
[ai_providers.gemini]
enabled = false
model = "gemini-2.5-pro"
temperature = 0.2  # notably lower than the other providers in this file
cost_per_1m_input_tokens = 1.25
cost_per_1m_output_tokens = 10.00
max_tokens = 1047576
# Alternative:
# model = "models/gemini-2.5-flash-preview-05-20"
# cost_per_1m_input_tokens = 0.25
# cost_per_1m_output_tokens = 2.00
# max_tokens = 1047576
# Ollama - Local LLM inference (no API key required)
# Install: curl -fsSL https://ollama.ai/install.sh | sh
# Pull model: ollama pull <model_name>
# Serve model: ollama run <model_name>
#
# CONSUMER GPU RECOMMENDATIONS (4B-14B parameters):
#
# For 8GB VRAM
# qwen3:4b, gemma3:4b
#
# For 12GB VRAM
# qwen3:8b, deepseek-r1:8b
#
# For 16GB+ VRAM
# qwen3:14b, gemma3:12b, phi4-14b
# LOCAL MODELS (free, requires Ollama running locally)
[ai_providers.ollama]
enabled = false
temperature = 0.7
base_url = "http://localhost:11434"  # default Ollama endpoint
model = "qwen3:4b"
max_tokens = 40000
# RECOMMENDED MODELS (uncomment a model/max_tokens pair to switch):
# deepseek-r1:8b - Updated R1 reasoning and Qwen 3 model: DeepSeek-R1-0528-Qwen3-8B
# model = "deepseek-r1:8b"
# max_tokens = 128000
# qwen3:14b - High performance, requires more VRAM
# model = "qwen3:14b"
# max_tokens = 40000
# deepseek-r1:7b - Compact reasoning (older variant)
# model = "deepseek-r1:7b"
# max_tokens = 128000
# phi4-14b - Microsoft's open source reasoning model
# model = "phi4-14b"
# max_tokens = 16384
# gemma3:4b - Google's compact model
# model = "gemma3:4b"
# max_tokens = 128000
# gemma3:12b - Stronger performance
# model = "gemma3:12b"
# max_tokens = 128000
# xAI MODELS (require XAI_API_KEY):
[ai_providers.xai]
enabled = true  # the only provider enabled in this file
temperature = 0.7
model = "grok-4"
cost_per_1m_input_tokens = 3.0
cost_per_1m_output_tokens = 15.0
max_tokens = 256000
# Alternatives:
# model = "grok-2-1212"
# cost_per_1m_input_tokens = 2.0
# cost_per_1m_output_tokens = 10.0
# max_tokens = 131072
# model = "grok-2-vision-1212"
# cost_per_1m_input_tokens = 2.0
# cost_per_1m_output_tokens = 10.0
# max_tokens = 131072
# NOTE(review): by TOML table rules, every key from here to the next table
# header still belongs to [ai_providers.xai] — these parse as
# ai_providers.xai.artifact_dir etc., NOT as top-level settings. If they are
# meant to be global (their comments suggest so), they must be moved above
# the first [table] header near the top of the file; re-opening [execution]
# here would be invalid TOML. TODO: confirm against the application's
# config loader before moving.
# Artifact directory
artifact_dir = "./"
# Isolated execution environment
isolated_execution = false
# Cleanup artifacts on exit
cleanup_on_exit = false
# Disable automatic git repository initialization unless explicitly requested
disable_auto_git = true
# Parallel task execution
parallel_enabled = true
# =====================================================================
# MCP Servers Configuration
# ---------------------------------------------------------------------
# cli_engineer can dynamically discover and call external tools via the
# Model-Context Protocol (MCP). Each entry below represents either a
# remote HTTPS/SSE server or a local child-process (stdio) server.
#
# • Use [[mcp.servers]] to add multiple servers.
# • Only ONE of `base_url` or `command` should be provided.
# • Any string of the form "${ENV_VAR}" will be replaced with the value of
# that environment variable at runtime, so secrets never have to live
# inside your config file.
# =====================================================================
[[mcp.servers]]
name = "Firecrawl Search and Scrape"  # friendly label
command = "npx"                       # executable to spawn (stdio MCP)
args = ["-y", "firecrawl-mcp"]        # full argument list
api_key = "${FIRECRAWL_API_KEY}"      # resolved from the env var at runtime

# Extra env overrides passed to the child process
[mcp.servers.env]
FIRECRAWL_API_KEY = "${FIRECRAWL_API_KEY}"
# [[mcp.servers]]
# name = "cli_engineer Docs"
# base_url = "https://gitmcp.io/trilogy-group/cli_engineer" # remote server
# # No local command necessary here
# NOTE: This server returns HTML (web page) instead of MCP API responses.
# It appears to be a documentation site rather than an actual MCP server.