-
Notifications
You must be signed in to change notification settings - Fork 147
Expand file tree
/
Copy pathconfig.yaml.example
More file actions
98 lines (83 loc) · 2.86 KB
/
config.yaml.example
File metadata and controls
98 lines (83 loc) · 2.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
---
# Example configuration showing model-specific context limits
# NOTE(review): indentation was lost in extraction; nesting below is
# reconstructed from key semantics — confirm against the consuming loader.

# API keys configuration
openai:
  api_key_env: OPENAI_API_KEY
  # Optional: override base URL (env var OPENAI_BASE_URL takes precedence)
  # base_url: https://api.openai.com

gemini:
  api_key_env: GOOGLE_API_KEY
  # Vertex AI (optional). When enabled, the Gemini provider will use Vertex AI
  # instead of AI Studio. Credentials are resolved from Application Default
  # Credentials (ADC) or a service account, not the GOOGLE_API_KEY.
  vertex_ai:
    enabled: false
    # Your Google Cloud Project ID (alternatively set VERTEX_PROJECT_ID or GOOGLE_CLOUD_PROJECT)
    project_id: ""
    # Region/location for Vertex AI (e.g., "us-central1"; alternatively set VERTEX_LOCATION or GOOGLE_CLOUD_REGION)
    region: ""

anthropic:
  api_key_env: ANTHROPIC_API_KEY

xai:
  api_key_env: XAI_API_KEY

# Model configuration with context limits
models:
  # Graph building model - needs large context
  graph:
    # GPT-4.1 WORKS WELL HERE BUT IS VERY EXPENSIVE!
    # provider: openai
    # model: gpt-4.1
    # max_context: 1000000
    provider: gemini
    model: gemini-2.5-pro
    max_context: 1000000
    thinking_enabled: true
    thinking_budget: -1

  # Scout/agent model for exploration
  scout:
    provider: openai
    model: gpt-5-mini
    max_context: 256000
    # reasoning_effort: low

  # Strategic thinking model
  # For planning and reasoning about security issues
  strategist:
    provider: openai
    model: gpt-5
    max_context: 256000
    plan_reasoning_effort: medium
    hypothesize_reasoning_effort: high

  # Lightweight utility model for quick, low-cost tasks (e.g., dedup)
  lightweight:
    provider: openai
    model: gpt-4o-mini
    # No large context or special reasoning required

  # QA model
  finalize:
    provider: openai
    model: gpt-5
    reasoning_effort: high
    # No max_context specified - will use global default
    # reasoning_effort: medium # Options: low, medium, high (GPT-5 models only)

  # Reporting model
  reporting:
    provider: openai
    model: gpt-4o
    # No max_context specified - will use global default

# Global context settings (used when model doesn't specify max_context)
context:
  max_tokens: 256000  # Default for models without specific max_context
  compression_threshold: 0.75  # Compress history when 75% full

# Timeouts and retries
timeouts:
  request_seconds: 300

# NOTE(review): retries is assumed to be a top-level sibling of timeouts
# (the section comment covers both) — confirm against the config loader.
retries:
  max_attempts: 3
  backoff_min_seconds: 1
  backoff_max_seconds: 2

# Notes:
# - Each model can have its own max_context setting
# - Graph model uses large context for building iterations (sees more code)
# - Guidance model uses smaller context for initial discovery/design phase
# - Discovery phase and building iterations use different context limits
# - Models without max_context use the global context.max_tokens value
# - Token counting is done using the specific model for accuracy