forked from alibaba/sec-code-bench
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.example.yaml
More file actions
57 lines (46 loc) · 1.84 KB
/
config.example.yaml
File metadata and controls
57 lines (46 loc) · 1.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# SecCodeBench Configuration File - Example
# Copy this file to config.yaml and modify with your own settings
# Language configurations
# Each entry names a language and gives the path to its benchmark file.
# NOTE(review): this comment previously also mentioned a per-entry locale,
# but no entry below sets one - confirm whether the loader expects it.
lang_configs:
  # NOTE(review): the cpp entry points at c.json, unlike the other entries,
  # which use <language>.json - confirm the file name.
  - language: cpp
    benchmark: ./datasets/benchmark/cpp/c.json
  - language: python
    benchmark: ./datasets/benchmark/python/python.json
  - language: go
    benchmark: ./datasets/benchmark/go/go.json
  - language: java
    benchmark: ./datasets/benchmark/java/java.json
  - language: nodejs
    benchmark: ./datasets/benchmark/nodejs/nodejs.json
# Evaluation LLM configuration
# The main LLM model to be evaluated. Every value below is a placeholder;
# replace it with your own settings.
eval_llm:
  provider: OPENAI
  model: your-model-name
  api_key: your-api-key-here
  endpoint: https://your-endpoint-name
# Experiment configuration
experiment:
  # Number of experiment cycles for each test case
  cycle: 10
  # Optional: JSON string of parameters for LLM API calls,
  # e.g., '{"enable_thinking": true}'
  parameters: '{"enable_thinking": true}'
  # Optional: RPM (Requests Per Minute) limit for evaluated LLM models
  # (default: 60)
  rpm_limit: 60
# Judge LLMs for evaluation
# IMPORTANT: The number of judges should be odd (1, 3, 5, etc.) so that
# majority voting cannot tie.
# Multiple judge models help ensure fair and consistent evaluation.
judge_llms:
  - provider: OPENAI
    model: judge-model-1
    api_key: your-api-key-here
    endpoint: https://your-endpoint-name
  - provider: OPENAI
    model: judge-model-2
    api_key: your-api-key-here
    endpoint: https://your-endpoint-name
  - provider: OPENAI
    model: judge-model-3
    api_key: your-api-key-here
    endpoint: https://your-endpoint-name
# Directory configuration
# Used by the Docker entrypoint; native runs use the CLI --log-dir option.
directories:
  # Path inside the container. When using docker-compose, the host directory
  # is set by the LOCAL_RESULT_DIR environment variable.
  container_result: /dockershare