MASArena/run_benchmark.sh at main · ofoooo/MASArena · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/bin/bash
# Benchmark Runner Script
# Usage: ./run_benchmark.sh [benchmark_name] [agent_system] [limit] [mcp_config_file] [concurrency] [optimizer] [train_size] [test_size]

# Positional arguments, captured as given (missing ones become empty).
BENCHMARK=${1-}
AGENT_SYSTEM=${2-} # e.g. single_agent, supervisor_mas, swarm, agentverse
LIMIT=${3-}
MCP_CONFIG=${4-}
CONCURRENCY=${5-}
OPTIMIZER=${6-} # Optional: name of the optimizer to run
TRAIN_SIZE=${7-}
TEST_SIZE=${8-}

# Fill in defaults for arguments that were omitted or passed as "".
# MCP_CONFIG, OPTIMIZER, TRAIN_SIZE and TEST_SIZE have no default and stay
# empty, which leaves the corresponding flags off the main.py command line.
: "${BENCHMARK:=math}"
: "${AGENT_SYSTEM:=agentverse}"
: "${LIMIT:=2}"
: "${CONCURRENCY:=6}"

# Make sure the results/ and metrics/ directories exist in the current
# working directory; -p makes this a no-op when they are already there.
for required_dir in results metrics; do
  mkdir -p "$required_dir"
done

# Print a banner describing the effective run configuration to stdout.
# Reads the globals BENCHMARK, AGENT_SYSTEM, LIMIT, MCP_CONFIG, CONCURRENCY,
# OPTIMIZER, TRAIN_SIZE and TEST_SIZE; modifies nothing.
print_run_banner() {
  local rule="====================================================="

  printf '%s\n' "$rule"
  printf '%s\n' "Running Multi-Agent Benchmark"
  printf '%s\n' "$rule"
  printf '%s\n' "Benchmark: $BENCHMARK"

  if [[ -z "$OPTIMIZER" ]]; then
    printf '%s\n' "Agent System: $AGENT_SYSTEM"
  else
    printf '%s\n' "Optimizer: $OPTIMIZER"
    # Split sizes are reported only when an optimizer was requested.
    if [[ -n "$TRAIN_SIZE" ]]; then
      printf '%s\n' "Train Size: $TRAIN_SIZE"
    fi
    if [[ -n "$TEST_SIZE" ]]; then
      printf '%s\n' "Test Size: $TEST_SIZE"
    fi
    printf '%s\n' "Agent System (post-optimization): $AGENT_SYSTEM"
  fi

  printf '%s\n' "Limit: $LIMIT"

  if [[ -z "$MCP_CONFIG" ]]; then
    printf '%s\n' "Using MCP tools: no"
  else
    printf '%s\n' "MCP Config File: $MCP_CONFIG"
    printf '%s\n' "Using MCP tools: yes"
  fi

  if [[ -z "$CONCURRENCY" ]]; then
    printf '%s\n' "Running asynchronously: no"
  else
    printf '%s\n' "Concurrency: $CONCURRENCY"
    printf '%s\n' "Running asynchronously: yes"
  fi

  printf '%s\n' "$rule"
}

print_run_banner

# When a .venv directory exists in the current working directory, load its
# activation script into this shell before anything else runs.
venv_dir=".venv"
if [[ -d "$venv_dir" ]]; then
  # shellcheck disable=SC1091 -- the activation script is created at runtime
  . "$venv_dir/bin/activate"
fi

# Assemble the optional main.py flag groups.
#
# Each group is a bash array rather than a space-joined string, so every value
# reaches main.py as exactly one argument even when it contains spaces or glob
# characters (for example an MCP config path such as "my configs/mcp.json").
#
# Globals read:    MCP_CONFIG, CONCURRENCY, OPTIMIZER, TRAIN_SIZE, TEST_SIZE
# Globals written: MCP_FLAGS, ASYNC_FLAGS, OPTIMIZER_FLAGS
#                  (arrays; left empty when the feature is not requested)
build_optional_flags() {
  MCP_FLAGS=()
  ASYNC_FLAGS=()
  OPTIMIZER_FLAGS=()

  # MCP tools are switched on only when a config file was supplied
  if [ -n "$MCP_CONFIG" ]; then
    MCP_FLAGS=(--use-mcp-tools --mcp-config-file "$MCP_CONFIG")
  fi

  # Async execution is switched on only when a concurrency value is set
  if [ -n "$CONCURRENCY" ]; then
    ASYNC_FLAGS=(--async-run --concurrency "$CONCURRENCY")
  fi

  # Train/test sizes are forwarded only together with an optimizer
  if [ -n "$OPTIMIZER" ]; then
    OPTIMIZER_FLAGS=(--run-optimizer "$OPTIMIZER")
    if [ -n "$TRAIN_SIZE" ]; then
      OPTIMIZER_FLAGS+=(--train_size "$TRAIN_SIZE")
    fi
    if [ -n "$TEST_SIZE" ]; then
      OPTIMIZER_FLAGS+=(--test_size "$TEST_SIZE")
    fi
  fi
}

build_optional_flags

# Run the benchmark or optimizer.
# An empty array expands to zero arguments, so unused flag groups vanish.
# Keep this as the last command of the section: the script's final
# `exit $?` reports the status of this invocation.
python main.py \
  --benchmark "$BENCHMARK" \
  --agent-system "$AGENT_SYSTEM" \
  --limit "$LIMIT" \
  "${MCP_FLAGS[@]}" \
  "${ASYNC_FLAGS[@]}" \
  "${OPTIMIZER_FLAGS[@]}"

# Hand the status of the command that just finished (the Python run) back to
# whoever invoked this script.
run_status=$?
exit "$run_status"