-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcli.py
More file actions
98 lines (73 loc) · 3.4 KB
/
cli.py
File metadata and controls
98 lines (73 loc) · 3.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
"""Command-line interface for BASED Eval - Multi-game AI evaluation framework.
This is the unified CLI entry point for all BASED Eval games and tools:
- `based codenames` - Run Codenames games
- `based chainlex` - Run ChainLex-1 games (single-player word association)
- `based connections` - Run Connections puzzles
- `based analytics` - Analytics and reporting tools
"""
import sys
from pathlib import Path
import typer
from rich.console import Console
# The connections_eval package is imported from connections/src, located
# relative to this file, so that directory has to be on sys.path first.
connections_path = Path(__file__).parent.joinpath("connections", "src")
if str(connections_path) not in sys.path:
    # Inserted at the front of sys.path; skipped if the entry is already present.
    sys.path.insert(0, str(connections_path))
# Import game-specific CLIs
from codenames.cli_codenames import app as codenames_app
from chainlex.cli_chainlex import app as chainlex_app
from connections_eval.cli import app as connections_app
from shared.cli_analytics import app as analytics_app
# Root Typer application; no_args_is_help makes a bare invocation show help.
app = typer.Typer(
    no_args_is_help=True,
    help="BASED Eval - Benchmark for Association, Sorting, and Entity Deduction",
)
console = Console()

# Mount every sub-application under its command name.
# Each row is (sub-app, command name, one-line help).
for _sub_app, _command, _summary in (
    (codenames_app, "codenames", "Run Codenames games for AI evaluation"),
    (chainlex_app, "chainlex", "Run ChainLex-1 games (single-player word association)"),
    (connections_app, "connections", "Run Connections puzzles for AI evaluation"),
    (analytics_app, "analytics", "Analytics and reporting tools"),
):
    app.add_typer(_sub_app, name=_command, help=_summary)
# Drop the loop temporaries so the module namespace stays as it was.
del _sub_app, _command, _summary
@app.callback()
def main():
    """BASED Eval - Multi-game AI evaluation framework.

    Run AI models on various games to evaluate their reasoning and language abilities.

    Examples:

    # Run a Codenames game
    uv run based codenames run --red gemini-flash --blue claude-haiku

    # Run a ChainLex-1 game (single-player, cost-efficient)
    uv run based chainlex run --model gemini-flash

    # Run Connections puzzles
    uv run based connections run --model gemini-flash --puzzles 10

    # Check analytics
    uv run based analytics trial-balance
    uv run based analytics cost-report
    uv run based analytics leaderboard
    """
    # Intentionally empty: the callback takes no options and performs no work;
    # the docstring alone is a valid function body.
    # NOTE(review): the app is also created with an explicit help= string;
    # confirm which of the two texts Typer actually shows for `based --help`.
@app.command()
def version():
    """Show version information."""
    # Imported here rather than at module level, so the version attributes
    # are only looked up when this command runs.
    from codenames import __version__ as codenames_version
    from shared import __version__ as shared_version

    console.print("[bold]BASED Eval[/bold]")
    # One line per package, in a fixed order.
    reported = (
        ("codenames", codenames_version),
        ("shared", shared_version),
    )
    for package, release in reported:
        console.print(f" {package}: {release}")
# Legacy commands kept for backward compatibility.
# They are hidden from help, print a deprecation notice that names the
# replacement command, and exit with status 1 (they do not run the new command).
@app.command(
    hidden=True,
    # Accept and ignore whatever options/arguments the old command used to
    # take. Without this, a legacy call such as
    # `based run --red gemini-flash --blue claude-haiku` is rejected by the
    # option parser ("No such option") before the deprecation notice below
    # can be printed.
    context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
)
def run():
    """[DEPRECATED] Use 'based codenames run' instead."""
    # Hidden legacy entry point: it never runs a game. It prints where the
    # command moved to, plus an example of the new invocation.
    console.print("[yellow]⚠️ 'based run' is deprecated. Use 'based codenames run' instead.[/yellow]")
    console.print("[dim]Example: uv run based codenames run --red gemini-flash --blue claude-haiku[/dim]")
    # Always raises typer.Exit with code 1, so callers still using the old
    # command see a failing exit status.
    raise typer.Exit(1)
@app.command(name="list-models", hidden=True)
def list_models_legacy():
    """[DEPRECATED] Use 'based codenames list-models' or 'based connections list-models' instead."""
    # Hidden legacy command: print the deprecation notice and the two
    # replacement commands, then exit with a non-zero status.
    notice_lines = (
        "[yellow]⚠️ 'based list-models' is deprecated.[/yellow]",
        "[dim]Use: uv run based codenames list-models[/dim]",
        "[dim] or: uv run based connections list-models[/dim]",
    )
    for text in notice_lines:
        console.print(text)
    raise typer.Exit(code=1)
# Run the CLI when this file is executed directly as a script; importing the
# module only defines `app` and does not invoke it.
if __name__ == "__main__":
    app()