Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 31 additions & 6 deletions benchtools/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,7 @@ def from_folders(cls, bench_path):
for task_dir in task_list:
# load the tasks
task_path = os.path.join(task_folder, task_dir)
task_content = os.listdir(task_path)
if 'task_info.yml' in task_content:
task_info_file = os.path.join(task_path, 'task_info.yml')
task = Task.from_dict(task_info_file)
else:
task = Task.from_txt_csv(task_path)
task = Task.from_txt_csv(task_path)
tasks.append(task)
else:
tasks = []
Expand Down Expand Up @@ -166,6 +161,36 @@ def from_yaml(cls, bench_path):
return cls(name = info['bench_name'], bench_path =bench_path,
concept= info['concept'], tasks=tasks)

@classmethod
def load(cls, bench_path):
    '''
    Load a benchmark object from a given path.
    If the given folder contains a tasks.yml file, the benchmark is
    loaded with from_yaml. Otherwise it is loaded from the existing
    task folders with from_folders.

    Parameters:
    -----------
    bench_path: str
        The path to the benchmark folder. The folder should contain the about.md file,
        tasks.yml file or tasks folder.

    Returns:
    --------
    Bench
        An instance of the Bench class with the loaded benchmark.

    Raises:
    -------
    ValueError
        If bench_path is not an existing directory.
    '''
    # Guard first: os.listdir below requires an existing directory.
    if not os.path.isdir(bench_path):
        raise ValueError("The passed path doesn't exist.")

    # The presence of tasks.yml selects the yaml loader; any other
    # layout is handed to the folder-based loader.
    if 'tasks.yml' in os.listdir(bench_path):
        return cls.from_yaml(bench_path)
    return cls.from_folders(bench_path)


@staticmethod
def load_info(bench_path):
with open(os.path.join(bench_path, 'info.yml'), 'r') as f:
Expand Down
74 changes: 33 additions & 41 deletions benchtools/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,37 +100,36 @@ def init(benchmark_name, path, about, no_git):
@click.option('-s','--task-source', type=str,help="The relative path to content that already exists`", required=True)
@click.option('-t','--task-type', type=click.Choice(['folders', 'list']),
help="The type of the task content being added. Options are csv or yml", required=True)
def add_task(task_name, bench_path, task_source,task_type):
def add_task(task_name, benchmark_path, task_source,task_type):
"""
Set up a new task.

"""

if os.path.exists(bench_path):
benchmark = Bench.load(bench_path)
if task_source:
if os.path.isdir(task_source):
task = Task.from_txt_csv(task_source)
elif os.path.isfile(task_source):
task = Task.from_yaml(task_source)
elif task_type:
match task_type:
case 'folders':
storage_type = 'csv'
case 'list':
storage_type = 'yaml'
task = Task.from_example(name=task_name, storage_type=storage_type)
task.write()
else:
click.echo("Invalid task content type. Either provide content with --task-source or specify the type of task content with --type.")
exit(4356)
benchmark = Bench.load(bench_path)

# Create Task object
if task_source:
if os.path.isdir(task_source):
task = Task.from_txt_csv(task_source)
elif os.path.isfile(task_source):
task = Task.from_yaml(task_source)
elif task_type:
match task_type:
case 'folders':
storage_type = 'csv'
case 'list':
storage_type = 'yaml'
task = Task.from_example(name=task_name, storage_type=storage_type)
# task.write()
else:
click.echo("Invalid task content type. Either provide content with --task-source or specify the type of task content with --type.")
exit(4356)

# TODO: handle adding to benchmark with metadata
# benchmark.add_task(task)
task.write(bench_path)
# Add Task to Bench, will write as well
benchmark.add_task(task)
click.echo(f"Added {task_name} to {benchmark.bench_name} benchmark successfully!")
else:
click.echo("No benchmark reposiory at " + bench_path)



@benchtool.command()
Expand All @@ -155,19 +154,14 @@ def run_task(benchmark_path: str, task_name, runner_type, model, api_url, log_pa
# Create BenchRunner object
runner = BenchRunner(runner_type, model, api_url)

# check folder to see if folder or yaml type to load benchmark
if os.path.isdir(benchmark_path):
content = os.listdir(benchmark_path)
if 'tasks.yml' in content:
benchmark = Bench.from_yaml(benchmark_path)
else:
benchmark = Bench.from_folders(benchmark_path)
benchmark = Bench.load(bench_path)

click.echo(f"Running {task_name} now")

click.echo(f"Running {task_name} of benchmark {benchmark.bench_name} now")
benchmark.run_task(task_name, runner, log_path)

@benchtool.command()
@click.argument('benchmark-path', required = True, type=str)
@click.argument('benchmark-path', required = False, type=str, default='.')
@click.option('-r', '--runner-type', type=click.Choice(['ollama', 'openai', 'aws']),
default="ollama", help="The engine that will run your LLM.")
@click.option('-m', '--model', type=str, default="gemma3",
Expand All @@ -179,22 +173,20 @@ def run_task(benchmark_path: str, task_name, runner_type, model, api_url, log_pa
def run(benchmark_path: str, runner_type: str, model: str, api_url: str, log_path: str):
"""
Running the benchmark and generating logs
, help="The path to the benchmark repository where all the task reside."
Parameters:
benchmark-path: The path to the benchmark repository where all the task reside.
"""
# Create BenchRunner object
runner = BenchRunner(runner_type, model, api_url)

# check folder to see if folder or yaml type to load benchmark
if os.path.isdir(benchmark_path):
content = os.listdir(benchmark_path)
if 'tasks.yml' in content:
benchmark = Bench.from_yaml(benchmark_path)
else:
benchmark = Bench.from_folders(benchmark_path)
benchmark = Bench.load(bench_path)


click.echo(f"Running {benchmark.bench_name} now")
benchmark.run(runner, log_path)



@click.group()
def betterbench():
"""
Expand Down