From ed6a28073cee4e95593984e93185c9a2f8ec25c2 Mon Sep 17 00:00:00 2001 From: AymanBx Date: Thu, 26 Feb 2026 07:07:26 +0000 Subject: [PATCH 1/4] Benchmark: from_folder was fixed after being changed through resolving a conflict --- benchtools/benchmark.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/benchtools/benchmark.py b/benchtools/benchmark.py index bf01989..56ce7d4 100644 --- a/benchtools/benchmark.py +++ b/benchtools/benchmark.py @@ -121,12 +121,7 @@ def from_folders(cls, bench_path): for task_dir in task_list: # load the tasks task_path = os.path.join(task_folder, task_dir) - task_content = os.listdir(task_path) - if 'task_info.yml' in task_content: - task_info_file = os.path.join(task_path, 'task_info.yml') - task = Task.from_dict(task_info_file) - else: - task = Task.from_txt_csv(task_path) + task = Task.from_txt_csv(task_path) tasks.append(task) else: tasks = [] From a7f992e6f5c2de82b820d597ef74eaad7b0b29b3 Mon Sep 17 00:00:00 2001 From: AymanBx Date: Thu, 26 Feb 2026 07:54:09 +0000 Subject: [PATCH 2/4] CLI: unifying Bench object loading for side commands like add-task and run-task --- benchtools/cli.py | 52 +++++++++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/benchtools/cli.py b/benchtools/cli.py index 3b67c3b..eac2613 100644 --- a/benchtools/cli.py +++ b/benchtools/cli.py @@ -100,14 +100,22 @@ def init(benchmark_name, path, about, no_git): @click.option('-s','--task-source', type=str,help="The relative path to content that already exists`", required=True) @click.option('-t','--task-type', type=click.Choice(['folders', 'list']), help="The type of the task content being added. Options are csv or yml", required=True) -def add_task(task_name, bench_path, task_source,task_type): +def add_task(task_name, benchmark_path, task_source,task_type): """ Set up a new task. 
""" - if os.path.exists(bench_path): - benchmark = Bench.load(bench_path) + # check folder to see if folder or yaml type to load benchmark + if os.path.isdir(benchmark_path): + content = os.listdir(benchmark_path) + if 'info.yml' in content: + benchmark = Bench.from_yaml(benchmark_path) + else: + benchmark = Bench.from_folders(benchmark_path) + + if benchmark.written: + # Create Task object if task_source: if os.path.isdir(task_source): task = Task.from_txt_csv(task_source) @@ -120,17 +128,17 @@ def add_task(task_name, bench_path, task_source,task_type): case 'list': storage_type = 'yaml' task = Task.from_example(name=task_name, storage_type=storage_type) - task.write() + # task.write() else: click.echo("Invalid task content type. Either provide content with --task-source or specify the type of task content with --type.") exit(4356) - # TODO: handle adding to benchmark with metadata - # benchmark.add_task(task) - task.write(bench_path) + # Add Task to Bench + benchmark.add_task(task) click.echo(f"Added {task_name} to {benchmark.bench_name} benchmark successfully!") - else: - click.echo("No benchmark reposiory at " + bench_path) + + else: + click.echo(f"path {benchmark_path} doesn't seem to have a benchmark in it") @benchtool.command() @@ -158,16 +166,19 @@ def run_task(benchmark_path: str, task_name, runner_type, model, api_url, log_pa # check folder to see if folder or yaml type to load benchmark if os.path.isdir(benchmark_path): content = os.listdir(benchmark_path) - if 'tasks.yml' in content: + if 'info.yml' in content: benchmark = Bench.from_yaml(benchmark_path) else: benchmark = Bench.from_folders(benchmark_path) - - click.echo(f"Running {task_name} now") - benchmark.run_task(task_name, runner, log_path) + + if benchmark.bench_name: + click.echo(f"Running {task_name} of benchmark {benchmark.bench_name} now") + benchmark.run_task(task_name, runner, log_path) + else: + click.echo(f"path {benchmark_path} doesn't seem to have a benchmark in it") 
@benchtool.command() -@click.argument('benchmark-path', required = True, type=str) +@click.argument('benchmark-path', required = False, type=str, default='.') @click.option('-r', '--runner-type', type=click.Choice(['ollama', 'openai', 'aws']), default="ollama", help="The engine that will run your LLM.") @click.option('-m', '--model', type=str, default="gemma3", @@ -179,7 +190,8 @@ def run_task(benchmark_path: str, task_name, runner_type, model, api_url, log_pa def run(benchmark_path: str, runner_type: str, model: str, api_url: str, log_path: str): """ Running the benchmark and generating logs - , help="The path to the benchmark repository where all the task reside." + Parameters: + benchmark-path: The path to the benchmark repository where all the task reside. """ # Create BenchRunner object runner = BenchRunner(runner_type, model, api_url) @@ -187,12 +199,16 @@ def run(benchmark_path: str, runner_type: str, model: str, api_url: str, log_pat # check folder to see if folder or yaml type to load benchmark if os.path.isdir(benchmark_path): content = os.listdir(benchmark_path) - if 'tasks.yml' in content: + if 'info.yml' in content: benchmark = Bench.from_yaml(benchmark_path) else: benchmark = Bench.from_folders(benchmark_path) - click.echo(f"Running {benchmark.bench_name} now") - benchmark.run(runner, log_path) + + if benchmark.bench_name: + click.echo(f"Running {benchmark.bench_name} now") + benchmark.run(runner, log_path) + else: + click.echo(f"path {benchmark_path} doesn't seem to have a benchmark in it") @click.group() From 10ae84ce2672809df48aa5d90dfc189f730aa26f Mon Sep 17 00:00:00 2001 From: Ayman Sandouk <111829133+AymanBx@users.noreply.github.com> Date: Fri, 27 Feb 2026 14:25:08 -0500 Subject: [PATCH 3/4] Apply suggestions from code review Co-authored-by: Sarah Brown --- benchtools/cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchtools/cli.py b/benchtools/cli.py index eac2613..05ee09b 100644 --- a/benchtools/cli.py +++ 
b/benchtools/cli.py @@ -133,7 +133,7 @@ def add_task(task_name, benchmark_path, task_source,task_type): click.echo("Invalid task content type. Either provide content with --task-source or specify the type of task content with --type.") exit(4356) - # Add Task to Bench + # Add Task to Bench, will write as well benchmark.add_task(task) click.echo(f"Added {task_name} to {benchmark.bench_name} benchmark successfully!") @@ -166,7 +166,7 @@ def run_task(benchmark_path: str, task_name, runner_type, model, api_url, log_pa # check folder to see if folder or yaml type to load benchmark if os.path.isdir(benchmark_path): content = os.listdir(benchmark_path) - if 'info.yml' in content: + if 'tasks.yml' in content: benchmark = Bench.from_yaml(benchmark_path) else: benchmark = Bench.from_folders(benchmark_path) @@ -199,7 +199,7 @@ def run(benchmark_path: str, runner_type: str, model: str, api_url: str, log_pat # check folder to see if folder or yaml type to load benchmark if os.path.isdir(benchmark_path): content = os.listdir(benchmark_path) - if 'info.yml' in content: + if 'tasks.yml' in content: benchmark = Bench.from_yaml(benchmark_path) else: benchmark = Bench.from_folders(benchmark_path) From ecc31a78b29db9cb0db58416a2ac37ca1f1b14ed Mon Sep 17 00:00:00 2001 From: AymanBx Date: Fri, 6 Mar 2026 18:21:43 +0000 Subject: [PATCH 4/4] Bench: Adding load method that checks tasks.yaml or task folders. Applying to CLI --- benchtools/benchmark.py | 30 ++++++++++++++++ benchtools/cli.py | 78 ++++++++++++++--------------------------- 2 files changed, 57 insertions(+), 51 deletions(-) diff --git a/benchtools/benchmark.py b/benchtools/benchmark.py index 56ce7d4..698e4c3 100644 --- a/benchtools/benchmark.py +++ b/benchtools/benchmark.py @@ -161,6 +161,36 @@ def from_yaml(cls, bench_path): return cls(name = info['bench_name'], bench_path =bench_path, concept= info['concept'], tasks=tasks) + @classmethod + def load(cls, bench_path): + ''' + Load a benchmark object from a given path. 
+ If the path given has a yaml tasks file, load tasks from + it and generate Task objects and add them to the bench. + Otherwise load the bench object from existing task folders. + + Parameters: + ----------- + bench_path: str + The path to the benchmark folder. The folder should contain the about.md file, + tasks.yml file or tasks folder. + + Returns: + -------- + Bench + An instance of the Bench class with the loaded benchmark. + ''' + # check folder to see if folder or yaml type to load benchmark + if not os.path.isdir(bench_path): + raise ValueError("The passed path doesn't exist.") + else: + content = os.listdir(bench_path) + if 'tasks.yml' in content: + return cls.from_yaml(bench_path) + else: + return cls.from_folders(bench_path) + + @staticmethod def load_info(bench_path): with open(os.path.join(bench_path, 'info.yml'), 'r') as f: diff --git a/benchtools/cli.py b/benchtools/cli.py index 05ee09b..b006371 100644 --- a/benchtools/cli.py +++ b/benchtools/cli.py @@ -106,39 +106,30 @@ def add_task(task_name, benchmark_path, task_source,task_type): """ - # check folder to see if folder or yaml type to load benchmark - if os.path.isdir(benchmark_path): - content = os.listdir(benchmark_path) - if 'info.yml' in content: - benchmark = Bench.from_yaml(benchmark_path) - else: - benchmark = Bench.from_folders(benchmark_path) + benchmark = Bench.load(benchmark_path) - if benchmark.written: - # Create Task object - if task_source: - if os.path.isdir(task_source): - task = Task.from_txt_csv(task_source) - elif os.path.isfile(task_source): - task = Task.from_yaml(task_source) - elif task_type: - match task_type: - case 'folders': - storage_type = 'csv' - case 'list': - storage_type = 'yaml' - task = Task.from_example(name=task_name, storage_type=storage_type) - # task.write() - else: - click.echo("Invalid task content type. 
Either provide content with --task-source or specify the type of task content with --type.") - exit(4356) + # Create Task object + if task_source: + if os.path.isdir(task_source): + task = Task.from_txt_csv(task_source) + elif os.path.isfile(task_source): + task = Task.from_yaml(task_source) + elif task_type: + match task_type: + case 'folders': + storage_type = 'csv' + case 'list': + storage_type = 'yaml' + task = Task.from_example(name=task_name, storage_type=storage_type) + # task.write() + else: + click.echo("Invalid task content type. Either provide content with --task-source or specify the type of task content with --type.") + exit(4356) # Add Task to Bench, will write as well benchmark.add_task(task) click.echo(f"Added {task_name} to {benchmark.bench_name} benchmark successfully!") - else: - click.echo(f"path {benchmark_path} doesn't seem to have a benchmark in it") @benchtool.command() @@ -163,19 +154,11 @@ def run_task(benchmark_path: str, task_name, runner_type, model, api_url, log_pa # Create BenchRunner object runner = BenchRunner(runner_type, model, api_url) - # check folder to see if folder or yaml type to load benchmark - if os.path.isdir(benchmark_path): - content = os.listdir(benchmark_path) - if 'tasks.yml' in content: - benchmark = Bench.from_yaml(benchmark_path) - else: - benchmark = Bench.from_folders(benchmark_path) + benchmark = Bench.load(benchmark_path) + - if benchmark.bench_name: - click.echo(f"Running {task_name} of benchmark {benchmark.bench_name} now") - benchmark.run_task(task_name, runner, log_path) - else: - click.echo(f"path {benchmark_path} doesn't seem to have a benchmark in it") + click.echo(f"Running {task_name} of benchmark {benchmark.bench_name} now") + benchmark.run_task(task_name, runner, log_path) @benchtool.command() @click.argument('benchmark-path', required = False, type=str, default='.') @@ -196,19 +179,12 @@ def run(benchmark_path: str, runner_type: str, model: str, api_url: str, log_pat # Create BenchRunner object runner 
= BenchRunner(runner_type, model, api_url) - # check folder to see if folder or yaml type to load benchmark - if os.path.isdir(benchmark_path): - content = os.listdir(benchmark_path) - if 'tasks.yml' in content: - benchmark = Bench.from_yaml(benchmark_path) - else: - benchmark = Bench.from_folders(benchmark_path) + benchmark = Bench.load(benchmark_path) + - if benchmark.bench_name: - click.echo(f"Running {benchmark.bench_name} now") - benchmark.run(runner, log_path) - else: - click.echo(f"path {benchmark_path} doesn't seem to have a benchmark in it") + click.echo(f"Running {benchmark.bench_name} now") + benchmark.run(runner, log_path) + @click.group()