From a2e8d7bafda1ea0341e561c64c388386d7d4ffb6 Mon Sep 17 00:00:00 2001 From: AymanBx Date: Tue, 24 Feb 2026 07:25:41 +0000 Subject: [PATCH 1/3] Small bug --- benchtools/benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchtools/benchmark.py b/benchtools/benchmark.py index bf01989..cec39e7 100644 --- a/benchtools/benchmark.py +++ b/benchtools/benchmark.py @@ -212,7 +212,7 @@ def initialize_dir(self, no_git=False): # store tasks task_types = set([task.storage_type for task in self.tasks.values()]) if 'csv' in task_types: - os.mkdir(self.bench_path,'tasks') + os.mkdir(os.path.join(self.bench_path,'tasks')) for task_name, task_object in self.tasks.items(): task_object.write(self.bench_path) From 104e57611a4195ecdfcf591cd3cf888e52a04983 Mon Sep 17 00:00:00 2001 From: AymanBx Date: Tue, 24 Feb 2026 07:28:08 +0000 Subject: [PATCH 2/3] task.py: adding bedrock-runtime as a runner option --- benchtools/task.py | 38 ++++++++++++++++++++++++++++++++++---- pyproject.toml | 3 ++- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/benchtools/task.py b/benchtools/task.py index 4988e40..343ac43 100644 --- a/benchtools/task.py +++ b/benchtools/task.py @@ -1,7 +1,9 @@ # defines a class object for a task # from openai import OpenAI import os -import yaml # requires pyyaml +import yaml +import json +import boto3 import pandas as pd from ollama import chat, ChatResponse, Client from benchtools.logger import init_log_folder, log_interaction @@ -204,11 +206,18 @@ def generate_prompts(self): # TODO: consider if this could be a generator function if there are a lot of variants, to avoid memory issues. For now, we will assume that the number of variants is small enough to generate all prompts at once. 
if self.variant_values: id_prompt_list = [] - for value_set in self.variant_values: + + keys = self.variant_values.keys() + + for i in range(len(list(self.variant_values.values())[0])): + single_dict={} prompt = self.template - prompt = prompt.format(**value_set) - prompt_id = self.prompt_id_generator(self.task_id,value_set) + for key in keys: + single_dict.update({key: self.variant_values[key][i]}) + prompt = prompt.format(**single_dict) + prompt_id = self.prompt_id_generator(self.task_id,single_dict) id_prompt_list.append((prompt_id,prompt)) + return id_prompt_list else: return [(self.name, self.template)] @@ -260,6 +269,9 @@ def write_csv(self, target_folder): ''' write the task to a csv file with a task.txt template file ''' + # Create task folder + os.mkdir(os.path.join(target_folder, self.task_id)) + # write the template with open(os.path.join(target_folder,self.task_id, 'template.txt'), 'w') as f: f.write(self.template) @@ -358,6 +370,24 @@ def run(self, runner=BenchRunner(), log_dir='logs', benchmark=None, bench_path=N ) response = chat_completion.choices[0].message.content responses.append(response) + case "bedrock": + bedrock_client = boto3.client('bedrock-runtime') + completion = bedrock_client.invoke_model( + modelId = runner.model, + body = json.dumps( + { + 'messages': [ + { + 'role': 'user', + 'content': sub_task + } + ] + } + ) + ) + response = json.loads(completion['body'].read()) + response = response['choices'][0]['message']['content'] + responses.append(response) case _: print(f"Runner type {runner.runner_type} not supported") return None diff --git a/pyproject.toml b/pyproject.toml index 6d1d33e..cb19c90 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,8 @@ dependencies = [ "pandas", "datasets", "openai", - "ollama" + "ollama", + "boto3" ] requires-python = ">=3.10" authors = [ From 545ab306b907a4aab758bfff2abc65c1d83ed443 Mon Sep 17 00:00:00 2001 From: AymanBx Date: Tue, 24 Feb 2026 08:17:23 +0000 Subject: [PATCH 3/3] changing 
runner option to bedrock --- benchtools/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchtools/cli.py b/benchtools/cli.py index 3b67c3b..f0cc65e 100644 --- a/benchtools/cli.py +++ b/benchtools/cli.py @@ -136,7 +136,7 @@ def add_task(task_name, bench_path, task_source,task_type): @benchtool.command() @click.argument('benchmark-path', required = True, type=str) @click.argument('task_name', required = True) -@click.option('-r', '--runner-type', type=click.Choice(['ollama', 'openai', 'aws']), +@click.option('-r', '--runner-type', type=click.Choice(['ollama', 'openai', 'bedrock']), default="ollama", help="The engine that will run your LLM.") @click.option('-m', '--model', type=str, default="gemma3", help="The LLM to be benchmarked.") @@ -168,7 +168,7 @@ def run_task(benchmark_path: str, task_name, runner_type, model, api_url, log_pa @benchtool.command() @click.argument('benchmark-path', required = True, type=str) -@click.option('-r', '--runner-type', type=click.Choice(['ollama', 'openai', 'aws']), +@click.option('-r', '--runner-type', type=click.Choice(['ollama', 'openai', 'bedrock']), default="ollama", help="The engine that will run your LLM.") @click.option('-m', '--model', type=str, default="gemma3", help="The LLM to be benchmarked.")