Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion benchtools/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def initialize_dir(self, no_git=False):
# store tasks
task_types = set([task.storage_type for task in self.tasks.values()])
if 'csv' in task_types:
os.mkdir(self.bench_path,'tasks')
os.mkdir(os.path.join(self.bench_path,'tasks'))
for task_name, task_object in self.tasks.items():
task_object.write(self.bench_path)

Expand Down
4 changes: 2 additions & 2 deletions benchtools/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def add_task(task_name, bench_path, task_source,task_type):
@benchtool.command()
@click.argument('benchmark-path', required = True, type=str)
@click.argument('task_name', required = True)
@click.option('-r', '--runner-type', type=click.Choice(['ollama', 'openai', 'aws']),
@click.option('-r', '--runner-type', type=click.Choice(['ollama', 'openai', 'bedrock']),
default="ollama", help="The engine that will run your LLM.")
@click.option('-m', '--model', type=str, default="gemma3",
help="The LLM to be benchmarked.")
Expand Down Expand Up @@ -168,7 +168,7 @@ def run_task(benchmark_path: str, task_name, runner_type, model, api_url, log_pa

@benchtool.command()
@click.argument('benchmark-path', required = True, type=str)
@click.option('-r', '--runner-type', type=click.Choice(['ollama', 'openai', 'aws']),
@click.option('-r', '--runner-type', type=click.Choice(['ollama', 'openai', 'bedrock']),
default="ollama", help="The engine that will run your LLM.")
@click.option('-m', '--model', type=str, default="gemma3",
help="The LLM to be benchmarked.")
Expand Down
38 changes: 34 additions & 4 deletions benchtools/task.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# defines a class object for a task
# from openai import OpenAI
import os
import yaml # requires pyyaml
import yaml
import json
import boto3
import pandas as pd
from ollama import chat, ChatResponse, Client
from benchtools.logger import init_log_folder, log_interaction
Expand Down Expand Up @@ -204,11 +206,18 @@ def generate_prompts(self):
# TODO: consider if this could be a generator function if there are a lot of variants, to avoid memory issues. For now, we will assume that the number of variants is small enough to generate all prompts at once.
if self.variant_values:
id_prompt_list = []
for value_set in self.variant_values:

keys = self.variant_values.keys()

for i in range(len(list(self.variant_values.values())[0])):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no for i in range loops, that's not pythonic style and is very hard to parse

single_dict={}
prompt = self.template
prompt = prompt.format(**value_set)
prompt_id = self.prompt_id_generator(self.task_id,value_set)
for key in keys:
single_dict.update({key: self.variant_values[key][i]})
prompt = prompt.format(**single_dict)
prompt_id = self.prompt_id_generator(self.task_id,single_dict)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no. if the thing isn't working then it's because the data got loaded wrong. this makes no sense

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tested it before and after. Before, it was giving an error about the passed data not being Map data (paraphrasing, not verbatim).

I looked up the format method and from what I saw it takes a simple dict object with key-value. I didn't see an instance where it took a dict of key-[list of values].

unless I didn't understand what exactly you were trying to do...

id_prompt_list.append((prompt_id,prompt))

return id_prompt_list
else:
return [(self.name, self.template)]
Expand Down Expand Up @@ -260,6 +269,9 @@ def write_csv(self, target_folder):
'''
write the task to a csv file with a task.txt template file
'''
# Create task folder
os.mkdir(os.path.join(target_folder, self.task_id))

# write the template
with open(os.path.join(target_folder,self.task_id, 'template.txt'), 'w') as f:
f.write(self.template)
Expand Down Expand Up @@ -358,6 +370,24 @@ def run(self, runner=BenchRunner(), log_dir='logs', benchmark=None, bench_path=N
)
response = chat_completion.choices[0].message.content
responses.append(response)
case "bedrock":
bedrock_client = boto3.client('bedrock-runtime')
completeion = bedrock_client.invoke_model(
modelId = runner.model,
body = json.dumps(
{
'messages': [
{
'role': 'user',
'content': sub_task
}
]
}
)
)
response = json.loads(completeion['body'].read())
response = response['choices'][0]['message']['content']
responses.append(response)
case _:
print(f"Runner type {runner.runner_type} not supported")
return None
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ dependencies = [
"pandas",
"datasets",
"openai",
"ollama"
"ollama",
"boto3"
]
requires-python = ">=3.10"
authors = [
Expand Down