Skip to content

Commit 40176c3

Browse files
authored
Merge pull request #46 from ml4sts/32-logging
Logger: initial logging algorithm
2 parents 29a220e + e18d4bc commit 40176c3

8 files changed

Lines changed: 361 additions & 246 deletions

File tree

benchtools/benchmark.py

Lines changed: 70 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,11 @@
55
import requests
66
import yaml
77
# from pathlib import Path # ???
8-
from .task import Task
8+
from benchtools.task import Task
99
from pathlib import PurePath
10+
from benchtools.runner import BenchRunner
11+
from .utils import load_asset
12+
1013

1114
about_template = """# {bench_name}
1215
@@ -42,7 +45,7 @@ class Bench():
4245
run()
4346
Run one task or all tasks of the benchmark.
4447
'''
45-
def __init__(self, name, base_path='.', bench_path=None, concept = None, tasks=[]):
48+
def __init__(self, name, base_path='.', bench_path=None, concept=None, tasks=[]):
4649
'''
4750
Initialize the benchmark object with the name and path to the benchmark folder.
4851
@@ -52,11 +55,10 @@ def __init__(self, name, base_path='.', bench_path=None, concept = None, tasks=[
5255
name of the benchmark will be used for folder
5356
path: str or buffer
5457
path where the benchmark will be stored
55-
5658
tasks: list of Task objects
57-
list of tasks to be included in the benchmark. Each task should be an instance of the
58-
59+
list of tasks to be included in the benchmark. Each task should be an instance of the Task class
5960
'''
61+
6062
# set up the object attributes
6163
self.display_name = name.strip()
6264
self.concept = concept if concept else f'a benchmark about {name.strip()}'
@@ -66,21 +68,26 @@ def __init__(self, name, base_path='.', bench_path=None, concept = None, tasks=[
6668
self.base_path = PurePath(bench_path).parent
6769
self.bench_path = bench_path
6870
else:
71+
# TODO: this way we don't have a base_path if above were true
6972
self.base_path = base_path
7073
self.bench_path = os.path.join(base_path, self.bench_name)
7174

7275
self.tasks_folder = os.path.join(self.bench_path, 'tasks')
7376
if tasks:
74-
self.tasks = {t.name:t for t in tasks} # initialize a task object for each task.
77+
# All task objects have to be initialized before adding them to a benchmark
78+
self.tasks = {t.name:t for t in tasks}
7579
else:
7680
self.tasks = {}
7781

82+
# `written` is True when the benchmark directory already exists on disk
7883
self.written = os.path.exists(self.bench_path)
7984

85+
8086
@classmethod
8187
def from_folders(cls, bench_path):
8288
'''
83-
Load a benchmark from a given path. The path should point to the benchmark folder.
89+
Load a benchmark object from a given path.
90+
The path should point to the benchmark folder.
8491
8592
Parameters:
8693
-----------
@@ -113,7 +120,12 @@ def from_folders(cls, bench_path):
113120
for task_dir in task_list:
114121
# load the tasks
115122
task_path = os.path.join(task_folder, task_dir)
116-
task = Task.from_txt_csv(task_path)
123+
task_content = os.listdir(task_path)
124+
if 'task_info.yml' in task_content:
125+
task_info_file = os.path.join(task_path, 'task_info.yml')
126+
task = Task.from_dict(task_info_file)
127+
else:
128+
task = Task.from_txt_csv(task_path)
117129
tasks.append(task)
118130
else:
119131
tasks = []
@@ -126,7 +138,7 @@ def from_folders(cls, bench_path):
126138
@classmethod
127139
def from_yaml(cls, bench_path):
128140
"""
129-
Load tasks from a YAML file and generate Task objects.
141+
Load tasks from a YAML file, generate Task objects, and add them to the bench.
130142
131143
Parameters
132144
----------
@@ -185,12 +197,9 @@ def initialize_dir(self, no_git=False):
185197
# Create a benchmarks folder with tasks in them
186198
tasks_path = os.path.join(self.bench_path, "tasks")
187199
os.mkdir(tasks_path)
188-
log_path = os.path.join(self.bench_path, "logs") # Do we want a log dir?
189-
os.mkdir(log_path) # Do we want a log dir?
190200

191201
# Create about.md
192202
about_path = os.path.join(self.bench_path, "about.md")
193-
194203
about_body = f"*{self.concept}*"
195204
about_text= about_template.format(bench_name=self.bench_name,
196205
text = about_body)
@@ -252,36 +261,67 @@ def init_repo(self, bench_path):
252261
except:
253262
print("git might not be initialized in your system. Please run \"git init . \" when setup")
254263
# Get python gitignore template and create .gitignore
255-
ignore_text = requests.get("https://raw.githubusercontent.com/github/gitignore/refs/heads/main/Python.gitignore")
256-
if ignore_text.status_code == 200:
257-
with open(".gitignore", 'a') as f:
258-
f.write(ignore_text.text)
264+
ignore_text = load_asset('.gitignore')
265+
# ignore_text = requests.get("https://raw.githubusercontent.com/github/gitignore/refs/heads/main/Python.gitignore")
266+
# if ignore_text.status_code == 200:
267+
with open(".gitignore", 'a') as f:
268+
f.write(ignore_text)
259269
os.chdir(current_dir)
260270

261271

262-
def add_task(self, task_object):
263-
# TODO: Look at content to create Task objects and add them to tasks
264-
# setup_task(self.tasks_folder, task_name, task_source))
272+
def add_task(self, task_object:Task):
265273

266-
# self.tasks.append(task)
274+
# Add task object to bench's tasks
267275
self.tasks[task_object.name] = task_object
268276

277+
# Check if written or not to write the task in the directory
278+
if self.written:
279+
task_folder = os.path.join(self.tasks_folder, task_object.id)
280+
if not os.path.exists(task_folder):
281+
os.mkdir(task_folder)
282+
else:
283+
# TODO: decide what should happen when the task folder already exists
284+
pass
285+
task_object.write(task_folder)
269286

270-
def run(self,model='gemma3',runner_type="ollama", api_url=None,):
287+
288+
def run(self, runner=BenchRunner(), log_dir=None):
271289
'''
272290
Run the benchmark by running each task in the benchmark and logging the interactions.
273291
Parameters:
274292
-----------
275-
model: str default 'gemma3'
276-
The name of the model to use for running the tasks. Default is 'gemma3'.
293+
runner: BenchRunner
294+
Defines which runner should be used to run the tasks.
295+
296+
runner.model : string
297+
the model to run the task on
298+
runner.api_url : string
299+
the url of the api to use for the task
300+
runner.runner_type: {ollama,openai}
301+
to use the Ollama runner, the script expects the model to be installed, and `ollama serve` running on localhost:11434
302+
to use OpenAI runner, you must have an API key set in your OPENAI_API_KEY environment variable
303+
log_dir: str
304+
Path to where the logs should be saved
277305
'''
278-
if not self.written:
306+
if not log_dir and not self.written:
279307
raise ValueError("Benchmark has not been written to disk yet, need to write in order to log.")
280-
# TODO deal with results
308+
309+
# Run each task
281310
for name, task in self.tasks.items():
282-
self.run_task(task, model,runner_type, api_url)
311+
self.run_task(task, runner, log_dir)
312+
313+
314+
def run_task(self, target_task=None, runner=BenchRunner(), log_dir=None):
315+
'''
316+
Run a single task of the benchmark.
317+
'''
318+
if not log_dir and not self.written:
319+
raise ValueError("Benchmark has not been written to disk yet, need to write in order to log.")
320+
321+
# If user doesn't specify a log_dir, default to logs folder inside bench folder
322+
if not log_dir:
323+
log_dir = os.path.join(self.bench_path, 'logs')
283324

284-
def run_task(self, target_task=None, model='gemma3',runner_type="ollama", api_url=None):
285325
if not(target_task):
286326
# TODO: use a generator and make this have a state
287327
target_task = list[self.tasks.keys()][0]
@@ -292,8 +332,9 @@ def run_task(self, target_task=None, model='gemma3',runner_type="ollama", api_ur
292332
task_object = target_task
293333
else:
294334
raise ValueError("target_task should be either a string (task name) or a Task object.")
335+
336+
# TODO: Add log_dir to attributes?
295337

296-
logging_path = os.path.join(self.bench_path, 'logs')
297-
return task_object.run(model,runner_type, api_url,logging_path)
338+
return task_object.run(runner, log_dir, self.bench_name, self.bench_path)
298339

299340

benchtools/betterbench.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,7 @@ def better_session(bench_path) -> dict:
9494
# Loop until user opts out
9595
for question, criteria in main_checklist.items():
9696
# TODO: add if(bench_checklist[skipped])
97-
# print(question) # DEbugging
98-
# # print(vals)
97+
9998
if len(criteria) == 4:
10099
choice = click.prompt(f"{question}?\nEnter to skip. q to end this session...", type=click.Choice(["yes", "no", 'q', ''], case_sensitive=False), show_choices=True, default='')
101100
else:
@@ -114,7 +113,7 @@ def better_session(bench_path) -> dict:
114113
score=0,
115114
)
116115
bench_checklist[question] = yaml.dump(item)
117-
print(bench_checklist[question])
116+
click.echo(bench_checklist[question])
118117
case 'yes':
119118
score = click.prompt(f"Please pick score level:\n0- {criteria[0]}\n5- {criteria[1]}\n10- {criteria[2]}\n15- {criteria[3]}\n", type=click.Choice([0, 5, 10, 15]), show_choices=True, default=5)
120119
justification = click.prompt("Justification? ")
@@ -125,7 +124,7 @@ def better_session(bench_path) -> dict:
125124
score=score,
126125
)
127126
bench_checklist[question] = yaml.dump(item)
128-
print(bench_checklist[question])
127+
click.echo(bench_checklist[question])
129128
case '':
130129
continue
131130

@@ -137,7 +136,7 @@ def better_session(bench_path) -> dict:
137136

138137

139138

140-
print(checklist_path) #debugging
139+
141140
# Save current checklist into the benchmark repo
142141
if os.path.exists(checklist_path):
143142
with open(checklist_path, 'w') as f:

benchtools/cli.py

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,9 @@ def init(benchmark_name, path, about, no_git):
9797
@benchtool.command()
9898
@click.argument('task-name', required = True, type=str, )
9999
@click.option('-p','--benchmark-path', default='.', help="The path to the benchmark repository where the task will be added.", type=str)
100-
@click.option('-s','task-source', type=str,help="The relative path to content that already exists`", required=True)
101-
@click.option('-t','--task-type', type=click.Choice(['folders', 'list']), help="The type of the task content being added. Options are csv or yml", required=True)
100+
@click.option('-s','--task-source', type=str,help="The relative path to content that already exists`", required=True)
101+
@click.option('-t','--task-type', type=click.Choice(['folders', 'list']),
102+
help="The type of the task content being added. Options are csv or yml", required=True)
102103
def add_task(task_name, bench_path, task_source,task_type):
103104
"""
104105
Set up a new task.
@@ -135,25 +136,54 @@ def add_task(task_name, bench_path, task_source,task_type):
135136
@benchtool.command()
136137
@click.argument('benchmark-path', required = True, type=str)
137138
@click.argument('task_name', required = True)
138-
def run_task(benchmark_path: str, task_name):
139+
@click.option('-r', '--runner-type', type=click.Choice(['ollama', 'openai', 'aws']),
140+
default="ollama", help="The engine that will run your LLM.")
141+
@click.option('-m', '--model', type=str, default="gemma3",
142+
help="The LLM to be benchmarked.")
143+
@click.option('-a', '--api-url', type=str, default=None,
144+
help="The api call required to access the runner engine.")
145+
@click.option('-l', '--log-path', type=str, default=None,
146+
help="The path to a log directory.")
147+
def run_task(benchmark_path: str, task_name, runner_type, model, api_url, log_path):
139148
"""
140149
Running the tasks and generating logs
141150
142151
, help="The path to the benchmark repository where all the task reside."
143152
, help="The name of the specific task you would like to run"
144153
"""
145154

146-
benchmark = Bench.load(benchmark_path)
155+
# Create BenchRunner object
156+
runner = BenchRunner(runner_type, model, api_url)
157+
158+
# Check the folder contents to decide whether to load the benchmark from YAML or from task folders
159+
if os.path.isdir(benchmark_path):
160+
content = os.listdir(benchmark_path)
161+
if 'tasks.yml' in content:
162+
benchmark = Bench.from_yaml(benchmark_path)
163+
else:
164+
benchmark = Bench.from_folders(benchmark_path)
165+
147166
click.echo(f"Running {task_name} now")
148-
benchmark.run([task_name])
167+
benchmark.run_task(task_name, runner, log_path)
149168

150169
@benchtool.command()
151170
@click.argument('benchmark-path', required = True, type=str)
152-
def run(benchmark_path: str):
171+
@click.option('-r', '--runner-type', type=click.Choice(['ollama', 'openai', 'aws']),
172+
default="ollama", help="The engine that will run your LLM.")
173+
@click.option('-m', '--model', type=str, default="gemma3",
174+
help="The LLM to be benchmarked.")
175+
@click.option('-a', '--api-url', type=str, default=None,
176+
help="The api call required to access the runner engine.")
177+
@click.option('-l', '--log-path', type=str, default=None,
178+
help="The path to a log directory.")
179+
def run(benchmark_path: str, runner_type: str, model: str, api_url: str, log_path: str):
153180
"""
154181
Running the benchmark and generating logs
155182
, help="The path to the benchmark repository where all the task reside."
156183
"""
184+
# Create BenchRunner object
185+
runner = BenchRunner(runner_type, model, api_url)
186+
157187
# Check the folder contents to decide whether to load the benchmark from YAML or from task folders
158188
if os.path.isdir(benchmark_path):
159189
content = os.listdir(benchmark_path)
@@ -162,7 +192,7 @@ def run(benchmark_path: str):
162192
else:
163193
benchmark = Bench.from_folders(benchmark_path)
164194
click.echo(f"Running {benchmark.bench_name} now")
165-
benchmark.run()
195+
benchmark.run(runner, log_path)
166196

167197

168198
@click.group()

0 commit comments

Comments
 (0)