55import requests
66import yaml
77# from pathlib import Path # ???
8- from .task import Task
8+ from benchtools .task import Task
99from pathlib import PurePath
10+ from benchtools .runner import BenchRunner
11+ from .utils import load_asset
12+
1013
1114about_template = """# {bench_name}
1215
@@ -42,7 +45,7 @@ class Bench():
4245 run()
4346 Run one task or all tasks of the benchmark.
4447 '''
45- def __init__ (self , name , base_path = '.' , bench_path = None , concept = None , tasks = []):
48+ def __init__ (self , name , base_path = '.' , bench_path = None , concept = None , tasks = []):
4649 '''
4750 Initialize the benchmark object with the name and path to the benchmark folder.
4851
@@ -52,11 +55,10 @@ def __init__(self, name, base_path='.', bench_path=None, concept = None, tasks=[
5255 name of the benchmark will be used for folder
5356 path: str or buffer
5457 path where the benchmark will be stored
55-
5658 tasks: list of Task objects
57- list of tasks to be included in the benchmark. Each task should be an instance of the
58-
59+ list of tasks to be included in the benchmark. Each task should be an instance of the Task class
5960 '''
61+
6062 # set up the object attributes
6163 self .display_name = name .strip ()
6264 self .concept = concept if concept else f'a benchmark about { name .strip ()} '
@@ -66,21 +68,26 @@ def __init__(self, name, base_path='.', bench_path=None, concept = None, tasks=[
6668 self .base_path = PurePath (bench_path ).parent
6769 self .bench_path = bench_path
6870 else :
71+ # TODO: this way we don't have a base_path if above were true
6972 self .base_path = base_path
7073 self .bench_path = os .path .join (base_path , self .bench_name )
7174
7275 self .tasks_folder = os .path .join (self .bench_path , 'tasks' )
7376 if tasks :
74- self .tasks = {t .name :t for t in tasks } # initialize a task object for each task.
77+ # All task objects have to be initialized before adding them to a benchmark
78+ self .tasks = {t .name :t for t in tasks }
7579 else :
7680 self .tasks = {}
7781
82+ # Written if the benchmark directory has been initialized
7883 self .written = os .path .exists (self .bench_path )
7984
85+
8086 @classmethod
8187 def from_folders (cls , bench_path ):
8288 '''
83- Load a benchmark from a given path. The path should point to the benchmark folder.
89+ Load a benchmark object from a given path.
90+ The path should point to the benchmark folder.
8491
8592 Parameters:
8693 -----------
@@ -113,7 +120,12 @@ def from_folders(cls, bench_path):
113120 for task_dir in task_list :
114121 # load the tasks
115122 task_path = os .path .join (task_folder , task_dir )
116- task = Task .from_txt_csv (task_path )
123+ task_content = os .listdir (task_path )
124+ if 'task_info.yml' in task_content :
125+ task_info_file = os .path .join (task_path , 'task_info.yml' )
126+ task = Task .from_dict (task_info_file )
127+ else :
128+ task = Task .from_txt_csv (task_path )
117129 tasks .append (task )
118130 else :
119131 tasks = []
@@ -126,7 +138,7 @@ def from_folders(cls, bench_path):
126138 @classmethod
127139 def from_yaml (cls , bench_path ):
128140 """
129- Load tasks from a YAML file and generate Task objects.
141+ Load tasks from a YAML file and generate Task objects and add them to the bench
130142
131143 Parameters
132144 ----------
@@ -185,12 +197,9 @@ def initialize_dir(self, no_git=False):
185197 # Create a benchmarks folder with tasks in them
186198 tasks_path = os .path .join (self .bench_path , "tasks" )
187199 os .mkdir (tasks_path )
188- log_path = os .path .join (self .bench_path , "logs" ) # Do we want a log dir?
189- os .mkdir (log_path ) # Do we want a log dir?
190200
191201 # Create about.md
192202 about_path = os .path .join (self .bench_path , "about.md" )
193-
194203 about_body = f"*{ self .concept } *"
195204 about_text = about_template .format (bench_name = self .bench_name ,
196205 text = about_body )
@@ -252,36 +261,67 @@ def init_repo(self, bench_path):
252261 except :
253262 print ("git might not be initialized in your system. Please run \" git init . \" when setup" )
254263 # Get python gitignore template and create .gitignore
255- ignore_text = requests .get ("https://raw.githubusercontent.com/github/gitignore/refs/heads/main/Python.gitignore" )
256- if ignore_text .status_code == 200 :
257- with open (".gitignore" , 'a' ) as f :
258- f .write (ignore_text .text )
264+ ignore_text = load_asset ('.gitignore' )
265+ # ignore_text = requests.get("https://raw.githubusercontent.com/github/gitignore/refs/heads/main/Python.gitignore")
266+ # if ignore_text.status_code == 200:
267+ with open (".gitignore" , 'a' ) as f :
268+ f .write (ignore_text )
259269 os .chdir (current_dir )
260270
261271
def add_task(self, task_object: "Task"):
    '''
    Add a task to the benchmark and, if the benchmark is on disk, write the task too.

    Parameters:
    -----------
    task_object: Task
        An already-initialized task. It is stored in `self.tasks` under
        `task_object.name`, replacing any task already stored under that name.
        When the benchmark has been written, the task is written to a folder
        named `task_object.id` inside the benchmark's tasks folder.
    '''
    # Add task object to bench's tasks
    self.tasks[task_object.name] = task_object

    # Nothing to write until the benchmark directory has been initialized
    if not self.written:
        return

    task_folder = os.path.join(self.tasks_folder, task_object.id)
    # exist_ok tolerates a task folder that is already there; makedirs also
    # recreates the parent tasks folder if it is missing.
    # TODO: decide whether an existing task folder should be overwritten or rejected
    os.makedirs(task_folder, exist_ok=True)
    task_object.write(task_folder)
269286
def run(self, runner=None, log_dir=None):
    '''
    Run the benchmark by running each task in the benchmark and logging the interactions.

    Parameters:
    -----------
    runner: BenchRunner
        define which runner should be used for the tasks.
        If omitted, a default `BenchRunner()` is created for this call.

        runner.model : string
            the model to run the task on
        runner.api_url : string
            the url of the api to use for the task
        runner.runner_type: {ollama,openai}
            to use the Ollama runner, the script expects the model to be installed, and `ollama serve` running on localhost:11434
            to use OpenAI runner, you must have an API key set in your OPENAI_API_KEY environment variable
    log_dir: str
        Path to where the logs should be saved

    Raises:
    -------
    ValueError
        If no `log_dir` is given and the benchmark has not been written to disk.
    '''
    if not log_dir and not self.written:
        raise ValueError("Benchmark has not been written to disk yet, need to write in order to log.")

    # Build the default runner per call: a `BenchRunner()` in the signature would be
    # constructed once at definition time and shared by every call.
    if runner is None:
        runner = BenchRunner()

    # Run each task with the same runner and log location
    for task in self.tasks.values():
        self.run_task(task, runner, log_dir)
312+
313+
def run_task(self, target_task=None, runner=None, log_dir=None):
    '''
    Run a specific task and return the result of its `run` call.

    Parameters:
    -----------
    target_task: str or Task
        Name of a task in this benchmark, or a Task object.
        If omitted, the first task in the benchmark is run.
    runner: BenchRunner
        Runner to use for the task. If omitted, a default `BenchRunner()`
        is created for this call.
    log_dir: str
        Path to where the logs should be saved. Defaults to a `logs` folder
        inside the benchmark folder.

    Raises:
    -------
    ValueError
        If no `log_dir` is given and the benchmark has not been written to disk,
        if no task is given and the benchmark has no tasks, or if `target_task`
        is neither a string nor a Task object.
    '''
    if not log_dir and not self.written:
        raise ValueError("Benchmark has not been written to disk yet, need to write in order to log.")

    # If user doesn't specify a log_dir, default to logs folder inside bench folder
    if not log_dir:
        log_dir = os.path.join(self.bench_path, 'logs')

    # Build the default runner per call: a `BenchRunner()` in the signature would be
    # constructed once at definition time and shared by every call.
    if runner is None:
        runner = BenchRunner()

    if not target_task:
        # TODO: use a generator and make this have a state
        if not self.tasks:
            raise ValueError("Benchmark has no tasks to run.")
        # First task name in insertion order (`list[...]` subscripts the type and fails)
        target_task = next(iter(self.tasks))

    # NOTE(review): this dispatch is reconstructed from the error message below and the
    # name-keyed `self.tasks` dict; confirm against the full file.
    if isinstance(target_task, str):
        task_object = self.tasks[target_task]
    elif isinstance(target_task, Task):
        task_object = target_task
    else:
        raise ValueError("target_task should be either a string (task name) or a Task object.")

    # TODO: Add log_dir to attributes?

    return task_object.run(runner, log_dir, self.bench_name, self.bench_path)
298339
299340
0 commit comments