-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun.py
More file actions
87 lines (64 loc) · 3.63 KB
/
run.py
File metadata and controls
87 lines (64 loc) · 3.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import subprocess
import os
import pandas as pd
def submit_job(job_script):
    """Submit ``job_script`` with ``sbatch`` and return the reported job ID.

    The job ID is taken to be the last whitespace-separated token that
    ``sbatch`` printed on its standard output.

    Args:
        job_script: Path of the batch script passed to ``sbatch``.

    Returns:
        The job ID as a string, or ``None`` when the submission failed. A
        failure is reported on standard output rather than raised, so a
        caller looping over many scripts can carry on with the next one.
    """
    try:
        # check=True turns a non-zero sbatch exit status into CalledProcessError.
        completed = subprocess.run(
            ['sbatch', job_script], check=True, capture_output=True, text=True
        )
    except subprocess.CalledProcessError as e:
        # str(e) only carries the exit status; append whatever diagnostic
        # sbatch wrote to stderr so the reason for the failure is visible.
        details = (e.stderr or "").strip()
        message = f"An error occurred while submitting {job_script}: {e}"
        if details:
            message += f"\n{details}"
        print(message)
        return None
    except (OSError, ValueError) as e:
        # The process could not be started at all (e.g. sbatch not on PATH).
        print(f"An error occurred while submitting {job_script}: {e}")
        return None

    print(completed)
    # Get the Job ID
    tokens = completed.stdout.split()
    if not tokens:
        print(f"An error occurred while submitting {job_script}: "
              "sbatch printed nothing to read a job ID from")
        return None
    return tokens[-1]
# Substring markers looked for in a row's nodelist, paired with the benchmark
# base directory used when the marker is found. The order matters: 'g' also
# occurs inside 'spitfire-ng', so the more specific markers are tried first.
_CLUSTER_BASE_DIRS = (
    ('spitfire-ng', "/mnt/share/sambit98/EFFORT_BENCHMARK/benchmark/"),  # spitfire
    ('ac', "/scratch/user/u.sm121949/EFFORT_BENCHMARK/benchmark/"),  # ACES
    ('fc', "/scratch/user/sambit98/EFFORT_BENCHMARK/benchmark/"),  # FASTER
    ('g', "/scratch/user/sambit98/EFFORT_BENCHMARK/benchmark/"),  # Grace
)

# Columns of configurations.csv, in the order they are unpacked per row.
_CONFIG_COLUMNS = (
    'prefix', 'partition', 'gpu', 'nodelist', 'nsteps', 'backend', 'caware',
    'order', 'precision', 'nnodes', 'nparts-per-node', 'etype', 'nelements',
)


def _base_dir_for(nodelist):
    """Return the base directory of the first cluster whose marker is in nodelist.

    Raises:
        ValueError: If no known cluster marker occurs in ``nodelist``.
    """
    for marker, base_dir in _CLUSTER_BASE_DIRS:
        if marker in nodelist:
            return base_dir
    raise ValueError(f"Cluster not supported (nodelist: {nodelist!r})")


def main():
    """Submit one sbatch job per row of configurations.csv that has not run yet.

    For every row the script name is built from the row's fields. A row is
    skipped when a directory named after that script already exists under
    ``<base_dir>/solns/``. Otherwise the directory is created, made the
    working directory, and the script from ``<base_dir>/scripts/`` is
    submitted from there. The job ID is appended to a timestamped text file
    in that directory.

    A failed submission is not recorded, and the directory created for it is
    removed again (if still empty) so the next run retries that row instead
    of skipping it.

    Returns:
        The list of job IDs of the jobs submitted by this call.
    """
    configs = pd.read_csv(
        'configurations.csv',
        comment='#',
        dtype={
            'prefix': str,
            'nodelist': str,
            'nsteps': int, 'backend': str, 'caware': int, 'order': int, 'precision': str,
            'nnodes': int, 'nparts-per-node': int,
            'etype': str, 'nelements': int,
            'partition': str, 'gpu': str,
        },
        skipinitialspace=True,
    )

    # List to store all the Job IDs
    job_ids = []

    # Iterate over the configurations and submit the script of each one
    for (prefix, partition, gpu, nodelist, nsteps, backend, caware, order,
         precision, nnodes, ntasks, etype, nelements) in zip(
            *(configs[name] for name in _CONFIG_COLUMNS)):
        base_dir = _base_dir_for(nodelist)
        soln_dir = base_dir + "solns/"
        script_dir = base_dir + "scripts/"

        # 'nparts-per-node' times the node count gives the task count used in names.
        total_tasks = ntasks * nnodes
        sbatch_script = f"{prefix}partition{partition}_gpu{gpu}_nodelist{nodelist}_steps{nsteps}_backend{backend}_caware{caware}_order{order}_precision{precision}_nodes{nnodes}_tasks{total_tasks}_{etype}{nelements}.sh"
        script_name = os.path.basename(sbatch_script)

        # The directory name is the script name up to its first '.'.
        # NOTE(review): a '.' inside any field would shorten the name; confirm
        # the configuration values never contain one.
        job_directory = script_name.split('.')[0]
        job_path = soln_dir + job_directory

        print(f"Does the directory for {script_name} exist? \t", end="")

        # An existing directory marks the configuration as already handled.
        if os.path.exists(job_path):
            print(f"YES! SKIPPING SIMULATION! The directory for {script_name} already exists. Delete it to resubmit the job.")
            continue

        print("NO")
        # Create the directory and submit from inside it.
        os.makedirs(job_path)
        os.chdir(job_path)
        job_id = submit_job(script_dir + sbatch_script)

        if job_id is None:
            # Leaving the directory behind would make every later run skip
            # this configuration although it was never submitted.
            os.chdir(soln_dir)
            try:
                os.rmdir(job_path)  # only succeeds while the directory is empty
            except OSError:
                print(f"Submission of {sbatch_script} FAILED. Could not remove {job_path}; delete it to resubmit the job.")
            else:
                print(f"Submission of {sbatch_script} FAILED. Removed the empty directory so the job can be resubmitted.")
            continue

        # Record the Job ID in a file inside the job directory (the current
        # working directory); the file name is stamped to the minute.
        stamp = pd.Timestamp.now().strftime('%Y-%m-%d_%H-%M')
        with open(f"{prefix}partition{partition}_gpu{gpu}_steps{nsteps}_backend{backend}_caware{caware}_order{order}_precision{precision}_job_ids_{stamp}.txt", 'a') as file:
            file.write(f"nodelist{nodelist}_nodes{nnodes}_tasks{total_tasks}_{etype}{nelements},{job_id}\n")
        os.chdir(soln_dir)

        job_ids.append(job_id)
        print(f"Job ID: {job_id} \t script: {sbatch_script}")

    return job_ids


if __name__ == "__main__":
    main()