|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +""" This module controls and parses the large runs that includes |
| 4 | +sweeping multiple parameters. """ |
1 | 5 | import itertools |
2 | 6 | import os |
3 | 7 | import sys |
|
7 | 11 | from scipy import stats |
8 | 12 |
|
# Define the global dictionary of swept parameters: each key is a
# command-line flag and each value is the list of settings to sweep over.
PARAMS_DICT = {
    "--seed": [1, 2],
    "--place_algorithm": ["criticality_timing"],
    "--place_agent_epsilon": [0.3],
}

# Set to True if you only care about specific metrics; when enabled, only
# the columns named in PARSED_METRICS (plus identifiers and sweep
# parameters) are kept when averaging results.
KEEP_METRICS_ONLY = True
PARSED_METRICS = ["num_io", "num_LAB"]
19 | 23 |
|
20 | 24 |
|
def safe_gmean(series):
    """Geometric mean of *series*, with zeros treated as missing values.

    Zeros are mapped to NaN and dropped before calling scipy's gmean, so a
    single zero entry cannot collapse the whole mean to zero.
    """
    cleaned = series.replace({0: np.nan}).dropna()
    return stats.gmean(cleaned)
24 | 29 |
|
def generate_combinations(params_dict=None):
    """Generate config lines for every combination of the sweep parameter values.

    Args:
        params_dict: optional mapping of flag name -> list of values to sweep;
            defaults to the module-level PARAMS_DICT (resolved at call time).

    Returns:
        A list of "script_params_list_add=..." lines, one per combination,
        each terminated with a newline.
    """
    if params_dict is None:
        params_dict = PARAMS_DICT
    keys = list(params_dict.keys())
    values = list(params_dict.values())

    lines = []
    # itertools.product yields one tuple per combination; no need to
    # materialize it into a list first.
    for combination in itertools.product(*values):
        params_str = " ".join(f"{key} {value}" for key, value in zip(keys, combination))
        lines.append(f"script_params_list_add={params_str}\n")
    return lines
35 | 42 |
|
def parse_results(input_path):
    """Parse the outputs of the most recent runXXX directory under *input_path*.

    Converts parse_results.txt into full_res.csv (expanding the combined
    "script_params" column into one column per sweep parameter), then
    produces avg_seed.csv, geomean_res.csv and a merged summary.xlsx.

    Exits the process with status 1 if no runXXX directory or no
    parse_results.txt file is found.
    """
    # Find the runXXX directory with the largest XXX
    run_dirs = [
        d for d in os.listdir(input_path) if d.startswith("run") and d[3:].isdigit()
    ]
    if not run_dirs:
        print("No runXXX directories found in the specified input path.")
        sys.exit(1)

    largest_run_path = os.path.join(input_path, max(run_dirs, key=lambda d: int(d[3:])))

    # Paths to parse_results.txt and full_res.csv (computed once, used below)
    parse_results_path = os.path.join(largest_run_path, "parse_results.txt")
    full_res_csv_path = os.path.join(largest_run_path, "full_res.csv")

    if not os.path.exists(parse_results_path):
        print("parse_results.txt not found.")
        sys.exit(1)

    # Read the parse_results.txt file and write to full_res.csv
    with open(parse_results_path, "r") as txt_file, open(
        full_res_csv_path, "w", newline=""
    ) as csv_file:
        reader = csv.reader(txt_file, delimiter="\t")
        writer = csv.writer(csv_file)

        headers = next(reader)
        script_params_index = headers.index("script_params")

        # Create new headers with PARAMS_DICT keys replacing "script_params"
        new_headers = (
            headers[:script_params_index]
            + list(PARAMS_DICT.keys())
            + headers[script_params_index + 1 :]
        )
        writer.writerow(new_headers)

        for row in reader:
            # Split the combined script_params value into per-parameter columns
            script_params_value = row[script_params_index]
            script_params_dict = parse_script_params(script_params_value)
            new_row = (
                row[:script_params_index]
                + [script_params_dict.get(key, "") for key in PARAMS_DICT]
                + row[script_params_index + 1 :]
            )
            writer.writerow(new_row)

    print(f"Converted parse_results.txt to {full_res_csv_path}")

    # Generate avg_seed.csv if --seed column exists
    generate_avg_seed_csv(full_res_csv_path, largest_run_path)
    print("Generated average seed results")

    # Generate gmean_res.csv
    generate_geomean_res_csv(
        os.path.join(largest_run_path, "avg_seed.csv"), largest_run_path
    )
    print("Generated geometric average results over all the circuits")

    generate_xlsx(largest_run_path)
    print("Generated xlsx that merges all the result csv files")
84 | 107 |
|
| 108 | + |
def generate_xlsx(largest_run_path):
    """Generate a xlsx file that includes the full results, average results over the seed
    and the geometrically averaged results over all the benchmarks."""

    # Sheet name -> source CSV file, in the order the sheets should appear.
    sheets = {
        "Full res": "full_res.csv",
        "Avg. seeds": "avg_seed.csv",
        "Summary": "geomean_res.csv",
    }
    output_excel_file = os.path.join(largest_run_path, "summary.xlsx")
    # Create an Excel writer object
    # pylint: disable=abstract-class-instantiated
    with pd.ExcelWriter(output_excel_file, engine="xlsxwriter") as writer:
        for sheet_name, csv_name in sheets.items():
            # Read each CSV file and write it to its own sheet
            frame = pd.read_csv(os.path.join(largest_run_path, csv_name))
            frame.to_excel(writer, sheet_name=sheet_name, index=False)
99 | | - |
def parse_script_params(script_params, params_dict=None):
    """Helper function to parse the script params values from each row in
    the parse_results.txt.

    The combined value is underscore-joined, e.g.
    "--seed_1_--place_algorithm_criticality_timing"; both keys and values may
    themselves contain underscores, so tokens are matched greedily against the
    known keys.

    Args:
        script_params: the combined script_params cell value.
        params_dict: optional mapping whose keys are the expected flags;
            defaults to the module-level PARAMS_DICT (resolved at call time).

    Returns:
        A dict mapping every key of params_dict to its parsed string value
        ("" for keys not present in script_params).
    """
    if params_dict is None:
        params_dict = PARAMS_DICT

    parsed_params = {key: "" for key in params_dict}
    # Pre-split every key once instead of re-splitting inside both loops.
    split_keys = [(key, key.split("_")) for key in params_dict]

    parts = script_params.split("_")
    i = 0

    while i < len(parts):
        for key, key_parts in split_keys:
            key_length = len(key_parts)

            if parts[i : i + key_length] == key_parts:
                # Collect value tokens until the start of the next known key.
                value_parts = []
                j = i + key_length

                while j < len(parts) and not any(
                    parts[j : j + len(kp)] == kp for _, kp in split_keys
                ):
                    value_parts.append(parts[j])
                    j += 1

                parsed_params[key] = "_".join(value_parts)
                i = j - 1
                break

        i += 1

    return parsed_params
126 | 163 |
|
| 164 | + |
def generate_avg_seed_csv(full_res_csv_path, output_dir):
    """Generate the average results over the seeds.

    Reads full_res.csv, optionally filters to the metrics of interest, averages
    the numeric columns across seeds for each (circuit, arch, sweep-params)
    group, and writes the result to <output_dir>/avg_seed.csv.
    """
    df = pd.read_csv(full_res_csv_path)

    if KEEP_METRICS_ONLY:
        # Keep only the identifying columns, the sweep parameters and the
        # metrics listed in PARSED_METRICS; drop everything else.
        col_to_keep = ["circuit", "arch"]
        col_to_keep.extend(list(PARAMS_DICT.keys()))
        col_to_keep.extend(PARSED_METRICS)
        df = df.drop(columns=[col for col in df.columns if col not in col_to_keep])

    # Check if '--seed' column is present
    if "--seed" in df.columns:
        # Determine the grouping keys: ['circuit', 'arch'] + keys from PARAMS_DICT that
        # are present in the dataframe
        grouping_keys = ["circuit", "arch"] + [
            key for key in PARAMS_DICT if key in df.columns and key != "--seed"
        ]

        # Group by specified keys and compute the mean for numeric columns
        df_grouped = df.groupby(grouping_keys).mean(numeric_only=True).reset_index()

        # Drop the '--seed' column if it exists
        if "--seed" in df_grouped.columns:
            df_grouped.drop(columns=["--seed"], inplace=True)
    else:
        # No seed column: nothing to average over, pass the data through as-is
        df_grouped = df

    # Save the resulting dataframe to a CSV file
    avg_seed_csv_path = os.path.join(output_dir, "avg_seed.csv")
    df_grouped.to_csv(avg_seed_csv_path, index=False)
154 | 195 |
|
def generate_geomean_res_csv(full_res_csv_path, output_dir):
    """Generate the geometric average results over the different circuits.

    Reads avg_seed.csv, groups rows by the non-seed sweep parameters, applies
    safe_gmean to every numeric column within each group, and writes the
    result to <output_dir>/geomean_res.csv.
    """
    df = pd.read_csv(full_res_csv_path)

    # Non-seed sweep parameters define the groups; every other column is
    # aggregated within each group.
    param_columns = [key for key in PARAMS_DICT if key != "--seed"]
    non_param_columns = [col for col in df.columns if col not in param_columns]

    geomean_df = (
        df.groupby(param_columns)
        .agg(
            {
                # Object (non-numeric) columns cannot be averaged; emit "".
                col: (lambda x: "" if x.dtype == "object" else safe_gmean(x))
                for col in non_param_columns
            }
        )
        .reset_index()
    )

    # Per-circuit identity is meaningless after averaging over circuits.
    geomean_df.drop(columns=["circuit"], inplace=True)
    geomean_df.drop(columns=["arch"], inplace=True)

    geomean_res_csv_path = os.path.join(output_dir, "geomean_res.csv")
    geomean_df.to_csv(geomean_res_csv_path, index=False)
170 | 221 |
|
| 222 | + |
def main():
    """Main function: dispatch on the command-line option.

    Usage: script.py (--generate | --parse) <path_to_directory>
    """
    if len(sys.argv) < 3:
        print("Usage: script.py <option> <path_to_directory>")
        sys.exit(1)

    option, directory_path = sys.argv[1], sys.argv[2]

    if option == "--parse":
        parse_results(directory_path)
    elif option == "--generate":
        # Generate the combinations
        config_lines = generate_combinations()

        # Define the path to the config file
        config_path = os.path.join(directory_path, "config", "config.txt")

        # Ensure the config directory exists
        os.makedirs(os.path.dirname(config_path), exist_ok=True)

        # Append the lines to the config file
        with open(config_path, "a") as file:
            file.writelines(config_lines)

        print(f"Appended lines to {config_path}")
    else:
        print("Invalid option. Use --generate or --parse")
        sys.exit(1)
201 | 255 |
|
| 256 | + |
202 | 257 | if __name__ == "__main__": |
203 | 258 | main() |
204 | | - |
|
0 commit comments