7575FLAGS = flags .FLAGS
7676
7777
78- def get_summary_df (workload , workload_df , include_test_split = False ):
78+ def get_summary_df (workload , workload_df ):
7979 print (f' WORKLOAD: { workload } ' )
8080 validation_metric , validation_target = (
81- scoring_utils .get_workload_metrics_and_targets (workload , split = 'validation' )
81+ scoring_utils .get_workload_metrics_and_targets (workload )
8282 )
8383
8484 is_minimized = performance_profile .check_if_minimized (validation_metric )
@@ -127,7 +127,7 @@ def get_summary_df(workload, workload_df, include_test_split=False):
127127
128128 # compute the step times
129129 def delta (series ):
130- return series .shift ( 1 , fill_value = 0 ) - series
130+ return series .apply ( lambda x : np . diff ( x , prepend = 0 ))
131131
132132 accumulated_time_intervals = delta (workload_df ['accumulated_submission_time' ])
133133 step_intervals = delta (workload_df ['global_step' ])
@@ -136,57 +136,27 @@ def delta(series):
136136 f'WARNING: The number of evals may be too low to calculate reliable step time for { workload } '
137137 )
138138
139- summary_df ['step_time (s)' ] = np .median (
140- (accumulated_time_intervals / step_intervals ).iloc [0 ]
141- )
142-
143- summary_df ['step_hint' ] = scoring_utils .get_workload_stephint (workload )
144-
145- # test metrics
146- if include_test_split :
147- test_metric , test_target = scoring_utils .get_workload_metrics_and_targets (
148- workload , split = 'test'
139+ # Flatten all intervals from all trials and take the global median
140+ with np .errstate (divide = 'ignore' , invalid = 'ignore' ):
141+ all_ratios = np .concatenate (
142+ (accumulated_time_intervals / step_intervals ).values
149143 )
144+ summary_df ['step_time (s)' ] = np .nanmedian (all_ratios )
150145
151- summary_df ['test target metric name' ] = test_metric
152- summary_df ['test target metric value' ] = test_target
153-
154- summary_df ['test target reached' ] = (
155- workload_df [test_metric ]
156- .apply (lambda x : target_op (x , test_target ))
157- .apply (np .any )
158- )
159- summary_df ['best metric value on test' ] = workload_df [test_metric ].apply (
160- lambda x : best_op (x )
161- )
162- workload_df ['index best eval on test' ] = workload_df [test_metric ].apply (
163- lambda x : idx_op (x )
164- )
165- summary_df ['time to best eval on test (s)' ] = workload_df .apply (
166- lambda x : x ['accumulated_submission_time' ][x ['index best eval on test' ]],
167- axis = 1 ,
168- )
169- summary_df ['time to target on test (s)' ] = summary_df .apply (
170- lambda x : x ['time to best eval on test (s)' ]
171- if x ['test target reached' ]
172- else np .inf ,
173- axis = 1 ,
174- )
146+ summary_df ['step_hint' ] = scoring_utils .get_workload_stephint (workload )
175147
176148 return summary_df
177149
178150
179- def get_submission_summary (df , include_test_split = False ):
151+ def get_submission_summary (df ):
180152 """Summarizes the submission results into metric and time tables
181153 organized by workload.
182154 """
183155
184156 dfs = []
185157 print (df )
186158 for workload , group in df .groupby ('workload' ):
187- summary_df = get_summary_df (
188- workload , group , include_test_split = include_test_split
189- )
159+ summary_df = get_summary_df (workload , group )
190160 dfs .append (summary_df )
191161
192162 df = pd .concat (dfs )
0 commit comments