File tree: 4 files changed, +12 -0 lines changed.
reference_algorithms/schedule_free/pytorch: 4 files changed, +12 -0 lines changed.
Original file line number Diff line number Diff line change @@ -344,6 +344,7 @@ def eval_model(self,
344344 eval_metrics ['validation/' + k ] = v
345345 eval_metrics ['validation/num_examples' ] = self .num_validation_examples
346346 # Evaluate on the test set. TODO(znado): always eval on the test set.
347+
347348 try :
348349 if self .num_test_examples is not None :
349350 logging .info ('Evaluating on the test split.' )
Original file line number Diff line number Diff line change @@ -31,6 +31,8 @@ def target_metric_name(self) -> str:
3131
def has_reached_validation_target(self, eval_result: Dict[str,
                                                          float]) -> bool:
    """Return whether the validation loss is strictly below the target.

    Args:
        eval_result: Evaluation metrics. Only the 'validation/loss' entry
            is read.

    Returns:
        True when eval_result['validation/loss'] is strictly less than
        self.validation_target_value, otherwise False.

    Raises:
        KeyError: If eval_result has no 'validation/loss' entry.
    """
    # Function-scope import: the file's import block is not visible from
    # this block, so the dependency is brought into scope locally.
    import logging

    validation_loss = eval_result['validation/loss']
    # Debug-level, lazily formatted logging replaces the bare print() calls so
    # the diagnostics respect the configured log level instead of always
    # writing to stdout on every evaluation.
    logging.debug("eval_result['validation/loss']: %s", validation_loss)
    logging.debug('self.validation_target_value: %s',
                  self.validation_target_value)
    return validation_loss < self.validation_target_value
3537
3638 @property
@@ -78,10 +80,14 @@ def num_eval_train_examples(self) -> int:
@property
def num_validation_examples(self) -> int:
    """Return the hard-coded number of validation examples."""
    return 83_274_637
84+
8185
@property
def num_test_examples(self) -> int:
    """Return the hard-coded number of test examples."""
    return 95_000_000
90+
8591
8692 @property
8793 def train_mean (self ):
Original file line number Diff line number Diff line change @@ -276,6 +276,10 @@ def closure():
276276
277277 loss = optimizer_state ['optimizer' ].step (closure )
278278
279+ logging .info ('%d) loss = %0.3f' ,
280+ global_step ,
281+ loss .item ())
282+
279283 return (optimizer_state , current_param_container , new_model_state )
280284
281285
Original file line number Diff line number Diff line change @@ -375,6 +375,7 @@ def train_once(
375375 except spec .TrainingCompleteError :
376376 train_state ['training_complete' ] = True
377377 global_step += 1
378+ logging .info (f'Global step: { global_step } , batch size: { len (batch )} ' )
378379 if (max_global_steps is not None ) and (global_step == max_global_steps ):
379380 train_state ['training_complete' ] = True
380381
You can’t perform that action at this time.
0 commit comments