Add debug logging of key information: validation loss vs. target, per-step training loss, and global step with batch length

init-22 · init-22 · commit b6768b25e31e · 2025-06-01T16:57:50.000Z
diff --git a/algoperf/spec.py b/algoperf/spec.py
@@ -344,6 +344,7 @@ def eval_model(self,
       eval_metrics['validation/' + k] = v
     eval_metrics['validation/num_examples'] = self.num_validation_examples
     # Evaluate on the test set. TODO(znado): always eval on the test set.
+
     try:
       if self.num_test_examples is not None:
         logging.info('Evaluating on the test split.')
diff --git a/algoperf/workloads/criteo1tb/workload.py b/algoperf/workloads/criteo1tb/workload.py
@@ -31,6 +31,8 @@ def target_metric_name(self) -> str:
 
   def has_reached_validation_target(self, eval_result: Dict[str,
                                                             float]) -> bool:
+    print("eval_result['validation/loss']:", eval_result['validation/loss'])
+    print("self.validation_target_value:", self.validation_target_value)
     return eval_result['validation/loss'] < self.validation_target_value
 
   @property
@@ -78,10 +80,14 @@ def num_eval_train_examples(self) -> int:
   @property
   def num_validation_examples(self) -> int:
     return 83_274_637
+    #return  math.ceil(262_144 / self.eval_batch_size)
+  
 
   @property
   def num_test_examples(self) -> int:
     return 95_000_000
+    #return math.ceil(262_144 / self.eval_batch_size)
+  
 
   @property
   def train_mean(self):
diff --git a/reference_algorithms/schedule_free/pytorch/submission.py b/reference_algorithms/schedule_free/pytorch/submission.py
@@ -276,6 +276,10 @@ def closure():
 
   loss = optimizer_state['optimizer'].step(closure)
 
+  logging.info('%d) loss = %0.3f',
+              global_step,
+              loss.item())
+
   return (optimizer_state, current_param_container, new_model_state)
 
 
diff --git a/submission_runner.py b/submission_runner.py
@@ -375,6 +375,7 @@ def train_once(
     except spec.TrainingCompleteError:
       train_state['training_complete'] = True
     global_step += 1
+    logging.info(f'Global step: {global_step}, batch size: {len(batch)}')
     if (max_global_steps is not None) and (global_step == max_global_steps):
       train_state['training_complete'] = True