mlcommons · arav-agarwal2 · Feb 2, 2026 · Feb 3, 2026 · Feb 23, 2026 · Feb 23, 2026
@@ -1,4 +1,4 @@
-# Automatic code formatting
+# Automatic code formatting.
 name: "Code formatting"
 on:
   push:

@@ -2,6 +2,14 @@
 
 Please follow the [official submission automation page](https://docs.mlcommons.org/inference/submission/) for doing a submission. It wraps all the submission related files listed below. 
 
+## Installation
+
+Install the prerequisites for the submission checker using the provided `requirements.txt` file:
+
+```bash
+pip install -r requirements.txt
+```
+
 ## `truncate_accuracy_log.py` (Mandatory)
 
 ### Inputs

@@ -14,19 +14,13 @@
 
 import argparse
 import json
-import logging
+from loguru import logger
 import os
 import re
 import sys
 
 # pylint: disable=missing-docstring
 
-logging.basicConfig(
-    level=logging.INFO,
-    format="[%(asctime)s %(filename)s:%(lineno)d %(levelname)s] %(message)s",
-)
-
-
 class MLPerfLog:
     def __init__(self, log_path, strict=True):
         """
@@ -39,7 +33,7 @@ def __init__(self, log_path, strict=True):
         self.endpoints_marker = ":::ENDPTS"
         self.marker = ""
         self.log_is_endpoints = False
-        self.logger = logging.getLogger("MLPerfLog")
+        self.logger = logger
         self.messages = []
         with open(log_path, "r", encoding="utf-8") as f:
             for i, line in enumerate(f):

@@ -24,12 +24,7 @@
 import re
 import traceback
 import uuid
-import logging
-
-
-logging.basicConfig(level=logging.INFO)
-log = logging.getLogger("main")
-
+from loguru import logger as log
 
 class LineWithoutTimeStamp(Exception):
     pass

@@ -3,7 +3,7 @@
 """
 
 import argparse
-import logging
+from loguru import logger as log
 import os
 import sys
 import shutil
@@ -12,10 +12,6 @@
 import submission_checker_old as checker
 
 
-logging.basicConfig(level=logging.INFO)
-log = logging.getLogger("main")
-
-
 HELP_TEXT = """
 pick an existing submission directory and create a brand new submission tree with
     possible results being inferred from already measured ones. The original submission directory is not modified.
@@ -92,7 +88,7 @@ def delete_empty_dirs(src):
         return False
     if all([delete_empty_dirs(os.path.join(src, file))
            for file in os.listdir(src)]):
-        log.info("Removing empty dir: (%s)", src)
+        log.info("Removing empty dir: {src}", src=src)
         os.rmdir(src)
         return True
 
@@ -388,15 +384,14 @@ def infer_scenario_results(args, config):
             for directory in ["results"]:
                 log_path = os.path.join(division, submitter, directory)
                 if not os.path.exists(log_path):
-                    log.error("no submission in %s", log_path)
+                    log.error("no submission in {log_path}", log_path=log_path)
                     continue
 
                 for system_desc in list_dir(log_path):
                     system_id_json = os.path.join(division, submitter, "systems",
                                                   system_desc + ".json")
                     if not os.path.exists(system_id_json):
-                        log.error("no system_desc for %s/%s/%s", division, submitter,
-                                  system_desc)
+                        log.error("no system_desc for {division}/{submitter}/{system_desc}", division=division, submitter=submitter, system_desc=system_desc)
                         continue
 
                     with open(system_id_json) as system_info:
@@ -405,9 +400,9 @@ def infer_scenario_results(args, config):
                     valid_system_types = ["datacenter", "edge",
                                           "datacenter,edge", "edge,datacenter"]
                     if system_type not in valid_system_types:
-                        log.error("Division %s, submitter %s, "
-                                  "system %s has invalid system type (%s)",
-                                  division, submitter, system_id_json, system_type)
+                        log.error("Division {division}, submitter {submitter}, "
+                                  "system {system_id_json} has invalid system type ({system_type})",
+                                  division=division, submitter=submitter, system_id_json=system_id_json, system_type=system_type)
 
                     config.set_type(system_type)
 
@@ -422,9 +417,9 @@ def infer_scenario_results(args, config):
                         mlperf_model = config.get_mlperf_model(
                             model, extra_model_mapping)
                         if not mlperf_model:
-                            log.error("Division %s, submitter %s, system %s has "
-                                      "invalid model (%s)", division, submitter,
-                                      system_id_json, model)
+                            log.error("Division {division}, submitter {submitter}, system {system_id_json} has "
+                                      "invalid model ({model})", division=division, submitter=submitter,
+                                      system_id_json=system_id_json, model=model)
                             continue
 
                         if mlperf_model not in config.required:
@@ -485,11 +480,11 @@ def infer_scenario_results(args, config):
                                     for tobeinferredpath in tobeinferredpaths:
                                         inferred_scenario = os.path.basename(
                                             tobeinferredpath)
-                                        log.info("Division %s, submitter %s, system %s, "
-                                                 "model %s: \
-                                                inferring %s results from %s",
-                                                 division, submitter, system_desc, model,
-                                                 inferred_scenario, "singlestream")
+                                        log.info("Division {division}, submitter {submitter}, system {system_desc}, "
+                                                 "model {model}: "
+                                                 "inferring {inferred_scenario} results from {singlestream}",
+                                                 division=division, submitter=submitter, system_desc=system_desc, model=model,
+                                                 inferred_scenario=inferred_scenario, singlestream="singlestream")
                                         shutil.copytree(
                                             scenario_path, tobeinferredpath)
 
@@ -498,9 +493,9 @@ def infer_scenario_results(args, config):
                                     # infer MS from SS
                                     for tobeinferredpath in [
                                             multistream_scenario_path]:
-                                        log.info("Division %s, submitter %s, system %s, model %s: \
-                                                inferring %s results from %s", division, submitter,
-                                                 system_desc, model, "multistream", "singlestream")
+                                        log.info("Division {division}, submitter {submitter}, system {system_desc}, model {model}: \
+                                                inferring {inferred_scenario} results from {singlestream}", division=division, submitter=submitter, system_desc=system_desc, model=model,
+                                                 inferred_scenario="multistream", singlestream="singlestream")
                                         shutil.copytree(
                                             scenario_path, multistream_scenario_path)
                                 elif not os.path.exists(offline_scenario_path):
@@ -519,9 +514,9 @@ def infer_scenario_results(args, config):
                                 for tobeinferredpath in [
                                         offline_scenario_path]:
                                     if not os.path.exists(tobeinferredpath):
-                                        log.info("Division %s, submitter %s, system %s, model %s: \
-                                                inferring %s results from %s", division, submitter,
-                                                 system_desc, model, "offline", "multistream")
+                                        log.info("Division {division}, submitter {submitter}, system {system_desc}, model {model}: \
+                                                inferring {inferred_scenario} results from {multistream}", division=division, submitter=submitter, system_desc=system_desc, model=model,
+                                                 inferred_scenario="offline", multistream="multistream")
 
                                         shutil.copytree(
                                             scenario_path, tobeinferredpath)
@@ -538,9 +533,8 @@ def infer_scenario_results(args, config):
                                 low_accuracy_model_path = os.path.join(log_path, system_desc,
                                                                        low_accuracy_model)
                                 if not os.path.exists(low_accuracy_model_path):
-                                    log.info("Division %s, submitter %s, system %s: \
-                                            copying %s results to %s", division, submitter,
-                                             system_desc, model, low_accuracy_model)
+                                    log.info("Division {division}, submitter {submitter}, system {system_desc}: \
+                                            copying {model} results to {low_accuracy_model}", division=division, submitter=submitter, system_desc=system_desc, model=model, low_accuracy_model=low_accuracy_model)
 
                                     shutil.copytree(high_accuracy_model_path,
                                                     low_accuracy_model_path)

@@ -0,0 +1 @@
+loguru
@@ -122,10 +122,10 @@ def accuracy_result_check(self):
                 elif acc is not None:
                     all_accuracy_valid = False
                     self.log.warning(
-                        "%s accuracy not met: expected=%f, found=%s",
-                        self.path,
-                        acc_target,
-                        acc,
+                        "{path} accuracy not met: expected={acc_target}, found={acc}",
+                        path=self.path,
+                        acc_target=acc_target,
+                        acc=acc,
                     )
                 if acc:
                     result_acc[acc_type] = acc
@@ -147,10 +147,10 @@ def accuracy_result_check(self):
                     ):
                         acc_limit_check = False
                         self.log.warning(
-                            "%s accuracy not met: upper limit=%f, found=%s",
-                            self.path,
-                            acc_limit,
-                            acc,
+                            "{path} accuracy not met: upper limit={acc_limit}, found={acc}",
+                            path=self.path,
+                            acc_limit=acc_limit,
+                            acc=acc,
                         )
                     acc = None
             if all(acc_seen) and hash_val:
@@ -159,7 +159,9 @@ def accuracy_result_check(self):
         if acc_upper_limit is not None:
             is_valid &= acc_limit_check
         if not hash_val:
-            self.log.error("%s not hash value for accuracy.txt", self.path)
+            self.log.error(
+                "{path} not hash value for accuracy.txt",
+                path=self.path)
             is_valid = False
         self.submission_logs.loader_data["accuracy_metrics"] = result_acc
         if self.division.lower() == "open":
@@ -174,11 +176,15 @@ def accuracy_json_check(self):
                 exceed `MAX_ACCURACY_LOG_SIZE`, False otherwise.
         """
         if not os.path.exists(self.accuracy_json):
-            self.log.error("%s is missing", self.accuracy_json)
+            self.log.error(
+                "{accuracy_json} is missing",
+                accuracy_json=self.accuracy_json)
             return False
         else:
             if os.stat(self.accuracy_json).st_size > MAX_ACCURACY_LOG_SIZE:
-                self.log.error("%s is not truncated", self.accuracy_json)
+                self.log.error(
+                    "{accuracy_json} is not truncated",
+                    accuracy_json=self.accuracy_json)
                 return False
         return True
 
@@ -198,13 +204,13 @@ def loadgen_errors_check(self):
                 for error in self.mlperf_log.get_errors():
                     if "Loadgen built with uncommitted changes!" not in error["value"]:
                         has_other_errors = True
-            self.log.error("%s contains errors:", self.path)
+            self.log.error("{path} contains errors:", path=self.path)
             for error in self.mlperf_log.get_errors():
-                self.log.error("%s", error["value"])
+                self.log.error("{error_value}", error_value=error["value"])
 
             if not self.config.ignore_uncommited or has_other_errors:
                 self.log.error(
-                    "%s has loadgen errors, number of errors: %s", self.path, self.mlperf_log.num_errors()
+                    "{path} has loadgen errors, number of errors: {num_errors}", path=self.path, num_errors=self.mlperf_log.num_errors()
                 )
                 return False
         return True
@@ -223,7 +229,7 @@ def dataset_check(self):
         """
         if self.config.skip_dataset_size_check:
             self.log.info(
-                "%s Skipping dataset size check", self.path
+                "{path} Skipping dataset size check", path=self.path
             )
             return True
         expected_qsl_total_count = self.config.get_accuracy_sample_count(
@@ -235,7 +241,7 @@ def dataset_check(self):
 
         if qsl_total_count != expected_qsl_total_count:
             self.log.error(
-                "%s accurcy run does not cover all dataset, accuracy samples: %s, dataset size: %s", self.path, qsl_total_count, expected_qsl_total_count
+                "{path} accuracy run does not cover all dataset, accuracy samples: {qsl_total_count}, dataset size: {expected_qsl_total_count}", path=self.path, qsl_total_count=qsl_total_count, expected_qsl_total_count=expected_qsl_total_count
             )
             return False
         return True

@@ -27,10 +27,10 @@ def run_checks(self):
             except BaseException:
                 valid &= False
                 self.log.error(
-                    "Execution occurred in running check %s. Running %s in %s",
-                    self.path,
-                    check.__name__,
-                    self.__class__.__name__)
+                    "Execution occurred in running check {path}. Running {check_name} in {class_name}",
+                    path=self.path,
+                    check_name=check.__name__,
+                    class_name=self.__class__.__name__)
         return valid
 
     def execute(self, check):
@@ -39,13 +39,13 @@ def execute(self, check):
 
     def __call__(self):
         """Allows the check instance to be called like a function."""
-        self.log.info("Starting %s for: %s", self.name, self.path)
+        self.log.info("Starting {name} for: {path}", name=self.name, path=self.path)
         valid = self.run_checks()
         if valid:
-            self.log.info("All %s checks passed for: %s", self.name, self.path)
+            self.log.info("All {name} checks passed for: {path}", name=self.name, path=self.path)
         else:
             self.log.error(
-                "Some %s Checks failed for: %s",
-                self.name,
-                self.path)
+                "Some {name} Checks failed for: {path}",
+                name=self.name,
+                path=self.path)
         return valid