ROCm · ethanwee1 · May 11, 2026
diff --git a/.automation_scripts/pytorch-unit-test-scripts/detect_log_failures.py b/.automation_scripts/pytorch-unit-test-scripts/detect_log_failures.py
@@ -77,13 +77,23 @@
 
 
 def classify_log_file(filename):
-    """Return (platform, test_config, shard_num) from a log filename like rocm3.txt."""
+    """Return (platform, test_config, shard_num) from a log filename like rocm3.txt.
+
+    Commit-vs-commit parity prefixes log files with a commit SHA of 8-40 lowercase
+    hex digits (for example, 09e0c59b_rocm3.txt). Its first 8 digits are the platform
+    name used by generate_summary.py, so they replace the mapped platform here.
+    """
     stem = Path(filename).stem
+    label = None
+    m = re.match(r"(?P<label>[0-9a-f]{8,40})_(?P<stem>.+)", stem)
+    if m:
+        label = m.group("label")[:8]
+        stem = m.group("stem")
     for prefix, (platform, test_config) in sorted(LOG_FILE_MAP.items(), key=lambda x: -len(x[0])):
         if stem.startswith(prefix):
             remainder = stem[len(prefix):]
             if remainder.isdigit():
-                return platform, test_config, int(remainder)
+                return label or platform, test_config, int(remainder)
     return None, None, None
 
 

diff --git a/.automation_scripts/pytorch-unit-test-scripts/download_testlogs b/.automation_scripts/pytorch-unit-test-scripts/download_testlogs
@@ -287,7 +287,7 @@ def download_artifacts(wf, prefixes=[], test_folder=".", allowed_substrings=None
     )
     os.chdir("..")
 # for older runs, add 'created':'<=YYYY-MM-DD'. see https://docs.github.com/en/search-github/getting-started-with-searching-on-github/understanding-the-search-syntax#query-for-dates
-def download_workflow_run(created=None, max_pages=10, workflow=None, sha=None, ignore_status=False, status='success', error_msg='Error downloading workflow runs'):
+def download_workflow_run(created=None, max_pages=10, workflow=None, sha=None, branch=None, ignore_status=False, status='success', error_msg='Error downloading workflow runs'):
     if not workflow:
         raise Exception("Workflow must be specified")
     for page in range(max_pages):
@@ -299,8 +299,10 @@ def download_workflow_run(created=None, max_pages=10, workflow=None, sha=None, i
             params['created'] = created
         if sha:
             params['head_sha'] = sha
+            if branch:
+                params['branch'] = branch
         else:
-            params['branch'] = "main"
+            params['branch'] = branch or "main"
         print(".")
 
         # Uncomment below for additional debug info
@@ -316,6 +318,10 @@ def download_workflow_run(created=None, max_pages=10, workflow=None, sha=None, i
             raise Exception(response.text)
         if not workflow_runs:
             continue
+        if branch:
+            workflow_runs = [wf for wf in workflow_runs if wf.get('head_branch') == branch]
+            if not workflow_runs:
+                continue
         # Prefer completed runs over in-progress ones. When multiple
         # runs exist for the same SHA, the most recent may still be
         # running and have no artifacts yet.
@@ -496,6 +502,7 @@ def main():
         sha = args.sha1
         pr_id = None
     status = "success"
+    sha_branch = "main" if args.sha1 else None
     print(sha)
 
     # When comparing two commits, prefix log filenames with short SHAs
@@ -516,7 +523,7 @@ def main():
         #https://docs.github.com/en/rest/actions/workflow-runs#list-workflow-runs-for-a-repository
         periodic_fallback_used = False
         try:
-            periodic_wf = download_workflow_run(created=args.created, max_pages=args.max_pages, workflow=ROCmWorkflowNames["distributed"], sha=periodic_sha, ignore_status=args.ignore_status, status=status, error_msg=error_msg)
+            periodic_wf = download_workflow_run(created=args.created, max_pages=args.max_pages, workflow=ROCmWorkflowNames["distributed"], sha=periodic_sha, branch=sha_branch, ignore_status=args.ignore_status, status=status, error_msg=error_msg)
         except (IndexError, Exception):
             periodic_wf = None
         periodic_fallbacks = {
@@ -526,7 +533,7 @@ def main():
         if periodic_wf is None and arch in periodic_fallbacks:
             fallback_wf, fallback_prefix = periodic_fallbacks[arch]
             print(f"Distributed not found in {ROCmWorkflowNames['distributed']}, falling back to {fallback_wf}")
-            periodic_wf = download_workflow_run(created=args.created, max_pages=args.max_pages, workflow=fallback_wf, sha=periodic_sha, ignore_status=args.ignore_status, status=status, error_msg=error_msg)
+            periodic_wf = download_workflow_run(created=args.created, max_pages=args.max_pages, workflow=fallback_wf, sha=periodic_sha, branch=sha_branch, ignore_status=args.ignore_status, status=status, error_msg=error_msg)
             periodic_fallback_used = True
         if periodic_wf is None:
             raise Exception(error_msg)
@@ -578,7 +585,7 @@ def main():
         error_msg="Error: rocm workflow not found in scanned workflow runs. Try increasing max_pages."
         default_fallback_used = False
         try:
-            rocm_wf = download_workflow_run(created=args.created, max_pages=args.max_pages, workflow=ROCmWorkflowNames["default"], sha=rocm_sha, ignore_status=args.ignore_status, status=status, error_msg=error_msg)
+            rocm_wf = download_workflow_run(created=args.created, max_pages=args.max_pages, workflow=ROCmWorkflowNames["default"], sha=rocm_sha, branch=sha_branch, ignore_status=args.ignore_status, status=status, error_msg=error_msg)
         except (IndexError, Exception):
             rocm_wf = None
         default_fallbacks = {
@@ -587,7 +594,7 @@ def main():
         if rocm_wf is None and arch in default_fallbacks:
             fallback_wf, fallback_prefix = default_fallbacks[arch]
             print(f"Default not found in {ROCmWorkflowNames['default']}, falling back to {fallback_wf}")
-            rocm_wf = download_workflow_run(created=args.created, max_pages=args.max_pages, workflow=fallback_wf, sha=rocm_sha, ignore_status=args.ignore_status, status=status, error_msg=error_msg)
+            rocm_wf = download_workflow_run(created=args.created, max_pages=args.max_pages, workflow=fallback_wf, sha=rocm_sha, branch=sha_branch, ignore_status=args.ignore_status, status=status, error_msg=error_msg)
             default_fallback_used = True
             rocm_job_prefix['default'] = fallback_prefix
         if rocm_wf is None:
@@ -632,7 +639,7 @@ def main():
         print(f"Finding ROCm inductor tests in workflow '{ROCmWorkflowNames['inductor']}' by sha: {inductor_rocm_sha}")
         print("===========================================")
         error_msg="Error: inductor workflow not found in scanned workflow runs. Try increasing max_pages."
-        inductor_wf_rocm = download_workflow_run(created=args.created, max_pages=args.max_pages, workflow=ROCmWorkflowNames["inductor"], sha=inductor_rocm_sha, ignore_status=args.ignore_status, status=status, error_msg=error_msg)
+        inductor_wf_rocm = download_workflow_run(created=args.created, max_pages=args.max_pages, workflow=ROCmWorkflowNames["inductor"], sha=inductor_rocm_sha, branch=sha_branch, ignore_status=args.ignore_status, status=status, error_msg=error_msg)
         print(f"Using workflow '{ROCmWorkflowNames['inductor']}' with id:{inductor_wf_rocm['id']} for ROCm inductor")
 
         folder_list = get_or_create_test_folder(inductor_wf_rocm)
@@ -678,11 +685,15 @@ def main():
         if not args.ignore_status:
             params['status'] = status
         params['head_sha'] = sha
+        if sha_branch:
+            params['branch'] = sha_branch
         resp = requests.get(
             f"https://api.github.com/repos/pytorch/pytorch/actions/workflows/{CUDAWorkflowNames['default']}.yml/runs",
             headers=authentication_headers, params=params,
         )
         trunk_runs = resp.json().get('workflow_runs', [])
+        if sha_branch:
+            trunk_runs = [run for run in trunk_runs if run.get('head_branch') == sha_branch]
 
         for run in trunk_runs:
             jobs = get_workflow_jobs(run)
@@ -717,8 +728,13 @@ def main():
                             headers=authentication_headers,
                         )
                         trunk_wf = resp.json()
-                    print(f"CUDA test jobs are in trunk run {trunk_wf['id']} (found via check-runs)")
-                all_cuda_jobs = list(cuda_test_jobs)
+                    if sha_branch and trunk_wf.get('head_branch') != sha_branch:
+                        print(f"Skipping CUDA check-run from non-{sha_branch} branch run {trunk_wf.get('id')}")
+                        trunk_wf = None
+                        cuda_test_jobs = []
+                    else:
+                        print(f"CUDA test jobs are in trunk run {trunk_wf['id']} (found via check-runs)")
+                        all_cuda_jobs = list(cuda_test_jobs)
 
         if trunk_wf is None:
             trunk_wf = trunk_runs[0] if trunk_runs else None
@@ -793,7 +809,7 @@ def main():
             # find tests in inductor workflow with given sha and success status
             #https://docs.github.com/en/rest/actions/workflow-runs#list-workflow-runs-for-a-repository
             error_msg="Error: inductor workflow not found in scanned workflow runs. Try increasing max_pages."
-            inductor_wf_cuda = download_workflow_run(created=args.created, max_pages=args.max_pages, workflow=CUDAWorkflowNames["inductor"], sha=inductor_sha, ignore_status=args.ignore_status, status=status, error_msg=error_msg)
+            inductor_wf_cuda = download_workflow_run(created=args.created, max_pages=args.max_pages, workflow=CUDAWorkflowNames["inductor"], sha=inductor_sha, branch=sha_branch, ignore_status=args.ignore_status, status=status, error_msg=error_msg)
             print(f"Using workflow '{CUDAWorkflowNames['inductor']}' with id:{inductor_wf_cuda['id']} for CUDA inductor")
 
             folder_list = get_or_create_test_folder(inductor_wf_cuda)
@@ -840,7 +856,7 @@ def main():
             try:
                 baseline_default_wf = download_workflow_run(
                     created=args.created, max_pages=args.max_pages,
-                    workflow=ROCmWorkflowNames["default"], sha=baseline_sha,
+                    workflow=ROCmWorkflowNames["default"], sha=baseline_sha, branch="main",
                     ignore_status=args.ignore_status, status=status,
                     error_msg=f"Baseline default workflow not found for {baseline_sha}",
                 )
@@ -872,7 +888,7 @@ def main():
             try:
                 baseline_dist_wf = download_workflow_run(
                     created=args.created, max_pages=args.max_pages,
-                    workflow=ROCmWorkflowNames["distributed"], sha=baseline_sha,
+                    workflow=ROCmWorkflowNames["distributed"], sha=baseline_sha, branch="main",
                     ignore_status=args.ignore_status, status=status,
                     error_msg=f"Baseline distributed workflow not found for {baseline_sha}",
                 )
@@ -904,7 +920,7 @@ def main():
             try:
                 baseline_inductor_wf = download_workflow_run(
                     created=args.created, max_pages=args.max_pages,
-                    workflow=ROCmWorkflowNames["inductor"], sha=baseline_sha,
+                    workflow=ROCmWorkflowNames["inductor"], sha=baseline_sha, branch="main",
                     ignore_status=args.ignore_status, status=status,
                     error_msg=f"Baseline inductor workflow not found for {baseline_sha}",
                 )
@@ -943,7 +959,7 @@ def main():
         try:
             inductor_periodic_wf = download_workflow_run(
                 created=args.created, max_pages=args.max_pages,
-                workflow="inductor-periodic", sha=sha,
+                workflow="inductor-periodic", sha=sha, branch=sha_branch,
                 ignore_status=args.ignore_status, status=status,
                 error_msg=error_msg,
             )

diff --git a/.automation_scripts/pytorch-unit-test-scripts/summarize_xml_testreports.py b/.automation_scripts/pytorch-unit-test-scripts/summarize_xml_testreports.py
@@ -297,10 +297,15 @@ def summarize_xml_files(args):
     # test file level running time: ROCm and CUDA
     test_file_level_ROCm: Dict[Tuple[str], float] = {}
     test_file_level_CUDA: Dict[Tuple[str], float] = {}
+    test_file_shards_ROCm: Dict[Tuple[str], set] = {}
+    test_file_shards_CUDA: Dict[Tuple[str], set] = {}
     for (k,v) in list(test_cases_set1_running_time.items()):
           test_file_name = k[0]
           test_config_name = k[2]
           tar_tup_rocm = (test_file_name, test_config_name,)
+          test_file_shards_ROCm.setdefault(tar_tup_rocm, set())
+          if v.get("shard"):
+              test_file_shards_ROCm[tar_tup_rocm].add(v["shard"])
           if test_file_level_ROCm.get(tar_tup_rocm) == None:
               test_file_level_ROCm[ ( test_file_name, test_config_name ) ] = v["running_time_xml"]
           else:
@@ -309,6 +314,9 @@ def summarize_xml_files(args):
           test_file_name = k[0]
           test_config_name = k[2]
           tar_tup_cuda = (test_file_name, test_config_name)
+          test_file_shards_CUDA.setdefault(tar_tup_cuda, set())
+          if v.get("shard"):
+              test_file_shards_CUDA[tar_tup_cuda].add(v["shard"])
           if test_file_level_CUDA.get(tar_tup_cuda) == None:
               test_file_level_CUDA[ ( test_file_name, test_config_name ) ] = v["running_time_xml"]
           else:
@@ -588,43 +596,56 @@ def sorting_key(e):
 
     # write test file running time to file
     test_file_running_time_for_csv = {}
+    set1_running_time_col = f"{set1_name}_running_time"
+    set2_running_time_col = f"{set2_name}_running_time"
+    set1_tests_run_col = f"{set1_name}_tests_run"
+    set2_tests_run_col = f"{set2_name}_tests_run"
+    set1_test_shards_col = f"{set1_name}_test_shards"
+    set2_test_shards_col = f"{set2_name}_test_shards"
+    set1_passed_col = f"{set1_name}_passed"
+    set1_skipped_col = f"{set1_name}_skipped"
+    set1_missed_col = f"{set1_name}_missed"
     for key_rocm in test_file_level_ROCm.keys():
         item_values = {}
         item_values["test_file"] = key_rocm[0]
         item_values["test_config"] = key_rocm[1]
-        item_values["rocm_running_time"] = test_file_level_ROCm[key_rocm]
-        item_values["cuda_running_time"] = 0.0
+        item_values[set1_running_time_col] = test_file_level_ROCm[key_rocm]
+        item_values[set2_running_time_col] = 0.0
         if key_rocm in test_file_level_CUDA.keys():
-            item_values["cuda_running_time"] = test_file_level_CUDA[key_rocm]
-        item_values["abs_time_diff"] = item_values["rocm_running_time"] - item_values["cuda_running_time"]
+            item_values[set2_running_time_col] = test_file_level_CUDA[key_rocm]
+        item_values["abs_time_diff"] = item_values[set1_running_time_col] - item_values[set2_running_time_col]
         item_values["relative_time_diff"] = 0.0
-        if item_values["cuda_running_time"] != 0.0:
-            item_values["relative_time_diff"] = 100 * (item_values["rocm_running_time"] - item_values["cuda_running_time"]) / item_values["cuda_running_time"]
+        if item_values[set2_running_time_col] != 0.0:
+            item_values["relative_time_diff"] = 100 * (item_values[set1_running_time_col] - item_values[set2_running_time_col]) / item_values[set2_running_time_col]
         # Add test counts
-        item_values["rocm_tests_run"] = test_file_counts_ROCm.get(key_rocm, {}).get('tests_run', 0)
-        item_values["cuda_tests_run"] = test_file_counts_CUDA.get(key_rocm, 0)
-        item_values["rocm_passed"] = test_file_counts_ROCm.get(key_rocm, {}).get('passed', 0)
-        item_values["rocm_skipped"] = test_file_counts_ROCm.get(key_rocm, {}).get('skipped', 0)
-        item_values["rocm_missed"] = test_file_counts_ROCm.get(key_rocm, {}).get('missed', 0)
+        item_values[set1_tests_run_col] = test_file_counts_ROCm.get(key_rocm, {}).get('tests_run', 0)
+        item_values[set2_tests_run_col] = test_file_counts_CUDA.get(key_rocm, 0)
+        item_values[set1_test_shards_col] = len(test_file_shards_ROCm.get(key_rocm, set()))
+        item_values[set2_test_shards_col] = len(test_file_shards_CUDA.get(key_rocm, set()))
+        item_values[set1_passed_col] = test_file_counts_ROCm.get(key_rocm, {}).get('passed', 0)
+        item_values[set1_skipped_col] = test_file_counts_ROCm.get(key_rocm, {}).get('skipped', 0)
+        item_values[set1_missed_col] = test_file_counts_ROCm.get(key_rocm, {}).get('missed', 0)
         test_file_running_time_for_csv[key_rocm] = item_values
 
     for key_cuda in test_file_level_CUDA.keys():
         if not key_cuda in test_file_level_ROCm.keys():
             item_values = {}
             item_values["test_file"] = key_cuda[0]
             item_values["test_config"] = key_cuda[1]
-            item_values["rocm_running_time"] = 0.0
-            item_values["cuda_running_time"] = test_file_level_CUDA[key_cuda]
-            item_values["abs_time_diff"] = item_values["rocm_running_time"] - item_values["cuda_running_time"]
+            item_values[set1_running_time_col] = 0.0
+            item_values[set2_running_time_col] = test_file_level_CUDA[key_cuda]
+            item_values["abs_time_diff"] = item_values[set1_running_time_col] - item_values[set2_running_time_col]
             item_values["relative_time_diff"] = 0.0
-            if item_values["cuda_running_time"] != 0.0:
-                item_values["relative_time_diff"] = 100 * (item_values["rocm_running_time"] - item_values["cuda_running_time"]) / item_values["cuda_running_time"]
+            if item_values[set2_running_time_col] != 0.0:
+                item_values["relative_time_diff"] = 100 * (item_values[set1_running_time_col] - item_values[set2_running_time_col]) / item_values[set2_running_time_col]
             # Add test counts
-            item_values["rocm_tests_run"] = test_file_counts_ROCm.get(key_cuda, {}).get('tests_run', 0)
-            item_values["cuda_tests_run"] = test_file_counts_CUDA.get(key_cuda, 0)
-            item_values["rocm_passed"] = test_file_counts_ROCm.get(key_cuda, {}).get('passed', 0)
-            item_values["rocm_skipped"] = test_file_counts_ROCm.get(key_cuda, {}).get('skipped', 0)
-            item_values["rocm_missed"] = test_file_counts_ROCm.get(key_cuda, {}).get('missed', 0)
+            item_values[set1_tests_run_col] = test_file_counts_ROCm.get(key_cuda, {}).get('tests_run', 0)
+            item_values[set2_tests_run_col] = test_file_counts_CUDA.get(key_cuda, 0)
+            item_values[set1_test_shards_col] = len(test_file_shards_ROCm.get(key_cuda, set()))
+            item_values[set2_test_shards_col] = len(test_file_shards_CUDA.get(key_cuda, set()))
+            item_values[set1_passed_col] = test_file_counts_ROCm.get(key_cuda, {}).get('passed', 0)
+            item_values[set1_skipped_col] = test_file_counts_ROCm.get(key_cuda, {}).get('skipped', 0)
+            item_values[set1_missed_col] = test_file_counts_ROCm.get(key_cuda, {}).get('missed', 0)
             test_file_running_time_for_csv[key_cuda] = item_values
 
     test_file_running_time_for_csv = dict(sorted(test_file_running_time_for_csv.items()))
@@ -634,24 +655,28 @@ def sorting_key_running_time(e):
           return 0
         elif e == "test_config":
           return 1
-        elif e == "rocm_running_time":
+        elif e == set1_running_time_col:
           return 2
-        elif e == "cuda_running_time":
+        elif e == set2_running_time_col:
           return 3
         elif e == "abs_time_diff":
           return 4
         elif e == "relative_time_diff":
           return 5
-        elif e == "rocm_tests_run":
+        elif e == set1_tests_run_col:
           return 6
-        elif e == "cuda_tests_run":
+        elif e == set2_tests_run_col:
           return 7
-        elif e == "rocm_passed":
+        elif e == set1_test_shards_col:
           return 8
-        elif e == "rocm_skipped":
+        elif e == set2_test_shards_col:
           return 9
-        elif e == "rocm_missed":
+        elif e == set1_passed_col:
           return 10
+        elif e == set1_skipped_col:
+          return 11
+        elif e == set1_missed_col:
+          return 12
         else:
           return 100