ROCm · pablo-garay · May 29, 2026 · May 29, 2026 · May 29, 2026 · May 29, 2026
diff --git a/.automation_scripts/pytorch-unit-test-scripts/detect_log_failures.py b/.automation_scripts/pytorch-unit-test-scripts/detect_log_failures.py
@@ -292,6 +292,16 @@ def scan_logs(logs_dir):
         job_total = shard_totals.get((platform, test_config), 0)
         job_shard_str = f"{shard_num}/{job_total}" if job_total else str(shard_num)
 
+        # If download_testlogs left a "<log>.job_url" file next to this log,
+        # it contains the URL of the upstream pytorch CI job that produced
+        # the log. We surface it in the LOG-BASED FAILURES table as a link
+        # to that job's page. job_url stays "" for older runs without that file.
+        job_url_file = os.path.join(logs_dir, fname + ".job_url")
+        job_url = ""
+        if os.path.isfile(job_url_file):
+            with open(job_url_file) as f:
+                job_url = f.read().strip()
+
         filepath = os.path.join(logs_dir, fname)
         results, consistent_failures, flaky_tests = parse_log_file(filepath)
 
@@ -306,6 +316,7 @@ def scan_logs(logs_dir):
                 "test_name": ft["method"],
                 "job_shard": job_shard_str,
                 "test_shard": ft["test_shard"],
+                "job_url": job_url,
             })
 
         # Record every (test_file, test_shard) observed in this log file,
@@ -365,6 +376,7 @@ def scan_logs(logs_dir):
                 "category": "+".join(categories),
                 "reason": reason,
                 "exit_codes": ",".join(str(c) for c in info["exit_codes"]),
+                "job_url": job_url,
             })
 
         for test_path, shard_str in consistent_failures:
@@ -384,6 +396,7 @@ def scan_logs(logs_dir):
                 "category": "CONSISTENT_FAILURE",
                 "reason": f"{test_class}::{test_name}" if test_class else "",
                 "exit_codes": "",
+                "job_url": job_url,
             })
 
     def _sort_shards(vals):
@@ -420,6 +433,7 @@ def write_csv_report(failures, output_path):
         "log_file", "platform", "test_config", "test_file",
         "job_shard", "test_shard",
         "status", "category", "reason", "exit_codes",
+        "job_url",
     ]
     with open(output_path, "w", newline="") as f:
         writer = csv.DictWriter(f, fieldnames=fieldnames)
@@ -441,6 +455,7 @@ def write_flaky_report(flaky, output_path):
     fieldnames = [
         "log_file", "platform", "test_config", "test_file",
         "test_class", "test_name", "job_shard", "test_shard",
+        "job_url",
     ]
     with open(output_path, "w", newline="") as f:
         writer = csv.DictWriter(f, fieldnames=fieldnames)

diff --git a/.automation_scripts/pytorch-unit-test-scripts/download_testlogs b/.automation_scripts/pytorch-unit-test-scripts/download_testlogs
@@ -90,6 +90,14 @@ def write_test_log_to_file(filename, test_key, jobs, sha):
     with open(filename, "w", encoding="utf-8") as f:
         f.write(response.text)
 
+    # Save the upstream pytorch CI job's page URL next to the log so
+    # detect_log_failures.py can later surface it as a link in the
+    # LOG-BASED FAILURES table of the parity summary.
+    job_url = js[0].get('html_url', '')
+    if job_url:
+        with open(filename + ".job_url", "w", encoding="utf-8") as f:
+            f.write(job_url)
+
 def get_workflow_jobs(wf):
     """Get all jobs for a workflow run."""
     if wf is None:
@@ -239,11 +247,13 @@ def _shorten_unzipped_dirs():
       unzipped-test-reports-runattempt1-test-default-1-6-linux.rocm.gpu.gfx942.1_68613413431.zip
       unzipped-test-reports-runattempt1-test-osdc-default-1-5-mt-l-x86aavx2-29-113-l4_73385044118.zip
     to:
-      test-default-1-6
-      test-default-1-5
+      test-default-1-6_68613413431
+      test-default-1-5_73385044118
 
     Preserves the 'test-<config>' prefix so that summarize_xml_testreports.py
-    can still detect workflow type via substring matching.
+    can still detect workflow type via substring matching. The trailing
+    '_<jobid>' is the upstream pytorch CI job id, used to link to the
+    failing job from the parity summary.
     """
     from pathlib import Path
     for d in sorted(Path(".").glob("unzipped-*")):
@@ -252,6 +262,13 @@ def _shorten_unzipped_dirs():
         m = re.search(r'test-(?:osdc-)?(default|distributed|inductor)-(\d+)-(\d+)', d.name)
         if m:
             short_name = f"test-{m.group(1)}-{m.group(2)}-{m.group(3)}"
+            # The original artifact name ends with "_<jobid>.zip" where
+            # <jobid> is the upstream pytorch CI job id (6+ digits, e.g.
+            # ..._68613413431.zip). Carry it onto short_name so
+            # summarize_xml_testreports.py can link to that job.
+            job_id_match = re.search(r'_(\d{6,})\.zip$', d.name)
+            if job_id_match:
+                short_name += f"_{job_id_match.group(1)}"
             if not Path(short_name).exists():
                 d.rename(short_name)
                 print(f"  Renamed {d.name} -> {short_name}")
@@ -297,6 +314,9 @@ def download_xml_files(workflow_run_id, workflow_run_attempts, prefixes=[], allo
 
     _shorten_unzipped_dirs()
 
+    with open("_wf_run_id", "w") as f:
+        f.write(str(workflow_run_id))
+
     # Delete raw zip files now that contents are extracted
     for path in artifact_paths:
         try:

diff --git a/.automation_scripts/pytorch-unit-test-scripts/generate_summary.py b/.automation_scripts/pytorch-unit-test-scripts/generate_summary.py
@@ -3,6 +3,7 @@
 import argparse
 import csv
 import os
+import re
 import sys
 
 
@@ -289,10 +290,12 @@ def collect_failed_tests(arch_data, archs, s1_name, s2_name):
                     'test_name': r.get('test_name', ''),
                     'test_config': r.get('test_config', ''),
                     f'shard_{s1_name}': r.get(f'shard_{s1_name}', ''),
+                    f'job_url_{s1_name}': r.get(f'job_url_{s1_name}', ''),
                     f'status_{s1_name}': s1,
                 }
                 if has_set2:
                     entry[f'shard_{s2_name}'] = r.get(f'shard_{s2_name}', '')
+                    entry[f'job_url_{s2_name}'] = r.get(f'job_url_{s2_name}', '')
                     entry[f'status_{s2_name}'] = s2
                 failed.append(entry)
 
@@ -418,6 +421,7 @@ def load_flaky_tests_as_log_failures(filepaths):
                     'category': 'FLAKY',
                     'reason': f'{test_class}::{test_name}' if test_class else test_name,
                     'exit_codes': '',
+                    'job_url': row.get('job_url', ''),
                 })
     return entries
 
@@ -691,6 +695,16 @@ def _xml_test_shard(t, platform):
                _norm_test_file(t.get('test_file', '')))
         return _format_test_shards(shard_lookup.get(key, ''))
 
+    def _job_id_link(url):
+        if not url:
+            return ''
+        # Label the link with the job id (digits after "/job/" in the URL), e.g.
+        # [76905282313](...). A URL with no such job id yields an empty cell.
+        m = re.search(r'/job/(\d+)', url)
+        if not m:
+            return ''
+        return f'[{m.group(1)}]({url})'
+
     cols = ['Arch', 'Test Config', 'Test File', 'Test Class', 'Test Name',
             f'Job-Level Shard ({s1_name})',
             f'Test-Level Shard ({s1_name})']
@@ -701,6 +715,9 @@ def _xml_test_shard(t, platform):
     if has_set2:
         cols.append(f'Status ({s2_name})')
     cols.append('Also Failing In')
+    cols.append(f'Job ID ({s1_name})')
+    if has_set2:
+        cols.append(f'Job ID ({s2_name})')
 
     if s1_failed:
         lines.append(f'### FAILED TESTS ({len(s1_failed)})')
@@ -718,7 +735,11 @@ def _xml_test_shard(t, platform):
             line += f" | {t[f'status_{s1_name}']}"
             if has_set2:
                 line += f" | {t.get(f'status_{s2_name}', '')}"
-            line += f" | {t.get('also_failing_in', '')} |"
+            line += f" | {t.get('also_failing_in', '')}"
+            line += f" | {_job_id_link(t.get(f'job_url_{s1_name}', ''))}"
+            if has_set2:
+                line += f" | {_job_id_link(t.get(f'job_url_{s2_name}', ''))}"
+            line += ' |'
             lines.append(line)
         lines.append('')
     else:
@@ -748,8 +769,8 @@ def _xml_test_shard(t, platform):
             lines.append('These test failures were detected from CI log files but have no XML report')
             lines.append('(typically due to timeouts, crashes, or process kills).')
             lines.append('')
-            lines.append('| Arch | Platform | Test Config | Test File | Test Class | Test Name | Job-Level Shard | Test-Level Shard | Category | Also Failing In |')
-            lines.append('| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |')
+            lines.append('| Arch | Platform | Test Config | Test File | Test Class | Test Name | Job-Level Shard | Test-Level Shard | Category | Also Failing In | Job ID |')
+            lines.append('| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |')
             for lf in rocm_log_failures:
                 test_class, test_name = _parse_log_failure_names(lf)
                 lines.append(
@@ -759,7 +780,8 @@ def _xml_test_shard(t, platform):
                     f"| {lf.get('job_shard', '')} "
                     f"| {lf.get('test_shard', lf.get('shard', ''))} "
                     f"| {lf.get('category', '')} "
-                    f"| {lf.get('also_failing_in', '')} |"
+                    f"| {lf.get('also_failing_in', '')} "
+                    f"| {_job_id_link(lf.get('job_url', ''))} |"
                 )
             lines.append('')
 

diff --git a/.automation_scripts/pytorch-unit-test-scripts/summarize_xml_testreports.py b/.automation_scripts/pytorch-unit-test-scripts/summarize_xml_testreports.py
@@ -69,6 +69,18 @@ def _extract_shard(dirname):
 def parse_xml_reports_as_dict(workflow_run_id, workflow_run_attempt, tag, path="."):
     test_config = ""
     test_cases = {}
+
+    # download_testlogs writes the upstream pytorch CI workflow run id
+    # into "_wf_run_id" alongside the shard dirs. We combine it with each
+    # shard dir's trailing "_<job_id>" to form the URL
+    # https://github.com/pytorch/pytorch/actions/runs/<wf>/job/<job_id>
+    # surfaced as the "Job ID" column in the FAILED TESTS table.
+    wf_run_id = ""
+    wf_id_file = os.path.join(path, "_wf_run_id")
+    if os.path.isfile(wf_id_file):
+        with open(wf_id_file) as f:
+            wf_run_id = f.read().strip()
+
     items_list = os.listdir(path)
     for dir in items_list:
         new_dir = path + '/' + dir + '/'
@@ -80,6 +92,11 @@ def parse_xml_reports_as_dict(workflow_run_id, workflow_run_attempt, tag, path="
             elif "test-inductor" in new_dir:
                 test_config = TestConfigName.inductor.name
             shard = _extract_shard(dir)
+            jid = re.search(r'_(\d+)$', dir)
+            job_url = (
+                f"https://github.com/pytorch/pytorch/actions/runs/{wf_run_id}/job/{jid.group(1)}"
+                if wf_run_id and jid else ""
+            )
             for xml_report in Path(new_dir).glob("**/*.xml"):
                 try:
                     new_cases = parse_xml_report(
@@ -94,6 +111,7 @@ def parse_xml_reports_as_dict(workflow_run_id, workflow_run_attempt, tag, path="
                     continue
                 for key, case in new_cases.items():
                     case["shard"] = shard
+                    case["job_url"] = job_url
                     existing = test_cases.get(key)
                     if existing is None or _status_priority(case) > _status_priority(existing):
                         test_cases[key] = case
@@ -472,6 +490,8 @@ def summarize_xml_files(args):
         item_values["test_config"] = config_name
         item_values[f"shard_{set1_name}"] = v_values.get('shard', '') if v_values else ''
         item_values[f"shard_{set2_name}"] = v1_values.get('shard', '') if v1_values else ''
+        item_values[f"job_url_{set1_name}"] = v_values.get('job_url', '') if v_values else ''
+        item_values[f"job_url_{set2_name}"] = v1_values.get('job_url', '') if v1_values else ''
         # get test related info
         item_values[f"message_{set1_name}"] = get_test_message(v[0])
         item_values[f"message_{set2_name}"] = get_test_message(v[1]) if set2_path else ""
@@ -564,6 +584,10 @@ def sorting_key(e):
           return 21
         elif e == f"shard_{set2_name}":
           return 22
+        elif e == f"job_url_{set1_name}":
+          return 23
+        elif e == f"job_url_{set2_name}":
+          return 24
         elif e == "workflow_run_attempt" or e == "job_id":
           return 1000
         else: