From 11af07fa10452d3355fdbb4bc51222f350724c88 Mon Sep 17 00:00:00 2001
From: Garay-Fernandez <pgarayfe@amd.com>
Date: Fri, 29 May 2026 15:36:41 -0700
Subject: [PATCH 1/4] [CI] Surface upstream pytorch CI job link in parity
 summary

The parity summary's FAILED TESTS and LOG-BASED FAILURES tables list the
failing test tuples but do not link to the upstream pytorch/pytorch CI
job that actually ran the test, so reaching the stacktrace takes several
extra clicks.

download_testlogs already knows the job id of every artifact and log file
it pulls. Persist it through the pipeline and surface it as a clickable
"Job ID" column at the end of both tables:

- download_testlogs: keep the trailing "_<jobid>" segment of the original
  artifact name when shortening unzipped XML dirs, and write a single
  "_wf_run_id" file at the parent rocm_xml/cuda_xml level. For per-log
  artifacts, write a companion "<filename>.job_url" file with the
  canonical html_url from the GitHub API job object.
- summarize_xml_testreports.py: read _wf_run_id once, parse "_<jobid>"
  off each test-<cfg>-N-N dir, stamp a job_url on every test case, and
  emit job_url_{set1_name}/job_url_{set2_name} columns in the per-arch
  CSV.
- detect_log_failures.py: read the per-log .job_url file and stamp
  job_url on every emitted failure/flaky row; add job_url to both CSV
  writers.
- generate_summary.py: propagate job_url_* through collect_failed_tests
  and through the flaky-as-log-failure loader, and add a "Job ID" column
  at the end of both markdown tables rendered as [<jobid>](<url>).

Every read uses .get(..., '') / os.path.isfile, so existing artifacts and
CSVs without the new fields render as empty cells and the pipeline keeps
working unchanged.

Signed-off-by: Garay-Fernandez <pgarayfe@amd.com>
---
 .../detect_log_failures.py                    | 17 ++++++
 .../download_testlogs                         | 52 ++++++++++++++-----
 .../generate_summary.py                       | 32 ++++++++++--
 .../summarize_xml_testreports.py              | 28 ++++++++++
 4 files changed, 113 insertions(+), 16 deletions(-)
diff --git a/.automation_scripts/pytorch-unit-test-scripts/detect_log_failures.py b/.automation_scripts/pytorch-unit-test-scripts/detect_log_failures.py
index 0156624c3597..00a534f0146c 100755
--- a/.automation_scripts/pytorch-unit-test-scripts/detect_log_failures.py
+++ b/.automation_scripts/pytorch-unit-test-scripts/detect_log_failures.py
@@ -292,6 +292,18 @@ def scan_logs(logs_dir):
         job_total = shard_totals.get((platform, test_config), 0)
         job_shard_str = f"{shard_num}/{job_total}" if job_total else str(shard_num)
 
+        # Companion .job_url file written by download_testlogs.write_test_log_to_file.
+        # Absent for log archives produced before that change landed; in that
+        # case job_url stays empty and the column renders as a blank cell.
+        job_url_file = os.path.join(logs_dir, fname + ".job_url")
+        job_url = ""
+        if os.path.isfile(job_url_file):
+            try:
+                with open(job_url_file) as jf:
+                    job_url = jf.read().strip()
+            except OSError:
+                pass
+
         filepath = os.path.join(logs_dir, fname)
         results, consistent_failures, flaky_tests = parse_log_file(filepath)
 
@@ -306,6 +318,7 @@ def scan_logs(logs_dir):
                 "test_name": ft["method"],
                 "job_shard": job_shard_str,
                 "test_shard": ft["test_shard"],
+                "job_url": job_url,
             })
 
         # Record every (test_file, test_shard) observed in this log file,
@@ -365,6 +378,7 @@ def scan_logs(logs_dir):
                 "category": "+".join(categories),
                 "reason": reason,
                 "exit_codes": ",".join(str(c) for c in info["exit_codes"]),
+                "job_url": job_url,
             })
 
         for test_path, shard_str in consistent_failures:
@@ -384,6 +398,7 @@ def scan_logs(logs_dir):
                 "category": "CONSISTENT_FAILURE",
                 "reason": f"{test_class}::{test_name}" if test_class else "",
                 "exit_codes": "",
+                "job_url": job_url,
             })
 
     def _sort_shards(vals):
@@ -420,6 +435,7 @@ def write_csv_report(failures, output_path):
         "log_file", "platform", "test_config", "test_file",
         "job_shard", "test_shard",
         "status", "category", "reason", "exit_codes",
+        "job_url",
     ]
     with open(output_path, "w", newline="") as f:
         writer = csv.DictWriter(f, fieldnames=fieldnames)
@@ -441,6 +457,7 @@ def write_flaky_report(flaky, output_path):
     fieldnames = [
         "log_file", "platform", "test_config", "test_file",
         "test_class", "test_name", "job_shard", "test_shard",
+        "job_url",
     ]
     with open(output_path, "w", newline="") as f:
         writer = csv.DictWriter(f, fieldnames=fieldnames)
diff --git a/.automation_scripts/pytorch-unit-test-scripts/download_testlogs b/.automation_scripts/pytorch-unit-test-scripts/download_testlogs
index ac4214f99fec..20c64d42eeb9 100755
--- a/.automation_scripts/pytorch-unit-test-scripts/download_testlogs
+++ b/.automation_scripts/pytorch-unit-test-scripts/download_testlogs
@@ -90,6 +90,18 @@ def write_test_log_to_file(filename, test_key, jobs, sha):
     with open(filename, "w", encoding="utf-8") as f:
         f.write(response.text)
 
+    # Write the pytorch/pytorch CI job URL to a companion .job_url file next
+    # to this log so detect_log_failures.py can surface it in the LOG-BASED
+    # FAILURES table of the parity summary, giving reviewers a click-through
+    # to the failing job's logs for the stacktrace.
+    job_url = js[0].get('html_url') or ''
+    if job_url:
+        try:
+            with open(filename + ".job_url", "w", encoding="utf-8") as f:
+                f.write(job_url)
+        except OSError as e:
+            print(f"  WARNING: failed to write {filename}.job_url: {e}")
+
 def get_workflow_jobs(wf):
     """Get all jobs for a workflow run."""
     if wf is None:
@@ -239,24 +251,32 @@ def _shorten_unzipped_dirs():
       unzipped-test-reports-runattempt1-test-default-1-6-linux.rocm.gpu.gfx942.1_68613413431.zip
       unzipped-test-reports-runattempt1-test-osdc-default-1-5-mt-l-x86aavx2-29-113-l4_73385044118.zip
     to:
-      test-default-1-6
-      test-default-1-5
-
-    Preserves the 'test-<config>' prefix so that summarize_xml_testreports.py
-    can still detect workflow type via substring matching.
+      test-default-1-6_68613413431
+      test-default-1-5_73385044118
+
+    Preserves the 'test-<config>' prefix so summarize_xml_testreports.py can
+    still detect workflow type via substring matching. The trailing
+    '_<jobid>' encodes the upstream pytorch/pytorch CI job id, which
+    summarize_xml_testreports.py combines with the workflow run id to build
+    a direct link to the failing job. Existing consumers tolerate the
+    suffix: `re.match(r'test-\\w+-(\\d+)-(\\d+)', dirname)` is unanchored at
+    the end, and `"test-default" in dirname` is a substring check.
     """
     from pathlib import Path
     for d in sorted(Path(".").glob("unzipped-*")):
         if not d.is_dir():
             continue
         m = re.search(r'test-(?:osdc-)?(default|distributed|inductor)-(\d+)-(\d+)', d.name)
-        if m:
-            short_name = f"test-{m.group(1)}-{m.group(2)}-{m.group(3)}"
-            if not Path(short_name).exists():
-                d.rename(short_name)
-                print(f"  Renamed {d.name} -> {short_name}")
-            else:
-                print(f"  WARNING: {short_name} already exists, keeping {d.name}")
+        if not m:
+            continue
+        job_id_match = re.search(r'_(\d{6,})\.zip$', d.name)
+        job_id_suffix = f"_{job_id_match.group(1)}" if job_id_match else ""
+        short_name = f"test-{m.group(1)}-{m.group(2)}-{m.group(3)}{job_id_suffix}"
+        if not Path(short_name).exists():
+            d.rename(short_name)
+            print(f"  Renamed {d.name} -> {short_name}")
+        else:
+            print(f"  WARNING: {short_name} already exists, keeping {d.name}")
 
 def download_xml_files(workflow_run_id, workflow_run_attempts, prefixes=[], allowed_substrings=None):
     # Get from S3 artifacts
@@ -297,6 +317,14 @@ def download_xml_files(workflow_run_id, workflow_run_attempts, prefixes=[], allo
 
     _shorten_unzipped_dirs()
 
+    # Workflow run id used by summarize_xml_testreports.py to build per-job
+    # URLs (combined with the '_<jobid>' suffix on each test-<cfg>-N-N dir).
+    try:
+        with open("_wf_run_id", "w") as f:
+            f.write(str(workflow_run_id))
+    except OSError as e:
+        print(f"  WARNING: failed to write _wf_run_id: {e}")
+
     # Delete raw zip files now that contents are extracted
     for path in artifact_paths:
         try:
diff --git a/.automation_scripts/pytorch-unit-test-scripts/generate_summary.py b/.automation_scripts/pytorch-unit-test-scripts/generate_summary.py
index 406f4b49b78c..bc187bcef1cb 100644
--- a/.automation_scripts/pytorch-unit-test-scripts/generate_summary.py
+++ b/.automation_scripts/pytorch-unit-test-scripts/generate_summary.py
@@ -3,6 +3,7 @@
 import argparse
 import csv
 import os
+import re
 import sys
 
 
@@ -289,10 +290,12 @@ def collect_failed_tests(arch_data, archs, s1_name, s2_name):
                     'test_name': r.get('test_name', ''),
                     'test_config': r.get('test_config', ''),
                     f'shard_{s1_name}': r.get(f'shard_{s1_name}', ''),
+                    f'job_url_{s1_name}': r.get(f'job_url_{s1_name}', ''),
                     f'status_{s1_name}': s1,
                 }
                 if has_set2:
                     entry[f'shard_{s2_name}'] = r.get(f'shard_{s2_name}', '')
+                    entry[f'job_url_{s2_name}'] = r.get(f'job_url_{s2_name}', '')
                     entry[f'status_{s2_name}'] = s2
                 failed.append(entry)
 
@@ -418,6 +421,7 @@ def load_flaky_tests_as_log_failures(filepaths):
                     'category': 'FLAKY',
                     'reason': f'{test_class}::{test_name}' if test_class else test_name,
                     'exit_codes': '',
+                    'job_url': row.get('job_url', ''),
                 })
     return entries
 
@@ -691,6 +695,18 @@ def _xml_test_shard(t, platform):
                _norm_test_file(t.get('test_file', '')))
         return _format_test_shards(shard_lookup.get(key, ''))
 
+    def _job_url_cell(url):
+        """Render the job-url column as a markdown link labeled with the job_id.
+
+        Empty when no URL is present, so the column gracefully degrades for
+        rows produced before the upstream pipeline starts emitting job_url.
+        """
+        if not url:
+            return ''
+        m = re.search(r'/job/(\d+)', url)
+        label = m.group(1) if m else 'job'
+        return f'[{label}]({url})'
+
     cols = ['Arch', 'Test Config', 'Test File', 'Test Class', 'Test Name',
             f'Job-Level Shard ({s1_name})',
             f'Test-Level Shard ({s1_name})']
@@ -701,6 +717,9 @@ def _xml_test_shard(t, platform):
     if has_set2:
         cols.append(f'Status ({s2_name})')
     cols.append('Also Failing In')
+    cols.append(f'Job ID ({s1_name})')
+    if has_set2:
+        cols.append(f'Job ID ({s2_name})')
 
     if s1_failed:
         lines.append(f'### FAILED TESTS ({len(s1_failed)})')
@@ -718,7 +737,11 @@ def _xml_test_shard(t, platform):
             line += f" | {t[f'status_{s1_name}']}"
             if has_set2:
                 line += f" | {t.get(f'status_{s2_name}', '')}"
-            line += f" | {t.get('also_failing_in', '')} |"
+            line += f" | {t.get('also_failing_in', '')}"
+            line += f" | {_job_url_cell(t.get(f'job_url_{s1_name}', ''))}"
+            if has_set2:
+                line += f" | {_job_url_cell(t.get(f'job_url_{s2_name}', ''))}"
+            line += ' |'
             lines.append(line)
         lines.append('')
     else:
@@ -748,8 +771,8 @@ def _xml_test_shard(t, platform):
             lines.append('These test failures were detected from CI log files but have no XML report')
             lines.append('(typically due to timeouts, crashes, or process kills).')
             lines.append('')
-            lines.append('| Arch | Platform | Test Config | Test File | Test Class | Test Name | Job-Level Shard | Test-Level Shard | Category | Also Failing In |')
-            lines.append('| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |')
+            lines.append('| Arch | Platform | Test Config | Test File | Test Class | Test Name | Job-Level Shard | Test-Level Shard | Category | Also Failing In | Job ID |')
+            lines.append('| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |')
             for lf in rocm_log_failures:
                 test_class, test_name = _parse_log_failure_names(lf)
                 lines.append(
@@ -759,7 +782,8 @@ def _xml_test_shard(t, platform):
                     f"| {lf.get('job_shard', '')} "
                     f"| {lf.get('test_shard', lf.get('shard', ''))} "
                     f"| {lf.get('category', '')} "
-                    f"| {lf.get('also_failing_in', '')} |"
+                    f"| {lf.get('also_failing_in', '')} "
+                    f"| {_job_url_cell(lf.get('job_url', ''))} |"
                 )
             lines.append('')
 
diff --git a/.automation_scripts/pytorch-unit-test-scripts/summarize_xml_testreports.py b/.automation_scripts/pytorch-unit-test-scripts/summarize_xml_testreports.py
index 72e587bbf54b..603fef48030f 100755
--- a/.automation_scripts/pytorch-unit-test-scripts/summarize_xml_testreports.py
+++ b/.automation_scripts/pytorch-unit-test-scripts/summarize_xml_testreports.py
@@ -69,6 +69,21 @@ def _extract_shard(dirname):
 def parse_xml_reports_as_dict(workflow_run_id, workflow_run_attempt, tag, path="."):
     test_config = ""
     test_cases = {}
+
+    # The '_wf_run_id' file is written by download_testlogs.download_xml_files
+    # and identifies the upstream pytorch/pytorch CI workflow run these XML
+    # reports came from. Combined with the '_<jobid>' suffix on each
+    # test-<cfg>-N-N directory, it lets us build a direct link to the
+    # failing job page so reviewers can jump straight to the stacktrace.
+    wf_run_id = ""
+    wf_id_file = os.path.join(path, "_wf_run_id")
+    if os.path.isfile(wf_id_file):
+        try:
+            with open(wf_id_file) as f:
+                wf_run_id = f.read().strip()
+        except OSError:
+            pass
+
     items_list = os.listdir(path)
     for dir in items_list:
         new_dir = path + '/' + dir + '/'
@@ -80,6 +95,11 @@ def parse_xml_reports_as_dict(workflow_run_id, workflow_run_attempt, tag, path="
             elif "test-inductor" in new_dir:
                 test_config = TestConfigName.inductor.name
             shard = _extract_shard(dir)
+            m_jid = re.search(r'_(\d{6,})$', dir)
+            job_id = m_jid.group(1) if m_jid else ""
+            job_url = ""
+            if wf_run_id and job_id:
+                job_url = f"https://github.com/pytorch/pytorch/actions/runs/{wf_run_id}/job/{job_id}"
             for xml_report in Path(new_dir).glob("**/*.xml"):
                 try:
                     new_cases = parse_xml_report(
@@ -94,6 +114,8 @@ def parse_xml_reports_as_dict(workflow_run_id, workflow_run_attempt, tag, path="
                     continue
                 for key, case in new_cases.items():
                     case["shard"] = shard
+                    if job_url:
+                        case["job_url"] = job_url
                     existing = test_cases.get(key)
                     if existing is None or _status_priority(case) > _status_priority(existing):
                         test_cases[key] = case
@@ -472,6 +494,8 @@ def summarize_xml_files(args):
         item_values["test_config"] = config_name
         item_values[f"shard_{set1_name}"] = v_values.get('shard', '') if v_values else ''
         item_values[f"shard_{set2_name}"] = v1_values.get('shard', '') if v1_values else ''
+        item_values[f"job_url_{set1_name}"] = v_values.get('job_url', '') if v_values else ''
+        item_values[f"job_url_{set2_name}"] = v1_values.get('job_url', '') if v1_values else ''
         # get test related info
         item_values[f"message_{set1_name}"] = get_test_message(v[0])
         item_values[f"message_{set2_name}"] = get_test_message(v[1]) if set2_path else ""
@@ -564,6 +588,10 @@ def sorting_key(e):
           return 21
         elif e == f"shard_{set2_name}":
           return 22
+        elif e == f"job_url_{set1_name}":
+          return 23
+        elif e == f"job_url_{set2_name}":
+          return 24
         elif e == "workflow_run_attempt" or e == "job_id":
           return 1000
         else:

From 842bb2995390e4be256dbfd15dfa0e65b7da4b1f Mon Sep 17 00:00:00 2001
From: Garay-Fernandez <pgarayfe@amd.com>
Date: Fri, 29 May 2026 15:58:22 -0700
Subject: [PATCH 2/4] [CI] Tighten parity summary job-url plumbing

Drop comment bloat and a try/except layer that didn't match the
surrounding style:

- _shorten_unzipped_dirs: keep the original `if m:` structure and add the
  job id suffix inside it, instead of restructuring with `if not m:
  continue` + extra locals.
- write_test_log_to_file / download_xml_files / scan_logs /
  parse_xml_reports_as_dict: drop the try/except around the small
  per-file reads and writes; other file IO in these functions doesn't
  guard either.
- parse_xml_reports_as_dict: always set case["job_url"] (empty string
  when absent), mirroring how case["shard"] is set unconditionally one
  line above.
- generate_summary.py: rename _job_url_cell -> _job_id_link to reflect
  what the markdown cell actually shows; drop its docstring.

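No behavior change on the normal path. Two edge cases differ: an OSError
on these small reads/writes now propagates instead of being swallowed or
downgraded to a warning, and summarize_xml_testreports.py now treats any
trailing "_<digits>" on a shard dir as the job id (previously six or
more digits were required).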

Signed-off-by: Garay-Fernandez <pgarayfe@amd.com>
---
 .../detect_log_failures.py                    | 11 ++--
 .../download_testlogs                         | 53 +++++++------------
 .../generate_summary.py                       | 16 ++----
 .../summarize_xml_testreports.py              | 26 ++++-----
 4 files changed, 36 insertions(+), 70 deletions(-)

diff --git a/.automation_scripts/pytorch-unit-test-scripts/detect_log_failures.py b/.automation_scripts/pytorch-unit-test-scripts/detect_log_failures.py
index 00a534f0146c..e980802ee5d2 100755
--- a/.automation_scripts/pytorch-unit-test-scripts/detect_log_failures.py
+++ b/.automation_scripts/pytorch-unit-test-scripts/detect_log_failures.py
@@ -292,17 +292,12 @@ def scan_logs(logs_dir):
         job_total = shard_totals.get((platform, test_config), 0)
         job_shard_str = f"{shard_num}/{job_total}" if job_total else str(shard_num)
 
-        # Companion .job_url file written by download_testlogs.write_test_log_to_file.
-        # Absent for log archives produced before that change landed; in that
-        # case job_url stays empty and the column renders as a blank cell.
+        # Written by download_testlogs next to each log file.
         job_url_file = os.path.join(logs_dir, fname + ".job_url")
         job_url = ""
         if os.path.isfile(job_url_file):
-            try:
-                with open(job_url_file) as jf:
-                    job_url = jf.read().strip()
-            except OSError:
-                pass
+            with open(job_url_file) as f:
+                job_url = f.read().strip()
 
         filepath = os.path.join(logs_dir, fname)
         results, consistent_failures, flaky_tests = parse_log_file(filepath)
diff --git a/.automation_scripts/pytorch-unit-test-scripts/download_testlogs b/.automation_scripts/pytorch-unit-test-scripts/download_testlogs
index 20c64d42eeb9..f49f1dac814b 100755
--- a/.automation_scripts/pytorch-unit-test-scripts/download_testlogs
+++ b/.automation_scripts/pytorch-unit-test-scripts/download_testlogs
@@ -90,17 +90,10 @@ def write_test_log_to_file(filename, test_key, jobs, sha):
     with open(filename, "w", encoding="utf-8") as f:
         f.write(response.text)
 
-    # Write the pytorch/pytorch CI job URL to a companion .job_url file next
-    # to this log so detect_log_failures.py can surface it in the LOG-BASED
-    # FAILURES table of the parity summary, giving reviewers a click-through
-    # to the failing job's logs for the stacktrace.
-    job_url = js[0].get('html_url') or ''
+    job_url = js[0].get('html_url', '')
     if job_url:
-        try:
-            with open(filename + ".job_url", "w", encoding="utf-8") as f:
-                f.write(job_url)
-        except OSError as e:
-            print(f"  WARNING: failed to write {filename}.job_url: {e}")
+        with open(filename + ".job_url", "w", encoding="utf-8") as f:
+            f.write(job_url)
 
 def get_workflow_jobs(wf):
     """Get all jobs for a workflow run."""
@@ -254,29 +247,26 @@ def _shorten_unzipped_dirs():
       test-default-1-6_68613413431
       test-default-1-5_73385044118
 
-    Preserves the 'test-<config>' prefix so summarize_xml_testreports.py can
-    still detect workflow type via substring matching. The trailing
-    '_<jobid>' encodes the upstream pytorch/pytorch CI job id, which
-    summarize_xml_testreports.py combines with the workflow run id to build
-    a direct link to the failing job. Existing consumers tolerate the
-    suffix: `re.match(r'test-\\w+-(\\d+)-(\\d+)', dirname)` is unanchored at
-    the end, and `"test-default" in dirname` is a substring check.
+    Preserves the 'test-<config>' prefix so that summarize_xml_testreports.py
+    can still detect workflow type via substring matching. The trailing
+    '_<jobid>' is the upstream pytorch CI job id, used to link to the
+    failing job from the parity summary.
     """
     from pathlib import Path
     for d in sorted(Path(".").glob("unzipped-*")):
         if not d.is_dir():
             continue
         m = re.search(r'test-(?:osdc-)?(default|distributed|inductor)-(\d+)-(\d+)', d.name)
-        if not m:
-            continue
-        job_id_match = re.search(r'_(\d{6,})\.zip$', d.name)
-        job_id_suffix = f"_{job_id_match.group(1)}" if job_id_match else ""
-        short_name = f"test-{m.group(1)}-{m.group(2)}-{m.group(3)}{job_id_suffix}"
-        if not Path(short_name).exists():
-            d.rename(short_name)
-            print(f"  Renamed {d.name} -> {short_name}")
-        else:
-            print(f"  WARNING: {short_name} already exists, keeping {d.name}")
+        if m:
+            short_name = f"test-{m.group(1)}-{m.group(2)}-{m.group(3)}"
+            jid = re.search(r'_(\d{6,})\.zip$', d.name)
+            if jid:
+                short_name += f"_{jid.group(1)}"
+            if not Path(short_name).exists():
+                d.rename(short_name)
+                print(f"  Renamed {d.name} -> {short_name}")
+            else:
+                print(f"  WARNING: {short_name} already exists, keeping {d.name}")
 
 def download_xml_files(workflow_run_id, workflow_run_attempts, prefixes=[], allowed_substrings=None):
     # Get from S3 artifacts
@@ -317,13 +307,8 @@ def download_xml_files(workflow_run_id, workflow_run_attempts, prefixes=[], allo
 
     _shorten_unzipped_dirs()
 
-    # Workflow run id used by summarize_xml_testreports.py to build per-job
-    # URLs (combined with the '_<jobid>' suffix on each test-<cfg>-N-N dir).
-    try:
-        with open("_wf_run_id", "w") as f:
-            f.write(str(workflow_run_id))
-    except OSError as e:
-        print(f"  WARNING: failed to write _wf_run_id: {e}")
+    with open("_wf_run_id", "w") as f:
+        f.write(str(workflow_run_id))
 
     # Delete raw zip files now that contents are extracted
     for path in artifact_paths:
diff --git a/.automation_scripts/pytorch-unit-test-scripts/generate_summary.py b/.automation_scripts/pytorch-unit-test-scripts/generate_summary.py
index bc187bcef1cb..4256f6352998 100644
--- a/.automation_scripts/pytorch-unit-test-scripts/generate_summary.py
+++ b/.automation_scripts/pytorch-unit-test-scripts/generate_summary.py
@@ -695,17 +695,11 @@ def _xml_test_shard(t, platform):
                _norm_test_file(t.get('test_file', '')))
         return _format_test_shards(shard_lookup.get(key, ''))
 
-    def _job_url_cell(url):
-        """Render the job-url column as a markdown link labeled with the job_id.
-
-        Empty when no URL is present, so the column gracefully degrades for
-        rows produced before the upstream pipeline starts emitting job_url.
-        """
+    def _job_id_link(url):
         if not url:
             return ''
         m = re.search(r'/job/(\d+)', url)
-        label = m.group(1) if m else 'job'
-        return f'[{label}]({url})'
+        return f'[{m.group(1)}]({url})' if m else f'[job]({url})'
 
     cols = ['Arch', 'Test Config', 'Test File', 'Test Class', 'Test Name',
             f'Job-Level Shard ({s1_name})',
@@ -738,9 +732,9 @@ def _job_url_cell(url):
             if has_set2:
                 line += f" | {t.get(f'status_{s2_name}', '')}"
             line += f" | {t.get('also_failing_in', '')}"
-            line += f" | {_job_url_cell(t.get(f'job_url_{s1_name}', ''))}"
+            line += f" | {_job_id_link(t.get(f'job_url_{s1_name}', ''))}"
             if has_set2:
-                line += f" | {_job_url_cell(t.get(f'job_url_{s2_name}', ''))}"
+                line += f" | {_job_id_link(t.get(f'job_url_{s2_name}', ''))}"
             line += ' |'
             lines.append(line)
         lines.append('')
@@ -783,7 +777,7 @@ def _job_url_cell(url):
                     f"| {lf.get('test_shard', lf.get('shard', ''))} "
                     f"| {lf.get('category', '')} "
                     f"| {lf.get('also_failing_in', '')} "
-                    f"| {_job_url_cell(lf.get('job_url', ''))} |"
+                    f"| {_job_id_link(lf.get('job_url', ''))} |"
                 )
             lines.append('')
 
diff --git a/.automation_scripts/pytorch-unit-test-scripts/summarize_xml_testreports.py b/.automation_scripts/pytorch-unit-test-scripts/summarize_xml_testreports.py
index 603fef48030f..0991130b4f21 100755
--- a/.automation_scripts/pytorch-unit-test-scripts/summarize_xml_testreports.py
+++ b/.automation_scripts/pytorch-unit-test-scripts/summarize_xml_testreports.py
@@ -70,19 +70,12 @@ def parse_xml_reports_as_dict(workflow_run_id, workflow_run_attempt, tag, path="
     test_config = ""
     test_cases = {}
 
-    # The '_wf_run_id' file is written by download_testlogs.download_xml_files
-    # and identifies the upstream pytorch/pytorch CI workflow run these XML
-    # reports came from. Combined with the '_<jobid>' suffix on each
-    # test-<cfg>-N-N directory, it lets us build a direct link to the
-    # failing job page so reviewers can jump straight to the stacktrace.
+    # Written by download_testlogs alongside the test-<cfg>-N-N dirs.
     wf_run_id = ""
     wf_id_file = os.path.join(path, "_wf_run_id")
     if os.path.isfile(wf_id_file):
-        try:
-            with open(wf_id_file) as f:
-                wf_run_id = f.read().strip()
-        except OSError:
-            pass
+        with open(wf_id_file) as f:
+            wf_run_id = f.read().strip()
 
     items_list = os.listdir(path)
     for dir in items_list:
@@ -95,11 +88,11 @@ def parse_xml_reports_as_dict(workflow_run_id, workflow_run_attempt, tag, path="
             elif "test-inductor" in new_dir:
                 test_config = TestConfigName.inductor.name
             shard = _extract_shard(dir)
-            m_jid = re.search(r'_(\d{6,})$', dir)
-            job_id = m_jid.group(1) if m_jid else ""
-            job_url = ""
-            if wf_run_id and job_id:
-                job_url = f"https://github.com/pytorch/pytorch/actions/runs/{wf_run_id}/job/{job_id}"
+            jid = re.search(r'_(\d+)$', dir)
+            job_url = (
+                f"https://github.com/pytorch/pytorch/actions/runs/{wf_run_id}/job/{jid.group(1)}"
+                if wf_run_id and jid else ""
+            )
             for xml_report in Path(new_dir).glob("**/*.xml"):
                 try:
                     new_cases = parse_xml_report(
@@ -114,8 +107,7 @@ def parse_xml_reports_as_dict(workflow_run_id, workflow_run_attempt, tag, path="
                     continue
                 for key, case in new_cases.items():
                     case["shard"] = shard
-                    if job_url:
-                        case["job_url"] = job_url
+                    case["job_url"] = job_url
                     existing = test_cases.get(key)
                     if existing is None or _status_priority(case) > _status_priority(existing):
                         test_cases[key] = case

From cb9a83d5226e30b77a3cae86ad61db5ad4783c24 Mon Sep 17 00:00:00 2001
From: Garay-Fernandez <pgarayfe@amd.com>
Date: Fri, 29 May 2026 16:12:40 -0700
Subject: [PATCH 3/4] [CI] Document the per-log .job_url and _wf_run_id files

Explain why these reads/writes exist: how download_testlogs hands the
upstream pytorch CI job id to summarize_xml_testreports.py (via the
"_<jobid>" suffix on each shard dir + a "_wf_run_id" file at the parent),
and the job page URL to detect_log_failures.py (via a "<log>.job_url"
file next to each log). Also rename the local "jid" match in
_shorten_unzipped_dirs to "job_id_match" so the code reads on first pass
without context; the "jid" local in summarize_xml_testreports.py is
unchanged.

No behavior change.

Signed-off-by: Garay-Fernandez <pgarayfe@amd.com>
---
 .../detect_log_failures.py                          |  5 ++++-
 .../pytorch-unit-test-scripts/download_testlogs     | 13 ++++++++++---
 .../summarize_xml_testreports.py                    |  6 +++++-
 3 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/.automation_scripts/pytorch-unit-test-scripts/detect_log_failures.py b/.automation_scripts/pytorch-unit-test-scripts/detect_log_failures.py
index e980802ee5d2..bc1e4c0693b5 100755
--- a/.automation_scripts/pytorch-unit-test-scripts/detect_log_failures.py
+++ b/.automation_scripts/pytorch-unit-test-scripts/detect_log_failures.py
@@ -292,7 +292,10 @@ def scan_logs(logs_dir):
         job_total = shard_totals.get((platform, test_config), 0)
         job_shard_str = f"{shard_num}/{job_total}" if job_total else str(shard_num)
 
-        # Written by download_testlogs next to each log file.
+        # If download_testlogs left a "<log>.job_url" file next to this log,
+        # it contains the URL of the upstream pytorch CI job that produced
+        # the log. We surface it in the LOG-BASED FAILURES table as a link
+        # to that job's page. Empty for older runs that predate this.
         job_url_file = os.path.join(logs_dir, fname + ".job_url")
         job_url = ""
         if os.path.isfile(job_url_file):
diff --git a/.automation_scripts/pytorch-unit-test-scripts/download_testlogs b/.automation_scripts/pytorch-unit-test-scripts/download_testlogs
index f49f1dac814b..cd75c602d8e9 100755
--- a/.automation_scripts/pytorch-unit-test-scripts/download_testlogs
+++ b/.automation_scripts/pytorch-unit-test-scripts/download_testlogs
@@ -90,6 +90,9 @@ def write_test_log_to_file(filename, test_key, jobs, sha):
     with open(filename, "w", encoding="utf-8") as f:
         f.write(response.text)
 
+    # Save the upstream pytorch CI job's page URL next to the log so
+    # detect_log_failures.py can later surface it as a link in the
+    # LOG-BASED FAILURES table of the parity summary.
     job_url = js[0].get('html_url', '')
     if job_url:
         with open(filename + ".job_url", "w", encoding="utf-8") as f:
@@ -259,9 +262,13 @@ def _shorten_unzipped_dirs():
         m = re.search(r'test-(?:osdc-)?(default|distributed|inductor)-(\d+)-(\d+)', d.name)
         if m:
             short_name = f"test-{m.group(1)}-{m.group(2)}-{m.group(3)}"
-            jid = re.search(r'_(\d{6,})\.zip$', d.name)
-            if jid:
-                short_name += f"_{jid.group(1)}"
+            # The original artifact name ends with "_<jobid>.zip" where
+            # <jobid> is the upstream pytorch CI job id (e.g.
+            # ..._68613413431.zip). Carry it onto short_name so
+            # summarize_xml_testreports.py can link to that job.
+            job_id_match = re.search(r'_(\d{6,})\.zip$', d.name)
+            if job_id_match:
+                short_name += f"_{job_id_match.group(1)}"
             if not Path(short_name).exists():
                 d.rename(short_name)
                 print(f"  Renamed {d.name} -> {short_name}")
diff --git a/.automation_scripts/pytorch-unit-test-scripts/summarize_xml_testreports.py b/.automation_scripts/pytorch-unit-test-scripts/summarize_xml_testreports.py
index 0991130b4f21..97c4424d89b0 100755
--- a/.automation_scripts/pytorch-unit-test-scripts/summarize_xml_testreports.py
+++ b/.automation_scripts/pytorch-unit-test-scripts/summarize_xml_testreports.py
@@ -70,7 +70,11 @@ def parse_xml_reports_as_dict(workflow_run_id, workflow_run_attempt, tag, path="
     test_config = ""
     test_cases = {}
 
-    # Written by download_testlogs alongside the test-<cfg>-N-N dirs.
+    # download_testlogs writes the upstream pytorch CI workflow run id
+    # into "_wf_run_id" alongside the shard dirs. We combine it with each
+    # shard dir's trailing "_<job_id>" to form the URL
+    # https://github.com/pytorch/pytorch/actions/runs/<wf>/job/<job_id>
+    # surfaced as the "Job ID" column in the FAILED TESTS table.
     wf_run_id = ""
     wf_id_file = os.path.join(path, "_wf_run_id")
     if os.path.isfile(wf_id_file):

From d0120b1e943d8ff4f834c80084b1bc2a35c26bff Mon Sep 17 00:00:00 2001
From: Garay-Fernandez <pgarayfe@amd.com>
Date: Fri, 29 May 2026 16:27:01 -0700
Subject: [PATCH 4/4] [CI] Clarify _job_id_link helper

Split the trailing ternary that was deciding both the label and the
whole f-string into a "get label, then build link" flow. Drop the
[job](url) fallback: both URL writers (summarize_xml_testreports.py and
write_test_log_to_file) produce URLs containing "/job/<digits>", so the
fallback never fired in practice and a cell labeled "job" wouldn't tell
a reviewer anything. If the URL is malformed, render an empty cell -
same as when no URL is available.

No behavior change for the URLs we actually emit.

Signed-off-by: Garay-Fernandez <pgarayfe@amd.com>
---
 .../pytorch-unit-test-scripts/generate_summary.py           | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.automation_scripts/pytorch-unit-test-scripts/generate_summary.py b/.automation_scripts/pytorch-unit-test-scripts/generate_summary.py
index 4256f6352998..c17322d6e8db 100644
--- a/.automation_scripts/pytorch-unit-test-scripts/generate_summary.py
+++ b/.automation_scripts/pytorch-unit-test-scripts/generate_summary.py
@@ -698,8 +698,12 @@ def _xml_test_shard(t, platform):
     def _job_id_link(url):
         if not url:
             return ''
+        # Use the job id (digits after "/job/" in the URL) as the visible
+        # link label so the cell reads e.g. [76905282313](...).
         m = re.search(r'/job/(\d+)', url)
-        return f'[{m.group(1)}]({url})' if m else f'[job]({url})'
+        if not m:
+            return ''
+        return f'[{m.group(1)}]({url})'
 
     cols = ['Arch', 'Test Config', 'Test File', 'Test Class', 'Test Name',
             f'Job-Level Shard ({s1_name})',