11name : MSDO Toolchain Version Probe
22
3- # Runs MSDO to install tools as a side effect, then scrapes the install
4- # directories to record exact resolved versions into toolchain-versions.json.
5- # The breach monitor reads this file instead of guessing "latest" from registries.
3+ # Resolves the exact tool versions pinned by MSDO's .gdntool configs and writes
4+ # them to .github/toolchain-versions.json before the breach monitor runs.
5+ #
6+ # Design: uses 'guardian init' only (via existingFilename to skip full scan).
7+ # guardian init downloads Microsoft.Security.DevOps.Tools.Configuration — a tiny
8+ # NuGet package containing the .gdntool XML files that define pinned versions.
9+ # No tool binaries are downloaded or executed. Runs in ~15 seconds.
10+ #
11+ # Cache: keyed by OS + week. Cold start once per week; warm runs re-use the
12+ # cached CLI + Tools.Configuration and just call 'guardian init --force' directly.
613
# Triggers: one scheduled run per day plus manual dispatch from the UI/CLI.
on:
  schedule:
    - cron: '0 11 * * *'  # Daily 11:00 UTC
  workflow_dispatch:

# contents: write — the job commits and pushes toolchain-versions.json.
permissions:
  contents: write
  actions: write  # needed to dispatch the breach monitor after committing versions
1422
1523jobs :
1624 probe :
@@ -20,31 +28,79 @@ jobs:
2028 steps :
2129 - uses : actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
2230
23- # Run MSDO so it downloads and installs all tool binaries into .gdn/i/.
24- # Scan may find nothing (no real targets) — that is fine. We only care
25- # about the side effect: tool packages installed in .gdn/i/{type}/.
26- - name : Install MSDO tools
27- id : msdo
# Publishes `key` = "<year>-<week-of-year, Monday first>" as a step output;
# the cache step uses it so the cache key changes once a week.
- name: Compute weekly cache key
  id: week
  run: printf 'key=%s\n' "$(date +%Y-%W)" >> "$GITHUB_OUTPUT"
34+
# Cache the MSDO CLI + Tools.Configuration (~10 MB, contains .gdntool files).
# The key embeds the week computed by the `week` step, so a new cache entry is
# created each Monday and the pinned versions are re-downloaded at least weekly.
# Exposes `cache-hit`, which later steps use to pick the cold or warm path.
- name: Restore MSDO CLI cache
  id: cache
  uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c6158d  # v4.2.0
  with:
    path: /home/runner/work/_msdo/versions
    key: msdo-cli-linux-x64-${{ steps.week.outputs.key }}
43+
# Cold-cache path, part 1: write an empty SARIF document. It is handed to the
# MSDO action as `existingFilename` in the next step so the action performs
# its CLI setup without running any scan tools.
- name: Create dummy SARIF (skip-scan sentinel)
  if: steps.cache.outputs.cache-hit != 'true'
  run: |
    printf '%s\n' '{"version":"2.1.0","runs":[]}' > /tmp/dummy.sarif
51+
# Cold-cache path, part 2: run the MSDO action against the dummy SARIF so it
# installs the CLI (and with it the .gdntool configs) into _msdo/versions.
# NOTE(review): this action tracks the floating `main` ref while the other
# actions in this job are pinned to commit SHAs — consider pinning it too.
- name: Install MSDO CLI via guardian init (cache miss)
  if: steps.cache.outputs.cache-hit != 'true'
  uses: microsoft/security-devops-action@main
  continue-on-error: true  # guardian upload will fail — that's expected
  with:
    existingFilename: /tmp/dummy.sarif
3258
33- - name : Collect resolved tool versions from install dirs
# Warm-cache path: the CLI was restored from cache, so locate the guardian
# executable (highest version-sorted path wins) and re-run `init --force` to
# rebuild the workspace .gdn config against it. Fails the step if the cache
# was restored but contains no guardian binary.
- name: Run guardian init (cache hit)
  if: steps.cache.outputs.cache-hit == 'true'
  run: |
    cached_cli=$(find /home/runner/work/_msdo/versions -maxdepth 4 -name 'guardian' -type f 2>/dev/null | sort -V | tail -1)
    if [[ -n "$cached_cli" ]]; then
      echo "Guardian binary: $cached_cli"
      "$cached_cli" init --force
    else
      echo "::error::guardian binary not found in cache — cache may be corrupt"
      exit 1
    fi
71+
72+ # Parse pinned versions from .gdntool XML files in the Tools.Configuration package.
73+ # These files define EXACTLY which NuGet/npm package version guardian will download
74+ # for each tool — no tool binaries are needed to read them.
75+ - name : Parse tool versions from .gdntool configs
3476 id : collect
3577 run : |
3678 python3 - <<'PYEOF'
37- import os, json, re, pathlib, datetime
79+ import os, json, re, pathlib, datetime, sys
80+ import xml.etree.ElementTree as ET
81+
82+ versions_base = pathlib.Path('/home/runner/work/_msdo/versions')
83+
84+ # Tools.Configuration is installed inside the CLI package directory:
85+ # _msdo/versions/Microsoft.Security.Devops.Cli.linux-x64.{ver}/tools/Config/Tools/
def cli_version(p):
    """Return the CLI package version embedded in path ``p`` as a tuple of ints.

    The path is split on both ``/`` and ``\\``; the first component that ends
    in a dotted numeric version of three or more parts (optionally followed by
    a ``-prerelease`` / ``+build`` suffix) supplies the result, e.g.
    ``.../Cli.linux-x64.0.215.0/tools/Config/Tools`` -> ``(0, 215, 0)``.

    Every numeric component is kept, so a four-part version such as
    ``0.215.0.1`` sorts correctly instead of losing its leading component,
    and a prerelease suffix no longer makes the version unparseable.

    Returns ``(0, 0, 0)`` when no component carries a version, so the value
    is always usable as a sort key.
    """
    version_tail = re.compile(r'\.(\d+(?:\.\d+){2,})(?:[-+][0-9A-Za-z.-]+)?$')
    for component in re.split(r'[/\\]', str(p)):
        found = version_tail.search(component)
        if found:
            return tuple(int(piece) for piece in found.group(1).split('.'))
    return (0, 0, 0)
3890
39- # MSDO installs packages into $RUNNER_TEMP/../_msdo/packages/nuget/{PackageName}.{version}/
40- # and npm tools into $RUNNER_TEMP/../_msdo/packages/node_modules/{tool}/package.json
41- runner_temp = pathlib.Path(os.environ.get('RUNNER_TEMP', '/tmp'))
42- MSDO_PACKAGES = runner_temp.parent / '_msdo' / 'packages'
# Collect the Config/Tools directory of every CLI package under versions_base,
# ordered by the version parsed from its path so the last entry is the newest.
config_dirs = sorted(versions_base.glob('*/tools/Config/Tools'), key=cli_version)
if not config_dirs:
    # Nothing to parse: tell the later steps to skip the commit and end the
    # step successfully so the previously committed file is left untouched.
    print('ERROR: Config/Tools not found — guardian init may not have run', file=sys.stderr)
    gh_out = os.environ.get('GITHUB_OUTPUT', '')
    if gh_out:
        # Context manager closes (and flushes) the handle before sys.exit.
        with open(gh_out, 'a') as gh_out_fh:
            gh_out_fh.write('skip_commit=true\n')
    sys.exit(0)

config_tools = config_dirs[-1]
gdntool_files = sorted(config_tools.glob('**/*.gdntool'))
print(f'Found {len(gdntool_files)} .gdntool files in {config_tools}')
45102
46- # Actual Guardian NuGet package names → canonical tool names.
47- # Pattern: Microsoft.Guardian.{Tool}Redist_{platform}.{version}
103+ # Map Guardian NuGet package names (lowercase) → canonical tool names
48104 PKG_TO_TOOL = {
49105 'microsoft.guardian.banditredist_linux_amd64': 'bandit',
50106 'microsoft.guardian.banditredist_win_amd64': 'bandit',
@@ -60,54 +116,101 @@ jobs:
60116 }
61117
# tools:        canonical tool name -> version string resolved so far
# raw_gdntools: .gdntool file name  -> full file text (kept for the log dump)
tools = {}
raw_gdntools = {}
VER_RE = re.compile(r'\d+\.\d+(?:\.\d+)*(?:[-+][0-9A-Za-z.-]+)?')

for f in gdntool_files:
    content = f.read_text(encoding='utf-8', errors='replace')
    raw_gdntools[f.name] = content
    lowered = content.lower()

    # Parse once; both XML strategies walk the same tree. A file that is not
    # well-formed XML skips straight to the regex fallback below.
    try:
        xml_elements = list(ET.fromstring(content).iter())
    except ET.ParseError:
        xml_elements = []

    # --- Strategy 1: standard XML attribute scan ---
    # An element carrying a known package name in a name-like attribute and a
    # version-like value in a version attribute.
    for elem in xml_elements:
        for name_key in ('Name', 'PackageName', 'package', 'id'):
            pkg = (elem.get(name_key) or '').strip().lower()
            canonical = PKG_TO_TOOL.get(pkg) if pkg else None
            if not canonical:
                continue
            for ver_key in ('Version', 'version', 'PackageVersion'):
                ver = (elem.get(ver_key) or '').strip()
                if ver and VER_RE.match(ver):
                    tools[canonical] = ver
                    break

    # --- Strategy 2: child element text scan ---
    #   <PackageName>Microsoft.Guardian.TrivyRedist_linux_amd64</PackageName>
    #   <Version>0.69.3</Version>
    # Runs after strategy 1, so a hit here overrides an attribute-based hit.
    for elem in xml_elements:
        children = {c.tag: (c.text or '').strip() for c in elem}
        pkg = children.get('PackageName', children.get('Name', children.get('Id', ''))).lower()
        ver = children.get('Version', children.get('PackageVersion', ''))
        if not (pkg and ver):
            continue
        canonical = PKG_TO_TOOL.get(pkg)
        if canonical and VER_RE.match(ver):
            tools[canonical] = ver

    # --- Strategy 3: regex fallback on raw XML text (per-tool) ---
    # Only for tools still unresolved: take the first dotted number found
    # within 200 characters after the package name in the lowercased text.
    # Handles malformed XML or unexpected schemas.
    for pkg_lower, canonical in PKG_TO_TOOL.items():
        if canonical in tools or pkg_lower not in lowered:
            continue
        m = re.search(
            re.escape(pkg_lower) + r'[^"\'<>]*["\'>][\s\S]{0,200}?' +
            r'(\d+\.\d+(?:\.\d+)*)',
            lowered
        )
        if m:
            tools[canonical] = m.group(1)
176+
# eslint is distributed through npm rather than a Guardian NuGet package, so
# when the scans above did not resolve it, look for an `eslint@<ver>` or
# `eslint=<ver>` spec in the raw text of any file that mentions eslint.
# The first file that yields a version wins.
if 'eslint' not in tools:
    eslint_spec = re.compile(r'eslint[@=](\d+\.\d+(?:\.\d+)*)', re.IGNORECASE)
    for fname, content in raw_gdntools.items():
        mentions_eslint = 'eslint' in fname.lower() or 'eslint' in content.lower()
        spec = eslint_spec.search(content) if mentions_eslint else None
        if spec:
            tools['eslint'] = spec.group(1)
            break
80187
81- # eslint is installed via npm into node_modules/eslint/package.json
82- eslint_pkg = MSDO_PACKAGES / 'node_modules' / 'eslint' / 'package.json'
83- if eslint_pkg.exists():
84- eslint_version = json.loads(eslint_pkg.read_text()).get('version')
85- if eslint_version:
86- tools['eslint'] = eslint_version
# Echo the head of every .gdntool file into the job log so the schema can be
# inspected when the parsing strategies miss something.
print('\n=== RAW .gdntool FILES (schema discovery) ===')
for fname, content in raw_gdntools.items():
    print(f'\n--- {fname} ---')
    print(content[:2000])  # first 2,000 characters per file

print('\n=== RESOLVED VERSIONS ===')
print(json.dumps(tools, indent=2))

if not tools:
    # Nothing resolved: flag the later steps to skip the commit and end the
    # step successfully, leaving the previously committed file in place.
    print('\nWARNING: no versions resolved from .gdntool files — check raw output above')
    gh_out = os.environ.get('GITHUB_OUTPUT', '')
    if gh_out:
        # Context manager closes (and flushes) the handle before sys.exit.
        with open(gh_out, 'a') as gh_out_fh:
            gh_out_fh.write('skip_commit=true\n')
    sys.exit(0)

# Partial resolution still proceeds, but say so in the log: a tool missing
# from the output would otherwise go unnoticed.
missing = sorted(set(PKG_TO_TOOL.values()) - set(tools))
if missing:
    print(f'WARNING: expected tools not resolved from .gdntool files: {missing}')
203+
# Prefer MSDO_INSTALLEDVERSION when it is present in the environment. The
# MSDO action step is skipped on a cache hit, so in that case fall back to
# the version embedded in the CLI directory the configs were read from;
# 'unknown' is recorded only when neither source yields a version.
msdo_cli_version = os.environ.get('MSDO_INSTALLEDVERSION', '').strip()
if not msdo_cli_version:
    parsed_cli = cli_version(config_tools)
    msdo_cli_version = '.'.join(str(n) for n in parsed_cli) if any(parsed_cli) else 'unknown'

output = {
    'generated_at': datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
    'msdo_cli_version': msdo_cli_version,
    'tools': tools,
    'raw_gdntools': list(raw_gdntools.keys()),
}

# Written relative to the working directory (the checked-out repository root
# when run from the workflow step); the parent directory is created if absent.
out = pathlib.Path('.github/toolchain-versions.json')
out.parent.mkdir(parents=True, exist_ok=True)
out.write_text(json.dumps(output, indent=2) + '\n')
110- print(json.dumps(output, indent=2))
111214 PYEOF
112215
113216 - name : Commit updated versions
@@ -122,3 +225,14 @@ jobs:
122225 git commit -m "chore(ci): update toolchain-versions.json [skip ci]"
123226 git push
124227 fi
228+
# Dispatch the breach monitor from this job, after the commit step, instead of
# giving it its own schedule: the monitor then always starts with the versions
# file this run just produced. Skipped when the parse step set skip_commit.
- name: Trigger breach monitor
  if: steps.collect.outputs.skip_commit != 'true'
  env:
    GH_TOKEN: ${{ github.token }}
  run: |
    gh workflow run msdo-breach-monitor.lock.yml --ref main
    echo "Breach monitor dispatched — will read freshly committed toolchain-versions.json"
0 commit comments