From b4a1df1e7aba9ae9877249dafbf84c7c1e84e175 Mon Sep 17 00:00:00 2001 From: Radek Doulik Date: Thu, 26 Mar 2026 17:49:30 +0100 Subject: [PATCH 1/5] [NO-REVIEW] Batch WASM CoreCLR library test suites on Helix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduce Helix queue pressure by grouping ~172 individual WASM CoreCLR library test work items into ~23 batched work items (87% reduction). Changes: - Add eng/testing/WasmBatchRunner.sh: batch runner that extracts and runs multiple test suites sequentially within a single work item, with per-suite result isolation - Add greedy bin-packing inline MSBuild task (_GroupWorkItems) that distributes test archives into balanced batches by file size - Add _AddBatchedWorkItemsForLibraryTests target gated on WasmBatchLibraryTests property (defaults true for CoreCLR+Chrome) - Sample apps excluded from batching, kept as individual work items - Can be disabled with /p:WasmBatchLibraryTests=false Expected impact: - 172 → ~23 Helix work items (87% queue pressure reduction) - ~6% machine time savings (~26 minutes) - Longest batch ~18 minutes (well-balanced bin-packing) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- eng/testing/WasmBatchRunner.sh | 89 ++++++++++ src/libraries/sendtohelix-browser.targets | 195 +++++++++++++++++++++- 2 files changed, 283 insertions(+), 1 deletion(-) create mode 100755 eng/testing/WasmBatchRunner.sh diff --git a/eng/testing/WasmBatchRunner.sh b/eng/testing/WasmBatchRunner.sh new file mode 100755 index 00000000000000..038086502f3809 --- /dev/null +++ b/eng/testing/WasmBatchRunner.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash + +EXECUTION_DIR=$(dirname "$0") + +if [[ -z "$HELIX_WORKITEM_UPLOAD_ROOT" ]]; then + ORIGINAL_UPLOAD_ROOT="$PWD/test-results" +else + ORIGINAL_UPLOAD_ROOT="$HELIX_WORKITEM_UPLOAD_ROOT" +fi + +BATCH_DIR="$PWD" +SUITE_COUNT=0 +FAIL_COUNT=0 +SUITE_NAMES=() +SUITE_EXIT_CODES=() +SUITE_DURATIONS=() + +echo "=== WasmBatchRunner ===" +echo "BATCH_DIR=$BATCH_DIR" +echo "ORIGINAL_UPLOAD_ROOT=$ORIGINAL_UPLOAD_ROOT" + +for zipFile in "$BATCH_DIR"/*.zip; do + if [[ ! -f "$zipFile" ]]; then + echo "No .zip files found in $BATCH_DIR" + exit 1 + fi + + suiteName=$(basename "$zipFile" .zip) + suiteDir="$BATCH_DIR/$suiteName" + + echo "" + echo "========================= BEGIN $suiteName =============================" + + mkdir -p "$suiteDir" + unzip -q -o "$zipFile" -d "$suiteDir" + + export HELIX_WORKITEM_UPLOAD_ROOT="$ORIGINAL_UPLOAD_ROOT/$suiteName" + mkdir -p "$HELIX_WORKITEM_UPLOAD_ROOT" + + pushd "$suiteDir" >/dev/null + + chmod +x RunTests.sh + + startTime=$(date +%s) + ./RunTests.sh "$@" + suiteExitCode=$? + endTime=$(date +%s) + + popd >/dev/null + + duration=$((endTime - startTime)) + + SUITE_NAMES+=("$suiteName") + SUITE_EXIT_CODES+=("$suiteExitCode") + SUITE_DURATIONS+=("$duration") + SUITE_COUNT=$((SUITE_COUNT + 1)) + + if [[ $suiteExitCode -ne 0 ]]; then + FAIL_COUNT=$((FAIL_COUNT + 1)) + echo "----- FAIL $suiteName — exit code $suiteExitCode — ${duration}s -----" + else + echo "----- PASS $suiteName — ${duration}s -----" + fi + + echo "========================= END $suiteName ===============================" +done + +echo "" +echo "=== Batch Summary ===" +printf "%-60s %-6s %s\n" "Suite" "Status" "Duration" +printf "%-60s %-6s %s\n" "-----" "------" "--------" + +for i in "${!SUITE_NAMES[@]}"; do + if [[ ${SUITE_EXIT_CODES[$i]} -eq 0 ]]; then + status="PASS" + else + status="FAIL" + fi + printf "%-60s %-6s %ss\n" "${SUITE_NAMES[$i]}" "$status" "${SUITE_DURATIONS[$i]}" +done + +echo "" +echo "Total: $SUITE_COUNT | Passed: $((SUITE_COUNT - FAIL_COUNT)) | Failed: $FAIL_COUNT" + +if [[ $FAIL_COUNT -ne 0 ]]; then + exit 1 +fi + +exit 0 diff --git a/src/libraries/sendtohelix-browser.targets b/src/libraries/sendtohelix-browser.targets index dc2fc9f44c8c89..0684ff0499cc62 100644 --- a/src/libraries/sendtohelix-browser.targets +++ b/src/libraries/sendtohelix-browser.targets @@ -41,10 +41,15 @@ '$(Scenario)' == 'WasmTestOnChrome' or '$(Scenario)' == 'WasmTestOnFirefox'">true + true + false + <_WasmBatchLargeThreshold Condition="'$(_WasmBatchLargeThreshold)' == ''">52428800 + PrepareHelixCorrelationPayload_Wasm; _AddWorkItemsForLibraryTests; + _AddBatchedWorkItemsForLibraryTests; _AddWorkItemsForBuildWasmApps @@ -172,6 +177,135 @@ + + + + + + + + + + + + + +(); +foreach (var item in Items) +{ + long size = 0; + if (File.Exists(item.ItemSpec)) + { + size = new FileInfo(item.ItemSpec).Length; + } + itemsWithSize.Add((item, size)); +} + +// Sort largest first for greedy bin-packing +itemsWithSize.Sort((a, b) => b.size.CompareTo(a.size)); + +var result = new List(); +int negativeBatchId = -1; + +// Separate large items (each gets its own batch) +var smallItems = new List<(ITaskItem item, long size)>(); +foreach (var entry in itemsWithSize) +{ + if (entry.size > LargeThreshold) + { + var newItem = new TaskItem(entry.item); + newItem.SetMetadata("BatchId", negativeBatchId.ToString()); + negativeBatchId--; + result.Add(newItem); + } + else + { + smallItems.Add(entry); + } +} + +// Greedy bin-packing for small items +if (smallItems.Count > 0) +{ + int numBatches = Math.Min(BatchSize, smallItems.Count); + var batchSizes = new long[numBatches]; + var batchAssignments = new List[numBatches]; + for (int i = 0; i < numBatches; i++) + batchAssignments[i] = new List(); + + foreach (var entry in smallItems) + { + // Find batch with smallest total size + int minIdx = 0; + for (int i = 1; i < numBatches; i++) + { + if (batchSizes[i] < batchSizes[minIdx]) + minIdx = i; + } + batchSizes[minIdx] += entry.size; + var newItem = new TaskItem(entry.item); + newItem.SetMetadata("BatchId", minIdx.ToString()); + batchAssignments[minIdx].Add(newItem); + } + + for (int i = 0; i < numBatches; i++) + result.AddRange(batchAssignments[i]); +} + +GroupedItems = result.ToArray(); +]]> + + + + + + + + + + + + + + + + +(); +foreach (var item in GroupedItems) +{ + string bid = item.GetMetadata("BatchId"); + if (!counts.ContainsKey(bid)) counts[bid] = 0; + counts[bid]++; +} + +var result = new List(); +foreach (var batchId in BatchIds) +{ + string bid = batchId.ItemSpec; + int count = counts.ContainsKey(bid) ? counts[bid] : 1; + int totalMinutes = Math.Max(10, count * 2); + var ts = TimeSpan.FromMinutes(totalMinutes); + + var helixItem = new TaskItem(ItemPrefix + "Batch-" + bid); + helixItem.SetMetadata("PayloadDirectory", BatchOutputDir + "batch-" + bid + "/"); + helixItem.SetMetadata("Command", "chmod +x WasmBatchRunner.sh && ./WasmBatchRunner.sh"); + helixItem.SetMetadata("Timeout", ts.ToString(@"hh\:mm\:ss")); + result.Add(helixItem); +} + +TimedItems = result.ToArray(); +]]> + + + + - + <_WasmWorkItem Include="$(TestArchiveRoot)browseronly/**/*.zip" Condition="'$(Scenario)' == 'WasmTestOnChrome' or '$(Scenario)' == 'WasmTestOnFirefox'" /> <_WasmWorkItem Include="$(TestArchiveRoot)chromeonly/**/*.zip" Condition="'$(Scenario)' == 'WasmTestOnChrome'" /> @@ -273,4 +407,63 @@ + + + + + + <_WasmBatchWorkItem Include="$(TestArchiveRoot)browseronly/**/*.zip" Condition="'$(Scenario)' == 'WasmTestOnChrome' or '$(Scenario)' == 'WasmTestOnFirefox'" /> + <_WasmBatchWorkItem Include="$(TestArchiveRoot)chromeonly/**/*.zip" Condition="'$(Scenario)' == 'WasmTestOnChrome'" /> + + + + + <_WasmBatchSampleZip Condition="'$(Scenario)' == 'WasmTestOnV8'" Include="$(TestArchiveRoot)runonly/**/*.Console.V8.*.Sample.zip" /> + <_WasmBatchSampleZip Condition="'$(Scenario)' == 'WasmTestOnChrome'" Include="$(TestArchiveRoot)runonly/**/*.Browser.*.Sample.zip" /> + + + %(Identity) + $(HelixCommand) + $(_workItemTimeout) + + + + + <_WasmBatchDefaultItems Include="$(WorkItemArchiveWildCard)" Exclude="$(HelixCorrelationPayload)" /> + + + + + <_WasmBatchAllItems Include="@(_WasmBatchWorkItem)" /> + <_WasmBatchAllItems Include="@(_WasmBatchDefaultItems)" /> + + + + <_GroupWorkItems Items="@(_WasmBatchAllItems)" BatchSize="20" LargeThreshold="$(_WasmBatchLargeThreshold)"> + + + + + + <_WasmBatchId Include="@(_WasmGroupedItem -> '%(BatchId)')" /> + <_WasmUniqueBatchId Include="@(_WasmBatchId->Distinct())" /> + + + + + + + + + <_ComputeBatchTimeout GroupedItems="@(_WasmGroupedItem)" BatchIds="@(_WasmUniqueBatchId)" + ItemPrefix="$(WorkItemPrefix)" BatchOutputDir="$(IntermediateOutputPath)helix-batches/"> + + + + + + + From a751466b323341c22a994e07f003d711cfe8c7df Mon Sep 17 00:00:00 2001 From: Radek Doulik Date: Thu, 26 Mar 2026 18:09:35 +0100 Subject: [PATCH 2/5] Address PR review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove unused EXECUTION_DIR variable from WasmBatchRunner.sh - Use PayloadArchive (ZIP) instead of PayloadDirectory to pass sendtohelixhelp.proj validation - Use HelixCommand with RunTests.sh→WasmBatchRunner.sh substitution to preserve env var setup and pre-commands Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- eng/testing/WasmBatchRunner.sh | 2 -- src/libraries/sendtohelix-browser.targets | 18 +++++++++++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/eng/testing/WasmBatchRunner.sh b/eng/testing/WasmBatchRunner.sh index 038086502f3809..a6c4ca7b0064fd 100755 --- a/eng/testing/WasmBatchRunner.sh +++ b/eng/testing/WasmBatchRunner.sh @@ -1,7 +1,5 @@ #!/usr/bin/env bash -EXECUTION_DIR=$(dirname "$0") - if [[ -z "$HELIX_WORKITEM_UPLOAD_ROOT" ]]; then ORIGINAL_UPLOAD_ROOT="$PWD/test-results" else diff --git a/src/libraries/sendtohelix-browser.targets b/src/libraries/sendtohelix-browser.targets index 0684ff0499cc62..c14f3c559b22e7 100644 --- a/src/libraries/sendtohelix-browser.targets +++ b/src/libraries/sendtohelix-browser.targets @@ -294,8 +294,7 @@ foreach (var batchId in BatchIds) var ts = TimeSpan.FromMinutes(totalMinutes); var helixItem = new TaskItem(ItemPrefix + "Batch-" + bid); - helixItem.SetMetadata("PayloadDirectory", BatchOutputDir + "batch-" + bid + "/"); - helixItem.SetMetadata("Command", "chmod +x WasmBatchRunner.sh && ./WasmBatchRunner.sh"); + helixItem.SetMetadata("BatchDir", BatchOutputDir + "batch-" + bid + "/"); helixItem.SetMetadata("Timeout", ts.ToString(@"hh\:mm\:ss")); result.Add(helixItem); } @@ -462,8 +461,21 @@ TimedItems = result.ToArray(); + + + + + + <_WasmBatchHelixCommand>$(HelixCommand.Replace('./RunTests.sh', 'chmod +x WasmBatchRunner.sh && ./WasmBatchRunner.sh')) + + - + + $(IntermediateOutputPath)helix-batches/%(Identity).zip + $(_WasmBatchHelixCommand) + From face27d297b25e942a24aca77a2095a068329c3f Mon Sep 17 00:00:00 2001 From: Radek Doulik Date: Thu, 26 Mar 2026 22:38:50 +0100 Subject: [PATCH 3/5] Fix batch timeout: 30m min, 20m/suite for WASM overhead Batch--1 (1 item) and Batch-5 (8 items) timed out in CI because the 2min/suite formula was too aggressive. System.IO.Compression alone takes 11m, System.Security.Cryptography takes 17m, and Microsoft.Bcl.Memory takes 6m. With 19/21 batches passing and the longest at 17m24s, a 30m minimum provides adequate headroom. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/libraries/sendtohelix-browser.targets | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/libraries/sendtohelix-browser.targets b/src/libraries/sendtohelix-browser.targets index c14f3c559b22e7..75bd3b26b67499 100644 --- a/src/libraries/sendtohelix-browser.targets +++ b/src/libraries/sendtohelix-browser.targets @@ -290,7 +290,9 @@ foreach (var batchId in BatchIds) { string bid = batchId.ItemSpec; int count = counts.ContainsKey(bid) ? counts[bid] : 1; - int totalMinutes = Math.Max(10, count * 2); + // 20 minutes per suite to account for WASM startup overhead + test execution; + // minimum 30 minutes to handle the heaviest individual suites (e.g. Cryptography ~17m) + int totalMinutes = Math.Max(30, count * 20); var ts = TimeSpan.FromMinutes(totalMinutes); var helixItem = new TaskItem(ItemPrefix + "Batch-" + bid); From 54346c1dc1fed1157d5ab9d7c19c800201bf0808 Mon Sep 17 00:00:00 2001 From: Radek Doulik Date: Fri, 27 Mar 2026 08:41:38 +0100 Subject: [PATCH 4/5] Address code review feedback - Restore HELIX_WORKITEM_UPLOAD_ROOT after batch loop for post-commands - Clean up extracted suite directories to free disk between suites - Remove stale batch staging directory before creating new batches - Fix stale timeout comment to match actual values (20m/suite, 30m min) - Remove dead V8/Firefox conditions (batching only runs for Chrome) - Remove unused System.Linq import Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- eng/testing/WasmBatchRunner.sh | 16 +++++++++++++++- src/libraries/sendtohelix-browser.targets | 15 +++++++-------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/eng/testing/WasmBatchRunner.sh b/eng/testing/WasmBatchRunner.sh index a6c4ca7b0064fd..98f6354a435fe7 100755 --- a/eng/testing/WasmBatchRunner.sh +++ b/eng/testing/WasmBatchRunner.sh @@ -30,7 +30,16 @@ for zipFile in "$BATCH_DIR"/*.zip; do echo "========================= BEGIN $suiteName =============================" mkdir -p "$suiteDir" - unzip -q -o "$zipFile" -d "$suiteDir" + if ! unzip -q -o "$zipFile" -d "$suiteDir"; then + echo "ERROR: Failed to extract $zipFile" + FAIL_COUNT=$((FAIL_COUNT + 1)) + SUITE_NAMES+=("$suiteName") + SUITE_EXIT_CODES+=("1") + SUITE_DURATIONS+=("0") + SUITE_COUNT=$((SUITE_COUNT + 1)) + rm -rf "$suiteDir" + continue + fi export HELIX_WORKITEM_UPLOAD_ROOT="$ORIGINAL_UPLOAD_ROOT/$suiteName" mkdir -p "$HELIX_WORKITEM_UPLOAD_ROOT" @@ -46,6 +55,8 @@ for zipFile in "$BATCH_DIR"/*.zip; do popd >/dev/null + rm -rf "$suiteDir" + duration=$((endTime - startTime)) SUITE_NAMES+=("$suiteName") @@ -63,6 +74,9 @@ for zipFile in "$BATCH_DIR"/*.zip; do echo "========================= END $suiteName ===============================" done +# Restore so Helix post-commands write artifacts to the expected root +export HELIX_WORKITEM_UPLOAD_ROOT="$ORIGINAL_UPLOAD_ROOT" + echo "" echo "=== Batch Summary ===" printf "%-60s %-6s %s\n" "Suite" "Status" "Duration" diff --git a/src/libraries/sendtohelix-browser.targets b/src/libraries/sendtohelix-browser.targets index 75bd3b26b67499..bcc08d152de99d 100644 --- a/src/libraries/sendtohelix-browser.targets +++ b/src/libraries/sendtohelix-browser.targets @@ -189,7 +189,6 @@ - - + - <_WasmBatchWorkItem Include="$(TestArchiveRoot)browseronly/**/*.zip" Condition="'$(Scenario)' == 'WasmTestOnChrome' or '$(Scenario)' == 'WasmTestOnFirefox'" /> - <_WasmBatchWorkItem Include="$(TestArchiveRoot)chromeonly/**/*.zip" Condition="'$(Scenario)' == 'WasmTestOnChrome'" /> + <_WasmBatchWorkItem Include="$(TestArchiveRoot)browseronly/**/*.zip" /> + <_WasmBatchWorkItem Include="$(TestArchiveRoot)chromeonly/**/*.zip" /> - <_WasmBatchSampleZip Condition="'$(Scenario)' == 'WasmTestOnV8'" Include="$(TestArchiveRoot)runonly/**/*.Console.V8.*.Sample.zip" /> - <_WasmBatchSampleZip Condition="'$(Scenario)' == 'WasmTestOnChrome'" Include="$(TestArchiveRoot)runonly/**/*.Browser.*.Sample.zip" /> + <_WasmBatchSampleZip Include="$(TestArchiveRoot)runonly/**/*.Browser.*.Sample.zip" /> %(Identity) @@ -451,13 +449,14 @@ TimedItems = result.ToArray(); <_WasmUniqueBatchId Include="@(_WasmBatchId->Distinct())" /> - + + - + <_ComputeBatchTimeout GroupedItems="@(_WasmGroupedItem)" BatchIds="@(_WasmUniqueBatchId)" ItemPrefix="$(WorkItemPrefix)" BatchOutputDir="$(IntermediateOutputPath)helix-batches/"> From 8922444e693dc35a93c688b32c2306dfd22826d7 Mon Sep 17 00:00:00 2001 From: Radek Doulik Date: Fri, 27 Mar 2026 13:56:17 +0100 Subject: [PATCH 5/5] Improve error handling: capture unzip exit code, validate HelixCommand Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- eng/testing/WasmBatchRunner.sh | 8 +++++--- src/libraries/sendtohelix-browser.targets | 2 ++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/eng/testing/WasmBatchRunner.sh b/eng/testing/WasmBatchRunner.sh index 98f6354a435fe7..22a5c20b689f99 100755 --- a/eng/testing/WasmBatchRunner.sh +++ b/eng/testing/WasmBatchRunner.sh @@ -30,11 +30,13 @@ for zipFile in "$BATCH_DIR"/*.zip; do echo "========================= BEGIN $suiteName =============================" mkdir -p "$suiteDir" - if ! unzip -q -o "$zipFile" -d "$suiteDir"; then - echo "ERROR: Failed to extract $zipFile" + unzip -q -o "$zipFile" -d "$suiteDir" + unzipExitCode=$? + if [[ $unzipExitCode -ne 0 ]]; then + echo "ERROR: Failed to extract $zipFile (exit code: $unzipExitCode)" FAIL_COUNT=$((FAIL_COUNT + 1)) SUITE_NAMES+=("$suiteName") - SUITE_EXIT_CODES+=("1") + SUITE_EXIT_CODES+=("$unzipExitCode") SUITE_DURATIONS+=("0") SUITE_COUNT=$((SUITE_COUNT + 1)) rm -rf "$suiteDir" diff --git a/src/libraries/sendtohelix-browser.targets b/src/libraries/sendtohelix-browser.targets index bcc08d152de99d..f93858ba92b47c 100644 --- a/src/libraries/sendtohelix-browser.targets +++ b/src/libraries/sendtohelix-browser.targets @@ -468,6 +468,8 @@ TimedItems = result.ToArray(); Overwrite="true" /> + <_WasmBatchHelixCommand>$(HelixCommand.Replace('./RunTests.sh', 'chmod +x WasmBatchRunner.sh && ./WasmBatchRunner.sh'))