Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions .github/workflows/ci-platform-siracusa-tiled.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,22 @@ jobs:
with:
docker_image_deeploy: ${{ inputs.docker_image_deeploy }}

# Training tests - L2 singlebuffer
siracusa-training-tiled-l2-singlebuffer:
# Training tests - L2 (SB + DB combined so the runner emits a single
# SB-vs-DB cycle comparison table to $GITHUB_STEP_SUMMARY).
siracusa-training-tiled-l2:
needs: select-env
uses: ./.github/workflows/_runner-siracusa-tiled.yml
with:
runner: ${{ needs.select-env.outputs.runner }}
docker-image: ${{ needs.select-env.outputs.image }}
pytest-marker: "training and l2 and singlebuffer"
pytest-marker: "training and l2"

# Training tests - L3 singlebuffer (models that spill weights/activations to L3)
siracusa-training-tiled-l3-singlebuffer:
# Training tests - L3 (SB + DB combined; DB uses TrainingDBOnlyL3Tiler so
# the L2 staging budget doesn't double).
siracusa-training-tiled-l3:
needs: select-env
uses: ./.github/workflows/_runner-siracusa-tiled.yml
with:
runner: ${{ needs.select-env.outputs.runner }}
docker-image: ${{ needs.select-env.outputs.image }}
pytest-marker: "training and l3 and singlebuffer"
pytest-marker: "training and l3"
9 changes: 3 additions & 6 deletions .github/workflows/ci-platform-siracusa.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,9 @@
name: CI • Siracusa

"on":
push:
branches:
- "**"
tags:
- "v*.*.*"
pull_request:
# Auto-trigger disabled in TrainDeeploy fork: untiled Siracusa is not
# exercised by training/DB work. Re-enable by restoring the push: /
# pull_request: blocks.
workflow_dispatch:
inputs:
docker_image_deeploy:
Expand Down
78 changes: 78 additions & 0 deletions DeeployTest/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,81 @@ def toolchain(request):
def cmake_args(request):
"""Return additional CMake arguments."""
return request.config.getoption("--cmake-args")


# ---------------------------------------------------------------------------
# Training cycle summary: at session end, scan $GITHUB_STEP_SUMMARY for any
# training cycle section emitted by run_and_assert_test, join SB and DB rows
# by (test, l1), and append a comparison table with speedup.
# ---------------------------------------------------------------------------
def _parse_training_section(section_body: str):
    """Extract the data rows of a training-cycle Markdown table.

    Rows are expected in the form
    `| test | l1 | mode | train_cycles | opt_cycles | weight_sram |`, where the
    three numeric cells are integers that may use `,` as thousands separator.

    A line counts as a data row only when, ignoring surrounding whitespace, it
    starts with `|`, has at least six cells, and cells 4-6 parse as integers.
    The table header and the `|---|` separator fail the integer check, so no
    substring filtering is required and a test whose name happens to contain
    `train_cycles` or a run of dashes is still reported.

    Args:
        section_body: Text of one summary section (everything that follows
            its heading).

    Returns:
        List of dicts with keys: test, l1, mode (str) and train, opt, sram
        (int), in the order the rows appear.
    """
    rows = []
    for raw_line in section_body.splitlines():
        line = raw_line.strip()
        if not line.startswith("|"):
            continue
        cells = [cell.strip() for cell in line.strip("|").split("|")]
        if len(cells) < 6:
            continue
        try:
            train, opt, sram = (int(cell.replace(",", "")) for cell in cells[3:6])
        except ValueError:
            # Header, separator, or a row that belongs to another table layout.
            continue
        rows.append({"test": cells[0], "l1": cells[1], "mode": cells[2], "train": train, "opt": opt, "sram": sram})
    return rows


def _render_speedup_table(heading: str, rows: list) -> list:
    """Return the Markdown lines of the SB-vs-DB comparison table for `rows`."""
    # Join SB and DB rows by (test, l1); a later row for the same key and mode
    # replaces an earlier one.
    by_key: dict = {}
    for row in rows:
        by_key.setdefault((row["test"], row["l1"]), {})[row["mode"]] = row

    def _count(row, field):
        return f"{row[field]:,}" if row else "—"

    def _delta(sb, db, field):
        # Relative saving of DB over SB; positive means DB needed fewer cycles.
        if sb and db and sb[field] > 0:
            return f"{(sb[field] - db[field]) / sb[field] * 100:+.1f}%"
        return "—"

    lines = [
        f"\n### {heading} — SB vs DB speedup\n\n",
        "| Test | L1 (B) | SB train | DB train | train Δ | SB opt | DB opt | opt Δ |\n",
        "|------|--------|----------|----------|---------|--------|--------|-------|\n",
    ]
    for test, l1 in sorted(by_key):
        modes = by_key[(test, l1)]
        sb, db = modes.get("SB"), modes.get("DB")
        lines.append(f"| {test} | {l1} | {_count(sb, 'train')} | {_count(db, 'train')} | {_delta(sb, db, 'train')} | "
                     f"{_count(sb, 'opt')} | {_count(db, 'opt')} | {_delta(sb, db, 'opt')} |\n")
    return lines


def pytest_terminal_summary(terminalreporter, exitstatus, config):
    """Append SB-vs-DB comparison tables to $GITHUB_STEP_SUMMARY.

    Scans the summary file for `## Siracusa L2 training cycles` and
    `## Siracusa L3 training cycles` sections, parses their rows, and appends
    one comparison table per distinct heading.

    The same heading may occur several times in the file (e.g. when more than
    one process wrote its own header). Rows from all sections sharing a
    heading are merged before joining, so an SB row and a DB row that ended
    up in different sections are still compared, and only one table per
    heading is written.

    Best-effort: does nothing when the variable is unset, the file does not
    exist, no rows are found, or the file cannot be read or written. The hook
    arguments are not used.
    """
    import re as _re  # local import: do not rely on the module importing `re`

    summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
    if not summary_path or not os.path.exists(summary_path):
        return
    try:
        # Explicit UTF-8: the tables contain non-ASCII characters.
        with open(summary_path, "r", encoding = "utf-8") as f:
            existing = f.read()
    except (OSError, UnicodeError):
        return

    # Collect rows per heading, using each match's own position so repeated
    # headings are not all resolved to the first occurrence.
    rows_by_heading: dict = {}
    for match in _re.finditer(r"^## (Siracusa L[23] training cycles)$", existing, flags = _re.MULTILINE):
        body_end = existing.find("\n## ", match.end())
        body = existing[match.end():] if body_end == -1 else existing[match.end():body_end]
        rows_by_heading.setdefault(match.group(1), []).extend(_parse_training_section(body))

    lines = []
    for heading, rows in rows_by_heading.items():
        if rows:
            lines.extend(_render_speedup_table(heading, rows))
    if not lines:
        return

    try:
        with open(summary_path, "a", encoding = "utf-8") as f:
            f.writelines(lines)
    except (OSError, UnicodeError):
        # Never let summary IO failure affect the test session result.
        pass
21 changes: 13 additions & 8 deletions DeeployTest/testMVPOptimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from testUtils.codeGenerateTraining import build_shared_buffer_maps, generateOptimizerTestNetwork
from testUtils.platformMapping import mapDeployer, mapPlatform, setupMemoryPlatform
from testUtils.testRunner import TestGeneratorArgumentParser
from testUtils.tilingUtils import TrainingSBTiler
from testUtils.tilingUtils import TrainingDBTiler, TrainingSBTiler
from testUtils.trainingUtils import _mockScheduler, add_optimizer_training_dir_arg

from Deeploy.AbstractDataTypes import PointerClass
Expand Down Expand Up @@ -102,15 +102,17 @@ def generateTiledOptimizerNetwork(args) -> None:
AnnotateDefaultMemoryLevel(memoryHierarchy),
])

# 7. Wrap with SBTiler (single-buffering; optimizer is forward-only, no lifetime extension needed).
unique_params = f"{args.dumpdir}_L1{args.l1}_L2{args.l2}_{args.defaultMemLevel}_optimizer"
# 7. Wrap with tiler. SB by default; --doublebuffer switches to TrainingDBTiler.
unique_params = f"{args.dumpdir}_L1{args.l1}_L2{args.l2}_{args.defaultMemLevel}_optimizer_DB{args.doublebuffer}"
testIdentifier = hashlib.md5(unique_params.encode()).hexdigest()[:16]

# TrainingSBTiler extends all input buffer lifetimes to the end of the
# schedule (via TrainingMemoryScheduler). This prevents the allocator from
# reusing the space of a consumed input (e.g. fc1 weight) for a later
# output (e.g. fc2 updated weight), which would corrupt the weight buffer.
deployer = TilerDeployerWrapper(deployer, TrainingSBTiler, testName = testIdentifier, workDir = args.dumpdir)
# TrainingSBTiler/TrainingDBTiler extend all input buffer lifetimes to the
# end of the schedule (via TrainingMemoryScheduler). This prevents the
# allocator from reusing the space of a consumed input (e.g. fc1 weight)
# for a later output (e.g. fc2 updated weight), which would corrupt the
# weight buffer.
tilerCls = TrainingDBTiler if args.doublebuffer else TrainingSBTiler
deployer = TilerDeployerWrapper(deployer, tilerCls, testName = testIdentifier, workDir = args.dumpdir)
deployer.tiler.visualizeMemoryAlloc = args.plotMemAlloc
deployer.tiler.memoryAllocStrategy = args.memAllocStrategy
deployer.tiler.searchStrategy = args.searchStrategy
Expand Down Expand Up @@ -159,6 +161,9 @@ def generateTiledOptimizerNetwork(args) -> None:
type = str,
default = "L2",
help = "Default memory level for IO buffers. Default: L2.")
parser.add_argument("--doublebuffer",
action = "store_true",
help = "Enable double buffering for tile DMA transfers (TrainingDBTiler).")
parser.add_argument("--memAllocStrategy",
type = str,
default = "MiniMalloc",
Expand Down
16 changes: 12 additions & 4 deletions DeeployTest/testMVPTraining.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from testUtils.codeGenerateTraining import generateTrainingTestNetwork
from testUtils.platformMapping import mapDeployer, mapPlatform, setupMemoryPlatform
from testUtils.testRunner import TestGeneratorArgumentParser
from testUtils.tilingUtils import TrainingSBTiler
from testUtils.tilingUtils import TrainingDBOnlyL3Tiler, TrainingDBTiler, TrainingSBTiler
from testUtils.trainingUtils import _GRAD_ACC, _infer_data_size, _infer_n_accum, _infer_num_data_inputs, \
_infer_total_mb, _load_reference_losses, _mockScheduler, add_training_inference_args
from testUtils.typeMapping import inferTypeAndOffset
Expand Down Expand Up @@ -132,11 +132,16 @@ def generateTiledTrainingNetwork(args) -> None:
AnnotateDefaultMemoryLevel(memoryHierarchy),
])

# 9. Wrap with tiler (TrainingSBTiler: SB strategy + extended input lifetimes for backward pass).
unique_params = f"{args.dumpdir}_L1{args.l1}_L2{args.l2}_{args.defaultMemLevel}"
# 9. Wrap with tiler. SB by default; --doublebuffer switches to TrainingDBTiler
# (DB strategy + same TrainingMemoryScheduler input-lifetime extension).
unique_params = f"{args.dumpdir}_L1{args.l1}_L2{args.l2}_{args.defaultMemLevel}_DB{args.doublebuffer}"
testIdentifier = hashlib.md5(unique_params.encode()).hexdigest()[:16]

deployer = TilerDeployerWrapper(deployer, TrainingSBTiler, testName = testIdentifier, workDir = args.dumpdir)
if args.doublebuffer:
tilerCls = TrainingDBOnlyL3Tiler if args.defaultMemLevel == "L3" else TrainingDBTiler
else:
tilerCls = TrainingSBTiler
deployer = TilerDeployerWrapper(deployer, tilerCls, testName = testIdentifier, workDir = args.dumpdir)
deployer.tiler.visualizeMemoryAlloc = args.plotMemAlloc
deployer.tiler.memoryAllocStrategy = args.memAllocStrategy
deployer.tiler.searchStrategy = args.searchStrategy
Expand Down Expand Up @@ -245,6 +250,9 @@ def generateTiledTrainingNetwork(args) -> None:
type = str,
default = "L2",
help = "Default memory level for IO buffers. Default: L2.")
parser.add_argument("--doublebuffer",
action = "store_true",
help = "Enable double buffering for tile DMA transfers (TrainingDBTiler).")
parser.add_argument("--memAllocStrategy",
type = str,
default = "MiniMalloc",
Expand Down
59 changes: 58 additions & 1 deletion DeeployTest/testUtils/pytestRunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# SPDX-License-Identifier: Apache-2.0

import os
import re
from pathlib import Path
from typing import List, Literal, Optional

Expand All @@ -17,6 +18,16 @@
'run_simulation',
]

# Names of the Markdown sections for which a header has already been written
# to $GITHUB_STEP_SUMMARY by this Python process. _emit_training_cycle_row
# consults it so that parametrised cases sharing a section emit the section
# heading and table header only once.
# NOTE(review): the set is process-local; if tests are spread over several
# worker processes, each worker presumably writes its own header for the same
# section — confirm that whatever reads the summary copes with repeats.
_METRIC_SECTIONS_WRITTEN: set = set()

# Matches `BENCH train_cycles=<N> opt_cycles=<M> weight_sram=<K>` and captures
# the three decimal values as groups 1-3. The line is printed once per
# training run by the test harness; it is captured here so a cycles row can be
# appended to $GITHUB_STEP_SUMMARY for SB-vs-DB comparison.
_TRAIN_BENCH_RE = re.compile(r"BENCH train_cycles=(\d+) opt_cycles=(\d+) weight_sram=(\d+)")


def get_worker_id() -> str:
"""
Expand Down Expand Up @@ -122,10 +133,53 @@ def create_test_config(
return config


def run_and_assert_test(test_name: str, config: DeeployTestConfig, skipgen: bool, skipsim: bool) -> None:
def _emit_training_cycle_row(test_name: str, config: DeeployTestConfig, stdout: str, metric_section: str) -> None:
    """Append one training-cycle row to $GITHUB_STEP_SUMMARY.

    Searches `stdout` for the first
    `BENCH train_cycles=... opt_cycles=... weight_sram=...` line (via
    _TRAIN_BENCH_RE) and appends a Markdown table row under
    `## {metric_section}`. The section heading and table header are written
    the first time this process sees a section, tracked in
    _METRIC_SECTIONS_WRITTEN.

    No-op when $GITHUB_STEP_SUMMARY is unset or empty, or when no BENCH line
    was captured (e.g. inference tests, --skipsim runs).

    Args:
        test_name: Label used for the row's first column.
        config: Test configuration; only `config.gen_args` is read, to derive
            the SB/DB mode and the L1 size.
        stdout: Captured output of the test run; may be None or empty.
        metric_section: Heading text of the Markdown section to append to.
    """
    summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
    if not summary_path:
        return
    bench = _TRAIN_BENCH_RE.search(stdout or "")
    if bench is None:
        return
    # The pattern only captures digit runs, so int() cannot fail here.
    train_cycles, opt_cycles, weight_sram = (int(value) for value in bench.groups())

    gen_args = list(config.gen_args or [])
    mode = "DB" if "--doublebuffer" in gen_args else "SB"

    # Accept both the `--l1=<N>` and the `--l1 <N>` spelling of the option.
    l1 = "—"
    for index, arg in enumerate(gen_args):
        if arg.startswith("--l1="):
            l1 = arg.split("=", 1)[1]
            break
        if arg == "--l1" and index + 1 < len(gen_args):
            l1 = gen_args[index + 1]
            break

    try:
        # Explicit UTF-8: the placeholder L1 cell is a non-ASCII dash, which a
        # locale-dependent default encoding may be unable to write.
        with open(summary_path, "a", encoding = "utf-8") as f:
            if metric_section not in _METRIC_SECTIONS_WRITTEN:
                f.write(f"\n## {metric_section}\n\n")
                f.write("| Test | L1 (B) | Mode | train_cycles | opt_cycles | weight_sram |\n")
                f.write("|------|--------|------|--------------|------------|-------------|\n")
                # Recorded only after the header was written successfully.
                _METRIC_SECTIONS_WRITTEN.add(metric_section)
            f.write(f"| {test_name} | {l1} | {mode} | {train_cycles:,} | {opt_cycles:,} | "
                    f"{weight_sram:,} |\n")
    except (OSError, UnicodeError):
        # Best-effort: never let summary IO failure mask a real test result.
        pass


def run_and_assert_test(test_name: str,
config: DeeployTestConfig,
skipgen: bool,
skipsim: bool,
metric_section: Optional[str] = None) -> None:
"""
Shared helper function to run a test and assert its results.

When `metric_section` is non-None and $GITHUB_STEP_SUMMARY is set, append
a cycle-count row to that Markdown section so reviewers can see SB-vs-DB
deltas directly in the workflow summary panel.

Raises:
AssertionError: If test fails or has errors
"""
Expand All @@ -136,3 +190,6 @@ def run_and_assert_test(test_name: str, config: DeeployTestConfig, skipgen: bool

if result.error_count >= 0:
assert result.error_count == 0, (f"Found {result.error_count} errors out of {result.total_count} tests")

if metric_section:
_emit_training_cycle_row(test_name, config, result.stdout, metric_section)
Loading
Loading