Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
ff30086
Add Siracusa_w_redmule platform (RedMulE accelerator integration)
runwangdl Apr 13, 2026
a5c8c5f
drop Float test fixtures
runwangdl Apr 13, 2026
0f4401f
Add CI skeleton for Siracusa_w_redmule (tiled)
runwangdl Apr 14, 2026
7105c15
Run pre-commit autoformat over RedMule files
runwangdl Apr 14, 2026
2dbb9ac
ci: gate GAP9 workflows on upstream org to skip on forks
runwangdl Apr 14, 2026
ef1e5dc
ci: skip gh-pages publish on forks
runwangdl Apr 14, 2026
4cc596f
Populate Siracusa_w_redmule tiled kernel test matrix
runwangdl Apr 14, 2026
f98f1f4
ci(redmule): force runwangdl/deeploy:redmule image on push/PR
runwangdl Apr 14, 2026
4bb02d0
ci(redmule): authenticate container pulls for private ghcr.io image
runwangdl Apr 14, 2026
b682739
ci(redmule): add CCT_train tiled-L3 job to Siracusa+RedMulE CI
runwangdl May 10, 2026
39bb8f1
fix(redmule): lower bias-less Gemm to MatMul so CCT_train tiling stop…
runwangdl May 10, 2026
1782a88
fix(redmule+upstream-transpose): unblock CCT_train codegen end-to-end
runwangdl May 10, 2026
78a05d4
fix(redmule): unmap Conv from RedMulE engine; drop weight-layout pass
runwangdl May 10, 2026
9d85037
style: pre-commit autoformat (yapf / isort / autoflake) + drop EOF bl…
runwangdl May 10, 2026
61bdb78
ci(perf): expose GVSoC cycle counts for Siracusa(+RedMulE) training jobs
runwangdl May 10, 2026
4517cc9
feat(redmule): RedMulE-accelerated FP32 Conv2d via im2col + matmul
runwangdl May 10, 2026
6de609a
style+ci: fix lint on 4517cc9; emit RedMulE cycle/speedup summary in …
runwangdl May 10, 2026
46e4f3c
feat(redmule): add ResNet8 + MobileNetV1 to training matrix; multi-mo…
runwangdl May 10, 2026
28b18a8
feat(redmule): RedMulE-accelerated PWConvGradW / PWConvGradX kernels
runwangdl May 10, 2026
68d1639
fix(redmule): ship PWConvGradX-only path; defer PWConvGradW routing
runwangdl May 10, 2026
5b59d3a
feat(redmule): chunked PWConvGradW + scattered PWConvGradX; isolate k…
runwangdl May 10, 2026
6711cc4
fix(ci): empty L3_SINGLEBUFFER_TRAINING_MODELS broke pytest collection
runwangdl May 10, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 161 additions & 0 deletions .github/workflows/_runner-siracusa-redmule-tiled.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
# SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
#
# SPDX-License-Identifier: Apache-2.0

---
# Display name of this reusable (workflow_call) runner. It mirrors the file
# name, like the sibling `_runner-siracusa-tiled` workflow; the default
# `pytest-flags` below run pytest with `-n 4`, i.e. not sequentially.
name: _runner-siracusa-redmule-tiled

"on":
workflow_call:
inputs:
runner:
required: true
type: string
docker-image:
required: true
type: string
pytest-marker:
required: true
type: string
# Extra flags injected into the pytest command, between -v and the -m
# marker filter. Default keeps the original 4-worker xdist behavior;
# callers that want simulator stdout (e.g. GVSoC cycle counts) in the
# CI log can override with "-s -p no:xdist" to disable capture and
# the parallel worker plugin (xdist eats per-test stdout).
pytest-flags:
required: false
type: string
default: "-n 4"

jobs:
test-runner-siracusa-redmule-tiled:
runs-on: ${{ inputs.runner }}
container:
image: ${{ inputs.docker-image }}
credentials:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
steps:
- name: Mark workspace as safe
run: git config --global --add safe.directory '*'
- name: Checkout Repo
uses: actions/checkout@v4
with:
submodules: recursive
- name: Build Deeploy
shell: bash
run: pip install -e .
- name: Run Test
run: |
cd DeeployTest
mkdir -p /app/.ccache
export CCACHE_DIR=/app/.ccache
set -o pipefail
pytest test_platforms.py -v ${{ inputs.pytest-flags }} -m "siracusa_redmule_tiled and ${{ inputs.pytest-marker }}" 2>&1 | tee /tmp/pytest_out.log
shell: bash
- name: Report cycle counts (RedMulE side, with speedup vs Siracusa)
if: always()
shell: bash
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
REPO: ${{ github.repository }}
MARKER: ${{ inputs.pytest-marker }}
run: |
python3 - <<'PY'
import json
import os
import pathlib
import re
import sys
import urllib.parse
import urllib.request

LOG_PATH = "/tmp/pytest_out.log"

# A BENCH record has to start its line. The local pytest log contains the bare
# record; job logs downloaded through the REST API prefix every line with an
# ISO-8601 timestamp (and the first line may start with a BOM), so that prefix
# is accepted too. Without it the baseline log never matches.
PAT = re.compile(
    r'^\ufeff?(?:\d{4}-\d{2}-\d{2}T\S+\s+)?'
    r'BENCH train_cycles=(\d+) opt_cycles=(\d+) weight_sram=(\d+)')


def parse_bench_lines(lines):
    """Return one {'train', 'opt', 'sram'} dict of ints per BENCH record.

    `lines` is any iterable of text lines (an open file, `str.splitlines()`).
    Lines that are not BENCH records are ignored; order is preserved.
    """
    records = []
    for line in lines:
        m = PAT.search(line)
        if m:
            records.append({
                'train': int(m.group(1)),
                'opt': int(m.group(2)),
                'sram': int(m.group(3))})
    return records


def speedup_cell(baseline_cycles, redmule_cycles):
    """Format baseline/redmule as a bold markdown ratio, or a dash if undefined."""
    if redmule_cycles == 0:
        # A zero cycle count has no meaningful ratio; keep the row instead of
        # aborting the whole table with ZeroDivisionError.
        return "—"
    return f"**{baseline_cycles / redmule_cycles:.3f}×**"


class CrossHostAuthStripper(urllib.request.HTTPRedirectHandler):
    """Redirect handler that does not forward the token to a different host.

    The job-logs endpoint answers with a redirect to a pre-signed download
    URL; urllib would otherwise copy the Authorization header onto that
    request as well.
    """

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        redirected = super().redirect_request(req, fp, code, msg, headers, newurl)
        if redirected is not None:
            old_host = urllib.parse.urlsplit(req.full_url).hostname
            new_host = urllib.parse.urlsplit(newurl).hostname
            if old_host != new_host:
                redirected.headers.pop('Authorization', None)
        return redirected


def gh(url, tok):
    """GET `url` with the workflow token and return the raw response bytes."""
    opener = urllib.request.build_opener(CrossHostAuthStripper)
    req = urllib.request.Request(url, headers={'Authorization': f'bearer {tok}'})
    with opener.open(req, timeout=20) as r:
        return r.read()


def baseline_section(repo, head_sha, tok, rmu):
    """Build the markdown lines of the speedup section for the records `rmu`.

    Looks up the 'CI • Siracusa (Tiled)' push run on `head_sha`, picks its
    successful training/L3 job, parses that job's log and matches records by
    weight_sram. Returns a short explanatory note instead of a table when no
    run or job is available. Network and API errors propagate to the caller.
    """
    runs = json.loads(gh(
        f"https://api.github.com/repos/{repo}/actions/runs"
        f"?head_sha={head_sha}&per_page=30", tok))
    base_run_id = next(
        (r['id'] for r in runs.get('workflow_runs', [])
         if r['name'] == 'CI • Siracusa (Tiled)' and r['event'] == 'push'),
        None)
    if base_run_id is None:
        return ["", "_No matching `Siracusa (Tiled)` push run on this SHA — speedup diff skipped._"]

    jobs = json.loads(gh(
        f"https://api.github.com/repos/{repo}/actions/runs/{base_run_id}/jobs", tok))
    base_job_id = next(
        (j['id'] for j in jobs.get('jobs', [])
         if 'training' in j['name'].lower()
         and 'l3' in j['name'].lower()
         and j.get('conclusion') == 'success'),
        None)
    if base_job_id is None:
        return ["", "_Siracusa training-L3 baseline job not finished/green yet — speedup diff skipped._"]

    txt = gh(
        f"https://api.github.com/repos/{repo}/actions/jobs/{base_job_id}/logs",
        tok).decode('utf-8', 'replace')
    # Keyed by weight_sram; a later record with the same key wins.
    base = {rec['sram']: {'train': rec['train'], 'opt': rec['opt']}
            for rec in parse_bench_lines(txt.splitlines())}

    section = ["", "## Speedup vs Siracusa baseline (matched by weight_sram)", ""]
    section += ["| weight_sram | Siracusa train | + RedMulE train | sp<sub>train</sub> | Siracusa opt | + RedMulE opt | sp<sub>opt</sub> |"]
    section += ["|---:|---:|---:|:---:|---:|---:|:---:|"]
    for r in rmu:
        b = base.get(r['sram'])
        if b is None:
            section.append(f"| {r['sram']:,} | — | {r['train']:,} | _no match_ | — | {r['opt']:,} | — |")
        else:
            section.append(
                f"| {r['sram']:,} | {b['train']:,} | {r['train']:,} | {speedup_cell(b['train'], r['train'])} "
                f"| {b['opt']:,} | {r['opt']:,} | {speedup_cell(b['opt'], r['opt'])} |")
    return section


def main():
    """Summarise BENCH cycle counts from the pytest log into the step summary."""
    if not pathlib.Path(LOG_PATH).exists():
        print("no pytest log found; skipping")
        sys.exit(0)

    # 1. parse RedMulE side's BENCH lines (one per training model)
    # errors='replace': simulator output is not guaranteed to be clean text.
    with open(LOG_PATH, errors='replace') as fh:
        rmu = parse_bench_lines(fh)
    if not rmu:
        print("No BENCH line in pytest output (kernel-only job?). Skipping summary.")
        sys.exit(0)

    out = []
    marker = os.environ.get('MARKER', '?')
    sha = os.environ.get('HEAD_SHA', '')[:7]
    out.append(f"## Siracusa + RedMulE cycles ({marker})")
    out.append("")
    out.append("| weight_sram | train_cycles | opt_cycles |")
    out.append("|---:|---:|---:|")
    for r in rmu:
        out.append(f"| {r['sram']:,} | {r['train']:,} | {r['opt']:,} |")
    out.append("")
    out.append(f"_Counted on commit `{sha}` via GVSoC._")

    # 2. best-effort: find Siracusa baseline on same SHA, build speedup table
    repo = os.environ.get('REPO', '')
    head_sha = os.environ.get('HEAD_SHA', '')
    tok = os.environ.get('GH_TOKEN', '')
    try:
        # The section is only appended once it has been built completely, so
        # a failure never leaves a half-rendered table in the summary.
        out += baseline_section(repo, head_sha, tok, rmu)
    except Exception as e:
        out += ["", f"_Baseline lookup failed: `{type(e).__name__}: {e}` — RedMulE numbers above are still valid._"]

    text = "\n".join(out) + "\n"
    print(text)
    sp = os.environ.get('GITHUB_STEP_SUMMARY')
    if sp:
        with open(sp, 'a') as f:
            f.write(text)


# `python3 -` (script on stdin) runs with __name__ == "__main__".
if __name__ == "__main__":
    main()
PY
33 changes: 32 additions & 1 deletion .github/workflows/_runner-siracusa-tiled.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@ name: _runner-siracusa-tiled
pytest-marker:
required: true
type: string
# Extra flags injected into the pytest command (between -v and the -m
# marker filter). Default empty preserves the existing sequential
# invocation; callers that want simulator stdout (e.g. GVSoC cycle
# counts) in the CI log can override with "-s" to disable capture.
pytest-flags:
required: false
type: string
default: ""

jobs:
test-runner-siracusa-tiled:
Expand All @@ -36,5 +44,28 @@ jobs:
- name: Run Test
run: |
cd DeeployTest
pytest test_platforms.py -v -m "siracusa_tiled and ${{ inputs.pytest-marker }}"
set -o pipefail
pytest test_platforms.py -v ${{ inputs.pytest-flags }} -m "siracusa_tiled and ${{ inputs.pytest-marker }}" 2>&1 | tee /tmp/pytest_out.log
shell: bash
- name: Report cycle counts (Siracusa baseline)
if: always()
shell: bash
run: |
# Copy each BENCH record from the captured pytest output into the run
# summary as a markdown table row, so the numbers for this SHA can be
# compared with the RedMulE-side workflow. Jobs whose output has no BENCH
# record (kernel-only matrices) only print a short notice.
log_file=/tmp/pytest_out.log
bench_re='^BENCH train_cycles='

grep -q "$bench_re" "$log_file" 2>/dev/null || {
  echo "No BENCH line found (probably a kernel-only job); skipping summary."
  exit 0
}

# bench_field RECORD KEY -> prints the digits following "KEY=" in RECORD.
bench_field() {
  sed -nE "s/.*$2=([0-9]+).*/\1/p" <<< "$1"
}

{
  printf '%s\n' \
    "## Siracusa baseline training cycles" \
    "" \
    "| model (weight_sram) | train_cycles | opt_cycles |" \
    "|---|---:|---:|"
  grep "$bench_re" "$log_file" | while read -r record; do
    tc=$(bench_field "$record" train_cycles)
    oc=$(bench_field "$record" opt_cycles)
    ws=$(bench_field "$record" weight_sram)
    echo "| weight_sram=${ws} | ${tc} | ${oc} |"
  done
} >> "$GITHUB_STEP_SUMMARY"
7 changes: 7 additions & 0 deletions .github/workflows/ci-platform-gap9-tiled.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,16 @@ concurrency:
cancel-in-progress: true

jobs:
# GAP9 CI requires access to the private ghcr.io/pulp-platform/deeploy-gap9
# image; gate on upstream org so forks skip cleanly.
# Calls the reusable _select-env workflow; skipped unless the repository
# owner is 'pulp-platform'.
select-env:
if: github.repository_owner == 'pulp-platform'
uses: ./.github/workflows/_select-env.yml
with:
# A manually dispatched `docker_image_deeploy` input wins; otherwise the
# literal default is used.
# NOTE(review): the default lives under ghcr.io/runwangdl/, while this job
# only runs for owner 'pulp-platform' and the comment above names
# ghcr.io/pulp-platform/deeploy-gap9 — confirm which image is intended.
docker_image_deeploy: ${{ github.event.inputs.docker_image_deeploy || 'ghcr.io/runwangdl/deeploy:gap9' }}

gap9-kernels-tiled-singlebuffer-L2:
if: github.repository_owner == 'pulp-platform'
needs: select-env
uses: ./.github/workflows/_runner-gap9-tiled.yml
with:
Expand All @@ -35,6 +39,7 @@ jobs:
pytest-markers: "gap9_tiled and kernels and singlebuffer and l2"

gap9-kernels-tiled-doublebuffer-L2:
if: github.repository_owner == 'pulp-platform'
needs: select-env
uses: ./.github/workflows/_runner-gap9-tiled.yml
with:
Expand All @@ -43,6 +48,7 @@ jobs:
pytest-markers: "gap9_tiled and kernels and doublebuffer and l2"

gap9-models-tiled-singlebuffer-L2:
if: github.repository_owner == 'pulp-platform'
needs: select-env
uses: ./.github/workflows/_runner-gap9-tiled.yml
with:
Expand All @@ -51,6 +57,7 @@ jobs:
pytest-markers: "gap9_tiled and models and singlebuffer and l2"

gap9-models-tiled-doublebuffer-L2:
if: github.repository_owner == 'pulp-platform'
needs: select-env
uses: ./.github/workflows/_runner-gap9-tiled.yml
with:
Expand Down
5 changes: 5 additions & 0 deletions .github/workflows/ci-platform-gap9.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,16 @@ concurrency:
cancel-in-progress: true

jobs:
# GAP9 CI requires access to the private ghcr.io/pulp-platform/deeploy-gap9
# image; gate on upstream org so forks skip cleanly.
# Calls the reusable _select-env workflow; skipped unless the repository
# owner is 'pulp-platform'.
select-env:
if: github.repository_owner == 'pulp-platform'
uses: ./.github/workflows/_select-env.yml
with:
# A manually dispatched `docker_image_deeploy` input wins; otherwise the
# literal default is used.
# NOTE(review): the default lives under ghcr.io/runwangdl/, while this job
# only runs for owner 'pulp-platform' and the comment above names
# ghcr.io/pulp-platform/deeploy-gap9 — confirm which image is intended.
docker_image_deeploy: ${{ github.event.inputs.docker_image_deeploy || 'ghcr.io/runwangdl/deeploy:gap9' }}

gap9-kernels:
if: github.repository_owner == 'pulp-platform'
needs: select-env
uses: ./.github/workflows/_runner-gap9.yml
with:
Expand All @@ -36,6 +40,7 @@ jobs:
pytest-marker: "kernels"

gap9-models:
if: github.repository_owner == 'pulp-platform'
needs: select-env
uses: ./.github/workflows/_runner-gap9.yml
with:
Expand Down
63 changes: 63 additions & 0 deletions .github/workflows/ci-platform-siracusa-redmule-tiled.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
#
# SPDX-License-Identifier: Apache-2.0

---
name: CI • Siracusa + RedMulE (Tiled)

"on":
push:
branches:
- "**"
tags:
- "v*.*.*"
pull_request:
workflow_dispatch:
inputs:
docker_image_deeploy:
description: "Deeploy Image to use"
required: false
default: "ghcr.io/runwangdl/deeploy:redmule"

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
select-env:
uses: ./.github/workflows/_select-env.yml
with:
# RedMulE CI needs the fork's custom Docker image that bundles a
# GVSoC build with the light_redmule model. Fall back to
# runwangdl/deeploy:redmule on push/PR events (when no input is
# provided) rather than the upstream devel image.
docker_image_deeploy: ${{ inputs.docker_image_deeploy || 'ghcr.io/runwangdl/deeploy:redmule' }}

siracusa-redmule-kernels-tiled-singlebuffer-L2:
needs: select-env
uses: ./.github/workflows/_runner-siracusa-redmule-tiled.yml
with:
runner: ${{ needs.select-env.outputs.runner }}
docker-image: ${{ needs.select-env.outputs.image }}
pytest-marker: "kernels and singlebuffer and l2"

siracusa-redmule-kernels-tiled-doublebuffer-L2:
needs: select-env
uses: ./.github/workflows/_runner-siracusa-redmule-tiled.yml
with:
runner: ${{ needs.select-env.outputs.runner }}
docker-image: ${{ needs.select-env.outputs.image }}
pytest-marker: "kernels and doublebuffer and l2"

siracusa-redmule-training-tiled-singlebuffer-L3:
needs: select-env
uses: ./.github/workflows/_runner-siracusa-redmule-tiled.yml
with:
runner: ${{ needs.select-env.outputs.runner }}
docker-image: ${{ needs.select-env.outputs.image }}
pytest-marker: "training and singlebuffer and l3"
# Disable pytest's stdout capture so GVSoC's "Cycles" report from the
# cct_train simulation lands in the CI log; needs -p no:xdist because
# the parallel worker plugin would otherwise re-buffer stdout. Only
# one test case in this matrix anyway, so dropping -n 4 is harmless.
pytest-flags: "-s -p no:xdist"
4 changes: 4 additions & 0 deletions .github/workflows/ci-platform-siracusa-tiled.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,7 @@ jobs:
runner: ${{ needs.select-env.outputs.runner }}
docker-image: ${{ needs.select-env.outputs.image }}
pytest-marker: "training and l3 and singlebuffer"
# -s makes GVSoC's per-test "Cycles" report visible in the CI log,
# so cct_train cycle counts on plain Siracusa can be diffed against
# the Siracusa+RedMulE run for an apples-to-apples speedup number.
pytest-flags: "-s"
4 changes: 2 additions & 2 deletions .github/workflows/infra-generate-documentation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ jobs:
sphinx-build docs _build
- name: Prepare Multipages
uses: xeratec/gh-pages-multibranch@pr/support_tags
if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' }}
if: ${{ (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && github.repository_owner == 'pulp-platform' }}
with:
directory: _build
- name: Deploy to GitHub Pages
uses: peaceiris/actions-gh-pages@v3
if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch'}}
if: ${{ (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && github.repository_owner == 'pulp-platform' }}
with:
publish_branch: gh-pages
github_token: ${{ secrets.GITHUB_TOKEN }}
Expand Down
10 changes: 6 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ if(TOOLCHAIN STREQUAL GCC)
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
endif()

set(platform MemPool CACHE STRING "Platform (MemPool, SoftHier, QEMU, Siracusa, Siracusa_w_neureka, PULP-Open, GAP9, Generic, Snitch)")
set_property(CACHE platform PROPERTY STRINGS MemPool SoftHier QEMU Siracusa Siracusa_w_neureka PULP-Open GAP9 Generic Snitch)
# Target platform selection (defaults to MemPool).
# The STRINGS property feeds the cmake-gui / ccmake drop-down, so it lists the
# exact spellings that the `platform STREQUAL ...` checks in this file compare
# against (`PULPOpen`, `QEMU-ARM`), not display names; a value chosen from the
# list must match one of those branches.
set(platform MemPool CACHE STRING "Platform (MemPool, SoftHier, QEMU-ARM, Siracusa, Siracusa_w_neureka, Siracusa_w_redmule, PULPOpen, GAP9, Generic, Snitch)")
set_property(CACHE platform PROPERTY STRINGS MemPool SoftHier QEMU-ARM Siracusa Siracusa_w_neureka Siracusa_w_redmule PULPOpen GAP9 Generic Snitch)

if(platform STREQUAL MemPool)
message(STATUS "Building for platform 'MemPool'")
Expand All @@ -31,6 +31,8 @@ elseif(platform STREQUAL Siracusa)
message(STATUS "Building for platform 'Siracusa'")
elseif(platform STREQUAL Siracusa_w_neureka)
message(STATUS "Building for platform 'Siracusa_w_neureka'")
elseif(platform STREQUAL Siracusa_w_redmule)
message(STATUS "Building for platform 'Siracusa_w_redmule'")
elseif(platform STREQUAL PULPOpen)
message(STATUS "Building for platform 'PULP-Open'")
elseif(platform STREQUAL GAP9)
Expand Down Expand Up @@ -196,7 +198,7 @@ if(platform STREQUAL QEMU-ARM)

endif()

if(platform STREQUAL Siracusa OR platform STREQUAL Siracusa_w_neureka OR platform STREQUAL PULPOpen)
if(platform STREQUAL Siracusa OR platform STREQUAL Siracusa_w_neureka OR platform STREQUAL Siracusa_w_redmule OR platform STREQUAL PULPOpen)

if(TOOLCHAIN STREQUAL LLVM)
set(CMAKE_TOOLCHAIN_FILE ${CMAKE_CURRENT_LIST_DIR}/cmake/pulp/toolchain_llvm.cmake)
Expand All @@ -206,7 +208,7 @@ if(platform STREQUAL Siracusa OR platform STREQUAL Siracusa_w_neureka OR platfor

include(${CMAKE_CURRENT_LIST_DIR}/cmake/pulp/pulp.cmake)

if(platform STREQUAL Siracusa OR platform STREQUAL Siracusa_w_neureka)
if(platform STREQUAL Siracusa OR platform STREQUAL Siracusa_w_neureka OR platform STREQUAL Siracusa_w_redmule)
include(${CMAKE_CURRENT_LIST_DIR}/cmake/pulp/siracusa/siracusa.cmake)
elseif(platform STREQUAL PULPOpen)
include(${CMAKE_CURRENT_LIST_DIR}/cmake/pulp/pulp-open/pulp-open.cmake)
Expand Down
Empty file.
Loading
Loading