diff --git a/.github/workflows/engine-bench-renode-synth.yml b/.github/workflows/engine-bench-renode-synth.yml index 09ddd27..f6078e8 100644 --- a/.github/workflows/engine-bench-renode-synth.yml +++ b/.github/workflows/engine-bench-renode-synth.yml @@ -306,7 +306,7 @@ jobs: if line.startswith('E,'): print(f"R1,{variant},{line}") elif line.startswith(('drops,', 'samples,', 'build,', - 'cycles_per_sec,', 'target_samples,')): + 'cycles_per_sec,', 'target_samples,', 'overhead_cycles,')): print(f"M,R1,{variant},{line}") elif line == '=== END ===': print(f"M,R1,{variant},END") @@ -322,7 +322,7 @@ jobs: if line.startswith('E,'): print(f"R1,{variant},{line}") elif line.startswith(('drops,', 'samples,', 'build,', - 'cycles_per_sec,', 'target_samples,')): + 'cycles_per_sec,', 'target_samples,', 'overhead_cycles,')): print(f"M,R1,{variant},{line}") elif line == '=== END ===': print(f"M,R1,{variant},END") @@ -341,7 +341,7 @@ jobs: if line.startswith('E,'): print(f"R1,{variant},{line}") elif line.startswith(('drops,', 'samples,', 'build,', - 'cycles_per_sec,', 'target_samples,')): + 'cycles_per_sec,', 'target_samples,', 'overhead_cycles,')): print(f"M,R1,{variant},{line}") elif line == '=== END ===': print(f"M,R1,{variant},END") @@ -357,7 +357,7 @@ jobs: if line.startswith('E,'): print(f"R1,{variant},{line}") elif line.startswith(('drops,', 'samples,', 'build,', - 'cycles_per_sec,', 'target_samples,')): + 'cycles_per_sec,', 'target_samples,', 'overhead_cycles,')): print(f"M,R1,{variant},{line}") elif line == '=== END ===': print(f"M,R1,{variant},END") @@ -377,7 +377,7 @@ jobs: if line.startswith('E,'): print(f"R1,{variant},{line}") elif line.startswith(('drops,', 'samples,', 'build,', - 'cycles_per_sec,', 'target_samples,')): + 'cycles_per_sec,', 'target_samples,', 'overhead_cycles,')): print(f"M,R1,{variant},{line}") elif line == '=== END ===': print(f"M,R1,{variant},END") diff --git a/benches/engine_control/README.md b/benches/engine_control/README.md index 1a3db31..2049879 
100644 --- a/benches/engine_control/README.md +++ b/benches/engine_control/README.md @@ -46,6 +46,46 @@ This replaces the in-firmware histogram+mean approach whose mean divisor (reader `count`) diverged from the numerator (ISR event sum) when the sweep truncated early, invalidating the published deltas. +## Framework overhead compensation + +Every `algo_cycles` and `handoff_cycles` value emitted on the wire is +the raw measurement **minus** a constant `bench_overhead_cycles`, +measured at boot before any per-event timing begins. The +`measure_overhead()` routine in `src/main.c` runs + +```c +start = k_cycle_get_32(); +end = k_cycle_get_32(); +delta = end - start; +``` + +1000 times under `irq_lock`, sorts the deltas, and stores the +**median** as `bench_overhead_cycles`. That value is then subtracted +(saturating at 0) from every per-event count before it reaches the +CSV stream, so what's reported is the work between the cycle-counter +reads, not the cost of the cycle-counter reads themselves. + +The compensation is **visible**: the measured value is emitted as a +metadata line `overhead_cycles,<N>` in the CSV header, preserved +into the artifact bundle, and surfaced in `analyze.py`'s report header +as "Overhead subtracted (cycles): baseline ...; gale ..." — a +reviewer can audit the subtraction and re-add it if they want the +raw numbers back. + +This matches the upstream Zephyr 4.4 `ztest_bench` framework's `ctrl` +benchmark pattern (`subsys/testsuite/ztest/benchmark/`), which +measures and subtracts the cost of an `empty_function` call from +every reported result. Pre-compensation and post-compensation numbers +are **different measurements** — do not combine them in a single +comparison table. + +## Scope and non-claims + +See [SCOPE.md](SCOPE.md) for the explicit list of what this bench +measures and what it does NOT measure. That file is the source of +truth for any downstream copy (blog posts, reports). 
Do not embed +scope claims in published copy without first updating SCOPE.md. + ## Building ```sh diff --git a/benches/engine_control/SCOPE.md b/benches/engine_control/SCOPE.md new file mode 100644 index 0000000..0c71720 --- /dev/null +++ b/benches/engine_control/SCOPE.md @@ -0,0 +1,148 @@ +# `engine_control` bench — scope, non-claims, and source of truth + +This file is the **source of truth** for what the `engine_control` +benchmark measures, what it does not measure, and what kind of +evidence its numbers constitute. Subsequent blog posts, reports, +internal memos, and external citations import language from here. +**Do not** embed scope claims directly in published copy without +first updating this file. Inconsistency between published copy and +this file is a defect in the published copy. + +## What is measured + +Cycle counts on the named target at the named clock frequency, under +nominal contention from the bench harness only (no peripheral +traffic, no DMA, no inter-core activity, no production workload): + +- **`algo_cycles`** — ISR-side `control_step()` execution time: + cycle counter at ISR entry → cycle counter immediately after + `control_step()` returns. Pure C, identical between baseline and + gale builds; serves as the integrity check (medians must agree + within 10%). +- **`handoff_cycles`** — ISR-side primitive cost: cycle counter + immediately after `control_step()` → cycle counter at end of ISR. + Covers `ring_buf_put` + `k_sem_give`. The measured engineering + delta between baseline (stock Zephyr primitives) and gale + (verified-Rust replacements) lives here. + +Both values have **framework overhead compensation** applied: a +constant `bench_overhead_cycles` (median of 1000 empty +`k_cycle_get_32()`-pair measurements taken at boot under `irq_lock`) +is subtracted from every emitted value. 
The compensation constant is +emitted in the CSV header (`overhead_cycles,<N>`) and surfaced in +the analyzer's report header so any reader can audit and re-add it. +Matches Zephyr 4.4 `ztest_bench`'s `ctrl` pattern. + +The current measurement target is one of: +- **Renode 1.16.0** (CI default, container-pinned), or +- **Renode nightly** (CI cycle-model A/B control), or +- **Real silicon** (when item 1 lands; STM32F4 Discovery via SWO/DWT + capture). + +The current Cortex-M target clock is **168 MHz** on `stm32f4_disco`, +**100 kHz tick** on `qemu_cortex_m3` (smoke). Numbers are not +comparable across these targets at face value because the cycle unit +differs. + +## What is NOT measured + +This bench produces engineering measurements; it is **not** +certification evidence and does **not** measure any of the following: + +- **Peripheral contention** — no SPI, I²C, UART RX, GPIO toggle, or + bus-master traffic during the measurement window. The ISR is + driven by an internal `k_timer`, not by an external sensor. +- **DMA-driven I/O** — real flight controllers receive sensor data + via DMA-complete IRQs with bursty alignment characteristics + (cache, bus arbitration). This bench uses a synthetic timer ISR + with no DMA path. +- **SMP / multi-core** — single-CPU only. The `gale_spinlock` + primitive ships in the codebase but its actual hazard + (concurrent CAS from another core) is **not exercised by this + bench**. SMP coverage is a separate workflow (`zephyr-smp-test` + on `qemu_x86_64`) with known runtime issues. +- **WCET (Worst-Case Execution Time)** — the bench reports observed + cycle distributions. It does **not** prove a worst-case bound. + Establishing WCET requires static analysis tooling such as + **AbsInt aiT**, **Rapita RapiTime**, or **OTAWA** combined with + microarchitectural models for the specific MCU. 
Worst-case-observed + numbers, when added later under the bench-rigor work item 6, are + **not** WCET claims and must be labeled as `worst_observed`, + not `wcet`. The distinction is unambiguous and not negotiable in + published copy: an observation is not a proof. +- **Power consumption** — the bench measures cycles, not energy or + current. For embedded deployment the relevant figure is often + µJ/op or mA average, neither of which this bench produces. +- **Memory pressure** — peak heap, peak stack high-water mark, slab + fragmentation. Stack high-water-mark capture is planned (work + item 5, gated on real-silicon anchor first). +- **Fault tolerance** — stuck-sensor inputs, dropped messages, + scheduler-induced timeouts, watchdog resets. The bench operates + under **nominal** scheduling only. Fault-injection coverage is + out of scope here and belongs in a v2 of the flight bench. +- **Long-duration drift** — runs are seconds to minutes, not hours. + 32-bit cycle-counter wrap behavior, accumulated heap fragmentation, + ring-buffer head/tail drift over multi-hour operation are not + observable in this bench. + +## Status of the published delta + +The headline `−34.5%` handoff-cycle delta (gale vs GCC baseline) is: + +- **Real** — the cross-Renode A/B (1.16.0 vs nightly) shows 0.0% + drift on identical ELFs across simulator versions, ruling out the + cycle model as the source. The synth-vs-rustc-direct cross-check + shows synth's codegen agrees with (in fact slightly outperforms) + rustc-direct, ruling out a synth miscompile. +- **Tool-bounded** — produced by the on-target `k_cycle_get_32` + reading inside Renode's per-block cost simulation. **Not** anchored + to a real silicon measurement until work item 1 lands. +- **Workload-bounded** — measured in the engine_control ISR shape + (one timer ISR, one ring + sem hop). **Not** generalizable to + composed workloads (use `flight_control` for that, with its own + scope file). 
+ +## What kind of evidence this is + +**Engineering measurement** under controlled simulation, with the +methodology and toolchain enumerated in the build manifest. Suitable +for: + +- Internal regression detection (CI-gated p99 ≤ 2× baseline asserts) +- Engineering decisions about primitive choice +- Public claims of the form "we measure X cycles under conditions Y" + with conditions Y enumerated above + +**Not** suitable for: + +- Certification submissions to DO-178C, ISO 26262, IEC 61508, or any + other safety standard. Certification evidence requires qualified + tools, independent verification, requirements traceability, and + WCET via static analysis — none of which this bench provides. +- Marketing copy that elides the conditions +- Citation as "verified-for-flight" performance + +Short version for first paragraphs of any blog post: *"Cycle +measurements under Renode-simulated Cortex-M4F at 168 MHz on a +synthetic ISR workload. Engineering measurement, not certification +evidence; see SCOPE.md for the full enumeration of what is and isn't +measured."* + +## When to update this file + +Whenever: + +- The measurement target changes (e.g., real silicon arrives — work + item 1). +- The compensation regime changes (e.g., overhead compensation lands + — work item 2; algorithm or constants change later). +- The non-claims list changes (e.g., SMP coverage is added; fault + injection is added in a v2). +- A reviewer raises a scope question that the current text does not + unambiguously answer. + +Pre-compensation and post-compensation numbers are **different +measurements**. When the compensation regime changes, anchor +explicitly in published copy: *"Numbers below are +overhead-compensated; pre-compensation reference values are at +[link]"* — never combine them in the same comparison table. 
diff --git a/benches/engine_control/analyze.py b/benches/engine_control/analyze.py index aabfa41..14ffe78 100644 --- a/benches/engine_control/analyze.py +++ b/benches/engine_control/analyze.py @@ -50,6 +50,11 @@ class Meta: build: str = "?" cycles_per_sec: int = 0 target_samples: int = 0 + # Per-run framework overhead (cycles), measured at boot via + # measure_overhead() in main.c and subtracted from every algo / + # handoff value before emit. Tracked here so the report header + # can surface the compensation that's been applied. + overhead_cycles: dict[str, int] = field(default_factory=dict) # Per-run drops/samples, keyed by run id ("R1", "R2", ...) drops: dict[str, int] = field(default_factory=dict) samples: dict[str, int] = field(default_factory=dict) @@ -109,6 +114,11 @@ def parse_events(path: Path) -> tuple[list[Sample], Meta]: meta.target_samples = int(parts[4]) except ValueError: pass + elif tail == "overhead_cycles" and len(parts) >= 5: + try: + meta.overhead_cycles[run] = int(parts[4]) + except ValueError: + pass return samples, meta @@ -271,6 +281,19 @@ def render(base_s: list[Sample], gale_s: list[Sample], if hz: lines.append(f"- Cycle counter: {hz:,} Hz " f"(1 cycle ≈ {1e9/hz:.1f} ns)") + # Surface the framework-overhead compensation that's been applied + # on-target so a reviewer can audit the subtraction. Per audit P7 + # / ztest_bench parity: every algo / handoff value below is the + # raw measurement minus this constant. 
+ base_oh = base_m.overhead_cycles + gale_oh = gale_m.overhead_cycles + if base_oh or gale_oh: + b_str = ", ".join(f"{r}={v}" for r, v in sorted(base_oh.items())) \ + or "n/a" + g_str = ", ".join(f"{r}={v}" for r, v in sorted(gale_oh.items())) \ + or "n/a" + lines.append(f"- Overhead subtracted (cycles): " + f"baseline {b_str}; gale {g_str}") lines.append("") # Per-step tables diff --git a/benches/engine_control/src/main.c b/benches/engine_control/src/main.c index 3c8cccc..e5091f9 100644 --- a/benches/engine_control/src/main.c +++ b/benches/engine_control/src/main.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include "control.h" @@ -204,12 +205,55 @@ static void crank_isr(struct k_timer *t) static uint32_t count = 0; +/* + * Bench framework overhead — measured at boot before any per-event + * timing begins (see measure_overhead). Subtracted from every algo / + * handoff cycle count emitted to the CSV stream so the published + * numbers reflect work between the cycle-counter reads, not the cost + * of the cycle-counter reads themselves. The measured value is also + * emitted as a metadata line so reviewers can audit the compensation + * step. Same idiom as Zephyr 4.4 ztest_bench's `ctrl` benchmark. + */ +#define OVERHEAD_SAMPLES 1000U +static uint32_t bench_overhead_cycles = 0U; + +static int cmp_u32(const void *a, const void *b) +{ + uint32_t x = *(const uint32_t *)a; + uint32_t y = *(const uint32_t *)b; + return (x > y) - (x < y); +} + +static void measure_overhead(void) +{ + static uint32_t samples[OVERHEAD_SAMPLES]; + unsigned int key = irq_lock(); + for (uint32_t i = 0; i < OVERHEAD_SAMPLES; i++) { + uint32_t a = k_cycle_get_32(); + uint32_t b = k_cycle_get_32(); + samples[i] = b - a; + } + irq_unlock(key); + qsort(samples, OVERHEAD_SAMPLES, sizeof(uint32_t), cmp_u32); + bench_overhead_cycles = samples[OVERHEAD_SAMPLES / 2]; /* median */ +} + +/* Saturating subtraction — never report a negative cycle count. 
With + * a quiet measurement window plus interrupt-locked overhead probe, the + * compensated value should rarely if ever underflow, but we clip to 0 + * defensively rather than silently wrapping. */ +static inline uint32_t compensate(uint32_t raw) +{ + return raw > bench_overhead_cycles ? raw - bench_overhead_cycles : 0U; +} + static void emit_event(const struct crank_sample *s) { uint32_t handoff = g_handoff_by_slot[s->seq % RING_CAPACITY_SAMPLES]; printf("E,%u,%u,%u,%u,%u\n", (unsigned)s->seq, (unsigned)s->step, (unsigned)s->rpm, - (unsigned)s->algo_cycles, (unsigned)handoff); + (unsigned)compensate(s->algo_cycles), + (unsigned)compensate(handoff)); } static void reader_loop(void) @@ -239,7 +283,12 @@ static void print_csv_header(void) ); printf("cycles_per_sec,%u\n", hz); printf("target_samples,%u\n", TOTAL_SAMPLES); + /* Visible compensation: every algo / handoff value below has had + * this many cycles subtracted. Median of 1000 empty cycle-counter + * read pairs measured at boot under irq_lock (see measure_overhead). */ + printf("overhead_cycles,%u\n", bench_overhead_cycles); printf("# event rows: E,,,,,\n"); + printf("# algo / handoff cycles are AFTER subtracting overhead_cycles\n"); } static void print_csv_footer(void) @@ -343,6 +392,12 @@ int main(void) k_thread_priority_set(k_current_get(), 5); + /* Measure framework overhead BEFORE the CSV header so the value + * is recorded in the header line. Runs at thread context with + * IRQs locked for the inner loop only — no other threads exist + * yet, so this is the quietest the system will ever be. */ + measure_overhead(); + /* Emit CSV header BEFORE starting the sweep so stdout ordering * is deterministic: header, then events interleaved with sweep * progress printk, then footer. 
*/ diff --git a/benches/engine_control/tag_events.py b/benches/engine_control/tag_events.py index 4e77fea..f81cf4d 100644 --- a/benches/engine_control/tag_events.py +++ b/benches/engine_control/tag_events.py @@ -33,6 +33,7 @@ def main(argv: list[str]) -> int: print(f"M,R{run_id},{variant},END") elif (line.startswith("cycles_per_sec,") or line.startswith("target_samples,") + or line.startswith("overhead_cycles,") or line.startswith("build,")): print(f"M,R{run_id},{variant},{line}") elif line.startswith("#"):