From 689ac227e334414dbe82b2fd0798bd6b13a8589e Mon Sep 17 00:00:00 2001 From: swoiszwillo Date: Sun, 7 Jun 2026 08:38:07 -0600 Subject: [PATCH 1/3] test: stress coverage for k_sem/k_event race windows + divergence notes Closes the coverage gaps from the #41/#42 design review (issue #43): - timeout-vs-give stress (k_sem) and timeout-vs-post stress (k_event): sweep the giver/poster firing time across the taker's 20 ms timeout (early / same-tick / late) for 100 iterations, checking conservation invariants on every outcome and probing the reserved notification index for stranded notifications after each cycle. The giver runs as a raw FreeRTOS task pinned to the other core on multicore targets (k_thread_create pins to core 0). - give-racing-the-park: 1000 zero-delay handshake iterations hammer the unlock-sample-relock recheck window and the enqueue->park window inside k_sem_take. - multi-waiter beyond two: 4-waiter conservation, FIFO order among equal-priority waiters (pins the strict '>' in z_sem_pop_waiter), k_sem_reset waking 3 waiters, single k_event_post waking 3 waiters. - FromISR coverage (HW-gated on CONFIG_K_TIMER_DISPATCH_ISR): k_sem_give from real ISR context with a prompt-wake latency bound proving the portYIELD_FROM_ISR path (mid-tick expiry via K_USEC); k_sem_take(K_NO_WAIT) from ISR (upstream isr-ok contract); k_event_post from ISR waking 3 waiters (accumulate-yield-once path). Doc notes on the declarations (inline-divergence convention): - k_sem_take: ISR-legal only with K_NO_WAIT (upstream contract); waiter priority is cached at enqueue (k_thread_priority_set on a blocked waiter does not re-sort the wake order -- upstream re-sorts). - k_sem_give: documented wake order (highest priority, FIFO among equals). - k_sem_reset: task context only. Upstream verification showed upstream's k_sem_reset is NOT isr-ok either (no @isr_ok; calls z_reschedule), so this is parity, not a divergence -- the #43 item proposing an ISR-safe reset was based on a wrong premise. The stale-priority note is NOT added to k_event_wait: k_event wakes ALL satisfied waiters with no priority ordering, so there is no stale value to document. linux suite: 204/0 (197 + 7; the 3 ISR tests compile out on linux). Refs #43 Co-Authored-By: Claude Opus 4.8 (1M context) --- .../zkernel/include/boreas/zephyr/kernel.h | 12 +- test/main/test_k_event_mt.c | 188 ++++++++++ test/main/test_k_sem.c | 340 ++++++++++++++++++ 3 files changed, 539 insertions(+), 1 deletion(-) diff --git a/components/zkernel/include/boreas/zephyr/kernel.h b/components/zkernel/include/boreas/zephyr/kernel.h index b28bbd5..b1f7579 100644 --- a/components/zkernel/include/boreas/zephyr/kernel.h +++ b/components/zkernel/include/boreas/zephyr/kernel.h @@ -215,12 +215,22 @@ int k_sem_init(struct k_sem *sem, unsigned int initial_count, unsigned int limit * reach into the semaphore's waiter list from abort, so the * dead thread would leave a dangling waiter node. The same * applies to aborting a thread that is inside k_sem_give. + * @note ISR context: legal only with K_NO_WAIT (the upstream + * contract). The K_NO_WAIT paths make no FreeRTOS calls. + * @note Divergence: a waiter's priority is sampled when it enqueues; + * k_thread_priority_set on a blocked thread does not re-sort + * the wake order (upstream re-sorts the pend queue). */ int k_sem_take(struct k_sem *sem, k_timeout_t timeout); +/** Give the semaphore (ISR-safe). Wakes the highest-priority waiter, + * FIFO among equal priorities (upstream wake order). */ void k_sem_give(struct k_sem *sem); /** Zero the count and wake all waiters; their takes return -EAGAIN * (upstream parity -- the previous FreeRTOS-backed implementation - * could only drain the count). */ + * could only drain the count). + * + * @note Task context only -- must not be called from an ISR. Matches + * upstream, whose k_sem_reset is likewise not isr-ok. */ void k_sem_reset(struct k_sem *sem); unsigned int k_sem_count_get(struct k_sem *sem); diff --git a/test/main/test_k_event_mt.c b/test/main/test_k_event_mt.c index 23f69ee..0f1a39b 100644 --- a/test/main/test_k_event_mt.c +++ b/test/main/test_k_event_mt.c @@ -9,6 +9,14 @@ #include "unity.h" #include "zephyr/kernel.h" +#include "sdkconfig.h" + +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" + +#ifdef CONFIG_K_TIMER_DISPATCH_ISR +#include "esp_attr.h" +#endif #define EVT_DONE BIT(0) #define EVT_GO BIT(1) @@ -164,10 +172,190 @@ static void test_event_post_wakes_all_with_safe_waiter(void) TEST_ASSERT_EQUAL(0, k_event_test(&mt_evt, EVT_GO)); } +/* ---------------------------------------------------------------- + * #43: timeout-vs-post race stress (mirror of the k_sem test -- + * k_event shares the architecture, so the consume-the-in-flight- + * notification branch of z_event_wait_internal needs the same edge + * coverage). Invariants per outcome of a wait_safe vs a racing post: + * + * got == EVT_GO -> the bits were consumed (test() == 0) + * got == 0 -> the post landed after the timeout, with no + * waiter to target (test() == EVT_GO) + * + * and after draining, the reserved notification index must be clean. + * The poster is a RAW FreeRTOS task pinned to the other core on + * multicore targets (k_thread_create pins to core 0). + * ---------------------------------------------------------------- */ + +#define EVT_STRESS_ITERS 100 + +#if CONFIG_FREERTOS_NUMBER_OF_CORES > 1 +#define EVT_POSTER_CORE 1 +#else +#define EVT_POSTER_CORE 0 +#endif + +static struct k_sem evt_stress_go; +static struct k_sem evt_stress_ack; + +static void evt_stress_poster_task(void *arg) +{ + (void)arg; + for (int i = 0; i < EVT_STRESS_ITERS; i++) { + k_sem_take(&evt_stress_go, K_FOREVER); + /* Sweep across the waiter's 20 ms timeout: 18/19 wake it, + * 20 lands on the timeout tick (the race), 21/22 post + * into an empty waiter list after the timeout. */ + k_msleep(18 + (i % 5)); + k_event_post(&mt_evt, EVT_GO); + k_sem_give(&evt_stress_ack); + } + vTaskDelete(NULL); +} + +static void test_event_timeout_vs_post_stress(void) +{ + k_event_init(&mt_evt); + TEST_ASSERT_EQUAL(0, k_sem_init(&evt_stress_go, 0, 1)); + TEST_ASSERT_EQUAL(0, k_sem_init(&evt_stress_ack, 0, 1)); + + TEST_ASSERT_EQUAL(pdPASS, xTaskCreatePinnedToCore(evt_stress_poster_task, "evt_stress", + 4096, NULL, 6, NULL, EVT_POSTER_CORE)); + + int timeouts = 0; + + for (int i = 0; i < EVT_STRESS_ITERS; i++) { + k_sem_give(&evt_stress_go); + + uint32_t got = k_event_wait_safe(&mt_evt, EVT_GO, false, K_MSEC(20)); + + /* The ack orders this iteration's post before the checks. */ + TEST_ASSERT_EQUAL(0, k_sem_take(&evt_stress_ack, K_SECONDS(2))); + + if (got != 0) { + TEST_ASSERT_EQUAL(EVT_GO, got); + TEST_ASSERT_EQUAL(0, k_event_test(&mt_evt, EVT_GO)); + } else { + /* Timed out; the post latched into the word. */ + TEST_ASSERT_EQUAL(EVT_GO, k_event_test(&mt_evt, EVT_GO)); + TEST_ASSERT_EQUAL(EVT_GO, k_event_wait_safe(&mt_evt, EVT_GO, false, + K_NO_WAIT)); /* drain */ + timeouts++; + } + + /* No stranded notification on the reserved index: an empty + * wait must miss both immediately and after blocking. */ + TEST_ASSERT_EQUAL(0, k_event_wait(&mt_evt, EVT_GO, false, K_NO_WAIT)); + TEST_ASSERT_EQUAL(0, k_event_wait(&mt_evt, EVT_GO, false, K_MSEC(2))); + } + + /* The sweep must produce both outcomes or the race was never + * exercised (18/19 ms reliably wake, 21/22 ms reliably miss). */ + TEST_ASSERT_NOT_EQUAL(0, timeouts); + TEST_ASSERT_NOT_EQUAL(EVT_STRESS_ITERS, timeouts); + + k_msleep(20); /* let the self-deleting poster be reaped */ +} + +/* ---------------------------------------------------------------- + * #43: a single post wakes 3+ waiters (the chain-wake walk beyond + * the two-waiter regression above). + * ---------------------------------------------------------------- */ + +K_THREAD_STACK_DEFINE(trio_stack0, 2048); +K_THREAD_STACK_DEFINE(trio_stack1, 2048); +K_THREAD_STACK_DEFINE(trio_stack2, 2048); +static struct k_thread trio_threads[3]; +static volatile uint32_t trio_got[3]; + +static void trio_waiter_entry(void *p1, void *p2, void *p3) +{ + (void)p2; + (void)p3; + int idx = (int)(intptr_t)p1; + + trio_got[idx] = k_event_wait(&mt_evt, EVT_GO, false, K_SECONDS(2)); +} + +static void trio_spawn(int idx, k_thread_stack_t *stack, size_t stack_size) +{ + k_thread_create(&trio_threads[idx], stack, stack_size, trio_waiter_entry, + (void *)(intptr_t)idx, NULL, NULL, 5, 0, K_NO_WAIT); +} + +static void trio_park_all(void) +{ + trio_got[0] = 0xdead; + trio_got[1] = 0xdead; + trio_got[2] = 0xdead; + trio_spawn(0, trio_stack0, K_THREAD_STACK_SIZEOF(trio_stack0)); + trio_spawn(1, trio_stack1, K_THREAD_STACK_SIZEOF(trio_stack1)); + trio_spawn(2, trio_stack2, K_THREAD_STACK_SIZEOF(trio_stack2)); + k_msleep(20); +} + +static void trio_join_and_check(void) +{ + for (int i = 0; i < 3; i++) { + TEST_ASSERT_EQUAL(0, k_thread_join(&trio_threads[i], K_SECONDS(2))); + TEST_ASSERT_EQUAL(EVT_GO, trio_got[i]); + } +} + +static void test_event_post_wakes_three_waiters(void) +{ + k_event_init(&mt_evt); + trio_park_all(); + + k_event_post(&mt_evt, EVT_GO); /* ONE post must wake all three */ + + trio_join_and_check(); +} + +/* ---------------------------------------------------------------- + * #43: FromISR post -- the multi-waiter accumulate-then-yield-once + * path of z_event_post_internal in real ISR context (HW only; prior + * art: the gated tests in test_k_timer.c). + * ---------------------------------------------------------------- */ + +#ifdef CONFIG_K_TIMER_DISPATCH_ISR + +static volatile bool evt_isr_was_isr; + +static void IRAM_ATTR event_isr_post_cb(struct k_timer *timer) +{ + ARG_UNUSED(timer); + evt_isr_was_isr = xPortInIsrContext(); + k_event_post(&mt_evt, EVT_GO); +} + +static void test_event_isr_post_wakes_all(void) +{ + struct k_timer timer; + + k_event_init(&mt_evt); + evt_isr_was_isr = false; + trio_park_all(); + + k_timer_init(&timer, event_isr_post_cb, NULL); + k_timer_start(&timer, K_MSEC(10), K_NO_WAIT); + + trio_join_and_check(); + k_timer_stop(&timer); + TEST_ASSERT_TRUE_MESSAGE(evt_isr_was_isr, "poster did not run in ISR context"); +} + +#endif /* CONFIG_K_TIMER_DISPATCH_ISR */ + void test_k_event_mt_group(void) { RUN_TEST(test_event_wait_from_thread); RUN_TEST(test_event_wait_all_from_thread); RUN_TEST(test_event_wait_safe_from_thread); RUN_TEST(test_event_post_wakes_all_with_safe_waiter); + RUN_TEST(test_event_timeout_vs_post_stress); + RUN_TEST(test_event_post_wakes_three_waiters); +#ifdef CONFIG_K_TIMER_DISPATCH_ISR + RUN_TEST(test_event_isr_post_wakes_all); +#endif } diff --git a/test/main/test_k_sem.c b/test/main/test_k_sem.c index d128ab9..ca93218 100644 --- a/test/main/test_k_sem.c +++ b/test/main/test_k_sem.c @@ -7,6 +7,15 @@ #include "unity.h" #include "zephyr/kernel.h" +#include "sdkconfig.h" + +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" + +#ifdef CONFIG_K_TIMER_DISPATCH_ISR +#include "esp_attr.h" +#include "esp_timer.h" +#endif static void test_sem_init_and_count(void) { @@ -364,6 +373,328 @@ static void test_sem_give_wakes_highest_priority_waiter(void) TEST_ASSERT_EQUAL(0, k_thread_join(&sem_hi_thread, K_SECONDS(2))); } +/* ---------------------------------------------------------------- + * #43: timeout-vs-give race stress. + * + * The consume-the-in-flight-give branch of k_sem_take (woken with + * got == 0) and the give-after-timeout latch only execute when a give + * collides with the take's timeout edge -- no other test in the suite + * ever times out a take while a give is in flight. Sweep the giver's + * firing time across the taker's 20 ms timeout (early / same tick / + * late) and check the conservation invariants on every outcome: + * + * ret == 0 -> the unit was consumed (count == 0) + * ret == -EAGAIN -> the give landed after the timeout (count == 1) + * + * and after draining, the reserved notification index must be clean: + * an immediate take fails -EBUSY and a short blocking take times out + * (a stranded notification would satisfy it instantly). + * + * The giver is a RAW FreeRTOS task: k_thread_create pins to core 0, + * and on multicore targets the giver must run on the other core for + * true concurrency in the race windows. + * ---------------------------------------------------------------- */ + +#define STRESS_ITERS 100 + +#if CONFIG_FREERTOS_NUMBER_OF_CORES > 1 +#define STRESS_GIVER_CORE 1 +#else +#define STRESS_GIVER_CORE 0 +#endif + +static struct k_sem stress_sem; +static struct k_sem stress_go; +static struct k_sem stress_ack; + +static void stress_giver_task(void *arg) +{ + (void)arg; + for (int i = 0; i < STRESS_ITERS; i++) { + k_sem_take(&stress_go, K_FOREVER); + /* Sweep across the taker's 20 ms timeout: 18/19 wake the + * taker, 20 lands on the timeout tick (the race), 21/22 + * latch into the count after the timeout. */ + k_msleep(18 + (i % 5)); + k_sem_give(&stress_sem); + k_sem_give(&stress_ack); + } + vTaskDelete(NULL); +} + +static void test_sem_timeout_vs_give_stress(void) +{ + TEST_ASSERT_EQUAL(0, k_sem_init(&stress_sem, 0, 1)); + TEST_ASSERT_EQUAL(0, k_sem_init(&stress_go, 0, 1)); + TEST_ASSERT_EQUAL(0, k_sem_init(&stress_ack, 0, 1)); + + TEST_ASSERT_EQUAL(pdPASS, xTaskCreatePinnedToCore(stress_giver_task, "sem_stress", 4096, + NULL, 6, NULL, STRESS_GIVER_CORE)); + + int timeouts = 0; + + for (int i = 0; i < STRESS_ITERS; i++) { + k_sem_give(&stress_go); + + int ret = k_sem_take(&stress_sem, K_MSEC(20)); + + /* The ack orders this iteration's give before the checks. */ + TEST_ASSERT_EQUAL(0, k_sem_take(&stress_ack, K_SECONDS(2))); + + if (ret == 0) { + TEST_ASSERT_EQUAL(0, k_sem_count_get(&stress_sem)); + } else { + TEST_ASSERT_EQUAL(-EAGAIN, ret); + TEST_ASSERT_EQUAL(1, k_sem_count_get(&stress_sem)); + TEST_ASSERT_EQUAL(0, k_sem_take(&stress_sem, K_NO_WAIT)); /* drain */ + timeouts++; + } + + /* No stranded notification on the reserved index: an empty + * take must fail both immediately and after blocking. */ + TEST_ASSERT_EQUAL(-EBUSY, k_sem_take(&stress_sem, K_NO_WAIT)); + TEST_ASSERT_EQUAL(-EAGAIN, k_sem_take(&stress_sem, K_MSEC(2))); + } + + /* The sweep must produce both outcomes or the race was never + * exercised (18/19 ms reliably wake, 21/22 ms reliably latch). */ + TEST_ASSERT_NOT_EQUAL(0, timeouts); + TEST_ASSERT_NOT_EQUAL(STRESS_ITERS, timeouts); + + k_msleep(20); /* let the self-deleting giver be reaped */ +} + +/* ---------------------------------------------------------------- + * #43: give racing the park (give-before-block, not give-before-take). + * + * A zero-delay giver hammers the windows INSIDE k_sem_take: the + * unlock-sample-relock recheck window and the enqueue -> + * notification-wait window. On multicore the giver runs concurrently + * on the other core; on unicore the higher-priority giver preempts at + * the kernel calls inside those windows. + * ---------------------------------------------------------------- */ + +#define PARK_RACE_ITERS 1000 + +static void park_race_giver_task(void *arg) +{ + (void)arg; + for (int i = 0; i < PARK_RACE_ITERS; i++) { + k_sem_take(&stress_go, K_FOREVER); + k_sem_give(&stress_sem); /* zero delay */ + } + vTaskDelete(NULL); +} + +static void test_sem_give_racing_park(void) +{ + TEST_ASSERT_EQUAL(0, k_sem_init(&stress_sem, 0, 1)); + TEST_ASSERT_EQUAL(0, k_sem_init(&stress_go, 0, 1)); + + TEST_ASSERT_EQUAL(pdPASS, xTaskCreatePinnedToCore(park_race_giver_task, "park_race", 4096, + NULL, 6, NULL, STRESS_GIVER_CORE)); + + for (int i = 0; i < PARK_RACE_ITERS; i++) { + k_sem_give(&stress_go); + TEST_ASSERT_EQUAL(0, k_sem_take(&stress_sem, K_MSEC(100))); + } + + /* Units conserved: every give was consumed by exactly one take. */ + TEST_ASSERT_EQUAL(0, k_sem_count_get(&stress_sem)); + TEST_ASSERT_EQUAL(-EBUSY, k_sem_take(&stress_sem, K_NO_WAIT)); + + k_msleep(20); /* let the self-deleting giver be reaped */ +} + +/* ---------------------------------------------------------------- + * #43: multi-waiter beyond two -- conservation, FIFO order among + * equal priorities, and reset waking 3+. + * ---------------------------------------------------------------- */ + +K_THREAD_STACK_DEFINE(mw_stack0, 4096); +K_THREAD_STACK_DEFINE(mw_stack1, 4096); +K_THREAD_STACK_DEFINE(mw_stack2, 4096); +K_THREAD_STACK_DEFINE(mw_stack3, 4096); +static struct k_thread mw_threads[4]; +static struct k_sem mw_sem; +static volatile int mw_result[4]; +static volatile int mw_order[4]; +static volatile int mw_next; + +static void mw_waiter_entry(void *p1, void *p2, void *p3) +{ + (void)p2; + (void)p3; + int idx = (int)(intptr_t)p1; + + mw_result[idx] = k_sem_take(&mw_sem, K_SECONDS(2)); + if (mw_result[idx] == 0) { + /* Wakes are serialized: the main task sleeps between + * gives, and all waiters share core 0. */ + mw_order[mw_next++] = idx; + } +} + +static void mw_spawn(int idx, k_thread_stack_t *stack, size_t stack_size) +{ + k_thread_create(&mw_threads[idx], stack, stack_size, mw_waiter_entry, (void *)(intptr_t)idx, + NULL, NULL, 5, 0, K_NO_WAIT); +} + +static void test_sem_multi_waiter_conservation(void) +{ + TEST_ASSERT_EQUAL(0, k_sem_init(&mw_sem, 0, 4)); + mw_next = 0; + + /* 4 waiters park... */ + mw_spawn(0, mw_stack0, K_THREAD_STACK_SIZEOF(mw_stack0)); + mw_spawn(1, mw_stack1, K_THREAD_STACK_SIZEOF(mw_stack1)); + mw_spawn(2, mw_stack2, K_THREAD_STACK_SIZEOF(mw_stack2)); + mw_spawn(3, mw_stack3, K_THREAD_STACK_SIZEOF(mw_stack3)); + k_msleep(20); + + /* ...4 gives wake each exactly once, units conserved. */ + for (int i = 0; i < 4; i++) { + k_sem_give(&mw_sem); + } + for (int i = 0; i < 4; i++) { + TEST_ASSERT_EQUAL(0, k_thread_join(&mw_threads[i], K_SECONDS(2))); + TEST_ASSERT_EQUAL(0, mw_result[i]); + } + TEST_ASSERT_EQUAL(4, mw_next); + TEST_ASSERT_EQUAL(0, k_sem_count_get(&mw_sem)); + TEST_ASSERT_EQUAL(-EBUSY, k_sem_take(&mw_sem, K_NO_WAIT)); +} + +static void test_sem_equal_priority_waiters_fifo(void) +{ + TEST_ASSERT_EQUAL(0, k_sem_init(&mw_sem, 0, 4)); + mw_next = 0; + + /* Enqueue 3 equal-priority waiters in a known order (the sleeps + * guarantee each is parked before the next spawns). */ + mw_spawn(0, mw_stack0, K_THREAD_STACK_SIZEOF(mw_stack0)); + k_msleep(10); + mw_spawn(1, mw_stack1, K_THREAD_STACK_SIZEOF(mw_stack1)); + k_msleep(10); + mw_spawn(2, mw_stack2, K_THREAD_STACK_SIZEOF(mw_stack2)); + k_msleep(10); + + /* Sequential gives must wake them FIFO (z_sem_pop_waiter's + * strict '>' comparison is what preserves enqueue order among + * equal priorities -- this test pins that). */ + for (int i = 0; i < 3; i++) { + k_sem_give(&mw_sem); + k_msleep(10); + } + for (int i = 0; i < 3; i++) { + TEST_ASSERT_EQUAL(0, k_thread_join(&mw_threads[i], K_SECONDS(2))); + } + TEST_ASSERT_EQUAL(0, mw_order[0]); + TEST_ASSERT_EQUAL(1, mw_order[1]); + TEST_ASSERT_EQUAL(2, mw_order[2]); +} + +static void test_sem_reset_wakes_all_waiters(void) +{ + TEST_ASSERT_EQUAL(0, k_sem_init(&mw_sem, 0, 4)); + mw_next = 0; + + mw_spawn(0, mw_stack0, K_THREAD_STACK_SIZEOF(mw_stack0)); + mw_spawn(1, mw_stack1, K_THREAD_STACK_SIZEOF(mw_stack1)); + mw_spawn(2, mw_stack2, K_THREAD_STACK_SIZEOF(mw_stack2)); + k_msleep(20); + + /* One reset wakes ALL waiters with -EAGAIN (the wake-all loop in + * k_sem_reset is otherwise only tested with a single waiter). */ + k_sem_reset(&mw_sem); + + for (int i = 0; i < 3; i++) { + TEST_ASSERT_EQUAL(0, k_thread_join(&mw_threads[i], K_SECONDS(2))); + TEST_ASSERT_EQUAL(-EAGAIN, mw_result[i]); + } + TEST_ASSERT_EQUAL(0, k_sem_count_get(&mw_sem)); + + /* The sem stays functional after the reset. */ + k_sem_give(&mw_sem); + TEST_ASSERT_EQUAL(0, k_sem_take(&mw_sem, K_NO_WAIT)); +} + +/* ---------------------------------------------------------------- + * #43: FromISR give -- real ISR context via k_timer ISR dispatch + * (HW only; prior art: the gated tests in test_k_timer.c). + * ---------------------------------------------------------------- */ + +#ifdef CONFIG_K_TIMER_DISPATCH_ISR + +static struct k_sem isr_give_sem; +static volatile int64_t isr_give_us; +static volatile bool isr_give_was_isr; + +static void IRAM_ATTR sem_isr_give_cb(struct k_timer *timer) +{ + ARG_UNUSED(timer); + isr_give_was_isr = xPortInIsrContext(); + isr_give_us = esp_timer_get_time(); + k_sem_give(&isr_give_sem); +} + +static void test_sem_isr_give_prompt_wake(void) +{ + struct k_timer timer; + + TEST_ASSERT_EQUAL(0, k_sem_init(&isr_give_sem, 0, 1)); + isr_give_us = 0; + isr_give_was_isr = false; + + k_timer_init(&timer, sem_isr_give_cb, NULL); + /* Mid-tick expiry (10.5 ms at 1000 Hz): if the FromISR give + * failed to request the context switch (portYIELD_FROM_ISR), + * the wake would be deferred to the next tick interrupt, + * ~500 us later -- the latency bound below would fail. */ + k_timer_start(&timer, K_USEC(10500), K_NO_WAIT); + + TEST_ASSERT_EQUAL(0, k_sem_take(&isr_give_sem, K_FOREVER)); + int64_t latency_us = esp_timer_get_time() - isr_give_us; + + k_timer_stop(&timer); + TEST_ASSERT_TRUE_MESSAGE(isr_give_was_isr, "giver did not run in ISR context"); + TEST_ASSERT_TRUE_MESSAGE(latency_us < 400, "ISR give did not wake the waiter promptly"); +} + +static struct k_sem isr_take_sem; +static volatile int isr_take_ret1; +static volatile int isr_take_ret2; + +static void IRAM_ATTR sem_isr_take_cb(struct k_timer *timer) +{ + ARG_UNUSED(timer); + /* Upstream contract: k_sem_take is isr-ok with K_NO_WAIT (the + * K_NO_WAIT paths make no FreeRTOS calls). */ + isr_take_ret1 = k_sem_take(&isr_take_sem, K_NO_WAIT); + isr_take_ret2 = k_sem_take(&isr_take_sem, K_NO_WAIT); +} + +static void test_sem_isr_take_no_wait(void) +{ + struct k_timer timer; + + TEST_ASSERT_EQUAL(0, k_sem_init(&isr_take_sem, 1, 1)); + isr_take_ret1 = 0xbad; + isr_take_ret2 = 0xbad; + + k_timer_init(&timer, sem_isr_take_cb, NULL); + k_timer_start(&timer, K_MSEC(10), K_NO_WAIT); + k_msleep(50); + k_timer_stop(&timer); + + TEST_ASSERT_EQUAL(0, isr_take_ret1); + TEST_ASSERT_EQUAL(-EBUSY, isr_take_ret2); + TEST_ASSERT_EQUAL(0, k_sem_count_get(&isr_take_sem)); +} + +#endif /* CONFIG_K_TIMER_DISPATCH_ISR */ + void test_k_sem_group(void) { RUN_TEST(test_sem_stack_forever_same_prio); @@ -383,4 +714,13 @@ void test_k_sem_group(void) RUN_TEST(test_sem_init_invalid_args); RUN_TEST(test_sem_reset_wakes_waiter_eagain); RUN_TEST(test_sem_give_wakes_highest_priority_waiter); + RUN_TEST(test_sem_timeout_vs_give_stress); + RUN_TEST(test_sem_give_racing_park); + RUN_TEST(test_sem_multi_waiter_conservation); + RUN_TEST(test_sem_equal_priority_waiters_fifo); + RUN_TEST(test_sem_reset_wakes_all_waiters); +#ifdef CONFIG_K_TIMER_DISPATCH_ISR + RUN_TEST(test_sem_isr_give_prompt_wake); + RUN_TEST(test_sem_isr_take_no_wait); +#endif } From 27d8a63fe38546e0036e4ae99eefa3eacf66c5c7 Mon Sep 17 00:00:00 2001 From: swoiszwillo Date: Sun, 7 Jun 2026 08:49:11 -0600 Subject: [PATCH 2/3] fix: review feedback -- IRAM-resident take/wait, robust stress asserts Review fan-out findings folded: - k_sem_take, z_event_wait_internal, and the four k_event_wait* wrappers are now K_ISR_SAFE (IRAM-resident). The documented isr-ok-with-K_NO_WAIT contract was unsound for flash-resident code: the esp_timer ISR is allocated ESP_INTR_FLAG_IRAM, so the take/wait fast paths would fault if the ISR fired during a concurrent flash operation. New HW-gated tests pin the IRAM residency (prior art: the iram_attr tests in test_k_timer.c). - Prompt-wake test retuned: expiry at 10.1 ms (was 10.5) makes a missing portYIELD_FROM_ISR cost ~900 us instead of ~500, so the 700 us bound has wide margin against both false pass (deferred wake) and false fail (cache-cold first run, tick coincidence). - The both-outcomes sweep asserts are now HW-only: on the linux target a loaded CI host can stall the tick clock and collapse the sweep onto one outcome. The per-iteration conservation invariants (which hold under every interleaving) still run everywhere. - BUILD_ASSERT(CONFIG_FREERTOS_HZ >= 1000): the 18..22 ms sweep separation quantizes away at coarser ticks. - Multi-waiter state hygiene: mw_result/mw_order sentinel-reset per test (stale values from a prior test can no longer satisfy assertions), mw_next is a Zephyr atomic_t. - kernel.h: precise wording on the K_NO_WAIT-from-ISR note (spinlock only, no task-notify/blocking calls, IRAM-resident). - test_k_event_mt.c: pre-existing sizeof(stack) call sites migrated to K_THREAD_STACK_SIZEOF; FreeRTOS-priority-convention reminder on the priority-wake test. linux suite: 204/0 x3. Refs #43 Co-Authored-By: Claude Opus 4.8 (1M context) --- .../zkernel/include/boreas/zephyr/kernel.h | 5 +- components/zkernel/src/k_event.c | 20 +++-- components/zkernel/src/k_sem.c | 7 +- test/main/test_k_event_mt.c | 39 ++++++++-- test/main/test_k_sem.c | 73 +++++++++++++++---- 5 files changed, 114 insertions(+), 30 deletions(-) diff --git a/components/zkernel/include/boreas/zephyr/kernel.h b/components/zkernel/include/boreas/zephyr/kernel.h index b1f7579..d489852 100644 --- a/components/zkernel/include/boreas/zephyr/kernel.h +++ b/components/zkernel/include/boreas/zephyr/kernel.h @@ -216,7 +216,10 @@ int k_sem_init(struct k_sem *sem, unsigned int initial_count, unsigned int limit * dead thread would leave a dangling waiter node. The same * applies to aborting a thread that is inside k_sem_give. * @note ISR context: legal only with K_NO_WAIT (the upstream - * contract). The K_NO_WAIT paths make no FreeRTOS calls. + * contract). The K_NO_WAIT paths take only the ISR-safe + * spinlock (no task-notify or blocking FreeRTOS calls), and + * the function is IRAM-resident so the contract holds in + * IRAM-only ISR contexts (e.g. esp_timer ISR dispatch). * @note Divergence: a waiter's priority is sampled when it enqueues; * k_thread_priority_set on a blocked thread does not re-sort * the wake order (upstream re-sorts the pend queue). diff --git a/components/zkernel/src/k_event.c b/components/zkernel/src/k_event.c index acc3706..ca16c2f 100644 --- a/components/zkernel/src/k_event.c +++ b/components/zkernel/src/k_event.c @@ -147,8 +147,11 @@ uint32_t K_ISR_SAFE k_event_clear(struct k_event *event, uint32_t events) return z_event_post_internal(event, 0, events); } -static uint32_t z_event_wait_internal(struct k_event *event, uint32_t events, bool all, bool reset, - bool clear, k_timeout_t timeout) +/* IRAM-resident (K_ISR_SAFE) like k_sem_take: the wait family is + * documented isr-ok with K_NO_WAIT (k_event_test rides it), so it must + * be callable from IRAM-only ISR contexts. */ +static uint32_t K_ISR_SAFE z_event_wait_internal(struct k_event *event, uint32_t events, bool all, + bool reset, bool clear, k_timeout_t timeout) { /* The waiter node lives on THIS stack frame; it is unlinked under * the event lock (by a waker or by our timeout path) before we @@ -245,23 +248,26 @@ static uint32_t z_event_wait_internal(struct k_event *event, uint32_t events, bo } } -uint32_t k_event_wait(struct k_event *event, uint32_t events, bool reset, k_timeout_t timeout) +uint32_t K_ISR_SAFE k_event_wait(struct k_event *event, uint32_t events, bool reset, + k_timeout_t timeout) { return z_event_wait_internal(event, events, false, reset, false, timeout); } -uint32_t k_event_wait_all(struct k_event *event, uint32_t events, bool reset, k_timeout_t timeout) +uint32_t K_ISR_SAFE k_event_wait_all(struct k_event *event, uint32_t events, bool reset, + k_timeout_t timeout) { return z_event_wait_internal(event, events, true, reset, false, timeout); } -uint32_t k_event_wait_safe(struct k_event *event, uint32_t events, bool reset, k_timeout_t timeout) +uint32_t K_ISR_SAFE k_event_wait_safe(struct k_event *event, uint32_t events, bool reset, + k_timeout_t timeout) { return z_event_wait_internal(event, events, false, reset, true, timeout); } -uint32_t k_event_wait_all_safe(struct k_event *event, uint32_t events, bool reset, - k_timeout_t timeout) +uint32_t K_ISR_SAFE k_event_wait_all_safe(struct k_event *event, uint32_t events, bool reset, + k_timeout_t timeout) { return z_event_wait_internal(event, events, true, reset, true, timeout); } diff --git a/components/zkernel/src/k_sem.c b/components/zkernel/src/k_sem.c index 784118d..7f6847f 100644 --- a/components/zkernel/src/k_sem.c +++ b/components/zkernel/src/k_sem.c @@ -65,7 +65,12 @@ static struct z_sem_waiter *z_sem_pop_waiter(struct k_sem *sem) return best; } -int k_sem_take(struct k_sem *sem, k_timeout_t timeout) +/* IRAM-resident (K_ISR_SAFE): upstream documents k_sem_take as isr-ok + * with K_NO_WAIT, and ESP-IDF ISRs that may run while the flash cache + * is disabled (esp_timer ISR dispatch is allocated ESP_INTR_FLAG_IRAM) + * can only call IRAM code -- without this, the documented contract + * would fault during a concurrent flash operation. */ +int K_ISR_SAFE k_sem_take(struct k_sem *sem, k_timeout_t timeout) { /* The waiter node lives on THIS stack frame. It is only ever * linked while we are inside this function, and it is unlinked diff --git a/test/main/test_k_event_mt.c b/test/main/test_k_event_mt.c index 0f1a39b..c4a6193 100644 --- a/test/main/test_k_event_mt.c +++ b/test/main/test_k_event_mt.c @@ -45,8 +45,9 @@ static void test_event_wait_from_thread(void) memset(&setter_thread, 0, sizeof(setter_thread)); /* Spawn thread that will set EVT_DONE after 50ms */ - k_thread_create(&setter_thread, setter_stack, sizeof(setter_stack), event_setter_entry, - (void *)(uintptr_t)EVT_DONE, NULL, NULL, 5, 0, K_NO_WAIT); + k_thread_create(&setter_thread, setter_stack, K_THREAD_STACK_SIZEOF(setter_stack), + event_setter_entry, (void *)(uintptr_t)EVT_DONE, NULL, NULL, 5, 0, + K_NO_WAIT); /* Block waiting for the event -- should wake when thread sets it */ uint32_t got = k_event_wait(&mt_evt, EVT_DONE, false, K_MSEC(500)); @@ -80,7 +81,7 @@ static void test_event_wait_all_from_thread(void) k_event_init(&mt_evt); memset(&setter2_thread, 0, sizeof(setter2_thread)); - k_thread_create(&setter2_thread, setter2_stack, sizeof(setter2_stack), + k_thread_create(&setter2_thread, setter2_stack, K_THREAD_STACK_SIZEOF(setter2_stack), event_multi_setter_entry, &mt_evt, NULL, NULL, 5, 0, K_NO_WAIT); /* Wait for BOTH bits -- thread sets them ~30ms apart */ @@ -101,8 +102,9 @@ static void test_event_wait_safe_from_thread(void) k_event_init(&mt_evt); memset(&setter_thread, 0, sizeof(setter_thread)); - k_thread_create(&setter_thread, setter_stack, sizeof(setter_stack), event_setter_entry, - (void *)(uintptr_t)EVT_DONE, NULL, NULL, 5, 0, K_NO_WAIT); + k_thread_create(&setter_thread, setter_stack, K_THREAD_STACK_SIZEOF(setter_stack), + event_setter_entry, (void *)(uintptr_t)EVT_DONE, NULL, NULL, 5, 0, + K_NO_WAIT); /* Block until the setter posts, consuming the matched bits. */ uint32_t got = k_event_wait_safe(&mt_evt, EVT_DONE, false, K_MSEC(500)); @@ -189,6 +191,10 @@ static void test_event_post_wakes_all_with_safe_waiter(void) #define EVT_STRESS_ITERS 100 +/* The 18..22 ms sweep below only separates "reliably wake" from + * "reliably miss" with 1 ms ticks (see the k_sem stress test). */ +BUILD_ASSERT(CONFIG_FREERTOS_HZ >= 1000, "stress sweep requires 1 ms (or finer) ticks"); + #if CONFIG_FREERTOS_NUMBER_OF_CORES > 1 #define EVT_POSTER_CORE 1 #else @@ -249,10 +255,17 @@ static void test_event_timeout_vs_post_stress(void) TEST_ASSERT_EQUAL(0, k_event_wait(&mt_evt, EVT_GO, false, K_MSEC(2))); } +#if !CONFIG_IDF_TARGET_LINUX /* The sweep must produce both outcomes or the race was never - * exercised (18/19 ms reliably wake, 21/22 ms reliably miss). */ + * exercised (18/19 ms reliably wake, 21/22 ms reliably miss). + * HW only: on the linux target a loaded CI host can stall the + * tick clock and collapse the whole sweep onto one outcome -- + * the per-iteration invariants above hold regardless. */ TEST_ASSERT_NOT_EQUAL(0, timeouts); TEST_ASSERT_NOT_EQUAL(EVT_STRESS_ITERS, timeouts); +#else + (void)timeouts; +#endif k_msleep(20); /* let the self-deleting poster be reaped */ } @@ -345,6 +358,19 @@ static void test_event_isr_post_wakes_all(void) TEST_ASSERT_TRUE_MESSAGE(evt_isr_was_isr, "poster did not run in ISR context"); } +extern int _iram_text_start; +extern int _iram_text_end; + +static void test_k_event_wait_iram_attr(void) +{ + /* The isr-ok-with-K_NO_WAIT contract (k_event_test rides this) + * requires IRAM residency -- see test_k_sem_take_iram_attr. */ + uintptr_t fn = (uintptr_t)k_event_wait; + TEST_ASSERT_TRUE_MESSAGE( + (fn >= (uintptr_t)&_iram_text_start && fn < (uintptr_t)&_iram_text_end), + "k_event_wait is not in IRAM address range"); +} + #endif /* CONFIG_K_TIMER_DISPATCH_ISR */ void test_k_event_mt_group(void) @@ -357,5 +383,6 @@ void test_k_event_mt_group(void) RUN_TEST(test_event_post_wakes_three_waiters); #ifdef CONFIG_K_TIMER_DISPATCH_ISR RUN_TEST(test_event_isr_post_wakes_all); + RUN_TEST(test_k_event_wait_iram_attr); #endif } diff --git a/test/main/test_k_sem.c b/test/main/test_k_sem.c index ca93218..0e013f8 100644 --- a/test/main/test_k_sem.c +++ b/test/main/test_k_sem.c @@ -340,7 +340,9 @@ K_THREAD_STACK_DEFINE(sem_hi_stack, 4096); static struct k_thread sem_lo_thread; static struct k_thread sem_hi_thread; static struct k_sem prio_sem; -static volatile int first_woken; /* 4 = low, 6 = high */ +/* 4 = low, 6 = high: k_thread_create takes FreeRTOS priorities (higher + * number = higher priority), NOT upstream Zephyr's inverted scheme. */ +static volatile int first_woken; static void prio_waiter_entry(void *p1, void *p2, void *p3) { @@ -397,6 +399,11 @@ static void test_sem_give_wakes_highest_priority_waiter(void) #define STRESS_ITERS 100 +/* The 18..22 ms sweep below only separates "reliably wake" from + * "reliably latch" with 1 ms ticks; at e.g. 100 Hz the whole sweep + * quantizes onto the timeout boundary. */ +BUILD_ASSERT(CONFIG_FREERTOS_HZ >= 1000, "stress sweep requires 1 ms (or finer) ticks"); + #if CONFIG_FREERTOS_NUMBER_OF_CORES > 1 #define STRESS_GIVER_CORE 1 #else @@ -456,10 +463,17 @@ static void test_sem_timeout_vs_give_stress(void) TEST_ASSERT_EQUAL(-EAGAIN, k_sem_take(&stress_sem, K_MSEC(2))); } +#if !CONFIG_IDF_TARGET_LINUX /* The sweep must produce both outcomes or the race was never - * exercised (18/19 ms reliably wake, 21/22 ms reliably latch). */ + * exercised (18/19 ms reliably wake, 21/22 ms reliably latch). + * HW only: on the linux target a loaded CI host can stall the + * tick clock and collapse the whole sweep onto one outcome -- + * the per-iteration invariants above hold regardless. */ TEST_ASSERT_NOT_EQUAL(0, timeouts); TEST_ASSERT_NOT_EQUAL(STRESS_ITERS, timeouts); +#else + (void)timeouts; +#endif k_msleep(20); /* let the self-deleting giver be reaped */ } @@ -519,7 +533,7 @@ static struct k_thread mw_threads[4]; static struct k_sem mw_sem; static volatile int mw_result[4]; static volatile int mw_order[4]; -static volatile int mw_next; +static atomic_t mw_next; static void mw_waiter_entry(void *p1, void *p2, void *p3) { @@ -529,10 +543,20 @@ static void mw_waiter_entry(void *p1, void *p2, void *p3) mw_result[idx] = k_sem_take(&mw_sem, K_SECONDS(2)); if (mw_result[idx] == 0) { - /* Wakes are serialized: the main task sleeps between - * gives, and all waiters share core 0. */ - mw_order[mw_next++] = idx; + mw_order[atomic_add(&mw_next, 1)] = idx; + } +} + +static void mw_reset_state(void) +{ + /* Sentinels: a stale value from a previous test must not be able + * to satisfy this test's assertions (e.g. a lost wake leaving a + * prior run's 0 in mw_result). */ + for (int i = 0; i < 4; i++) { + mw_result[i] = 0xbad; + mw_order[i] = -1; } + atomic_set(&mw_next, 0); } static void mw_spawn(int idx, k_thread_stack_t *stack, size_t stack_size) @@ -544,7 +568,7 @@ static void mw_spawn(int idx, k_thread_stack_t *stack, size_t stack_size) static void test_sem_multi_waiter_conservation(void) { TEST_ASSERT_EQUAL(0, k_sem_init(&mw_sem, 0, 4)); - mw_next = 0; + mw_reset_state(); /* 4 waiters park... */ mw_spawn(0, mw_stack0, K_THREAD_STACK_SIZEOF(mw_stack0)); @@ -569,7 +593,7 @@ static void test_sem_multi_waiter_conservation(void) static void test_sem_equal_priority_waiters_fifo(void) { TEST_ASSERT_EQUAL(0, k_sem_init(&mw_sem, 0, 4)); - mw_next = 0; + mw_reset_state(); /* Enqueue 3 equal-priority waiters in a known order (the sleeps * guarantee each is parked before the next spawns). */ @@ -598,7 +622,7 @@ static void test_sem_equal_priority_waiters_fifo(void) static void test_sem_reset_wakes_all_waiters(void) { TEST_ASSERT_EQUAL(0, k_sem_init(&mw_sem, 0, 4)); - mw_next = 0; + mw_reset_state(); mw_spawn(0, mw_stack0, K_THREAD_STACK_SIZEOF(mw_stack0)); mw_spawn(1, mw_stack1, K_THREAD_STACK_SIZEOF(mw_stack1)); @@ -648,18 +672,21 @@ static void test_sem_isr_give_prompt_wake(void) isr_give_was_isr = false; k_timer_init(&timer, sem_isr_give_cb, NULL); - /* Mid-tick expiry (10.5 ms at 1000 Hz): if the FromISR give - * failed to request the context switch (portYIELD_FROM_ISR), - * the wake would be deferred to the next tick interrupt, - * ~500 us later -- the latency bound below would fail. */ - k_timer_start(&timer, K_USEC(10500), K_NO_WAIT); + /* Early-in-tick expiry (10.1 ms at 1000 Hz): if the FromISR give + * failed to request the context switch (portYIELD_FROM_ISR), the + * wake would be deferred to the next tick interrupt ~900 us + * later, failing the bound below; a genuine yield wakes in tens + * of microseconds. (The causal model assumes giver ISR and + * waiter share a core: the esp_timer ISR and the main task are + * both CPU0 by default.) */ + k_timer_start(&timer, K_USEC(10100), K_NO_WAIT); TEST_ASSERT_EQUAL(0, k_sem_take(&isr_give_sem, K_FOREVER)); int64_t latency_us = esp_timer_get_time() - isr_give_us; k_timer_stop(&timer); TEST_ASSERT_TRUE_MESSAGE(isr_give_was_isr, "giver did not run in ISR context"); - TEST_ASSERT_TRUE_MESSAGE(latency_us < 400, "ISR give did not wake the waiter promptly"); + TEST_ASSERT_TRUE_MESSAGE(latency_us < 700, "ISR give did not wake the waiter promptly"); } static struct k_sem isr_take_sem; @@ -693,6 +720,21 @@ static void test_sem_isr_take_no_wait(void) TEST_ASSERT_EQUAL(0, k_sem_count_get(&isr_take_sem)); } +extern int _iram_text_start; +extern int _iram_text_end; + +static void test_k_sem_take_iram_attr(void) +{ + /* The isr-ok-with-K_NO_WAIT contract requires IRAM residency: + * the esp_timer ISR is allocated ESP_INTR_FLAG_IRAM, so flash- + * resident code would fault if the ISR fired during a flash + * operation (the suite cannot provoke that; pin it statically). */ + uintptr_t fn = (uintptr_t)k_sem_take; + TEST_ASSERT_TRUE_MESSAGE( + (fn >= (uintptr_t)&_iram_text_start && fn < (uintptr_t)&_iram_text_end), + "k_sem_take is not in IRAM address range"); +} + #endif /* CONFIG_K_TIMER_DISPATCH_ISR */ void test_k_sem_group(void) @@ -722,5 +764,6 @@ void test_k_sem_group(void) #ifdef CONFIG_K_TIMER_DISPATCH_ISR RUN_TEST(test_sem_isr_give_prompt_wake); RUN_TEST(test_sem_isr_take_no_wait); + RUN_TEST(test_k_sem_take_iram_attr); #endif } From c668c1c3d4eec8e4b58b165ea51c3aa7a41a0325 Mon Sep 17 00:00:00 2001 From: swoiszwillo Date: Sun, 7 Jun 2026 09:01:36 -0600 Subject: [PATCH 3/3] fix: read mw_next via atomic_get (Copilot review) Co-Authored-By: Claude Opus 4.8 (1M context) --- test/main/test_k_sem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/main/test_k_sem.c b/test/main/test_k_sem.c index 0e013f8..4802dc1 100644 --- a/test/main/test_k_sem.c +++ b/test/main/test_k_sem.c @@ -585,7 +585,7 @@ static void test_sem_multi_waiter_conservation(void) TEST_ASSERT_EQUAL(0, k_thread_join(&mw_threads[i], K_SECONDS(2))); TEST_ASSERT_EQUAL(0, mw_result[i]); } - TEST_ASSERT_EQUAL(4, mw_next); + TEST_ASSERT_EQUAL(4, atomic_get(&mw_next)); TEST_ASSERT_EQUAL(0, k_sem_count_get(&mw_sem)); TEST_ASSERT_EQUAL(-EBUSY, k_sem_take(&mw_sem, K_NO_WAIT)); }