diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..e8b7324 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,59 @@ +# Changelog + +Notable and breaking changes for downstream projects. Versions are not +yet tagged; entries reference the merge PR. + +## Unreleased + +### Migration checklist (bumping across the 2026-06 hardening series) + +1. **Add `sdkconfig.boreas` to your defaults list** (#41) — in your + project `CMakeLists.txt`, before `project.cmake`: + ```cmake + set(SDKCONFIG_DEFAULTS "sdkconfig.defaults;components/boreas/sdkconfig.boreas") + ``` + Then regenerate config once: `rm -rf build sdkconfig && idf.py + set-target `. A compile-time error names the file if missing. +2. **Audit `k_work_submit*` / `k_work_schedule*` / `k_work_reschedule*` + return checks** (#37) — success is no longer `0`. Upstream-parity + codes: `0` = no-op (already queued/scheduled), `1` = queued/armed, + `2` = was running and queued again, negative = error (`-ENODEV` for + an unstarted queue, previously `-EINVAL`). `< 0` error checks are + unaffected; `== 0` success checks must change. +3. **Audit boolean uses of `k_work_cancel`** (#38) — **polarity + inverted**: it now returns the remaining busy state (`int`), so old + `true` meant *cancelled* while new nonzero means *still busy*. + `if (k_work_cancel(&w))` flips meaning. `k_work_cancel_sync` now + returns `bool` was-pending (was `int 0`). +4. **Collapse triple-cancel workarounds** (#38) — self-rescheduling + delayables are now stopped reliably with one + `k_work_cancel_delayable_sync()` call; submission during a cancel + is rejected with `-EBUSY`. +5. **Do not use task-notification index 1** in application code (#41) + — reserved for zkernel blocking primitives. +6. **Do not abort threads blocked in `k_sem_take`** (#41) — see the + `@note` on `k_sem_take`; upstream unpends aborted threads, Boreas + cannot. + +### Changed + +- **k_sem is notification-backed** (#41): no FreeRTOS control block; + `K_SEM_DEFINE` is a true compile-time initializer (usable from any + constructor); `k_sem_reset` wakes waiters with `-EAGAIN`; + `k_sem_give` wakes the highest-priority waiter; `k_sem_init` returns + `-EINVAL` for invalid limits. When `k_sem_take` returns, the kernel + holds no references into the caller's struct. +- **k_work cancel family enforces `K_WORK_CANCELING`** (#38) and + removes a queued-again-while-running instance on cancel. +- **k_work return codes match upstream Zephyr** (#37); the schedule + no-op window and schedule-while-running semantics now match upstream. +- **k_thread lifecycle honors the caller-owned-memory contract** (#18): + a returning entry function terminates the thread; `k_thread_join` + reclaims it (codes: `-EBUSY` no-wait, `-EDEADLK` self-join, + `-EAGAIN` timeout) and no longer false-joins suspended threads. + +### Fixed + +- The 2026-04 stack-local `k_sem` scheduler corruption was root-caused + to the pre-#18 k_thread zombie defect and is fixed since #18; the + trigger shapes are permanent regression tests (#39, issue #21). diff --git a/README.md b/README.md index 70e5802..46d87fd 100644 --- a/README.md +++ b/README.md @@ -21,11 +21,19 @@ Add Boreas to your ESP-IDF project as a git submodule or local path: ```bash # As submodule git submodule add https://github.com/intercreate/boreas.git components/boreas +``` -# In your top-level CMakeLists.txt +```cmake +# In your top-level CMakeLists.txt (before project.cmake is included) set(EXTRA_COMPONENT_DIRS components/boreas/components) +set(SDKCONFIG_DEFAULTS "sdkconfig.defaults;components/boreas/sdkconfig.boreas") ``` +`sdkconfig.boreas` carries the configuration Boreas requires (with the +rationale documented inline); a compile-time error names it if missing. +After adding or updating it, regenerate your config once: +`rm -rf build sdkconfig && idf.py set-target `. + Then include headers: ```c diff --git a/components/zkernel/README.md b/components/zkernel/README.md index 4a0e3ee..fc8142d 100644 --- a/components/zkernel/README.md +++ b/components/zkernel/README.md @@ -25,8 +25,10 @@ this principle, and any divergence belongs in an `@note` on the declaration. Current status: `k_thread` severs synchronously on silicon and documents the linux best-effort window; `k_work`/`k_work_sync` unlink synchronously via -their own state machine; the k_timer linux backend dequeues synchronously on -stop; `k_sem`/`k_mutex`/`k_msgq`/`k_event` waiters are unlinked by FreeRTOS +their own state machine; `k_sem` is notification-backed (nothing of the +caller's memory ever enters a kernel list); the k_timer linux backend +dequeues synchronously on stop; `k_mutex`/`k_msgq`/`k_event` waiters are +unlinked by FreeRTOS before the blocking call returns (a *blocked* caller's frame is necessarily live, and the control-block updates that wake a waiter complete before the waiter runs — but the giving/sending context may be preempted by the woken @@ -80,9 +82,25 @@ k_sem_count_get(&sem); |--------|---------| | `0` | Success | | `-EBUSY` | Not available (K_NO_WAIT) | -| `-EAGAIN` | Timeout expired | - -**ISR-safe:** `k_sem_give` (uses `xSemaphoreGiveFromISR` automatically). +| `-EAGAIN` | Timeout expired, or the semaphore was reset while waiting | +| `-EINVAL` | `k_sem_init` with `limit == 0` or `initial > limit` | + +**Notification-backed** (no FreeRTOS control block): the count and waiter +list live in the caller-owned struct; blocking rides direct-to-task +notifications on **reserved index 1** (requires +`CONFIG_FREERTOS_TASK_NOTIFICATION_ARRAY_ENTRIES >= 2`, enforced at compile +time). Consequences: + +- `K_SEM_DEFINE` is a true compile-time initializer (usable from any + constructor — upstream parity) +- `k_sem_reset` wakes all waiters with `-EAGAIN` (upstream parity) +- `k_sem_give` wakes the highest-priority waiter (upstream parity) +- when `k_sem_take` returns, the kernel holds zero references into the + caller's struct — the design principle above, by construction + +**ISR-safe:** `k_sem_give` (uses `vTaskNotifyGiveIndexedFromISR` +automatically). Do not use task-notification index 1 directly in +application code. ## Mutex (`k_mutex`) @@ -276,3 +294,17 @@ Ordered initialization. Entries are emplaced into the `.sys_init_entries` linker | `CONFIG_ZKERNEL_SYS_INIT` | y | Enable SYS_INIT framework | | `CONFIG_ZKERNEL_SYS_INIT_MAX_ENTRIES` | 32 | Max SYS_INIT registrations | | `CONFIG_ZKERNEL_FATAL_CAPTURE` | n | Save fatal context to NVS | + +Required configuration ships in **`sdkconfig.boreas`** at the repo root — +add it to your project's defaults list (before `project.cmake`): + +```cmake +set(SDKCONFIG_DEFAULTS "sdkconfig.defaults;path/to/boreas/sdkconfig.boreas") +``` + +It currently sets `CONFIG_FREERTOS_TASK_NOTIFICATION_ARRAY_ENTRIES=2` — +zkernel reserves task-notification index 1 for its blocking primitives; +index 0 stays free for ESP-IDF internals. A compile-time `#error` backstops +the requirement (Kconfig cannot set another component's int symbol +automatically: `select` is bool-only and cross-component int defaults lose +the parse-order race). diff --git a/components/zkernel/include/boreas/zephyr/kernel.h b/components/zkernel/include/boreas/zephyr/kernel.h index 42aed8e..6f568ab 100644 --- a/components/zkernel/include/boreas/zephyr/kernel.h +++ b/components/zkernel/include/boreas/zephyr/kernel.h @@ -170,64 +170,57 @@ static inline void k_yield(void) * Semaphore * ---------------------------------------------------------------- */ +/* Notification-backed (no FreeRTOS control block): count/limit/waiter + * list live here, guarded by the lock; blocking rides direct-to-task + * notifications on a reserved index, whose state is kernel-owned. See + * the README design principle -- when k_sem_take returns, the kernel + * holds no references into this struct. */ struct k_sem { - SemaphoreHandle_t handle; - StaticSemaphore_t buffer; + uint32_t count; + uint32_t limit; + sys_dlist_t waiters; /* of z_sem_waiter, caller-stack resident */ + portMUX_TYPE lock; }; /** - * Statically declare a semaphore and auto-initialize it with the - * given @p initial count and @p limit before main() runs. - * - * Implementation note: FreeRTOS counting semaphores require a runtime - * xSemaphoreCreateCountingStatic() call, so true compile-time init - * isn't possible. The macro emits a per-instance constructor that - * runs at startup. - * - * @note ESP-IDF Xtensa caveat: ESP-IDF iterates `.init_array` in - * DESCENDING order on Xtensa (see esp_system/startup.c - * do_global_ctors). Default-priority user constructors run - * BEFORE prioritized ones, and within a single TU the LAST- - * declared constructor runs first. Adding a constructor - * priority does not help -- prioritized constructors run - * AFTER unprioritized ones in the descending iteration. As a - * result the K_SEM_DEFINE'd sem is NOT guaranteed ready - * inside arbitrary user constructors on ESP-IDF Xtensa. It IS - * ready by: - * - app_main() - * - SYS_INIT() callbacks - * - constructors declared earlier in the same TU - * (textually) than the K_SEM_DEFINE - * - * For sems consumed from constructor context, prefer manual - * k_sem_init() in a SYS_INIT callback. - * - * @note Archive-stripping: when this macro expands inside a static - * archive, the constructor can be stripped by the linker - * unless pulled in via WHOLE_ARCHIVE (idf_component_register - * WHOLE_ARCHIVE). User application code typically isn't in - * such an archive; library/component code is. + * Statically define a fully-initialized semaphore. True compile-time + * initializer (matches upstream Zephyr): usable from constructors and + * SYS_INIT callbacks without any runtime init step. */ -/* Indirection helpers so __LINE__ expands before token-pasting. The - * line-number-based ctor name avoids generating a file-scope - * identifier that begins with underscore (reserved by C11 7.1.3) and - * is robust to caller-supplied names that themselves begin with - * underscore. Caveat: two K_SEM_DEFINE on the same source line will - * collide; not expected in practice. */ -#define K_SEM_CONCAT_(a, b) a##b -#define K_SEM_CONCAT(a, b) K_SEM_CONCAT_(a, b) -#define K_SEM_INIT_CTOR_NAME(line) K_SEM_CONCAT(k_sem_init_ctor_, line) - #define K_SEM_DEFINE(name, _initial, _limit) \ - struct k_sem name = {0}; \ - __attribute__((constructor)) static void K_SEM_INIT_CTOR_NAME(__LINE__)(void) \ - { \ - k_sem_init(&name, (_initial), (_limit)); \ - } + struct k_sem name = { \ + .count = (_initial), \ + .limit = (_limit), \ + .waiters = SYS_DLIST_STATIC_INIT(&name.waiters), \ + .lock = portMUX_INITIALIZER_UNLOCKED, \ + }; \ + BUILD_ASSERT(((_limit) != 0) && ((_initial) <= (_limit)), \ + "K_SEM_DEFINE: limit must be nonzero and >= initial") /* upstream parity */ +/** + * @retval 0 on success + * @retval -EINVAL if @p limit is zero or @p initial_count exceeds it + * (matches upstream) + */ int k_sem_init(struct k_sem *sem, unsigned int initial_count, unsigned int limit); +/** + * @retval 0 on success + * @retval -EBUSY if K_NO_WAIT and the semaphore was unavailable + * @retval -EAGAIN on timeout, or if the semaphore was reset while + * waiting (matches upstream k_sem_reset semantics) + * + * @note Divergence: a thread blocked in k_sem_take must NOT be + * aborted (k_thread_abort / vTaskDelete). Upstream Zephyr + * unpends an aborted thread from any wait queue; Boreas cannot + * reach into the semaphore's waiter list from abort, so the + * dead thread would leave a dangling waiter node. The same + * applies to aborting a thread that is inside k_sem_give. + */ int k_sem_take(struct k_sem *sem, k_timeout_t timeout); void k_sem_give(struct k_sem *sem); +/** Zero the count and wake all waiters; their takes return -EAGAIN + * (upstream parity -- the previous FreeRTOS-backed implementation + * could only drain the count). */ void k_sem_reset(struct k_sem *sem); unsigned int k_sem_count_get(struct k_sem *sem); @@ -851,6 +844,12 @@ void k_thread_name_set(struct k_thread *thread, const char *name); * runners, not guaranteed under idle starvation). Upstream * Zephyr's k_thread_abort does not block this way; on silicon * reclamation is synchronous and does not block. + * + * @note Divergence: do not abort a thread that is blocked in + * k_sem_take (or inside k_sem_give) -- upstream unpends aborted + * threads from wait queues; Boreas cannot reach into the + * notification-backed semaphore's waiter list from here (see + * the @note on k_sem_take). */ void k_thread_abort(struct k_thread *thread); void k_thread_suspend(struct k_thread *thread); diff --git a/components/zkernel/src/k_sem.c b/components/zkernel/src/k_sem.c index 0a90904..d65003a 100644 --- a/components/zkernel/src/k_sem.c +++ b/components/zkernel/src/k_sem.c @@ -1,6 +1,15 @@ /* * SPDX-License-Identifier: Apache-2.0 * Copyright 2026 Intercreate + * + * Notification-backed k_sem: the count, limit, and waiter list live in + * the caller-owned struct k_sem and are only ever touched under its + * lock by THIS file; blocking and wakeup ride FreeRTOS direct-to-task + * notifications, whose state lives inside the TCB (kernel-owned). + * Nothing of the caller's memory ever enters a kernel list, which + * structurally enforces the object-lifetime principle (zkernel README; + * issues #21/#22/#40): when k_sem_take returns, the kernel holds zero + * references into the caller's storage. */ #include "zephyr/kernel.h" @@ -8,51 +17,209 @@ #include #include "esp_attr.h" -#include "esp_log.h" #include "sdkconfig.h" -static const char *TAG = "k_sem"; +#include "zkernel_internal.h" + +#if configTASK_NOTIFICATION_ARRAY_ENTRIES < 2 +#error "Boreas zkernel reserves task-notification index 1 for blocking primitives (index 0 stays free for ESP-IDF internals such as esp_ipc/pthread/eth). Set CONFIG_FREERTOS_TASK_NOTIFICATION_ARRAY_ENTRIES=2 (or higher) in sdkconfig." +#endif + +/* Reserved for zkernel: index 0 is used by ESP-IDF internals. */ +#define Z_SEM_NOTIFY_INDEX 1 + +struct z_sem_waiter { + sys_dnode_t node; + TaskHandle_t task; + UBaseType_t prio; /* cached at enqueue (no FreeRTOS calls under the sem lock) */ + bool woken; /* a give/reset targeted this waiter; set under the lock */ + bool reset; /* woken by k_sem_reset -> take returns -EAGAIN */ +}; int k_sem_init(struct k_sem *sem, unsigned int initial_count, unsigned int limit) { - sem->handle = xSemaphoreCreateCountingStatic(limit, initial_count, &sem->buffer); - if (sem->handle == NULL) { - ESP_LOGE(TAG, "Failed to create semaphore"); - return -ENOMEM; + if (limit == 0 || initial_count > limit) { + return -EINVAL; /* matches upstream */ } + + sem->count = initial_count; + sem->limit = limit; + sys_dlist_init(&sem->waiters); + sem->lock = (portMUX_TYPE)portMUX_INITIALIZER_UNLOCKED; return 0; } +/* Pop the wake target: highest cached priority, FIFO among equals + * (upstream wakes the highest-priority waiter). Caller holds the lock. + * Plain code over the caller-owned list -- no FreeRTOS calls. */ +static struct z_sem_waiter *z_sem_pop_waiter(struct k_sem *sem) +{ + struct z_sem_waiter *best = NULL; + sys_dnode_t *n; + + for (n = sys_dlist_peek_head(&sem->waiters); n != NULL; + n = sys_dlist_peek_next(&sem->waiters, n)) { + struct z_sem_waiter *w = CONTAINER_OF(n, struct z_sem_waiter, node); + + if (best == NULL || w->prio > best->prio) { + best = w; + } + } + if (best != NULL) { + sys_dlist_remove(&best->node); + best->woken = true; + } + return best; +} + int k_sem_take(struct k_sem *sem, k_timeout_t timeout) { - BaseType_t ret = xSemaphoreTake(sem->handle, k_timeout_to_ticks(timeout)); - if (ret == pdTRUE) { - return 0; + /* The waiter node lives on THIS stack frame. It is only ever + * linked while we are inside this function, and it is unlinked + * (by the giver or by our timeout path) under the sem lock before + * we return -- synchronous severance by construction. */ + struct z_sem_waiter w = {0}; + + /* Task identity is sampled OUTSIDE the lock (uxTaskPriorityGet + * enters FreeRTOS's own critical section -- no FreeRTOS calls + * happen under the sem lock), and ONLY on the must-block path: + * the fast paths must work before the scheduler starts (NULL + * current task), which is what makes K_SEM_DEFINE usable from + * constructors. Unlock-sample-relock requires re-checking the + * count, hence the loop. */ + for (;;) { + z_kernel_lock(&sem->lock); + + if (sem->count > 0) { + sem->count--; + z_kernel_unlock(&sem->lock); + return 0; + } + + if (k_timeout_is_no_wait(timeout)) { + z_kernel_unlock(&sem->lock); + return -EBUSY; + } + + if (w.task != NULL) { + break; /* sampled on a previous pass; enqueue under THIS lock */ + } + + z_kernel_unlock(&sem->lock); + w.task = xTaskGetCurrentTaskHandle(); + w.prio = uxTaskPriorityGet(NULL); + } + + sys_dlist_append(&sem->waiters, &w.node); + z_kernel_unlock(&sem->lock); + + bool forever = k_timeout_is_forever(timeout); + TickType_t total = forever ? 0 : k_timeout_to_ticks(timeout); + TickType_t start = xTaskGetTickCount(); + + for (;;) { + TickType_t wait = portMAX_DELAY; + + if (!forever) { + /* Unsigned tick diff is wraparound-safe. */ + TickType_t elapsed = xTaskGetTickCount() - start; + + wait = (elapsed >= total) ? 0 : (total - elapsed); + } + + uint32_t got = ulTaskNotifyTakeIndexed(Z_SEM_NOTIFY_INDEX, pdTRUE, wait); + + z_kernel_lock(&sem->lock); + if (w.woken) { + /* A giver/reset unlinked us. If our wait timed out in + * the same instant (got == 0), its notification is + * still in flight -- consume it before returning so + * it cannot poison a later take on this task (the + * consume-the-in-flight-give pattern from the k_work + * sync slot). */ + bool reset = w.reset; + + z_kernel_unlock(&sem->lock); + if (got == 0) { + (void)ulTaskNotifyTakeIndexed(Z_SEM_NOTIFY_INDEX, pdTRUE, + portMAX_DELAY); + } + return reset ? -EAGAIN : 0; + } + + if (got == 0 && !forever) { + /* Timed out, not targeted: unlink ourselves. After + * this unlock no giver can see the node. */ + sys_dlist_remove(&w.node); + z_kernel_unlock(&sem->lock); + return -EAGAIN; + } + + /* Spurious wake (stale notification from code misusing the + * reserved index): loop; the wait recompute above absorbs + * the elapsed time. */ + z_kernel_unlock(&sem->lock); } - return k_timeout_is_no_wait(timeout) ? -EBUSY : -EAGAIN; } void K_ISR_SAFE k_sem_give(struct k_sem *sem) { - if (xPortInIsrContext()) { - BaseType_t wake = pdFALSE; - xSemaphoreGiveFromISR(sem->handle, &wake); - if (wake) { - portYIELD_FROM_ISR(wake); + TaskHandle_t to_wake = NULL; + + z_kernel_lock(&sem->lock); + struct z_sem_waiter *w = z_sem_pop_waiter(sem); + + if (w != NULL) { + /* Copy the handle under the lock; never touch the waiter's + * stack-resident node after unlock. The waiter cannot return + * from k_sem_take until our notification lands (woken==true + * forces it into the consume path), so the handle stays + * valid. */ + to_wake = w->task; + } else if (sem->count < sem->limit) { + sem->count++; + } + z_kernel_unlock(&sem->lock); + + if (to_wake != NULL) { + if (xPortInIsrContext()) { + BaseType_t woken = pdFALSE; + + vTaskNotifyGiveIndexedFromISR(to_wake, Z_SEM_NOTIFY_INDEX, &woken); + portYIELD_FROM_ISR(woken); + } else { + xTaskNotifyGiveIndexed(to_wake, Z_SEM_NOTIFY_INDEX); } - } else { - xSemaphoreGive(sem->handle); } } void k_sem_reset(struct k_sem *sem) { - /* Drain all tokens */ - while (xSemaphoreTake(sem->handle, 0) == pdTRUE) { + /* Upstream semantics: zero the count and wake all waiters, whose + * k_sem_take calls return -EAGAIN. (The FreeRTOS-backed + * implementation could only drain the count.) */ + for (;;) { + TaskHandle_t to_wake = NULL; + + z_kernel_lock(&sem->lock); + struct z_sem_waiter *w = z_sem_pop_waiter(sem); + + if (w != NULL) { + w->reset = true; + to_wake = w->task; + } else { + sem->count = 0; + } + z_kernel_unlock(&sem->lock); + + if (to_wake == NULL) { + return; + } + xTaskNotifyGiveIndexed(to_wake, Z_SEM_NOTIFY_INDEX); } } unsigned int k_sem_count_get(struct k_sem *sem) { - return (unsigned int)uxSemaphoreGetCount(sem->handle); + return __atomic_load_n(&sem->count, __ATOMIC_RELAXED); } diff --git a/components/zkernel/src/k_work.c b/components/zkernel/src/k_work.c index 540bbba..685acee 100644 --- a/components/zkernel/src/k_work.c +++ b/components/zkernel/src/k_work.c @@ -12,6 +12,8 @@ #include #include "esp_attr.h" +#include "zkernel_internal.h" + /* Internal queue flag (k_work_q.flags). Not exposed in the public * header because it's an implementation detail. */ #define Z_WORK_QUEUE_STARTED BIT(0) @@ -33,27 +35,6 @@ static StackType_t sys_wq_stack[CONFIG_SYSTEM_WORKQUEUE_STACK_SIZE / sizeof(StackType_t)]; struct k_work_q k_sys_work_q; -/* Branch-once helpers around portENTER/EXIT_CRITICAL: ESP-IDF needs - * different macros for ISR vs task context. Inlined to avoid runtime - * overhead in the common (task) case. */ -static ALWAYS_INLINE void z_work_lock(struct k_work_q *queue) -{ - if (xPortInIsrContext()) { - portENTER_CRITICAL_ISR(&queue->lock); - } else { - portENTER_CRITICAL(&queue->lock); - } -} - -static ALWAYS_INLINE void z_work_unlock(struct k_work_q *queue) -{ - if (xPortInIsrContext()) { - portEXIT_CRITICAL_ISR(&queue->lock); - } else { - portEXIT_CRITICAL(&queue->lock); - } -} - /* ---------------------------------------------------------------- * Work Queue Thread * ---------------------------------------------------------------- */ @@ -123,7 +104,7 @@ static int K_ISR_SAFE k_work_submit_internal(struct k_work_q *queue, struct k_wo return -EINVAL; } - z_work_lock(queue); + z_kernel_lock(&queue->lock); uint32_t flags = __atomic_load_n(&work->flags, __ATOMIC_RELAXED); @@ -131,13 +112,13 @@ static int K_ISR_SAFE k_work_submit_internal(struct k_work_q *queue, struct k_wo /* Rejected while a cancellation is in progress -- checked * before the already-queued case, matching upstream's * order. See k_work_cancel_sync(). */ - z_work_unlock(queue); + z_kernel_unlock(&queue->lock); return -EBUSY; } if (flags & K_WORK_QUEUED) { /* Already queued -- no-op (upstream retval 0) */ - z_work_unlock(queue); + z_kernel_unlock(&queue->lock); return 0; } @@ -145,9 +126,9 @@ static int K_ISR_SAFE k_work_submit_internal(struct k_work_q *queue, struct k_wo __atomic_store_n(&work->queue, queue, __ATOMIC_RELEASE); sys_dlist_append(&queue->pending, &work->node); - z_work_unlock(queue); + z_kernel_unlock(&queue->lock); - /* k_sem_give is already ISR-safe (uses xSemaphoreGiveFromISR). */ + /* k_sem_give is ISR-safe (ISR-aware lock + FromISR notify path). */ k_sem_give(&queue->sem); /* Upstream retvals: 1 = was idle, now queued; 2 = was running and @@ -181,7 +162,7 @@ int k_work_cancel(struct k_work *work) struct k_work_sync *sync_to_release = NULL; - z_work_lock(queue); + z_kernel_lock(&queue->lock); /* Re-validate the queue under lock: between our read of work->queue * and acquiring this queue's lock, the worker may have popped the @@ -189,7 +170,7 @@ int k_work_cancel(struct k_work *work) * queue. We hold the wrong lock for that case; bail rather than * remove from a list we don't own. Cancel is best-effort. */ if (__atomic_load_n(&work->queue, __ATOMIC_RELAXED) != queue) { - z_work_unlock(queue); + z_kernel_unlock(&queue->lock); return (int)k_work_busy_get(work); } @@ -218,7 +199,7 @@ int k_work_cancel(struct k_work *work) } } - z_work_unlock(queue); + z_kernel_unlock(&queue->lock); if (sync_to_release != NULL) { k_sem_give(&sync_to_release->sem); @@ -341,9 +322,10 @@ int k_work_queue_drain(struct k_work_q *queue, bool plug) { (void)plug; /* Boreas does not implement plugging; reserved for upstream parity. */ - /* Sentinel is stack-allocated -- safe because xSemaphoreGive does - * not access the sem after returning, so by the time k_sem_take - * wakes the drain caller, the worker is finished with `s`. */ + /* Sentinel is stack-allocated -- safe because the notification- + * backed k_sem severs all references before k_sem_take returns, + * so by the time the drain caller wakes, the worker is finished + * with `s`. */ struct z_work_drain_sentinel s; k_work_init(&s.work, z_work_drain_handler); k_sem_init(&s.sem, 0, 1); diff --git a/components/zkernel/src/zkernel_internal.h b/components/zkernel/src/zkernel_internal.h new file mode 100644 index 0000000..ea3c9d6 --- /dev/null +++ b/components/zkernel/src/zkernel_internal.h @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2026 Intercreate + * + * Private helpers shared by zkernel sources. Not installed; not part + * of the public API. + */ + +#pragma once + +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" + +#include "esp_attr.h" + +/* ISR-aware critical section: ESP-IDF needs different macros for ISR + * vs task context. Inlined to avoid runtime overhead in the common + * (task) case. */ +static ALWAYS_INLINE void z_kernel_lock(portMUX_TYPE *lock) +{ + if (xPortInIsrContext()) { + portENTER_CRITICAL_ISR(lock); + } else { + portENTER_CRITICAL(lock); + } +} + +static ALWAYS_INLINE void z_kernel_unlock(portMUX_TYPE *lock) +{ + if (xPortInIsrContext()) { + portEXIT_CRITICAL_ISR(lock); + } else { + portEXIT_CRITICAL(lock); + } +} diff --git a/examples/blink_device_model/CMakeLists.txt b/examples/blink_device_model/CMakeLists.txt index 96875fd..a5b3cc3 100644 --- a/examples/blink_device_model/CMakeLists.txt +++ b/examples/blink_device_model/CMakeLists.txt @@ -2,6 +2,9 @@ cmake_minimum_required(VERSION 3.16) set(EXTRA_COMPONENT_DIRS "../../components") +# Boreas required configuration (notification-index reservation etc.) +set(SDKCONFIG_DEFAULTS "sdkconfig.defaults;${CMAKE_CURRENT_LIST_DIR}/../../sdkconfig.boreas") + include($ENV{IDF_PATH}/tools/cmake/project.cmake) project(blink_device_model) diff --git a/examples/log_demo/CMakeLists.txt b/examples/log_demo/CMakeLists.txt index 070698e..8ef897e 100644 --- a/examples/log_demo/CMakeLists.txt +++ b/examples/log_demo/CMakeLists.txt @@ -2,6 +2,9 @@ cmake_minimum_required(VERSION 3.16) set(EXTRA_COMPONENT_DIRS "../../components") +# Boreas required configuration (notification-index reservation etc.) +set(SDKCONFIG_DEFAULTS "sdkconfig.defaults;${CMAKE_CURRENT_LIST_DIR}/../../sdkconfig.boreas") + include($ENV{IDF_PATH}/tools/cmake/project.cmake) project(log_demo) diff --git a/examples/shell_demo/CMakeLists.txt b/examples/shell_demo/CMakeLists.txt index 555f600..ceceb36 100644 --- a/examples/shell_demo/CMakeLists.txt +++ b/examples/shell_demo/CMakeLists.txt @@ -1,6 +1,9 @@ cmake_minimum_required(VERSION 3.16) set(EXTRA_COMPONENT_DIRS "../../components") +# Boreas required configuration (notification-index reservation etc.) +set(SDKCONFIG_DEFAULTS "sdkconfig.defaults;${CMAKE_CURRENT_LIST_DIR}/../../sdkconfig.boreas") + include($ENV{IDF_PATH}/tools/cmake/project.cmake) project(shell_demo) diff --git a/examples/uart_loopback/CMakeLists.txt b/examples/uart_loopback/CMakeLists.txt index 236979e..dce147c 100644 --- a/examples/uart_loopback/CMakeLists.txt +++ b/examples/uart_loopback/CMakeLists.txt @@ -2,6 +2,9 @@ cmake_minimum_required(VERSION 3.16) set(EXTRA_COMPONENT_DIRS "../../components") +# Boreas required configuration (notification-index reservation etc.) +set(SDKCONFIG_DEFAULTS "sdkconfig.defaults;${CMAKE_CURRENT_LIST_DIR}/../../sdkconfig.boreas") + include($ENV{IDF_PATH}/tools/cmake/project.cmake) project(uart_loopback) diff --git a/examples/uart_rs485_echo/CMakeLists.txt b/examples/uart_rs485_echo/CMakeLists.txt index 3714cd7..d9911e1 100644 --- a/examples/uart_rs485_echo/CMakeLists.txt +++ b/examples/uart_rs485_echo/CMakeLists.txt @@ -2,6 +2,9 @@ cmake_minimum_required(VERSION 3.16) set(EXTRA_COMPONENT_DIRS "../../components") +# Boreas required configuration (notification-index reservation etc.) +set(SDKCONFIG_DEFAULTS "sdkconfig.defaults;${CMAKE_CURRENT_LIST_DIR}/../../sdkconfig.boreas") + include($ENV{IDF_PATH}/tools/cmake/project.cmake) project(uart_rs485_echo) diff --git a/examples/work_queue_demo/CMakeLists.txt b/examples/work_queue_demo/CMakeLists.txt index bea9051..b76aebe 100644 --- a/examples/work_queue_demo/CMakeLists.txt +++ b/examples/work_queue_demo/CMakeLists.txt @@ -2,6 +2,9 @@ cmake_minimum_required(VERSION 3.16) set(EXTRA_COMPONENT_DIRS "../../components") +# Boreas required configuration (notification-index reservation etc.) +set(SDKCONFIG_DEFAULTS "sdkconfig.defaults;${CMAKE_CURRENT_LIST_DIR}/../../sdkconfig.boreas") + include($ENV{IDF_PATH}/tools/cmake/project.cmake) project(work_queue_demo) diff --git a/sdkconfig.boreas b/sdkconfig.boreas new file mode 100644 index 0000000..b2432ca --- /dev/null +++ b/sdkconfig.boreas @@ -0,0 +1,16 @@ +# Boreas required configuration +# +# Add this file to your project's SDKCONFIG_DEFAULTS (before +# project.cmake is included): +# +# set(SDKCONFIG_DEFAULTS "sdkconfig.defaults;path/to/boreas/sdkconfig.boreas") +# +# Kconfig cannot set these automatically from a component: `select` is +# bool-only, and a component-supplied `default` for an int it does not +# own loses the parse-order race to the owning component. A compile-time +# #error backstops each requirement. + +# zkernel reserves task-notification index 1 for its blocking +# primitives (k_sem); index 0 stays free for ESP-IDF internals +# (esp_ipc, pthread, ethernet/usb drivers). +CONFIG_FREERTOS_TASK_NOTIFICATION_ARRAY_ENTRIES=2 diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 7a51086..85b8306 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -8,6 +8,9 @@ if("${IDF_TARGET}" STREQUAL "linux") set(COMPONENTS main zkernel zsys zshell zdevice) endif() +# Boreas required configuration (notification-index reservation etc.) +set(SDKCONFIG_DEFAULTS "sdkconfig.defaults;${CMAKE_CURRENT_LIST_DIR}/../sdkconfig.boreas") + include($ENV{IDF_PATH}/tools/cmake/project.cmake) project(boreas_test) diff --git a/test/main/test_k_sem.c b/test/main/test_k_sem.c index 9adccca..d128ab9 100644 --- a/test/main/test_k_sem.c +++ b/test/main/test_k_sem.c @@ -84,21 +84,40 @@ static void test_sem_take_no_wait_empty(void) } /* ---------------------------------------------------------------- - * K_SEM_DEFINE auto-init regression test. + * K_SEM_DEFINE static-init test. * - * K_SEM_DEFINE must produce a fully-initialized semaphore by the - * time app_main() runs. We verify the post-startup count matches - * the macro's `initial` argument and that take/give work without - * an explicit k_sem_init() call. - * - * Note: ESP-IDF Xtensa iterates .init_array in descending order, so - * we cannot reliably consume the sem from another constructor in - * the same TU (see K_SEM_DEFINE doxygen). The "ready by app_main" - * contract is what's actually deliverable; that's what we test. + * K_SEM_DEFINE is a true compile-time initializer (upstream parity, + * #26): the sem is usable from ANY constructor -- the old + * constructor-emitting macro could not guarantee that on ESP-IDF + * Xtensa (descending .init_array iteration). ctor_take_result proves + * a constructor in this TU can consume the sem before main(). * ---------------------------------------------------------------- */ K_SEM_DEFINE(auto_sem, 2, 5); +static int ctor_take_result = -1; + +__attribute__((constructor)) static void sem_ctor_consumer(void) +{ + ctor_take_result = k_sem_take(&auto_sem, K_NO_WAIT); + if (ctor_take_result == 0) { + k_sem_give(&auto_sem); /* restore for the app_main test */ + } +} + +static void test_sem_define_usable_in_constructor(void) +{ + TEST_ASSERT_EQUAL(0, ctor_take_result); +} + +static void test_sem_init_invalid_args(void) +{ + struct k_sem sem; + + TEST_ASSERT_EQUAL(-EINVAL, k_sem_init(&sem, 0, 0)); + TEST_ASSERT_EQUAL(-EINVAL, k_sem_init(&sem, 6, 5)); +} + static void test_sem_auto_init_pre_main(void) { /* Initial count from the macro must be visible by app_main. */ @@ -275,6 +294,76 @@ static void test_sem_april_periodic_status_sync(void) } } +/* ---------------------------------------------------------------- + * Notification-backed semantics: reset wakes waiters; priority wake + * ---------------------------------------------------------------- */ + +K_THREAD_STACK_DEFINE(sem_reset_stack, 4096); +static struct k_thread sem_resetter_thread; + +static void sem_resetter_entry(void *p1, void *p2, void *p3) +{ + (void)p2; + (void)p3; + k_msleep(30); + k_sem_reset((struct k_sem *)p1); +} + +static void test_sem_reset_wakes_waiter_eagain(void) +{ + struct k_sem sem; /* stack-local: also exercises severance */ + + TEST_ASSERT_EQUAL(0, k_sem_init(&sem, 0, 1)); + + k_thread_create(&sem_resetter_thread, sem_reset_stack, + K_THREAD_STACK_SIZEOF(sem_reset_stack), sem_resetter_entry, &sem, NULL, + NULL, 5, 0, K_NO_WAIT); + + /* Blocked take must be woken by the reset with -EAGAIN + * (upstream parity; the FreeRTOS-backed sem could not do this). */ + TEST_ASSERT_EQUAL(-EAGAIN, k_sem_take(&sem, K_SECONDS(2))); + TEST_ASSERT_EQUAL(0, k_sem_count_get(&sem)); + TEST_ASSERT_EQUAL(0, k_thread_join(&sem_resetter_thread, K_SECONDS(2))); +} + +K_THREAD_STACK_DEFINE(sem_lo_stack, 4096); +K_THREAD_STACK_DEFINE(sem_hi_stack, 4096); +static struct k_thread sem_lo_thread; +static struct k_thread sem_hi_thread; +static struct k_sem prio_sem; +static volatile int first_woken; /* 4 = low, 6 = high */ + +static void prio_waiter_entry(void *p1, void *p2, void *p3) +{ + (void)p2; + (void)p3; + if (k_sem_take(&prio_sem, K_SECONDS(2)) == 0 && first_woken == 0) { + first_woken = (int)(intptr_t)p1; + } +} + +static void test_sem_give_wakes_highest_priority_waiter(void) +{ + TEST_ASSERT_EQUAL(0, k_sem_init(&prio_sem, 0, 2)); + first_woken = 0; + + /* Low-priority waiter enqueues FIRST -- FIFO would wake it. */ + k_thread_create(&sem_lo_thread, sem_lo_stack, K_THREAD_STACK_SIZEOF(sem_lo_stack), + prio_waiter_entry, (void *)(intptr_t)4, NULL, NULL, 4, 0, K_NO_WAIT); + k_msleep(20); + k_thread_create(&sem_hi_thread, sem_hi_stack, K_THREAD_STACK_SIZEOF(sem_hi_stack), + prio_waiter_entry, (void *)(intptr_t)6, NULL, NULL, 6, 0, K_NO_WAIT); + k_msleep(20); + + k_sem_give(&prio_sem); /* upstream: highest-priority waiter wins */ + k_msleep(20); + TEST_ASSERT_EQUAL(6, first_woken); + + k_sem_give(&prio_sem); /* release the second waiter */ + TEST_ASSERT_EQUAL(0, k_thread_join(&sem_lo_thread, K_SECONDS(2))); + TEST_ASSERT_EQUAL(0, k_thread_join(&sem_hi_thread, K_SECONDS(2))); +} + void test_k_sem_group(void) { RUN_TEST(test_sem_stack_forever_same_prio); @@ -290,4 +379,8 @@ void test_k_sem_group(void) RUN_TEST(test_sem_init_at_limit); RUN_TEST(test_sem_take_no_wait_empty); RUN_TEST(test_sem_auto_init_pre_main); + RUN_TEST(test_sem_define_usable_in_constructor); + RUN_TEST(test_sem_init_invalid_args); + RUN_TEST(test_sem_reset_wakes_waiter_eagain); + RUN_TEST(test_sem_give_wakes_highest_priority_waiter); }