From 8058c8cad4d0d374b073f17e4383ee05c4d47d12 Mon Sep 17 00:00:00 2001 From: David Garske Date: Mon, 4 May 2026 15:49:07 -0700 Subject: [PATCH] Adds new STM32 Bare support for Hash, SAES/AES and PKA --- .wolfssl_known_macro_extras | 13 + STM32_BARE_BOARD_STATUS.md | 201 ++++ wolfcrypt/src/aes.c | 57 +- wolfcrypt/src/ecc.c | 21 +- wolfcrypt/src/port/st/stm32.c | 1445 ++++++++++++++++++++++++++++- wolfcrypt/src/random.c | 10 +- wolfssl/wolfcrypt/port/st/stm32.h | 240 ++++- wolfssl/wolfcrypt/settings.h | 55 +- 8 files changed, 2022 insertions(+), 20 deletions(-) create mode 100644 STM32_BARE_BOARD_STATUS.md diff --git a/.wolfssl_known_macro_extras b/.wolfssl_known_macro_extras index 99e5990666b..9d43f834db7 100644 --- a/.wolfssl_known_macro_extras +++ b/.wolfssl_known_macro_extras @@ -1,4 +1,6 @@ +AES_CR_CCFC AES_GCM_GMULT_NCT +AES_ICR_CCF AFX_RESOURCE_DLL AFX_TARG_ENU ALLOW_BINARY_MISMATCH_INTROSPECTION @@ -265,7 +267,11 @@ HARDWARE_CACHE_COHERENCY HASH_AlgoMode_HASH HASH_AlgoMode_HMAC HASH_BYTE_SWAP +HASH_CR_ALGO_1 +HASH_CR_DATATYPE_0 +HASH_CR_DATATYPE_1 HASH_CR_LKEY +HASH_CR_MODE HASH_DIGEST HASH_DataType_8b HASH_IMR_DCIE @@ -495,6 +501,12 @@ PTHREAD_STACK_MIN QAT_ENABLE_HASH QAT_ENABLE_RNG QAT_USE_POLLING_CHECK +RCC_AHB1ENR_PKAEN +RCC_AHB2ENR1_AESEN +RCC_AHB2ENR1_HASHEN +RCC_AHB2ENR1_PKAEN +RCC_AHB2ENR_HASHEN +RCC_AHB2ENR_PKAEN RC_NO_RNG REDIRECTION_IN3_KEYELMID REDIRECTION_IN3_KEYID @@ -917,6 +929,7 @@ WOLFSSL_SP_INT_SQR_VOLATILE WOLFSSL_STACK_CHECK WOLFSSL_STM32F427_RNG WOLFSSL_STM32U5_DHUK +WOLFSSL_STM32_BARE WOLFSSL_STRONGEST_HASH_SIG WOLFSSL_STSAFE_TAKES_SLOT WOLFSSL_TELIT_M2MB diff --git a/STM32_BARE_BOARD_STATUS.md b/STM32_BARE_BOARD_STATUS.md new file mode 100644 index 00000000000..1bab0e1eaad --- /dev/null +++ b/STM32_BARE_BOARD_STATUS.md @@ -0,0 +1,201 @@ +# STM32 Bare-Metal (`WOLFSSL_STM32_BARE`) Board Status + +Generated 2026-05-04. 
Tracks the boards exercised by the `STM32_Bare_Test` +multi-board example in `wolfssl-examples-stm32` against the corresponding +direct-register support in `wolfssl/wolfcrypt/src/port/st/stm32.c`. + +Columns: + +- **HASH HW** — chip has a HASH peripheral (MD5/SHA-1/SHA-2/...). "yes" = + the BARE driver routes `wc_Sha*` to the HASH IP. "-" = no HASH silicon; + SHA falls back to software in all configs. +- **AES HW** — chip has an AES/CRYP peripheral. "CRYP" = FIFO-based AES + on F4/F7/H7/MP13 (`wc_Stm32_Aes_*` -> CRYP HW). "TinyAES" = single-reg + AES on L4/L5/U3/U5/H5/G4/WB/WL/G0/WBA. "-" = no AES; software path. +- **PKA HW** — chip has a public-key accelerator. "yes" + Tested = the + bare-metal PKA driver (added 2026-05-04) is wired up and validated end + to end. "yes" + Untested = silicon present but no validation flash this + session. "-" = no PKA silicon. +- **Status** — Validated = `make BOARD=<board> CONFIG=bare TARGET=test` runs + the full `wolfcrypt_test` and exits with `Result: 0 (PASS)` on real + hardware in this session. + +## Validated boards + +| BOARD | Chip | Cortex / Clock | HASH HW | AES HW | PKA HW | Status | +|--------|---------------|--------------------|---------|----------|----------------|------------| +| `h7` | STM32H753ZI | M7F / 480 MHz PLL | yes | CRYP | - | Validated | +| `f439` | STM32F439ZI | M4F / 144 MHz PLL | yes | CRYP | - | Validated | +| `wb55` | STM32WB55RG | M4F / 64 MHz PLL | - | TinyAES | yes (Tested V1)| Validated | +| `u3` | STM32U385RG | M33 / 96 MHz | yes | TinyAES | yes (Tested V2)| Validated | +| `u5` | STM32U575ZI | M33 / 160 MHz | yes | - | - | Validated\*\* | +| `h5` | STM32H563ZI | M33 / 250 MHz | yes | - | yes (Compile) | Build OK\* | +| `g491` | STM32G491RE | M4F / 170 MHz PLL | - | - | - | Validated | + +\* H5 PKA driver is enabled for `BUILD_BARE` and **builds cleanly**. +**Runtime validation is blocked by a flash ECC fault.** See the +"H5 reproduction steps" section below for the full repro recipe.
+ +\*\* U5 is the STM32U575 NUCLEO -- that silicon does **not** have PKA +(only U585+ does). The HASH + RNG bare-metal paths are validated. +For PKA validation on U5 we'd need a NUCLEO-U585AI-Q. + +## Bench HW used + +These results are from `make BOARD=<board> CONFIG=bare TARGET=bench`. Numbers +are from the wolfcrypt `benchmark.c` block-1024 default. Bold marks the +best result in each column. + +| Board | Clock | AES-128-CBC enc (BARE) | SHA-256 (BARE) | ECDHE secp256r1 (BARE) | +|--------|---------|------------------------|----------------|------------------------| +| h7 | 480 MHz | **19.165 MiB/s** | **25.928 MiB/s**| (no PKA HW; SP-SW) | +| f439 | 144 MHz | 11.401 MiB/s | 25.757 MiB/s | (no PKA HW; SP-SW) | +| g491 | 170 MHz | 1.017 MiB/s (sw) | 3.037 MiB/s | 11.8 ops/s (sw) | +| wb55 | 64 MHz | 7.237 MiB/s | 1.243 MiB/s sw | 4.83 ops/s (PKA HW)\** | +| u3 | 96 MHz | (TinyAES BARE -- prior)| HASH HW (prior)| 1.115 ops/s (PKA HW)\**| + +\** WB55 and U3 PKA HW perform similarly to (or slightly slower than) +the SP-ECC software path at P-256 on these clocks. Both ST docs and +direct measurement (U3 SW = 1.106 vs PKA = 1.115 ops/s) confirm the +PKA HW gives no real speedup at P-256 on these specific chips. Larger +curves (P-384/521) where SP-ECC scales worse, and faster-clocked PKA +(H5 at 250 MHz, eventual U585), should let PKA pull meaningfully +ahead. Driver covers V1 (WB) and V2 (U3 / H5 / U5 / WBA / G4A1) +register layouts; the V2 path is exercised end-to-end on U3. + +## TODO -- not yet wired up + +| BOARD candidate | Chip | Cortex / Clock max | What lights up | Notes | +|-----------------|---------------|--------------------|------------------------|----------------------------------------------| +| `f437` | STM32F437IIHx | M4F / 168 MHz | CRYP + HASH + RNG | STM32439I-EVAL. Parity check vs F439 | +| `f767` / `f779` | STM32F767ZI | M7F / 216 MHz | CRYP + HASH + RNG | NUCLEO-F767ZI | +| `mp135` | STM32MP135F | A7 / 650 MHz | CRYP + HASH + RNG + PKA| STM32MP135F-DK.
Linux/bare-metal split | +| `l4r5` | STM32L4R5ZI | M4F / 120 MHz | TinyAES + HASH + RNG | NUCLEO-L4R5ZI | +| `l552` | STM32L552ZE | M33 / 110 MHz | TinyAES + HASH + RNG + SAES | NUCLEO-L552ZE-Q | +| `h573` / `h533` | STM32H573ZI | M33 / 250 MHz | TinyAES + HASH + RNG + SAES | NUCLEO-H573ZI -- H5 with AES added | +| `u585` | STM32U585AI | M33 / 160 MHz | TinyAES + HASH + RNG + SAES + PKA | NUCLEO-U585AI-Q | +| `wba` | STM32WBA52CG | M33 / 100 MHz | TinyAES + HASH + RNG + PKA | NUCLEO-WBA52CG. Same V2 PKA layout | +| `wl55` | STM32WL55JC | M4F / 48 MHz | TinyAES + RNG | NUCLEO-WL55JC. Sub-GHz radio | +| `g0b1` | STM32G0B1RE | M0+ / 64 MHz | TinyAES + RNG | NUCLEO-G0B1RE | +| `g474` / `g484` | STM32G474RE | M4F / 170 MHz | TinyAES + RNG | NUCLEO-G474RE -- G4 sibling that DOES have AES | +| `g4a1` | STM32G4A1RE | M4F / 170 MHz | TinyAES + RNG + PKA + AES | G491 sibling that has the full crypto block | +| `c5a3` | STM32C5A3ZG | M0+ / ~48 MHz | - | NUCLEO-C5A3ZG -- entry-level; software only | + +The bare-metal PKA driver in `wolfcrypt/src/port/st/stm32.c` already +covers the V1 (WB) and V2 (H5/U3/U5/G4/WBA) PKA register layouts. New +boards that have PKA need only board bring-up files (startup, linker, +hw_init, system_*.c) plus `WOLFSSL_STM32_PKA` in `user_settings.h` -- +no driver changes. 
+ +## Repository checkpoints (this session) + +`wolfssl@stm32_bare`: +- `7a8ee7d` H7 PLL bring-up to 480 MHz +- `06530195b` WB55 AES1 + CCF macro abstraction +- `8e838294b` G4 family clock-enable maps +- `112e7f929` PKA BARE driver (V1+V2 register layouts; WB55 validated) +- `8383907c1` H5 HASH digest read fix (`HRA` not `HR`) + +`wolfssl-examples-stm32@stm32_bare`: +- H7 480 MHz hw_init + benches in README +- WB55 PLL64 + bench +- G491 board files + bench + README correction (G491RE has no PKA) +- WB55 PKA enable +- H5 cube path wildcard + +## H5 reproduction steps (NUCLEO-H563ZI bare-metal flash ECC fault) + +### Symptom + +After flashing the wolfcrypt test build to NUCLEO-H563ZI, the board +emits zero bytes on USART3 (PD8 / ST-LINK VCP at 115200 8N1) and +the CPU spins inside the default NMI handler (`Infinite_Loop` / +`b .`). Halting via SWD shows xPSR.IPSR = 2 (NMI active). + +### Root cause + +Flash ECC double-bit detection fires on read of flash address +**0x08002000**. The status latches in `FLASH_ECCDETR`: + +``` +FLASH_ECCDETR = 0x80000200 + ^ bit 31 ECCD = 1 (uncorrectable error) + ^^^ bits[15:0] ADDR_ECC = 0x0200 +``` + +ADDR_ECC is in 16-byte (128-bit quad-word) units: `0x200 * 16 = +0x2000`, so the failing flash word is at `0x08002000`. The H5 +flash interface raises NMI on uncorrectable ECC errors. 
+ +### Reproducer + +```sh +cd ~/GitHub/wolfssl-examples-stm32/STM32_Bare_Test +make BOARD=h5 CONFIG=bare TARGET=test +PROG=/opt/st/stm32cubeide_*/plugins/com.st.stm32cube.ide.mcu.externaltools.cubeprogrammer.linux64_*/tools/bin/STM32_Programmer_CLI +$PROG -c port=SWD reset=HWrst -e all -d build/h5-test-bare/app.bin 0x08000000 -v -rst +# UART log will be empty (0 bytes) at /dev/ttyACM +``` + +To inspect the latched ECC state via OpenOCD: + +```sh +OPENOCD=/home/davidgarske/GitHub/OpenOCD/src/openocd +SCRIPTS=/home/davidgarske/GitHub/OpenOCD/tcl +$OPENOCD -s $SCRIPTS -f interface/stlink-dap.cfg \ + -f target/stm32h5x.cfg \ + -c "init; halt" \ + -c "echo {ECCDETR}; mdw 0x40022104" \ + -c "echo {ECCCORR}; mdw 0x40022100" \ + -c "shutdown" +# Expected output: +# ECCDETR +# 0x40022104: 80000200 +# ECCCORR +# 0x40022100: 00000000 +``` + +### What I tried that did NOT help + +- Mass erase + reprogram via STM32_Programmer_CLI (`-e all -d ...`) +- Mass erase + reprogram via OpenOCD `flash erase_sector ; flash write_image` +- Padding the `.bin` to 16-byte (128-bit quad-word) alignment +- Two physical NUCLEO-H563ZI boards (different STLINK serials) +- Option-byte verification: TZEN = 0xC3 (TZ disabled), SRAM2/3 ECC + disabled, HDP1_STRT/END set such that no HDP region is configured + (STRT=1 > END=0, i.e. RM-documented "no protected area"), no WRP +- Clearing `FLASH_ECCDETR` via openocd write to bit 31 -- value is + re-latched as soon as the CPU runs again +- Building `CONFIG=c` (pure software, no BARE drivers, no PKA, no + wolfssl HW paths) -- same fault, so it is not a wolfssl regression +- Replacing `printf` with direct USART writes inside `main()` -- + same fault, so it is not a newlib stdio init issue per se + +### What does work on the same hardware + +- A standalone direct-USART "Hello %d" program (built with the + same `--specs=nano.specs --specs=nosys.specs`, ~151 KB) boots + and prints. The wolfssl-linked test (~260 KB) does not. 
+- The flashed image is correct: `STM32_Programmer_CLI -r32 0x08002000 16` + reads flash content that matches the bin byte-for-byte. + +### Hypothesis + +Either the H5 flash interface stages an ECC error at a fixed +quad-word in this code-size range that neither programmer is +clearing, or the chip-erase sequence as currently invoked leaves +that quad-word's ECC bits in an inconsistent state that subsequent +programs do not refresh. The same code path validates end-to-end +on STM32U385 (V2 PKA, identical register sequence) so the wolfssl +PKA driver itself is not the cause. + +### Next diagnostic steps + +- Test with a different programming tool (J-Link, STM32CubeIDE GUI) + to rule out CLI / OpenOCD behavior +- Try writing the same image to BANK2 (0x08100000) and switching + SWAP_BANK -- if the fault follows the bank, it is silicon; if it + follows the address, it is the programmer +- Try a smaller wolfssl build that does not cross 0x08002000 to + confirm the fault depends on the physical flash address rather + than on the data stored there diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index c2c982203fe..d347095ba14 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -227,6 +227,10 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits static WARN_UNUSED_RESULT int wc_AesEncrypt( Aes* aes, const byte* inBlock, byte* outBlock) { + #ifdef WOLFSSL_STM32_BARE + /* Bare-metal driver handles mutex, clock and key/IV internally.
*/ + return wc_Stm32_Aes_Ecb(aes, outBlock, inBlock, WC_AES_BLOCK_SIZE, 1); + #else int ret = 0; #ifdef WOLFSSL_STM32_CUBEMX CRYP_HandleTypeDef hcryp; @@ -367,6 +371,7 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits wc_Stm32_Aes_Cleanup(); return ret; + #endif /* !WOLFSSL_STM32_BARE */ } #endif /* WOLFSSL_AES_DIRECT || HAVE_AESGCM || HAVE_AESCCM */ @@ -375,6 +380,9 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits static WARN_UNUSED_RESULT int wc_AesDecrypt( Aes* aes, const byte* inBlock, byte* outBlock) { + #ifdef WOLFSSL_STM32_BARE + return wc_Stm32_Aes_Ecb(aes, outBlock, inBlock, WC_AES_BLOCK_SIZE, 0); + #else int ret = 0; #ifdef WOLFSSL_STM32_CUBEMX CRYP_HandleTypeDef hcryp; @@ -521,6 +529,7 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits wc_Stm32_Aes_Cleanup(); return ret; + #endif /* !WOLFSSL_STM32_BARE */ } #endif /* WOLFSSL_AES_DIRECT */ #endif /* HAVE_AES_DECRYPT */ @@ -5575,7 +5584,34 @@ int wc_AesSetIV(Aes* aes, const byte* iv) #ifdef HAVE_AES_CBC #if defined(STM32_CRYPTO) -#ifdef WOLFSSL_STM32U5_DHUK +#ifdef WOLFSSL_STM32_BARE + int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + #ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS + if (sz % WC_AES_BLOCK_SIZE) { + return BAD_LENGTH_E; + } + #endif + if (sz == 0) { + return 0; + } + return wc_Stm32_Aes_Cbc(aes, out, in, sz, 1); + } + #ifdef HAVE_AES_DECRYPT + int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + #ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS + if (sz % WC_AES_BLOCK_SIZE) { + return BAD_LENGTH_E; + } + #endif + if (sz == 0) { + return 0; + } + return wc_Stm32_Aes_Cbc(aes, out, in, sz, 0); + } + #endif /* HAVE_AES_DECRYPT */ +#elif defined(WOLFSSL_STM32U5_DHUK) int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) { int ret = 0; @@ -6955,6 +6991,11 @@ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) int wc_AesCtrEncryptBlock(Aes* 
aes, byte* out, const byte* in) { + #ifdef WOLFSSL_STM32_BARE + /* CTR per-block transform: ECB-encrypt the counter (passed in + * 'in'); aes.c handles counter increment and XOR with plaintext. */ + return wc_Stm32_Aes_Ecb(aes, out, in, WC_AES_BLOCK_SIZE, 1); + #else int ret = 0; #ifdef WOLFSSL_STM32_CUBEMX CRYP_HandleTypeDef hcryp; @@ -7065,6 +7106,7 @@ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) wolfSSL_CryptHwMutexUnLock(); wc_Stm32_Aes_Cleanup(); return ret; + #endif /* !WOLFSSL_STM32_BARE */ } @@ -10141,6 +10183,15 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, authIn, authInSz); #endif +#if defined(WOLFSSL_STM32_BARE) && defined(STM32_CRYPTO) + ret = wc_Stm32_Aes_Gcm(aes, out, in, sz, iv, ivSz, + authTag, authTagSz, + authIn, authInSz, 1 /* enc */); + if (ret != WC_NO_ERR_TRACE(CRYPTOCB_UNAVAILABLE)) + return ret; + /* fall through to SW GCM (still uses HW AES via wc_AesEncrypt) */ +#endif /* WOLFSSL_STM32_BARE && STM32_CRYPTO */ + #ifdef STM32_CRYPTO_AES_GCM return wc_AesGcmEncrypt_STM32( aes, out, in, sz, iv, ivSz, @@ -10870,6 +10921,10 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, #endif + /* BARE: GCM decrypt always uses SW path (with HW AES blocks via + * wc_AesEncrypt). Encrypt is HW-accelerated above; decrypt + tag + * verification stays in well-tested SW for now. 
*/ + #ifdef STM32_CRYPTO_AES_GCM /* The STM standard peripheral library API's doesn't support partial blocks */ return wc_AesGcmDecrypt_STM32( diff --git a/wolfcrypt/src/ecc.c b/wolfcrypt/src/ecc.c index 684a7d4c0d3..4de10656f72 100644 --- a/wolfcrypt/src/ecc.c +++ b/wolfcrypt/src/ecc.c @@ -286,8 +286,12 @@ ECC Curve Sizes: #if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_ATECC608A) && \ !defined(WOLFSSL_CRYPTOCELL) && !defined(WOLFSSL_SILABS_SE_ACCEL) && \ !defined(WOLFSSL_KCAPI_ECC) && !defined(WOLFSSL_SE050) && \ - !defined(WOLFSSL_XILINX_CRYPT_VERSAL) && !defined(WOLFSSL_STM32_PKA) && \ + !defined(WOLFSSL_XILINX_CRYPT_VERSAL) && \ + !(defined(WOLFSSL_STM32_PKA) && !defined(WOLFSSL_STM32_BARE)) && \ !defined(WOLFSSL_PSOC6_CRYPTO) + /* WOLFSSL_STM32_BARE+PKA still uses the SW ECDSA helper paths + * (sign/verify) since the bare-metal driver only implements ECCMul + * HW; the SP-less SW ECDSA fallback then drives that HW. */ #undef HAVE_ECC_VERIFY_HELPER #define HAVE_ECC_VERIFY_HELPER #endif @@ -6947,7 +6951,12 @@ static int deterministic_sign_helper(const byte* in, word32 inlen, ecc_key* key) #endif /* WOLFSSL_ECDSA_DETERMINISTIC_K || WOLFSSL_ECDSA_DETERMINISTIC_K_VARIANT */ -#if defined(WOLFSSL_STM32_PKA) +/* Under WOLFSSL_STM32_BARE the bare-metal PKA driver implements only + * ECCMul HW (the building block used by ECDH and the SP-less SW ECDSA + * path). HW ECDSA sign/verify is intentionally not wired up in v1 of + * the bare driver -- fall back to the standard SW ECDSA which itself + * calls wc_ecc_mulmod_ex2() (HW-accelerated). 
*/ +#if defined(WOLFSSL_STM32_PKA) && !defined(WOLFSSL_STM32_BARE) int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng, ecc_key* key, mp_int *r, mp_int *s) { @@ -8751,7 +8760,8 @@ int wc_ecc_verify_hash(const byte* sig, word32 siglen, const byte* hash, #ifndef WOLF_CRYPTO_CB_ONLY_ECC -#if !defined(WOLFSSL_STM32_PKA) && !defined(WOLFSSL_PSOC6_CRYPTO) && \ +#if !(defined(WOLFSSL_STM32_PKA) && !defined(WOLFSSL_STM32_BARE)) && \ + !defined(WOLFSSL_PSOC6_CRYPTO) && \ !defined(WOLF_CRYPTO_CB_ONLY_ECC) static int wc_ecc_check_r_s_range(ecc_key* key, mp_int* r, mp_int* s) { @@ -9267,7 +9277,10 @@ static int ecc_verify_hash(mp_int *r, mp_int *s, const byte* hash, int wc_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash, word32 hashlen, int* res, ecc_key* key) { -#if defined(WOLFSSL_STM32_PKA) +#if defined(WOLFSSL_STM32_PKA) && !defined(WOLFSSL_STM32_BARE) + /* Non-BARE PKA only. BARE skips this branch (see the comment above + * wc_ecc_sign_hash_ex()) and uses SW ECDSA verify, whose scalar muls + * go through the bare-metal HW wc_ecc_mulmod_ex2(). */ return stm32_ecc_verify_hash_ex(r, s, hash, hashlen, res, key); #elif defined(WOLFSSL_PSOC6_CRYPTO) return psoc6_ecc_verify_hash_ex(r, s, hash, hashlen, res, key); diff --git a/wolfcrypt/src/port/st/stm32.c b/wolfcrypt/src/port/st/stm32.c index 644b85634f7..a109a71184f 100644 --- a/wolfcrypt/src/port/st/stm32.c +++ b/wolfcrypt/src/port/st/stm32.c @@ -46,6 +46,13 @@ #ifdef WOLFSSL_STM32_PKA #include +#ifdef WOLFSSL_STM32_BARE +/* Bare-metal: CMSIS device header is pulled in by settings.h. The + * PKA_HandleTypeDef and the PKA_ECC / PKA_ECDSA IO typedefs are + * defined locally just below the HAL include block. The HAL_PKA_* + * entry points are implemented further down in this file under the + * matching guard.
*/ +#else #if defined(WOLFSSL_STM32L5) #include #include @@ -76,7 +83,101 @@ #else #error Please add the hal_pk.h include #endif +#endif /* !WOLFSSL_STM32_BARE */ + +#if defined(WOLFSSL_STM32_BARE) && defined(WOLFSSL_STM32_PKA) + +#include + +/* Bare-metal stand-ins for the slice of HAL surface that wc_ecc_*() and + * the local HAL_PKA_* shims reference. Kept private to this translation + * unit so they don't collide with ST HAL headers in projects that include + * those for non-crypto code. */ +typedef enum { + HAL_OK = 0x00U, + HAL_ERROR = 0x01U, + HAL_BUSY = 0x02U, + HAL_TIMEOUT = 0x03U +} HAL_StatusTypeDef; + +#ifndef HAL_MAX_DELAY +#define HAL_MAX_DELAY 0xFFFFFFFFU +#endif + +typedef struct { + PKA_TypeDef *Instance; +} PKA_HandleTypeDef; + +typedef struct { + uint32_t modulusSize; + uint32_t coefSign; + const uint8_t *coefA; + const uint8_t *coefB; /* V2 only */ + const uint8_t *modulus; + const uint8_t *primeOrder; /* V2 only */ + uint32_t scalarMulSize; + const uint8_t *scalarMul; + const uint8_t *pointX; + const uint8_t *pointY; +} PKA_ECCMulInTypeDef; + +typedef struct { + uint8_t *ptX; + uint8_t *ptY; +} PKA_ECCMulOutTypeDef; + +typedef struct { + uint32_t primeOrderSize; + uint32_t modulusSize; + uint32_t coefSign; + const uint8_t *coef; + const uint8_t *coefB; /* V2 only */ + const uint8_t *modulus; + const uint8_t *basePointX; + const uint8_t *basePointY; + const uint8_t *primeOrder; + const uint8_t *pPubKeyCurvePtX; + const uint8_t *pPubKeyCurvePtY; + const uint8_t *RSign; + const uint8_t *SSign; + const uint8_t *hash; +} PKA_ECDSAVerifInTypeDef; + +typedef struct { + uint32_t primeOrderSize; + uint32_t modulusSize; + uint32_t coefSign; + const uint8_t *coef; + const uint8_t *coefB; /* V2 only */ + const uint8_t *modulus; + const uint8_t *basePointX; + const uint8_t *basePointY; + const uint8_t *primeOrder; + const uint8_t *hash; + const uint8_t *integer; + const uint8_t *privateKey; +} PKA_ECDSASignInTypeDef; + +typedef struct { + uint8_t *RSign; + 
uint8_t *SSign; +} PKA_ECDSASignOutTypeDef; + +typedef struct { + uint8_t *ptX; + uint8_t *ptY; +} PKA_ECDSASignOutExtParamTypeDef; + +#endif /* WOLFSSL_STM32_BARE && WOLFSSL_STM32_PKA */ + +#ifdef WOLFSSL_STM32_BARE +/* Provide the global PKA handle that the wc_ecc_mulmod_ex2() and + * stm32_ecc_*_hash_ex() paths reference via &hpka. Under HAL builds, + * the application supplies this; under BARE we own it (file-local). */ +static PKA_HandleTypeDef hpka = { 0 }; +#else extern PKA_HandleTypeDef hpka; +#endif #if !defined(WOLFSSL_STM32_PKA_V2) && defined(PKA_ECC_SCALAR_MUL_IN_B_COEFF) /* PKA hardware like in U5 added coefB and primeOrder */ @@ -92,6 +193,508 @@ extern PKA_HandleTypeDef hpka; #define WOLFSSL_HAVE_ECC_KEY_GET_PRIV #endif #endif /* HAVE_ECC */ + +/* ------------------------------------------------------------------------ + * Bare-metal HAL_PKA_* shims + * + * These provide just the slice of the ST HAL surface that the existing + * wolfssl PKA path (below) calls into. Direct register access only; no + * HAL/StdPeriph dependency. Reference: STM32WBxx_HAL_Driver/Src/ + * stm32wbxx_hal_pka.c (PKA_ECCMul_Set, HAL_PKA_ECCMul, PKA_Process, + * HAL_PKA_ECCMul_GetResult, etc). + * + * Layout-wise this matches the V1 PKA (WB55, WL, MP13). For V2 PKA (H5, + * U5 with PKA, WBA), the additional input slots (coefB, primeOrder, + * pointCheck) live at different word offsets but the start sequence and + * the SR/CLRFR bit names are identical, so the same code path applies + * once WOLFSSL_STM32_PKA_V2 is set (auto-detected via the device-header + * macro PKA_ECC_SCALAR_MUL_IN_B_COEFF above). + * --------------------------------------------------------------------- */ +#ifdef WOLFSSL_STM32_BARE + +/* PKA RAM occupies addresses PKA_BASE+0x400 .. PKA_BASE+0x11F4 on V1 and + * a slightly larger window on V2. The CMSIS device header sizes the + * RAM[] array correctly for the part.
*/ +#ifndef PKA_RAM_PARAM_END +/* HAL writes one zero word past the last valid byte (microcode reads + * the parameter until it hits a zero word). */ +#define PKA_RAM_PARAM_END(RAM, IDX) \ + do { (RAM)[(IDX)] = 0UL; } while (0) +#endif + +/* Mode encoding constants (from stm32wbxx_hal_pka.h and equivalent). + * Same numeric values across V1 and V2. */ +#ifndef PKA_MODE_ECC_MUL +#define PKA_MODE_ECC_MUL (0x00000020U) +#endif +#ifndef PKA_MODE_ECDSA_VERIFICATION +#define PKA_MODE_ECDSA_VERIFICATION (0x00000026U) +#endif +#ifndef PKA_MODE_ECDSA_SIGNATURE +#define PKA_MODE_ECDSA_SIGNATURE (0x00000024U) +#endif + +/* Number of word slots in the PKA RAM array (per the CMSIS device + * header; e.g. 894 on WB55 V1). */ +#define WC_STM32_PKA_RAM_WORDS \ + (sizeof(((PKA_TypeDef*)0)->RAM) / sizeof(((PKA_TypeDef*)0)->RAM[0])) + +/* Big-endian byte buffer -> PKA RAM (little-endian word order). 'dst' + * points at the first word of the destination PKA RAM slot; n is the + * byte count of the source. Mirrors PKA_Memcpy_u8_to_u32 in the HAL. */ +static void wc_stm32_pka_load_be(volatile uint32_t* dst, const uint8_t* src, + uint32_t n) +{ + uint32_t index = 0; + if (dst == NULL || src == NULL) return; + + for (; index < (n / 4U); index++) { + dst[index] = + ((uint32_t)src[(n - (index * 4U) - 1U)]) | + ((uint32_t)src[(n - (index * 4U) - 2U)] << 8) | + ((uint32_t)src[(n - (index * 4U) - 3U)] << 16) | + ((uint32_t)src[(n - (index * 4U) - 4U)] << 24); + } + if ((n % 4U) == 1U) { + dst[index] = (uint32_t)src[(n - (index * 4U) - 1U)]; + } + else if ((n % 4U) == 2U) { + dst[index] = + ((uint32_t)src[(n - (index * 4U) - 1U)]) | + ((uint32_t)src[(n - (index * 4U) - 2U)] << 8); + } + else if ((n % 4U) == 3U) { + dst[index] = + ((uint32_t)src[(n - (index * 4U) - 1U)]) | + ((uint32_t)src[(n - (index * 4U) - 2U)] << 8) | + ((uint32_t)src[(n - (index * 4U) - 3U)] << 16); + } +} + +/* PKA RAM (little-endian word order) -> big-endian byte buffer.
*/ +static void wc_stm32_pka_read_be(uint8_t* dst, volatile const uint32_t* src, + uint32_t n) +{ + uint32_t i = 0; + if (dst == NULL || src == NULL) return; + + for (; i < (n / 4U); i++) { + uint32_t off = n - 4U - (i * 4U); + dst[off + 3U] = (uint8_t)((src[i] ) & 0xFFU); + dst[off + 2U] = (uint8_t)((src[i] >> 8) & 0xFFU); + dst[off + 1U] = (uint8_t)((src[i] >> 16) & 0xFFU); + dst[off + 0U] = (uint8_t)((src[i] >> 24) & 0xFFU); + } + if ((n % 4U) == 1U) { + dst[0U] = (uint8_t)(src[i] & 0xFFU); + } + else if ((n % 4U) == 2U) { + dst[1U] = (uint8_t)((src[i] ) & 0xFFU); + dst[0U] = (uint8_t)((src[i] >> 8) & 0xFFU); + } + else if ((n % 4U) == 3U) { + dst[2U] = (uint8_t)((src[i] ) & 0xFFU); + dst[1U] = (uint8_t)((src[i] >> 8) & 0xFFU); + dst[0U] = (uint8_t)((src[i] >> 16) & 0xFFU); + } +} + +/* Optimal bit-size: bytes * 8 minus the leading-zero count of the MSB + * (matches PKA_GetOptBitSize_u8 in the HAL). */ +static uint32_t wc_stm32_pka_optbits(uint32_t byteNumber, uint8_t msb) +{ + uint32_t pos = 0; + uint32_t v = msb; + while (v != 0U) { + v >>= 1; + pos++; + } + if (byteNumber == 0U) { + return 0U; + } + return ((byteNumber - 1U) * 8U) + pos; +} + +static HAL_StatusTypeDef HAL_PKA_Init(PKA_HandleTypeDef *hpkah) +{ + if (hpkah == NULL) { + return HAL_ERROR; + } + if (hpkah->Instance == NULL) { + hpkah->Instance = PKA; + } + +#ifdef WC_STM32_PKA_CLK_ENABLE + WC_STM32_PKA_CLK_ENABLE(); +#endif + + /* Reset CR, enable the PKA, clear any pending flags. */ + hpkah->Instance->CR = PKA_CR_EN; + hpkah->Instance->CLRFR = PKA_CLRFR_PROCENDFC | PKA_CLRFR_RAMERRFC | + PKA_CLRFR_ADDRERRFC; + return HAL_OK; +} + +/* Lazy one-shot init helper. Safe to call from every entry point. 
*/ +static void wc_stm32_pka_ensure_init(PKA_HandleTypeDef *hpkah) +{ + if (hpkah == NULL) return; + if (hpkah->Instance == NULL) { + (void)HAL_PKA_Init(hpkah); + } +} + +static void HAL_PKA_RAMReset(PKA_HandleTypeDef *hpkah) +{ + uint32_t i; + if (hpkah == NULL || hpkah->Instance == NULL) return; + for (i = 0; i < WC_STM32_PKA_RAM_WORDS; i++) { + hpkah->Instance->RAM[i] = 0UL; + } +} + +/* Generic start-and-poll sequence with bounded timeout. The default + * spin budget covers a P-521 scalar mul on a slow PKA (worst case on + * the parts wolfSSL targets is ~2 sec; the budget here is well above + * that). Override at compile time via WC_STM32_PKA_TIMEOUT_LOOPS. */ +#ifndef WC_STM32_PKA_TIMEOUT_LOOPS +#define WC_STM32_PKA_TIMEOUT_LOOPS 0x10000000U +#endif + +static HAL_StatusTypeDef wc_stm32_pka_process(PKA_HandleTypeDef *hpkah, + uint32_t mode) +{ + PKA_TypeDef *p; + uint32_t cr, t; + + if (hpkah == NULL || hpkah->Instance == NULL) { + return HAL_ERROR; + } + p = hpkah->Instance; + + /* PKA must be enabled before MODE/START are written. */ + if ((p->CR & PKA_CR_EN) == 0U) { + p->CR = PKA_CR_EN; + } + + /* Update the mode field in CR; clear interrupt enables. */ + cr = p->CR; + cr &= ~(PKA_CR_MODE | PKA_CR_PROCENDIE | PKA_CR_RAMERRIE | PKA_CR_ADDRERRIE); + cr |= (mode << PKA_CR_MODE_Pos) & PKA_CR_MODE; + p->CR = cr; + + /* Start the operation. */ + p->CR = cr | PKA_CR_START; + + /* Wait for end-of-operation flag, OR an error flag, OR timeout. */ + t = 0; + while ((p->SR & PKA_SR_PROCENDF) == 0U) { + if ((p->SR & (PKA_SR_RAMERRF | PKA_SR_ADDRERRF)) != 0U) { + p->CLRFR = PKA_CLRFR_PROCENDFC | PKA_CLRFR_RAMERRFC | + PKA_CLRFR_ADDRERRFC; + return HAL_ERROR; + } + if (++t >= WC_STM32_PKA_TIMEOUT_LOOPS) { + p->CLRFR = PKA_CLRFR_PROCENDFC | PKA_CLRFR_RAMERRFC | + PKA_CLRFR_ADDRERRFC; + return HAL_TIMEOUT; + } + } + + /* Clear all status flags. 
*/ + p->CLRFR = PKA_CLRFR_PROCENDFC | PKA_CLRFR_RAMERRFC | PKA_CLRFR_ADDRERRFC; + + return HAL_OK; +} + +static HAL_StatusTypeDef HAL_PKA_ECCMul(PKA_HandleTypeDef *hpkah, + PKA_ECCMulInTypeDef *in, uint32_t Timeout) +{ + volatile uint32_t *RAM; + + (void)Timeout; + if (hpkah == NULL || in == NULL) { + return HAL_ERROR; + } + wc_stm32_pka_ensure_init(hpkah); + if (hpkah->Instance == NULL) { + return HAL_ERROR; + } + RAM = hpkah->Instance->RAM; + + /* Scalar 'k' bit length, modulus bit length, and 'a' coefficient + * sign indicator -- exactly as the HAL writes them. */ + RAM[PKA_ECC_SCALAR_MUL_IN_EXP_NB_BITS] = + wc_stm32_pka_optbits(in->scalarMulSize, *(in->scalarMul)); + RAM[PKA_ECC_SCALAR_MUL_IN_OP_NB_BITS] = + wc_stm32_pka_optbits(in->modulusSize, *(in->modulus)); + RAM[PKA_ECC_SCALAR_MUL_IN_A_COEFF_SIGN] = in->coefSign; + + /* |a|, modulus p, scalar k, base point (X,Y). */ + wc_stm32_pka_load_be(&RAM[PKA_ECC_SCALAR_MUL_IN_A_COEFF], + in->coefA, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECC_SCALAR_MUL_IN_A_COEFF + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECC_SCALAR_MUL_IN_MOD_GF], + in->modulus, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECC_SCALAR_MUL_IN_MOD_GF + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECC_SCALAR_MUL_IN_K], + in->scalarMul, in->scalarMulSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECC_SCALAR_MUL_IN_K + ((in->scalarMulSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECC_SCALAR_MUL_IN_INITIAL_POINT_X], + in->pointX, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECC_SCALAR_MUL_IN_INITIAL_POINT_X + + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECC_SCALAR_MUL_IN_INITIAL_POINT_Y], + in->pointY, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECC_SCALAR_MUL_IN_INITIAL_POINT_Y + + ((in->modulusSize + 3U) / 4U)); + +#ifdef WOLFSSL_STM32_PKA_V2 + /* V2 PKA also requires the curve order n and 'b' coefficient. 
*/ + if (in->coefB != NULL) { + wc_stm32_pka_load_be(&RAM[PKA_ECC_SCALAR_MUL_IN_B_COEFF], + in->coefB, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECC_SCALAR_MUL_IN_B_COEFF + + ((in->modulusSize + 3U) / 4U)); + } + if (in->primeOrder != NULL) { + wc_stm32_pka_load_be(&RAM[PKA_ECC_SCALAR_MUL_IN_N_PRIME_ORDER], + in->primeOrder, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECC_SCALAR_MUL_IN_N_PRIME_ORDER + + ((in->modulusSize + 3U) / 4U)); + } +#endif /* WOLFSSL_STM32_PKA_V2 */ + + return wc_stm32_pka_process(hpkah, PKA_MODE_ECC_MUL); +} + +static void HAL_PKA_ECCMul_GetResult(PKA_HandleTypeDef *hpkah, + PKA_ECCMulOutTypeDef *out) +{ + uint32_t size; + volatile const uint32_t *RAM; + + if (hpkah == NULL || hpkah->Instance == NULL || out == NULL) return; + RAM = hpkah->Instance->RAM; + + /* The HAL recomputes the byte size from the saved IN_OP_NB_BITS + * slot. We do the same. */ + size = (RAM[PKA_ECC_SCALAR_MUL_IN_OP_NB_BITS] + 7U) / 8U; + + if (out->ptX != NULL) { + wc_stm32_pka_read_be(out->ptX, + &RAM[PKA_ECC_SCALAR_MUL_OUT_RESULT_X], size); + } + if (out->ptY != NULL) { + wc_stm32_pka_read_be(out->ptY, + &RAM[PKA_ECC_SCALAR_MUL_OUT_RESULT_Y], size); + } +} + +static HAL_StatusTypeDef HAL_PKA_ECDSAVerif(PKA_HandleTypeDef *hpkah, + PKA_ECDSAVerifInTypeDef *in, uint32_t Timeout) +{ + volatile uint32_t *RAM; + + (void)Timeout; + if (hpkah == NULL || in == NULL) { + return HAL_ERROR; + } + wc_stm32_pka_ensure_init(hpkah); + if (hpkah->Instance == NULL) { + return HAL_ERROR; + } + RAM = hpkah->Instance->RAM; + + RAM[PKA_ECDSA_VERIF_IN_ORDER_NB_BITS] = + wc_stm32_pka_optbits(in->primeOrderSize, *(in->primeOrder)); + RAM[PKA_ECDSA_VERIF_IN_MOD_NB_BITS] = + wc_stm32_pka_optbits(in->modulusSize, *(in->modulus)); + RAM[PKA_ECDSA_VERIF_IN_A_COEFF_SIGN] = in->coefSign; + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_A_COEFF], + in->coef, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_A_COEFF + ((in->modulusSize + 3U) / 4U)); + + 
wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_MOD_GF], + in->modulus, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_MOD_GF + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_INITIAL_POINT_X], + in->basePointX, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_INITIAL_POINT_X + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_INITIAL_POINT_Y], + in->basePointY, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_INITIAL_POINT_Y + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_PUBLIC_KEY_POINT_X], + in->pPubKeyCurvePtX, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_PUBLIC_KEY_POINT_X + + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_PUBLIC_KEY_POINT_Y], + in->pPubKeyCurvePtY, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_PUBLIC_KEY_POINT_Y + + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_SIGNATURE_R], + in->RSign, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_SIGNATURE_R + ((in->primeOrderSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_SIGNATURE_S], + in->SSign, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_SIGNATURE_S + ((in->primeOrderSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_HASH_E], + in->hash, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_HASH_E + ((in->primeOrderSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_ORDER_N], + in->primeOrder, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_ORDER_N + ((in->primeOrderSize + 3U) / 4U)); + + return wc_stm32_pka_process(hpkah, PKA_MODE_ECDSA_VERIFICATION); +} + +static uint32_t HAL_PKA_ECDSAVerif_IsValidSignature(PKA_HandleTypeDef const *const hpkah) +{ + if (hpkah == NULL || hpkah->Instance == NULL) return 0U; + /* HAL semantic: 
PKA_ECDSA_VERIF_OUT_RESULT == 0 means valid. */ + return (hpkah->Instance->RAM[PKA_ECDSA_VERIF_OUT_RESULT] == 0UL) ? 1U : 0U; +} + +static HAL_StatusTypeDef HAL_PKA_ECDSASign(PKA_HandleTypeDef *hpkah, + PKA_ECDSASignInTypeDef *in, uint32_t Timeout) +{ + volatile uint32_t *RAM; + HAL_StatusTypeDef st; + + (void)Timeout; + if (hpkah == NULL || in == NULL) { + return HAL_ERROR; + } + wc_stm32_pka_ensure_init(hpkah); + if (hpkah->Instance == NULL) { + return HAL_ERROR; + } + RAM = hpkah->Instance->RAM; + + RAM[PKA_ECDSA_SIGN_IN_ORDER_NB_BITS] = + wc_stm32_pka_optbits(in->primeOrderSize, *(in->primeOrder)); + RAM[PKA_ECDSA_SIGN_IN_MOD_NB_BITS] = + wc_stm32_pka_optbits(in->modulusSize, *(in->modulus)); + RAM[PKA_ECDSA_SIGN_IN_A_COEFF_SIGN] = in->coefSign; + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_A_COEFF], + in->coef, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_SIGN_IN_A_COEFF + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_MOD_GF], + in->modulus, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_SIGN_IN_MOD_GF + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_K], + in->integer, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_SIGN_IN_K + ((in->primeOrderSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_INITIAL_POINT_X], + in->basePointX, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_SIGN_IN_INITIAL_POINT_X + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_INITIAL_POINT_Y], + in->basePointY, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_SIGN_IN_INITIAL_POINT_Y + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_HASH_E], + in->hash, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_SIGN_IN_HASH_E + ((in->primeOrderSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_PRIVATE_KEY_D], + in->privateKey, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + 
PKA_ECDSA_SIGN_IN_PRIVATE_KEY_D + ((in->primeOrderSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_ORDER_N], + in->primeOrder, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_SIGN_IN_ORDER_N + ((in->primeOrderSize + 3U) / 4U)); + + st = wc_stm32_pka_process(hpkah, PKA_MODE_ECDSA_SIGNATURE); + if (st != HAL_OK) { + return st; + } + /* Sign reports failure via PKA_ECDSA_SIGN_OUT_ERROR != 0 (e.g. when + * the random k is unsuitable). The caller is expected to retry with + * a fresh k. */ + if (RAM[PKA_ECDSA_SIGN_OUT_ERROR] != 0UL) { + return HAL_ERROR; + } + return HAL_OK; +} + +static void HAL_PKA_ECDSASign_GetResult(PKA_HandleTypeDef *hpkah, + PKA_ECDSASignOutTypeDef *out, + PKA_ECDSASignOutExtParamTypeDef *outExt) +{ + uint32_t size; + volatile const uint32_t *RAM; + + if (hpkah == NULL || hpkah->Instance == NULL) return; + RAM = hpkah->Instance->RAM; + size = (RAM[PKA_ECDSA_SIGN_IN_MOD_NB_BITS] + 7U) / 8U; + + if (out != NULL) { + if (out->RSign != NULL) { + wc_stm32_pka_read_be(out->RSign, + &RAM[PKA_ECDSA_SIGN_OUT_SIGNATURE_R], size); + } + if (out->SSign != NULL) { + wc_stm32_pka_read_be(out->SSign, + &RAM[PKA_ECDSA_SIGN_OUT_SIGNATURE_S], size); + } + } + if (outExt != NULL) { + if (outExt->ptX != NULL) { + wc_stm32_pka_read_be(outExt->ptX, + &RAM[PKA_ECDSA_SIGN_OUT_FINAL_POINT_X], size); + } + if (outExt->ptY != NULL) { + wc_stm32_pka_read_be(outExt->ptY, + &RAM[PKA_ECDSA_SIGN_OUT_FINAL_POINT_Y], size); + } + } +} + +#endif /* WOLFSSL_STM32_BARE */ + #endif /* WOLFSSL_STM32_PKA */ @@ -99,11 +702,17 @@ extern PKA_HandleTypeDef hpka; /* #define DEBUG_STM32_HASH */ +#if defined(WOLFSSL_STM32_BARE) && !defined(WC_STM32_HASH_CLK_ENABLE) + #error "WOLFSSL_STM32_BARE: HASH clock-enable not mapped for this STM32 family. Add WC_STM32_HASH_CLK_ENABLE() to wolfssl/wolfcrypt/port/st/stm32.h, or define NO_STM32_HASH." 
+#endif + /* User can override STM32_HASH_CLOCK_ENABLE and STM32_HASH_CLOCK_DISABLE */ #ifndef STM32_HASH_CLOCK_ENABLE static WC_INLINE void wc_Stm32_Hash_Clock_Enable(STM32_HASH_Context* stmCtx) { - #ifdef WOLFSSL_STM32_CUBEMX + #if defined(WOLFSSL_STM32_BARE) + WC_STM32_HASH_CLK_ENABLE(); + #elif defined(WOLFSSL_STM32_CUBEMX) __HAL_RCC_HASH_CLK_ENABLE(); #else RCC_AHB2PeriphClockCmd(RCC_AHB2Periph_HASH, ENABLE); @@ -116,7 +725,9 @@ extern PKA_HandleTypeDef hpka; #ifndef STM32_HASH_CLOCK_DISABLE static WC_INLINE void wc_Stm32_Hash_Clock_Disable(STM32_HASH_Context* stmCtx) { - #ifdef WOLFSSL_STM32_CUBEMX + #if defined(WOLFSSL_STM32_BARE) + WC_STM32_HASH_CLK_DISABLE(); + #elif defined(WOLFSSL_STM32_CUBEMX) __HAL_RCC_HASH_CLK_DISABLE(); #else RCC_AHB2PeriphClockCmd(RCC_AHB2Periph_HASH, DISABLE); @@ -224,9 +835,16 @@ static void wc_Stm32_Hash_GetDigest(byte* hash, int digestSize) sz = digestSize; while (sz > 0) { - /* first 20 bytes come from instance HR */ + /* first 20 bytes come from the instance digest registers. The CMSIS + * device header for the H5 family renames this from HR[5] to HRA[5] + * (extended HASH IP that adds a separate HASH_DIGEST->HR[16] for the + * full digest); the older F4/F7/L4 layout still uses HR[5]. */ if (i < 5) { + #if defined(WOLFSSL_STM32H5) + digest[i] = HASH->HRA[i]; + #else digest[i] = HASH->HR[i]; + #endif } #ifdef HASH_DIGEST /* reset comes from HASH_DIGEST */ @@ -643,7 +1261,824 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, #ifdef STM32_CRYPTO #ifndef NO_AES -#ifdef WOLFSSL_STM32_CUBEMX +#ifdef WOLFSSL_STM32_BARE + +#if !defined(WC_STM32_AES_CLK_ENABLE) + #error "WOLFSSL_STM32_BARE: AES clock-enable not mapped for this STM32 family. Add WC_STM32_AES_CLK_ENABLE() to wolfssl/wolfcrypt/port/st/stm32.h, or define NO_STM32_CRYPTO." +#endif + +/* ===== Bare-metal direct-register AES driver ===== + * No HAL or StdPeriph. 
Two IP variants: + * - CRYP (FIFO-based): F2/F4/F7/H7/MP13 + * - AES/SAES (TinyAES): L4/L5/U5/H573/G0/G4/WB/WL/WBA + * Variant selected via family ifdefs below. */ + +#if defined(WOLFSSL_STM32F2) || defined(WOLFSSL_STM32F4) || \ + defined(WOLFSSL_STM32F7) || defined(WOLFSSL_STM32H7) || \ + defined(WOLFSSL_STM32MP13) +/* ----- CRYP IP (FIFO-based) ----- */ + +#ifndef STM32_BARE_AES_TIMEOUT + #define STM32_BARE_AES_TIMEOUT 0x10000 +#endif + +/* DATATYPE = 10b (byte) so CRYP byte-swaps DR/DOUT for us; key/IV regs are + * still big-endian. Key arrives pre-reversed via wc_AesSetKey (aes.c:4161); + * IV is byte-reversed locally before write. */ +#define STM32_CRYP_DATATYPE_BYTE CRYP_CR_DATATYPE_1 + +static int Stm32AesWaitBusy(void) +{ + int t = 0; + while ((CRYP->SR & CRYP_SR_BUSY) != 0) { + if (++t >= STM32_BARE_AES_TIMEOUT) { + return WC_TIMEOUT_E; + } + } + return 0; +} + +static int Stm32AesWaitInNotFull(void) +{ + int t = 0; + while ((CRYP->SR & CRYP_SR_IFNF) == 0) { + if (++t >= STM32_BARE_AES_TIMEOUT) { + return WC_TIMEOUT_E; + } + } + return 0; +} + +static int Stm32AesWaitOutNotEmpty(void) +{ + int t = 0; + while ((CRYP->SR & CRYP_SR_OFNE) == 0) { + if (++t >= STM32_BARE_AES_TIMEOUT) { + return WC_TIMEOUT_E; + } + } + return 0; +} + +static word32 Stm32AesKeySizeBits(word32 keyLen) +{ + if (keyLen == 24) { + return CRYP_CR_KEYSIZE_0; /* 192-bit */ + } + if (keyLen == 32) { + return CRYP_CR_KEYSIZE_1; /* 256-bit */ + } + return 0; /* 128-bit */ +} + +/* aes->key is pre-byte-reversed by wc_AesSetKey under BARE (aes.c:4161), + * so the key words go straight into the K registers in big-endian form. 
*/
+static void Stm32AesLoadKey(const word32* key, word32 keyLen)
+{
+    if (keyLen == 16) { /* 128-bit: only the K2/K3 pairs are written */
+        CRYP->K2LR = key[0]; CRYP->K2RR = key[1];
+        CRYP->K3LR = key[2]; CRYP->K3RR = key[3];
+    }
+    else if (keyLen == 24) { /* 192-bit: K1..K3 pairs are written */
+        CRYP->K1LR = key[0]; CRYP->K1RR = key[1];
+        CRYP->K2LR = key[2]; CRYP->K2RR = key[3];
+        CRYP->K3LR = key[4]; CRYP->K3RR = key[5];
+    }
+    else { /* 32 */
+        CRYP->K0LR = key[0]; CRYP->K0RR = key[1];
+        CRYP->K1LR = key[2]; CRYP->K1RR = key[3];
+        CRYP->K2LR = key[4]; CRYP->K2RR = key[5];
+        CRYP->K3LR = key[6]; CRYP->K3RR = key[7];
+    }
+}
+
+/* aes->reg (IV) is NOT pre-reversed by wc_AesSetIV, so byte-reverse here so
+ * the IV registers see big-endian words.
+ * At most 16 bytes of iv are used; a shorter iv is zero-padded, and a NULL
+ * iv (or ivLen == 0) loads an all-zero IV. */
+static void Stm32AesLoadIV(const byte* iv, word32 ivLen)
+{
+    word32 v[4];
+    word32 copyLen = (ivLen > 16) ? 16 : ivLen;
+
+    XMEMSET(v, 0, sizeof(v));
+    if (iv != NULL && copyLen > 0) {
+        XMEMCPY(v, iv, copyLen);
+        ByteReverseWords(v, v, 16);
+    }
+    CRYP->IV0LR = v[0]; CRYP->IV0RR = v[1];
+    CRYP->IV1LR = v[2]; CRYP->IV1RR = v[3];
+}
+
+/* Push 4 input words then drain 4 output words.
+ * Returns 0 on success, or the error returned by the FIFO wait helper that
+ * failed. The input block is copied into a local buffer before any output
+ * byte is stored, so in and out may point at the same memory. */
+static int Stm32AesXferBlock(const byte* in, byte* out)
+{
+    int ret;
+    word32 i;
+    word32 buf[WC_AES_BLOCK_SIZE/sizeof(word32)];
+
+    /* Local word-aligned copy so callers may pass byte-aligned ptrs. */
+    XMEMCPY(buf, in, WC_AES_BLOCK_SIZE);
+
+    for (i = 0; i < 4; i++) {
+        ret = Stm32AesWaitInNotFull();
+        if (ret != 0) {
+            return ret;
+        }
+        CRYP->DIN = buf[i];
+    }
+    for (i = 0; i < 4; i++) {
+        ret = Stm32AesWaitOutNotEmpty();
+        if (ret != 0) {
+            return ret;
+        }
+        buf[i] = CRYP->DOUT;
+    }
+    XMEMCPY(out, buf, WC_AES_BLOCK_SIZE);
+    return 0;
+}
+
+/* CBC/ECB decrypt requires a key-prep pass first (per F4/H7 reference manual:
+ * load key, run ALGOMODE=AES_KEY, wait BUSY=0, then start the actual op).
*/
+static int Stm32AesPrepareKey(word32 keyLen)
+{
+    int ret;
+
+    CRYP->CR = CRYP_CR_ALGOMODE_AES_KEY |
+               STM32_CRYP_DATATYPE_BYTE |
+               Stm32AesKeySizeBits(keyLen);
+    CRYP->CR |= CRYP_CR_CRYPEN;
+    ret = Stm32AesWaitBusy();
+    CRYP->CR &= ~CRYP_CR_CRYPEN;
+    return ret;
+}
+
+/* AES-ECB on the CRYP peripheral.
+ * sz must be a non-zero multiple of WC_AES_BLOCK_SIZE. isEnc != 0 encrypts;
+ * isEnc == 0 runs the key-preparation pass and then decrypts.
+ * Returns 0 on success, BAD_FUNC_ARG for a NULL pointer or bad length, or
+ * the error from wc_AesGetKeySize(), the HW mutex, or a BUSY/FIFO wait. */
+int wc_Stm32_Aes_Ecb(struct Aes* aes, byte* out, const byte* in,
+    word32 sz, int isEnc)
+{
+    int ret;
+    word32 keyLen, blocks, b;
+    word32 cr;
+
+    if (aes == NULL || out == NULL || in == NULL) {
+        return BAD_FUNC_ARG;
+    }
+    if (sz == 0 || (sz % WC_AES_BLOCK_SIZE) != 0) {
+        return BAD_FUNC_ARG;
+    }
+
+    ret = wc_AesGetKeySize(aes, &keyLen);
+    if (ret != 0) {
+        return ret;
+    }
+
+    ret = wolfSSL_CryptHwMutexLock();
+    if (ret != 0) {
+        return ret;
+    }
+
+    WC_STM32_AES_CLK_ENABLE();
+
+    Stm32AesLoadKey(aes->key, keyLen);
+    if (!isEnc) {
+        ret = Stm32AesPrepareKey(keyLen);
+        if (ret != 0) {
+            goto exit;
+        }
+    }
+
+    cr = CRYP_CR_ALGOMODE_AES_ECB |
+         STM32_CRYP_DATATYPE_BYTE |
+         Stm32AesKeySizeBits(keyLen);
+    if (!isEnc) {
+        cr |= CRYP_CR_ALGODIR;
+    }
+    CRYP->CR = cr;
+    CRYP->CR |= CRYP_CR_FFLUSH;
+    CRYP->CR |= CRYP_CR_CRYPEN;
+
+    blocks = sz / WC_AES_BLOCK_SIZE;
+    for (b = 0; b < blocks; b++) {
+        ret = Stm32AesXferBlock(in + b * WC_AES_BLOCK_SIZE,
+                                out + b * WC_AES_BLOCK_SIZE);
+        if (ret != 0) {
+            break;
+        }
+    }
+
+exit:
+    CRYP->CR &= ~CRYP_CR_CRYPEN;
+    wolfSSL_CryptHwMutexUnLock();
+    return ret;
+}
+
+/* AES-CBC on the CRYP peripheral, using aes->reg as the IV.
+ * sz must be a non-zero multiple of WC_AES_BLOCK_SIZE. isEnc != 0 encrypts;
+ * isEnc == 0 runs the key-preparation pass and then decrypts.
+ * On success aes->reg is replaced by the last ciphertext block so that a
+ * following call continues the chain. For decrypt that block is captured
+ * BEFORE the hardware runs: with in-place operation (out == in) the
+ * ciphertext is overwritten by plaintext, so reading it from `in` afterwards
+ * would chain the wrong IV.
+ * Returns 0 on success, BAD_FUNC_ARG for a NULL pointer or bad length, or
+ * the error from wc_AesGetKeySize(), the HW mutex, or a BUSY/FIFO wait. */
+int wc_Stm32_Aes_Cbc(struct Aes* aes, byte* out, const byte* in,
+    word32 sz, int isEnc)
+{
+    int ret;
+    word32 keyLen, blocks, b;
+    word32 cr;
+    byte nextIv[WC_AES_BLOCK_SIZE];
+
+    if (aes == NULL || out == NULL || in == NULL) {
+        return BAD_FUNC_ARG;
+    }
+    if (sz == 0 || (sz % WC_AES_BLOCK_SIZE) != 0) {
+        return BAD_FUNC_ARG;
+    }
+    blocks = sz / WC_AES_BLOCK_SIZE;
+
+    ret = wc_AesGetKeySize(aes, &keyLen);
+    if (ret != 0) {
+        return ret;
+    }
+
+    if (!isEnc) {
+        /* Next IV for decrypt = last input ciphertext block. Save it now,
+         * while `in` is still intact (it may alias `out`). */
+        XMEMCPY(nextIv, in + (blocks - 1) * WC_AES_BLOCK_SIZE,
+                WC_AES_BLOCK_SIZE);
+    }
+
+    ret = wolfSSL_CryptHwMutexLock();
+    if (ret != 0) {
+        return ret;
+    }
+
+    WC_STM32_AES_CLK_ENABLE();
+
+    Stm32AesLoadKey(aes->key, keyLen);
+    if (!isEnc) {
+        ret = Stm32AesPrepareKey(keyLen);
+        if (ret != 0) {
+            goto exit;
+        }
+    }
+    Stm32AesLoadIV((const byte*)aes->reg, WC_AES_BLOCK_SIZE);
+
+    cr = CRYP_CR_ALGOMODE_AES_CBC |
+         STM32_CRYP_DATATYPE_BYTE |
+         Stm32AesKeySizeBits(keyLen);
+    if (!isEnc) {
+        cr |= CRYP_CR_ALGODIR;
+    }
+    CRYP->CR = cr;
+    CRYP->CR |= CRYP_CR_FFLUSH;
+    CRYP->CR |= CRYP_CR_CRYPEN;
+
+    for (b = 0; b < blocks; b++) {
+        ret = Stm32AesXferBlock(in + b * WC_AES_BLOCK_SIZE,
+                                out + b * WC_AES_BLOCK_SIZE);
+        if (ret != 0) {
+            break;
+        }
+    }
+
+    if (ret == 0) {
+        /* Update aes->reg with new IV (last cipher block for enc; last input
+         * cipher block for dec). aes.c CBC dispatcher expects aes->reg
+         * updated for the next call. */
+        if (isEnc) {
+            XMEMCPY(aes->reg, out + (blocks - 1) * WC_AES_BLOCK_SIZE,
+                    WC_AES_BLOCK_SIZE);
+        }
+        else {
+            XMEMCPY(aes->reg, nextIv, WC_AES_BLOCK_SIZE);
+        }
+    }
+
+exit:
+    CRYP->CR &= ~CRYP_CR_CRYPEN;
+    wolfSSL_CryptHwMutexUnLock();
+    return ret;
+}
+
+/* CTR: handled via the ECB-as-transform path in aes.c (XTRANSFORM_AESCTRBLOCK).
+ * Each per-block ECB call comes through wc_Stm32_Aes_Ecb above; aes.c manages
+ * the counter and the XOR with plaintext. */
+
+/* === HW GCM (CRYP IP phase machine) ===========================================
+ * Native HW GCM for the case the CRYP IP supports directly:
+ * - IV is 96 bits (12 bytes) -- the standard GCM IV
+ * - PT length is a whole number of 16-byte blocks (no partial last block);
+ *   a partial last AAD block is accepted and zero-padded by this driver
+ * Returns CRYPTOCB_UNAVAILABLE for unsupported parameter combos, so the
+ * caller (aes.c BARE GCM dispatcher) falls back to SW GHASH + HW ECB.
*/ +static int Stm32AesXferDiscardOut(const byte* in) +{ + int ret; + word32 i; + word32 buf[WC_AES_BLOCK_SIZE/sizeof(word32)]; + + XMEMCPY(buf, in, WC_AES_BLOCK_SIZE); + for (i = 0; i < 4; i++) { + ret = Stm32AesWaitInNotFull(); + if (ret != 0) { + return ret; + } + CRYP->DIN = buf[i]; + } + return Stm32AesWaitBusy(); +} + +int wc_Stm32_Aes_Gcm(struct Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* tag, word32 tagSz, + const byte* aad, word32 aadSz, int isEnc) +{ + int ret, t; + word32 keyLen, b, blocks, i; + word32 cr_base; + word32 ivBuf[4]; + word32 hwTag[4]; + word64 aadBits, ptBits; + word32 aadBitsHi, aadBitsLo, ptBitsHi, ptBitsLo; + + /* Argument and capability check */ + if (aes == NULL || iv == NULL || tag == NULL) { + return BAD_FUNC_ARG; + } + if (sz > 0 && (in == NULL || out == NULL)) { + return BAD_FUNC_ARG; + } + /* HW only supports 12-byte IV (J0 = IV || 0x00000001 form) */ + if (ivSz != GCM_NONCE_MID_SZ) { + #ifdef DEBUG_STM32_BARE_GCM + printf("[STM32 BARE GCM] -> SW (ivSz=%u not 12)\n", ivSz); + #endif + return CRYPTOCB_UNAVAILABLE; + } + /* CRYP IP v1 (F4) cannot natively handle partial last blocks. Force + * SW fallback for those cases. AAD partial is handled by HW (we pad + * with zeros and the GHASH is correct because GHASH only uses bitlen). + * PT partial would produce wrong CT bytes -- bail out to SW. */ + if (sz % WC_AES_BLOCK_SIZE != 0) { + #ifdef DEBUG_STM32_BARE_GCM + printf("[STM32 BARE GCM] -> SW (sz=%u not whole-block)\n", sz); + #endif + return CRYPTOCB_UNAVAILABLE; + } +#ifdef DEBUG_STM32_BARE_GCM + printf("[STM32 BARE GCM] -> HW (sz=%u aadSz=%u)\n", sz, aadSz); +#endif + + ret = wc_AesGetKeySize(aes, &keyLen); + if (ret != 0) { + return ret; + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + WC_STM32_AES_CLK_ENABLE(); + + /* Set CR (ALGOMODE=AES-GCM, DATATYPE, KEYSIZE, phase=init) BEFORE + * loading key/IV. 
H7 reference HAL sets ALGOMODE first, then writes + * the K and IV registers. Doing it in the other order on H7 produces + * a wrong tag even though CT comes out right. */ + cr_base = CRYP_CR_ALGOMODE_AES_GCM | STM32_CRYP_DATATYPE_BYTE | + Stm32AesKeySizeBits(keyLen); + CRYP->CR = cr_base | (0u << CRYP_CR_GCM_CCMPH_Pos); + + /* Load key (already pre-reversed by wc_AesSetKey under BARE). */ + Stm32AesLoadKey(aes->key, keyLen); + + /* Build the IV register value. CRYP expects the 12-byte IV concatenated + * with the initial 32-bit counter = 0x00000002 (HW pre-increments to 2 + * for the first payload block; at GCMPH=00 init phase HW sets up J0). */ + XMEMSET(ivBuf, 0, 16); + XMEMCPY(ivBuf, iv, 12); + ((byte*)ivBuf)[15] = 0x02; + ByteReverseWords(ivBuf, ivBuf, 16); + CRYP->IV0LR = ivBuf[0]; CRYP->IV0RR = ivBuf[1]; + CRYP->IV1LR = ivBuf[2]; CRYP->IV1RR = ivBuf[3]; + + /* === Phase 1: Init (GCMPH=00) === + * Enable CRYP and wait for CRYPEN to auto-clear -- this is the H7- + * documented mechanism for end-of-init-phase. F4 also auto-clears + * CRYPEN after init phase, so the same wait works on both IPs. */ + CRYP->CR |= CRYP_CR_FFLUSH; + CRYP->CR |= CRYP_CR_CRYPEN; + t = 0; + while ((CRYP->CR & CRYP_CR_CRYPEN) != 0) { + if (++t >= STM32_BARE_AES_TIMEOUT) { + ret = WC_TIMEOUT_E; + goto exit; + } + } + + /* === Phase 2: Header / AAD (GCMPH=01) === + * Whole AAD blocks: feed via DIN, no DOUT to read. Partial last AAD + * block: pad with zeros (AES HW absorbs them; GHASH math uses aadSz + * bits in the final phase to truncate). 
*/ + if (aadSz > 0) { + word32 aadBlocks = aadSz / WC_AES_BLOCK_SIZE; + word32 aadPartial = aadSz % WC_AES_BLOCK_SIZE; + + CRYP->CR = cr_base | (1u << CRYP_CR_GCM_CCMPH_Pos); + CRYP->CR |= CRYP_CR_CRYPEN; + for (b = 0; b < aadBlocks; b++) { + ret = Stm32AesXferDiscardOut(aad + b * WC_AES_BLOCK_SIZE); + if (ret != 0) { + goto exit; + } + } + if (aadPartial > 0) { + byte pad[WC_AES_BLOCK_SIZE]; + XMEMSET(pad, 0, sizeof(pad)); + XMEMCPY(pad, aad + aadBlocks * WC_AES_BLOCK_SIZE, aadPartial); + ret = Stm32AesXferDiscardOut(pad); + if (ret != 0) { + goto exit; + } + } + ret = Stm32AesWaitBusy(); + if (ret != 0) { + goto exit; + } + CRYP->CR &= ~CRYP_CR_CRYPEN; + } + + /* === Phase 3: Payload (GCMPH=10) === */ + if (sz > 0) { + blocks = sz / WC_AES_BLOCK_SIZE; + CRYP->CR = cr_base | (2u << CRYP_CR_GCM_CCMPH_Pos); + if (!isEnc) { + CRYP->CR |= CRYP_CR_ALGODIR; + } + CRYP->CR |= CRYP_CR_CRYPEN; + for (b = 0; b < blocks; b++) { + ret = Stm32AesXferBlock(in + b * WC_AES_BLOCK_SIZE, + out + b * WC_AES_BLOCK_SIZE); + if (ret != 0) { + goto exit; + } + } + ret = Stm32AesWaitBusy(); + if (ret != 0) { + goto exit; + } + CRYP->CR &= ~CRYP_CR_CRYPEN; + } + + /* === Phase 4: Final (GCMPH=11) === + * Feed 64-bit AAD-bit-len then 64-bit PT-bit-len, then read 4 DOUT + * words for the tag. + * + * H7 rev.B+ / MP13 (CRYP_VER_2_2): DIN final-phase writes are taken + * "normally" (HW does the DATATYPE swap). Write plain uint32s. + * + * F2/F4/F7 (older CRYP IP, behaves like H7 rev.A): DATATYPE swap + * does NOT apply to the final-phase length block; SW must pre-swap + * via __REV (byte-reverse the 32-bit value). The two HAL families + * disagree on this and so do their reference drivers -- match each. 
+ */ + aadBits = (word64)aadSz * 8u; + ptBits = (word64)sz * 8u; + aadBitsHi = (word32)(aadBits >> 32); + aadBitsLo = (word32)aadBits; + ptBitsHi = (word32)(ptBits >> 32); + ptBitsLo = (word32)ptBits; +#if defined(WOLFSSL_STM32F2) || defined(WOLFSSL_STM32F4) || \ + defined(WOLFSSL_STM32F7) + aadBitsHi = __REV(aadBitsHi); + aadBitsLo = __REV(aadBitsLo); + ptBitsHi = __REV(ptBitsHi); + ptBitsLo = __REV(ptBitsLo); +#endif + + CRYP->CR = cr_base | (3u << CRYP_CR_GCM_CCMPH_Pos); + CRYP->CR |= CRYP_CR_CRYPEN; + + ret = Stm32AesWaitInNotFull(); if (ret != 0) goto exit; + CRYP->DIN = aadBitsHi; + ret = Stm32AesWaitInNotFull(); if (ret != 0) goto exit; + CRYP->DIN = aadBitsLo; + ret = Stm32AesWaitInNotFull(); if (ret != 0) goto exit; + CRYP->DIN = ptBitsHi; + ret = Stm32AesWaitInNotFull(); if (ret != 0) goto exit; + CRYP->DIN = ptBitsLo; + + for (i = 0; i < 4; i++) { + ret = Stm32AesWaitOutNotEmpty(); + if (ret != 0) { + goto exit; + } + hwTag[i] = CRYP->DOUT; + } + XMEMCPY(tag, hwTag, tagSz < 16 ? tagSz : 16); + +exit: + CRYP->CR &= ~CRYP_CR_CRYPEN; + wolfSSL_CryptHwMutexUnLock(); + return ret; +} + +#else /* TinyAES IP (L4/L5/U5/H5/H573/G0/G4/WB/WL/WBA) */ + +/* ----- TinyAES IP (single-register, polled) ----- + * Different from CRYP: no FIFO; one DINR / DOUTR pair processed per + * 16-byte block. KEYRx are written in *reversed* word order + * (KEYR3 = MSB key word for 128-bit; KEYR7 = MSB for 256-bit). + * AES-192 not supported by hardware (only 128 and 256). */ + +#ifndef STM32_BARE_AES_TIMEOUT + #define STM32_BARE_AES_TIMEOUT 0x10000 +#endif + +/* CCF (computation-complete flag) clear: newer TinyAES IPs (U3/U5/L4/L5/H5 + * etc.) have a dedicated AES_ICR register with a CCF bit. Older TinyAES IPs + * (WB/WL/G0) don't -- CCF is cleared by writing 1 to AES_CR.CCFC. 
*/ +#if defined(AES_ICR_CCF) + #define STM32_AES_CLEAR_CCF() do { CRYP->ICR = AES_ICR_CCF; } while (0) +#elif defined(AES_CR_CCFC) + #define STM32_AES_CLEAR_CCF() do { CRYP->CR |= AES_CR_CCFC; } while (0) +#else + #error "STM32 AES IP variant: no CCF-clear mechanism known" +#endif + +#define STM32_AES_DATATYPE_BYTE AES_CR_DATATYPE_1 /* 0b10 */ +#define STM32_AES_CHMOD_ECB 0u +#define STM32_AES_CHMOD_CBC AES_CR_CHMOD_0 +#define STM32_AES_CHMOD_CTR AES_CR_CHMOD_1 +#define STM32_AES_CHMOD_GCM (AES_CR_CHMOD_0 | AES_CR_CHMOD_1) +#define STM32_AES_MODE_ENC 0u +#define STM32_AES_MODE_KEYDERIVE AES_CR_MODE_0 +#define STM32_AES_MODE_DEC AES_CR_MODE_1 +#define STM32_AES_MODE_KD_DEC (AES_CR_MODE_0 | AES_CR_MODE_1) + +static int Stm32AesWaitCCF(void) +{ + int t = 0; + while ((CRYP->SR & AES_SR_CCF) == 0) { + if (++t >= STM32_BARE_AES_TIMEOUT) { + #ifdef DEBUG_STM32_BARE_GCM + printf("[STM32 BARE AES] CCF timeout: SR=0x%08lx CR=0x%08lx\n", + (unsigned long)CRYP->SR, (unsigned long)CRYP->CR); + #endif + return WC_TIMEOUT_E; + } + } + return 0; +} + +static word32 Stm32AesKeySizeBits(word32 keyLen) +{ + if (keyLen == 32) { + return AES_CR_KEYSIZE; /* 256-bit */ + } + return 0; /* 128-bit (192 not supported by HW) */ +} + +/* Key registers must be written in increasing register order (KEYR0 first + * per RM). KEYR(N-1) holds the MSB word. + * aes->key arrives pre-byte-reversed (per wc_AesSetKey under BARE), so the + * highest-significance byte of the AES key is in the MSB of aes->key[0]. 
*/
+static int Stm32AesLoadKey(const word32* key, word32 keyLen)
+{
+    if (keyLen == 16) { /* 128-bit key: KEYR0..KEYR3 */
+        CRYP->KEYR0 = key[3];
+        CRYP->KEYR1 = key[2];
+        CRYP->KEYR2 = key[1];
+        CRYP->KEYR3 = key[0];
+        return 0;
+    }
+    if (keyLen == 32) { /* 256-bit key: KEYR0..KEYR7 */
+        CRYP->KEYR0 = key[7];
+        CRYP->KEYR1 = key[6];
+        CRYP->KEYR2 = key[5];
+        CRYP->KEYR3 = key[4];
+        CRYP->KEYR4 = key[3];
+        CRYP->KEYR5 = key[2];
+        CRYP->KEYR6 = key[1];
+        CRYP->KEYR7 = key[0];
+        return 0;
+    }
+    /* AES-192 not supported by TinyAES hardware */
+    return BAD_FUNC_ARG;
+}
+
+static void Stm32AesLoadIV(const byte* iv, word32 ivLen)
+{
+    word32 v[4];
+    word32 copyLen = (ivLen > 16) ? 16 : ivLen;
+
+    XMEMSET(v, 0, sizeof(v));
+    if (iv != NULL && copyLen > 0) {
+        XMEMCPY(v, iv, copyLen);
+        ByteReverseWords(v, v, 16);
+    }
+    /* IVRx ordering matches the key words: IVR3 holds the first
+     * (most-significant) IV word, IVR0 the last. A NULL iv or ivLen == 0
+     * leaves v zeroed, i.e. loads an all-zero IV. */
+    CRYP->IVR3 = v[0];
+    CRYP->IVR2 = v[1];
+    CRYP->IVR1 = v[2];
+    CRYP->IVR0 = v[3];
+}
+
+/* One 16-byte block in / out.
+ * Writes four words to DINR, waits for CCF, then reads four words from
+ * DOUTR and clears CCF. Returns 0 on success or the Stm32AesWaitCCF()
+ * error; on that error path CCF is not cleared here.
+ * The input is copied into a local buffer before out is written, so in and
+ * out may point at the same memory. */
+static int Stm32AesXferBlock(const byte* in, byte* out)
+{
+    int ret;
+    word32 i;
+    word32 buf[WC_AES_BLOCK_SIZE/sizeof(word32)];
+
+    XMEMCPY(buf, in, WC_AES_BLOCK_SIZE);
+    for (i = 0; i < 4; i++) {
+        CRYP->DINR = buf[i];
+    }
+    ret = Stm32AesWaitCCF();
+    if (ret != 0) {
+        return ret;
+    }
+    for (i = 0; i < 4; i++) {
+        buf[i] = CRYP->DOUTR;
+    }
+    XMEMCPY(out, buf, WC_AES_BLOCK_SIZE);
+    /* Clear CCF for next block */
+    STM32_AES_CLEAR_CCF();
+    return 0;
+}
+
+/* Run the key-derivation pass before decrypt (CBC/ECB).
*/ +static int Stm32AesPrepareKey(word32 keyLen, word32 chmod) +{ + int ret; + word32 cr = STM32_AES_MODE_KEYDERIVE | STM32_AES_DATATYPE_BYTE | + Stm32AesKeySizeBits(keyLen) | chmod; + CRYP->CR = cr; + CRYP->CR |= AES_CR_EN; + ret = Stm32AesWaitCCF(); + STM32_AES_CLEAR_CCF(); + CRYP->CR &= ~AES_CR_EN; + return ret; +} + +int wc_Stm32_Aes_Ecb(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc) +{ + int ret; + word32 keyLen, blocks, b; + word32 cr; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + if (sz == 0 || (sz % WC_AES_BLOCK_SIZE) != 0) { + return BAD_FUNC_ARG; + } + + ret = wc_AesGetKeySize(aes, &keyLen); + if (ret != 0) { + return ret; + } + if (keyLen != 16 && keyLen != 32) { + return BAD_FUNC_ARG; /* TinyAES has no 192-bit support */ + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + WC_STM32_AES_CLK_ENABLE(); + + /* Disable AES first; configure CR (with KEYSIZE) BEFORE writing the + * key registers (per RM the AES must know the key size before keys + * are loaded). Then enable. */ + CRYP->CR = 0; + + cr = STM32_AES_DATATYPE_BYTE | Stm32AesKeySizeBits(keyLen) | + STM32_AES_CHMOD_ECB | + (isEnc ? 
STM32_AES_MODE_ENC : STM32_AES_MODE_DEC); + CRYP->CR = cr; + STM32_AES_CLEAR_CCF(); /* clear any stale CCF/ERR */ + + ret = Stm32AesLoadKey(aes->key, keyLen); + if (ret != 0) { + goto exit; + } + if (!isEnc) { + /* Key derivation pass: temporarily set MODE=01 (key derive) */ + CRYP->CR = (cr & ~AES_CR_MODE_Msk) | STM32_AES_MODE_KEYDERIVE; + CRYP->CR |= AES_CR_EN; + ret = Stm32AesWaitCCF(); + STM32_AES_CLEAR_CCF(); + CRYP->CR &= ~AES_CR_EN; + if (ret != 0) { + goto exit; + } + /* Restore decrypt mode */ + CRYP->CR = cr; + } + + CRYP->CR |= AES_CR_EN; + + blocks = sz / WC_AES_BLOCK_SIZE; + for (b = 0; b < blocks; b++) { + ret = Stm32AesXferBlock(in + b * WC_AES_BLOCK_SIZE, + out + b * WC_AES_BLOCK_SIZE); + if (ret != 0) { + break; + } + } + +exit: + CRYP->CR &= ~AES_CR_EN; + wolfSSL_CryptHwMutexUnLock(); + return ret; +} + +int wc_Stm32_Aes_Cbc(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc) +{ + int ret; + word32 keyLen, blocks, b; + word32 cr; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + if (sz == 0 || (sz % WC_AES_BLOCK_SIZE) != 0) { + return BAD_FUNC_ARG; + } + + ret = wc_AesGetKeySize(aes, &keyLen); + if (ret != 0) { + return ret; + } + if (keyLen != 16 && keyLen != 32) { + return BAD_FUNC_ARG; + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + WC_STM32_AES_CLK_ENABLE(); + + /* Configure CR (with KEYSIZE) BEFORE loading keys -- per RM. */ + CRYP->CR = 0; + + cr = STM32_AES_DATATYPE_BYTE | Stm32AesKeySizeBits(keyLen) | + STM32_AES_CHMOD_CBC | + (isEnc ? 
STM32_AES_MODE_ENC : STM32_AES_MODE_DEC); + CRYP->CR = cr; + STM32_AES_CLEAR_CCF(); + + ret = Stm32AesLoadKey(aes->key, keyLen); + if (ret != 0) { + goto exit; + } + if (!isEnc) { + /* Key derivation pass for decrypt */ + CRYP->CR = (cr & ~AES_CR_MODE_Msk) | STM32_AES_MODE_KEYDERIVE; + CRYP->CR |= AES_CR_EN; + ret = Stm32AesWaitCCF(); + STM32_AES_CLEAR_CCF(); + CRYP->CR &= ~AES_CR_EN; + if (ret != 0) { + goto exit; + } + /* Restore decrypt mode */ + CRYP->CR = cr; + } + Stm32AesLoadIV((const byte*)aes->reg, WC_AES_BLOCK_SIZE); + + CRYP->CR |= AES_CR_EN; + + blocks = sz / WC_AES_BLOCK_SIZE; + for (b = 0; b < blocks; b++) { + ret = Stm32AesXferBlock(in + b * WC_AES_BLOCK_SIZE, + out + b * WC_AES_BLOCK_SIZE); + if (ret != 0) { + break; + } + } + + if (ret == 0) { + if (isEnc) { + XMEMCPY(aes->reg, out + (blocks - 1) * WC_AES_BLOCK_SIZE, + WC_AES_BLOCK_SIZE); + } + else { + XMEMCPY(aes->reg, in + (blocks - 1) * WC_AES_BLOCK_SIZE, + WC_AES_BLOCK_SIZE); + } + } + +exit: + CRYP->CR &= ~AES_CR_EN; + wolfSSL_CryptHwMutexUnLock(); + return ret; +} + +/* TinyAES HW GCM: deferred. Falls back to software GCM (with HW ECB + * blocks via wc_AesEncrypt -> wc_Stm32_Aes_Ecb). 
*/ +int wc_Stm32_Aes_Gcm(struct Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* tag, word32 tagSz, + const byte* aad, word32 aadSz, int isEnc) +{ + (void)aes; (void)out; (void)in; (void)sz; + (void)iv; (void)ivSz; + (void)tag; (void)tagSz; + (void)aad; (void)aadSz; (void)isEnc; + return CRYPTOCB_UNAVAILABLE; +} + +#endif /* CRYP IP vs TinyAES IP */ + +#elif defined(WOLFSSL_STM32_CUBEMX) #if defined(WOLFSSL_STM32U5_DHUK) /* Set the DHUK IV to be used when unwrapping an AES key @@ -878,7 +2313,7 @@ int wc_Stm32_Aes_Init(Aes* aes, CRYP_InitTypeDef* cryptInit, void wc_Stm32_Aes_Cleanup(void) { } -#endif /* WOLFSSL_STM32_CUBEMX */ +#endif /* WOLFSSL_STM32_BARE / WOLFSSL_STM32_CUBEMX / StdPeriph */ #endif /* !NO_AES */ #endif /* STM32_CRYPTO */ diff --git a/wolfcrypt/src/random.c b/wolfcrypt/src/random.c index efc9eaf59a7..6527d7f348b 100644 --- a/wolfcrypt/src/random.c +++ b/wolfcrypt/src/random.c @@ -4241,6 +4241,9 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) #elif defined(STM32_RNG) /* Generate a RNG seed using the hardware random number generator * on the STM32F2/F4/F7/L4. 
*/ + #include <wolfssl/wolfcrypt/port/st/stm32.h> + /* Pulls in WC_STM32_RNG_CLK_ENABLE for WOLFSSL_STM32_BARE builds */ + #ifdef WOLFSSL_STM32_CUBEMX int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) @@ -4325,7 +4328,12 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) #ifndef STM32_NUTTX_RNG /* enable RNG peripheral clock */ - RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; + #ifdef WC_STM32_RNG_CLK_ENABLE + WC_STM32_RNG_CLK_ENABLE(); + #else + /* Default for F4/F7/L4/L5/H5/H7 -- RNG clock enable is in RCC->AHB2ENR */ + RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; + #endif #endif /* enable RNG interrupt, set IE bit in RNG->CR register */ diff --git a/wolfssl/wolfcrypt/port/st/stm32.h b/wolfssl/wolfcrypt/port/st/stm32.h index 9aa0d418ae1..9c335378ea6 100644 --- a/wolfssl/wolfcrypt/port/st/stm32.h +++ b/wolfssl/wolfcrypt/port/st/stm32.h @@ -23,11 +23,207 @@ #define _WOLFPORT_STM32_H_ /* Generic STM32 Hashing and Crypto Functions */ -/* Supports CubeMX HAL or Standard Peripheral Library */ +/* Supports CubeMX HAL, Standard Peripheral Library, or bare-metal direct + * register access (WOLFSSL_STM32_BARE). */ #include #include /* for MATH_INT_T */ +#ifdef WOLFSSL_STM32_BARE +/* Per-family direct-register clock-enable macros. CMSIS device header is + * already included via settings.h. RCC->...ENR bit names come from CMSIS.
*/ +#if defined(WOLFSSL_STM32H5) + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_AESEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_AESEN; } while (0) + #define WC_STM32_HASH_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_HASHEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_HASH_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_HASHEN; } while (0) + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; (void)RCC->AHB2ENR; } while (0) +#elif defined(WOLFSSL_STM32F4) || defined(WOLFSSL_STM32F7) || \ + defined(WOLFSSL_STM32H7) + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_CRYPEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_CRYPEN; } while (0) + #define WC_STM32_HASH_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_HASHEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_HASH_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_HASHEN; } while (0) + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; (void)RCC->AHB2ENR; } while (0) +#elif defined(WOLFSSL_STM32L4) + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_AESEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_AESEN; } while (0) + #define WC_STM32_HASH_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_HASHEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_HASH_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_HASHEN; } while (0) + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; (void)RCC->AHB2ENR; } while (0) +#elif defined(WOLFSSL_STM32U5) || defined(WOLFSSL_STM32U3) + /* U5 / U3 RCC uses AHB2ENR1 (not AHB2ENR). AES bit only present on + * variants that have the peripheral (U585+, U385+). 
*/ + #ifdef RCC_AHB2ENR1_AESEN + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHB2ENR1 |= RCC_AHB2ENR1_AESEN; (void)RCC->AHB2ENR1; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHB2ENR1 &= ~RCC_AHB2ENR1_AESEN; } while (0) + #endif + #ifdef RCC_AHB2ENR1_HASHEN + #define WC_STM32_HASH_CLK_ENABLE() \ + do { RCC->AHB2ENR1 |= RCC_AHB2ENR1_HASHEN; (void)RCC->AHB2ENR1; } while (0) + #define WC_STM32_HASH_CLK_DISABLE() \ + do { RCC->AHB2ENR1 &= ~RCC_AHB2ENR1_HASHEN; } while (0) + #endif + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHB2ENR1 |= RCC_AHB2ENR1_RNGEN; (void)RCC->AHB2ENR1; } while (0) +#elif defined(WOLFSSL_STM32G0) + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHBENR |= RCC_AHBENR_AESEN; (void)RCC->AHBENR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHBENR &= ~RCC_AHBENR_AESEN; } while (0) + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHBENR |= RCC_AHBENR_RNGEN; (void)RCC->AHBENR; } while (0) +#elif defined(WOLFSSL_STM32WB) + /* WB55 dual-core: AES1 is the M4 (CPU1) application AES, on AHB2. + * AES2 sits on AHB4/AHB3 and is reserved for the M0+ side / shared use. + * The wolfcrypt port maps CRYP -> AES1 (see CRYP alias above), so use + * AES1's clock-enable bit. RNG is on AHB3. */ + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_AES1EN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_AES1EN; } while (0) + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHB3ENR |= RCC_AHB3ENR_RNGEN; (void)RCC->AHB3ENR; } while (0) +#elif defined(WOLFSSL_STM32G4) + /* G4: TinyAES + RNG + PKA on AHB2. No HASH peripheral. 
*/ + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_AESEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_AESEN; } while (0) + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; (void)RCC->AHB2ENR; } while (0) +#elif defined(WOLFSSL_STM32WBA) + /* WBA: TinyAES + HASH + RNG + SAES on AHB2; only the AES, HASH and RNG clock macros are defined in this branch. NOTE(review): this comment previously listed PKA under both AHB2 and AHB1; the PKA clock macro in this file is keyed on RCC_AHB1ENR_PKAEN - confirm which register the WBA device header actually uses. */ + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_AESEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_AESEN; } while (0) + #ifdef RCC_AHB2ENR_HASHEN + #define WC_STM32_HASH_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_HASHEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_HASH_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_HASHEN; } while (0) + #endif + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; (void)RCC->AHB2ENR; } while (0) +#endif + +/* Per-family direct-register clock-enable macro for the PKA peripheral. 
*/ +#if defined(WOLFSSL_STM32WB) + /* WB55: PKA clock is on AHB3 */ + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB3ENR |= RCC_AHB3ENR_PKAEN; (void)RCC->AHB3ENR; } while (0) +#elif defined(WOLFSSL_STM32U5) || defined(WOLFSSL_STM32U3) + /* U5 / U3: AHB2ENR1.PKAEN; the macro is defined only when the device header provides that bit */ + #ifdef RCC_AHB2ENR1_PKAEN + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB2ENR1 |= RCC_AHB2ENR1_PKAEN; (void)RCC->AHB2ENR1; } while (0) + #endif +#elif defined(WOLFSSL_STM32H5) + #ifdef RCC_AHB2ENR_PKAEN + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_PKAEN; (void)RCC->AHB2ENR; } while (0) + #endif +#elif defined(WOLFSSL_STM32G4) + #ifdef RCC_AHB2ENR_PKAEN + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_PKAEN; (void)RCC->AHB2ENR; } while (0) + #endif +#elif defined(WOLFSSL_STM32WBA) /* NOTE(review): if the WBA device header does not define RCC_AHB1ENR_PKAEN (for example because PKAEN sits in AHB2ENR), this #ifdef is false and WC_STM32_PKA_CLK_ENABLE stays undefined - verify the bit's register. */ + #ifdef RCC_AHB1ENR_PKAEN + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB1ENR |= RCC_AHB1ENR_PKAEN; (void)RCC->AHB1ENR; } while (0) + #endif +#endif + +/* HAL-legacy macros that the existing direct-register HASH path depends on. + * Without HAL these aren't otherwise visible. 
*/ +#if defined(WOLFSSL_STM32H5) || defined(WOLFSSL_STM32MP13) || \ + defined(WOLFSSL_STM32N6) || defined(WOLFSSL_STM32H7S) || \ + defined(WOLFSSL_STM32U3) + /* New-generation HASH IP: 4-bit ALGO field at bits 20:17; no MD5 selector is defined in this branch */ + #define HASH_ALGOSELECTION_SHA1 0u + #define HASH_ALGOSELECTION_SHA224 HASH_CR_ALGO_1 + #define HASH_ALGOSELECTION_SHA256 (HASH_CR_ALGO_0 | HASH_CR_ALGO_1) + #define HASH_ALGOSELECTION_SHA384 (HASH_CR_ALGO_2 | HASH_CR_ALGO_3) + #define HASH_ALGOSELECTION_SHA512 (HASH_CR_ALGO_0 | HASH_CR_ALGO_1 | \ + HASH_CR_ALGO_2 | HASH_CR_ALGO_3) + #define HASH_ALGOSELECTION_SHA512_224 (HASH_CR_ALGO_0 | HASH_CR_ALGO_2 | \ + HASH_CR_ALGO_3) + #define HASH_ALGOSELECTION_SHA512_256 (HASH_CR_ALGO_1 | HASH_CR_ALGO_2 | \ + HASH_CR_ALGO_3) +#else + /* Older HASH IP (F4/F7/L4 family) ALGO bit mapping (per HAL); this #else is taken for every family not listed above: + * SHA1 = 0 + * MD5 = ALGO_0 + * SHA224 = ALGO_1 + * SHA256 = ALGO_0 | ALGO_1 (SHA224/SHA256 are defined only when HASH_CR_ALGO_1 exists) + */ + #define HASH_ALGOSELECTION_SHA1 0u + #define HASH_ALGOSELECTION_MD5 HASH_CR_ALGO_0 + #ifdef HASH_CR_ALGO_1 + #define HASH_ALGOSELECTION_SHA224 HASH_CR_ALGO_1 + #define HASH_ALGOSELECTION_SHA256 (HASH_CR_ALGO_0 | HASH_CR_ALGO_1) + #endif +#endif + +/* Legacy CamelCase aliases; each one exists only if its HASH_ALGOSELECTION_* macro was defined above */ +#ifdef HASH_ALGOSELECTION_SHA1 + #define HASH_AlgoSelection_SHA1 HASH_ALGOSELECTION_SHA1 +#endif +#ifdef HASH_ALGOSELECTION_SHA224 + #define HASH_AlgoSelection_SHA224 HASH_ALGOSELECTION_SHA224 +#endif +#ifdef HASH_ALGOSELECTION_SHA256 + #define HASH_AlgoSelection_SHA256 HASH_ALGOSELECTION_SHA256 +#endif +#ifdef HASH_ALGOSELECTION_SHA384 + #define HASH_AlgoSelection_SHA384 HASH_ALGOSELECTION_SHA384 +#endif +#ifdef HASH_ALGOSELECTION_SHA512 + #define HASH_AlgoSelection_SHA512 HASH_ALGOSELECTION_SHA512 +#endif +#ifdef HASH_ALGOSELECTION_SHA512_224 + #define HASH_AlgoSelection_SHA512_224 HASH_ALGOSELECTION_SHA512_224 +#endif +#ifdef HASH_ALGOSELECTION_SHA512_256 + #define HASH_AlgoSelection_SHA512_256 HASH_ALGOSELECTION_SHA512_256 +#endif +#ifdef HASH_ALGOSELECTION_MD5 + #define HASH_AlgoSelection_MD5 
HASH_ALGOSELECTION_MD5 +#endif + +#define HASH_ALGOMODE_HASH 0u +#ifdef HASH_CR_MODE + #define HASH_ALGOMODE_HMAC HASH_CR_MODE +#endif +/* Byte-stream input (auto byte-swap). NOTE(review): ST's HAL maps HASH_DATATYPE_8B to HASH_CR_DATATYPE_1; DATATYPE_0 alone selects 16-bit data per the ST reference manuals, so confirm the #elif fallback below is intended. */ +#ifdef HASH_CR_DATATYPE_1 + #define HASH_DATATYPE_8B HASH_CR_DATATYPE_1 +#elif defined(HASH_CR_DATATYPE_0) + #define HASH_DATATYPE_8B HASH_CR_DATATYPE_0 +#endif + +#endif /* WOLFSSL_STM32_BARE */ + + #ifdef STM32_HASH #include /* for uint32_t */ @@ -38,7 +234,8 @@ /* The HASH_DIGEST register indicates SHA224/SHA256 support */ #define STM32_HASH_SHA2 #if defined(WOLFSSL_STM32MP13) || defined(WOLFSSL_STM32H7S) || \ - defined(WOLFSSL_STM32N6) || defined(WOLFSSL_STM32H5) + defined(WOLFSSL_STM32N6) || defined(WOLFSSL_STM32H5) || \ + defined(WOLFSSL_STM32U3) #define HASH_CR_SIZE 103 #define HASH_MAX_DIGEST 64 /* Up to SHA512 */ @@ -68,7 +265,8 @@ /* These HASH HAL's have no MD5 implementation */ #if defined(WOLFSSL_STM32MP13) || defined(WOLFSSL_STM32H7S) || \ - defined(WOLFSSL_STM32N6) || defined(WOLFSSL_STM32H5) + defined(WOLFSSL_STM32N6) || defined(WOLFSSL_STM32H5) || \ + defined(WOLFSSL_STM32U3) #define STM32_NOMD5 #endif @@ -163,7 +361,8 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, #endif #ifndef NO_AES - #if !defined(STM32_CRYPTO_AES_GCM) && (defined(WOLFSSL_STM32F4) || \ + #if !defined(STM32_CRYPTO_AES_GCM) && !defined(WOLFSSL_STM32_BARE) && \ + (defined(WOLFSSL_STM32F4) || \ defined(WOLFSSL_STM32F7) || defined(WOLFSSL_STM32L4) || \ defined(WOLFSSL_STM32L5) || defined(WOLFSSL_STM32H7) || \ defined(WOLFSSL_STM32U5) || defined(WOLFSSL_STM32U3) || \ @@ -173,6 +372,13 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, /* Hardware supports AES GCM acceleration */ #define STM32_CRYPTO_AES_GCM #endif + /* Under WOLFSSL_STM32_BARE on the CRYP IP (F2/F4/F7/H7/MP13), the GCM + * HW phase machine (init/header/payload/final) is engaged for whole- + * block PT with a 12-byte IV; partial blocks and non-12B IVs return + * CRYPTOCB_UNAVAILABLE so aes.c falls back to 
SW GHASH + HW ECB. On + * the TinyAES IP the BARE driver always returns CRYPTOCB_UNAVAILABLE + * for GCM (no HW phase machine) and the SW GHASH + HW ECB path is + * used. GCM decrypt is always SW + HW ECB on both IPs in v1. */ #if defined(WOLFSSL_STM32WB) || defined(WOLFSSL_STM32WL) || \ defined(WOLFSSL_STM32WBA) @@ -186,9 +392,11 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, #endif #if defined(WOLFSSL_STM32L4) || defined(WOLFSSL_STM32L5) || \ defined(WOLFSSL_STM32U5) || defined(WOLFSSL_STM32U3) || \ - defined(WOLFSSL_STM32H5) || defined(WOLFSSL_STM32G0) + defined(WOLFSSL_STM32H5) || defined(WOLFSSL_STM32G0) || \ + defined(WOLFSSL_STM32G4) #if defined(WOLFSSL_STM32L4) || defined(WOLFSSL_STM32U5) || \ - defined(WOLFSSL_STM32U3) || defined(WOLFSSL_STM32G0) + defined(WOLFSSL_STM32U3) || defined(WOLFSSL_STM32G0) || \ + defined(WOLFSSL_STM32G4) /* G4 joins the families flagged STM32_CRYPTO_AES_ONLY */ #define STM32_CRYPTO_AES_ONLY /* crypto engine only supports AES */ #endif #if defined(WOLFSSL_STM32H5) @@ -234,7 +442,23 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, #define STM32_GCM_IV_START 2 struct Aes; - #ifdef WOLFSSL_STM32_CUBEMX + #ifdef WOLFSSL_STM32_BARE + /* Bare-metal direct-register AES driver. ECB and CBC are HW-native; + * CTR is provided automatically via the ECB-as-transform path in + * aes.c (XTRANSFORM_AESCTRBLOCK); GCM is HW-native for the case + * the CRYP IP supports (12-byte IV + whole-block PT) and returns + * CRYPTOCB_UNAVAILABLE otherwise so aes.c can fall back to SW + * GHASH (which still uses HW ECB for the underlying AES blocks). 
*/ + int wc_Stm32_Aes_Ecb(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc); + int wc_Stm32_Aes_Cbc(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc); + int wc_Stm32_Aes_Gcm(struct Aes* aes, byte* out, const byte* in, + word32 sz, + const byte* iv, word32 ivSz, + byte* tag, word32 tagSz, + const byte* aad, word32 aadSz, int isEnc); /* iv, tag and aad are each passed as a pointer plus a size argument; isEnc presumably selects encrypt vs decrypt - confirm in stm32.c */ + #elif defined(WOLFSSL_STM32_CUBEMX) int wc_Stm32_Aes_Init(struct Aes* aes, CRYP_HandleTypeDef* hcryp, int useSAES); void wc_Stm32_Aes_Cleanup(void); @@ -242,7 +466,7 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, int wc_Stm32_Aes_Init(struct Aes* aes, CRYP_InitTypeDef* cryptInit, CRYP_KeyInitTypeDef* keyInit); void wc_Stm32_Aes_Cleanup(void); - #endif /* WOLFSSL_STM32_CUBEMX */ + #endif /* WOLFSSL_STM32_BARE / WOLFSSL_STM32_CUBEMX / StdPeriph */ #endif /* !NO_AES */ #endif /* STM32_CRYPTO */ diff --git a/wolfssl/wolfcrypt/settings.h b/wolfssl/wolfcrypt/settings.h index ca74b140a82..dea6bc0cdc9 100644 --- a/wolfssl/wolfcrypt/settings.h +++ b/wolfssl/wolfcrypt/settings.h @@ -2238,7 +2238,60 @@ extern void uITRON4_free(void *p) ; #define KEIL_INTRINSICS #endif #define NO_OLD_RNGNAME - #ifdef WOLFSSL_STM32_CUBEMX + + #if defined(WOLFSSL_STM32_BARE) && defined(WOLFSSL_STM32_CUBEMX) + #error "WOLFSSL_STM32_BARE and WOLFSSL_STM32_CUBEMX are mutually exclusive" + #endif + /* WOLFSSL_STM32_PKA is supported under WOLFSSL_STM32_BARE via the + * direct-register PKA driver in wolfcrypt/src/port/st/stm32.c. */ + + #ifdef WOLFSSL_STM32_BARE + /* Direct register access; no HAL or StdPeriph driver. Pull in only the + * CMSIS device header. Existing direct-register HASH path is reused; + * RNG goes through the existing WOLFSSL_STM32_RNG_NOLIB path. 
*/ + #ifndef WOLFSSL_STM32_RNG_NOLIB + #define WOLFSSL_STM32_RNG_NOLIB + #endif + #if defined(WOLFSSL_STM32F1) + #include "stm32f1xx.h" + #elif defined(WOLFSSL_STM32F2) + #include "stm32f2xx.h" + #elif defined(WOLFSSL_STM32F4) + #include "stm32f4xx.h" + #elif defined(WOLFSSL_STM32F7) + #include "stm32f7xx.h" + #elif defined(WOLFSSL_STM32L4) + #include "stm32l4xx.h" + #elif defined(WOLFSSL_STM32L5) + #include "stm32l5xx.h" + #elif defined(WOLFSSL_STM32H7S) + #include "stm32h7rsxx.h" + #elif defined(WOLFSSL_STM32H7) + #include "stm32h7xx.h" + #elif defined(WOLFSSL_STM32WB) + #include "stm32wbxx.h" + #elif defined(WOLFSSL_STM32WL) + #include "stm32wlxx.h" + #elif defined(WOLFSSL_STM32G0) + #include "stm32g0xx.h" + #elif defined(WOLFSSL_STM32G4) + #include "stm32g4xx.h" + #elif defined(WOLFSSL_STM32U5) + #include "stm32u5xx.h" + #elif defined(WOLFSSL_STM32U3) + #include "stm32u3xx.h" + #elif defined(WOLFSSL_STM32H5) + #include "stm32h5xx.h" + #elif defined(WOLFSSL_STM32N6) + #include "stm32n6xx.h" + #elif defined(WOLFSSL_STM32MP13) + #ifndef __ASSEMBLER__ + #include "stm32mp13xx.h" + #endif + #elif defined(WOLFSSL_STM32WBA) + #include "stm32wbaxx.h" + #endif + #elif defined(WOLFSSL_STM32_CUBEMX) #if defined(WOLFSSL_STM32F1) #include "stm32f1xx_hal.h" #elif defined(WOLFSSL_STM32F2)