From 1a42ef87eff13d6f4dc017694ddba2ab402bdfa3 Mon Sep 17 00:00:00 2001 From: Gusted Date: Sun, 22 Mar 2026 05:07:44 +0100 Subject: [PATCH] ascon: improve performance of permutation Use a slightly different substitution implementation that is slightly more efficient. This does require that the function is now strictly separated into the three layers. The assembly with `-C opt-level=2` shows no weird instructions, `permute_12` without `soft-compact` is fully unrolled and the conversion from and to arrays is skipped. With `soft-compact` it's not unrolled but does also skip the conversion from and to arrays between the iterations. Benchmarks with and without `soft-compact` are quite similar (to the point I'm guessing I'm not even running it correctly). Permutation/1 round time: [4.6049 ns 4.6597 ns 4.7094 ns] change: [-17.040% -15.975% -14.976%] (p = 0.00 < 0.05) Permutation/6 rounds time: [17.179 ns 17.301 ns 17.452 ns] change: [-27.748% -26.520% -25.229%] (p = 0.00 < 0.05) Permutation/8 rounds time: [23.835 ns 23.979 ns 24.109 ns] change: [-24.968% -24.238% -23.588%] (p = 0.00 < 0.05) Permutation/12 rounds time: [33.011 ns 33.458 ns 33.882 ns] change: [-25.656% -24.646% -23.615%] (p = 0.00 < 0.05) --- ascon/src/lib.rs | 57 ++++++++++++++++++++++++++-------------------- benches/Cargo.toml | 3 --- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/ascon/src/lib.rs b/ascon/src/lib.rs index e53f27e..1d14567 100644 --- a/ascon/src/lib.rs +++ b/ascon/src/lib.rs @@ -34,32 +34,39 @@ pub struct State { /// Ascon's round function const fn round(x: [u64; 5], c: u64) -> [u64; 5] { - // S-box layer - let x0 = x[0] ^ x[4]; - let x2 = x[2] ^ x[1] ^ c; // with round constant - let x4 = x[4] ^ x[3]; - - let tx0 = x0 ^ (!x[1] & x2); - let tx1 = x[1] ^ (!x2 & x[3]); - let tx2 = x2 ^ (!x[3] & x4); - let tx3 = x[3] ^ (!x4 & x0); - let tx4 = x4 ^ (!x0 & x[1]); - let tx1 = tx1 ^ tx0; - let tx3 = tx3 ^ tx2; - let tx0 = tx0 ^ tx4; - - // linear layer - let x0 = tx0 ^ tx0.rotate_right(9); - let x1 = tx1 ^ 
tx1.rotate_right(22); - let x2 = tx2 ^ tx2.rotate_right(5); - let x3 = tx3 ^ tx3.rotate_right(7); - let x4 = tx4 ^ tx4.rotate_right(34); + let (mut x0, mut x1, mut x3, mut x4) = (x[0], x[1], x[3], x[4]); + + // Addition of Constants + let mut x2 = x[2] ^ c; + + // Substitution Layer. + // BGC Optimized Implementations from: + // Optimizing S-box Implementations Using SAT Solvers: Revisited + // https://eprint.iacr.org/2023/1721.pdf + let t0 = x0 ^ x4; + let t1 = !x4; + let t2 = t1 | x3; + let t3 = x1 ^ x2; + let t4 = x3 ^ x2; + let t5 = x3 ^ x4; + let t6 = t0 | x1; + let t7 = x0 | t5; + let t8 = t4 | t3; + x1 = t0 ^ t8; + x3 = t3 ^ t7; + let t11 = x2 & t3; + let t12 = t6 ^ t5; + x2 = t3 ^ t2; + x0 = t12 ^ t11; + x4 = t0 ^ t12; + + // Linear Diffusion Layer [ - tx0 ^ x0.rotate_right(19), - tx1 ^ x1.rotate_right(39), - !(tx2 ^ x2.rotate_right(1)), - tx3 ^ x3.rotate_right(10), - tx4 ^ x4.rotate_right(7), + x0 ^ x0.rotate_right(19) ^ x0.rotate_right(28), + x1 ^ x1.rotate_right(61) ^ x1.rotate_right(39), + x2 ^ x2.rotate_right(1) ^ x2.rotate_right(6), + x3 ^ x3.rotate_right(10) ^ x3.rotate_right(17), + x4 ^ x4.rotate_right(7) ^ x4.rotate_right(41), ] } diff --git a/benches/Cargo.toml b/benches/Cargo.toml index 384e2da..0d7ee62 100644 --- a/benches/Cargo.toml +++ b/benches/Cargo.toml @@ -20,9 +20,6 @@ rand = { version = "0.8", default-features = false, features = [ "getrandom", ] } -[features] -no_unroll = ["ascon/no_unroll"] - [[bench]] name = "ascon" path = "src/ascon.rs"