diff --git a/.gitignore b/.gitignore index 704d3ca0..99741c18 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,8 @@ cipher/libcipher.la compat/Makefile compat/libcompat.la doc/gcrypt.info +doc/gcrypt.info-1 +doc/gcrypt.info-2 doc/stamp-vti doc/version.texi doc/Makefile @@ -65,6 +67,7 @@ src/gcrypt.h src/hmac256 src/libgcrypt-config src/libgcrypt.la +src/libgcrypt.pc src/mpicalc src/versioninfo.rc src/*.exe @@ -103,6 +106,8 @@ tests/t-lock tests/t-mpi-bit tests/t-mpi-point tests/t-sexp +tests/t-secmem +tests/t-x448 tests/tsexp tests/version tests/*.exe diff --git a/AUTHORS b/AUTHORS index 280d1303..b72992f8 100644 --- a/AUTHORS +++ b/AUTHORS @@ -157,6 +157,9 @@ Dmitry Eremin-Solenikov Dmitry Kasatkin 2012-12-14:50CAE2DB.80302@intel.com: +H.J. Lu +2020-01-19:20200119135241.GA4970@gmail.com: + Jia Zhang 2017-10-17:59E56E30.9060503@alibaba-inc.com: @@ -199,6 +202,9 @@ Shawn Landden Stephan Mueller 2014-08-22:2008899.25OeoelVVA@myon.chronox.de: +Tianjia Zhang +2020-01-08:dcda0127-2f45-93a3-0736-27259a33bffa@linux.alibaba.com: + Tomáš Mráz 2012-04-16:1334571250.5056.52.camel@vespa.frost.loc: diff --git a/cipher/Makefile.am b/cipher/Makefile.am index 020a9616..10a5ab62 100644 --- a/cipher/Makefile.am +++ b/cipher/Makefile.am @@ -87,7 +87,7 @@ EXTRA_libcipher_la_SOURCES = \ dsa.c \ elgamal.c \ ecc.c ecc-curves.c ecc-misc.c ecc-common.h \ - ecc-ecdh.c ecc-ecdsa.c ecc-eddsa.c ecc-gost.c \ + ecc-ecdh.c ecc-ecdsa.c ecc-eddsa.c ecc-gost.c ecc-sm2.c \ idea.c \ gost28147.c gost.h \ gostr3411-94.c \ diff --git a/cipher/camellia-aesni-avx-amd64.S b/cipher/camellia-aesni-avx-amd64.S index e16d4f61..4671bcfe 100644 --- a/cipher/camellia-aesni-avx-amd64.S +++ b/cipher/camellia-aesni-avx-amd64.S @@ -18,8 +18,9 @@ * License along with this program; if not, see . */ -#ifdef __x86_64 #include + +#ifdef __x86_64 #if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT) diff --git a/cipher/camellia-aesni-avx2-amd64.S b/cipher/camellia-aesni-avx2-amd64.S index cc01c774..517e6880 100644 --- a/cipher/camellia-aesni-avx2-amd64.S +++ b/cipher/camellia-aesni-avx2-amd64.S @@ -18,8 +18,9 @@ * License along with this program; if not, see . */ -#ifdef __x86_64 #include + +#ifdef __x86_64 #if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT) diff --git a/cipher/cipher-gcm-armv8-aarch64-ce.S b/cipher/cipher-gcm-armv8-aarch64-ce.S index b0c2cccc..877207d3 100644 --- a/cipher/cipher-gcm-armv8-aarch64-ce.S +++ b/cipher/cipher-gcm-armv8-aarch64-ce.S @@ -385,6 +385,8 @@ _gcry_ghash_setup_armv8_ce_pmull: GET_DATA_POINTER(x2, .Lrconst) + eor vZZ.16b, vZZ.16b, vZZ.16b + /* H¹ */ ld1 {rh1.16b}, [x0] rbit rh1.16b, rh1.16b diff --git a/cipher/cipher.c b/cipher/cipher.c index ab3e4240..bd571367 100644 --- a/cipher/cipher.c +++ b/cipher/cipher.c @@ -1125,7 +1125,7 @@ _gcry_cipher_encrypt (gcry_cipher_hd_t h, void *out, size_t outsize, if (h->mode != GCRY_CIPHER_MODE_NONE && !h->marks.key) { - log_error ("cipher_decrypt: key not set\n"); + log_error ("cipher_encrypt: key not set\n"); return GPG_ERR_MISSING_KEY; } diff --git a/cipher/ecc-common.h b/cipher/ecc-common.h index 7fbc950a..b8b7c763 100644 --- a/cipher/ecc-common.h +++ b/cipher/ecc-common.h @@ -125,4 +125,16 @@ gpg_err_code_t _gcry_ecc_gost_verify (gcry_mpi_t input, mpi_ec_t ec, gcry_mpi_t r, gcry_mpi_t s); +/*-- ecc-sm2.c --*/ +gpg_err_code_t _gcry_ecc_sm2_encrypt (gcry_sexp_t *r_ciph, + gcry_mpi_t input, mpi_ec_t ec); +gpg_err_code_t _gcry_ecc_sm2_decrypt (gcry_sexp_t *r_plain, + gcry_sexp_t data_list, mpi_ec_t ec); +gpg_err_code_t _gcry_ecc_sm2_sign (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s, + int flags, int hashalgo); +gpg_err_code_t _gcry_ecc_sm2_verify (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s); + + #endif /*GCRY_ECC_COMMON_H*/ diff --git a/cipher/ecc-curves.c b/cipher/ecc-curves.c index 52872c5e..92850ac7 100644 --- a/cipher/ecc-curves.c +++ b/cipher/ecc-curves.c @@ -115,6 +115,8 @@ static const struct { "secp256k1", "1.3.132.0.10" }, + { "sm2p256v1", "1.2.156.10197.1.301" }, + { NULL, NULL} }; @@ -512,6 +514,18 @@ static const ecc_domain_parms_t domain_parms[] = 1 }, + { + "sm2p256v1", 256, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xfffffffeffffffffffffffffffffffffffffffff00000000ffffffffffffffff", + "0xfffffffeffffffffffffffffffffffffffffffff00000000fffffffffffffffc", + "0x28e9fa9e9d9f5e344d5a9e4bcf6509a7f39789f515ab8f92ddbcbd414d940e93", + "0xfffffffeffffffffffffffffffffffff7203df6b21c6052b53bbf40939d54123", + "0x32c4ae2c1f1981195f9904466a39c9948fe30bbff2660be1715a4589334c74c7", + "0xbc3736a2f4f6779c59bdcee36b692153d0a9877cc62a474002df32e52139f0a0", + 1 + }, + { NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL } }; @@ -1044,6 +1058,7 @@ mpi_ec_get_elliptic_curve (elliptic_curve_t *E, int *r_flags, goto leave; if (G) { + _gcry_mpi_point_init (&E->G); mpi_point_set (&E->G, G->x, G->y, G->z); mpi_point_set (G, NULL, NULL, NULL); mpi_point_release (G); diff --git a/cipher/ecc-gost.c b/cipher/ecc-gost.c index e9dfc597..44654a47 100644 --- a/cipher/ecc-gost.c +++ b/cipher/ecc-gost.c @@ -45,8 +45,7 @@ _gcry_ecc_gost_sign (gcry_mpi_t input, mpi_ec_t ec, gcry_mpi_t k, dr, sum, ke, x, e; mpi_point_struct I; gcry_mpi_t hash; - const void *abuf; - unsigned int abits, qbits; + unsigned int qbits; if (DBG_CIPHER) log_mpidump ("gost sign hash ", input ); @@ -54,18 +53,9 @@ _gcry_ecc_gost_sign (gcry_mpi_t input, mpi_ec_t ec, qbits = mpi_get_nbits (ec->n); /* Convert the INPUT into an MPI if needed. */ - if (mpi_is_opaque (input)) - { - abuf = mpi_get_opaque (input, &abits); - rc = _gcry_mpi_scan (&hash, GCRYMPI_FMT_USG, abuf, (abits+7)/8, NULL); - if (rc) - return rc; - if (abits > qbits) - mpi_rshift (hash, hash, abits - qbits); - } - else - hash = input; - + rc = _gcry_dsa_normalize_hash (input, &hash, qbits); + if (rc) + return rc; k = NULL; dr = mpi_alloc (0); diff --git a/cipher/ecc-sm2.c b/cipher/ecc-sm2.c new file mode 100644 index 00000000..a6341132 --- /dev/null +++ b/cipher/ecc-sm2.c @@ -0,0 +1,566 @@ +/* ecc-sm2.c - Elliptic Curve SM2 implementation + * Copyright (C) 2020 Tianjia Zhang + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "bithelp.h" +#include "mpi.h" +#include "cipher.h" +#include "context.h" +#include "ec-context.h" +#include "pubkey-internal.h" +#include "ecc-common.h" + +#define MPI_NBYTES(m) ((mpi_get_nbits(m) + 7) / 8) + + +/* Key derivation function from X9.63/SECG */ +static gpg_err_code_t +kdf_x9_63 (int algo, const void *in, size_t inlen, void *out, size_t outlen) +{ + gpg_err_code_t rc; + gcry_md_hd_t hd; + int mdlen; + u32 counter = 1; + u32 counter_be; + unsigned char *dgst; + unsigned char *pout = out; + size_t rlen = outlen; + size_t len; + + rc = _gcry_md_open (&hd, algo, 0); + if (rc) + return rc; + + mdlen = _gcry_md_get_algo_dlen (algo); + + while (rlen > 0) + { + counter_be = be_bswap32 (counter); /* cpu_to_be32 */ + counter++; + + _gcry_md_write (hd, in, inlen); + _gcry_md_write (hd, &counter_be, sizeof(counter_be)); + + dgst = _gcry_md_read (hd, algo); + if (dgst == NULL) + { + rc = GPG_ERR_DIGEST_ALGO; + break; + } + + len = mdlen < rlen ? mdlen : rlen; /* min(mdlen, rlen) */ + memcpy (pout, dgst, len); + rlen -= len; + pout += len; + + _gcry_md_reset (hd); + } + + _gcry_md_close (hd); + return rc; +} + + +/* _gcry_ecc_sm2_encrypt description: + * input: + * data[0] : octet string + * output: A new S-expression with the parameters: + * a: c1 : generated ephemeral public key (kG) + * b: c3 : Hash(x2 || IN || y2) + * c: c2 : cipher + * + * sm2_decrypt description: + * in contrast to encrypt + */ +gpg_err_code_t +_gcry_ecc_sm2_encrypt (gcry_sexp_t *r_ciph, gcry_mpi_t input, mpi_ec_t ec) +{ + gpg_err_code_t rc; + const int algo = GCRY_MD_SM3; + gcry_md_hd_t md = NULL; + int mdlen; + unsigned char *dgst; + gcry_mpi_t k = NULL; + mpi_point_struct kG, kP; + gcry_mpi_t x1, y1; + gcry_mpi_t x2, y2; + gcry_mpi_t x2y2 = NULL; + unsigned char *in = NULL; + unsigned int inlen; + unsigned char *raw; + unsigned int rawlen; + unsigned char *cipher = NULL; + int i; + + point_init (&kG); + point_init (&kP); + x1 = mpi_new (0); + y1 = mpi_new (0); + x2 = mpi_new (0); + y2 = mpi_new (0); + + in = _gcry_mpi_get_buffer (input, 0, &inlen, NULL); + if (!in) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + + cipher = xtrymalloc (inlen); + if (!cipher) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + + /* rand k in [1, n-1] */ + k = _gcry_dsa_gen_k (ec->n, GCRY_VERY_STRONG_RANDOM); + + /* [k]G = (x1, y1) */ + _gcry_mpi_ec_mul_point (&kG, k, ec->G, ec); + if (_gcry_mpi_ec_get_affine (x1, y1, &kG, ec)) + { + if (DBG_CIPHER) + log_debug ("Bad check: kG can not be a Point at Infinity!\n"); + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* [k]P = (x2, y2) */ + _gcry_mpi_ec_mul_point (&kP, k, ec->Q, ec); + if (_gcry_mpi_ec_get_affine (x2, y2, &kP, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* t = KDF(x2 || y2, klen) */ + x2y2 = _gcry_mpi_ec_ec2os (&kP, ec); + raw = mpi_get_opaque (x2y2, &rawlen); + rawlen = (rawlen + 7) / 8; + + /* skip the prefix '0x04' */ + raw += 1; + rawlen -= 1; + rc = kdf_x9_63 (algo, raw, rawlen, cipher, inlen); + if (rc) + goto leave; + + /* cipher = t xor in */ + for (i = 0; i < inlen; i++) + cipher[i] ^= in[i]; + + /* hash(x2 || IN || y2) */ + mdlen = _gcry_md_get_algo_dlen (algo); + rc = _gcry_md_open (&md, algo, 0); + if (rc) + goto leave; + _gcry_md_write (md, raw, MPI_NBYTES(x2)); + _gcry_md_write (md, in, inlen); + _gcry_md_write (md, raw + MPI_NBYTES(x2), MPI_NBYTES(y2)); + dgst = _gcry_md_read (md, algo); + if (dgst == NULL) + { + rc = GPG_ERR_DIGEST_ALGO; + goto leave; + } + + if (!rc) + { + gcry_mpi_t c1; + gcry_mpi_t c3; + gcry_mpi_t c2; + + c3 = mpi_new (0); + c2 = mpi_new (0); + + c1 = _gcry_ecc_ec2os (x1, y1, ec->p); + _gcry_mpi_set_opaque_copy (c3, dgst, mdlen * 8); + _gcry_mpi_set_opaque_copy (c2, cipher, inlen * 8); + + rc = sexp_build (r_ciph, NULL, + "(enc-val(flags sm2)(sm2(a%M)(b%M)(c%M)))", + c1, c3, c2); + + mpi_free (c1); + mpi_free (c3); + mpi_free (c2); + } + +leave: + _gcry_md_close (md); + mpi_free (x2y2); + mpi_free (k); + + point_free (&kG); + point_free (&kP); + mpi_free (x1); + mpi_free (y1); + mpi_free (x2); + mpi_free (y2); + + xfree (cipher); + xfree (in); + + return rc; +} + + +gpg_err_code_t +_gcry_ecc_sm2_decrypt (gcry_sexp_t *r_plain, gcry_sexp_t data_list, mpi_ec_t ec) +{ + gpg_err_code_t rc; + gcry_mpi_t data_c1 = NULL; + gcry_mpi_t data_c3 = NULL; + gcry_mpi_t data_c2 = NULL; + + /* + * Extract the data. + */ + rc = sexp_extract_param (data_list, NULL, "/a/b/c", + &data_c1, &data_c3, &data_c2, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_printmpi ("ecc_decrypt d_c1", data_c1); + log_printmpi ("ecc_decrypt d_c3", data_c3); + log_printmpi ("ecc_decrypt d_c2", data_c2); + } + + { + const int algo = GCRY_MD_SM3; + gcry_md_hd_t md = NULL; + int mdlen; + unsigned char *dgst; + mpi_point_struct c1; + mpi_point_struct kP; + gcry_mpi_t x2, y2; + gcry_mpi_t x2y2 = NULL; + unsigned char *in = NULL; + unsigned int inlen; + unsigned char *plain = NULL; + unsigned char *raw; + unsigned int rawlen; + unsigned char *c3 = NULL; + unsigned int c3_len; + int i; + + point_init (&c1); + point_init (&kP); + x2 = mpi_new (0); + y2 = mpi_new (0); + + in = mpi_get_opaque (data_c2, &inlen); + inlen = (inlen + 7) / 8; + plain = xtrymalloc (inlen); + if (!plain) + { + rc = gpg_err_code_from_syserror (); + goto leave_main; + } + + rc = _gcry_ecc_os2ec (&c1, data_c1); + if (rc) + goto leave_main; + + if (!_gcry_mpi_ec_curve_point (&c1, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave_main; + } + + /* [d]C1 = (x2, y2), C1 = [k]G */ + _gcry_mpi_ec_mul_point (&kP, ec->d, &c1, ec); + if (_gcry_mpi_ec_get_affine (x2, y2, &kP, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave_main; + } + + /* t = KDF(x2 || y2, inlen) */ + x2y2 = _gcry_mpi_ec_ec2os (&kP, ec); + raw = mpi_get_opaque (x2y2, &rawlen); + rawlen = (rawlen + 7) / 8; + /* skip the prefix '0x04' */ + raw += 1; + rawlen -= 1; + rc = kdf_x9_63 (algo, raw, rawlen, plain, inlen); + if (rc) + goto leave_main; + + /* plain = C2 xor t */ + for (i = 0; i < inlen; i++) + plain[i] ^= in[i]; + + /* Hash(x2 || IN || y2) == C3 */ + mdlen = _gcry_md_get_algo_dlen (algo); + rc = _gcry_md_open (&md, algo, 0); + if (rc) + goto leave_main; + _gcry_md_write (md, raw, MPI_NBYTES(x2)); + _gcry_md_write (md, plain, inlen); + _gcry_md_write (md, raw + MPI_NBYTES(x2), MPI_NBYTES(y2)); + dgst = _gcry_md_read (md, algo); + if (dgst == NULL) + { + memset (plain, 0, inlen); + rc = GPG_ERR_DIGEST_ALGO; + goto leave_main; + } + c3 = mpi_get_opaque (data_c3, &c3_len); + c3_len = (c3_len + 7) / 8; + if (c3_len != mdlen || memcmp (dgst, c3, c3_len) != 0) + { + memset (plain, 0, inlen); + rc = GPG_ERR_INV_DATA; + goto leave_main; + } + + if (!rc) + { + gcry_mpi_t r; + + r = mpi_new (inlen * 8); + _gcry_mpi_set_buffer (r, plain, inlen, 0); + + rc = sexp_build (r_plain, NULL, "(value %m)", r); + + mpi_free (r); + } + + leave_main: + _gcry_md_close (md); + mpi_free (x2y2); + xfree (plain); + + point_free (&c1); + point_free (&kP); + mpi_free (x2); + mpi_free (y2); + } + + leave: + _gcry_mpi_release (data_c1); + _gcry_mpi_release (data_c3); + _gcry_mpi_release (data_c2); + + return rc; +} + + +/* Compute an SM2 signature. + * Return the signature struct (r,s) from the message hash. The caller + * must have allocated R and S. + */ +gpg_err_code_t +_gcry_ecc_sm2_sign (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s, + int flags, int hashalgo) +{ + gpg_err_code_t rc = 0; + int extraloops = 0; + gcry_mpi_t hash; + const void *abuf; + unsigned int abits, qbits; + gcry_mpi_t tmp = NULL; + gcry_mpi_t k = NULL; + gcry_mpi_t rk = NULL; + mpi_point_struct kG; + gcry_mpi_t x1; + + if (DBG_CIPHER) + log_mpidump ("sm2 sign hash ", input); + + qbits = mpi_get_nbits (ec->n); + + /* Convert the INPUT into an MPI if needed. */ + rc = _gcry_dsa_normalize_hash (input, &hash, qbits); + if (rc) + return rc; + + point_init (&kG); + x1 = mpi_new (0); + rk = mpi_new (0); + tmp = mpi_new (0); + + for (;;) + { + /* rand k in [1, n-1] */ + if ((flags & PUBKEY_FLAG_RFC6979) && hashalgo) + { + /* Use Pornin's method for deterministic DSA. If this + flag is set, it is expected that HASH is an opaque + MPI with the to be signed hash. That hash is also + used as h1 from 3.2.a. */ + if (!mpi_is_opaque (input)) + { + rc = GPG_ERR_CONFLICT; + goto leave; + } + + abuf = mpi_get_opaque (input, &abits); + rc = _gcry_dsa_gen_rfc6979_k (&k, ec->n, ec->d, + abuf, (abits+7)/8, + hashalgo, extraloops); + if (rc) + goto leave; + extraloops++; + } + else + k = _gcry_dsa_gen_k (ec->n, GCRY_VERY_STRONG_RANDOM); + + _gcry_dsa_modify_k (k, ec->n, qbits); + + /* [k]G = (x1, y1) */ + _gcry_mpi_ec_mul_point (&kG, k, ec->G, ec); + if (_gcry_mpi_ec_get_affine (x1, NULL, &kG, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* r = (e + x1) % n */ + mpi_addm (r, hash, x1, ec->n); + + /* r != 0 && r + k != n */ + if (mpi_cmp_ui (r, 0) == 0) + continue; + mpi_add (rk, r, k); + if (mpi_cmp (rk, ec->n) == 0) + continue; + + /* s = ((d + 1)^-1 * (k - rd)) % n */ + mpi_addm (s, ec->d, GCRYMPI_CONST_ONE, ec->n); + mpi_invm (s, s, ec->n); + mpi_mulm (tmp, r, ec->d, ec->n); + mpi_subm (tmp, k, tmp, ec->n); + mpi_mulm (s, s, tmp, ec->n); + + /* s != 0 */ + if (mpi_cmp_ui (s, 0) == 0) + continue; + + break; /* Okay */ + } + + if (DBG_CIPHER) + { + log_mpidump ("sm2 sign result r ", r); + log_mpidump ("sm2 sign result s ", s); + } + +leave: + point_free (&kG); + mpi_free (k); + mpi_free (x1); + mpi_free (rk); + mpi_free (tmp); + + if (hash != input) + mpi_free (hash); + + return rc; +} + + +/* Verify an SM2 signature. + * Check if R and S verifies INPUT. + */ +gpg_err_code_t +_gcry_ecc_sm2_verify (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s) +{ + gpg_err_code_t err = 0; + gcry_mpi_t hash = NULL; + gcry_mpi_t t = NULL; + mpi_point_struct sG, tP; + gcry_mpi_t x1, y1; + unsigned int nbits; + + /* r, s within [1, n-1] */ + if (mpi_cmp_ui (r, 1) < 0 || mpi_cmp (r, ec->n) > 0) + return GPG_ERR_BAD_SIGNATURE; + if (mpi_cmp_ui (s, 1) < 0 || mpi_cmp (s, ec->n) > 0) + return GPG_ERR_BAD_SIGNATURE; + + nbits = mpi_get_nbits (ec->n); + err = _gcry_dsa_normalize_hash (input, &hash, nbits); + if (err) + return err; + + point_init (&sG); + point_init (&tP); + x1 = mpi_new (0); + y1 = mpi_new (0); + t = mpi_new (0); + + /* t = (r + s) % n, t != 0 */ + mpi_addm (t, r, s, ec->n); + if (mpi_cmp_ui (t, 0) == 0) + { + err = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + + /* sG + tP = (x1, y1) */ + _gcry_mpi_ec_mul_point (&sG, s, ec->G, ec); + _gcry_mpi_ec_mul_point (&tP, t, ec->Q, ec); + _gcry_mpi_ec_add_points (&sG, &sG, &tP, ec); + if (_gcry_mpi_ec_get_affine (x1, y1, &sG, ec)) + { + err = GPG_ERR_INV_DATA; + goto leave; + } + + /* R = (e + x1) % n */ + mpi_addm (t, hash, x1, ec->n); + + /* check R == r */ + if (mpi_cmp (t, r)) + { + if (DBG_CIPHER) + { + log_mpidump (" R", t); + log_mpidump (" r", r); + log_mpidump (" s", s); + } + err = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + if (DBG_CIPHER) + log_debug ("sm2 verify: Accepted\n"); + + leave: + point_free (&sG); + point_free (&tP); + mpi_free (x1); + mpi_free (y1); + mpi_free (t); + if (hash != input) + mpi_free (hash); + + return err; +} diff --git a/cipher/ecc.c b/cipher/ecc.c index 921510cc..f2e8bf00 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -69,6 +69,7 @@ static const char *ecc_names[] = "ecdh", "eddsa", "gost", + "sm2", NULL, }; @@ -577,7 +578,7 @@ ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey) (&curve_flags, NULL, ((flags & PUBKEY_FLAG_PARAM) && (flags & PUBKEY_FLAG_EDDSA))? "(flags param eddsa)" : - ((flags & PUBKEY_FLAG_PARAM) && (flags & PUBKEY_FLAG_EDDSA))? + ((flags & PUBKEY_FLAG_PARAM) && (flags & PUBKEY_FLAG_DJB_TWEAK))? "(flags param djb-tweak)" : ((flags & PUBKEY_FLAG_PARAM))? "(flags param)" : ((flags & PUBKEY_FLAG_EDDSA))? @@ -723,6 +724,14 @@ ecc_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms) rc = sexp_build (r_sig, NULL, "(sig-val(gost(r%M)(s%M)))", sig_r, sig_s); } + else if ((ctx.flags & PUBKEY_FLAG_SM2)) + { + rc = _gcry_ecc_sm2_sign (data, ec, sig_r, sig_s, + ctx.flags, ctx.hash_algo); + if (!rc) + rc = sexp_build (r_sig, NULL, + "(sig-val(sm2(r%M)(s%M)))", sig_r, sig_s); + } else { rc = _gcry_ecc_ecdsa_sign (data, ec, sig_r, sig_s, @@ -811,29 +820,13 @@ ecc_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms) { rc = _gcry_ecc_gost_verify (data, ec, sig_r, sig_s); } + else if ((sigflags & PUBKEY_FLAG_SM2)) + { + rc = _gcry_ecc_sm2_verify (data, ec, sig_r, sig_s); + } else { - if (mpi_is_opaque (data)) - { - const void *abuf; - unsigned int abits, qbits; - gcry_mpi_t a; - - qbits = mpi_get_nbits (ec->n); - - abuf = mpi_get_opaque (data, &abits); - rc = _gcry_mpi_scan (&a, GCRYMPI_FMT_USG, abuf, (abits+7)/8, NULL); - if (!rc) - { - if (abits > qbits) - mpi_rshift (a, a, abits - qbits); - - rc = _gcry_ecc_ecdsa_verify (a, ec, sig_r, sig_s); - _gcry_mpi_release (a); - } - } - else - rc = _gcry_ecc_ecdsa_verify (data, ec, sig_r, sig_s); + rc = _gcry_ecc_ecdsa_verify (data, ec, sig_r, sig_s); } leave: @@ -862,20 +855,30 @@ ecc_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms) * dG - public long-term key * k - ephemeral scalar * kG - ephemeral public key - * dkG - shared secret + * S - optional salt value currently used with GOST + * kSdG - shared secret * * ecc_encrypt_raw description: - * input: - * data[0] : private scalar (k) + * input: An S-expression with: + * a private scalar (k) + * an optional salt value (S) * output: A new S-expression with the parameters: - * s : shared point (kdG) + * s : shared point (kSdG) * e : generated ephemeral public key (kG) * + * For information about the format of the input S-expression + * see _gcry_pk_util_data_to_mpi(). + * * ecc_decrypt_raw description: * input: - * data[0] : a point kG (ephemeral public key) + * data[0] : a point kG (ephemeral public key) with an optional + * salt value (S) * output: - * result[0] : shared point (kdG) + * result[0] : shared point (kSdG) + * + * The input format of the salt value to be used with ecc_decrypt_raw() + * depends on the underlying public key algorithm. For GOST keys, the salt + * value should be passed in the low bits of the input MPI value. */ static gcry_err_code_t ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) @@ -886,6 +889,7 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) gcry_mpi_t mpi_s = NULL; gcry_mpi_t mpi_e = NULL; gcry_mpi_t data = NULL; + gcry_mpi_t salt = NULL; mpi_ec_t ec = NULL; int flags = 0; int no_error_on_infinity; @@ -929,6 +933,29 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) mpi_clear_bit (data, i); mpi_set_highbit (data, ec->nbits - 1); } + + /* For GOST extract the UKM value. If its length is unspecified + take 64 bits as default. */ + if ((flags & PUBKEY_FLAG_GOST)) + { + unsigned int ukm_blen = ctx.saltlen ? ctx.saltlen : 64; + if (_gcry_mpi_get_nbits (data) < ukm_blen) + { + rc = GPG_ERR_TOO_SHORT; + goto leave; + } + salt = _gcry_mpi_copy (data); + if (!salt) + { + rc = gpg_error_from_syserror (); + goto leave; + } + _gcry_mpi_clear_highbit (salt, ukm_blen); + if (DBG_CIPHER) + log_printmpi ("UKM: ", salt); + _gcry_mpi_rshift (data, data, ukm_blen); + } + if (DBG_CIPHER) log_mpidump ("ecc_encrypt data", data); @@ -938,6 +965,13 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) goto leave; } + if ((ctx.flags & PUBKEY_FLAG_SM2)) + { + /* All encryption will be done, return it. */ + rc = _gcry_ecc_sm2_encrypt (r_ciph, data, ec); + goto leave; + } + /* The following is false: assert( mpi_cmp_ui( R.x, 1 )==0 );, so */ { mpi_point_struct R; /* Result that we return. */ @@ -957,6 +991,10 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) /* R = kQ <=> R = kdG */ _gcry_mpi_ec_mul_point (&R, data, ec->Q, ec); + /* Multiply the resulting point by a salt value if any. */ + if (salt && gcry_mpi_cmp_ui (salt, 0)) + _gcry_mpi_ec_mul_point (&R, salt, &R, ec); + if (_gcry_mpi_ec_get_affine (x, y, &R, ec)) { /* @@ -1019,7 +1057,14 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) } if (!rc) - rc = sexp_build (r_ciph, NULL, "(enc-val(ecdh(s%m)(e%m)))", mpi_s, mpi_e); + { + if (DBG_CIPHER) + { + log_printmpi ("ecc_encrypt res", mpi_s); + log_printmpi ("ecc_encrypt public key e", mpi_e); + } + rc = sexp_build (r_ciph, NULL, "(enc-val(ecdh(s%m)(e%m)))", mpi_s, mpi_e); + } leave: _gcry_mpi_release (data); @@ -1027,6 +1072,7 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) _gcry_mpi_release (mpi_e); _gcry_mpi_ec_free (ec); _gcry_pk_util_free_encoding_ctx (&ctx); + _gcry_mpi_release (salt); if (DBG_CIPHER) log_debug ("ecc_encrypt => %s\n", gpg_strerror (rc)); return rc; @@ -1034,9 +1080,10 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) /* input: - * data[0] : a point kG (ephemeral public key) + * data[0] : a point kG (ephemeral public key) with an optional + * salt value (S) * output: - * resaddr[0] : shared point kdG + * resaddr[0] : shared point kSdG * * see ecc_encrypt_raw for details. */ @@ -1052,6 +1099,7 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) mpi_point_struct kG; mpi_point_struct R; gcry_mpi_t r = NULL; + gcry_mpi_t salt = NULL; int flags = 0; int enable_specific_point_validation; @@ -1061,18 +1109,6 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_DECRYPT, (nbits = ecc_get_nbits (keyparms))); - /* - * Extract the data. - */ - rc = _gcry_pk_util_preparse_encval (s_data, ecc_names, &l1, &ctx); - if (rc) - goto leave; - rc = sexp_extract_param (l1, NULL, "/e", &data_e, NULL); - if (rc) - goto leave; - if (DBG_CIPHER) - log_printmpi ("ecc_decrypt d_e", data_e); - /* * Extract the key. */ @@ -1086,11 +1122,58 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) goto leave; } + /* + * Extract the data. + */ + rc = _gcry_pk_util_preparse_encval (s_data, ecc_names, &l1, &ctx); + if (rc) + goto leave; + if ((ctx.flags & PUBKEY_FLAG_SM2)) + { + /* All decryption will be done, return it. */ + rc = _gcry_ecc_sm2_decrypt (r_plain, l1, ec); + goto leave; + } + else + { + rc = sexp_extract_param (l1, NULL, "/e", &data_e, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + log_printmpi ("ecc_decrypt d_e", data_e); + } + if (ec->dialect == ECC_DIALECT_SAFECURVE || (flags & PUBKEY_FLAG_DJB_TWEAK)) enable_specific_point_validation = 1; else enable_specific_point_validation = 0; + /* For GOST extract the UKM value. */ + if ((flags & PUBKEY_FLAG_GOST)) + { + /* Expect the uncompressed point format 0x04... */ + int key_len = 2*nbits/8 + 1; + int data_len = (_gcry_mpi_get_nbits (data_e)+7)/8; + int ukm_blen = (data_len - key_len) * 8; + if (ukm_blen < 64) + { + rc = GPG_ERR_TOO_SHORT; + goto leave; + } + salt = _gcry_mpi_copy (data_e); + if (!salt) + { + rc = gpg_error_from_syserror (); + goto leave; + } + _gcry_mpi_clear_highbit (salt, ukm_blen); + if (DBG_CIPHER) + log_printmpi ("UKM: ", salt); + _gcry_mpi_rshift (data_e, data_e, ukm_blen); + if (DBG_CIPHER) + log_printmpi ("ecc_decrypt d_e", data_e); + } + /* * Compute the plaintext. */ @@ -1128,6 +1211,10 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) /* R = dkG */ _gcry_mpi_ec_mul_point (&R, ec->d, &kG, ec); + /* Multiply the resulting point by a salt value if any. */ + if (salt && gcry_mpi_cmp_ui (salt, 0)) + _gcry_mpi_ec_mul_point (&R, salt, &R, ec); + /* The following is false: assert( mpi_cmp_ui( R.x, 1 )==0 );, so: */ { gcry_mpi_t x, y; @@ -1203,6 +1290,7 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) sexp_release (l1); _gcry_mpi_ec_free (ec); _gcry_pk_util_free_encoding_ctx (&ctx); + _gcry_mpi_release (salt); if (DBG_CIPHER) log_debug ("ecc_decrypt => %s\n", gpg_strerror (rc)); return rc; @@ -1712,7 +1800,7 @@ gcry_pk_spec_t _gcry_pubkey_spec_ecc = GCRY_PK_ECC, { 0, 1 }, (GCRY_PK_USAGE_SIGN | GCRY_PK_USAGE_ENCR), "ECC", ecc_names, - "pabgnhq", "pabgnhqd", "sw", "rs", "pabgnhq", + "pabgnhq", "pabgnhqd", "se", "rs", "pabgnhq", ecc_generate, ecc_check_secret_key, ecc_encrypt_raw, diff --git a/cipher/pubkey-util.c b/cipher/pubkey-util.c index 4a6bf462..621000ba 100644 --- a/cipher/pubkey-util.c +++ b/cipher/pubkey-util.c @@ -81,6 +81,11 @@ _gcry_pk_util_parse_flaglist (gcry_sexp_t list, encoding = PUBKEY_ENC_RAW; flags |= PUBKEY_FLAG_RAW_FLAG; /* Explicitly given. */ } + else if (!memcmp (s, "sm2", 3)) + { + encoding = PUBKEY_ENC_RAW; + flags |= PUBKEY_FLAG_SM2 | PUBKEY_FLAG_RAW_FLAG; + } else if (!igninvflag) rc = GPG_ERR_INV_FLAG; break; @@ -429,6 +434,8 @@ _gcry_pk_util_preparse_sigval (gcry_sexp_t s_sig, const char **algo_names, *r_eccflags = PUBKEY_FLAG_EDDSA; if (!strcmp (name, "gost")) *r_eccflags = PUBKEY_FLAG_GOST; + if (!strcmp (name, "sm2")) + *r_eccflags = PUBKEY_FLAG_SM2; } *r_parms = l2; @@ -650,7 +657,7 @@ _gcry_pk_util_free_encoding_ctx (struct pk_encoding_ctx *ctx) () or (data - [(flags [raw, direct, pkcs1, oaep, pss, no-blinding, rfc6979, eddsa])] + [(flags [raw, direct, pkcs1, oaep, pss, no-blinding, rfc6979, eddsa, gost])] [(hash )] [(value )] [(hash-algo )] @@ -667,6 +674,7 @@ _gcry_pk_util_free_encoding_ctx (struct pk_encoding_ctx *ctx) LABEL is specific to OAEP. SALT-LENGTH is for PSS it is limited to 16384 bytes. + For GOST a SALT-LENGTH means the length of UKM in bits. RANDOM-OVERRIDE is used to replace random nonces for regression testing. */ @@ -816,6 +824,26 @@ _gcry_pk_util_data_to_mpi (gcry_sexp_t input, gcry_mpi_t *ret_mpi, *ret_mpi = sexp_nth_mpi (lvalue, 1, GCRYMPI_FMT_USG); if (!*ret_mpi) rc = GPG_ERR_INV_OBJ; + + if (parsed_flags & PUBKEY_FLAG_GOST) + { + gcry_sexp_t list; + /* Get SALT-LENGTH (UKM length). */ + list = sexp_find_token (ldata, "salt-length", 0); + if (list) + { + s = sexp_nth_data (list, 1, &n); + if (!s) + { + rc = GPG_ERR_NO_OBJ; + goto leave; + } + ctx->saltlen = (unsigned int)strtoul (s, NULL, 10); + sexp_release (list); + } + else + ctx->saltlen = 0; + } } else if (ctx->encoding == PUBKEY_ENC_PKCS1 && lvalue && ctx->op == PUBKEY_OP_ENCRYPT) diff --git a/cipher/rijndael-ppc.c b/cipher/rijndael-ppc.c index 7c349f8b..a8bcae46 100644 --- a/cipher/rijndael-ppc.c +++ b/cipher/rijndael-ppc.c @@ -51,17 +51,27 @@ typedef union #define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE -#define ALIGNED_LOAD(in_ptr) \ - (vec_aligned_ld (0, (const unsigned char *)(in_ptr))) +#define ALIGNED_LOAD(in_ptr, offs) \ + (asm_aligned_ld ((offs) * 16, (const void *)(in_ptr))) -#define ALIGNED_STORE(out_ptr, vec) \ - (vec_aligned_st ((vec), 0, (unsigned char *)(out_ptr))) +#define ALIGNED_STORE(out_ptr, offs, vec) \ + (asm_aligned_st ((vec), (offs) * 16, (void *)(out_ptr))) -#define VEC_LOAD_BE(in_ptr, bige_const) \ - (vec_load_be (0, (const unsigned char *)(in_ptr), bige_const)) +#define VEC_BE_SWAP(vec, bige_const) (asm_be_swap ((vec), (bige_const))) -#define VEC_STORE_BE(out_ptr, vec, bige_const) \ - (vec_store_be ((vec), 0, (unsigned char *)(out_ptr), bige_const)) +#define VEC_LOAD_BE(in_ptr, offs, bige_const) \ + (asm_be_swap (asm_load_be_noswap ((offs) * 16, (const void *)(in_ptr)), \ + bige_const)) + +#define VEC_LOAD_BE_NOSWAP(in_ptr, offs) \ + (asm_load_be_noswap ((offs) * 16, (const unsigned char *)(in_ptr))) + +#define VEC_STORE_BE(out_ptr, offs, vec, bige_const) \ + (asm_store_be_noswap (asm_be_swap ((vec), (bige_const)), (offs) * 16, \ + (void *)(out_ptr))) + +#define VEC_STORE_BE_NOSWAP(out_ptr, offs, vec) \ + (asm_store_be_noswap ((vec), (offs) * 16, (void *)(out_ptr))) #define ROUND_KEY_VARIABLES \ @@ -69,166 +79,257 @@ typedef union #define PRELOAD_ROUND_KEYS(nrounds) \ do { \ - rkey0 = ALIGNED_LOAD(&rk[0]); \ - rkeylast = ALIGNED_LOAD(&rk[nrounds]); \ + rkey0 = ALIGNED_LOAD (rk, 0); \ + rkeylast = ALIGNED_LOAD (rk, nrounds); \ } while (0) - #define AES_ENCRYPT(blk, nrounds) \ do { \ blk ^= rkey0; \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[1])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[2])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[3])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[4])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[5])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[6])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[7])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[8])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[9])); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 1)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 2)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 3)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 4)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 5)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 6)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 7)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 8)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 9)); \ if (nrounds >= 12) \ { \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[10])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[11])); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 10)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 11)); \ if (rounds > 12) \ { \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[12])); \ - blk = vec_cipher_be (blk, ALIGNED_LOAD(&rk[13])); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 12)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 13)); \ } \ } \ - blk = vec_cipherlast_be (blk, rkeylast); \ + blk = asm_cipherlast_be (blk, rkeylast); \ } while (0) - #define AES_DECRYPT(blk, nrounds) \ do { \ blk ^= rkey0; \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[1])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[2])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[3])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[4])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[5])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[6])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[7])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[8])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[9])); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 1)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 2)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 3)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 4)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 5)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 6)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 7)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 8)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 9)); \ if (nrounds >= 12) \ { \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[10])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[11])); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 10)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 11)); \ if (rounds > 12) \ { \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[12])); \ - blk = vec_ncipher_be (blk, ALIGNED_LOAD(&rk[13])); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 12)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 13)); \ } \ } \ - blk = vec_ncipherlast_be (blk, rkeylast); \ + blk = asm_ncipherlast_be (blk, rkeylast); \ } while (0) +#define ROUND_KEY_VARIABLES_ALL \ + block rkey0, rkey1, rkey2, rkey3, rkey4, rkey5, rkey6, rkey7, rkey8, \ + rkey9, rkey10, rkey11, rkey12, rkey13, rkeylast + +#define PRELOAD_ROUND_KEYS_ALL(nrounds) \ + do { \ + rkey0 = ALIGNED_LOAD (rk, 0); \ + rkey1 = ALIGNED_LOAD (rk, 1); \ + rkey2 = ALIGNED_LOAD (rk, 2); \ + rkey3 = ALIGNED_LOAD (rk, 3); \ + rkey4 = ALIGNED_LOAD (rk, 4); \ + rkey5 = ALIGNED_LOAD (rk, 5); \ + rkey6 = ALIGNED_LOAD (rk, 6); \ + rkey7 = ALIGNED_LOAD (rk, 7); \ + rkey8 = ALIGNED_LOAD (rk, 8); \ + rkey9 = ALIGNED_LOAD (rk, 9); \ + if (nrounds >= 12) \ + { \ + rkey10 = ALIGNED_LOAD (rk, 10); \ + rkey11 = ALIGNED_LOAD (rk, 11); \ + if (rounds > 12) \ + { \ + rkey12 = ALIGNED_LOAD (rk, 12); \ + rkey13 = ALIGNED_LOAD (rk, 13); \ + } \ + } \ + rkeylast = ALIGNED_LOAD (rk, nrounds); \ + } while (0) + +#define AES_ENCRYPT_ALL(blk, nrounds) \ + do { \ + blk ^= rkey0; \ + blk = asm_cipher_be (blk, rkey1); \ + blk = asm_cipher_be (blk, rkey2); \ + blk = asm_cipher_be (blk, rkey3); \ + blk = asm_cipher_be (blk, rkey4); \ + blk = asm_cipher_be (blk, rkey5); \ + blk = asm_cipher_be (blk, rkey6); \ + blk = asm_cipher_be (blk, rkey7); \ + blk = asm_cipher_be (blk, rkey8); \ + blk = asm_cipher_be (blk, rkey9); \ + if (nrounds >= 12) \ + { \ + blk = asm_cipher_be (blk, rkey10); \ + blk = asm_cipher_be (blk, rkey11); \ + if (rounds > 12) \ + { \ + blk = asm_cipher_be (blk, rkey12); \ + blk = asm_cipher_be (blk, rkey13); \ + } \ + } \ + blk = asm_cipherlast_be (blk, rkeylast); \ + } while (0) + + +#ifdef WORDS_BIGENDIAN static const block vec_bswap32_const = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }; +#else +static const block vec_bswap32_const_neg = + { ~3, ~2, ~1, ~0, ~7, ~6, ~5, ~4, ~11, ~10, ~9, ~8, ~15, ~14, ~13, ~12 }; +#endif static ASM_FUNC_ATTR_INLINE block -vec_aligned_ld(unsigned long offset, const unsigned char *ptr) +asm_aligned_ld(unsigned long offset, const void *ptr) { -#ifndef WORDS_BIGENDIAN block vec; - __asm__ ("lvx %0,%1,%2\n\t" - : "=v" (vec) - : "r" (offset), "r" ((uintptr_t)ptr) - : "memory"); + __asm__ volatile ("lvx %0,%1,%2\n\t" + : "=v" (vec) + : "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); return vec; -#else - return vec_vsx_ld (offset, ptr); -#endif } +static ASM_FUNC_ATTR_INLINE void +asm_aligned_st(block vec, unsigned long offset, void *ptr) +{ + __asm__ volatile ("stvx %0,%1,%2\n\t" + : + : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); +} static ASM_FUNC_ATTR_INLINE block -vec_load_be_const(void) +asm_load_be_const(void) { #ifndef WORDS_BIGENDIAN - return ~ALIGNED_LOAD(&vec_bswap32_const); + return ALIGNED_LOAD (&vec_bswap32_const_neg, 0); #else static const block vec_dummy = { 0 }; return vec_dummy; #endif } - static ASM_FUNC_ATTR_INLINE block -vec_load_be(unsigned long offset, const unsigned char *ptr, - block be_bswap_const) +asm_vperm1(block vec, block mask) { -#ifndef WORDS_BIGENDIAN - block vec; - /* GCC vec_vsx_ld is generating two instructions on little-endian. Use - * lxvw4x directly instead. */ - __asm__ ("lxvw4x %x0,%1,%2\n\t" - : "=wa" (vec) - : "r" (offset), "r" ((uintptr_t)ptr) - : "memory"); - __asm__ ("vperm %0,%1,%1,%2\n\t" - : "=v" (vec) - : "v" (vec), "v" (be_bswap_const)); - return vec; -#else - (void)be_bswap_const; - return vec_vsx_ld (offset, ptr); -#endif + block o; + __asm__ volatile ("vperm %0,%1,%1,%2\n\t" + : "=v" (o) + : "v" (vec), "v" (mask)); + return o; } - -static ASM_FUNC_ATTR_INLINE void -vec_aligned_st(block vec, unsigned long offset, unsigned char *ptr) +static ASM_FUNC_ATTR_INLINE block +asm_be_swap(block vec, block be_bswap_const) { + (void)be_bswap_const; #ifndef WORDS_BIGENDIAN - __asm__ ("stvx %0,%1,%2\n\t" - : - : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr) - : "memory"); + return asm_vperm1 (vec, be_bswap_const); #else - vec_vsx_st (vec, offset, ptr); + return vec; #endif } +static ASM_FUNC_ATTR_INLINE block +asm_load_be_noswap(unsigned long offset, const void *ptr) +{ + block vec; + __asm__ volatile ("lxvw4x %x0,%1,%2\n\t" + : "=wa" (vec) + : "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); + /* NOTE: vec needs to be be-swapped using 'asm_be_swap' by caller */ + return vec; +} static ASM_FUNC_ATTR_INLINE void -vec_store_be(block vec, unsigned long offset, unsigned char *ptr, - block be_bswap_const) +asm_store_be_noswap(block vec, unsigned long offset, void *ptr) { -#ifndef WORDS_BIGENDIAN - /* GCC vec_vsx_st is generating two instructions on little-endian. Use - * stxvw4x directly instead. */ - __asm__ ("vperm %0,%1,%1,%2\n\t" - : "=v" (vec) - : "v" (vec), "v" (be_bswap_const)); - __asm__ ("stxvw4x %x0,%1,%2\n\t" - : - : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr) - : "memory"); -#else - (void)be_bswap_const; - vec_vsx_st (vec, offset, ptr); -#endif + /* NOTE: vec be-swapped using 'asm_be_swap' by caller */ + __asm__ volatile ("stxvw4x %x0,%1,%2\n\t" + : + : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); } +static ASM_FUNC_ATTR_INLINE block +asm_add_uint128(block a, block b) +{ + block res; + __asm__ volatile ("vadduqm %0,%1,%2\n\t" + : "=v" (res) + : "v" (a), "v" (b)); + return res; +} static ASM_FUNC_ATTR_INLINE block -vec_add_uint128(block a, block b) +asm_xor(block a, block b) { -#if 1 block res; - /* Use assembly as GCC (v8.3) generates slow code for vec_vadduqm. */ - __asm__ ("vadduqm %0,%1,%2\n\t" - : "=v" (res) - : "v" (a), "v" (b)); + __asm__ volatile ("vxor %0,%1,%2\n\t" + : "=v" (res) + : "v" (a), "v" (b)); return res; -#else - return (block)vec_vadduqm((vector __uint128_t)a, (vector __uint128_t)b); -#endif +} + +static ASM_FUNC_ATTR_INLINE block +asm_cipher_be(block b, block rk) +{ + block o; + __asm__ volatile ("vcipher %0, %1, %2\n\t" + : "=v" (o) + : "v" (b), "v" (rk)); + return o; +} + +static ASM_FUNC_ATTR_INLINE block +asm_cipherlast_be(block b, block rk) +{ + block o; + __asm__ volatile ("vcipherlast %0, %1, %2\n\t" + : "=v" (o) + : "v" (b), "v" (rk)); + return o; +} + +static ASM_FUNC_ATTR_INLINE block +asm_ncipher_be(block b, block rk) +{ + block o; + __asm__ volatile ("vncipher %0, %1, %2\n\t" + : "=v" (o) + : "v" (b), "v" (rk)); + return o; +} + +static ASM_FUNC_ATTR_INLINE block +asm_ncipherlast_be(block b, block rk) +{ + block o; + __asm__ volatile ("vncipherlast %0, %1, %2\n\t" + : "=v" (o) + : "v" (b), "v" (rk)); + return o; } @@ -250,7 +351,7 @@ _gcry_aes_sbox4_ppc8(u32 fourbytes) void _gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); union { PROPERLY_ALIGNED_TYPE dummy; @@ -345,11 +446,11 @@ _gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key) for (r = 0; r <= rounds; r++) { #ifndef WORDS_BIGENDIAN - VEC_STORE_BE(&ekey[r], ALIGNED_LOAD(&ekey[r]), bige_const); + VEC_STORE_BE(ekey, r, ALIGNED_LOAD (ekey, r), bige_const); #else - block rvec = ALIGNED_LOAD(&ekey[r]); - ALIGNED_STORE(&ekey[r], - vec_perm(rvec, rvec, vec_bswap32_const)); + block rvec = ALIGNED_LOAD (ekey, r); + ALIGNED_STORE (ekey, r, + vec_perm(rvec, rvec, vec_bswap32_const)); (void)bige_const; #endif } @@ -378,7 +479,7 @@ aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx) rr = rounds; for (r = 0, rr = rounds; r <= rounds; r++, rr--) { - ALIGNED_STORE(&dkey[r], ALIGNED_LOAD(&ekey[rr])); + ALIGNED_STORE (dkey, r, ALIGNED_LOAD (ekey, rr)); } } @@ -394,18 +495,18 @@ unsigned int _gcry_aes_ppc8_encrypt (const RIJNDAEL_context *ctx, unsigned char *out, const unsigned char *in) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); const u128_t *rk = (u128_t *)&ctx->keyschenc; int rounds = ctx->rounds; ROUND_KEY_VARIABLES; block b; - b = VEC_LOAD_BE (in, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); PRELOAD_ROUND_KEYS (rounds); AES_ENCRYPT (b, rounds); - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); return 0; /* does not use stack */ } @@ -415,18 +516,18 @@ unsigned int _gcry_aes_ppc8_decrypt (const RIJNDAEL_context *ctx, unsigned char *out, const unsigned char *in) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); const u128_t *rk = (u128_t *)&ctx->keyschdec; int rounds = ctx->rounds; ROUND_KEY_VARIABLES; block b; - b = VEC_LOAD_BE (in, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); PRELOAD_ROUND_KEYS (rounds); AES_DECRYPT (b, rounds); - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); return 0; /* does not use stack */ } @@ -436,41 +537,41 @@ void _gcry_aes_ppc8_cfb_enc (void *context, unsigned char *iv_arg, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *rk = (u128_t *)&ctx->keyschenc; const u128_t *in = (const u128_t *)inbuf_arg; u128_t *out = (u128_t *)outbuf_arg; int rounds = ctx->rounds; - ROUND_KEY_VARIABLES; + ROUND_KEY_VARIABLES_ALL; block rkeylast_orig; block iv; - iv = VEC_LOAD_BE (iv_arg, bige_const); + iv = VEC_LOAD_BE (iv_arg, 0, bige_const); - PRELOAD_ROUND_KEYS (rounds); + PRELOAD_ROUND_KEYS_ALL (rounds); rkeylast_orig = rkeylast; for (; nblocks; nblocks--) { - rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, bige_const); + rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const); - AES_ENCRYPT (iv, rounds); + AES_ENCRYPT_ALL (iv, rounds); - VEC_STORE_BE (out, iv, bige_const); + VEC_STORE_BE (out, 0, iv, bige_const); out++; in++; } - VEC_STORE_BE (iv_arg, iv, bige_const); + VEC_STORE_BE (iv_arg, 0, iv, bige_const); } void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv_arg, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *rk = (u128_t *)&ctx->keyschenc; const u128_t *in = (const u128_t *)inbuf_arg; @@ -483,7 +584,7 @@ void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv_arg, block b0, b1, b2, b3, b4, b5, b6, b7; block rkey; - iv = VEC_LOAD_BE (iv_arg, bige_const); + iv = VEC_LOAD_BE (iv_arg, 0, bige_const); PRELOAD_ROUND_KEYS (rounds); rkeylast_orig = rkeylast; @@ -491,34 +592,42 @@ void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv_arg, for (; nblocks >= 8; nblocks -= 8) { in0 = iv; - in1 = VEC_LOAD_BE (in + 0, bige_const); - in2 = VEC_LOAD_BE (in + 1, bige_const); - in3 = VEC_LOAD_BE (in + 2, bige_const); - in4 = VEC_LOAD_BE (in + 3, bige_const); - in5 = VEC_LOAD_BE (in + 4, bige_const); - in6 = VEC_LOAD_BE (in + 5, bige_const); - in7 = VEC_LOAD_BE (in + 6, bige_const); - iv = VEC_LOAD_BE (in + 7, bige_const); - - b0 = rkey0 ^ in0; - b1 = rkey0 ^ in1; - b2 = rkey0 ^ in2; - b3 = rkey0 ^ in3; - b4 = rkey0 ^ in4; - b5 = rkey0 ^ in5; - b6 = rkey0 ^ in6; - b7 = rkey0 ^ in7; + in1 = VEC_LOAD_BE_NOSWAP (in, 0); + in2 = VEC_LOAD_BE_NOSWAP (in, 1); + in3 = VEC_LOAD_BE_NOSWAP (in, 2); + in4 = VEC_LOAD_BE_NOSWAP (in, 3); + in1 = VEC_BE_SWAP (in1, bige_const); + in2 = VEC_BE_SWAP (in2, bige_const); + in5 = VEC_LOAD_BE_NOSWAP (in, 4); + in6 = VEC_LOAD_BE_NOSWAP (in, 5); + in3 = VEC_BE_SWAP (in3, bige_const); + in4 = VEC_BE_SWAP (in4, bige_const); + in7 = VEC_LOAD_BE_NOSWAP (in, 6); + iv = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + in5 = VEC_BE_SWAP (in5, bige_const); + in6 = VEC_BE_SWAP (in6, bige_const); + b0 = asm_xor (rkey0, in0); + b1 = asm_xor (rkey0, in1); + in7 = VEC_BE_SWAP (in7, bige_const); + iv = VEC_BE_SWAP (iv, bige_const); + b2 = asm_xor (rkey0, in2); + b3 = asm_xor (rkey0, in3); + b4 = asm_xor (rkey0, in4); + b5 = asm_xor (rkey0, in5); + b6 = asm_xor (rkey0, in6); + b7 = asm_xor (rkey0, in7); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD(&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); \ - b4 = vec_cipher_be (b4, rkey); \ - b5 = vec_cipher_be (b5, rkey); \ - b6 = vec_cipher_be (b6, rkey); \ - b7 = vec_cipher_be (b7, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); @@ -542,48 +651,60 @@ void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv_arg, #undef DO_ROUND - rkey = rkeylast; - b0 = vec_cipherlast_be (b0, rkey ^ in1); - b1 = vec_cipherlast_be (b1, rkey ^ in2); - b2 = vec_cipherlast_be (b2, rkey ^ in3); - b3 = vec_cipherlast_be (b3, rkey ^ in4); - b4 = vec_cipherlast_be (b4, rkey ^ in5); - b5 = vec_cipherlast_be (b5, rkey ^ in6); - b6 = vec_cipherlast_be (b6, rkey ^ in7); - b7 = vec_cipherlast_be (b7, rkey ^ iv); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); - VEC_STORE_BE (out + 4, b4, bige_const); - VEC_STORE_BE (out + 5, b5, bige_const); - VEC_STORE_BE (out + 6, b6, bige_const); - VEC_STORE_BE (out + 7, b7, bige_const); - - in += 8; + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + in3 = asm_xor (rkeylast, in3); + in4 = asm_xor (rkeylast, in4); + b0 = asm_cipherlast_be (b0, in1); + b1 = asm_cipherlast_be (b1, in2); + in5 = asm_xor (rkeylast, in5); + in6 = asm_xor (rkeylast, in6); + b2 = asm_cipherlast_be (b2, in3); + b3 = asm_cipherlast_be (b3, in4); + in7 = asm_xor (rkeylast, in7); + in0 = asm_xor (rkeylast, iv); + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b4 = asm_cipherlast_be (b4, in5); + b5 = asm_cipherlast_be (b5, in6); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b6 = asm_cipherlast_be (b6, in7); + b7 = asm_cipherlast_be (b7, in0); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4) { in0 = iv; - in1 = VEC_LOAD_BE (in + 0, bige_const); - in2 = VEC_LOAD_BE (in + 1, bige_const); - in3 = VEC_LOAD_BE (in + 2, bige_const); - iv = VEC_LOAD_BE (in + 3, bige_const); + in1 = VEC_LOAD_BE (in, 0, bige_const); + in2 = VEC_LOAD_BE (in, 1, bige_const); + in3 = VEC_LOAD_BE (in, 2, bige_const); + iv = VEC_LOAD_BE (in, 3, bige_const); - b0 = rkey0 ^ in0; - b1 = rkey0 ^ in1; - b2 = rkey0 ^ in2; - b3 = rkey0 ^ in3; + b0 = asm_xor (rkey0, in0); + b1 = asm_xor (rkey0, in1); + b2 = asm_xor (rkey0, in2); + b3 = asm_xor (rkey0, in3); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD(&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -607,16 +728,18 @@ void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv_arg, #undef DO_ROUND - rkey = rkeylast; - b0 = vec_cipherlast_be (b0, rkey ^ in1); - b1 = vec_cipherlast_be (b1, rkey ^ in2); - b2 = vec_cipherlast_be (b2, rkey ^ in3); - b3 = vec_cipherlast_be (b3, rkey ^ iv); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + in3 = asm_xor (rkeylast, in3); + in0 = asm_xor (rkeylast, iv); + b0 = asm_cipherlast_be (b0, in1); + b1 = asm_cipherlast_be (b1, in2); + b2 = asm_cipherlast_be (b2, in3); + b3 = asm_cipherlast_be (b3, in0); + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); in += 4; out += 4; @@ -625,20 +748,20 @@ void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv_arg, for (; nblocks; nblocks--) { - bin = VEC_LOAD_BE (in, bige_const); + bin = VEC_LOAD_BE (in, 0, bige_const); rkeylast = rkeylast_orig ^ bin; b = iv; iv = bin; AES_ENCRYPT (b, rounds); - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); out++; in++; } - VEC_STORE_BE (iv_arg, iv, bige_const); + VEC_STORE_BE (iv_arg, 0, iv, bige_const); } @@ -646,41 +769,41 @@ void _gcry_aes_ppc8_cbc_enc (void *context, unsigned char *iv_arg, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int cbc_mac) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *rk = (u128_t *)&ctx->keyschenc; const u128_t *in = (const u128_t *)inbuf_arg; u128_t *out = (u128_t *)outbuf_arg; int rounds = ctx->rounds; - ROUND_KEY_VARIABLES; + ROUND_KEY_VARIABLES_ALL; block lastiv, b; + unsigned int outadd = !cbc_mac; - lastiv = VEC_LOAD_BE (iv_arg, bige_const); + lastiv = VEC_LOAD_BE (iv_arg, 0, bige_const); - PRELOAD_ROUND_KEYS (rounds); + PRELOAD_ROUND_KEYS_ALL (rounds); for (; nblocks; nblocks--) { - b = lastiv ^ VEC_LOAD_BE (in, bige_const); + b = lastiv ^ VEC_LOAD_BE (in, 0, bige_const); - AES_ENCRYPT (b, rounds); + AES_ENCRYPT_ALL (b, rounds); lastiv = b; - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); in++; - if (!cbc_mac) - out++; + out += outadd; } - VEC_STORE_BE (iv_arg, lastiv, bige_const); + VEC_STORE_BE (iv_arg, 0, lastiv, bige_const); } void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv_arg, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *rk = (u128_t *)&ctx->keyschdec; const u128_t *in = (const u128_t *)inbuf_arg; @@ -699,41 +822,49 @@ void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv_arg, ctx->decryption_prepared = 1; } - iv = VEC_LOAD_BE (iv_arg, bige_const); + iv = VEC_LOAD_BE (iv_arg, 0, bige_const); PRELOAD_ROUND_KEYS (rounds); rkeylast_orig = rkeylast; for (; nblocks >= 8; nblocks -= 8) { - in0 = VEC_LOAD_BE (in + 0, bige_const); - in1 = VEC_LOAD_BE (in + 1, bige_const); - in2 = VEC_LOAD_BE (in + 2, bige_const); - in3 = VEC_LOAD_BE (in + 3, bige_const); - in4 = VEC_LOAD_BE (in + 4, bige_const); - in5 = VEC_LOAD_BE (in + 5, bige_const); - in6 = VEC_LOAD_BE (in + 6, bige_const); - in7 = VEC_LOAD_BE (in + 7, bige_const); - - b0 = rkey0 ^ in0; - b1 = rkey0 ^ in1; - b2 = rkey0 ^ in2; - b3 = rkey0 ^ in3; - b4 = rkey0 ^ in4; - b5 = rkey0 ^ in5; - b6 = rkey0 ^ in6; - b7 = rkey0 ^ in7; + in0 = VEC_LOAD_BE_NOSWAP (in, 0); + in1 = VEC_LOAD_BE_NOSWAP (in, 1); + in2 = VEC_LOAD_BE_NOSWAP (in, 2); + in3 = VEC_LOAD_BE_NOSWAP (in, 3); + in0 = VEC_BE_SWAP (in0, bige_const); + in1 = VEC_BE_SWAP (in1, bige_const); + in4 = VEC_LOAD_BE_NOSWAP (in, 4); + in5 = VEC_LOAD_BE_NOSWAP (in, 5); + in2 = VEC_BE_SWAP (in2, bige_const); + in3 = VEC_BE_SWAP (in3, bige_const); + in6 = VEC_LOAD_BE_NOSWAP (in, 6); + in7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + b0 = asm_xor (rkey0, in0); + b1 = asm_xor (rkey0, in1); + in4 = VEC_BE_SWAP (in4, bige_const); + in5 = VEC_BE_SWAP (in5, bige_const); + b2 = asm_xor (rkey0, in2); + b3 = asm_xor (rkey0, in3); + in6 = VEC_BE_SWAP (in6, bige_const); + in7 = VEC_BE_SWAP (in7, bige_const); + b4 = asm_xor (rkey0, in4); + b5 = asm_xor (rkey0, in5); + b6 = asm_xor (rkey0, in6); + b7 = asm_xor (rkey0, in7); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD(&rk[r]); \ - b0 = vec_ncipher_be (b0, rkey); \ - b1 = vec_ncipher_be (b1, rkey); \ - b2 = vec_ncipher_be (b2, rkey); \ - b3 = vec_ncipher_be (b3, rkey); \ - b4 = vec_ncipher_be (b4, rkey); \ - b5 = vec_ncipher_be (b5, rkey); \ - b6 = vec_ncipher_be (b6, rkey); \ - b7 = vec_ncipher_be (b7, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); \ + b4 = asm_ncipher_be (b4, rkey); \ + b5 = asm_ncipher_be (b5, rkey); \ + b6 = asm_ncipher_be (b6, rkey); \ + b7 = asm_ncipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); @@ -757,48 +888,60 @@ void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv_arg, #undef DO_ROUND - rkey = rkeylast; - b0 = vec_ncipherlast_be (b0, rkey ^ iv); - b1 = vec_ncipherlast_be (b1, rkey ^ in0); - b2 = vec_ncipherlast_be (b2, rkey ^ in1); - b3 = vec_ncipherlast_be (b3, rkey ^ in2); - b4 = vec_ncipherlast_be (b4, rkey ^ in3); - b5 = vec_ncipherlast_be (b5, rkey ^ in4); - b6 = vec_ncipherlast_be (b6, rkey ^ in5); - b7 = vec_ncipherlast_be (b7, rkey ^ in6); + iv = asm_xor (rkeylast, iv); + in0 = asm_xor (rkeylast, in0); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + b0 = asm_ncipherlast_be (b0, iv); iv = in7; - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); - VEC_STORE_BE (out + 4, b4, bige_const); - VEC_STORE_BE (out + 5, b5, bige_const); - VEC_STORE_BE (out + 6, b6, bige_const); - VEC_STORE_BE (out + 7, b7, bige_const); - - in += 8; + b1 = asm_ncipherlast_be (b1, in0); + in3 = asm_xor (rkeylast, in3); + in4 = asm_xor (rkeylast, in4); + b2 = asm_ncipherlast_be (b2, in1); + b3 = asm_ncipherlast_be (b3, in2); + in5 = asm_xor (rkeylast, in5); + in6 = asm_xor (rkeylast, in6); + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b4 = asm_ncipherlast_be (b4, in3); + b5 = asm_ncipherlast_be (b5, in4); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b6 = asm_ncipherlast_be (b6, in5); + b7 = asm_ncipherlast_be (b7, in6); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4) { - in0 = VEC_LOAD_BE (in + 0, bige_const); - in1 = VEC_LOAD_BE (in + 1, bige_const); - in2 = VEC_LOAD_BE (in + 2, bige_const); - in3 = VEC_LOAD_BE (in + 3, bige_const); + in0 = VEC_LOAD_BE (in, 0, bige_const); + in1 = VEC_LOAD_BE (in, 1, bige_const); + in2 = VEC_LOAD_BE (in, 2, bige_const); + in3 = VEC_LOAD_BE (in, 3, bige_const); - b0 = rkey0 ^ in0; - b1 = rkey0 ^ in1; - b2 = rkey0 ^ in2; - b3 = rkey0 ^ in3; + b0 = asm_xor (rkey0, in0); + b1 = asm_xor (rkey0, in1); + b2 = asm_xor (rkey0, in2); + b3 = asm_xor (rkey0, in3); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD(&rk[r]); \ - b0 = vec_ncipher_be (b0, rkey); \ - b1 = vec_ncipher_be (b1, rkey); \ - b2 = vec_ncipher_be (b2, rkey); \ - b3 = vec_ncipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -822,17 +965,21 @@ void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv_arg, #undef DO_ROUND - rkey = rkeylast; - b0 = vec_ncipherlast_be (b0, rkey ^ iv); - b1 = vec_ncipherlast_be (b1, rkey ^ in0); - b2 = vec_ncipherlast_be (b2, rkey ^ in1); - b3 = vec_ncipherlast_be (b3, rkey ^ in2); + iv = asm_xor (rkeylast, iv); + in0 = asm_xor (rkeylast, in0); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + + b0 = asm_ncipherlast_be (b0, iv); iv = in3; + b1 = asm_ncipherlast_be (b1, in0); + b2 = asm_ncipherlast_be (b2, in1); + b3 = asm_ncipherlast_be (b3, in2); - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); in += 4; out += 4; @@ -843,17 +990,17 @@ void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv_arg, { rkeylast = rkeylast_orig ^ iv; - iv = VEC_LOAD_BE (in, bige_const); + iv = VEC_LOAD_BE (in, 0, bige_const); b = iv; AES_DECRYPT (b, rounds); - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); in++; out++; } - VEC_STORE_BE (iv_arg, iv, bige_const); + VEC_STORE_BE (iv_arg, 0, iv, bige_const); } @@ -863,7 +1010,7 @@ void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr_arg, { static const unsigned char vec_one_const[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *rk = (u128_t *)&ctx->keyschenc; const u128_t *in = (const u128_t *)inbuf_arg; @@ -873,56 +1020,80 @@ void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr_arg, block rkeylast_orig; block ctr, b, one; - ctr = VEC_LOAD_BE (ctr_arg, bige_const); - one = VEC_LOAD_BE (&vec_one_const, bige_const); + ctr = VEC_LOAD_BE (ctr_arg, 0, bige_const); + one = VEC_LOAD_BE (&vec_one_const, 0, bige_const); PRELOAD_ROUND_KEYS (rounds); rkeylast_orig = rkeylast; if (nblocks >= 4) { + block in0, in1, in2, in3, in4, in5, in6, in7; block b0, b1, b2, b3, b4, b5, b6, b7; block two, three, four; - block ctr4; block rkey; - two = vec_add_uint128 (one, one); - three = vec_add_uint128 (two, one); - four = vec_add_uint128 (two, two); + two = asm_add_uint128 (one, one); + three = asm_add_uint128 (two, one); + four = asm_add_uint128 (two, two); for (; nblocks >= 8; nblocks -= 8) { - ctr4 = vec_add_uint128 (ctr, four); - b0 = rkey0 ^ ctr; - b1 = rkey0 ^ vec_add_uint128 (ctr, one); - b2 = rkey0 ^ vec_add_uint128 (ctr, two); - b3 = rkey0 ^ vec_add_uint128 (ctr, three); - b4 = rkey0 ^ ctr4; - b5 = rkey0 ^ vec_add_uint128 (ctr4, one); - b6 = rkey0 ^ vec_add_uint128 (ctr4, two); - b7 = rkey0 ^ vec_add_uint128 (ctr4, three); - ctr = vec_add_uint128 (ctr4, four); + b1 = asm_add_uint128 (ctr, one); + b2 = asm_add_uint128 (ctr, two); + b3 = asm_add_uint128 (ctr, three); + b4 = asm_add_uint128 (ctr, four); + b5 = asm_add_uint128 (b1, four); + b6 = asm_add_uint128 (b2, four); + b7 = asm_add_uint128 (b3, four); + b0 = asm_xor (rkey0, ctr); + rkey = ALIGNED_LOAD (rk, 1); + ctr = asm_add_uint128 (b4, four); + b1 = asm_xor (rkey0, b1); + b2 = asm_xor (rkey0, b2); + b3 = asm_xor (rkey0, b3); + b0 = asm_cipher_be (b0, rkey); + b1 = asm_cipher_be (b1, rkey); + b2 = asm_cipher_be (b2, rkey); + b3 = asm_cipher_be (b3, rkey); + b4 = asm_xor (rkey0, b4); + b5 = asm_xor (rkey0, b5); + b6 = asm_xor (rkey0, b6); + b7 = asm_xor (rkey0, b7); + b4 = asm_cipher_be (b4, rkey); + b5 = asm_cipher_be (b5, rkey); + b6 = asm_cipher_be (b6, rkey); + b7 = asm_cipher_be (b7, rkey); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD(&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); \ - b4 = vec_cipher_be (b4, rkey); \ - b5 = vec_cipher_be (b5, rkey); \ - b6 = vec_cipher_be (b6, rkey); \ - b7 = vec_cipher_be (b7, rkey); - - DO_ROUND(1); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); + + in0 = VEC_LOAD_BE_NOSWAP (in, 0); DO_ROUND(2); + in1 = VEC_LOAD_BE_NOSWAP (in, 1); DO_ROUND(3); + in2 = VEC_LOAD_BE_NOSWAP (in, 2); DO_ROUND(4); + in3 = VEC_LOAD_BE_NOSWAP (in, 3); DO_ROUND(5); + in4 = VEC_LOAD_BE_NOSWAP (in, 4); DO_ROUND(6); + in5 = VEC_LOAD_BE_NOSWAP (in, 5); DO_ROUND(7); + in6 = VEC_LOAD_BE_NOSWAP (in, 6); DO_ROUND(8); + in7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; DO_ROUND(9); + if (rounds >= 12) { DO_ROUND(10); @@ -936,43 +1107,68 @@ void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr_arg, #undef DO_ROUND - rkey = rkeylast; - b0 = vec_cipherlast_be (b0, rkey ^ VEC_LOAD_BE (in + 0, bige_const)); - b1 = vec_cipherlast_be (b1, rkey ^ VEC_LOAD_BE (in + 1, bige_const)); - b2 = vec_cipherlast_be (b2, rkey ^ VEC_LOAD_BE (in + 2, bige_const)); - b3 = vec_cipherlast_be (b3, rkey ^ VEC_LOAD_BE (in + 3, bige_const)); - b4 = vec_cipherlast_be (b4, rkey ^ VEC_LOAD_BE (in + 4, bige_const)); - b5 = vec_cipherlast_be (b5, rkey ^ VEC_LOAD_BE (in + 5, bige_const)); - b6 = vec_cipherlast_be (b6, rkey ^ VEC_LOAD_BE (in + 6, bige_const)); - b7 = vec_cipherlast_be (b7, rkey ^ VEC_LOAD_BE (in + 7, bige_const)); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); - VEC_STORE_BE (out + 4, b4, bige_const); - VEC_STORE_BE (out + 5, b5, bige_const); - VEC_STORE_BE (out + 6, b6, bige_const); - VEC_STORE_BE (out + 7, b7, bige_const); - - in += 8; + in0 = VEC_BE_SWAP (in0, bige_const); + in1 = VEC_BE_SWAP (in1, bige_const); + in2 = VEC_BE_SWAP (in2, bige_const); + in3 = VEC_BE_SWAP (in3, bige_const); + in4 = VEC_BE_SWAP (in4, bige_const); + in5 = VEC_BE_SWAP (in5, bige_const); + in6 = VEC_BE_SWAP (in6, bige_const); + in7 = VEC_BE_SWAP (in7, bige_const); + + in0 = asm_xor (rkeylast, in0); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + in3 = asm_xor (rkeylast, in3); + b0 = asm_cipherlast_be (b0, in0); + b1 = asm_cipherlast_be (b1, in1); + in4 = asm_xor (rkeylast, in4); + in5 = asm_xor (rkeylast, in5); + b2 = asm_cipherlast_be (b2, in2); + b3 = asm_cipherlast_be (b3, in3); + in6 = asm_xor (rkeylast, in6); + in7 = asm_xor (rkeylast, in7); + b4 = asm_cipherlast_be (b4, in4); + b5 = asm_cipherlast_be (b5, in5); + b6 = asm_cipherlast_be (b6, in6); + b7 = asm_cipherlast_be (b7, in7); + + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4) { - b0 = rkey0 ^ ctr; - b1 = rkey0 ^ vec_add_uint128 (ctr, one); - b2 = rkey0 ^ vec_add_uint128 (ctr, two); - b3 = rkey0 ^ vec_add_uint128 (ctr, three); - ctr = vec_add_uint128 (ctr, four); + b1 = asm_add_uint128 (ctr, one); + b2 = asm_add_uint128 (ctr, two); + b3 = asm_add_uint128 (ctr, three); + b0 = asm_xor (rkey0, ctr); + ctr = asm_add_uint128 (ctr, four); + b1 = asm_xor (rkey0, b1); + b2 = asm_xor (rkey0, b2); + b3 = asm_xor (rkey0, b3); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD(&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -982,6 +1178,12 @@ void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr_arg, DO_ROUND(6); DO_ROUND(7); DO_ROUND(8); + + in0 = VEC_LOAD_BE (in, 0, bige_const); + in1 = VEC_LOAD_BE (in, 1, bige_const); + in2 = VEC_LOAD_BE (in, 2, bige_const); + in3 = VEC_LOAD_BE (in, 3, bige_const); + DO_ROUND(9); if (rounds >= 12) { @@ -996,16 +1198,21 @@ void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr_arg, #undef DO_ROUND - rkey = rkeylast; - b0 = vec_cipherlast_be (b0, rkey ^ VEC_LOAD_BE (in + 0, bige_const)); - b1 = vec_cipherlast_be (b1, rkey ^ VEC_LOAD_BE (in + 1, bige_const)); - b2 = vec_cipherlast_be (b2, rkey ^ VEC_LOAD_BE (in + 2, bige_const)); - b3 = vec_cipherlast_be (b3, rkey ^ VEC_LOAD_BE (in + 3, bige_const)); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); + in0 = asm_xor (rkeylast, in0); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + in3 = asm_xor (rkeylast, in3); + + b0 = asm_cipherlast_be (b0, in0); + b1 = asm_cipherlast_be (b1, in1); + b2 = asm_cipherlast_be (b2, in2); + b3 = asm_cipherlast_be (b3, in3); + + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); + in += 4; out += 4; nblocks -= 4; @@ -1015,18 +1222,18 @@ void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr_arg, for (; nblocks; nblocks--) { b = ctr; - ctr = vec_add_uint128 (ctr, one); - rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, bige_const); + ctr = asm_add_uint128 (ctr, one); + rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const); AES_ENCRYPT (b, rounds); - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); out++; in++; } - VEC_STORE_BE (ctr_arg, ctr, bige_const); + VEC_STORE_BE (ctr_arg, 0, ctr, bige_const); } @@ -1034,7 +1241,7 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = (void *)&c->context.c; const u128_t *in = (const u128_t *)inbuf_arg; u128_t *out = (u128_t *)outbuf_arg; @@ -1043,16 +1250,16 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, block l0, l1, l2, l; block b0, b1, b2, b3, b4, b5, b6, b7, b; block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7; - block rkey; + block rkey, rkeylf; block ctr, iv; ROUND_KEY_VARIABLES; - iv = VEC_LOAD_BE (c->u_iv.iv, bige_const); - ctr = VEC_LOAD_BE (c->u_ctr.ctr, bige_const); + iv = VEC_LOAD_BE (c->u_iv.iv, 0, bige_const); + ctr = VEC_LOAD_BE (c->u_ctr.ctr, 0, bige_const); - l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], bige_const); - l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], bige_const); - l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], bige_const); + l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const); + l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const); + l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const); if (encrypt) { @@ -1062,8 +1269,8 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, for (; nblocks >= 8 && data_nblocks % 8; nblocks--) { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), bige_const); - b = VEC_LOAD_BE (in, bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; @@ -1074,7 +1281,7 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, AES_ENCRYPT (b, rounds); b ^= iv; - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); in += 1; out += 1; @@ -1082,16 +1289,25 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, for (; nblocks >= 8; nblocks -= 8) { - b0 = VEC_LOAD_BE (in + 0, bige_const); - b1 = VEC_LOAD_BE (in + 1, bige_const); - b2 = VEC_LOAD_BE (in + 2, bige_const); - b3 = VEC_LOAD_BE (in + 3, bige_const); - b4 = VEC_LOAD_BE (in + 4, bige_const); - b5 = VEC_LOAD_BE (in + 5, bige_const); - b6 = VEC_LOAD_BE (in + 6, bige_const); - b7 = VEC_LOAD_BE (in + 7, bige_const); - - l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), bige_const); + b0 = VEC_LOAD_BE_NOSWAP (in, 0); + b1 = VEC_LOAD_BE_NOSWAP (in, 1); + b2 = VEC_LOAD_BE_NOSWAP (in, 2); + b3 = VEC_LOAD_BE_NOSWAP (in, 3); + b4 = VEC_LOAD_BE_NOSWAP (in, 4); + b5 = VEC_LOAD_BE_NOSWAP (in, 5); + b6 = VEC_LOAD_BE_NOSWAP (in, 6); + b7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0); + b0 = VEC_BE_SWAP(b0, bige_const); + b1 = VEC_BE_SWAP(b1, bige_const); + b2 = VEC_BE_SWAP(b2, bige_const); + b3 = VEC_BE_SWAP(b3, bige_const); + b4 = VEC_BE_SWAP(b4, bige_const); + b5 = VEC_BE_SWAP(b5, bige_const); + b6 = VEC_BE_SWAP(b6, bige_const); + b7 = VEC_BE_SWAP(b7, bige_const); + l = VEC_BE_SWAP(l, bige_const); ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; @@ -1117,15 +1333,15 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, iv = iv7 ^ rkey0; #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); \ - b4 = vec_cipher_be (b4, rkey); \ - b5 = vec_cipher_be (b5, rkey); \ - b6 = vec_cipher_be (b6, rkey); \ - b7 = vec_cipher_be (b7, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1134,7 +1350,20 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); + + rkeylf = asm_xor (rkeylast, rkey0); + DO_ROUND(8); + + iv0 = asm_xor (rkeylf, iv0); + iv1 = asm_xor (rkeylf, iv1); + iv2 = asm_xor (rkeylf, iv2); + iv3 = asm_xor (rkeylf, iv3); + iv4 = asm_xor (rkeylf, iv4); + iv5 = asm_xor (rkeylf, iv5); + iv6 = asm_xor (rkeylf, iv6); + iv7 = asm_xor (rkeylf, iv7); + DO_ROUND(9); if (rounds >= 12) { @@ -1149,37 +1378,42 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, #undef DO_ROUND - rkey = rkeylast ^ rkey0; - b0 = vec_cipherlast_be (b0, rkey ^ iv0); - b1 = vec_cipherlast_be (b1, rkey ^ iv1); - b2 = vec_cipherlast_be (b2, rkey ^ iv2); - b3 = vec_cipherlast_be (b3, rkey ^ iv3); - b4 = vec_cipherlast_be (b4, rkey ^ iv4); - b5 = vec_cipherlast_be (b5, rkey ^ iv5); - b6 = vec_cipherlast_be (b6, rkey ^ iv6); - b7 = vec_cipherlast_be (b7, rkey ^ iv7); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); - VEC_STORE_BE (out + 4, b4, bige_const); - VEC_STORE_BE (out + 5, b5, bige_const); - VEC_STORE_BE (out + 6, b6, bige_const); - VEC_STORE_BE (out + 7, b7, bige_const); - - in += 8; + b0 = asm_cipherlast_be (b0, iv0); + b1 = asm_cipherlast_be (b1, iv1); + b2 = asm_cipherlast_be (b2, iv2); + b3 = asm_cipherlast_be (b3, iv3); + b4 = asm_cipherlast_be (b4, iv4); + b5 = asm_cipherlast_be (b5, iv5); + b6 = asm_cipherlast_be (b6, iv6); + b7 = asm_cipherlast_be (b7, iv7); + + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4 && (data_nblocks % 4) == 0) { - b0 = VEC_LOAD_BE (in + 0, bige_const); - b1 = VEC_LOAD_BE (in + 1, bige_const); - b2 = VEC_LOAD_BE (in + 2, bige_const); - b3 = VEC_LOAD_BE (in + 3, bige_const); + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); - l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const); ctr ^= b0 ^ b1 ^ b2 ^ b3; @@ -1197,11 +1431,11 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, iv = iv3 ^ rkey0; #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1226,15 +1460,15 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, #undef DO_ROUND rkey = rkeylast ^ rkey0; - b0 = vec_cipherlast_be (b0, rkey ^ iv0); - b1 = vec_cipherlast_be (b1, rkey ^ iv1); - b2 = vec_cipherlast_be (b2, rkey ^ iv2); - b3 = vec_cipherlast_be (b3, rkey ^ iv3); + b0 = asm_cipherlast_be (b0, rkey ^ iv0); + b1 = asm_cipherlast_be (b1, rkey ^ iv1); + b2 = asm_cipherlast_be (b2, rkey ^ iv2); + b3 = asm_cipherlast_be (b3, rkey ^ iv3); - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); in += 4; out += 4; @@ -1243,8 +1477,8 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, for (; nblocks; nblocks--) { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), bige_const); - b = VEC_LOAD_BE (in, bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; @@ -1255,7 +1489,7 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, AES_ENCRYPT (b, rounds); b ^= iv; - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); in += 1; out += 1; @@ -1275,8 +1509,8 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, for (; nblocks >= 8 && data_nblocks % 8; nblocks--) { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), bige_const); - b = VEC_LOAD_BE (in, bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; @@ -1287,7 +1521,7 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, /* Checksum_i = Checksum_{i-1} xor P_i */ ctr ^= b; - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); in += 1; out += 1; @@ -1295,16 +1529,25 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, for (; nblocks >= 8; nblocks -= 8) { - b0 = VEC_LOAD_BE (in + 0, bige_const); - b1 = VEC_LOAD_BE (in + 1, bige_const); - b2 = VEC_LOAD_BE (in + 2, bige_const); - b3 = VEC_LOAD_BE (in + 3, bige_const); - b4 = VEC_LOAD_BE (in + 4, bige_const); - b5 = VEC_LOAD_BE (in + 5, bige_const); - b6 = VEC_LOAD_BE (in + 6, bige_const); - b7 = VEC_LOAD_BE (in + 7, bige_const); - - l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), bige_const); + b0 = VEC_LOAD_BE_NOSWAP (in, 0); + b1 = VEC_LOAD_BE_NOSWAP (in, 1); + b2 = VEC_LOAD_BE_NOSWAP (in, 2); + b3 = VEC_LOAD_BE_NOSWAP (in, 3); + b4 = VEC_LOAD_BE_NOSWAP (in, 4); + b5 = VEC_LOAD_BE_NOSWAP (in, 5); + b6 = VEC_LOAD_BE_NOSWAP (in, 6); + b7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0); + b0 = VEC_BE_SWAP(b0, bige_const); + b1 = VEC_BE_SWAP(b1, bige_const); + b2 = VEC_BE_SWAP(b2, bige_const); + b3 = VEC_BE_SWAP(b3, bige_const); + b4 = VEC_BE_SWAP(b4, bige_const); + b5 = VEC_BE_SWAP(b5, bige_const); + b6 = VEC_BE_SWAP(b6, bige_const); + b7 = VEC_BE_SWAP(b7, bige_const); + l = VEC_BE_SWAP(l, bige_const); iv ^= rkey0; @@ -1328,15 +1571,15 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, iv = iv7 ^ rkey0; #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_ncipher_be (b0, rkey); \ - b1 = vec_ncipher_be (b1, rkey); \ - b2 = vec_ncipher_be (b2, rkey); \ - b3 = vec_ncipher_be (b3, rkey); \ - b4 = vec_ncipher_be (b4, rkey); \ - b5 = vec_ncipher_be (b5, rkey); \ - b6 = vec_ncipher_be (b6, rkey); \ - b7 = vec_ncipher_be (b7, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); \ + b4 = asm_ncipher_be (b4, rkey); \ + b5 = asm_ncipher_be (b5, rkey); \ + b6 = asm_ncipher_be (b6, rkey); \ + b7 = asm_ncipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1345,7 +1588,20 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); + + rkeylf = asm_xor (rkeylast, rkey0); + DO_ROUND(8); + + iv0 = asm_xor (rkeylf, iv0); + iv1 = asm_xor (rkeylf, iv1); + iv2 = asm_xor (rkeylf, iv2); + iv3 = asm_xor (rkeylf, iv3); + iv4 = asm_xor (rkeylf, iv4); + iv5 = asm_xor (rkeylf, iv5); + iv6 = asm_xor (rkeylf, iv6); + iv7 = asm_xor (rkeylf, iv7); + DO_ROUND(9); if (rounds >= 12) { @@ -1360,39 +1616,44 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, #undef DO_ROUND - rkey = rkeylast ^ rkey0; - b0 = vec_ncipherlast_be (b0, rkey ^ iv0); - b1 = vec_ncipherlast_be (b1, rkey ^ iv1); - b2 = vec_ncipherlast_be (b2, rkey ^ iv2); - b3 = vec_ncipherlast_be (b3, rkey ^ iv3); - b4 = vec_ncipherlast_be (b4, rkey ^ iv4); - b5 = vec_ncipherlast_be (b5, rkey ^ iv5); - b6 = vec_ncipherlast_be (b6, rkey ^ iv6); - b7 = vec_ncipherlast_be (b7, rkey ^ iv7); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); - VEC_STORE_BE (out + 4, b4, bige_const); - VEC_STORE_BE (out + 5, b5, bige_const); - VEC_STORE_BE (out + 6, b6, bige_const); - VEC_STORE_BE (out + 7, b7, bige_const); + b0 = asm_ncipherlast_be (b0, iv0); + b1 = asm_ncipherlast_be (b1, iv1); + b2 = asm_ncipherlast_be (b2, iv2); + b3 = asm_ncipherlast_be (b3, iv3); + b4 = asm_ncipherlast_be (b4, iv4); + b5 = asm_ncipherlast_be (b5, iv5); + b6 = asm_ncipherlast_be (b6, iv6); + b7 = asm_ncipherlast_be (b7, iv7); ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; - in += 8; + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4 && (data_nblocks % 4) == 0) { - b0 = VEC_LOAD_BE (in + 0, bige_const); - b1 = VEC_LOAD_BE (in + 1, bige_const); - b2 = VEC_LOAD_BE (in + 2, bige_const); - b3 = VEC_LOAD_BE (in + 3, bige_const); + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); - l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const); iv ^= rkey0; @@ -1408,11 +1669,11 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, iv = iv3 ^ rkey0; #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_ncipher_be (b0, rkey); \ - b1 = vec_ncipher_be (b1, rkey); \ - b2 = vec_ncipher_be (b2, rkey); \ - b3 = vec_ncipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1437,15 +1698,15 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, #undef DO_ROUND rkey = rkeylast ^ rkey0; - b0 = vec_ncipherlast_be (b0, rkey ^ iv0); - b1 = vec_ncipherlast_be (b1, rkey ^ iv1); - b2 = vec_ncipherlast_be (b2, rkey ^ iv2); - b3 = vec_ncipherlast_be (b3, rkey ^ iv3); + b0 = asm_ncipherlast_be (b0, rkey ^ iv0); + b1 = asm_ncipherlast_be (b1, rkey ^ iv1); + b2 = asm_ncipherlast_be (b2, rkey ^ iv2); + b3 = asm_ncipherlast_be (b3, rkey ^ iv3); - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); ctr ^= b0 ^ b1 ^ b2 ^ b3; @@ -1456,8 +1717,8 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, for (; nblocks; nblocks--) { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), bige_const); - b = VEC_LOAD_BE (in, bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; @@ -1468,15 +1729,15 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, /* Checksum_i = Checksum_{i-1} xor P_i */ ctr ^= b; - VEC_STORE_BE (out, b, bige_const); + VEC_STORE_BE (out, 0, b, bige_const); in += 1; out += 1; } } - VEC_STORE_BE (c->u_iv.iv, iv, bige_const); - VEC_STORE_BE (c->u_ctr.ctr, ctr, bige_const); + VEC_STORE_BE (c->u_iv.iv, 0, iv, bige_const); + VEC_STORE_BE (c->u_ctr.ctr, 0, ctr, bige_const); c->u_mode.ocb.data_nblocks = data_nblocks; return 0; @@ -1485,7 +1746,7 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, size_t nblocks) { - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = (void *)&c->context.c; const u128_t *rk = (u128_t *)&ctx->keyschenc; const u128_t *abuf = (const u128_t *)abuf_arg; @@ -1498,19 +1759,19 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, block ctr, iv; ROUND_KEY_VARIABLES; - iv = VEC_LOAD_BE (c->u_mode.ocb.aad_offset, bige_const); - ctr = VEC_LOAD_BE (c->u_mode.ocb.aad_sum, bige_const); + iv = VEC_LOAD_BE (c->u_mode.ocb.aad_offset, 0, bige_const); + ctr = VEC_LOAD_BE (c->u_mode.ocb.aad_sum, 0, bige_const); - l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], bige_const); - l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], bige_const); - l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], bige_const); + l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const); + l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const); + l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const); PRELOAD_ROUND_KEYS (rounds); for (; nblocks >= 8 && data_nblocks % 8; nblocks--) { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), bige_const); - b = VEC_LOAD_BE (abuf, bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (abuf, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; @@ -1524,16 +1785,16 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, for (; nblocks >= 8; nblocks -= 8) { - b0 = VEC_LOAD_BE (abuf + 0, bige_const); - b1 = VEC_LOAD_BE (abuf + 1, bige_const); - b2 = VEC_LOAD_BE (abuf + 2, bige_const); - b3 = VEC_LOAD_BE (abuf + 3, bige_const); - b4 = VEC_LOAD_BE (abuf + 4, bige_const); - b5 = VEC_LOAD_BE (abuf + 5, bige_const); - b6 = VEC_LOAD_BE (abuf + 6, bige_const); - b7 = VEC_LOAD_BE (abuf + 7, bige_const); + b0 = VEC_LOAD_BE (abuf, 0, bige_const); + b1 = VEC_LOAD_BE (abuf, 1, bige_const); + b2 = VEC_LOAD_BE (abuf, 2, bige_const); + b3 = VEC_LOAD_BE (abuf, 3, bige_const); + b4 = VEC_LOAD_BE (abuf, 4, bige_const); + b5 = VEC_LOAD_BE (abuf, 5, bige_const); + b6 = VEC_LOAD_BE (abuf, 6, bige_const); + b7 = VEC_LOAD_BE (abuf, 7, bige_const); - l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), 0, bige_const); frkey = rkey0; iv ^= frkey; @@ -1558,15 +1819,15 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, iv = iv7 ^ frkey; #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); \ - b4 = vec_cipher_be (b4, rkey); \ - b5 = vec_cipher_be (b5, rkey); \ - b6 = vec_cipher_be (b6, rkey); \ - b7 = vec_cipher_be (b7, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1591,14 +1852,14 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, #undef DO_ROUND rkey = rkeylast; - b0 = vec_cipherlast_be (b0, rkey); - b1 = vec_cipherlast_be (b1, rkey); - b2 = vec_cipherlast_be (b2, rkey); - b3 = vec_cipherlast_be (b3, rkey); - b4 = vec_cipherlast_be (b4, rkey); - b5 = vec_cipherlast_be (b5, rkey); - b6 = vec_cipherlast_be (b6, rkey); - b7 = vec_cipherlast_be (b7, rkey); + b0 = asm_cipherlast_be (b0, rkey); + b1 = asm_cipherlast_be (b1, rkey); + b2 = asm_cipherlast_be (b2, rkey); + b3 = asm_cipherlast_be (b3, rkey); + b4 = asm_cipherlast_be (b4, rkey); + b5 = asm_cipherlast_be (b5, rkey); + b6 = asm_cipherlast_be (b6, rkey); + b7 = asm_cipherlast_be (b7, rkey); ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; @@ -1607,12 +1868,12 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, if (nblocks >= 4 && (data_nblocks % 4) == 0) { - b0 = VEC_LOAD_BE (abuf + 0, bige_const); - b1 = VEC_LOAD_BE (abuf + 1, bige_const); - b2 = VEC_LOAD_BE (abuf + 2, bige_const); - b3 = VEC_LOAD_BE (abuf + 3, bige_const); + b0 = VEC_LOAD_BE (abuf, 0, bige_const); + b1 = VEC_LOAD_BE (abuf, 1, bige_const); + b2 = VEC_LOAD_BE (abuf, 2, bige_const); + b3 = VEC_LOAD_BE (abuf, 3, bige_const); - l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const); frkey = rkey0; iv ^= frkey; @@ -1629,11 +1890,11 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, iv = iv3 ^ frkey; #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1658,10 +1919,10 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, #undef DO_ROUND rkey = rkeylast; - b0 = vec_cipherlast_be (b0, rkey); - b1 = vec_cipherlast_be (b1, rkey); - b2 = vec_cipherlast_be (b2, rkey); - b3 = vec_cipherlast_be (b3, rkey); + b0 = asm_cipherlast_be (b0, rkey); + b1 = asm_cipherlast_be (b1, rkey); + b2 = asm_cipherlast_be (b2, rkey); + b3 = asm_cipherlast_be (b3, rkey); ctr ^= b0 ^ b1 ^ b2 ^ b3; @@ -1671,8 +1932,8 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, for (; nblocks; nblocks--) { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), bige_const); - b = VEC_LOAD_BE (abuf, bige_const); + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (abuf, 0, bige_const); /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ iv ^= l; @@ -1684,8 +1945,8 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, abuf += 1; } - VEC_STORE_BE (c->u_mode.ocb.aad_offset, iv, bige_const); - VEC_STORE_BE (c->u_mode.ocb.aad_sum, ctr, bige_const); + VEC_STORE_BE (c->u_mode.ocb.aad_offset, 0, iv, bige_const); + VEC_STORE_BE (c->u_mode.ocb.aad_sum, 0, ctr, bige_const); c->u_mode.ocb.aad_nblocks = data_nblocks; return 0; @@ -1696,44 +1957,59 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { +#ifdef WORDS_BIGENDIAN static const block vec_bswap64_const = - { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }; + { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 }; static const block vec_bswap128_const = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; +#else + static const block vec_bswap64_const = + { ~8, ~9, ~10, ~11, ~12, ~13, ~14, ~15, ~0, ~1, ~2, ~3, ~4, ~5, ~6, ~7 }; + static const block vec_bswap128_const = + { ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8, ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0 }; + static const block vec_tweakin_swap_const = + { ~12, ~13, ~14, ~15, ~8, ~9, ~10, ~11, ~4, ~5, ~6, ~7, ~0, ~1, ~2, ~3 }; +#endif static const unsigned char vec_tweak_const[16] = { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0x87 }; static const vector unsigned long long vec_shift63_const = { 63, 63 }; static const vector unsigned long long vec_shift1_const = { 1, 1 }; - const block bige_const = vec_load_be_const(); + const block bige_const = asm_load_be_const(); RIJNDAEL_context *ctx = context; const u128_t *in = (const u128_t *)inbuf_arg; u128_t *out = (u128_t *)outbuf_arg; int rounds = ctx->rounds; - block tweak_tmp, tweak_next, tweak; - block b0, b1, b2, b3, b4, b5, b6, b7, b, rkey; + block tweak; + block b0, b1, b2, b3, b4, b5, b6, b7, b, rkey, rkeylf; block tweak0, tweak1, tweak2, tweak3, tweak4, tweak5, tweak6, tweak7; block tweak_const, bswap64_const, bswap128_const; vector unsigned long long shift63_const, shift1_const; ROUND_KEY_VARIABLES; - tweak_const = VEC_LOAD_BE (&vec_tweak_const, bige_const); - bswap64_const = ALIGNED_LOAD (&vec_bswap64_const); - bswap128_const = ALIGNED_LOAD (&vec_bswap128_const); - shift63_const = (vector unsigned long long)ALIGNED_LOAD (&vec_shift63_const); - shift1_const = (vector unsigned long long)ALIGNED_LOAD (&vec_shift1_const); + tweak_const = VEC_LOAD_BE (&vec_tweak_const, 0, bige_const); + bswap64_const = ALIGNED_LOAD (&vec_bswap64_const, 0); + bswap128_const = ALIGNED_LOAD (&vec_bswap128_const, 0); + shift63_const = (vector unsigned long long)ALIGNED_LOAD (&vec_shift63_const, 0); + shift1_const = (vector unsigned long long)ALIGNED_LOAD (&vec_shift1_const, 0); - tweak_next = VEC_LOAD_BE (tweak_arg, bige_const); +#ifdef WORDS_BIGENDIAN + tweak = VEC_LOAD_BE (tweak_arg, 0, bige_const); + tweak = asm_vperm1 (tweak, bswap128_const); +#else + tweak = VEC_LOAD_BE (tweak_arg, 0, vec_tweakin_swap_const); +#endif -#define GEN_TWEAK(tweak, tmp) /* Generate next tweak. */ \ - tmp = vec_vperm(tweak, tweak, bswap64_const); \ - tweak = vec_vperm(tweak, tweak, bswap128_const); \ - tmp = (block)(vec_sra((vector unsigned long long)tmp, shift63_const)) & \ - tweak_const; \ - tweak = (block)vec_sl((vector unsigned long long)tweak, shift1_const); \ - tweak = tweak ^ tmp; \ - tweak = vec_vperm(tweak, tweak, bswap128_const); +#define GEN_TWEAK(tout, tin) /* Generate next tweak. */ \ + do { \ + block tmp1, tmp2; \ + tmp1 = asm_vperm1((tin), bswap64_const); \ + tmp2 = (block)vec_sl((vector unsigned long long)(tin), shift1_const); \ + tmp1 = (block)(vec_sra((vector unsigned long long)tmp1, shift63_const)) & \ + tweak_const; \ + tout = asm_xor(tmp1, tmp2); \ + } while (0) if (encrypt) { @@ -1743,42 +2019,70 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, for (; nblocks >= 8; nblocks -= 8) { - tweak0 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak1 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak2 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak3 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak4 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak5 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak6 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak7 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - - b0 = VEC_LOAD_BE (in + 0, bige_const) ^ tweak0 ^ rkey0; - b1 = VEC_LOAD_BE (in + 1, bige_const) ^ tweak1 ^ rkey0; - b2 = VEC_LOAD_BE (in + 2, bige_const) ^ tweak2 ^ rkey0; - b3 = VEC_LOAD_BE (in + 3, bige_const) ^ tweak3 ^ rkey0; - b4 = VEC_LOAD_BE (in + 4, bige_const) ^ tweak4 ^ rkey0; - b5 = VEC_LOAD_BE (in + 5, bige_const) ^ tweak5 ^ rkey0; - b6 = VEC_LOAD_BE (in + 6, bige_const) ^ tweak6 ^ rkey0; - b7 = VEC_LOAD_BE (in + 7, bige_const) ^ tweak7 ^ rkey0; + b0 = VEC_LOAD_BE_NOSWAP (in, 0); + b1 = VEC_LOAD_BE_NOSWAP (in, 1); + b2 = VEC_LOAD_BE_NOSWAP (in, 2); + b3 = VEC_LOAD_BE_NOSWAP (in, 3); + tweak0 = tweak; + GEN_TWEAK (tweak1, tweak0); + tweak0 = asm_vperm1 (tweak0, bswap128_const); + b4 = VEC_LOAD_BE_NOSWAP (in, 4); + b5 = VEC_LOAD_BE_NOSWAP (in, 5); + GEN_TWEAK (tweak2, tweak1); + tweak1 = asm_vperm1 (tweak1, bswap128_const); + b6 = VEC_LOAD_BE_NOSWAP (in, 6); + b7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + + b0 = VEC_BE_SWAP(b0, bige_const); + b1 = VEC_BE_SWAP(b1, bige_const); + GEN_TWEAK (tweak3, tweak2); + tweak2 = asm_vperm1 (tweak2, bswap128_const); + GEN_TWEAK (tweak4, tweak3); + tweak3 = asm_vperm1 (tweak3, bswap128_const); + b2 = VEC_BE_SWAP(b2, bige_const); + b3 = VEC_BE_SWAP(b3, bige_const); + GEN_TWEAK (tweak5, tweak4); + tweak4 = asm_vperm1 (tweak4, bswap128_const); + GEN_TWEAK (tweak6, tweak5); + tweak5 = asm_vperm1 (tweak5, bswap128_const); + b4 = VEC_BE_SWAP(b4, bige_const); + b5 = VEC_BE_SWAP(b5, bige_const); + GEN_TWEAK (tweak7, tweak6); + tweak6 = asm_vperm1 (tweak6, bswap128_const); + GEN_TWEAK (tweak, tweak7); + tweak7 = asm_vperm1 (tweak7, bswap128_const); + b6 = VEC_BE_SWAP(b6, bige_const); + b7 = VEC_BE_SWAP(b7, bige_const); + + tweak0 = asm_xor (tweak0, rkey0); + tweak1 = asm_xor (tweak1, rkey0); + tweak2 = asm_xor (tweak2, rkey0); + tweak3 = asm_xor (tweak3, rkey0); + tweak4 = asm_xor (tweak4, rkey0); + tweak5 = asm_xor (tweak5, rkey0); + tweak6 = asm_xor (tweak6, rkey0); + tweak7 = asm_xor (tweak7, rkey0); + + b0 = asm_xor (b0, tweak0); + b1 = asm_xor (b1, tweak1); + b2 = asm_xor (b2, tweak2); + b3 = asm_xor (b3, tweak3); + b4 = asm_xor (b4, tweak4); + b5 = asm_xor (b5, tweak5); + b6 = asm_xor (b6, tweak6); + b7 = asm_xor (b7, tweak7); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); \ - b4 = vec_cipher_be (b4, rkey); \ - b5 = vec_cipher_be (b5, rkey); \ - b6 = vec_cipher_be (b6, rkey); \ - b7 = vec_cipher_be (b7, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1787,7 +2091,20 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); + + rkeylf = asm_xor (rkeylast, rkey0); + DO_ROUND(8); + + tweak0 = asm_xor (tweak0, rkeylf); + tweak1 = asm_xor (tweak1, rkeylf); + tweak2 = asm_xor (tweak2, rkeylf); + tweak3 = asm_xor (tweak3, rkeylf); + tweak4 = asm_xor (tweak4, rkeylf); + tweak5 = asm_xor (tweak5, rkeylf); + tweak6 = asm_xor (tweak6, rkeylf); + tweak7 = asm_xor (tweak7, rkeylf); + DO_ROUND(9); if (rounds >= 12) { @@ -1802,51 +2119,62 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, #undef DO_ROUND - rkey = rkeylast; - b0 = vec_cipherlast_be (b0, rkey ^ tweak0); - b1 = vec_cipherlast_be (b1, rkey ^ tweak1); - b2 = vec_cipherlast_be (b2, rkey ^ tweak2); - b3 = vec_cipherlast_be (b3, rkey ^ tweak3); - b4 = vec_cipherlast_be (b4, rkey ^ tweak4); - b5 = vec_cipherlast_be (b5, rkey ^ tweak5); - b6 = vec_cipherlast_be (b6, rkey ^ tweak6); - b7 = vec_cipherlast_be (b7, rkey ^ tweak7); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); - VEC_STORE_BE (out + 4, b4, bige_const); - VEC_STORE_BE (out + 5, b5, bige_const); - VEC_STORE_BE (out + 6, b6, bige_const); - VEC_STORE_BE (out + 7, b7, bige_const); - - in += 8; + b0 = asm_cipherlast_be (b0, tweak0); + b1 = asm_cipherlast_be (b1, tweak1); + b2 = asm_cipherlast_be (b2, tweak2); + b3 = asm_cipherlast_be (b3, tweak3); + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b4 = asm_cipherlast_be (b4, tweak4); + b5 = asm_cipherlast_be (b5, tweak5); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b6 = asm_cipherlast_be (b6, tweak6); + b7 = asm_cipherlast_be (b7, tweak7); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4) { - tweak0 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak1 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak2 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak3 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - - b0 = VEC_LOAD_BE (in + 0, bige_const) ^ tweak0 ^ rkey0; - b1 = VEC_LOAD_BE (in + 1, bige_const) ^ tweak1 ^ rkey0; - b2 = VEC_LOAD_BE (in + 2, bige_const) ^ tweak2 ^ rkey0; - b3 = VEC_LOAD_BE (in + 3, bige_const) ^ tweak3 ^ rkey0; + tweak0 = tweak; + GEN_TWEAK (tweak1, tweak0); + GEN_TWEAK (tweak2, tweak1); + GEN_TWEAK (tweak3, tweak2); + GEN_TWEAK (tweak, tweak3); + + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); + + tweak0 = asm_vperm1 (tweak0, bswap128_const); + tweak1 = asm_vperm1 (tweak1, bswap128_const); + tweak2 = asm_vperm1 (tweak2, bswap128_const); + tweak3 = asm_vperm1 (tweak3, bswap128_const); + + b0 ^= tweak0 ^ rkey0; + b1 ^= tweak1 ^ rkey0; + b2 ^= tweak2 ^ rkey0; + b3 ^= tweak3 ^ rkey0; #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_cipher_be (b0, rkey); \ - b1 = vec_cipher_be (b1, rkey); \ - b2 = vec_cipher_be (b2, rkey); \ - b3 = vec_cipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1871,15 +2199,15 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, #undef DO_ROUND rkey = rkeylast; - b0 = vec_cipherlast_be (b0, rkey ^ tweak0); - b1 = vec_cipherlast_be (b1, rkey ^ tweak1); - b2 = vec_cipherlast_be (b2, rkey ^ tweak2); - b3 = vec_cipherlast_be (b3, rkey ^ tweak3); + b0 = asm_cipherlast_be (b0, rkey ^ tweak0); + b1 = asm_cipherlast_be (b1, rkey ^ tweak1); + b2 = asm_cipherlast_be (b2, rkey ^ tweak2); + b3 = asm_cipherlast_be (b3, rkey ^ tweak3); - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); in += 4; out += 4; @@ -1888,18 +2216,18 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, for (; nblocks; nblocks--) { - tweak = tweak_next; + tweak0 = asm_vperm1 (tweak, bswap128_const); /* Xor-Encrypt/Decrypt-Xor block. */ - b = VEC_LOAD_BE (in, bige_const) ^ tweak; + b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0; /* Generate next tweak. */ - GEN_TWEAK (tweak_next, tweak_tmp); + GEN_TWEAK (tweak, tweak); AES_ENCRYPT (b, rounds); - b ^= tweak; - VEC_STORE_BE (out, b, bige_const); + b ^= tweak0; + VEC_STORE_BE (out, 0, b, bige_const); in++; out++; @@ -1919,42 +2247,70 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, for (; nblocks >= 8; nblocks -= 8) { - tweak0 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak1 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak2 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak3 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak4 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak5 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak6 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak7 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - - b0 = VEC_LOAD_BE (in + 0, bige_const) ^ tweak0 ^ rkey0; - b1 = VEC_LOAD_BE (in + 1, bige_const) ^ tweak1 ^ rkey0; - b2 = VEC_LOAD_BE (in + 2, bige_const) ^ tweak2 ^ rkey0; - b3 = VEC_LOAD_BE (in + 3, bige_const) ^ tweak3 ^ rkey0; - b4 = VEC_LOAD_BE (in + 4, bige_const) ^ tweak4 ^ rkey0; - b5 = VEC_LOAD_BE (in + 5, bige_const) ^ tweak5 ^ rkey0; - b6 = VEC_LOAD_BE (in + 6, bige_const) ^ tweak6 ^ rkey0; - b7 = VEC_LOAD_BE (in + 7, bige_const) ^ tweak7 ^ rkey0; + b0 = VEC_LOAD_BE_NOSWAP (in, 0); + b1 = VEC_LOAD_BE_NOSWAP (in, 1); + b2 = VEC_LOAD_BE_NOSWAP (in, 2); + b3 = VEC_LOAD_BE_NOSWAP (in, 3); + tweak0 = tweak; + GEN_TWEAK (tweak1, tweak0); + tweak0 = asm_vperm1 (tweak0, bswap128_const); + b4 = VEC_LOAD_BE_NOSWAP (in, 4); + b5 = VEC_LOAD_BE_NOSWAP (in, 5); + GEN_TWEAK (tweak2, tweak1); + tweak1 = asm_vperm1 (tweak1, bswap128_const); + b6 = VEC_LOAD_BE_NOSWAP (in, 6); + b7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + + b0 = VEC_BE_SWAP(b0, bige_const); + b1 = VEC_BE_SWAP(b1, bige_const); + GEN_TWEAK (tweak3, tweak2); + tweak2 = asm_vperm1 (tweak2, bswap128_const); + GEN_TWEAK (tweak4, tweak3); + tweak3 = asm_vperm1 (tweak3, bswap128_const); + b2 = VEC_BE_SWAP(b2, bige_const); + b3 = VEC_BE_SWAP(b3, bige_const); + GEN_TWEAK (tweak5, tweak4); + tweak4 = asm_vperm1 (tweak4, bswap128_const); + GEN_TWEAK (tweak6, tweak5); + tweak5 = asm_vperm1 (tweak5, bswap128_const); + b4 = VEC_BE_SWAP(b4, bige_const); + b5 = VEC_BE_SWAP(b5, bige_const); + GEN_TWEAK (tweak7, tweak6); + tweak6 = asm_vperm1 (tweak6, bswap128_const); + GEN_TWEAK (tweak, tweak7); + tweak7 = asm_vperm1 (tweak7, bswap128_const); + b6 = VEC_BE_SWAP(b6, bige_const); + b7 = VEC_BE_SWAP(b7, bige_const); + + tweak0 = asm_xor (tweak0, rkey0); + tweak1 = asm_xor (tweak1, rkey0); + tweak2 = asm_xor (tweak2, rkey0); + tweak3 = asm_xor (tweak3, rkey0); + tweak4 = asm_xor (tweak4, rkey0); + tweak5 = asm_xor (tweak5, rkey0); + tweak6 = asm_xor (tweak6, rkey0); + tweak7 = asm_xor (tweak7, rkey0); + + b0 = asm_xor (b0, tweak0); + b1 = asm_xor (b1, tweak1); + b2 = asm_xor (b2, tweak2); + b3 = asm_xor (b3, tweak3); + b4 = asm_xor (b4, tweak4); + b5 = asm_xor (b5, tweak5); + b6 = asm_xor (b6, tweak6); + b7 = asm_xor (b7, tweak7); #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_ncipher_be (b0, rkey); \ - b1 = vec_ncipher_be (b1, rkey); \ - b2 = vec_ncipher_be (b2, rkey); \ - b3 = vec_ncipher_be (b3, rkey); \ - b4 = vec_ncipher_be (b4, rkey); \ - b5 = vec_ncipher_be (b5, rkey); \ - b6 = vec_ncipher_be (b6, rkey); \ - b7 = vec_ncipher_be (b7, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); \ + b4 = asm_ncipher_be (b4, rkey); \ + b5 = asm_ncipher_be (b5, rkey); \ + b6 = asm_ncipher_be (b6, rkey); \ + b7 = asm_ncipher_be (b7, rkey); DO_ROUND(1); DO_ROUND(2); @@ -1963,7 +2319,20 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, DO_ROUND(5); DO_ROUND(6); DO_ROUND(7); + + rkeylf = asm_xor (rkeylast, rkey0); + DO_ROUND(8); + + tweak0 = asm_xor (tweak0, rkeylf); + tweak1 = asm_xor (tweak1, rkeylf); + tweak2 = asm_xor (tweak2, rkeylf); + tweak3 = asm_xor (tweak3, rkeylf); + tweak4 = asm_xor (tweak4, rkeylf); + tweak5 = asm_xor (tweak5, rkeylf); + tweak6 = asm_xor (tweak6, rkeylf); + tweak7 = asm_xor (tweak7, rkeylf); + DO_ROUND(9); if (rounds >= 12) { @@ -1978,51 +2347,62 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, #undef DO_ROUND - rkey = rkeylast; - b0 = vec_ncipherlast_be (b0, rkey ^ tweak0); - b1 = vec_ncipherlast_be (b1, rkey ^ tweak1); - b2 = vec_ncipherlast_be (b2, rkey ^ tweak2); - b3 = vec_ncipherlast_be (b3, rkey ^ tweak3); - b4 = vec_ncipherlast_be (b4, rkey ^ tweak4); - b5 = vec_ncipherlast_be (b5, rkey ^ tweak5); - b6 = vec_ncipherlast_be (b6, rkey ^ tweak6); - b7 = vec_ncipherlast_be (b7, rkey ^ tweak7); - - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); - VEC_STORE_BE (out + 4, b4, bige_const); - VEC_STORE_BE (out + 5, b5, bige_const); - VEC_STORE_BE (out + 6, b6, bige_const); - VEC_STORE_BE (out + 7, b7, bige_const); - - in += 8; + b0 = asm_ncipherlast_be (b0, tweak0); + b1 = asm_ncipherlast_be (b1, tweak1); + b2 = asm_ncipherlast_be (b2, tweak2); + b3 = asm_ncipherlast_be (b3, tweak3); + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b4 = asm_ncipherlast_be (b4, tweak4); + b5 = asm_ncipherlast_be (b5, tweak5); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b6 = asm_ncipherlast_be (b6, tweak6); + b7 = asm_ncipherlast_be (b7, tweak7); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); out += 8; } if (nblocks >= 4) { - tweak0 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak1 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak2 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - tweak3 = tweak_next; - GEN_TWEAK (tweak_next, tweak_tmp); - - b0 = VEC_LOAD_BE (in + 0, bige_const) ^ tweak0 ^ rkey0; - b1 = VEC_LOAD_BE (in + 1, bige_const) ^ tweak1 ^ rkey0; - b2 = VEC_LOAD_BE (in + 2, bige_const) ^ tweak2 ^ rkey0; - b3 = VEC_LOAD_BE (in + 3, bige_const) ^ tweak3 ^ rkey0; + tweak0 = tweak; + GEN_TWEAK (tweak1, tweak0); + GEN_TWEAK (tweak2, tweak1); + GEN_TWEAK (tweak3, tweak2); + GEN_TWEAK (tweak, tweak3); + + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); + + tweak0 = asm_vperm1 (tweak0, bswap128_const); + tweak1 = asm_vperm1 (tweak1, bswap128_const); + tweak2 = asm_vperm1 (tweak2, bswap128_const); + tweak3 = asm_vperm1 (tweak3, bswap128_const); + + b0 ^= tweak0 ^ rkey0; + b1 ^= tweak1 ^ rkey0; + b2 ^= tweak2 ^ rkey0; + b3 ^= tweak3 ^ rkey0; #define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (&rk[r]); \ - b0 = vec_ncipher_be (b0, rkey); \ - b1 = vec_ncipher_be (b1, rkey); \ - b2 = vec_ncipher_be (b2, rkey); \ - b3 = vec_ncipher_be (b3, rkey); + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); DO_ROUND(1); DO_ROUND(2); @@ -2047,15 +2427,15 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, #undef DO_ROUND rkey = rkeylast; - b0 = vec_ncipherlast_be (b0, rkey ^ tweak0); - b1 = vec_ncipherlast_be (b1, rkey ^ tweak1); - b2 = vec_ncipherlast_be (b2, rkey ^ tweak2); - b3 = vec_ncipherlast_be (b3, rkey ^ tweak3); + b0 = asm_ncipherlast_be (b0, rkey ^ tweak0); + b1 = asm_ncipherlast_be (b1, rkey ^ tweak1); + b2 = asm_ncipherlast_be (b2, rkey ^ tweak2); + b3 = asm_ncipherlast_be (b3, rkey ^ tweak3); - VEC_STORE_BE (out + 0, b0, bige_const); - VEC_STORE_BE (out + 1, b1, bige_const); - VEC_STORE_BE (out + 2, b2, bige_const); - VEC_STORE_BE (out + 3, b3, bige_const); + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); in += 4; out += 4; @@ -2064,25 +2444,30 @@ void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, for (; nblocks; nblocks--) { - tweak = tweak_next; + tweak0 = asm_vperm1 (tweak, bswap128_const); /* Xor-Encrypt/Decrypt-Xor block. */ - b = VEC_LOAD_BE (in, bige_const) ^ tweak; + b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0; /* Generate next tweak. */ - GEN_TWEAK (tweak_next, tweak_tmp); + GEN_TWEAK (tweak, tweak); AES_DECRYPT (b, rounds); - b ^= tweak; - VEC_STORE_BE (out, b, bige_const); + b ^= tweak0; + VEC_STORE_BE (out, 0, b, bige_const); in++; out++; } } - VEC_STORE_BE (tweak_arg, tweak_next, bige_const); +#ifdef WORDS_BIGENDIAN + tweak = asm_vperm1 (tweak, bswap128_const); + VEC_STORE_BE (tweak_arg, 0, tweak, bige_const); +#else + VEC_STORE_BE (tweak_arg, 0, tweak, vec_tweakin_swap_const); +#endif #undef GEN_TWEAK } diff --git a/cipher/serpent-avx2-amd64.S b/cipher/serpent-avx2-amd64.S index 9b17c2bd..dcee9b62 100644 --- a/cipher/serpent-avx2-amd64.S +++ b/cipher/serpent-avx2-amd64.S @@ -18,8 +18,9 @@ * License along with this program; if not, see . */ -#ifdef __x86_64 #include + +#ifdef __x86_64 #if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SERPENT) && \ defined(ENABLE_AVX2_SUPPORT) diff --git a/configure.ac b/configure.ac index 4d4fb49a..f31b7558 100644 --- a/configure.ac +++ b/configure.ac @@ -97,6 +97,12 @@ AH_TOP([ AH_BOTTOM([ #define _GCRYPT_IN_LIBGCRYPT 1 +/* Add .note.gnu.property section for Intel CET in assembler sources + when CET is enabled. */ +#if defined(__ASSEMBLER__) && defined(__CET__) +# include +#endif + /* If the configure check for endianness has been disabled, get it from OS macros. This is intended for making fat binary builds on OS X. */ #ifdef DISABLED_ENDIAN_CHECK @@ -834,7 +840,6 @@ AC_TYPE_PID_T GNUPG_CHECK_TYPEDEF(byte, HAVE_BYTE_TYPEDEF) GNUPG_CHECK_TYPEDEF(ushort, HAVE_USHORT_TYPEDEF) -GNUPG_CHECK_TYPEDEF(ulong, HAVE_ULONG_TYPEDEF) GNUPG_CHECK_TYPEDEF(u16, HAVE_U16_TYPEDEF) GNUPG_CHECK_TYPEDEF(u32, HAVE_U32_TYPEDEF) @@ -2546,7 +2551,8 @@ LIST_MEMBER(ecc, $enabled_pubkey_ciphers) if test "$found" = "1" ; then GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS \ ecc.lo ecc-curves.lo ecc-misc.lo \ - ecc-ecdh.lo ecc-ecdsa.lo ecc-eddsa.lo ecc-gost.lo" + ecc-ecdh.lo ecc-ecdsa.lo ecc-eddsa.lo ecc-gost.lo \ + ecc-sm2.lo" AC_DEFINE(USE_ECC, 1, [Defined if this module should be included]) fi diff --git a/doc/gcrypt.texi b/doc/gcrypt.texi index d7bfa4c2..091704de 100644 --- a/doc/gcrypt.texi +++ b/doc/gcrypt.texi @@ -2824,7 +2824,7 @@ Return the number of elements a signature created with the algorithm @var{algo} consists of. Return 0 for an unknown algorithm or for an algorithm not capable of creating signatures. -@item GCRYCTL_GET_ALGO_NENC +@item GCRYCTL_GET_ALGO_NENCR Return the number of elements a encrypted message created with the algorithm @var{algo} consists of. Return 0 for an unknown algorithm or for an algorithm not capable of encryption. diff --git a/mpi/config.links b/mpi/config.links index 3ead4f08..4f43b732 100644 --- a/mpi/config.links +++ b/mpi/config.links @@ -382,6 +382,16 @@ if test x"$mpi_cpu_arch" = x ; then mpi_cpu_arch="unknown" fi +# Add .note.gnu.property section for Intel CET in assembler sources +# when CET is enabled. */ +if test x"$mpi_cpu_arch" = xx86 ; then + cat <> ./mpi/asm-syntax.h + +#if defined(__ASSEMBLER__) && defined(__CET__) +# include +#endif +EOF +fi # Make sysdep.h echo '/* created by config.links - do not edit */' >./mpi/sysdep.h diff --git a/mpi/i386/mpih-add1.S b/mpi/i386/mpih-add1.S index 32091f34..de78a0cb 100644 --- a/mpi/i386/mpih-add1.S +++ b/mpi/i386/mpih-add1.S @@ -55,6 +55,11 @@ C_SYMBOL_NAME(_gcry_mpih_add_n:) movl 20(%esp),%edx /* s2_ptr */ movl 24(%esp),%ecx /* size */ +#if defined __CET__ && (__CET__ & 1) != 0 + pushl %ebx + CFI_PUSH(%ebx) +#endif + movl %ecx,%eax shrl $3,%ecx /* compute count for unrolled loop */ negl %eax @@ -66,41 +71,70 @@ C_SYMBOL_NAME(_gcry_mpih_add_n:) subl %eax,%esi /* ... by a constant when we ... */ subl %eax,%edx /* ... enter the loop */ shrl $2,%eax /* restore previous value */ +#if defined __CET__ && (__CET__ & 1) != 0 + leal -4(,%eax,4),%ebx /* Count for 4-byte endbr32 */ +#endif #ifdef PIC /* Calculate start address in loop for PIC. Due to limitations in some assemblers, Loop-L0-3 cannot be put into the leal */ call L0 + CFI_ADJUST_CFA_OFFSET(4) L0: leal (%eax,%eax,8),%eax addl (%esp),%eax addl $(Loop-L0-3),%eax addl $4,%esp + CFI_ADJUST_CFA_OFFSET(-4) #else /* Calculate start address in loop for non-PIC. */ leal (Loop - 3)(%eax,%eax,8),%eax +#endif +#if defined __CET__ && (__CET__ & 1) != 0 + addl %ebx,%eax /* Adjust for endbr32 */ #endif jmp *%eax /* jump into loop */ ALIGN (3) Loop: movl (%esi),%eax adcl (%edx),%eax movl %eax,(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 4(%esi),%eax adcl 4(%edx),%eax movl %eax,4(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 8(%esi),%eax adcl 8(%edx),%eax movl %eax,8(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 12(%esi),%eax adcl 12(%edx),%eax movl %eax,12(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 16(%esi),%eax adcl 16(%edx),%eax movl %eax,16(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 20(%esi),%eax adcl 20(%edx),%eax movl %eax,20(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 24(%esi),%eax adcl 24(%edx),%eax movl %eax,24(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 28(%esi),%eax adcl 28(%edx),%eax movl %eax,28(%edi) @@ -113,6 +147,11 @@ Loop: movl (%esi),%eax sbbl %eax,%eax negl %eax +#if defined __CET__ && (__CET__ & 1) != 0 + popl %ebx + CFI_POP(%ebx) +#endif + popl %esi CFI_POP(%esi) popl %edi diff --git a/mpi/i386/mpih-sub1.S b/mpi/i386/mpih-sub1.S index 501c4a9f..2bdc1438 100644 --- a/mpi/i386/mpih-sub1.S +++ b/mpi/i386/mpih-sub1.S @@ -56,6 +56,11 @@ C_SYMBOL_NAME(_gcry_mpih_sub_n:) movl 20(%esp),%edx /* s2_ptr */ movl 24(%esp),%ecx /* size */ +#if defined __CET__ && (__CET__ & 1) != 0 + pushl %ebx + CFI_PUSH(%ebx) +#endif + movl %ecx,%eax shrl $3,%ecx /* compute count for unrolled loop */ negl %eax @@ -67,41 +72,70 @@ C_SYMBOL_NAME(_gcry_mpih_sub_n:) subl %eax,%esi /* ... by a constant when we ... */ subl %eax,%edx /* ... enter the loop */ shrl $2,%eax /* restore previous value */ +#if defined __CET__ && (__CET__ & 1) != 0 + leal -4(,%eax,4),%ebx /* Count for 4-byte endbr32 */ +#endif #ifdef PIC /* Calculate start address in loop for PIC. Due to limitations in some assemblers, Loop-L0-3 cannot be put into the leal */ call L0 + CFI_ADJUST_CFA_OFFSET(4) L0: leal (%eax,%eax,8),%eax addl (%esp),%eax addl $(Loop-L0-3),%eax addl $4,%esp + CFI_ADJUST_CFA_OFFSET(-4) #else /* Calculate start address in loop for non-PIC. */ leal (Loop - 3)(%eax,%eax,8),%eax +#endif +#if defined __CET__ && (__CET__ & 1) != 0 + addl %ebx,%eax /* Adjust for endbr32 */ #endif jmp *%eax /* jump into loop */ ALIGN (3) Loop: movl (%esi),%eax sbbl (%edx),%eax movl %eax,(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 4(%esi),%eax sbbl 4(%edx),%eax movl %eax,4(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 8(%esi),%eax sbbl 8(%edx),%eax movl %eax,8(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 12(%esi),%eax sbbl 12(%edx),%eax movl %eax,12(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 16(%esi),%eax sbbl 16(%edx),%eax movl %eax,16(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 20(%esi),%eax sbbl 20(%edx),%eax movl %eax,20(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 24(%esi),%eax sbbl 24(%edx),%eax movl %eax,24(%edi) +#ifdef _CET_ENDBR + _CET_ENDBR +#endif movl 28(%esi),%eax sbbl 28(%edx),%eax movl %eax,28(%edi) @@ -114,6 +148,11 @@ Loop: movl (%esi),%eax sbbl %eax,%eax negl %eax +#if defined __CET__ && (__CET__ & 1) != 0 + popl %ebx + CFI_POP(%ebx) +#endif + popl %esi CFI_POP(%esi) popl %edi diff --git a/mpi/mpi-div.c b/mpi/mpi-div.c index 9ac99c31..eb621fe4 100644 --- a/mpi/mpi-div.c +++ b/mpi/mpi-div.c @@ -64,8 +64,9 @@ _gcry_mpi_fdiv_r( gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t divisor ) * rem is optional */ -ulong -_gcry_mpi_fdiv_r_ui( gcry_mpi_t rem, gcry_mpi_t dividend, ulong divisor ) +unsigned long +_gcry_mpi_fdiv_r_ui( gcry_mpi_t rem, gcry_mpi_t dividend, + unsigned long divisor ) { mpi_limb_t rlimb; @@ -321,7 +322,7 @@ _gcry_mpi_tdiv_q_2exp( gcry_mpi_t w, gcry_mpi_t u, unsigned int count ) * (note: divisor must fit into a limb) */ int -_gcry_mpi_divisible_ui(gcry_mpi_t dividend, ulong divisor ) +_gcry_mpi_divisible_ui(gcry_mpi_t dividend, unsigned long divisor ) { return !_gcry_mpih_mod_1( dividend->d, dividend->nlimbs, divisor ); } diff --git a/random/random-drbg.c b/random/random-drbg.c index e0b4230e..6124f5fb 100644 --- a/random/random-drbg.c +++ b/random/random-drbg.c @@ -146,12 +146,12 @@ * gcry_randomize(outbuf, OUTLEN, GCRY_STRONG_RANDOM); */ +#include + #include #include #include -#include - #include "g10lib.h" #include "random.h" #include "rand-internal.h" diff --git a/random/rndunix.c b/random/rndunix.c index fcb45b78..aff2f85d 100644 --- a/random/rndunix.c +++ b/random/rndunix.c @@ -894,7 +894,7 @@ _gcry_rndunix_gather_random (void (*add)(const void*, size_t, /* now read from the gatherer */ while( length ) { int goodness; - ulong subtract; + unsigned long subtract; if( read_a_msg( pipedes[0], &msg ) ) { log_error("reading from gatherer pipe failed: %s\n", @@ -928,7 +928,7 @@ _gcry_rndunix_gather_random (void (*add)(const void*, size_t, (*add)( msg.data, n, origin ); /* this is the trick how we cope with the goodness */ - subtract = (ulong)n * goodness / 100; + subtract = (unsigned long)n * goodness / 100; /* subtract at least 1 byte to avoid infinite loops */ length -= subtract ? subtract : 1; } diff --git a/random/rndw32.c b/random/rndw32.c index 08a8867d..b3f63d20 100644 --- a/random/rndw32.c +++ b/random/rndw32.c @@ -845,10 +845,10 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t, We discard the upper 32-bit of those values. */ { - byte buffer[20*sizeof(ulong)], *bufptr; + byte buffer[20*sizeof(unsigned long)], *bufptr; bufptr = buffer; -#define ADDINT(f) do { ulong along = (ulong)(f); \ +#define ADDINT(f) do { unsigned long along = (unsigned long)(f); \ memcpy (bufptr, &along, sizeof (along) ); \ bufptr += sizeof (along); \ } while (0) diff --git a/random/rndw32ce.c b/random/rndw32ce.c index b485eef2..873e8460 100644 --- a/random/rndw32ce.c +++ b/random/rndw32ce.c @@ -115,7 +115,7 @@ _gcry_rndw32ce_gather_random_fast (void (*add)(const void*, size_t, memcpy (bufptr, &along, sizeof (along)); \ bufptr += sizeof (along); \ } while (0) - unsigned char buffer[20*sizeof(ulong)], *bufptr; + unsigned char buffer[20*sizeof(unsigned long)], *bufptr; bufptr = buffer; ADD (HWND, GetActiveWindow ()); diff --git a/src/cipher.h b/src/cipher.h index 5aac19f1..55eedb47 100644 --- a/src/cipher.h +++ b/src/cipher.h @@ -42,6 +42,7 @@ #define PUBKEY_FLAG_GOST (1 << 13) #define PUBKEY_FLAG_NO_KEYTEST (1 << 14) #define PUBKEY_FLAG_DJB_TWEAK (1 << 15) +#define PUBKEY_FLAG_SM2 (1 << 16) enum pk_operation @@ -76,7 +77,7 @@ struct pk_encoding_ctx unsigned char *label; size_t labellen; - /* for PSS */ + /* for PSS or GOST (UKM length in bits)*/ size_t saltlen; int (* verify_cmp) (void *opaque, gcry_mpi_t tmp); diff --git a/src/global.c b/src/global.c index d82c680a..be65df54 100644 --- a/src/global.c +++ b/src/global.c @@ -261,7 +261,8 @@ _gcry_check_version (const char *req_version) /* Compare version numbers. */ if ( my_major > rq_major || (my_major == rq_major && my_minor > rq_minor) - || (my_major == rq_major && my_minor == rq_minor && my_micro > rq_micro) + || (my_major == rq_major && my_minor == rq_minor + && my_micro > rq_micro) || (my_major == rq_major && my_minor == rq_minor && my_micro == rq_micro)) { diff --git a/src/mpi.h b/src/mpi.h index c342ff48..39312fc3 100644 --- a/src/mpi.h +++ b/src/mpi.h @@ -182,14 +182,14 @@ gpg_err_code_t _gcry_mpi_to_octet_string (unsigned char **r_frame, #define mpi_tdiv_q_2exp(a,b,c) _gcry_mpi_tdiv_q_2exp((a),(b),(c)) #define mpi_divisible_ui(a,b) _gcry_mpi_divisible_ui((a),(b)) -ulong _gcry_mpi_fdiv_r_ui( gcry_mpi_t rem, gcry_mpi_t dividend, ulong divisor ); +unsigned long _gcry_mpi_fdiv_r_ui( gcry_mpi_t rem, gcry_mpi_t dividend, unsigned long divisor ); void _gcry_mpi_fdiv_r( gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t divisor ); void _gcry_mpi_fdiv_q( gcry_mpi_t quot, gcry_mpi_t dividend, gcry_mpi_t divisor ); void _gcry_mpi_fdiv_qr( gcry_mpi_t quot, gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t divisor ); void _gcry_mpi_tdiv_r( gcry_mpi_t rem, gcry_mpi_t num, gcry_mpi_t den); void _gcry_mpi_tdiv_qr( gcry_mpi_t quot, gcry_mpi_t rem, gcry_mpi_t num, gcry_mpi_t den); void _gcry_mpi_tdiv_q_2exp( gcry_mpi_t w, gcry_mpi_t u, unsigned count ); -int _gcry_mpi_divisible_ui(gcry_mpi_t dividend, ulong divisor ); +int _gcry_mpi_divisible_ui(gcry_mpi_t dividend, unsigned long divisor ); /*-- mpi-mod.c --*/ diff --git a/src/sexp.c b/src/sexp.c index 57d77d29..17341ba5 100644 --- a/src/sexp.c +++ b/src/sexp.c @@ -1114,7 +1114,7 @@ do_vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff, int hexcount = 0; int b64count = 0; int quoted_esc = 0; - int datalen = 0; + size_t datalen = 0; size_t dummy_erroff; struct make_space_ctx c; int arg_counter = 0; @@ -1354,7 +1354,7 @@ do_vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff, goto leave; } err = gpgrt_b64dec_proc (b64state, b64buf, b64count, - (size_t *)&datalen); + &datalen); if (err && gpg_err_code (err) != GPG_ERR_EOF) { xfree (b64state); diff --git a/src/types.h b/src/types.h index 645ddd62..39393be1 100644 --- a/src/types.h +++ b/src/types.h @@ -70,12 +70,6 @@ # define HAVE_USHORT_TYPEDEF #endif -#ifndef HAVE_ULONG_TYPEDEF -# undef ulong /* In case there is a macro with that name. */ - typedef unsigned long ulong; -# define HAVE_ULONG_TYPEDEF -#endif - #ifndef HAVE_U16_TYPEDEF # undef u16 /* In case there is a macro with that name. */ # if SIZEOF_UNSIGNED_INT == 2 diff --git a/tests/basic.c b/tests/basic.c index 8337bcfb..812bd89d 100644 --- a/tests/basic.c +++ b/tests/basic.c @@ -33,6 +33,10 @@ #define PGM "basic" #include "t-common.h" +#if __GNUC__ >= 4 +# define ALWAYS_INLINE __attribute__((always_inline)) +#endif + typedef struct test_spec_pubkey_key { const char *secret; @@ -191,7 +195,7 @@ show_mac_not_available (int algo) -void +static void progress_handler (void *cb_data, const char *what, int printchar, int current, int total) { @@ -207,6 +211,239 @@ progress_handler (void *cb_data, const char *what, int printchar, fflush (stdout); } + +#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define CLUTTER_VECTOR_REGISTER_AMD64 1 +# define CLUTTER_VECTOR_REGISTER_COUNT 16 +#elif defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4 && __GNUC__ >= 4 && \ + defined(HAVE_GCC_INLINE_ASM_SSSE3) +# define CLUTTER_VECTOR_REGISTER_I386 1 +# define CLUTTER_VECTOR_REGISTER_COUNT 8 +#elif defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_AARCH64_NEON) && \ + (defined(__ARM_FEATURE_SIMD32) || defined(__ARM_NEON)) +# define CLUTTER_VECTOR_REGISTER_AARCH64 1 +# define CLUTTER_VECTOR_REGISTER_COUNT 32 +#elif defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_NEON) && \ + (defined(__ARM_FEATURE_SIMD32) || defined(__ARM_NEON)) +# define CLUTTER_VECTOR_REGISTER_NEON 1 +# define CLUTTER_VECTOR_REGISTER_COUNT 16 +#endif + + +#ifdef CLUTTER_VECTOR_REGISTER_COUNT +static void +prepare_vector_data(unsigned char data[CLUTTER_VECTOR_REGISTER_COUNT][16]) +{ + static unsigned char basedata[16] = + { + 0xd7, 0xfe, 0x5c, 0x4b, 0x58, 0xfe, 0xf4, 0xb6, + 0xed, 0x2f, 0x31, 0xc9, 0x1d, 0xd3, 0x62, 0x8d + }; + int j, i; + + for (i = 0; i < CLUTTER_VECTOR_REGISTER_COUNT; i++) + { + for (j = 0; j < 16; j++) + { + data[i][j] = basedata[(i + j) % 16]; + } + + for (j = 0; j < 16; j++) + { + basedata[j] -= j; + } + } +} +#endif + + +static inline ALWAYS_INLINE void +clutter_vector_registers(void) +{ +#ifdef CLUTTER_VECTOR_REGISTER_COUNT + unsigned char data[CLUTTER_VECTOR_REGISTER_COUNT][16]; +#if defined(CLUTTER_VECTOR_REGISTER_AARCH64) || \ + defined(CLUTTER_VECTOR_REGISTER_NEON) + static int init; + static int have_neon; + + if (!init) + { + char *string; + + string = gcry_get_config (0, "hwflist"); + if (string) + { + have_neon = (strstr(string, "arm-neon:") != NULL); + xfree(string); + } + init = 1; + } + + if (!have_neon) + return; +#elif defined(CLUTTER_VECTOR_REGISTER_I386) + static int init; + static int have_ssse3; + + if (!init) + { + char *string; + + string = gcry_get_config (0, "hwflist"); + if (string) + { + have_ssse3 = (strstr(string, "intel-ssse3:") != NULL); + xfree(string); + } + init = 1; + } + + if (!have_ssse3) + return; +#endif + + prepare_vector_data(data); + +#if defined(CLUTTER_VECTOR_REGISTER_AMD64) + asm volatile("movdqu %[data0], %%xmm0\n" + "movdqu %[data1], %%xmm1\n" + "movdqu %[data2], %%xmm2\n" + "movdqu %[data3], %%xmm3\n" + "movdqu %[data4], %%xmm4\n" + "movdqu %[data5], %%xmm5\n" + "movdqu %[data6], %%xmm6\n" + "movdqu %[data7], %%xmm7\n" + "movdqu %[data8], %%xmm8\n" + "movdqu %[data9], %%xmm9\n" + "movdqu %[data10], %%xmm10\n" + "movdqu %[data11], %%xmm11\n" + "movdqu %[data12], %%xmm12\n" + "movdqu %[data13], %%xmm13\n" + "movdqu %[data14], %%xmm14\n" + "movdqu %[data15], %%xmm15\n" + : + : [data0] "m" (*data[0]), + [data1] "m" (*data[1]), + [data2] "m" (*data[2]), + [data3] "m" (*data[3]), + [data4] "m" (*data[4]), + [data5] "m" (*data[5]), + [data6] "m" (*data[6]), + [data7] "m" (*data[7]), + [data8] "m" (*data[8]), + [data9] "m" (*data[9]), + [data10] "m" (*data[10]), + [data11] "m" (*data[11]), + [data12] "m" (*data[12]), + [data13] "m" (*data[13]), + [data14] "m" (*data[14]), + [data15] "m" (*data[15]) + : "memory" +#ifdef __SSE2__ + ,"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", + "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", + "xmm15" +#endif + ); +#elif defined(CLUTTER_VECTOR_REGISTER_I386) + asm volatile("movdqu %[data0], %%xmm0\n" + "movdqu %[data1], %%xmm1\n" + "movdqu %[data2], %%xmm2\n" + "movdqu %[data3], %%xmm3\n" + "movdqu %[data4], %%xmm4\n" + "movdqu %[data5], %%xmm5\n" + "movdqu %[data6], %%xmm6\n" + "movdqu %[data7], %%xmm7\n" + : + : [data0] "m" (*data[0]), + [data1] "m" (*data[1]), + [data2] "m" (*data[2]), + [data3] "m" (*data[3]), + [data4] "m" (*data[4]), + [data5] "m" (*data[5]), + [data6] "m" (*data[6]), + [data7] "m" (*data[7]) + : "memory" +#ifdef __SSE2__ + ,"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" +#endif + ); +#elif defined(CLUTTER_VECTOR_REGISTER_AARCH64) + asm volatile("mov x0, %[ptr]\n" + "ld1 {v0.16b}, [x0], #16\n" + "ld1 {v1.16b}, [x0], #16\n" + "ld1 {v2.16b}, [x0], #16\n" + "ld1 {v3.16b}, [x0], #16\n" + "ld1 {v4.16b}, [x0], #16\n" + "ld1 {v5.16b}, [x0], #16\n" + "ld1 {v6.16b}, [x0], #16\n" + "ld1 {v7.16b}, [x0], #16\n" + "ld1 {v8.16b}, [x0], #16\n" + "ld1 {v9.16b}, [x0], #16\n" + "ld1 {v10.16b}, [x0], #16\n" + "ld1 {v11.16b}, [x0], #16\n" + "ld1 {v12.16b}, [x0], #16\n" + "ld1 {v13.16b}, [x0], #16\n" + "ld1 {v14.16b}, [x0], #16\n" + "ld1 {v15.16b}, [x0], #16\n" + "ld1 {v16.16b}, [x0], #16\n" + "ld1 {v17.16b}, [x0], #16\n" + "ld1 {v18.16b}, [x0], #16\n" + "ld1 {v19.16b}, [x0], #16\n" + "ld1 {v20.16b}, [x0], #16\n" + "ld1 {v21.16b}, [x0], #16\n" + "ld1 {v22.16b}, [x0], #16\n" + "ld1 {v23.16b}, [x0], #16\n" + "ld1 {v24.16b}, [x0], #16\n" + "ld1 {v25.16b}, [x0], #16\n" + "ld1 {v26.16b}, [x0], #16\n" + "ld1 {v27.16b}, [x0], #16\n" + "ld1 {v28.16b}, [x0], #16\n" + "ld1 {v29.16b}, [x0], #16\n" + "ld1 {v30.16b}, [x0], #16\n" + "ld1 {v31.16b}, [x0], #16\n" + : + : [ptr] "r" (data) + : "x0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", + "memory"); +#elif defined(CLUTTER_VECTOR_REGISTER_NEON) + asm volatile("mov r0, %[ptr]\n" + "vld1.64 {q0}, [r0]!\n" + "vld1.64 {q1}, [r0]!\n" + "vld1.64 {q2}, [r0]!\n" + "vld1.64 {q3}, [r0]!\n" + "vld1.64 {q4}, [r0]!\n" + "vld1.64 {q5}, [r0]!\n" + "vld1.64 {q6}, [r0]!\n" + "vld1.64 {q7}, [r0]!\n" + "vld1.64 {q8}, [r0]!\n" + "vld1.64 {q9}, [r0]!\n" + "vld1.64 {q10}, [r0]!\n" + "vld1.64 {q11}, [r0]!\n" + "vld1.64 {q12}, [r0]!\n" + "vld1.64 {q13}, [r0]!\n" + "vld1.64 {q14}, [r0]!\n" + "vld1.64 {q15}, [r0]!\n" + : + : [ptr] "r" (data) + : "r0", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", + "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", + "memory"); +#endif + +#endif /* CLUTTER_VECTOR_REGISTER_COUNT */ +} + + + static void check_cbc_mac_cipher (void) { @@ -8280,7 +8517,9 @@ check_bulk_cipher_modes (void) goto leave; } + clutter_vector_registers(); err = gcry_cipher_setkey (hde, tv[i].key, tv[i].keylen); + clutter_vector_registers(); if (!err) err = gcry_cipher_setkey (hdd, tv[i].key, tv[i].keylen); if (err) @@ -8296,7 +8535,9 @@ check_bulk_cipher_modes (void) goto leave; } + clutter_vector_registers(); err = gcry_cipher_setiv (hde, tv[i].iv, tv[i].ivlen); + clutter_vector_registers(); if (!err) err = gcry_cipher_setiv (hdd, tv[i].iv, tv[i].ivlen); if (err) @@ -8309,6 +8550,7 @@ check_bulk_cipher_modes (void) for (j=0; j < buflen; j++) buffer[j] = ((j & 0xff) ^ ((j >> 8) & 0xff)); + clutter_vector_registers(); err = gcry_cipher_encrypt (hde, outbuf, buflen, buffer, buflen); if (err) { @@ -8330,6 +8572,7 @@ check_bulk_cipher_modes (void) fail ("encrypt mismatch (algo %d, mode %d)\n", tv[i].algo, tv[i].mode); + clutter_vector_registers(); err = gcry_cipher_decrypt (hdd, outbuf, buflen, NULL, 0); if (err) { @@ -8409,6 +8652,7 @@ check_one_cipher_core_reset (gcry_cipher_hd_t hd, int algo, int mode, int pass, if (mode == GCRY_CIPHER_MODE_OCB || mode == GCRY_CIPHER_MODE_CCM) { + clutter_vector_registers(); err = gcry_cipher_setiv (hd, iv, sizeof(iv)); if (err) { @@ -8535,6 +8779,7 @@ check_one_cipher_core (int algo, int mode, int flags, goto err_out_free; } + clutter_vector_registers(); err = gcry_cipher_setkey (hd, key, keylen); if (err) { @@ -8547,6 +8792,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (check_one_cipher_core_reset (hd, algo, mode, pass, nplain) < 0) goto err_out_free; + clutter_vector_registers(); err = gcry_cipher_encrypt (hd, out, nplain, plain, nplain); if (err) { @@ -8558,6 +8804,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (taglen > 0) { + clutter_vector_registers(); err = gcry_cipher_gettag (hd, tag, taglen); if (err) { @@ -8575,6 +8822,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (check_one_cipher_core_reset (hd, algo, mode, pass, nplain) < 0) goto err_out_free; + clutter_vector_registers(); err = gcry_cipher_decrypt (hd, in, nplain, out, nplain); if (err) { @@ -8586,6 +8834,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (taglen > 0) { + clutter_vector_registers(); err = gcry_cipher_checktag (hd, tag_result, taglen); if (err) { @@ -8605,6 +8854,7 @@ check_one_cipher_core (int algo, int mode, int flags, goto err_out_free; memcpy (out, plain, nplain); + clutter_vector_registers(); err = gcry_cipher_encrypt (hd, out, nplain, NULL, 0); if (err) { @@ -8639,6 +8889,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (check_one_cipher_core_reset (hd, algo, mode, pass, nplain) < 0) goto err_out_free; + clutter_vector_registers(); err = gcry_cipher_decrypt (hd, out, nplain, NULL, 0); if (err) { @@ -8651,6 +8902,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (taglen > 0) { + clutter_vector_registers(); err = gcry_cipher_checktag (hd, tag_result, taglen); if (err) { @@ -8677,6 +8929,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (piecelen > nplain - pos) piecelen = nplain - pos; + clutter_vector_registers(); err = gcry_cipher_encrypt (hd, out + pos, piecelen, plain + pos, piecelen); if (err) @@ -8694,6 +8947,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (taglen > 0) { + clutter_vector_registers(); err = gcry_cipher_gettag (hd, tag, taglen); if (err) { @@ -8723,6 +8977,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (piecelen > nplain - pos) piecelen = nplain - pos; + clutter_vector_registers(); err = gcry_cipher_decrypt (hd, in + pos, piecelen, out + pos, piecelen); if (err) { @@ -8739,6 +8994,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (taglen > 0) { + clutter_vector_registers(); err = gcry_cipher_checktag (hd, tag_result, taglen); if (err) { @@ -8767,6 +9023,7 @@ check_one_cipher_core (int algo, int mode, int flags, piecelen = nplain - pos; memcpy (out + pos, plain + pos, piecelen); + clutter_vector_registers(); err = gcry_cipher_encrypt (hd, out + pos, piecelen, NULL, 0); if (err) { @@ -8795,6 +9052,7 @@ check_one_cipher_core (int algo, int mode, int flags, if (piecelen > nplain - pos) piecelen = nplain - pos; + clutter_vector_registers(); err = gcry_cipher_decrypt (hd, out + pos, piecelen, NULL, 0); if (err) { @@ -9104,6 +9362,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, if (key && klen) { + clutter_vector_registers(); err = gcry_md_setkey (hd, key, klen); if (err) { @@ -9131,6 +9390,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, if (key && klen) { + clutter_vector_registers(); err = gcry_md_setkey (hd2, key, klen); if (err) { @@ -9149,10 +9409,12 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, gcry_md_reset (hd); gcry_md_reset (hd2); + clutter_vector_registers(); gcry_md_write (hd, buf, i); for (j = 0; j < i; j++) gcry_md_write (hd2, &buf[j], 1); + clutter_vector_registers(); p1 = gcry_md_read (hd, algo); p2 = gcry_md_read (hd2, algo); if (memcmp (p1, p2, mdlen)) @@ -9196,6 +9458,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, if (*data == '?') fillbuf_count(aaa, piecelen, 1000 * 1000 - left); + clutter_vector_registers(); gcry_md_write (hd, aaa, piecelen); left -= piecelen; @@ -9212,6 +9475,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, if (*data == '?') fillbuf_count(aaa, piecelen, 1000 * 1000 - left); + clutter_vector_registers(); gcry_md_write (hd, aaa, piecelen); left -= piecelen; @@ -9223,8 +9487,12 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, } } else - gcry_md_write (hd, data, len); + { + clutter_vector_registers(); + gcry_md_write (hd, data, len); + } + clutter_vector_registers(); err = gcry_md_copy (&hd2, hd); if (err) { @@ -9235,6 +9503,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, if (!xof) { + clutter_vector_registers(); p = gcry_md_read (hd2, algo); if (memcmp (p, expect, mdlen)) @@ -9255,12 +9524,14 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, char buf[1000]; int outmax = sizeof(buf) > elen ? elen : sizeof(buf); + clutter_vector_registers(); err = gcry_md_copy (&hd, hd2); if (err) { fail ("algo %d, gcry_md_copy failed: %s\n", algo, gpg_strerror (err)); } + clutter_vector_registers(); err = gcry_md_extract(hd2, algo, buf, outmax); if (err) { @@ -9283,6 +9554,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, memset(buf, 0, sizeof(buf)); /* Extract one byte at time. */ + clutter_vector_registers(); for (i = 0; i < outmax && !err; i++) err = gcry_md_extract(hd, algo, &buf[i], 1); if (err) @@ -9334,6 +9606,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, /* Extract large chucks, total 1000000 additional bytes. */ for (i = 0; i < 1000; i++) { + clutter_vector_registers(); err = gcry_md_extract(hd, algo, buf, 1000); if (!err) gcry_md_write(crc1, buf, 1000); @@ -9356,6 +9629,7 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, if (piecelen > left) piecelen = left; + clutter_vector_registers(); err = gcry_md_extract (hd2, algo, buf, piecelen); if (!err) gcry_md_write(crc2, buf, piecelen); @@ -9373,7 +9647,9 @@ check_one_md (int algo, const char *data, int len, const char *expect, int elen, piecelen = piecelen * 2 - ((piecelen != startlen) ? startlen : 0); } + clutter_vector_registers(); p1 = gcry_md_read (crc1, crcalgo); + clutter_vector_registers(); p2 = gcry_md_read (crc2, crcalgo); if (memcmp (p1, p2, crclen)) @@ -9449,6 +9725,7 @@ check_one_md_multi (int algo, const char *data, int len, const char *expect) iovcnt++; assert (iovcnt <= DIM (iov)); + clutter_vector_registers(); err = gcry_md_hash_buffers (algo, 0, digest, iov, iovcnt); if (err) { @@ -9498,6 +9775,7 @@ check_one_md_final(int algo, const char *expect, unsigned int expectlen) for (i = 0; i < sizeof(inbuf); i++) inbuf[i] = i; + clutter_vector_registers(); gcry_md_hash_buffer (algo, xorbuf, NULL, 0); for (i = 1; i < sizeof(inbuf); i++) { @@ -11336,6 +11614,7 @@ check_one_mac (int algo, const char *data, int datalen, return; } + clutter_vector_registers(); err = gcry_mac_setkey (hd, key, keylen); if (err) fail("algo %d, mac gcry_mac_setkey failed: %s\n", algo, gpg_strerror (err)); @@ -11344,6 +11623,7 @@ check_one_mac (int algo, const char *data, int datalen, if (ivlen && iv) { + clutter_vector_registers(); err = gcry_mac_setiv (hd, iv, ivlen); if (err) fail("algo %d, mac gcry_mac_ivkey failed: %s\n", algo, @@ -11356,6 +11636,7 @@ check_one_mac (int algo, const char *data, int datalen, { for (i = 0; i < datalen; i++) { + clutter_vector_registers(); err = gcry_mac_write (hd, &data[i], 1); if (err) fail("algo %d, mac gcry_mac_write [buf-offset: %d] failed: %s\n", @@ -11389,6 +11670,7 @@ check_one_mac (int algo, const char *data, int datalen, if (*data == '?') fillbuf_count(aaa, piecelen, 1000 * 1000 - left); + clutter_vector_registers(); gcry_mac_write (hd, aaa, piecelen); left -= piecelen; @@ -11405,6 +11687,7 @@ check_one_mac (int algo, const char *data, int datalen, if (*data == '?') fillbuf_count(aaa, piecelen, 1000 * 1000 - left); + clutter_vector_registers(); gcry_mac_write (hd, aaa, piecelen); left -= piecelen; @@ -11417,6 +11700,7 @@ check_one_mac (int algo, const char *data, int datalen, } else { + clutter_vector_registers(); err = gcry_mac_write (hd, data, datalen); } @@ -11426,11 +11710,13 @@ check_one_mac (int algo, const char *data, int datalen, goto out; } + clutter_vector_registers(); err = gcry_mac_verify (hd, expect, maclen); if (err) fail("algo %d, mac gcry_mac_verify failed: %s\n", algo, gpg_strerror (err)); macoutlen = maclen; + clutter_vector_registers(); err = gcry_mac_read (hd, p, &macoutlen); if (err) fail("algo %d, mac gcry_mac_read failed: %s\n", algo, gpg_strerror (err)); @@ -12594,6 +12880,16 @@ check_pubkey_sign_ecdsa (int n, gcry_sexp_t skey, gcry_sexp_t pkey) /* */ "000102030405060708090A0B0C0D0E0F#))", 0 }, + { 256, + "(data (flags sm2)\n" + " (hash sm3 #112233445566778899AABBCCDDEEFF00" + /* */ "123456789ABCDEF0123456789ABCDEF0#))", + 0, + "(data (flags sm2)\n" + " (hash sm3 #B524F552CD82B8B028476E005C377FB1" + /* */ "9A87E6FC682D48BB5D42E3D9B9EFFE76#))", + 0 + }, { 0, NULL } }; @@ -12984,7 +13280,7 @@ check_pubkey (void) { static const test_spec_pubkey_t pubkeys[] = { { - GCRY_PK_RSA, FLAG_CRYPT | FLAG_SIGN, + GCRY_PK_RSA, FLAG_CRYPT | FLAG_SIGN | FLAG_GRIP, { "(private-key\n" " (rsa\n" @@ -13022,7 +13318,7 @@ check_pubkey (void) "\xa2\x5d\x3d\x69\xf8\x6d\x37\xa4\xf9\x39"} }, { - GCRY_PK_DSA, FLAG_SIGN, + GCRY_PK_DSA, FLAG_SIGN | FLAG_GRIP, { "(private-key\n" " (DSA\n" @@ -13067,7 +13363,7 @@ check_pubkey (void) "\x4a\xa6\xf9\xeb\x23\xbf\xa9\x12\x2d\x5b" } }, { - GCRY_PK_ELG, FLAG_SIGN | FLAG_CRYPT, + GCRY_PK_ELG, FLAG_SIGN | FLAG_CRYPT | FLAG_GRIP, { "(private-key\n" " (ELG\n" @@ -13246,7 +13542,7 @@ check_pubkey (void) " 4DDFF75C45415C1D9DD9DD33612CD530EFE137C7C90CD4" " 0B0F5621DC3AC1B751CFA0E2634FA0503B3D52639F5D7F" " B72AFD61EA199441D943FFE7F0C70A2759A3CDB84C114E" - " 1F9339FDF27F35ECA93677BEEC#)))\n" + " 1F9339FDF27F35ECA93677BEEC#)))\n", "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" } @@ -13268,7 +13564,30 @@ check_pubkey (void) " (curve secp256k1)\n" " (q #0439A36013301597DAEF41FBE593A02CC513D0B55527EC2D" " F1050E2E8FF49C85C23CBE7DED0E7CE6A594896B8F62888F" - " DBC5C8821305E2EA42BF01E37300116281#)))\n" + " DBC5C8821305E2EA42BF01E37300116281#)))\n", + + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" } + }, + { /* sm2 test */ + GCRY_PK_ECDSA, FLAG_SIGN, + { + "(private-key\n" + " (ecc\n" + " (curve sm2p256v1)\n" + " (q #04" + " 8759389A34AAAD07ECF4E0C8C2650A4459C8D926EE2378324E0261C52538CB47" + " 7528106B1E0B7C8DD5FF29A9C86A89065656EB33154BC0556091EF8AC9D17D78#)" + " (d #41EBDBA9C98CBECCE7249CF18BFD427FF8EA0B2FAB7B9D305D9D9BF4DB6ADFC2#)" + "))", + + "(public-key\n" + " (ecc\n" + " (curve sm2p256v1)\n" + " (q #04" + " 8759389A34AAAD07ECF4E0C8C2650A4459C8D926EE2378324E0261C52538CB47" + " 7528106B1E0B7C8DD5FF29A9C86A89065656EB33154BC0556091EF8AC9D17D78#)" + "))", "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" } diff --git a/tests/curves.c b/tests/curves.c index ff244bd1..0dfa2acb 100644 --- a/tests/curves.c +++ b/tests/curves.c @@ -33,7 +33,7 @@ #include "t-common.h" /* Number of curves defined in ../cipger/ecc-curves.c */ -#define N_CURVES 25 +#define N_CURVES 26 /* A real world sample public key. */ static char const sample_key_1[] =