diff --git a/.gitignore b/.gitignore
index 0e268bfdc..495427270 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,4 @@ web/assets/
.idea/*
web/yaamp/.idea/
*.0
+.vscode/
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..27b2d597a
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "stratum/secp256k1"]
+ path = stratum/secp256k1
+ url = https://github.com/bitcoin-core/secp256k1
diff --git a/.settings/.jsdtscope b/.settings/.jsdtscope
new file mode 100644
index 000000000..cca691f6c
--- /dev/null
+++ b/.settings/.jsdtscope
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
diff --git a/.settings/org.eclipse.wst.jsdt.ui.superType.container b/.settings/org.eclipse.wst.jsdt.ui.superType.container
new file mode 100644
index 000000000..49c8cd4f1
--- /dev/null
+++ b/.settings/org.eclipse.wst.jsdt.ui.superType.container
@@ -0,0 +1 @@
+org.eclipse.wst.jsdt.launching.JRE_CONTAINER
\ No newline at end of file
diff --git a/.settings/org.eclipse.wst.jsdt.ui.superType.name b/.settings/org.eclipse.wst.jsdt.ui.superType.name
new file mode 100644
index 000000000..11006e2a5
--- /dev/null
+++ b/.settings/org.eclipse.wst.jsdt.ui.superType.name
@@ -0,0 +1 @@
+Global
\ No newline at end of file
diff --git a/README.md b/README.md
index 99c19e84d..60c7e7600 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-[](https://travis-ci.org/tpruvot/yiimp)
+[](https://travis-ci.org/Kudaraidee/yiimp)
#yiimp - yaamp fork
diff --git a/sql/2018-09-22-workers.sql b/sql/2018-09-22-workers.sql
new file mode 100644
index 000000000..9159e2d09
--- /dev/null
+++ b/sql/2018-09-22-workers.sql
@@ -0,0 +1,6 @@
+-- Recent additions to add after db init (.gz)
+-- mysql yaamp -p < file.sql
+
+ -- filled by the stratum instance, to allow to handle/watch multiple instances
+
+ ALTER TABLE `workers` MODIFY COLUMN name VARCHAR(52);
\ No newline at end of file
diff --git a/sql/2019-03-coins_thepool_life.sql b/sql/2019-03-coins_thepool_life.sql
new file mode 100644
index 000000000..5dce9f716
--- /dev/null
+++ b/sql/2019-03-coins_thepool_life.sql
@@ -0,0 +1,11 @@
+-- Recent additions to add after db init (.gz)
+-- mysql yaamp -p < file.sql
+
+-- Additional fields for additions by cryptopool.builders
+
+ALTER TABLE `coins` ADD `link_twitter` varchar(1024) DEFAULT NULL AFTER `link_explorer`;
+ALTER TABLE `coins` ADD `link_facebook` varchar(1024) DEFAULT NULL AFTER `link_twitter`;
+ALTER TABLE `coins` ADD `donation_address` varchar(1024) DEFAULT NULL AFTER `link_facebook`;
+ALTER TABLE `coins` ADD `link_discord` varchar(1024) DEFAULT NULL AFTER `link_twitter`;
+ALTER TABLE `coins` ADD `usefaucet` tinyint(1) UNSIGNED NOT NULL DEFAULT '0' AFTER `donation_address`;
+ALTER TABLE `coins` ADD `dedicatedport` int(11) DEFAULT NULL AFTER `rpcport`;
\ No newline at end of file
diff --git a/sql/2020-06-03-blocks.sql b/sql/2020-06-03-blocks.sql
new file mode 100644
index 000000000..55ad028a8
--- /dev/null
+++ b/sql/2020-06-03-blocks.sql
@@ -0,0 +1,6 @@
+-- Recent additions to add after db init (.gz)
+-- mysql yaamp -p < file.sql
+
+ -- add blocks for solo function
+
+ALTER TABLE `blocks` ADD `solo` TINYINT(1) NULL DEFAULT NULL AFTER `category`;
diff --git a/sql/2020-11-10-yaamp.sql.gz b/sql/2020-11-10-yaamp.sql.gz
new file mode 100644
index 000000000..3b80730e2
Binary files /dev/null and b/sql/2020-11-10-yaamp.sql.gz differ
diff --git a/sql/2021-06-21-yaamp.sql.gz b/sql/2021-06-21-yaamp.sql.gz
new file mode 100644
index 000000000..cb4afd051
Binary files /dev/null and b/sql/2021-06-21-yaamp.sql.gz differ
diff --git a/stratum/Makefile b/stratum/Makefile
index 3bf978bf3..954cbe5ff 100755
--- a/stratum/Makefile
+++ b/stratum/Makefile
@@ -1,5 +1,5 @@
-CC=gcc
+CC= gcc -no-pie
CFLAGS= -g -march=native
SQLFLAGS= `mysql_config --cflags --libs`
@@ -11,7 +11,7 @@ CFLAGS += -DNO_EXCHANGE
#CFLAGS=-c -O2 -I /usr/include/mysql
LDFLAGS=-O2 `mysql_config --libs`
-LDLIBS=iniparser/libiniparser.a algos/libalgos.a sha3/libhash.a -lpthread -lgmp -lm -lstdc++
+LDLIBS=iniparser/libiniparser.a algos/libalgos.a sha3/libhash.a -Isecp256k1/include secp256k1/.libs/libsecp256k1.a -lpthread -lgmp -lm -lstdc++ -lssl -lcrypto
LDLIBS+=-lmysqlclient
SOURCES=stratum.cpp db.cpp coind.cpp coind_aux.cpp coind_template.cpp coind_submit.cpp util.cpp list.cpp \
@@ -29,16 +29,25 @@ OUTPUT=stratum
CODEDIR1=algos
CODEDIR2=sha3
+CODEDIR3=iniparser
+CODEDIR4=secp256k1
-.PHONY: projectcode1 projectcode2
-all: projectcode1 projectcode2 $(SOURCES) $(OUTPUT)
+.PHONY: projectcode1 projectcode2 projectcode3 projectcode4
+
+all: projectcode1 projectcode2 projectcode3 projectcode4 $(SOURCES) $(OUTPUT)
projectcode1:
- $(MAKE) -C $(CODEDIR1)
+ git submodule init && git submodule update && $(MAKE) -C $(CODEDIR1)
projectcode2:
$(MAKE) -C $(CODEDIR2)
+
+projectcode3:
+ $(MAKE) -C $(CODEDIR3)
+
+projectcode4:
+ cd $(CODEDIR4) && chmod +x autogen.sh && ./autogen.sh && ./configure --enable-experimental --enable-module-ecdh --with-bignum=no --enable-endomorphism && $(MAKE)
$(SOURCES): stratum.h util.h
@@ -58,7 +67,12 @@ clean:
rm -f sha3/*.o
rm -f sha3/*.a
rm -f algos/ar2/*.o
-
+ rm -f algos/blake2/*.o
+ rm -f algos/blake2-ref/*.o
+ rm -f algos/honeycomb/*.o
+ rm -f algos/SWIFFTX/*.o
+ rm -f algos/yespower/*.o
+
install: clean all
strip -s stratum
cp stratum /usr/local/bin/
diff --git a/stratum/algos/Lyra2.c b/stratum/algos/Lyra2.c
index dbcc3fa6b..1d68fd1a0 100644
--- a/stratum/algos/Lyra2.c
+++ b/stratum/algos/Lyra2.c
@@ -212,3 +212,176 @@ int LYRA2(void *K, int64_t kLen, const void *pwd, int32_t pwdlen, const void *sa
return 0;
}
+
+int LYRA2_3(void *K, int64_t kLen, const void *pwd, int32_t pwdlen, const void *salt, int32_t saltlen, int64_t timeCost, const int16_t nRows, const int16_t nCols)
+{
+ //============================= Basic variables ============================//
+ int64_t row = 2; //index of row to be processed
+ int64_t prev = 1; //index of prev (last row ever computed/modified)
+ int64_t rowa = 0; //index of row* (a previous row, deterministically picked during Setup and randomly picked while Wandering)
+ int64_t tau; //Time Loop iterator
+ int64_t step = 1; //Visitation step (used during Setup and Wandering phases)
+ int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup)
+ int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1
+ int64_t i; //auxiliary iteration counter
+ int64_t v64; // 64bit var for memcpy
+ uint64_t instance = 0;
+ //==========================================================================/
+
+ //========== Initializing the Memory Matrix and pointers to it =============//
+ //Tries to allocate enough space for the whole memory matrix
+
+ const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
+ const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
+ // for Lyra2REv2, nCols = 4, v1 was using 8
+ const int64_t BLOCK_LEN = (nCols == 4) ? BLOCK_LEN_BLAKE2_SAFE_INT64 : BLOCK_LEN_BLAKE2_SAFE_BYTES;
+
+ size_t sz = (size_t)ROW_LEN_BYTES * nRows;
+ uint64_t *wholeMatrix = malloc(sz);
+ if (wholeMatrix == NULL) {
+ return -1;
+ }
+ memset(wholeMatrix, 0, sz);
+
+ //Allocates pointers to each row of the matrix
+ uint64_t **memMatrix = malloc(sizeof(uint64_t*) * nRows);
+ if (memMatrix == NULL) {
+ return -1;
+ }
+ //Places the pointers in the correct positions
+ uint64_t *ptrWord = wholeMatrix;
+ for (i = 0; i < nRows; i++) {
+ memMatrix[i] = ptrWord;
+ ptrWord += ROW_LEN_INT64;
+ }
+ //==========================================================================/
+
+ //============= Getting the password + salt + basil padded with 10*1 ===============//
+ //OBS.:The memory matrix will temporarily hold the password: not for saving memory,
+ //but this ensures that the password copied locally will be overwritten as soon as possible
+
+ //First, we clean enough blocks for the password, salt, basil and padding
+ int64_t nBlocksInput = ((saltlen + pwdlen + 6 * sizeof(uint64_t)) / BLOCK_LEN_BLAKE2_SAFE_BYTES) + 1;
+
+ byte *ptrByte = (byte*) wholeMatrix;
+
+ //Prepends the password
+ memcpy(ptrByte, pwd, pwdlen);
+ ptrByte += pwdlen;
+
+ //Concatenates the salt
+ memcpy(ptrByte, salt, saltlen);
+ ptrByte += saltlen;
+
+ memset(ptrByte, 0, (size_t) (nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - (saltlen + pwdlen)));
+
+ //Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface
+ memcpy(ptrByte, &kLen, sizeof(int64_t));
+ ptrByte += sizeof(uint64_t);
+ v64 = pwdlen;
+ memcpy(ptrByte, &v64, sizeof(int64_t));
+ ptrByte += sizeof(uint64_t);
+ v64 = saltlen;
+ memcpy(ptrByte, &v64, sizeof(int64_t));
+ ptrByte += sizeof(uint64_t);
+ v64 = timeCost;
+ memcpy(ptrByte, &v64, sizeof(int64_t));
+ ptrByte += sizeof(uint64_t);
+ v64 = nRows;
+ memcpy(ptrByte, &v64, sizeof(int64_t));
+ ptrByte += sizeof(uint64_t);
+ v64 = nCols;
+ memcpy(ptrByte, &v64, sizeof(int64_t));
+ ptrByte += sizeof(uint64_t);
+
+ //Now comes the padding
+ *ptrByte = 0x80; //first byte of padding: right after the password
+ ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix
+ ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block
+ *ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block
+ //==========================================================================/
+
+ //======================= Initializing the Sponge State ====================//
+ //Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
+ uint64_t state[16];
+ initState(state);
+ //==========================================================================/
+
+ //================================ Setup Phase =============================//
+ //Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits
+ ptrWord = wholeMatrix;
+ for (i = 0; i < nBlocksInput; i++) {
+ absorbBlockBlake2Safe(state, ptrWord); //absorbs each block of pad(pwd || salt || basil)
+ ptrWord += BLOCK_LEN; //goes to next block of pad(pwd || salt || basil)
+ }
+
+ //Initializes M[0] and M[1]
+ reducedSqueezeRow0(state, memMatrix[0], nCols); //The locally copied password is most likely overwritten here
+
+ reducedDuplexRow1(state, memMatrix[0], memMatrix[1], nCols);
+
+ do {
+ //M[row] = rand; //M[row*] = M[row*] XOR rotW(rand)
+
+ reducedDuplexRowSetup(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols);
+
+ //updates the value of row* (deterministically picked during Setup))
+ rowa = (rowa + step) & (window - 1);
+ //update prev: it now points to the last row ever computed
+ prev = row;
+ //updates row: goes to the next row to be computed
+ row++;
+
+ //Checks if all rows in the window where visited.
+ if (rowa == 0) {
+ step = window + gap; //changes the step: approximately doubles its value
+ window *= 2; //doubles the size of the re-visitation window
+ gap = -gap; //inverts the modifier to the step
+ }
+
+ } while (row < nRows);
+ //==========================================================================/
+
+ //============================ Wandering Phase =============================//
+ row = 0; //Resets the visitation to the first row of the memory matrix
+ for (tau = 1; tau <= timeCost; tau++) {
+ //Step is approximately half the number of all rows of the memory matrix for an odd tau; otherwise, it is -1
+ step = (tau % 2 == 0) ? -1 : nRows / 2 - 1;
+ do {
+ //Selects a pseudorandom index row*
+ //------------------------------------------------------------------------------------------
+ instance = state[instance % 16];
+ rowa = state[instance % 16] & (unsigned int)(nRows-1);
+
+ //rowa = state[0] & (unsigned int)(nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
+ //rowa = state[0] % nRows; //(USE THIS FOR THE "GENERIC" CASE)
+ //------------------------------------------------------------------------------------------
+
+ //Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row]
+ reducedDuplexRow(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols);
+
+ //update prev: it now points to the last row ever computed
+ prev = row;
+
+ //updates row: goes to the next row to be computed
+ //------------------------------------------------------------------------------------------
+ row = (row + step) & (unsigned int)(nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
+ //row = (row + step) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
+ //------------------------------------------------------------------------------------------
+
+ } while (row != 0);
+ }
+
+ //============================ Wrap-up Phase ===============================//
+ //Absorbs the last block of the memory matrix
+ absorbBlock(state, memMatrix[rowa]);
+
+ //Squeezes the key
+ squeeze(state, K, (unsigned int) kLen);
+
+ //========================= Freeing the memory =============================//
+ free(memMatrix);
+ free(wholeMatrix);
+
+ return 0;
+}
diff --git a/stratum/algos/Lyra2.h b/stratum/algos/Lyra2.h
index e25432a1a..2b8773d6d 100644
--- a/stratum/algos/Lyra2.h
+++ b/stratum/algos/Lyra2.h
@@ -38,5 +38,6 @@ typedef unsigned char byte;
#endif
int LYRA2(void *K, int64_t kLen, const void *pwd, int32_t pwdlen, const void *salt, int32_t saltlen, int64_t timeCost, const int64_t nRows, const int16_t nCols);
+int LYRA2_3(void *K, int64_t kLen, const void *pwd, int32_t pwdlen, const void *salt, int32_t saltlen, int64_t timeCost, const int16_t nRows, const int16_t nCols);
#endif /* LYRA2_H_ */
diff --git a/stratum/algos/SWIFFTX/SWIFFTX.c b/stratum/algos/SWIFFTX/SWIFFTX.c
new file mode 100644
index 000000000..93893ab4c
--- /dev/null
+++ b/stratum/algos/SWIFFTX/SWIFFTX.c
@@ -0,0 +1,1155 @@
+///////////////////////////////////////////////////////////////////////////////////////////////
+//
+// SWIFFTX ANSI C OPTIMIZED 32BIT IMPLEMENTATION FOR NIST SHA-3 COMPETITION
+//
+// SWIFFTX.c
+//
+// October 2008
+//
+// This is the source file of the OPTIMIZED 32BIT implementation of SWIFFTX hash function.
+// SWIFFTX is a candidate function for SHA-3 NIST competition.
+// More details about SWIFFTX can be found in the accompanying submission documents.
+//
+///////////////////////////////////////////////////////////////////////////////////////////////
+#include "SWIFFTX.h"
+// See the remarks concerning compatibility issues inside stdint.h.
+#include "stdint.h"
+// Remove this while using gcc:
+//#include "stdbool.h"
+#include
+
+///////////////////////////////////////////////////////////////////////////////////////////////
+// Constants and static tables portion.
+///////////////////////////////////////////////////////////////////////////////////////////////
+
+// In SWIFFTX we work over Z_257, so this is the modulus and the arithmetic is performed modulo
+// this number.
+#define FIELD_SIZE 257
+
+// The size of FFT we use:
+#define N 64
+
+#define LOGN 6
+
+#define EIGHTH_N (N / 8)
+
+// The number of FFTS done on the input.
+#define M (SWIFFTX_INPUT_BLOCK_SIZE / 8) // 32
+
+// Omega is the 128th root of unity in Z_257.
+// We choose w = 42.
+#define OMEGA 42
+
+// The size of the inner FFT lookup table:
+#define W 8
+
+// Calculates the sum and the difference of two numbers.
+//
+// Parameters:
+// - A: the first operand. After the operation stores the sum of the two operands.
+// - B: the second operand. After the operation stores the difference between the first and the
+// second operands.
+#define ADD_SUB(A, B) {register int temp = (B); B = ((A) - (B)); A = ((A) + (temp));}
+
+// Quickly reduces an integer modulo 257.
+//
+// Parameters:
+// - A: the input.
+#define Q_REDUCE(A) (((A) & 0xff) - ((A) >> 8))
+
+// Since we need to do the setup only once, this is the indicator variable:
+static bool wasSetupDone = false;
+
+// This array stores the powers of omegas that correspond to the indices, which are the input
+// values. Known also as the "outer FFT twiddle factors".
+swift_int16_t multipliers[N];
+
+// This array stores the powers of omegas, multiplied by the corresponding values.
+// We store this table to save computation time.
+//
+// To calculate the intermediate value of the compression function (the first out of two
+// stages), we multiply the k-th bit of x_i by w^[(2i + 1) * k]. {x_i} is the input to the
+// compression function, i is between 0 and 31, x_i is a 64-bit value.
+// One can see the formula for this (intermediate) stage in the SWIFFT FSE 2008 paper --
+// formula (2), section 3, page 6.
+swift_int16_t fftTable[256 * EIGHTH_N];
+
+// The A's we use in SWIFFTX shall be random elements of Z_257.
+// We generated these A's from the decimal expansion of PI as follows: we converted each
+// triple of digits into a decimal number d. If d < (257 * 3) we used (d % 257) for the next A
+// element, otherwise move to the next triple of digits in the expansion. This guarntees that
+// the A's are random, provided that PI digits are.
+const swift_int16_t As[3 * M * N] =
+{141, 78, 139, 75, 238, 205, 129, 126, 22, 245, 197, 169, 142, 118, 105, 78,
+ 50, 149, 29, 208, 114, 34, 85, 117, 67, 148, 86, 256, 25, 49, 133, 93,
+ 95, 36, 68, 231, 211, 102, 151, 128, 224, 117, 193, 27, 102, 187, 7, 105,
+ 45, 130, 108, 124, 171, 151, 189, 128, 218, 134, 233, 165, 14, 201, 145, 134,
+ 52, 203, 91, 96, 197, 69, 134, 213, 136, 93, 3, 249, 141, 16, 210, 73,
+ 6, 92, 58, 74, 174, 6, 254, 91, 201, 107, 110, 76, 103, 11, 73, 16,
+ 34, 209, 7, 127, 146, 254, 95, 176, 57, 13, 108, 245, 77, 92, 186, 117,
+ 124, 97, 105, 118, 34, 74, 205, 122, 235, 53, 94, 238, 210, 227, 183, 11,
+ 129, 159, 105, 183, 142, 129, 86, 21, 137, 138, 224, 223, 190, 188, 179, 188,
+ 256, 25, 217, 176, 36, 176, 238, 127, 160, 210, 155, 148, 132, 0, 54, 127,
+ 145, 6, 46, 85, 243, 95, 173, 123, 178, 207, 211, 183, 224, 173, 146, 35,
+ 71, 114, 50, 22, 175, 1, 28, 19, 112, 129, 21, 34, 161, 159, 115, 52,
+ 4, 193, 211, 92, 115, 49, 59, 217, 218, 96, 61, 81, 24, 202, 198, 89,
+ 45, 128, 8, 51, 253, 87, 171, 35, 4, 188, 171, 10, 3, 137, 238, 73,
+ 19, 208, 124, 163, 103, 177, 155, 147, 46, 84, 253, 233, 171, 241, 211, 217,
+ 159, 48, 96, 79, 237, 18, 171, 226, 99, 1, 97, 195, 216, 163, 198, 95,
+ 0, 201, 65, 228, 21, 153, 124, 230, 44, 35, 44, 108, 85, 156, 249, 207,
+ 26, 222, 131, 1, 60, 242, 197, 150, 181, 19, 116, 213, 75, 98, 124, 240,
+ 123, 207, 62, 255, 60, 143, 187, 157, 139, 9, 12, 104, 89, 49, 193, 146,
+ 104, 196, 181, 82, 198, 253, 192, 191, 255, 122, 212, 104, 47, 20, 132, 208,
+ 46, 170, 2, 69, 234, 36, 56, 163, 28, 152, 104, 238, 162, 56, 24, 58,
+ 38, 150, 193, 254, 253, 125, 173, 35, 73, 126, 247, 239, 216, 6, 199, 15,
+ 90, 12, 97, 122, 9, 84, 207, 127, 219, 72, 58, 30, 29, 182, 41, 192,
+ 235, 248, 237, 74, 72, 176, 210, 252, 45, 64, 165, 87, 202, 241, 236, 223,
+ 151, 242, 119, 239, 52, 112, 169, 28, 13, 37, 160, 60, 158, 81, 133, 60,
+ 16, 145, 249, 192, 173, 217, 214, 93, 141, 184, 54, 34, 161, 104, 157, 95,
+ 38, 133, 218, 227, 211, 181, 9, 66, 137, 143, 77, 33, 248, 159, 4, 55,
+ 228, 48, 99, 219, 222, 184, 15, 36, 254, 256, 157, 237, 87, 139, 209, 113,
+ 232, 85, 126, 167, 197, 100, 103, 166, 64, 225, 125, 205, 117, 135, 84, 128,
+ 231, 112, 90, 241, 28, 22, 210, 147, 186, 49, 230, 21, 108, 39, 194, 47,
+ 123, 199, 107, 114, 30, 210, 250, 143, 59, 156, 131, 133, 221, 27, 76, 99,
+ 208, 250, 78, 12, 211, 141, 95, 81, 195, 106, 8, 232, 150, 212, 205, 221,
+ 11, 225, 87, 219, 126, 136, 137, 180, 198, 48, 68, 203, 239, 252, 194, 235,
+ 142, 137, 174, 172, 190, 145, 250, 221, 182, 204, 1, 195, 130, 153, 83, 241,
+ 161, 239, 211, 138, 11, 169, 155, 245, 174, 49, 10, 166, 16, 130, 181, 139,
+ 222, 222, 112, 99, 124, 94, 51, 243, 133, 194, 244, 136, 35, 248, 201, 177,
+ 178, 186, 129, 102, 89, 184, 180, 41, 149, 96, 165, 72, 225, 231, 134, 158,
+ 199, 28, 249, 16, 225, 195, 10, 210, 164, 252, 138, 8, 35, 152, 213, 199,
+ 82, 116, 97, 230, 63, 199, 241, 35, 79, 120, 54, 174, 67, 112, 1, 76,
+ 69, 222, 194, 96, 82, 94, 25, 228, 196, 145, 155, 136, 228, 234, 46, 101,
+ 246, 51, 103, 166, 246, 75, 9, 200, 161, 4, 108, 35, 129, 168, 208, 144,
+ 50, 14, 13, 220, 41, 132, 122, 127, 194, 9, 232, 234, 107, 28, 187, 8,
+ 51, 141, 97, 221, 225, 9, 113, 170, 166, 102, 135, 22, 231, 185, 227, 187,
+ 110, 145, 251, 146, 76, 22, 146, 228, 7, 53, 64, 25, 62, 198, 130, 190,
+ 221, 232, 169, 64, 188, 199, 237, 249, 173, 218, 196, 191, 48, 224, 5, 113,
+ 100, 166, 160, 21, 191, 197, 61, 162, 149, 171, 240, 183, 129, 231, 123, 204,
+ 192, 179, 134, 15, 47, 161, 142, 177, 239, 234, 186, 237, 231, 53, 208, 95,
+ 146, 36, 225, 231, 89, 142, 93, 248, 137, 124, 83, 39, 69, 77, 89, 208,
+ 182, 48, 85, 147, 244, 164, 246, 68, 38, 190, 220, 35, 202, 91, 157, 151,
+ 201, 240, 185, 218, 4, 152, 2, 132, 177, 88, 190, 196, 229, 74, 220, 135,
+ 137, 196, 11, 47, 5, 251, 106, 144, 163, 60, 222, 127, 52, 57, 202, 102,
+ 64, 140, 110, 206, 23, 182, 39, 245, 1, 163, 157, 186, 163, 80, 7, 230,
+ 44, 249, 176, 102, 164, 125, 147, 120, 18, 191, 186, 125, 64, 65, 198, 157,
+ 164, 213, 95, 61, 13, 181, 208, 91, 242, 197, 158, 34, 98, 169, 91, 14,
+ 17, 93, 157, 17, 65, 30, 183, 6, 139, 58, 255, 108, 100, 136, 209, 144,
+ 164, 6, 237, 33, 210, 110, 57, 126, 197, 136, 125, 244, 165, 151, 168, 3,
+ 143, 251, 247, 155, 136, 130, 88, 14, 74, 121, 250, 133, 21, 226, 185, 232,
+ 118, 132, 89, 64, 204, 161, 2, 70, 224, 159, 35, 204, 123, 180, 13, 52,
+ 231, 57, 25, 78, 66, 69, 97, 42, 198, 84, 176, 59, 8, 232, 125, 134,
+ 193, 2, 232, 109, 216, 69, 90, 142, 32, 38, 249, 37, 75, 180, 184, 188,
+ 19, 47, 120, 87, 146, 70, 232, 120, 191, 45, 33, 38, 19, 248, 110, 110,
+ 44, 64, 2, 84, 244, 228, 252, 228, 170, 123, 38, 144, 213, 144, 171, 212,
+ 243, 87, 189, 46, 128, 110, 84, 77, 65, 183, 61, 184, 101, 44, 168, 68,
+ 14, 106, 105, 8, 227, 211, 166, 39, 152, 43, 52, 254, 197, 55, 119, 89,
+ 168, 65, 53, 138, 177, 56, 219, 0, 58, 121, 148, 18, 44, 100, 215, 103,
+ 145, 229, 117, 196, 91, 89, 113, 143, 172, 239, 249, 184, 154, 39, 112, 65,
+ 204, 42, 84, 38, 155, 151, 151, 16, 100, 87, 174, 162, 145, 147, 149, 186,
+ 237, 145, 134, 144, 198, 235, 213, 163, 48, 230, 24, 47, 57, 71, 127, 0,
+ 150, 219, 12, 81, 197, 150, 131, 13, 169, 63, 175, 184, 48, 235, 65, 243,
+ 149, 200, 163, 254, 202, 114, 247, 67, 143, 250, 126, 228, 80, 130, 216, 214,
+ 36, 2, 230, 33, 119, 125, 3, 142, 237, 100, 3, 152, 197, 174, 244, 129,
+ 232, 30, 206, 199, 39, 210, 220, 43, 237, 221, 201, 54, 179, 42, 28, 133,
+ 246, 203, 198, 177, 0, 28, 194, 85, 223, 109, 155, 147, 221, 60, 133, 108,
+ 157, 254, 26, 75, 157, 185, 49, 142, 31, 137, 71, 43, 63, 64, 237, 148,
+ 237, 172, 159, 160, 155, 254, 234, 224, 140, 193, 114, 140, 62, 109, 136, 39,
+ 255, 8, 158, 146, 128, 49, 222, 96, 57, 209, 180, 249, 202, 127, 113, 231,
+ 78, 178, 46, 33, 228, 215, 104, 31, 207, 186, 82, 41, 42, 39, 103, 119,
+ 123, 133, 243, 254, 238, 156, 90, 186, 37, 212, 33, 107, 252, 51, 177, 36,
+ 237, 76, 159, 245, 93, 214, 97, 56, 190, 38, 160, 94, 105, 222, 220, 158,
+ 49, 16, 191, 52, 120, 87, 179, 2, 27, 144, 223, 230, 184, 6, 129, 227,
+ 69, 47, 215, 181, 162, 139, 72, 200, 45, 163, 159, 62, 2, 221, 124, 40,
+ 159, 242, 35, 208, 179, 166, 98, 67, 178, 68, 143, 225, 178, 146, 187, 159,
+ 57, 66, 176, 192, 236, 250, 168, 224, 122, 43, 159, 120, 133, 165, 122, 64,
+ 87, 74, 161, 241, 9, 87, 90, 24, 255, 113, 203, 220, 57, 139, 197, 159,
+ 31, 151, 27, 140, 77, 162, 7, 27, 84, 228, 187, 220, 53, 126, 162, 242,
+ 84, 181, 223, 103, 86, 177, 207, 31, 140, 18, 207, 256, 201, 166, 96, 23,
+ 233, 103, 197, 84, 161, 75, 59, 149, 138, 154, 119, 92, 16, 53, 116, 97,
+ 220, 114, 35, 45, 77, 209, 40, 196, 71, 22, 81, 178, 110, 14, 3, 180,
+ 110, 129, 112, 47, 18, 61, 134, 78, 73, 79, 254, 232, 125, 180, 205, 54,
+ 220, 119, 63, 89, 181, 52, 77, 109, 151, 77, 80, 207, 144, 25, 20, 6,
+ 208, 47, 201, 206, 192, 14, 73, 176, 256, 201, 207, 87, 216, 60, 56, 73,
+ 92, 243, 179, 113, 49, 59, 55, 168, 121, 137, 69, 154, 95, 57, 187, 47,
+ 129, 4, 15, 92, 6, 116, 69, 196, 48, 134, 84, 81, 111, 56, 38, 176,
+ 239, 6, 128, 72, 242, 134, 36, 221, 59, 48, 242, 68, 130, 110, 171, 89,
+ 13, 220, 48, 29, 5, 75, 104, 233, 91, 129, 105, 162, 44, 113, 163, 163,
+ 85, 147, 190, 111, 197, 80, 213, 153, 81, 68, 203, 33, 161, 165, 10, 61,
+ 120, 252, 0, 205, 28, 42, 193, 64, 39, 37, 83, 175, 5, 218, 215, 174,
+ 128, 121, 231, 11, 150, 145, 135, 197, 136, 91, 193, 5, 107, 88, 82, 6,
+ 4, 188, 256, 70, 40, 2, 167, 57, 169, 203, 115, 254, 215, 172, 84, 80,
+ 188, 167, 34, 137, 43, 243, 2, 79, 178, 38, 188, 135, 233, 194, 208, 13,
+ 11, 151, 231, 196, 12, 122, 162, 56, 17, 114, 191, 207, 90, 132, 64, 238,
+ 187, 6, 198, 176, 240, 88, 118, 236, 15, 226, 166, 22, 193, 229, 82, 246,
+ 213, 64, 37, 63, 31, 243, 252, 37, 156, 38, 175, 204, 138, 141, 211, 82,
+ 106, 217, 97, 139, 153, 56, 129, 218, 158, 9, 83, 26, 87, 112, 71, 21,
+ 250, 5, 65, 141, 68, 116, 231, 113, 10, 218, 99, 205, 201, 92, 157, 4,
+ 97, 46, 49, 220, 72, 139, 103, 171, 149, 129, 193, 19, 69, 245, 43, 31,
+ 58, 68, 36, 195, 159, 22, 54, 34, 233, 141, 205, 100, 226, 96, 22, 192,
+ 41, 231, 24, 79, 234, 138, 30, 120, 117, 216, 172, 197, 172, 107, 86, 29,
+ 181, 151, 0, 6, 146, 186, 68, 55, 54, 58, 213, 182, 60, 231, 33, 232,
+ 77, 210, 216, 154, 80, 51, 141, 122, 68, 148, 219, 122, 254, 48, 64, 175,
+ 41, 115, 62, 243, 141, 81, 119, 121, 5, 68, 121, 88, 239, 29, 230, 90,
+ 135, 159, 35, 223, 168, 112, 49, 37, 146, 60, 126, 134, 42, 145, 115, 90,
+ 73, 133, 211, 86, 120, 141, 122, 241, 127, 56, 130, 36, 174, 75, 83, 246,
+ 112, 45, 136, 194, 201, 115, 1, 156, 114, 167, 208, 12, 176, 147, 32, 170,
+ 251, 100, 102, 220, 122, 210, 6, 49, 75, 201, 38, 105, 132, 135, 126, 102,
+ 13, 121, 76, 228, 202, 20, 61, 213, 246, 13, 207, 42, 148, 168, 37, 253,
+ 34, 94, 141, 185, 18, 234, 157, 109, 104, 64, 250, 125, 49, 236, 86, 48,
+ 196, 77, 75, 237, 156, 103, 225, 19, 110, 229, 22, 68, 177, 93, 221, 181,
+ 152, 153, 61, 108, 101, 74, 247, 195, 127, 216, 30, 166, 168, 61, 83, 229,
+ 120, 156, 96, 120, 201, 124, 43, 27, 253, 250, 120, 143, 89, 235, 189, 243,
+ 150, 7, 127, 119, 149, 244, 84, 185, 134, 34, 128, 193, 236, 234, 132, 117,
+ 137, 32, 145, 184, 44, 121, 51, 76, 11, 228, 142, 251, 39, 77, 228, 251,
+ 41, 58, 246, 107, 125, 187, 9, 240, 35, 8, 11, 162, 242, 220, 158, 163,
+ 2, 184, 163, 227, 242, 2, 100, 101, 2, 78, 129, 34, 89, 28, 26, 157,
+ 79, 31, 107, 250, 194, 156, 186, 69, 212, 66, 41, 180, 139, 42, 211, 253,
+ 256, 239, 29, 129, 104, 248, 182, 68, 1, 189, 48, 226, 36, 229, 3, 158,
+ 41, 53, 241, 22, 115, 174, 16, 163, 224, 19, 112, 219, 177, 233, 42, 27,
+ 250, 134, 18, 28, 145, 122, 68, 34, 134, 31, 147, 17, 39, 188, 150, 76,
+ 45, 42, 167, 249, 12, 16, 23, 182, 13, 79, 121, 3, 70, 197, 239, 44,
+ 86, 177, 255, 81, 64, 171, 138, 131, 73, 110, 44, 201, 254, 198, 146, 91,
+ 48, 9, 104, 31, 29, 161, 101, 31, 138, 180, 231, 233, 79, 137, 61, 236,
+ 140, 15, 249, 218, 234, 119, 99, 195, 110, 137, 237, 207, 8, 31, 45, 24,
+ 90, 155, 203, 253, 192, 203, 65, 176, 210, 171, 142, 214, 220, 122, 136, 237,
+ 189, 186, 147, 40, 80, 254, 173, 33, 191, 46, 192, 26, 108, 255, 228, 205,
+ 61, 76, 39, 107, 225, 126, 228, 182, 140, 251, 143, 134, 252, 168, 221, 8,
+ 185, 85, 60, 233, 147, 244, 87, 137, 8, 140, 96, 80, 53, 45, 175, 160,
+ 124, 189, 112, 37, 144, 19, 70, 17, 170, 242, 2, 3, 28, 95, 120, 199,
+ 212, 43, 9, 117, 86, 151, 101, 241, 200, 145, 241, 19, 178, 69, 204, 197,
+ 227, 166, 94, 7, 193, 45, 247, 234, 19, 187, 212, 212, 236, 125, 33, 95,
+ 198, 121, 122, 103, 77, 155, 235, 49, 25, 237, 249, 11, 162, 7, 238, 24,
+ 16, 150, 129, 25, 152, 17, 42, 67, 247, 162, 77, 154, 31, 133, 55, 137,
+ 79, 119, 153, 10, 86, 28, 244, 186, 41, 169, 106, 44, 10, 49, 110, 179,
+ 32, 133, 155, 244, 61, 70, 131, 168, 170, 39, 231, 252, 32, 69, 92, 238,
+ 239, 35, 132, 136, 236, 167, 90, 32, 123, 88, 69, 22, 20, 89, 145, 166,
+ 30, 118, 75, 4, 49, 31, 225, 54, 11, 50, 56, 191, 246, 1, 187, 33,
+ 119, 107, 139, 68, 19, 240, 131, 55, 94, 113, 31, 252, 12, 179, 121, 2,
+ 120, 252, 0, 76, 41, 80, 185, 42, 62, 121, 105, 159, 121, 109, 111, 98,
+ 7, 118, 86, 29, 210, 70, 231, 179, 223, 229, 164, 70, 62, 47, 0, 206,
+ 204, 178, 168, 120, 224, 166, 99, 25, 103, 63, 246, 224, 117, 204, 75, 124,
+ 140, 133, 110, 110, 222, 88, 151, 118, 46, 37, 22, 143, 158, 40, 2, 50,
+ 153, 94, 190, 199, 13, 198, 127, 211, 180, 90, 183, 98, 0, 142, 210, 154,
+ 100, 187, 67, 231, 202, 100, 198, 235, 252, 160, 247, 124, 247, 14, 121, 221,
+ 57, 88, 253, 243, 185, 89, 45, 249, 221, 194, 108, 175, 193, 119, 50, 141,
+ 223, 133, 136, 64, 176, 250, 129, 100, 124, 94, 181, 159, 99, 185, 177, 240,
+ 135, 42, 103, 52, 202, 208, 143, 186, 193, 103, 154, 237, 102, 88, 225, 161,
+ 50, 188, 191, 109, 12, 87, 19, 227, 247, 183, 13, 52, 205, 170, 205, 146,
+ 89, 160, 18, 105, 192, 73, 231, 225, 184, 157, 252, 220, 61, 59, 169, 183,
+ 221, 20, 141, 20, 158, 101, 245, 7, 245, 225, 118, 137, 84, 55, 19, 27,
+ 164, 110, 35, 25, 202, 94, 150, 46, 91, 152, 130, 1, 7, 46, 16, 237,
+ 171, 109, 19, 200, 65, 38, 10, 213, 70, 96, 126, 226, 185, 225, 181, 46,
+ 10, 165, 11, 123, 53, 158, 22, 147, 64, 22, 227, 69, 182, 237, 197, 37,
+ 39, 49, 186, 223, 139, 128, 55, 36, 166, 178, 220, 20, 98, 172, 166, 253,
+ 45, 0, 120, 180, 189, 185, 158, 159, 196, 6, 214, 79, 141, 52, 156, 107,
+ 5, 109, 142, 159, 33, 64, 190, 133, 95, 132, 95, 202, 160, 63, 186, 23,
+ 231, 107, 163, 33, 234, 15, 244, 77, 108, 49, 51, 7, 164, 87, 142, 99,
+ 240, 202, 47, 256, 118, 190, 196, 178, 217, 42, 39, 153, 21, 192, 232, 202,
+ 14, 82, 179, 64, 233, 4, 219, 10, 133, 78, 43, 144, 146, 216, 202, 81,
+ 71, 252, 8, 201, 68, 256, 85, 233, 164, 88, 176, 30, 5, 152, 126, 179,
+ 249, 84, 140, 190, 159, 54, 118, 98, 2, 159, 27, 133, 74, 121, 239, 196,
+ 71, 149, 119, 135, 102, 20, 87, 112, 44, 75, 221, 3, 151, 158, 5, 98,
+ 152, 25, 97, 106, 63, 171, 240, 79, 234, 240, 230, 92, 76, 70, 173, 196,
+ 36, 225, 218, 133, 64, 240, 150, 41, 146, 66, 133, 51, 134, 73, 170, 238,
+ 140, 90, 45, 89, 46, 147, 96, 169, 174, 174, 244, 151, 90, 40, 32, 74,
+ 38, 154, 246, 57, 31, 14, 189, 151, 83, 243, 197, 183, 220, 185, 53, 225,
+ 51, 106, 188, 208, 222, 248, 93, 13, 93, 215, 131, 25, 142, 185, 113, 222,
+ 131, 215, 149, 50, 159, 85, 32, 5, 205, 192, 2, 227, 42, 214, 197, 42,
+ 126, 182, 68, 123, 109, 36, 237, 179, 170, 199, 77, 256, 5, 128, 214, 243,
+ 137, 177, 170, 253, 179, 180, 153, 236, 100, 196, 216, 231, 198, 37, 192, 80,
+ 121, 221, 246, 1, 16, 246, 29, 78, 64, 148, 124, 38, 96, 125, 28, 20,
+ 48, 51, 73, 187, 139, 208, 98, 253, 221, 188, 84, 129, 1, 205, 95, 205,
+ 117, 79, 71, 126, 134, 237, 19, 184, 137, 125, 129, 178, 223, 54, 188, 112,
+ 30, 7, 225, 228, 205, 184, 233, 87, 117, 22, 58, 10, 8, 42, 2, 114,
+ 254, 19, 17, 13, 150, 92, 233, 179, 63, 12, 60, 171, 127, 35, 50, 5,
+ 195, 113, 241, 25, 249, 184, 166, 44, 221, 35, 151, 116, 8, 54, 195, 89,
+ 218, 186, 132, 5, 41, 89, 226, 177, 11, 41, 87, 172, 5, 23, 20, 59,
+ 228, 94, 76, 33, 137, 43, 151, 221, 61, 232, 4, 120, 93, 217, 80, 228,
+ 228, 6, 58, 25, 62, 84, 91, 48, 209, 20, 247, 243, 55, 106, 80, 79,
+ 235, 34, 20, 180, 146, 2, 236, 13, 236, 206, 243, 222, 204, 83, 148, 213,
+ 214, 117, 237, 98, 0, 90, 204, 168, 32, 41, 126, 67, 191, 74, 27, 255,
+ 26, 75, 240, 113, 185, 105, 167, 154, 112, 67, 151, 63, 161, 134, 239, 176,
+ 42, 87, 249, 130, 45, 242, 17, 100, 107, 120, 212, 218, 237, 76, 231, 162,
+ 175, 172, 118, 155, 92, 36, 124, 17, 121, 71, 13, 9, 82, 126, 147, 142,
+ 218, 148, 138, 80, 163, 106, 164, 123, 140, 129, 35, 42, 186, 154, 228, 214,
+ 75, 73, 8, 253, 42, 153, 232, 164, 95, 24, 110, 90, 231, 197, 90, 196,
+ 57, 164, 252, 181, 31, 7, 97, 256, 35, 77, 200, 212, 99, 179, 92, 227,
+ 17, 180, 49, 176, 9, 188, 13, 182, 93, 44, 128, 219, 134, 92, 151, 6,
+ 23, 126, 200, 109, 66, 30, 140, 180, 146, 134, 67, 200, 7, 9, 223, 168,
+ 186, 221, 3, 154, 150, 165, 43, 53, 138, 27, 86, 213, 235, 160, 70, 2,
+ 240, 20, 89, 212, 84, 141, 168, 246, 183, 227, 30, 167, 138, 185, 253, 83,
+ 52, 143, 236, 94, 59, 65, 89, 218, 194, 157, 164, 156, 111, 95, 202, 168,
+ 245, 256, 151, 28, 222, 194, 72, 130, 217, 134, 253, 77, 246, 100, 76, 32,
+ 254, 174, 182, 193, 14, 237, 74, 1, 74, 26, 135, 216, 152, 208, 112, 38,
+ 181, 62, 25, 71, 61, 234, 254, 97, 191, 23, 92, 256, 190, 205, 6, 16,
+ 134, 147, 210, 219, 148, 59, 73, 185, 24, 247, 174, 143, 116, 220, 128, 144,
+ 111, 126, 101, 98, 130, 136, 101, 102, 69, 127, 24, 168, 146, 226, 226, 207,
+ 176, 122, 149, 254, 134, 196, 22, 151, 197, 21, 50, 205, 116, 154, 65, 116,
+ 177, 224, 127, 77, 177, 159, 225, 69, 176, 54, 100, 104, 140, 8, 11, 126,
+ 11, 188, 185, 159, 107, 16, 254, 142, 80, 28, 5, 157, 104, 57, 109, 82,
+ 102, 80, 173, 242, 238, 207, 57, 105, 237, 160, 59, 189, 189, 199, 26, 11,
+ 190, 156, 97, 118, 20, 12, 254, 189, 165, 147, 142, 199, 5, 213, 64, 133,
+ 108, 217, 133, 60, 94, 28, 116, 136, 47, 165, 125, 42, 183, 143, 14, 129,
+ 223, 70, 212, 205, 181, 180, 3, 201, 182, 46, 57, 104, 239, 60, 99, 181,
+ 220, 231, 45, 79, 156, 89, 149, 143, 190, 103, 153, 61, 235, 73, 136, 20,
+ 89, 243, 16, 130, 247, 141, 134, 93, 80, 68, 85, 84, 8, 72, 194, 4,
+ 242, 110, 19, 133, 199, 70, 172, 92, 132, 254, 67, 74, 36, 94, 13, 90,
+ 154, 184, 9, 109, 118, 243, 214, 71, 36, 95, 0, 90, 201, 105, 112, 215,
+ 69, 196, 224, 210, 236, 242, 155, 211, 37, 134, 69, 113, 157, 97, 68, 26,
+ 230, 149, 219, 180, 20, 76, 172, 145, 154, 40, 129, 8, 93, 56, 162, 124,
+ 207, 233, 105, 19, 3, 183, 155, 134, 8, 244, 213, 78, 139, 88, 156, 37,
+ 51, 152, 111, 102, 112, 250, 114, 252, 201, 241, 133, 24, 136, 153, 5, 90,
+ 210, 197, 216, 24, 131, 17, 147, 246, 13, 86, 3, 253, 179, 237, 101, 114,
+ 243, 191, 207, 2, 220, 133, 244, 53, 87, 125, 154, 158, 197, 20, 8, 83,
+ 32, 191, 38, 241, 204, 22, 168, 59, 217, 123, 162, 82, 21, 50, 130, 89,
+ 239, 253, 195, 56, 253, 74, 147, 125, 234, 199, 250, 28, 65, 193, 22, 237,
+ 193, 94, 58, 229, 139, 176, 69, 42, 179, 164, 150, 168, 246, 214, 86, 174,
+ 59, 117, 15, 19, 76, 37, 214, 238, 153, 226, 154, 45, 109, 114, 198, 107,
+ 45, 70, 238, 196, 142, 252, 244, 71, 123, 136, 134, 188, 99, 132, 25, 42,
+ 240, 0, 196, 33, 26, 124, 256, 145, 27, 102, 153, 35, 28, 132, 221, 167,
+ 138, 133, 41, 170, 95, 224, 40, 139, 239, 153, 1, 106, 255, 106, 170, 163,
+ 127, 44, 155, 232, 194, 119, 232, 117, 239, 143, 108, 41, 3, 9, 180, 256,
+ 144, 113, 133, 200, 79, 69, 128, 216, 31, 50, 102, 209, 249, 136, 150, 154,
+ 182, 51, 228, 39, 127, 142, 87, 15, 94, 92, 187, 245, 31, 236, 64, 58,
+ 114, 11, 17, 166, 189, 152, 218, 34, 123, 39, 58, 37, 153, 91, 63, 121,
+ 31, 34, 12, 254, 106, 96, 171, 14, 155, 247, 214, 69, 24, 98, 3, 204,
+ 202, 194, 207, 30, 253, 44, 119, 70, 14, 96, 82, 250, 63, 6, 232, 38,
+ 89, 144, 102, 191, 82, 254, 20, 222, 96, 162, 110, 6, 159, 58, 200, 226,
+ 98, 128, 42, 70, 84, 247, 128, 211, 136, 54, 143, 166, 60, 118, 99, 218,
+ 27, 193, 85, 81, 219, 223, 46, 41, 23, 233, 152, 222, 36, 236, 54, 181,
+ 56, 50, 4, 207, 129, 92, 78, 88, 197, 251, 131, 105, 31, 172, 38, 131,
+ 19, 204, 129, 47, 227, 106, 202, 183, 23, 6, 77, 224, 102, 147, 11, 218,
+ 131, 132, 60, 192, 208, 223, 236, 23, 103, 115, 89, 18, 185, 171, 70, 174,
+ 139, 0, 100, 160, 221, 11, 228, 60, 12, 122, 114, 12, 157, 235, 148, 57,
+ 83, 62, 173, 131, 169, 126, 85, 99, 93, 243, 81, 80, 29, 245, 206, 82,
+ 236, 227, 166, 14, 230, 213, 144, 97, 27, 111, 99, 164, 105, 150, 89, 111,
+ 252, 118, 140, 232, 120, 183, 137, 213, 232, 157, 224, 33, 134, 118, 186, 80,
+ 159, 2, 186, 193, 54, 242, 25, 237, 232, 249, 226, 213, 90, 149, 90, 160,
+ 118, 69, 64, 37, 10, 183, 109, 246, 30, 52, 219, 69, 189, 26, 116, 220,
+ 50, 244, 243, 243, 139, 137, 232, 98, 38, 45, 256, 143, 171, 101, 73, 238,
+ 123, 45, 194, 167, 250, 123, 12, 29, 136, 237, 141, 21, 89, 96, 199, 44,
+ 8, 214, 208, 17, 113, 41, 137, 26, 166, 155, 89, 85, 54, 58, 97, 160,
+ 50, 239, 58, 71, 21, 157, 139, 12, 37, 198, 182, 131, 149, 134, 16, 204,
+ 164, 181, 248, 166, 52, 216, 136, 201, 37, 255, 187, 240, 5, 101, 147, 231,
+ 14, 163, 253, 134, 146, 216, 8, 54, 224, 90, 220, 195, 75, 215, 186, 58,
+ 71, 204, 124, 105, 239, 53, 16, 85, 69, 163, 195, 223, 33, 38, 69, 88,
+ 88, 203, 99, 55, 176, 13, 156, 204, 236, 99, 194, 134, 75, 247, 126, 129,
+ 160, 124, 233, 206, 139, 144, 154, 45, 233, 51, 206, 61, 60, 55, 205, 107,
+ 84, 108, 96, 188, 203, 31, 89, 20, 115, 144, 137, 90, 237, 78, 231, 185,
+ 120, 217, 1, 176, 169, 30, 155, 176, 100, 113, 53, 42, 193, 108, 14, 121,
+ 176, 158, 137, 92, 178, 44, 110, 249, 108, 234, 94, 101, 128, 12, 250, 173,
+ 72, 202, 232, 66, 139, 152, 189, 18, 32, 197, 9, 238, 246, 55, 119, 183,
+ 196, 119, 113, 247, 191, 100, 200, 245, 46, 16, 234, 112, 136, 116, 232, 48,
+ 176, 108, 11, 237, 14, 153, 93, 177, 124, 72, 67, 121, 135, 143, 45, 18,
+ 97, 251, 184, 172, 136, 55, 213, 8, 103, 12, 221, 212, 13, 160, 116, 91,
+ 237, 127, 218, 190, 103, 131, 77, 82, 36, 100, 22, 252, 79, 69, 54, 26,
+ 65, 182, 115, 142, 247, 20, 89, 81, 188, 244, 27, 120, 240, 248, 13, 230,
+ 67, 133, 32, 201, 129, 87, 9, 245, 66, 88, 166, 34, 46, 184, 119, 218,
+ 144, 235, 163, 40, 138, 134, 127, 217, 64, 227, 116, 67, 55, 202, 130, 48,
+ 199, 42, 251, 112, 124, 153, 123, 194, 243, 49, 250, 12, 78, 157, 167, 134,
+ 210, 73, 156, 102, 21, 88, 216, 123, 45, 11, 208, 18, 47, 187, 20, 43,
+ 3, 180, 124, 2, 136, 176, 77, 111, 138, 139, 91, 225, 126, 8, 74, 255,
+ 88, 192, 193, 239, 138, 204, 139, 194, 166, 130, 252, 184, 140, 168, 30, 177,
+ 121, 98, 131, 124, 69, 171, 75, 49, 184, 34, 76, 122, 202, 115, 184, 253,
+ 120, 182, 33, 251, 1, 74, 216, 217, 243, 168, 70, 162, 119, 158, 197, 198,
+ 61, 89, 7, 5, 54, 199, 211, 170, 23, 226, 44, 247, 165, 195, 7, 225,
+ 91, 23, 50, 15, 51, 208, 106, 94, 12, 31, 43, 112, 146, 139, 246, 182,
+ 113, 1, 97, 15, 66, 2, 51, 76, 164, 184, 237, 200, 218, 176, 72, 98,
+ 33, 135, 38, 147, 140, 229, 50, 94, 81, 187, 129, 17, 238, 168, 146, 203,
+ 181, 99, 164, 3, 104, 98, 255, 189, 114, 142, 86, 102, 229, 102, 80, 129,
+ 64, 84, 79, 161, 81, 156, 128, 111, 164, 197, 18, 15, 55, 196, 198, 191,
+ 28, 113, 117, 96, 207, 253, 19, 158, 231, 13, 53, 130, 252, 211, 58, 180,
+ 212, 142, 7, 219, 38, 81, 62, 109, 167, 113, 33, 56, 97, 185, 157, 130,
+ 186, 129, 119, 182, 196, 26, 54, 110, 65, 170, 166, 236, 30, 22, 162, 0,
+ 106, 12, 248, 33, 48, 72, 159, 17, 76, 244, 172, 132, 89, 171, 196, 76,
+ 254, 166, 76, 218, 226, 3, 52, 220, 238, 181, 179, 144, 225, 23, 3, 166,
+ 158, 35, 228, 154, 204, 23, 203, 71, 134, 189, 18, 168, 236, 141, 117, 138,
+ 2, 132, 78, 57, 154, 21, 250, 196, 184, 40, 161, 40, 10, 178, 134, 120,
+ 132, 123, 101, 82, 205, 121, 55, 140, 231, 56, 231, 71, 206, 246, 198, 150,
+ 146, 192, 45, 105, 242, 1, 125, 18, 176, 46, 222, 122, 19, 80, 113, 133,
+ 131, 162, 81, 51, 98, 168, 247, 161, 139, 39, 63, 162, 22, 153, 170, 92,
+ 91, 130, 174, 200, 45, 112, 99, 164, 132, 184, 191, 186, 200, 167, 86, 145,
+ 167, 227, 130, 44, 12, 158, 172, 249, 204, 17, 54, 249, 16, 200, 21, 174,
+ 67, 223, 105, 201, 50, 36, 133, 203, 244, 131, 228, 67, 29, 195, 91, 91,
+ 55, 107, 167, 154, 170, 137, 218, 183, 169, 61, 99, 175, 128, 23, 142, 183,
+ 66, 255, 59, 187, 66, 85, 212, 109, 168, 82, 16, 43, 67, 139, 114, 176,
+ 216, 255, 130, 94, 152, 79, 183, 64, 100, 23, 214, 82, 34, 230, 48, 15,
+ 242, 130, 50, 241, 81, 32, 5, 125, 183, 182, 184, 99, 248, 109, 159, 210,
+ 226, 61, 119, 129, 39, 149, 78, 214, 107, 78, 147, 124, 228, 18, 143, 188,
+ 84, 180, 233, 119, 64, 39, 158, 133, 177, 168, 6, 150, 80, 117, 150, 56,
+ 49, 72, 49, 37, 30, 242, 49, 142, 33, 156, 34, 44, 44, 72, 58, 22,
+ 249, 46, 168, 80, 25, 196, 64, 174, 97, 179, 244, 134, 213, 105, 63, 151,
+ 21, 90, 168, 90, 245, 28, 157, 65, 250, 232, 188, 27, 99, 160, 156, 127,
+ 68, 193, 10, 80, 205, 36, 138, 229, 12, 223, 70, 169, 251, 41, 48, 94,
+ 41, 177, 99, 256, 158, 0, 6, 83, 231, 191, 120, 135, 157, 146, 218, 213,
+ 160, 7, 47, 234, 98, 211, 79, 225, 179, 95, 175, 105, 185, 79, 115, 0,
+ 104, 14, 65, 124, 15, 188, 52, 9, 253, 27, 132, 137, 13, 127, 75, 238,
+ 185, 253, 33, 8, 52, 157, 164, 68, 232, 188, 69, 28, 209, 233, 5, 129,
+ 216, 90, 252, 212, 33, 200, 222, 9, 112, 15, 43, 36, 226, 114, 15, 249,
+ 217, 8, 148, 22, 147, 23, 143, 67, 222, 116, 235, 250, 212, 210, 39, 142,
+ 108, 64, 209, 83, 73, 66, 99, 34, 17, 29, 45, 151, 244, 114, 28, 241,
+ 144, 208, 146, 179, 132, 89, 217, 198, 252, 219, 205, 165, 75, 107, 11, 173,
+ 76, 6, 196, 247, 152, 216, 248, 91, 209, 178, 57, 250, 174, 60, 79, 123,
+ 18, 135, 9, 241, 230, 159, 184, 68, 156, 251, 215, 9, 113, 234, 75, 235,
+ 103, 194, 205, 129, 230, 45, 96, 73, 157, 20, 200, 212, 212, 228, 161, 7,
+ 231, 228, 108, 43, 198, 87, 140, 140, 4, 182, 164, 3, 53, 104, 250, 213,
+ 85, 38, 89, 61, 52, 187, 35, 204, 86, 249, 100, 71, 248, 213, 163, 215,
+ 66, 106, 252, 129, 40, 111, 47, 24, 186, 221, 85, 205, 199, 237, 122, 181,
+ 32, 46, 182, 135, 33, 251, 142, 34, 208, 242, 128, 255, 4, 234, 15, 33,
+ 167, 222, 32, 186, 191, 34, 255, 244, 98, 240, 228, 204, 30, 142, 32, 70,
+ 69, 83, 110, 151, 10, 243, 141, 21, 223, 69, 61, 37, 59, 209, 102, 114,
+ 223, 33, 129, 254, 255, 103, 86, 247, 235, 72, 126, 177, 102, 226, 102, 30,
+ 149, 221, 62, 247, 251, 120, 163, 173, 57, 202, 204, 24, 39, 106, 120, 143,
+ 202, 176, 191, 147, 37, 38, 51, 133, 47, 245, 157, 132, 154, 71, 183, 111,
+ 30, 180, 18, 202, 82, 96, 170, 91, 157, 181, 212, 140, 256, 8, 196, 121,
+ 149, 79, 66, 127, 113, 78, 4, 197, 84, 256, 111, 222, 102, 63, 228, 104,
+ 136, 223, 67, 193, 93, 154, 249, 83, 204, 101, 200, 234, 84, 252, 230, 195,
+ 43, 140, 120, 242, 89, 63, 166, 233, 209, 94, 43, 170, 126, 5, 205, 78,
+ 112, 80, 143, 151, 146, 248, 137, 203, 45, 183, 61, 1, 155, 8, 102, 59,
+ 68, 212, 230, 61, 254, 191, 128, 223, 176, 123, 229, 27, 146, 120, 96, 165,
+ 213, 12, 232, 40, 186, 225, 66, 105, 200, 195, 212, 110, 237, 238, 151, 19,
+ 12, 171, 150, 82, 7, 228, 79, 52, 15, 78, 62, 43, 21, 154, 114, 21,
+ 12, 212, 256, 232, 125, 127, 5, 51, 37, 252, 136, 13, 47, 195, 168, 191,
+ 231, 55, 57, 251, 214, 116, 15, 86, 210, 41, 249, 242, 119, 27, 250, 203,
+ 107, 69, 90, 43, 206, 154, 127, 54, 100, 78, 187, 54, 244, 177, 234, 167,
+ 202, 136, 209, 171, 69, 114, 133, 173, 26, 139, 78, 141, 128, 32, 124, 39,
+ 45, 218, 96, 68, 90, 44, 67, 62, 83, 190, 188, 256, 103, 42, 102, 64,
+ 249, 0, 141, 11, 61, 69, 70, 66, 233, 237, 29, 200, 251, 157, 71, 51,
+ 64, 133, 113, 76, 35, 125, 76, 137, 217, 145, 35, 69, 226, 180, 56, 249,
+ 156, 163, 176, 237, 81, 54, 85, 169, 115, 211, 129, 70, 248, 40, 252, 192,
+ 194, 101, 247, 8, 181, 124, 217, 191, 194, 93, 99, 127, 117, 177, 144, 151,
+ 228, 121, 32, 11, 89, 81, 26, 29, 183, 76, 249, 132, 179, 70, 34, 102,
+ 20, 66, 87, 63, 124, 205, 174, 177, 87, 219, 73, 218, 91, 87, 176, 72,
+ 15, 211, 47, 61, 251, 165, 39, 247, 146, 70, 150, 57, 1, 212, 36, 162,
+ 39, 38, 16, 216, 3, 50, 116, 200, 32, 234, 77, 181, 155, 19, 90, 188,
+ 36, 6, 254, 46, 46, 203, 25, 230, 181, 196, 4, 151, 225, 65, 122, 216,
+ 168, 86, 158, 131, 136, 16, 49, 102, 233, 64, 154, 88, 228, 52, 146, 69,
+ 93, 157, 243, 121, 70, 209, 126, 213, 88, 145, 236, 65, 70, 96, 204, 47,
+ 10, 200, 77, 8, 103, 150, 48, 153, 5, 37, 52, 235, 209, 31, 181, 126,
+ 83, 142, 224, 140, 6, 32, 200, 171, 160, 179, 115, 229, 75, 194, 208, 39,
+ 59, 223, 52, 247, 38, 197, 135, 1, 6, 189, 106, 114, 168, 5, 211, 222,
+ 44, 63, 90, 160, 116, 172, 170, 133, 125, 138, 39, 131, 23, 178, 10, 214,
+ 36, 93, 28, 59, 68, 17, 123, 25, 255, 184, 204, 102, 194, 214, 129, 94,
+ 159, 245, 112, 141, 62, 11, 61, 197, 124, 221, 205, 11, 79, 71, 201, 54,
+ 58, 150, 29, 121, 87, 46, 240, 201, 68, 20, 194, 209, 47, 152, 158, 174,
+ 193, 164, 120, 255, 216, 165, 247, 58, 85, 130, 220, 23, 122, 223, 188, 98,
+ 21, 70, 72, 170, 150, 237, 76, 143, 112, 238, 206, 146, 215, 110, 4, 250,
+ 68, 44, 174, 177, 30, 98, 143, 241, 180, 127, 113, 48, 0, 1, 179, 199,
+ 59, 106, 201, 114, 29, 86, 173, 133, 217, 44, 200, 141, 107, 172, 16, 60,
+ 82, 58, 239, 94, 141, 234, 186, 235, 109, 173, 249, 139, 141, 59, 100, 248,
+ 84, 144, 49, 160, 51, 207, 164, 103, 74, 97, 146, 202, 193, 125, 168, 134,
+ 236, 111, 135, 121, 59, 145, 168, 200, 181, 173, 109, 2, 255, 6, 9, 245,
+ 90, 202, 214, 143, 121, 65, 85, 232, 132, 77, 228, 84, 26, 54, 184, 15,
+ 161, 29, 177, 79, 43, 0, 156, 184, 163, 165, 62, 90, 179, 93, 45, 239,
+ 1, 16, 120, 189, 127, 47, 74, 166, 20, 214, 233, 226, 89, 217, 229, 26,
+ 156, 53, 162, 60, 21, 3, 192, 72, 111, 51, 53, 101, 181, 208, 88, 82,
+ 179, 160, 219, 113, 240, 108, 43, 224, 162, 147, 62, 14, 95, 81, 205, 4,
+ 160, 177, 225, 115, 29, 69, 235, 168, 148, 29, 128, 114, 124, 129, 172, 165,
+ 215, 231, 214, 86, 160, 44, 157, 91, 248, 183, 73, 164, 56, 181, 162, 92,
+ 141, 118, 127, 240, 196, 77, 0, 9, 244, 79, 250, 100, 195, 25, 255, 85,
+ 94, 35, 212, 137, 107, 34, 110, 20, 200, 104, 17, 32, 231, 43, 150, 159,
+ 231, 216, 223, 190, 226, 109, 162, 197, 87, 92, 224, 11, 111, 73, 60, 225,
+ 238, 73, 246, 169, 19, 217, 119, 38, 121, 118, 70, 82, 99, 241, 110, 67,
+ 31, 76, 146, 215, 124, 240, 31, 103, 139, 224, 75, 160, 31, 78, 93, 4,
+ 64, 9, 103, 223, 6, 227, 119, 85, 116, 81, 21, 43, 46, 206, 234, 132,
+ 85, 99, 22, 131, 135, 97, 86, 13, 234, 188, 21, 14, 89, 169, 207, 238,
+ 219, 177, 190, 72, 157, 41, 114, 140, 92, 141, 186, 1, 63, 107, 225, 184,
+ 118, 150, 153, 254, 241, 106, 120, 210, 104, 144, 151, 161, 88, 206, 125, 164,
+ 15, 211, 173, 49, 146, 241, 71, 36, 58, 201, 46, 27, 33, 187, 91, 162,
+ 117, 19, 210, 213, 187, 97, 193, 50, 190, 114, 217, 60, 61, 167, 207, 213,
+ 213, 53, 135, 34, 156, 91, 115, 119, 46, 99, 242, 1, 90, 52, 198, 227,
+ 201, 91, 216, 146, 210, 82, 121, 38, 73, 133, 182, 193, 132, 148, 246, 75,
+ 109, 157, 179, 113, 176, 134, 205, 159, 148, 58, 103, 171, 132, 156, 133, 147,
+ 161, 231, 39, 100, 175, 97, 125, 28, 183, 129, 135, 191, 202, 181, 29, 218,
+ 43, 104, 148, 203, 189, 204, 4, 182, 169, 1, 134, 122, 141, 202, 13, 187,
+ 177, 112, 162, 35, 231, 6, 8, 241, 99, 6, 191, 45, 113, 113, 101, 104};
+
// The S-Box we use for further linearity breaking.
// We created it by taking the digits of decimal expansion of e.
// The code that created it can be found in 'ProduceRandomSBox.c'.
// Indexed by the input byte: the column header gives the low nibble,
// the trailing row comment gives the high nibble.
unsigned char SBox[256] = {
//0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F
0x7d, 0xd1, 0x70, 0x0b, 0xfa, 0x39, 0x18, 0xc3, 0xf3, 0xbb, 0xa7, 0xd4, 0x84, 0x25, 0x3b, 0x3c, // 0
0x2c, 0x15, 0x69, 0x9a, 0xf9, 0x27, 0xfb, 0x02, 0x52, 0xba, 0xa8, 0x4b, 0x20, 0xb5, 0x8b, 0x3a, // 1
0x88, 0x8e, 0x26, 0xcb, 0x71, 0x5e, 0xaf, 0xad, 0x0c, 0xac, 0xa1, 0x93, 0xc6, 0x78, 0xce, 0xfc, // 2
0x2a, 0x76, 0x17, 0x1f, 0x62, 0xc2, 0x2e, 0x99, 0x11, 0x37, 0x65, 0x40, 0xfd, 0xa0, 0x03, 0xc1, // 3
0xca, 0x48, 0xe2, 0x9b, 0x81, 0xe4, 0x1c, 0x01, 0xec, 0x68, 0x7a, 0x5a, 0x50, 0xf8, 0x0e, 0xa3, // 4
0xe8, 0x61, 0x2b, 0xa2, 0xeb, 0xcf, 0x8c, 0x3d, 0xb4, 0x95, 0x13, 0x08, 0x46, 0xab, 0x91, 0x7b, // 5
0xea, 0x55, 0x67, 0x9d, 0xdd, 0x29, 0x6a, 0x8f, 0x9f, 0x22, 0x4e, 0xf2, 0x57, 0xd2, 0xa9, 0xbd, // 6
0x38, 0x16, 0x5f, 0x4c, 0xf7, 0x9e, 0x1b, 0x2f, 0x30, 0xc7, 0x41, 0x24, 0x5c, 0xbf, 0x05, 0xf6, // 7
0x0a, 0x31, 0xa5, 0x45, 0x21, 0x33, 0x6b, 0x6d, 0x6c, 0x86, 0xe1, 0xa4, 0xe6, 0x92, 0x9c, 0xdf, // 8
0xe7, 0xbe, 0x28, 0xe3, 0xfe, 0x06, 0x4d, 0x98, 0x80, 0x04, 0x96, 0x36, 0x3e, 0x14, 0x4a, 0x34, // 9
0xd3, 0xd5, 0xdb, 0x44, 0xcd, 0xf5, 0x54, 0xdc, 0x89, 0x09, 0x90, 0x42, 0x87, 0xff, 0x7e, 0x56, // A
0x5d, 0x59, 0xd7, 0x23, 0x75, 0x19, 0x97, 0x73, 0x83, 0x64, 0x53, 0xa6, 0x1e, 0xd8, 0xb0, 0x49, // B
0x3f, 0xef, 0xbc, 0x7f, 0x43, 0xf0, 0xc9, 0x72, 0x0f, 0x63, 0x79, 0x2d, 0xc0, 0xda, 0x66, 0xc8, // C
0x32, 0xde, 0x47, 0x07, 0xb8, 0xe9, 0x1d, 0xc4, 0x85, 0x74, 0x82, 0xcc, 0x60, 0x51, 0x77, 0x0d, // D
0xaa, 0x35, 0xed, 0x58, 0x7c, 0x5b, 0xb9, 0x94, 0x6e, 0x8d, 0xb1, 0xc5, 0xb7, 0xee, 0xb6, 0xae, // E
0x10, 0xe0, 0xd6, 0xd9, 0xe5, 0x4f, 0xf1, 0x12, 0x00, 0xd0, 0xf4, 0x1a, 0x6f, 0x8a, 0xb3, 0xb2 }; // F
+
+///////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Helper functions definition portion.
+//
+///////////////////////////////////////////////////////////////////////////////////////////////
+
+// Translates an input array with values in base 257 to output array with values in base 256.
+// Returns the carry bit.
+//
+// Parameters:
+// - input: the input array of size EIGHTH_N. Each value in the array is a number in Z_257.
+// The MSB is assumed to be the last one in the array.
+// - output: the input array encoded in base 256.
+//
+// Returns:
+// - The carry bit (MSB).
+swift_int16_t TranslateToBase256(swift_int32_t input[EIGHTH_N], unsigned char output[EIGHTH_N]);
+
+// Translates an input integer into the range (-FIELD_SIZE / 2) <= result <= (FIELD_SIZE / 2).
+//
+// Parameters:
+// - x: the input integer.
+//
+// Returns:
+// - The result, which equals (x MOD FIELD_SIZE), such that |result| <= (FIELD_SIZE / 2).
+int Center(int x);
+
+// Calculates bit reversal permutation.
+//
+// Parameters:
+// - input: the input to reverse.
+// - numOfBits: the number of bits in the input to reverse.
+//
+// Returns:
+// - The resulting number, which is obtained from the input by reversing its bits.
+int ReverseBits(int input, int numOfBits);
+
+// Initializes the FFT fast lookup table.
+// Shall be called only once.
+void InitializeSWIFFTX();
+
+// Calculates the FFT.
+//
+// Parameters:
+// - input: the input to the FFT.
+// - output: the resulting output.
+void FFT(const unsigned char input[EIGHTH_N], swift_int32_t *output);
+
+///////////////////////////////////////////////////////////////////////////////////////////////
+// Helper functions implementation portion.
+///////////////////////////////////////////////////////////////////////////////////////////////
+
// Translates an EIGHTH_N-digit base-257 number (least-significant digit
// first) into EIGHTH_N base-256 output bytes, returning the final carry
// that overflows the output.
swift_int16_t TranslateToBase256(swift_int32_t input[EIGHTH_N], unsigned char output[EIGHTH_N])
{
    swift_int32_t pairs[EIGHTH_N / 2];
    int i;

    // Pack adjacent digits into 16-bit-ish limbs:
    // pairs[i/2] = input[i] + 257 * input[i + 1],
    // computed as input[i] + input[i + 1] + (input[i + 1] << 8).
    for (i = 0; i < EIGHTH_N; i += 2)
    {
        // input[i] + 257 * input[i + 1]
        pairs[i >> 1] = input[i] + input[i + 1] + (input[i + 1] << 8);
    }

    // Carry-propagation passes that convert the base-257^2 limbs into
    // clean base-2^16 limbs. Statement order matters here.
    for (i = (EIGHTH_N / 2) - 1; i > 0; --i)
    {
        int j;

        for (j = i - 1; j < (EIGHTH_N / 2) - 1; ++j)
        {
            // pairs[j] absorbs pairs[j + 1] * 513, since 257^2 = 66049,
            // and 66049 mod 2^16 = 513 (i.e. 513 = 1 + 2^9).
            register swift_int32_t temp = pairs[j] + pairs[j + 1] + (pairs[j + 1] << 9);
            pairs[j] = temp & 0xffff;
            pairs[j + 1] += (temp >> 16);
        }
    }

    // Emit each 16-bit limb as two little-endian bytes.
    for (i = 0; i < EIGHTH_N; i += 2)
    {
        output[i] = (unsigned char) (pairs[i >> 1] & 0xff);
        output[i + 1] = (unsigned char) ((pairs[i >> 1] >> 8) & 0xff);
    }

    // Whatever remains above bit 15 of the most-significant limb is the carry.
    return (pairs[EIGHTH_N/2 - 1] >> 16);
}
+
+int Center(int x)
+{
+ int result = x % FIELD_SIZE;
+
+ if (result > (FIELD_SIZE / 2))
+ result -= FIELD_SIZE;
+
+ if (result < (FIELD_SIZE / -2))
+ result += FIELD_SIZE;
+
+ return result;
+}
+
// Calculates the bit-reversal permutation of 'input'.
//
// Parameters:
// - input: the value whose bits are reversed.
// - numOfBits: NOTE(review): despite the name, this is used as a sentinel
//   value OR-ed into 'input' before the loop, so it appears to require a
//   power of two (2^bits) strictly greater than 'input' to work correctly.
//   Callers pass N / W and W — confirm both are powers of two.
//
// Returns:
// - 'input' with the bits below the sentinel bit reversed.
int ReverseBits(int input, int numOfBits)
{
    register int reversed = 0;

    // The sentinel bit marks the top of the range: the loop peels bits off
    // the bottom of 'input' into 'reversed' until only the sentinel is left.
    for (input |= numOfBits; input > 1; input >>= 1)
        reversed = (reversed << 1) | (input & 1);

    return reversed;
}
+
// One-time initialization of the SWIFFTX lookup tables ('multipliers' and
// 'fftTable'), guarded by the global 'wasSetupDone' flag so repeated calls
// are cheap no-ops.
// NOTE(review): the guard is an unsynchronized global — not thread-safe;
// confirm callers perform setup from a single thread.
void InitializeSWIFFTX()
{
    int i, j, k, x;
    // The powers of OMEGA
    int omegaPowers[2 * N];
    // This store runs even when setup was already done; it is harmless.
    omegaPowers[0] = 1;

    if (wasSetupDone)
        return;

    // omegaPowers[i] = OMEGA^i, reduced to the centered range via Center().
    for (i = 1; i < (2 * N); ++i)
    {
        omegaPowers[i] = Center(omegaPowers[i - 1] * OMEGA);
    }

    // First-stage FFT multipliers, rows in bit-reversed order.
    // NOTE(review): the '<< 3' assumes W == 8; ReverseBits receives N / W,
    // which its sentinel-based implementation expects to be a power of two.
    for (i = 0; i < (N / W); ++i)
    {
        for (j = 0; j < W; ++j)
        {
            multipliers[(i << 3) + j] = omegaPowers[ReverseBits(i, N / W) * (2 * j + 1)];
        }
    }

    // fftTable[x*8 + j]: contribution of input byte x to FFT coordinate j.
    // Each set bit k of x adds the matching power of OMEGA (index taken
    // mod 2*N), and the sum is centered.
    for (x = 0; x < 256; ++x)
    {
        for (j = 0; j < 8; ++j)
        {
            register int temp = 0;
            for (k = 0; k < 8; ++k)
            {
                temp += omegaPowers[(EIGHTH_N * (2 * j + 1) * ReverseBits(k, W)) % (2 * N)]
                        * ((x >> k) & 1);
            }

            fftTable[(x << 3) + j] = Center(temp);
        }
    }

    wasSetupDone = true;
}
+
+void FFT(const unsigned char input[EIGHTH_N], swift_int32_t *output)
+{
+ register swift_int16_t *mult = multipliers;
+ register swift_int32_t F0, F1, F2, F3, F4, F5, F6, F7, F8, F9,
+ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29,
+ F30, F31, F32, F33, F34, F35, F36, F37, F38, F39,
+ F40, F41, F42, F43, F44, F45, F46, F47, F48, F49,
+ F50, F51, F52, F53, F54, F55, F56, F57, F58, F59,
+ F60, F61, F62, F63;
+
+ // First loop unrolling:
+ register swift_int16_t *table = &(fftTable[input[0] << 3]);
+
+ F0 = mult[0] * table[0];
+ F8 = mult[1] * table[1];
+ F16 = mult[2] * table[2];
+ F24 = mult[3] * table[3];
+ F32 = mult[4] * table[4];
+ F40 = mult[5] * table[5];
+ F48 = mult[6] * table[6];
+ F56 = mult[7] * table[7];
+
+ mult += 8;
+ table = &(fftTable[input[1] << 3]);
+
+ F1 = mult[0] * table[0];
+ F9 = mult[1] * table[1];
+ F17 = mult[2] * table[2];
+ F25 = mult[3] * table[3];
+ F33 = mult[4] * table[4];
+ F41 = mult[5] * table[5];
+ F49 = mult[6] * table[6];
+ F57 = mult[7] * table[7];
+
+ mult += 8;
+ table = &(fftTable[input[2] << 3]);
+
+ F2 = mult[0] * table[0];
+ F10 = mult[1] * table[1];
+ F18 = mult[2] * table[2];
+ F26 = mult[3] * table[3];
+ F34 = mult[4] * table[4];
+ F42 = mult[5] * table[5];
+ F50 = mult[6] * table[6];
+ F58 = mult[7] * table[7];
+
+ mult += 8;
+ table = &(fftTable[input[3] << 3]);
+
+ F3 = mult[0] * table[0];
+ F11 = mult[1] * table[1];
+ F19 = mult[2] * table[2];
+ F27 = mult[3] * table[3];
+ F35 = mult[4] * table[4];
+ F43 = mult[5] * table[5];
+ F51 = mult[6] * table[6];
+ F59 = mult[7] * table[7];
+
+ mult += 8;
+ table = &(fftTable[input[4] << 3]);
+
+ F4 = mult[0] * table[0];
+ F12 = mult[1] * table[1];
+ F20 = mult[2] * table[2];
+ F28 = mult[3] * table[3];
+ F36 = mult[4] * table[4];
+ F44 = mult[5] * table[5];
+ F52 = mult[6] * table[6];
+ F60 = mult[7] * table[7];
+
+ mult += 8;
+ table = &(fftTable[input[5] << 3]);
+
+ F5 = mult[0] * table[0];
+ F13 = mult[1] * table[1];
+ F21 = mult[2] * table[2];
+ F29 = mult[3] * table[3];
+ F37 = mult[4] * table[4];
+ F45 = mult[5] * table[5];
+ F53 = mult[6] * table[6];
+ F61 = mult[7] * table[7];
+
+ mult += 8;
+ table = &(fftTable[input[6] << 3]);
+
+ F6 = mult[0] * table[0];
+ F14 = mult[1] * table[1];
+ F22 = mult[2] * table[2];
+ F30 = mult[3] * table[3];
+ F38 = mult[4] * table[4];
+ F46 = mult[5] * table[5];
+ F54 = mult[6] * table[6];
+ F62 = mult[7] * table[7];
+
+ mult += 8;
+ table = &(fftTable[input[7] << 3]);
+
+ F7 = mult[0] * table[0];
+ F15 = mult[1] * table[1];
+ F23 = mult[2] * table[2];
+ F31 = mult[3] * table[3];
+ F39 = mult[4] * table[4];
+ F47 = mult[5] * table[5];
+ F55 = mult[6] * table[6];
+ F63 = mult[7] * table[7];
+
+ // Second loop unrolling:
+ // Iteration 0:
+ ADD_SUB(F0, F1);
+ ADD_SUB(F2, F3);
+ ADD_SUB(F4, F5);
+ ADD_SUB(F6, F7);
+
+ F3 <<= 4;
+ F7 <<= 4;
+
+ ADD_SUB(F0, F2);
+ ADD_SUB(F1, F3);
+ ADD_SUB(F4, F6);
+ ADD_SUB(F5, F7);
+
+ F5 <<= 2;
+ F6 <<= 4;
+ F7 <<= 6;
+
+ ADD_SUB(F0, F4);
+ ADD_SUB(F1, F5);
+ ADD_SUB(F2, F6);
+ ADD_SUB(F3, F7);
+
+ output[0] = Q_REDUCE(F0);
+ output[8] = Q_REDUCE(F1);
+ output[16] = Q_REDUCE(F2);
+ output[24] = Q_REDUCE(F3);
+ output[32] = Q_REDUCE(F4);
+ output[40] = Q_REDUCE(F5);
+ output[48] = Q_REDUCE(F6);
+ output[56] = Q_REDUCE(F7);
+
+ // Iteration 1:
+ ADD_SUB(F8, F9);
+ ADD_SUB(F10, F11);
+ ADD_SUB(F12, F13);
+ ADD_SUB(F14, F15);
+
+ F11 <<= 4;
+ F15 <<= 4;
+
+ ADD_SUB(F8, F10);
+ ADD_SUB(F9, F11);
+ ADD_SUB(F12, F14);
+ ADD_SUB(F13, F15);
+
+ F13 <<= 2;
+ F14 <<= 4;
+ F15 <<= 6;
+
+ ADD_SUB(F8, F12);
+ ADD_SUB(F9, F13);
+ ADD_SUB(F10, F14);
+ ADD_SUB(F11, F15);
+
+ output[1] = Q_REDUCE(F8);
+ output[9] = Q_REDUCE(F9);
+ output[17] = Q_REDUCE(F10);
+ output[25] = Q_REDUCE(F11);
+ output[33] = Q_REDUCE(F12);
+ output[41] = Q_REDUCE(F13);
+ output[49] = Q_REDUCE(F14);
+ output[57] = Q_REDUCE(F15);
+
+ // Iteration 2:
+ ADD_SUB(F16, F17);
+ ADD_SUB(F18, F19);
+ ADD_SUB(F20, F21);
+ ADD_SUB(F22, F23);
+
+ F19 <<= 4;
+ F23 <<= 4;
+
+ ADD_SUB(F16, F18);
+ ADD_SUB(F17, F19);
+ ADD_SUB(F20, F22);
+ ADD_SUB(F21, F23);
+
+ F21 <<= 2;
+ F22 <<= 4;
+ F23 <<= 6;
+
+ ADD_SUB(F16, F20);
+ ADD_SUB(F17, F21);
+ ADD_SUB(F18, F22);
+ ADD_SUB(F19, F23);
+
+ output[2] = Q_REDUCE(F16);
+ output[10] = Q_REDUCE(F17);
+ output[18] = Q_REDUCE(F18);
+ output[26] = Q_REDUCE(F19);
+ output[34] = Q_REDUCE(F20);
+ output[42] = Q_REDUCE(F21);
+ output[50] = Q_REDUCE(F22);
+ output[58] = Q_REDUCE(F23);
+
+ // Iteration 3:
+ ADD_SUB(F24, F25);
+ ADD_SUB(F26, F27);
+ ADD_SUB(F28, F29);
+ ADD_SUB(F30, F31);
+
+ F27 <<= 4;
+ F31 <<= 4;
+
+ ADD_SUB(F24, F26);
+ ADD_SUB(F25, F27);
+ ADD_SUB(F28, F30);
+ ADD_SUB(F29, F31);
+
+ F29 <<= 2;
+ F30 <<= 4;
+ F31 <<= 6;
+
+ ADD_SUB(F24, F28);
+ ADD_SUB(F25, F29);
+ ADD_SUB(F26, F30);
+ ADD_SUB(F27, F31);
+
+ output[3] = Q_REDUCE(F24);
+ output[11] = Q_REDUCE(F25);
+ output[19] = Q_REDUCE(F26);
+ output[27] = Q_REDUCE(F27);
+ output[35] = Q_REDUCE(F28);
+ output[43] = Q_REDUCE(F29);
+ output[51] = Q_REDUCE(F30);
+ output[59] = Q_REDUCE(F31);
+
+ // Iteration 4:
+ ADD_SUB(F32, F33);
+ ADD_SUB(F34, F35);
+ ADD_SUB(F36, F37);
+ ADD_SUB(F38, F39);
+
+ F35 <<= 4;
+ F39 <<= 4;
+
+ ADD_SUB(F32, F34);
+ ADD_SUB(F33, F35);
+ ADD_SUB(F36, F38);
+ ADD_SUB(F37, F39);
+
+ F37 <<= 2;
+ F38 <<= 4;
+ F39 <<= 6;
+
+ ADD_SUB(F32, F36);
+ ADD_SUB(F33, F37);
+ ADD_SUB(F34, F38);
+ ADD_SUB(F35, F39);
+
+ output[4] = Q_REDUCE(F32);
+ output[12] = Q_REDUCE(F33);
+ output[20] = Q_REDUCE(F34);
+ output[28] = Q_REDUCE(F35);
+ output[36] = Q_REDUCE(F36);
+ output[44] = Q_REDUCE(F37);
+ output[52] = Q_REDUCE(F38);
+ output[60] = Q_REDUCE(F39);
+
+ // Iteration 5:
+ ADD_SUB(F40, F41);
+ ADD_SUB(F42, F43);
+ ADD_SUB(F44, F45);
+ ADD_SUB(F46, F47);
+
+ F43 <<= 4;
+ F47 <<= 4;
+
+ ADD_SUB(F40, F42);
+ ADD_SUB(F41, F43);
+ ADD_SUB(F44, F46);
+ ADD_SUB(F45, F47);
+
+ F45 <<= 2;
+ F46 <<= 4;
+ F47 <<= 6;
+
+ ADD_SUB(F40, F44);
+ ADD_SUB(F41, F45);
+ ADD_SUB(F42, F46);
+ ADD_SUB(F43, F47);
+
+ output[5] = Q_REDUCE(F40);
+ output[13] = Q_REDUCE(F41);
+ output[21] = Q_REDUCE(F42);
+ output[29] = Q_REDUCE(F43);
+ output[37] = Q_REDUCE(F44);
+ output[45] = Q_REDUCE(F45);
+ output[53] = Q_REDUCE(F46);
+ output[61] = Q_REDUCE(F47);
+
+ // Iteration 6:
+ ADD_SUB(F48, F49);
+ ADD_SUB(F50, F51);
+ ADD_SUB(F52, F53);
+ ADD_SUB(F54, F55);
+
+ F51 <<= 4;
+ F55 <<= 4;
+
+ ADD_SUB(F48, F50);
+ ADD_SUB(F49, F51);
+ ADD_SUB(F52, F54);
+ ADD_SUB(F53, F55);
+
+ F53 <<= 2;
+ F54 <<= 4;
+ F55 <<= 6;
+
+ ADD_SUB(F48, F52);
+ ADD_SUB(F49, F53);
+ ADD_SUB(F50, F54);
+ ADD_SUB(F51, F55);
+
+ output[6] = Q_REDUCE(F48);
+ output[14] = Q_REDUCE(F49);
+ output[22] = Q_REDUCE(F50);
+ output[30] = Q_REDUCE(F51);
+ output[38] = Q_REDUCE(F52);
+ output[46] = Q_REDUCE(F53);
+ output[54] = Q_REDUCE(F54);
+ output[62] = Q_REDUCE(F55);
+
+ // Iteration 7:
+ ADD_SUB(F56, F57);
+ ADD_SUB(F58, F59);
+ ADD_SUB(F60, F61);
+ ADD_SUB(F62, F63);
+
+ F59 <<= 4;
+ F63 <<= 4;
+
+ ADD_SUB(F56, F58);
+ ADD_SUB(F57, F59);
+ ADD_SUB(F60, F62);
+ ADD_SUB(F61, F63);
+
+ F61 <<= 2;
+ F62 <<= 4;
+ F63 <<= 6;
+
+ ADD_SUB(F56, F60);
+ ADD_SUB(F57, F61);
+ ADD_SUB(F58, F62);
+ ADD_SUB(F59, F63);
+
+ output[7] = Q_REDUCE(F56);
+ output[15] = Q_REDUCE(F57);
+ output[23] = Q_REDUCE(F58);
+ output[31] = Q_REDUCE(F59);
+ output[39] = Q_REDUCE(F60);
+ output[47] = Q_REDUCE(F61);
+ output[55] = Q_REDUCE(F62);
+ output[63] = Q_REDUCE(F63);
+}
+
+// Calculates the FFT part of SWIFFT.
+// We divided the SWIFFT calculation into two, because that way we could save 2 computations of
+// the FFT part, since in the first stage of SWIFFTX the difference between the first 3 SWIFFTs
+// is only the A's part.
+//
+// Parameters:
+// - input: the input to FFT.
+// - m: the input size divided by 8. The function performs m FFTs.
+// - output: will store the result.
+void SWIFFTFFT(const unsigned char *input, int m, swift_int32_t *output)
+{
+ int i;
+
+ for (i = 0;
+ i < m;
+ i++, input += EIGHTH_N, output += N)
+ {
+ FFT(input, output);
+ }
+}
+
// Calculates the 'sum' part of SWIFFT, including the base change at the end.
// We divided the SWIFFT calculation into two, because that way we could save 2 computations of
// the FFT part, since in the first stage of SWIFFTX the difference between the first 3 SWIFFTs
// is only the A's part.
//
// Parameters:
// - input: the input. Of size 64 * m.
// - m: the input size divided by 64.
// - output: will store the result. NOTE(review): N result bytes plus one
//   trailing carry byte at output[N] — the buffer must hold N + 1 bytes.
// - a: the coefficients in the sum. Of size 64 * m.
void SWIFFTSum(const swift_int32_t *input, int m, unsigned char *output, const swift_int16_t *a)
{
    int i, j;
    swift_int32_t result[N];
    register swift_int16_t carry = 0;

    // For each of the N coordinates, accumulate the dot product of the m
    // FFT blocks with the matching coefficients (both strided by N).
    for (j = 0; j < N; ++j)
    {
        register swift_int32_t sum = 0;
        const register swift_int32_t *f = input + j;
        const register swift_int16_t *k = a + j;

        for (i = 0; i < m; i++, f += N,k += N)
        {
            sum += (*f) * (*k);
        }

        result[j] = sum;
    }

    // Reduce mod FIELD_SIZE. Adding FIELD_SIZE << 22 first forces the
    // dividend non-negative, because C's % takes the sign of its dividend
    // (assumes |result[j]| < FIELD_SIZE << 22 — TODO confirm bound).
    for (j = 0; j < N; ++j)
    {
        result[j] = ((FIELD_SIZE << 22) + result[j]) % FIELD_SIZE;
    }

    // Re-encode each group of 8 base-257 digits as 8 base-256 bytes and
    // collect the per-group carry bits into a single carry byte.
    for (j = 0; j < 8; ++j)
    {
        int register carryBit = TranslateToBase256(result + (j << 3), output + (j << 3));
        carry |= carryBit << j;
    }

    output[N] = carry;
}
+
// Computes one full SWIFFTX compression: three parallel first-stage SWIFFTs
// over the input (sharing a single FFT pass), an S-Box substitution on their
// concatenated outputs, and a final SWIFFT. Optionally applies a smoothing
// pass to the digest.
//
// Parameters:
// - input: SWIFFTX_INPUT_BLOCK_SIZE input bytes.
// - output: SWIFFTX_OUTPUT_BLOCK_SIZE result bytes (N bytes + carry byte).
// - doSmooth: when true, the digest is post-processed by the smoothing step.
void ComputeSingleSWIFFTX(unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE],
                          unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE],
                          bool doSmooth)
{
    int i;
    // Will store the result of the FFT parts:
    swift_int32_t fftOut[N * M];
    // 3 SWIFFT outputs of N bytes, plus 3 carry bytes and 5 bytes of padding.
    unsigned char intermediate[N * 3 + 8];
    unsigned char carry0,carry1,carry2;

    // Do the three SWIFFTs while remembering the three carry bytes (each carry byte gets
    // overwritten by the following SWIFFTSum, which writes N + 1 bytes):

    // 1. Compute the FFT of the input - the common part for the first 3 SWIFFTs:
    SWIFFTFFT(input, M, fftOut);

    // 2. Compute the sums of the 3 SWIFFTs, each using a different set of coefficients:

    // 2a. The first SWIFFT:
    SWIFFTSum(fftOut, M, intermediate, As);
    // Remember the carry byte before 2b overwrites its slot:
    carry0 = intermediate[N];

    // 2b. The second one:
    SWIFFTSum(fftOut, M, intermediate + N, As + (M * N));
    carry1 = intermediate[2 * N];

    // 2c. The third one:
    SWIFFTSum(fftOut, M, intermediate + (2 * N), As + 2 * (M * N));
    carry2 = intermediate[3 * N];

    // 2d. Put the three carry bytes in their place:
    intermediate[3 * N] = carry0;
    intermediate[(3 * N) + 1] = carry1;
    intermediate[(3 * N) + 2] = carry2;

    // Padding intermediate output with 5 zeroes.
    memset(intermediate + (3 * N) + 3, 0, 5);

    // Apply the S-Box to every byte of the intermediate value:
    for (i = 0; i < (3 * N) + 8; ++i)
    {
        intermediate[i] = SBox[intermediate[i]];
    }

    // 3. The final and last SWIFFT, over the (3 * N + 8)-byte intermediate,
    //    i.e. 3 * (N/8) + 1 blocks of 8 bytes:
    SWIFFTFFT(intermediate, 3 * (N/8) + 1, fftOut);
    SWIFFTSum(fftOut, 3 * (N/8) + 1, output, As);

    if (doSmooth)
    {
        // Smoothing: rebuild the output as a signed combination of rows of
        // As, one row per set bit of the digest.
        unsigned char sum[N];
        register int i, j; // intentionally shadows the outer 'i'
        memset(sum, 0, N);

        for (i = 0; i < (N + 1) * 8; ++i)
        {
            register const swift_int16_t *AsRow;
            register int AShift;

            // Skip digest bits that are not set:
            if (!(output[i >> 3] & (1 << (i & 7))))
            {
                continue;
            }

            // Row base: bit index rounded down to a multiple of N, offset
            // past the first M * N coefficients; AShift is the rotation.
            // NOTE(review): 'i & 63' assumes N == 64 — confirm.
            AsRow = As + N * M + (i & ~(N - 1)) ;
            AShift = i & 63;

            // Add the row rotated by AShift, negating the wrapped-around part:
            for (j = AShift; j < N; ++j)
            {
                sum[j] += AsRow[j - AShift];
            }

            for(j = 0; j < AShift; ++j)
            {
                sum[j] -= AsRow[N - AShift + j];
            }
        }

        // Replace the digest with the smoothed sum and clear the carry byte.
        for (i = 0; i < N; ++i)
        {
            output[i] = sum[i];
        }

        output[N] = 0;
    }
}
\ No newline at end of file
diff --git a/stratum/algos/SWIFFTX/SWIFFTX.h b/stratum/algos/SWIFFTX/SWIFFTX.h
new file mode 100644
index 000000000..f184e070e
--- /dev/null
+++ b/stratum/algos/SWIFFTX/SWIFFTX.h
@@ -0,0 +1,74 @@
+///////////////////////////////////////////////////////////////////////////////////////////////
+//
+// SWIFFTX ANSI C OPTIMIZED 32BIT IMPLEMENTATION FOR NIST SHA-3 COMPETITION
+//
+// SWIFFTX.h
+//
+// October 2008
+//
+// This file is the exact copy from the reference implementation.
+//
+///////////////////////////////////////////////////////////////////////////////////////////////
+#ifndef __SWIFFTX__
+#define __SWIFFTX__
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+// See the remarks concerning compatibility issues inside stdint.h.
+#include "stdint.h"
+#include "stdbool.h"
+//#include "SHA3swift.h"
+
+// The size of SWIFFTX input in bytes.
+#define SWIFFTX_INPUT_BLOCK_SIZE 256
+
+// The size of output block in bytes. The compression function of SWIFFT outputs a block of
+// this size (i.e., this is the size of the resulting hash value).
+#define SWIFFTX_OUTPUT_BLOCK_SIZE 65
+
+// Computes the result of a single SWIFFT operation.
+// This is the simple implementation, where our main concern is to show our design principles.
+// It is made more efficient in the optimized version, by using FFT instead of DFT, and
+// through other speed-up techniques.
+//
+// Parameters:
+// - input: the input string. Consists of 8*m input bytes, where each octet passes the DFT
+// processing.
+// - m: the length of the input in bytes.
+// - output: the resulting hash value of SWIFFT, of size 65 bytes (520 bit). This is the
+// result of summing the dot products of the DFTS with the A's after applying the base
+// change transformation
+// - A: the A's coefficients to work with (since every SWIFFT in SWIFFTX uses different As).
+// A single application of SWIFFT uses 64*m A's.
+void ComputeSingleSWIFFT(unsigned char *input, unsigned short m,
+ unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE],
+ const swift_int16_t *a);
+
+// Computes the result of a single SWIFFTX operation.
+// NOTE: for simplicity we use 'ComputeSingleSWIFFT()' as a subroutine. This is only to show
+// the design idea. In the optimized versions we don't do this for efficiency concerns, since
+// there we compute the first part (which doesn't involve the A coefficients) only once for all
+// of the 3 invocations of SWIFFT. This enables us to introduce a significant speedup.
+//
+// Parameters:
+// - input: the input block of 256 bytes (2048 bit).
+// - output: the resulting hash value of SWIFFT, of size 64 bytes (512 bit).
+// - doSmooth: if true, a final smoothing stage is performed and the output is of size 512 bits.
+//
+// Returns:
+// - Success value.
+void ComputeSingleSWIFFTX(unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE],
+ unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE],
+ bool doSmooth);
+
+// Calculates the powers of OMEGA and generates the bit reversal permutation.
+// You must call this function before doing SWIFFT/X, otherwise you will get zeroes everywhere.
+void InitializeSWIFFTX();
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __SWIFFTX__
\ No newline at end of file
diff --git a/stratum/algos/SWIFFTX/inttypes.h b/stratum/algos/SWIFFTX/inttypes.h
new file mode 100644
index 000000000..cb313ae81
--- /dev/null
+++ b/stratum/algos/SWIFFTX/inttypes.h
@@ -0,0 +1,35 @@
+/*
+ inttypes.h
+ Contributors:
+ Created by Marek Michalkiewicz
+ THIS SOFTWARE IS NOT COPYRIGHTED
+ This source code is offered for use in the public domain. You may
+ use, modify or distribute it freely.
+ This code is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESS OR IMPLIED ARE HEREBY
+ DISCLAIMED. This includes but is not limited to warranties of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+ #ifndef __INTTYPES_H_
+ #define __INTTYPES_H_
+
+ /* Use [u]intN_t if you need exactly N bits.
+ XXX - doesn't handle the -mint8 option. */
+
+ typedef signed char swift_int8_t;
+ typedef unsigned char swift_uint8_t;
+
+ typedef int swift_int16_t;
+ typedef unsigned int swift_uint16_t;
+
+ typedef long swift_int32_t;
+ typedef unsigned long swift_uint32_t;
+
+ typedef long long swift_int64_t;
+ typedef unsigned long long swift_uint64_t;
+
+ //typedef swift_int16_t intptr_t;
+ //typedef swift_uint16_t uintptr_t;
+
+ #endif
\ No newline at end of file
diff --git a/stratum/algos/SWIFFTX/stdint.h b/stratum/algos/SWIFFTX/stdint.h
new file mode 100644
index 000000000..49d614fca
--- /dev/null
+++ b/stratum/algos/SWIFFTX/stdint.h
@@ -0,0 +1,53 @@
+#ifndef _SWIFFT_STDINT_H
+#define _SWIFFT_STDINT_H
+
+///////////////////////////////////////////////////////////////////////////////////////////////
+//
+// A note from SWIFFTX implementers:
+//
+// Although the submission was targeted for Microsoft Visual Studio 2005 compiler, we strived
+// to make the code as portable as possible. This is why we preferred to use the types defined
+// here, instead of Microsoft-specific types. We compiled the code with gcc to make this sure.
+// However, we couldn't use this header as is, due to VS2005 compiler objections. This is why
+// we commented out certain defines and clearly marked it.
+// To compile our code on gcc you may define SYS_STDINT.
+//
+///////////////////////////////////////////////////////////////////////////////////////////////
+
+#ifdef SYS_STDINT
+
+#include <stdint.h>
+
+#else
+
+#include "inttypes.h"
+// The following was commented out by SWIFFTX implementers:
+// __BEGIN_DECLS
+
+typedef swift_int8_t swifftx_int_least8_t;
+typedef swift_int16_t swifftx_int_least16_t;
+typedef swift_int32_t swifftx_int_least32_t;
+typedef swift_uint8_t swifftx_uint_least8_t;
+typedef swift_uint16_t swifftx_uint_least16_t;
+typedef swift_uint32_t swifftx_uint_least32_t;
+
+#ifndef __STRICT_ANSI__
+typedef swift_int64_t swifftx_int_least64_t;
+typedef swift_uint64_t swifftx_uint_least64_t;
+#endif
+
+/*typedef signed char int_fast8_t;
+typedef signed long int int_fast16_t;
+typedef signed long int int_fast32_t;
+typedef signed long long int int_fast64_t;
+typedef unsigned char uint_fast8_t;
+typedef unsigned long int uint_fast16_t;
+typedef unsigned long int uint_fast32_t;
+typedef unsigned long long int uint_fast64_t;*/
+
+// The following was commented out by SWIFFTX implementers:
+// #include
+// __END_DECLS
+#endif
+
+#endif
\ No newline at end of file
diff --git a/stratum/algos/anime.c b/stratum/algos/anime.c
new file mode 100644
index 000000000..7a80d8119
--- /dev/null
+++ b/stratum/algos/anime.c
@@ -0,0 +1,91 @@
+#include "anime.h"
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include "../sha3/sph_blake.h"
+#include "../sha3/sph_bmw.h"
+#include "../sha3/sph_groestl.h"
+#include "../sha3/sph_jh.h"
+#include "../sha3/sph_keccak.h"
+#include "../sha3/sph_skein.h"
+
+#define _ALIGN(x) __attribute__ ((aligned(x)))
+
+void anime_hash(const char* input, char* output, uint32_t len)
+{
+ uint32_t _ALIGN(128) hash[64];
+
+ sph_bmw512_context ctx_bmw;
+ sph_blake512_context ctx_blake;
+ sph_groestl512_context ctx_groestl;
+ sph_jh512_context ctx_jh;
+ sph_keccak512_context ctx_keccak;
+ sph_skein512_context ctx_skein;
+
+ sph_bmw512_init(&ctx_bmw);
+ sph_bmw512 (&ctx_bmw, input, len);
+ sph_bmw512_close(&ctx_bmw, (void*) hash);
+
+ sph_blake512_init(&ctx_blake);
+ sph_blake512 (&ctx_blake, (const void*) hash, 64);
+ sph_blake512_close(&ctx_blake, (void*) hash);
+
+ if (hash[0] & 0x8)
+ {
+ sph_groestl512_init(&ctx_groestl);
+ sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
+ sph_groestl512_close(&ctx_groestl, (void*) hash);
+ }
+ else
+ {
+ sph_skein512_init(&ctx_skein);
+ sph_skein512 (&ctx_skein, (const void*) hash, 64);
+ sph_skein512_close(&ctx_skein, (void*) hash);
+ }
+
+ sph_groestl512_init(&ctx_groestl);
+ sph_groestl512 (&ctx_groestl, (const void*) hash, 64);
+ sph_groestl512_close(&ctx_groestl, (void*) hash);
+
+ sph_jh512_init(&ctx_jh);
+ sph_jh512 (&ctx_jh, (const void*) hash, 64);
+ sph_jh512_close(&ctx_jh, (void*) hash);
+
+ if (hash[0] & 0x8)
+ {
+ sph_blake512_init(&ctx_blake);
+ sph_blake512 (&ctx_blake, (const void*) hash, 64);
+ sph_blake512_close(&ctx_blake, (void*) hash);
+ }
+ else
+ {
+ sph_bmw512_init(&ctx_bmw);
+ sph_bmw512 (&ctx_bmw, (const void*) hash, 64);
+ sph_bmw512_close(&ctx_bmw, (void*) hash);
+ }
+
+ sph_keccak512_init(&ctx_keccak);
+ sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
+ sph_keccak512_close(&ctx_keccak, (void*) hash);
+
+ sph_skein512_init(&ctx_skein);
+ sph_skein512 (&ctx_skein, (const void*) hash, 64);
+ sph_skein512_close(&ctx_skein, (void*) hash);
+
+ if (hash[0] & 0x8)
+ {
+ sph_keccak512_init(&ctx_keccak);
+ sph_keccak512 (&ctx_keccak, (const void*) hash, 64);
+ sph_keccak512_close(&ctx_keccak, (void*) hash);
+ }
+ else
+ {
+ sph_jh512_init(&ctx_jh);
+ sph_jh512 (&ctx_jh, (const void*) hash, 64);
+ sph_jh512_close(&ctx_jh, (void*) hash);
+ }
+
+ memcpy(output, hash, 32);
+}
+
diff --git a/stratum/algos/anime.h b/stratum/algos/anime.h
new file mode 100644
index 000000000..fcf38ad6e
--- /dev/null
+++ b/stratum/algos/anime.h
@@ -0,0 +1,16 @@
+#ifndef ANIME_H
+#define ANIME_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+void anime_hash(const char* input, char* output, uint32_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/stratum/algos/ar2/ar2-scrypt-jane.c b/stratum/algos/ar2/ar2-scrypt-jane.c
deleted file mode 100644
index e75b73b09..000000000
--- a/stratum/algos/ar2/ar2-scrypt-jane.c
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- scrypt-jane by Andrew M, https://github.com/floodyberry/scrypt-jane
-
- Public Domain or MIT License, whichever is easier
-*/
-
-#include <string.h>
-
-#if defined( _WINDOWS )
-#if !defined( QT_GUI )
-extern "C" {
-#endif
-#endif
-
-#include "ar2-scrypt-jane.h"
-
-#include "sj/scrypt-jane-portable.h"
-#include "sj/scrypt-jane-hash.h"
-#include "sj/scrypt-jane-romix.h"
-#include "sj/scrypt-jane-test-vectors.h"
-
-#define scrypt_maxNfactor 30 /* (1 << (30 + 1)) = ~2 billion */
-#if (SCRYPT_BLOCK_BYTES == 64)
-#define scrypt_r_32kb 8 /* (1 << 8) = 256 * 2 blocks in a chunk * 64 bytes = Max of 32kb in a chunk */
-#elif (SCRYPT_BLOCK_BYTES == 128)
-#define scrypt_r_32kb 7 /* (1 << 7) = 128 * 2 blocks in a chunk * 128 bytes = Max of 32kb in a chunk */
-#elif (SCRYPT_BLOCK_BYTES == 256)
-#define scrypt_r_32kb 6 /* (1 << 6) = 64 * 2 blocks in a chunk * 256 bytes = Max of 32kb in a chunk */
-#elif (SCRYPT_BLOCK_BYTES == 512)
-#define scrypt_r_32kb 5 /* (1 << 5) = 32 * 2 blocks in a chunk * 512 bytes = Max of 32kb in a chunk */
-#endif
-#define scrypt_maxrfactor scrypt_r_32kb /* 32kb */
-#define scrypt_maxpfactor 25 /* (1 << 25) = ~33 million */
-
-#include <stdio.h>
-//#include <malloc.h>
-
-static void NORETURN
-scrypt_fatal_error_default(const char *msg) {
- fprintf(stderr, "%s\n", msg);
- exit(1);
-}
-
-static scrypt_fatal_errorfn scrypt_fatal_error = scrypt_fatal_error_default;
-
-void scrypt_set_fatal_error(scrypt_fatal_errorfn fn) {
- scrypt_fatal_error = fn;
-}
-
-static int scrypt_power_on_self_test(void)
-{
- const scrypt_test_setting *t;
- uint8_t test_digest[64];
- uint32_t i;
- int res = 7, scrypt_valid;
-
- if (!scrypt_test_mix()) {
-#if !defined(SCRYPT_TEST)
- scrypt_fatal_error("scrypt: mix function power-on-self-test failed");
-#endif
- res &= ~1;
- }
-
- if (!scrypt_test_hash()) {
-#if !defined(SCRYPT_TEST)
- scrypt_fatal_error("scrypt: hash function power-on-self-test failed");
-#endif
- res &= ~2;
- }
-
- for (i = 0, scrypt_valid = 1; post_settings[i].pw; i++) {
- t = post_settings + i;
- scrypt((uint8_t *)t->pw, strlen(t->pw), (uint8_t *)t->salt, strlen(t->salt), t->Nfactor, t->rfactor, t->pfactor, test_digest, sizeof(test_digest));
- scrypt_valid &= scrypt_verify(post_vectors[i], test_digest, sizeof(test_digest));
- }
-
- if (!scrypt_valid) {
-#if !defined(SCRYPT_TEST)
- scrypt_fatal_error("scrypt: scrypt power-on-self-test failed");
-#endif
- res &= ~4;
- }
-
- return res;
-}
-
-typedef struct scrypt_aligned_alloc_t {
- uint8_t *mem, *ptr;
-} scrypt_aligned_alloc;
-
-#ifdef SCRYPT_TEST_SPEED
-
-static uint8_t *mem_base = (uint8_t *)0;
-static size_t mem_bump = 0;
-
-/* allocations are assumed to be multiples of 64 bytes and total allocations not to exceed ~1.01gb */
-static scrypt_aligned_alloc scrypt_alloc(uint64_t size)
-{
- scrypt_aligned_alloc aa;
- if (!mem_base) {
- mem_base = (uint8_t *)malloc((1024 * 1024 * 1024) + (1024 * 1024) + (SCRYPT_BLOCK_BYTES - 1));
- if (!mem_base)
- scrypt_fatal_error("scrypt: out of memory");
- mem_base = (uint8_t *)(((size_t)mem_base + (SCRYPT_BLOCK_BYTES - 1)) & ~(SCRYPT_BLOCK_BYTES - 1));
- }
- aa.mem = mem_base + mem_bump;
- aa.ptr = aa.mem;
- mem_bump += (size_t)size;
- return aa;
-}
-
-static void scrypt_free(scrypt_aligned_alloc *aa) {
- mem_bump = 0;
-}
-
-#else
-
-static scrypt_aligned_alloc scrypt_alloc(uint64_t size)
-{
- static const size_t max_alloc = (size_t)-1;
- scrypt_aligned_alloc aa;
- size += (SCRYPT_BLOCK_BYTES - 1);
- if (size > max_alloc)
- scrypt_fatal_error("scrypt: not enough address space on this CPU to allocate required memory");
- aa.mem = (uint8_t *)malloc((size_t)size);
- aa.ptr = (uint8_t *)(((size_t)aa.mem + (SCRYPT_BLOCK_BYTES - 1)) & ~(SCRYPT_BLOCK_BYTES - 1));
- if (!aa.mem)
- scrypt_fatal_error("scrypt: out of memory");
- return aa;
-}
-
-static void scrypt_free(scrypt_aligned_alloc *aa)
-{
- free(aa->mem);
-}
-
-#endif /* SCRYPT_TEST_SPEED */
-
-
-void scrypt(const uint8_t *password, size_t password_len, const uint8_t *salt, size_t salt_len,
- uint8_t Nfactor, uint8_t rfactor, uint8_t pfactor, uint8_t *out, size_t bytes)
-{
- scrypt_aligned_alloc YX, V;
- uint8_t *X, *Y;
- uint32_t N, r, p, chunk_bytes, i;
-
-#if !defined(SCRYPT_CHOOSE_COMPILETIME)
- scrypt_ROMixfn scrypt_ROMix = scrypt_getROMix();
-#endif
-
-#if !defined(SCRYPT_TEST)
- static int power_on_self_test = 0;
- if (!power_on_self_test) {
- power_on_self_test = 1;
- if (!scrypt_power_on_self_test())
- scrypt_fatal_error("scrypt: power on self test failed");
- }
-#endif
-
- if (Nfactor > scrypt_maxNfactor)
- scrypt_fatal_error("scrypt: N out of range");
- if (rfactor > scrypt_maxrfactor)
- scrypt_fatal_error("scrypt: r out of range");
- if (pfactor > scrypt_maxpfactor)
- scrypt_fatal_error("scrypt: p out of range");
-
- N = (1 << (Nfactor + 1));
- r = (1 << rfactor);
- p = (1 << pfactor);
-
- chunk_bytes = SCRYPT_BLOCK_BYTES * r * 2;
- V = scrypt_alloc((uint64_t)N * chunk_bytes);
- YX = scrypt_alloc((p + 1) * chunk_bytes);
-
- /* 1: X = PBKDF2(password, salt) */
- Y = YX.ptr;
- X = Y + chunk_bytes;
- scrypt_pbkdf2(password, password_len, salt, salt_len, 1, X, chunk_bytes * p);
-
- /* 2: X = ROMix(X) */
- for (i = 0; i < p; i++)
- scrypt_ROMix((scrypt_mix_word_t *)(X + (chunk_bytes * i)), (scrypt_mix_word_t *)Y, (scrypt_mix_word_t *)V.ptr, N, r);
-
- /* 3: Out = PBKDF2(password, X) */
- scrypt_pbkdf2(password, password_len, X, chunk_bytes * p, 1, out, bytes);
-
- scrypt_ensure_zero(YX.ptr, (p + 1) * chunk_bytes);
-
- scrypt_free(&V);
- scrypt_free(&YX);
-}
-
-#define Nfactor 8
-#define rfactor 0
-#define pfactor 0
-#if (SCRYPT_BLOCK_BYTES == 64)
-#define chunk_bytes 128
-#elif (SCRYPT_BLOCK_BYTES == 128)
-#define chunk_bytes 256
-#elif (SCRYPT_BLOCK_BYTES == 256)
-#define chunk_bytes 512
-#elif (SCRYPT_BLOCK_BYTES == 512)
-#define chunk_bytes 1024
-#endif
-
-void my_scrypt(const uint8_t *password, size_t password_len, const uint8_t *salt, size_t salt_len, uint8_t *out)
-{
- scrypt_aligned_alloc YX, V;
- uint8_t *X, *Y;
-
-#if !defined(SCRYPT_CHOOSE_COMPILETIME)
- scrypt_ROMixfn scrypt_ROMix = scrypt_getROMix();
-#endif
-
-/*
-#if !defined(SCRYPT_TEST)
- static int power_on_self_test = 0;
- if (!power_on_self_test) {
- power_on_self_test = 1;
- if (!scrypt_power_on_self_test())
- scrypt_fatal_error("scrypt: power on self test failed");
- }
-#endif
-*/
- V = scrypt_alloc((uint64_t)512 * chunk_bytes);
- YX = scrypt_alloc(2 * chunk_bytes);
-
- /* 1: X = PBKDF2(password, salt) */
- Y = YX.ptr;
- X = Y + chunk_bytes;
- scrypt_pbkdf2(password, password_len, salt, salt_len, 1, X, chunk_bytes);
-
- /* 2: X = ROMix(X) */
- scrypt_ROMix((scrypt_mix_word_t *)X, (scrypt_mix_word_t *)Y, (scrypt_mix_word_t *)V.ptr, 512, 1);
-
- /* 3: Out = PBKDF2(password, X) */
- scrypt_pbkdf2(password, password_len, X, chunk_bytes, 1, out, 32);
-
- scrypt_ensure_zero(YX.ptr, 2 * chunk_bytes);
-
- scrypt_free(&V);
- scrypt_free(&YX);
-}
-
-#if defined( _WINDOWS )
-#if !defined( QT_GUI )
-} /* extern "C" */
-#endif
-#endif
diff --git a/stratum/algos/ar2/ar2-scrypt-jane.h b/stratum/algos/ar2/ar2-scrypt-jane.h
deleted file mode 100644
index e71e460af..000000000
--- a/stratum/algos/ar2/ar2-scrypt-jane.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef AR2_SCRYPT_JANE_H
-#define AR2_SCRYPT_JANE_H
-
-//#define SCRYPT_CHOOSE_COMPILETIME
-//#define SCRYPT_TEST
-#define SCRYPT_SKEIN512
-#define SCRYPT_SALSA64
-
-/*
- Nfactor: Increases CPU & Memory Hardness
- N = (1 << (Nfactor + 1)): How many times to mix a chunk and how many temporary chunks are used
-
- rfactor: Increases Memory Hardness
- r = (1 << rfactor): How large a chunk is
-
- pfactor: Increases CPU Hardness
- p = (1 << pfactor): Number of times to mix the main chunk
-
- A block is the basic mixing unit (salsa/chacha block = 64 bytes)
- A chunk is (2 * r) blocks
-
- ~Memory used = (N + 2) * ((2 * r) * block size)
-*/
-
-#include <stddef.h>
-#include <stdint.h>
-
-typedef void (*scrypt_fatal_errorfn)(const char *msg);
-void scrypt_set_fatal_error(scrypt_fatal_errorfn fn);
-
-void scrypt(const unsigned char *password, size_t password_len, const unsigned char *salt, size_t salt_len, unsigned char Nfactor, unsigned char rfactor, unsigned char pfactor, unsigned char *out, size_t bytes);
-void my_scrypt(const uint8_t *password, size_t password_len, const uint8_t *salt, size_t salt_len, uint8_t *out);
-#endif /* AR2_SCRYPT_JANE_H */
diff --git a/stratum/algos/ar2/sj/scrypt-jane-hash.h b/stratum/algos/ar2/sj/scrypt-jane-hash.h
deleted file mode 100644
index 3a48bf5d8..000000000
--- a/stratum/algos/ar2/sj/scrypt-jane-hash.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#if defined(SCRYPT_SKEIN512)
-#include "scrypt-jane-hash_skein512.h"
-#else
- #define SCRYPT_HASH "ERROR"
- #define SCRYPT_HASH_BLOCK_SIZE 64
- #define SCRYPT_HASH_DIGEST_SIZE 64
- typedef struct scrypt_hash_state_t { size_t dummy; } scrypt_hash_state;
- typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];
- static void scrypt_hash_init(scrypt_hash_state *S) {}
- static void scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {}
- static void scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {}
- static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {0};
- #error must define a hash function!
-#endif
-
-#include "scrypt-jane-pbkdf2.h"
-
-#define SCRYPT_TEST_HASH_LEN 257 /* (2 * largest block size) + 1 */
-
-static int
-scrypt_test_hash(void) {
- scrypt_hash_state st;
- scrypt_hash_digest hash, final;
- uint8_t msg[SCRYPT_TEST_HASH_LEN];
- size_t i;
-
- for (i = 0; i < SCRYPT_TEST_HASH_LEN; i++)
- msg[i] = (uint8_t)i;
-
- scrypt_hash_init(&st);
- for (i = 0; i < SCRYPT_TEST_HASH_LEN + 1; i++) {
- scrypt_hash(hash, msg, i);
- scrypt_hash_update(&st, hash, sizeof(hash));
- }
- scrypt_hash_finish(&st, final);
- return scrypt_verify(final, scrypt_test_hash_expected, SCRYPT_HASH_DIGEST_SIZE);
-}
-
diff --git a/stratum/algos/ar2/sj/scrypt-jane-hash_skein512.h b/stratum/algos/ar2/sj/scrypt-jane-hash_skein512.h
deleted file mode 100644
index a95d46b17..000000000
--- a/stratum/algos/ar2/sj/scrypt-jane-hash_skein512.h
+++ /dev/null
@@ -1,188 +0,0 @@
-#define SCRYPT_HASH "Skein-512"
-#define SCRYPT_HASH_BLOCK_SIZE 64
-#define SCRYPT_HASH_DIGEST_SIZE 64
-
-typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];
-
-typedef struct scrypt_hash_state_t {
- uint64_t X[8], T[2];
- uint32_t leftover;
- uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE];
-} scrypt_hash_state;
-
-#include <string.h>
-
-static void
-skein512_blocks(scrypt_hash_state *S, const uint8_t *in, size_t blocks, size_t add) {
- uint64_t X[8], key[8], Xt[9+18], T[3+1];
- size_t r;
-
- while (blocks--) {
- T[0] = S->T[0] + add;
- T[1] = S->T[1];
- T[2] = T[0] ^ T[1];
- key[0] = U8TO64_LE(in + 0); Xt[0] = S->X[0]; X[0] = key[0] + Xt[0];
- key[1] = U8TO64_LE(in + 8); Xt[1] = S->X[1]; X[1] = key[1] + Xt[1];
- key[2] = U8TO64_LE(in + 16); Xt[2] = S->X[2]; X[2] = key[2] + Xt[2];
- key[3] = U8TO64_LE(in + 24); Xt[3] = S->X[3]; X[3] = key[3] + Xt[3];
- key[4] = U8TO64_LE(in + 32); Xt[4] = S->X[4]; X[4] = key[4] + Xt[4];
- key[5] = U8TO64_LE(in + 40); Xt[5] = S->X[5]; X[5] = key[5] + Xt[5] + T[0];
- key[6] = U8TO64_LE(in + 48); Xt[6] = S->X[6]; X[6] = key[6] + Xt[6] + T[1];
- key[7] = U8TO64_LE(in + 56); Xt[7] = S->X[7]; X[7] = key[7] + Xt[7];
- Xt[8] = 0x1BD11BDAA9FC1A22ull ^ Xt[0] ^ Xt[1] ^ Xt[2] ^ Xt[3] ^ Xt[4] ^ Xt[5] ^ Xt[6] ^ Xt[7];
- in += SCRYPT_HASH_BLOCK_SIZE;
-
- for (r = 0; r < 18; r++)
- Xt[r + 9] = Xt[r + 0];
-
- for (r = 0; r < 18; r += 2) {
- X[0] += X[1]; X[1] = ROTL64(X[1], 46) ^ X[0];
- X[2] += X[3]; X[3] = ROTL64(X[3], 36) ^ X[2];
- X[4] += X[5]; X[5] = ROTL64(X[5], 19) ^ X[4];
- X[6] += X[7]; X[7] = ROTL64(X[7], 37) ^ X[6];
- X[2] += X[1]; X[1] = ROTL64(X[1], 33) ^ X[2];
- X[0] += X[3]; X[3] = ROTL64(X[3], 42) ^ X[0];
- X[6] += X[5]; X[5] = ROTL64(X[5], 14) ^ X[6];
- X[4] += X[7]; X[7] = ROTL64(X[7], 27) ^ X[4];
- X[4] += X[1]; X[1] = ROTL64(X[1], 17) ^ X[4];
- X[6] += X[3]; X[3] = ROTL64(X[3], 49) ^ X[6];
- X[0] += X[5]; X[5] = ROTL64(X[5], 36) ^ X[0];
- X[2] += X[7]; X[7] = ROTL64(X[7], 39) ^ X[2];
- X[6] += X[1]; X[1] = ROTL64(X[1], 44) ^ X[6];
- X[4] += X[3]; X[3] = ROTL64(X[3], 56) ^ X[4];
- X[2] += X[5]; X[5] = ROTL64(X[5], 54) ^ X[2];
- X[0] += X[7]; X[7] = ROTL64(X[7], 9) ^ X[0];
-
- X[0] += Xt[r + 1];
- X[1] += Xt[r + 2];
- X[2] += Xt[r + 3];
- X[3] += Xt[r + 4];
- X[4] += Xt[r + 5];
- X[5] += Xt[r + 6] + T[1];
- X[6] += Xt[r + 7] + T[2];
- X[7] += Xt[r + 8] + r + 1;
-
- T[3] = T[0];
- T[0] = T[1];
- T[1] = T[2];
- T[2] = T[3];
-
- X[0] += X[1]; X[1] = ROTL64(X[1], 39) ^ X[0];
- X[2] += X[3]; X[3] = ROTL64(X[3], 30) ^ X[2];
- X[4] += X[5]; X[5] = ROTL64(X[5], 34) ^ X[4];
- X[6] += X[7]; X[7] = ROTL64(X[7], 24) ^ X[6];
- X[2] += X[1]; X[1] = ROTL64(X[1], 13) ^ X[2];
- X[0] += X[3]; X[3] = ROTL64(X[3], 17) ^ X[0];
- X[6] += X[5]; X[5] = ROTL64(X[5], 10) ^ X[6];
- X[4] += X[7]; X[7] = ROTL64(X[7], 50) ^ X[4];
- X[4] += X[1]; X[1] = ROTL64(X[1], 25) ^ X[4];
- X[6] += X[3]; X[3] = ROTL64(X[3], 29) ^ X[6];
- X[0] += X[5]; X[5] = ROTL64(X[5], 39) ^ X[0];
- X[2] += X[7]; X[7] = ROTL64(X[7], 43) ^ X[2];
- X[6] += X[1]; X[1] = ROTL64(X[1], 8) ^ X[6];
- X[4] += X[3]; X[3] = ROTL64(X[3], 22) ^ X[4];
- X[2] += X[5]; X[5] = ROTL64(X[5], 56) ^ X[2];
- X[0] += X[7]; X[7] = ROTL64(X[7], 35) ^ X[0];
-
- X[0] += Xt[r + 2];
- X[1] += Xt[r + 3];
- X[2] += Xt[r + 4];
- X[3] += Xt[r + 5];
- X[4] += Xt[r + 6];
- X[5] += Xt[r + 7] + T[1];
- X[6] += Xt[r + 8] + T[2];
- X[7] += Xt[r + 9] + r + 2;
-
- T[3] = T[0];
- T[0] = T[1];
- T[1] = T[2];
- T[2] = T[3];
- }
-
- S->X[0] = key[0] ^ X[0];
- S->X[1] = key[1] ^ X[1];
- S->X[2] = key[2] ^ X[2];
- S->X[3] = key[3] ^ X[3];
- S->X[4] = key[4] ^ X[4];
- S->X[5] = key[5] ^ X[5];
- S->X[6] = key[6] ^ X[6];
- S->X[7] = key[7] ^ X[7];
-
- S->T[0] = T[0];
- S->T[1] = T[1] & ~0x4000000000000000ull;
- }
-}
-
-static void
-scrypt_hash_init(scrypt_hash_state *S) {
- S->X[0] = 0x4903ADFF749C51CEull;
- S->X[1] = 0x0D95DE399746DF03ull;
- S->X[2] = 0x8FD1934127C79BCEull;
- S->X[3] = 0x9A255629FF352CB1ull;
- S->X[4] = 0x5DB62599DF6CA7B0ull;
- S->X[5] = 0xEABE394CA9D5C3F4ull;
- S->X[6] = 0x991112C71A75B523ull;
- S->X[7] = 0xAE18A40B660FCC33ull;
- S->T[0] = 0x0000000000000000ull;
- S->T[1] = 0x7000000000000000ull;
- S->leftover = 0;
-}
-
-static void
-scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {
- size_t blocks, want;
-
- /* skein processes the final <=64 bytes raw, so we can only update if there are at least 64+1 bytes available */
- if ((S->leftover + inlen) > SCRYPT_HASH_BLOCK_SIZE) {
- /* handle the previous data, we know there is enough for at least one block */
- if (S->leftover) {
- want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover);
- memcpy(S->buffer + S->leftover, in, want);
- in += want;
- inlen -= want;
- S->leftover = 0;
- skein512_blocks(S, S->buffer, 1, SCRYPT_HASH_BLOCK_SIZE);
- }
-
- /* handle the current data if there's more than one block */
- if (inlen > SCRYPT_HASH_BLOCK_SIZE) {
- blocks = ((inlen - 1) & ~(SCRYPT_HASH_BLOCK_SIZE - 1));
- skein512_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE, SCRYPT_HASH_BLOCK_SIZE);
- inlen -= blocks;
- in += blocks;
- }
- }
-
- /* handle leftover data */
- memcpy(S->buffer + S->leftover, in, inlen);
- S->leftover += inlen;
-}
-
-static void
-scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {
- memset(S->buffer + S->leftover, 0, SCRYPT_HASH_BLOCK_SIZE - S->leftover);
- S->T[1] |= 0x8000000000000000ull;
- skein512_blocks(S, S->buffer, 1, S->leftover);
-
- memset(S->buffer, 0, SCRYPT_HASH_BLOCK_SIZE);
- S->T[0] = 0;
- S->T[1] = 0xff00000000000000ull;
- skein512_blocks(S, S->buffer, 1, 8);
-
- U64TO8_LE(&hash[ 0], S->X[0]);
- U64TO8_LE(&hash[ 8], S->X[1]);
- U64TO8_LE(&hash[16], S->X[2]);
- U64TO8_LE(&hash[24], S->X[3]);
- U64TO8_LE(&hash[32], S->X[4]);
- U64TO8_LE(&hash[40], S->X[5]);
- U64TO8_LE(&hash[48], S->X[6]);
- U64TO8_LE(&hash[56], S->X[7]);
-}
-
-
-static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {
- 0x4d,0x52,0x29,0xff,0x10,0xbc,0xd2,0x62,0xd1,0x61,0x83,0xc8,0xe6,0xf0,0x83,0xc4,
- 0x9f,0xf5,0x6a,0x42,0x75,0x2a,0x26,0x4e,0xf0,0x28,0x72,0x28,0x47,0xe8,0x23,0xdf,
- 0x1e,0x64,0xf1,0x51,0x38,0x35,0x9d,0xc2,0x83,0xfc,0x35,0x4e,0xc0,0x52,0x5f,0x41,
- 0x6a,0x0b,0x7d,0xf5,0xce,0x98,0xde,0x6f,0x36,0xd8,0x51,0x15,0x78,0x78,0x93,0x67,
-};
diff --git a/stratum/algos/ar2/sj/scrypt-jane-mix_salsa64-avx.h b/stratum/algos/ar2/sj/scrypt-jane-mix_salsa64-avx.h
deleted file mode 100644
index 663d8335c..000000000
--- a/stratum/algos/ar2/sj/scrypt-jane-mix_salsa64-avx.h
+++ /dev/null
@@ -1,367 +0,0 @@
-/* x64 */
-#if defined(X86_64ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
-
-#define SCRYPT_SALSA64_AVX
-
-asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r)
-asm_naked_fn(scrypt_ChunkMix_avx)
- a1(push rbp)
- a2(mov rbp, rsp)
- a2(and rsp, ~63)
- a2(sub rsp, 128)
- a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
- a2(shl rcx,7)
- a2(lea r9,[rcx-128])
- a2(lea rax,[rsi+r9])
- a2(lea r9,[rdx+r9])
- a2(and rdx, rdx)
- a2(vmovdqa xmm0,[rax+0])
- a2(vmovdqa xmm1,[rax+16])
- a2(vmovdqa xmm2,[rax+32])
- a2(vmovdqa xmm3,[rax+48])
- a2(vmovdqa xmm4,[rax+64])
- a2(vmovdqa xmm5,[rax+80])
- a2(vmovdqa xmm6,[rax+96])
- a2(vmovdqa xmm7,[rax+112])
- aj(jz scrypt_ChunkMix_avx_no_xor1)
- a3(vpxor xmm0,xmm0,[r9+0])
- a3(vpxor xmm1,xmm1,[r9+16])
- a3(vpxor xmm2,xmm2,[r9+32])
- a3(vpxor xmm3,xmm3,[r9+48])
- a3(vpxor xmm4,xmm4,[r9+64])
- a3(vpxor xmm5,xmm5,[r9+80])
- a3(vpxor xmm6,xmm6,[r9+96])
- a3(vpxor xmm7,xmm7,[r9+112])
- a1(scrypt_ChunkMix_avx_no_xor1:)
- a2(xor r9,r9)
- a2(xor r8,r8)
- a1(scrypt_ChunkMix_avx_loop:)
- a2(and rdx, rdx)
- a3(vpxor xmm0,xmm0,[rsi+r9+0])
- a3(vpxor xmm1,xmm1,[rsi+r9+16])
- a3(vpxor xmm2,xmm2,[rsi+r9+32])
- a3(vpxor xmm3,xmm3,[rsi+r9+48])
- a3(vpxor xmm4,xmm4,[rsi+r9+64])
- a3(vpxor xmm5,xmm5,[rsi+r9+80])
- a3(vpxor xmm6,xmm6,[rsi+r9+96])
- a3(vpxor xmm7,xmm7,[rsi+r9+112])
- aj(jz scrypt_ChunkMix_avx_no_xor2)
- a3(vpxor xmm0,xmm0,[rdx+r9+0])
- a3(vpxor xmm1,xmm1,[rdx+r9+16])
- a3(vpxor xmm2,xmm2,[rdx+r9+32])
- a3(vpxor xmm3,xmm3,[rdx+r9+48])
- a3(vpxor xmm4,xmm4,[rdx+r9+64])
- a3(vpxor xmm5,xmm5,[rdx+r9+80])
- a3(vpxor xmm6,xmm6,[rdx+r9+96])
- a3(vpxor xmm7,xmm7,[rdx+r9+112])
- a1(scrypt_ChunkMix_avx_no_xor2:)
- a2(vmovdqa [rsp+0],xmm0)
- a2(vmovdqa [rsp+16],xmm1)
- a2(vmovdqa [rsp+32],xmm2)
- a2(vmovdqa [rsp+48],xmm3)
- a2(vmovdqa [rsp+64],xmm4)
- a2(vmovdqa [rsp+80],xmm5)
- a2(vmovdqa [rsp+96],xmm6)
- a2(vmovdqa [rsp+112],xmm7)
- a2(mov rax,8)
- a1(scrypt_salsa64_avx_loop: )
- a3(vpaddq xmm8, xmm0, xmm2)
- a3(vpaddq xmm9, xmm1, xmm3)
- a3(vpshufd xmm8, xmm8, 0xb1)
- a3(vpshufd xmm9, xmm9, 0xb1)
- a3(vpxor xmm6, xmm6, xmm8)
- a3(vpxor xmm7, xmm7, xmm9)
- a3(vpaddq xmm10, xmm0, xmm6)
- a3(vpaddq xmm11, xmm1, xmm7)
- a3(vpsrlq xmm8, xmm10, 51)
- a3(vpsrlq xmm9, xmm11, 51)
- a3(vpsllq xmm10, xmm10, 13)
- a3(vpsllq xmm11, xmm11, 13)
- a3(vpxor xmm4, xmm4, xmm8)
- a3(vpxor xmm5, xmm5, xmm9)
- a3(vpxor xmm4, xmm4, xmm10)
- a3(vpxor xmm5, xmm5, xmm11)
- a3(vpaddq xmm8, xmm6, xmm4)
- a3(vpaddq xmm9, xmm7, xmm5)
- a3(vpsrlq xmm10, xmm8, 25)
- a3(vpsrlq xmm11, xmm9, 25)
- a3(vpsllq xmm8, xmm8, 39)
- a3(vpsllq xmm9, xmm9, 39)
- a3(vpxor xmm2, xmm2, xmm10)
- a3(vpxor xmm3, xmm3, xmm11)
- a3(vpxor xmm2, xmm2, xmm8)
- a3(vpxor xmm3, xmm3, xmm9)
- a3(vpaddq xmm10, xmm4, xmm2)
- a3(vpaddq xmm11, xmm5, xmm3)
- a3(vpshufd xmm10, xmm10, 0xb1)
- a3(vpshufd xmm11, xmm11, 0xb1)
- a3(vpxor xmm0, xmm0, xmm10)
- a3(vpxor xmm1, xmm1, xmm11)
- a2(vmovdqa xmm8, xmm2)
- a2(vmovdqa xmm9, xmm3)
- a4(vpalignr xmm2, xmm6, xmm7, 8)
- a4(vpalignr xmm3, xmm7, xmm6, 8)
- a4(vpalignr xmm6, xmm9, xmm8, 8)
- a4(vpalignr xmm7, xmm8, xmm9, 8)
- a3(vpaddq xmm10, xmm0, xmm2)
- a3(vpaddq xmm11, xmm1, xmm3)
- a3(vpshufd xmm10, xmm10, 0xb1)
- a3(vpshufd xmm11, xmm11, 0xb1)
- a3(vpxor xmm6, xmm6, xmm10)
- a3(vpxor xmm7, xmm7, xmm11)
- a3(vpaddq xmm8, xmm0, xmm6)
- a3(vpaddq xmm9, xmm1, xmm7)
- a3(vpsrlq xmm10, xmm8, 51)
- a3(vpsrlq xmm11, xmm9, 51)
- a3(vpsllq xmm8, xmm8, 13)
- a3(vpsllq xmm9, xmm9, 13)
- a3(vpxor xmm5, xmm5, xmm10)
- a3(vpxor xmm4, xmm4, xmm11)
- a3(vpxor xmm5, xmm5, xmm8)
- a3(vpxor xmm4, xmm4, xmm9)
- a3(vpaddq xmm10, xmm6, xmm5)
- a3(vpaddq xmm11, xmm7, xmm4)
- a3(vpsrlq xmm8, xmm10, 25)
- a3(vpsrlq xmm9, xmm11, 25)
- a3(vpsllq xmm10, xmm10, 39)
- a3(vpsllq xmm11, xmm11, 39)
- a3(vpxor xmm2, xmm2, xmm8)
- a3(vpxor xmm3, xmm3, xmm9)
- a3(vpxor xmm2, xmm2, xmm10)
- a3(vpxor xmm3, xmm3, xmm11)
- a3(vpaddq xmm8, xmm5, xmm2)
- a3(vpaddq xmm9, xmm4, xmm3)
- a3(vpshufd xmm8, xmm8, 0xb1)
- a3(vpshufd xmm9, xmm9, 0xb1)
- a3(vpxor xmm0, xmm0, xmm8)
- a3(vpxor xmm1, xmm1, xmm9)
- a2(vmovdqa xmm10, xmm2)
- a2(vmovdqa xmm11, xmm3)
- a4(vpalignr xmm2, xmm6, xmm7, 8)
- a4(vpalignr xmm3, xmm7, xmm6, 8)
- a4(vpalignr xmm6, xmm11, xmm10, 8)
- a4(vpalignr xmm7, xmm10, xmm11, 8)
- a2(sub rax, 2)
- aj(ja scrypt_salsa64_avx_loop)
- a3(vpaddq xmm0,xmm0,[rsp+0])
- a3(vpaddq xmm1,xmm1,[rsp+16])
- a3(vpaddq xmm2,xmm2,[rsp+32])
- a3(vpaddq xmm3,xmm3,[rsp+48])
- a3(vpaddq xmm4,xmm4,[rsp+64])
- a3(vpaddq xmm5,xmm5,[rsp+80])
- a3(vpaddq xmm6,xmm6,[rsp+96])
- a3(vpaddq xmm7,xmm7,[rsp+112])
- a2(lea rax,[r8+r9])
- a2(xor r8,rcx)
- a2(and rax,~0xff)
- a2(add r9,128)
- a2(shr rax,1)
- a2(add rax, rdi)
- a2(cmp r9,rcx)
- a2(vmovdqa [rax+0],xmm0)
- a2(vmovdqa [rax+16],xmm1)
- a2(vmovdqa [rax+32],xmm2)
- a2(vmovdqa [rax+48],xmm3)
- a2(vmovdqa [rax+64],xmm4)
- a2(vmovdqa [rax+80],xmm5)
- a2(vmovdqa [rax+96],xmm6)
- a2(vmovdqa [rax+112],xmm7)
- aj(jne scrypt_ChunkMix_avx_loop)
- a2(mov rsp, rbp)
- a1(pop rbp)
- a1(ret)
-asm_naked_fn_end(scrypt_ChunkMix_avx)
-
-#endif
-
-
-/* intrinsic */
-#if defined(X86_INTRINSIC_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED))
-
-#define SCRYPT_SALSA64_AVX
-
-static void asm_calling_convention
-scrypt_ChunkMix_avx(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) {
- uint32_t i, blocksPerChunk = r * 2, half = 0;
- xmmi *xmmp,x0,x1,x2,x3,x4,x5,x6,x7,t0,t1,t2,t3,t4,t5,t6,t7,z0,z1,z2,z3;
- size_t rounds;
-
- /* 1: X = B_{2r - 1} */
- xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
- x0 = xmmp[0];
- x1 = xmmp[1];
- x2 = xmmp[2];
- x3 = xmmp[3];
- x4 = xmmp[4];
- x5 = xmmp[5];
- x6 = xmmp[6];
- x7 = xmmp[7];
-
- if (Bxor) {
- xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
- x0 = _mm_xor_si128(x0, xmmp[0]);
- x1 = _mm_xor_si128(x1, xmmp[1]);
- x2 = _mm_xor_si128(x2, xmmp[2]);
- x3 = _mm_xor_si128(x3, xmmp[3]);
- x4 = _mm_xor_si128(x4, xmmp[4]);
- x5 = _mm_xor_si128(x5, xmmp[5]);
- x6 = _mm_xor_si128(x6, xmmp[6]);
- x7 = _mm_xor_si128(x7, xmmp[7]);
- }
-
- /* 2: for i = 0 to 2r - 1 do */
- for (i = 0; i < blocksPerChunk; i++, half ^= r) {
- /* 3: X = H(X ^ B_i) */
- xmmp = (xmmi *)scrypt_block(Bin, i);
- x0 = _mm_xor_si128(x0, xmmp[0]);
- x1 = _mm_xor_si128(x1, xmmp[1]);
- x2 = _mm_xor_si128(x2, xmmp[2]);
- x3 = _mm_xor_si128(x3, xmmp[3]);
- x4 = _mm_xor_si128(x4, xmmp[4]);
- x5 = _mm_xor_si128(x5, xmmp[5]);
- x6 = _mm_xor_si128(x6, xmmp[6]);
- x7 = _mm_xor_si128(x7, xmmp[7]);
-
- if (Bxor) {
- xmmp = (xmmi *)scrypt_block(Bxor, i);
- x0 = _mm_xor_si128(x0, xmmp[0]);
- x1 = _mm_xor_si128(x1, xmmp[1]);
- x2 = _mm_xor_si128(x2, xmmp[2]);
- x3 = _mm_xor_si128(x3, xmmp[3]);
- x4 = _mm_xor_si128(x4, xmmp[4]);
- x5 = _mm_xor_si128(x5, xmmp[5]);
- x6 = _mm_xor_si128(x6, xmmp[6]);
- x7 = _mm_xor_si128(x7, xmmp[7]);
- }
-
- t0 = x0;
- t1 = x1;
- t2 = x2;
- t3 = x3;
- t4 = x4;
- t5 = x5;
- t6 = x6;
- t7 = x7;
-
- for (rounds = 8; rounds; rounds -= 2) {
- z0 = _mm_add_epi64(x0, x2);
- z1 = _mm_add_epi64(x1, x3);
- z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
- z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
- x6 = _mm_xor_si128(x6, z0);
- x7 = _mm_xor_si128(x7, z1);
-
- z0 = _mm_add_epi64(x6, x0);
- z1 = _mm_add_epi64(x7, x1);
- z2 = _mm_srli_epi64(z0, 64-13);
- z3 = _mm_srli_epi64(z1, 64-13);
- z0 = _mm_slli_epi64(z0, 13);
- z1 = _mm_slli_epi64(z1, 13);
- x4 = _mm_xor_si128(x4, z2);
- x5 = _mm_xor_si128(x5, z3);
- x4 = _mm_xor_si128(x4, z0);
- x5 = _mm_xor_si128(x5, z1);
-
- z0 = _mm_add_epi64(x4, x6);
- z1 = _mm_add_epi64(x5, x7);
- z2 = _mm_srli_epi64(z0, 64-39);
- z3 = _mm_srli_epi64(z1, 64-39);
- z0 = _mm_slli_epi64(z0, 39);
- z1 = _mm_slli_epi64(z1, 39);
- x2 = _mm_xor_si128(x2, z2);
- x3 = _mm_xor_si128(x3, z3);
- x2 = _mm_xor_si128(x2, z0);
- x3 = _mm_xor_si128(x3, z1);
-
- z0 = _mm_add_epi64(x2, x4);
- z1 = _mm_add_epi64(x3, x5);
- z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
- z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
- x0 = _mm_xor_si128(x0, z0);
- x1 = _mm_xor_si128(x1, z1);
-
- z0 = x2;
- z1 = x3;
- x2 = _mm_alignr_epi8(x6, x7, 8);
- x3 = _mm_alignr_epi8(x7, x6, 8);
- x6 = _mm_alignr_epi8(z1, z0, 8);
- x7 = _mm_alignr_epi8(z0, z1, 8);
-
- z0 = _mm_add_epi64(x0, x2);
- z1 = _mm_add_epi64(x1, x3);
- z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
- z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
- x6 = _mm_xor_si128(x6, z0);
- x7 = _mm_xor_si128(x7, z1);
-
- z0 = _mm_add_epi64(x6, x0);
- z1 = _mm_add_epi64(x7, x1);
- z2 = _mm_srli_epi64(z0, 64-13);
- z3 = _mm_srli_epi64(z1, 64-13);
- z0 = _mm_slli_epi64(z0, 13);
- z1 = _mm_slli_epi64(z1, 13);
- x5 = _mm_xor_si128(x5, z2);
- x4 = _mm_xor_si128(x4, z3);
- x5 = _mm_xor_si128(x5, z0);
- x4 = _mm_xor_si128(x4, z1);
-
- z0 = _mm_add_epi64(x5, x6);
- z1 = _mm_add_epi64(x4, x7);
- z2 = _mm_srli_epi64(z0, 64-39);
- z3 = _mm_srli_epi64(z1, 64-39);
- z0 = _mm_slli_epi64(z0, 39);
- z1 = _mm_slli_epi64(z1, 39);
- x2 = _mm_xor_si128(x2, z2);
- x3 = _mm_xor_si128(x3, z3);
- x2 = _mm_xor_si128(x2, z0);
- x3 = _mm_xor_si128(x3, z1);
-
- z0 = _mm_add_epi64(x2, x5);
- z1 = _mm_add_epi64(x3, x4);
- z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
- z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
- x0 = _mm_xor_si128(x0, z0);
- x1 = _mm_xor_si128(x1, z1);
-
- z0 = x2;
- z1 = x3;
- x2 = _mm_alignr_epi8(x6, x7, 8);
- x3 = _mm_alignr_epi8(x7, x6, 8);
- x6 = _mm_alignr_epi8(z1, z0, 8);
- x7 = _mm_alignr_epi8(z0, z1, 8);
- }
-
- x0 = _mm_add_epi64(x0, t0);
- x1 = _mm_add_epi64(x1, t1);
- x2 = _mm_add_epi64(x2, t2);
- x3 = _mm_add_epi64(x3, t3);
- x4 = _mm_add_epi64(x4, t4);
- x5 = _mm_add_epi64(x5, t5);
- x6 = _mm_add_epi64(x6, t6);
- x7 = _mm_add_epi64(x7, t7);
-
- /* 4: Y_i = X */
- /* 6: B'[0..r-1] = Y_even */
- /* 6: B'[r..2r-1] = Y_odd */
- xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
- xmmp[0] = x0;
- xmmp[1] = x1;
- xmmp[2] = x2;
- xmmp[3] = x3;
- xmmp[4] = x4;
- xmmp[5] = x5;
- xmmp[6] = x6;
- xmmp[7] = x7;
- }
-}
-
-#endif
-
-#if defined(SCRYPT_SALSA64_AVX)
- /* uses salsa64_core_tangle_sse2 */
-
- #undef SCRYPT_MIX
- #define SCRYPT_MIX "Salsa64/8-AVX"
- #undef SCRYPT_SALSA64_INCLUDED
- #define SCRYPT_SALSA64_INCLUDED
-#endif
diff --git a/stratum/algos/ar2/sj/scrypt-jane-mix_salsa64-avx2.h b/stratum/algos/ar2/sj/scrypt-jane-mix_salsa64-avx2.h
deleted file mode 100644
index 81813026f..000000000
--- a/stratum/algos/ar2/sj/scrypt-jane-mix_salsa64-avx2.h
+++ /dev/null
@@ -1,221 +0,0 @@
-/* x64 */
-#if defined(X86_64ASM_AVX2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
-
-#define SCRYPT_SALSA64_AVX2
-
-asm_naked_fn_proto(void, scrypt_ChunkMix_avx2)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r)
-asm_naked_fn(scrypt_ChunkMix_avx2)
- a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
- a2(shl rcx,7)
- a2(lea r9,[rcx-128])
- a2(lea rax,[rsi+r9])
- a2(lea r9,[rdx+r9])
- a2(and rdx, rdx)
- a2(vmovdqa ymm0,[rax+0])
- a2(vmovdqa ymm1,[rax+32])
- a2(vmovdqa ymm2,[rax+64])
- a2(vmovdqa ymm3,[rax+96])
- aj(jz scrypt_ChunkMix_avx2_no_xor1)
- a3(vpxor ymm0,ymm0,[r9+0])
- a3(vpxor ymm1,ymm1,[r9+32])
- a3(vpxor ymm2,ymm2,[r9+64])
- a3(vpxor ymm3,ymm3,[r9+96])
- a1(scrypt_ChunkMix_avx2_no_xor1:)
- a2(xor r9,r9)
- a2(xor r8,r8)
- a1(scrypt_ChunkMix_avx2_loop:)
- a2(and rdx, rdx)
- a3(vpxor ymm0,ymm0,[rsi+r9+0])
- a3(vpxor ymm1,ymm1,[rsi+r9+32])
- a3(vpxor ymm2,ymm2,[rsi+r9+64])
- a3(vpxor ymm3,ymm3,[rsi+r9+96])
- aj(jz scrypt_ChunkMix_avx2_no_xor2)
- a3(vpxor ymm0,ymm0,[rdx+r9+0])
- a3(vpxor ymm1,ymm1,[rdx+r9+32])
- a3(vpxor ymm2,ymm2,[rdx+r9+64])
- a3(vpxor ymm3,ymm3,[rdx+r9+96])
- a1(scrypt_ChunkMix_avx2_no_xor2:)
- a2(vmovdqa ymm6,ymm0)
- a2(vmovdqa ymm7,ymm1)
- a2(vmovdqa ymm8,ymm2)
- a2(vmovdqa ymm9,ymm3)
- a2(mov rax,4)
- a1(scrypt_salsa64_avx2_loop: )
- a3(vpaddq ymm4, ymm1, ymm0)
- a3(vpshufd ymm4, ymm4, 0xb1)
- a3(vpxor ymm3, ymm3, ymm4)
- a3(vpaddq ymm4, ymm0, ymm3)
- a3(vpsrlq ymm5, ymm4, 51)
- a3(vpxor ymm2, ymm2, ymm5)
- a3(vpsllq ymm4, ymm4, 13)
- a3(vpxor ymm2, ymm2, ymm4)
- a3(vpaddq ymm4, ymm3, ymm2)
- a3(vpsrlq ymm5, ymm4, 25)
- a3(vpxor ymm1, ymm1, ymm5)
- a3(vpsllq ymm4, ymm4, 39)
- a3(vpxor ymm1, ymm1, ymm4)
- a3(vpaddq ymm4, ymm2, ymm1)
- a3(vpshufd ymm4, ymm4, 0xb1)
- a3(vpermq ymm1, ymm1, 0x39)
- a3(vpermq ymm10, ymm2, 0x4e)
- a3(vpxor ymm0, ymm0, ymm4)
- a3(vpermq ymm3, ymm3, 0x93)
- a3(vpaddq ymm4, ymm3, ymm0)
- a3(vpshufd ymm4, ymm4, 0xb1)
- a3(vpxor ymm1, ymm1, ymm4)
- a3(vpaddq ymm4, ymm0, ymm1)
- a3(vpsrlq ymm5, ymm4, 51)
- a3(vpxor ymm10, ymm10, ymm5)
- a3(vpsllq ymm4, ymm4, 13)
- a3(vpxor ymm10, ymm10, ymm4)
- a3(vpaddq ymm4, ymm1, ymm10)
- a3(vpsrlq ymm5, ymm4, 25)
- a3(vpxor ymm3, ymm3, ymm5)
- a3(vpsllq ymm4, ymm4, 39)
- a3(vpermq ymm1, ymm1, 0x93)
- a3(vpxor ymm3, ymm3, ymm4)
- a3(vpermq ymm2, ymm10, 0x4e)
- a3(vpaddq ymm4, ymm10, ymm3)
- a3(vpshufd ymm4, ymm4, 0xb1)
- a3(vpermq ymm3, ymm3, 0x39)
- a3(vpxor ymm0, ymm0, ymm4)
- a1(dec rax)
- aj(jnz scrypt_salsa64_avx2_loop)
- a3(vpaddq ymm0,ymm0,ymm6)
- a3(vpaddq ymm1,ymm1,ymm7)
- a3(vpaddq ymm2,ymm2,ymm8)
- a3(vpaddq ymm3,ymm3,ymm9)
- a2(lea rax,[r8+r9])
- a2(xor r8,rcx)
- a2(and rax,~0xff)
- a2(add r9,128)
- a2(shr rax,1)
- a2(add rax, rdi)
- a2(cmp r9,rcx)
- a2(vmovdqa [rax+0],ymm0)
- a2(vmovdqa [rax+32],ymm1)
- a2(vmovdqa [rax+64],ymm2)
- a2(vmovdqa [rax+96],ymm3)
- aj(jne scrypt_ChunkMix_avx2_loop)
- a1(vzeroupper)
- a1(ret)
-asm_naked_fn_end(scrypt_ChunkMix_avx2)
-
-#endif
-
-
-/* intrinsic */
-#if defined(X86_INTRINSIC_AVX2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED))
-
-#define SCRYPT_SALSA64_AVX2
-
-static void asm_calling_convention
-scrypt_ChunkMix_avx2(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) {
- uint32_t i, blocksPerChunk = r * 2, half = 0;
- ymmi *ymmp,y0,y1,y2,y3,t0,t1,t2,t3,z0,z1;
- size_t rounds;
-
- /* 1: X = B_{2r - 1} */
- ymmp = (ymmi *)scrypt_block(Bin, blocksPerChunk - 1);
- y0 = ymmp[0];
- y1 = ymmp[1];
- y2 = ymmp[2];
- y3 = ymmp[3];
-
- if (Bxor) {
- ymmp = (ymmi *)scrypt_block(Bxor, blocksPerChunk - 1);
- y0 = _mm256_xor_si256(y0, ymmp[0]);
- y1 = _mm256_xor_si256(y1, ymmp[1]);
- y2 = _mm256_xor_si256(y2, ymmp[2]);
- y3 = _mm256_xor_si256(y3, ymmp[3]);
- }
-
- /* 2: for i = 0 to 2r - 1 do */
- for (i = 0; i < blocksPerChunk; i++, half ^= r) {
- /* 3: X = H(X ^ B_i) */
- ymmp = (ymmi *)scrypt_block(Bin, i);
- y0 = _mm256_xor_si256(y0, ymmp[0]);
- y1 = _mm256_xor_si256(y1, ymmp[1]);
- y2 = _mm256_xor_si256(y2, ymmp[2]);
- y3 = _mm256_xor_si256(y3, ymmp[3]);
-
- if (Bxor) {
- ymmp = (ymmi *)scrypt_block(Bxor, i);
- y0 = _mm256_xor_si256(y0, ymmp[0]);
- y1 = _mm256_xor_si256(y1, ymmp[1]);
- y2 = _mm256_xor_si256(y2, ymmp[2]);
- y3 = _mm256_xor_si256(y3, ymmp[3]);
- }
-
- t0 = y0;
- t1 = y1;
- t2 = y2;
- t3 = y3;
-
- for (rounds = 8; rounds; rounds -= 2) {
- z0 = _mm256_add_epi64(y0, y1);
- z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
- y3 = _mm256_xor_si256(y3, z0);
- z0 = _mm256_add_epi64(y3, y0);
- z1 = _mm256_srli_epi64(z0, 64-13);
- y2 = _mm256_xor_si256(y2, z1);
- z0 = _mm256_slli_epi64(z0, 13);
- y2 = _mm256_xor_si256(y2, z0);
- z0 = _mm256_add_epi64(y2, y3);
- z1 = _mm256_srli_epi64(z0, 64-39);
- y1 = _mm256_xor_si256(y1, z1);
- z0 = _mm256_slli_epi64(z0, 39);
- y1 = _mm256_xor_si256(y1, z0);
- y1 = _mm256_permute4x64_epi64(y1, _MM_SHUFFLE(0,3,2,1));
- y2 = _mm256_permute4x64_epi64(y2, _MM_SHUFFLE(1,0,3,2));
- y3 = _mm256_permute4x64_epi64(y3, _MM_SHUFFLE(2,1,0,3));
- z0 = _mm256_add_epi64(y1, y2);
- z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
- y0 = _mm256_xor_si256(y0, z0);
- z0 = _mm256_add_epi64(y0, y3);
- z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
- y1 = _mm256_xor_si256(y1, z0);
- z0 = _mm256_add_epi64(y1, y0);
- z1 = _mm256_srli_epi64(z0, 64-13);
- y2 = _mm256_xor_si256(y2, z1);
- z0 = _mm256_slli_epi64(z0, 13);
- y2 = _mm256_xor_si256(y2, z0);
- z0 = _mm256_add_epi64(y2, y1);
- z1 = _mm256_srli_epi64(z0, 64-39);
- y3 = _mm256_xor_si256(y3, z1);
- z0 = _mm256_slli_epi64(z0, 39);
- y3 = _mm256_xor_si256(y3, z0);
- z0 = _mm256_add_epi64(y3, y2);
- z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
- y0 = _mm256_xor_si256(y0, z0);
- y1 = _mm256_permute4x64_epi64(y1, _MM_SHUFFLE(2,1,0,3));
- y2 = _mm256_permute4x64_epi64(y2, _MM_SHUFFLE(1,0,3,2));
- y3 = _mm256_permute4x64_epi64(y3, _MM_SHUFFLE(0,3,2,1));
- }
-
- y0 = _mm256_add_epi64(y0, t0);
- y1 = _mm256_add_epi64(y1, t1);
- y2 = _mm256_add_epi64(y2, t2);
- y3 = _mm256_add_epi64(y3, t3);
-
- /* 4: Y_i = X */
- /* 6: B'[0..r-1] = Y_even */
- /* 6: B'[r..2r-1] = Y_odd */
- ymmp = (ymmi *)scrypt_block(Bout, (i / 2) + half);
- ymmp[0] = y0;
- ymmp[1] = y1;
- ymmp[2] = y2;
- ymmp[3] = y3;
- }
-}
-
-#endif
-
-#if defined(SCRYPT_SALSA64_AVX2)
- /* uses salsa64_core_tangle_sse2 */
-
- #undef SCRYPT_MIX
- #define SCRYPT_MIX "Salsa64/8-AVX2"
- #undef SCRYPT_SALSA64_INCLUDED
- #define SCRYPT_SALSA64_INCLUDED
-#endif
diff --git a/stratum/algos/ar2/sj/scrypt-jane-mix_salsa64-sse2.h b/stratum/algos/ar2/sj/scrypt-jane-mix_salsa64-sse2.h
deleted file mode 100644
index 971d98a35..000000000
--- a/stratum/algos/ar2/sj/scrypt-jane-mix_salsa64-sse2.h
+++ /dev/null
@@ -1,449 +0,0 @@
-/* x64 */
-#if defined(X86_64ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
-
-#define SCRYPT_SALSA64_SSE2
-
-asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r)
-asm_naked_fn(scrypt_ChunkMix_sse2)
- a1(push rbp)
- a2(mov rbp, rsp)
- a2(and rsp, ~63)
- a2(sub rsp, 128)
- a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
- a2(shl rcx,7)
- a2(lea r9,[rcx-128])
- a2(lea rax,[rsi+r9])
- a2(lea r9,[rdx+r9])
- a2(and rdx, rdx)
- a2(movdqa xmm0,[rax+0])
- a2(movdqa xmm1,[rax+16])
- a2(movdqa xmm2,[rax+32])
- a2(movdqa xmm3,[rax+48])
- a2(movdqa xmm4,[rax+64])
- a2(movdqa xmm5,[rax+80])
- a2(movdqa xmm6,[rax+96])
- a2(movdqa xmm7,[rax+112])
- aj(jz scrypt_ChunkMix_sse2_no_xor1)
- a2(pxor xmm0,[r9+0])
- a2(pxor xmm1,[r9+16])
- a2(pxor xmm2,[r9+32])
- a2(pxor xmm3,[r9+48])
- a2(pxor xmm4,[r9+64])
- a2(pxor xmm5,[r9+80])
- a2(pxor xmm6,[r9+96])
- a2(pxor xmm7,[r9+112])
- a1(scrypt_ChunkMix_sse2_no_xor1:)
- a2(xor r9,r9)
- a2(xor r8,r8)
- a1(scrypt_ChunkMix_sse2_loop:)
- a2(and rdx, rdx)
- a2(pxor xmm0,[rsi+r9+0])
- a2(pxor xmm1,[rsi+r9+16])
- a2(pxor xmm2,[rsi+r9+32])
- a2(pxor xmm3,[rsi+r9+48])
- a2(pxor xmm4,[rsi+r9+64])
- a2(pxor xmm5,[rsi+r9+80])
- a2(pxor xmm6,[rsi+r9+96])
- a2(pxor xmm7,[rsi+r9+112])
- aj(jz scrypt_ChunkMix_sse2_no_xor2)
- a2(pxor xmm0,[rdx+r9+0])
- a2(pxor xmm1,[rdx+r9+16])
- a2(pxor xmm2,[rdx+r9+32])
- a2(pxor xmm3,[rdx+r9+48])
- a2(pxor xmm4,[rdx+r9+64])
- a2(pxor xmm5,[rdx+r9+80])
- a2(pxor xmm6,[rdx+r9+96])
- a2(pxor xmm7,[rdx+r9+112])
- a1(scrypt_ChunkMix_sse2_no_xor2:)
- a2(movdqa [rsp+0],xmm0)
- a2(movdqa [rsp+16],xmm1)
- a2(movdqa [rsp+32],xmm2)
- a2(movdqa [rsp+48],xmm3)
- a2(movdqa [rsp+64],xmm4)
- a2(movdqa [rsp+80],xmm5)
- a2(movdqa [rsp+96],xmm6)
- a2(movdqa [rsp+112],xmm7)
- a2(mov rax,8)
- a1(scrypt_salsa64_sse2_loop: )
- a2(movdqa xmm8, xmm0)
- a2(movdqa xmm9, xmm1)
- a2(paddq xmm8, xmm2)
- a2(paddq xmm9, xmm3)
- a3(pshufd xmm8, xmm8, 0xb1)
- a3(pshufd xmm9, xmm9, 0xb1)
- a2(pxor xmm6, xmm8)
- a2(pxor xmm7, xmm9)
- a2(movdqa xmm10, xmm0)
- a2(movdqa xmm11, xmm1)
- a2(paddq xmm10, xmm6)
- a2(paddq xmm11, xmm7)
- a2(movdqa xmm8, xmm10)
- a2(movdqa xmm9, xmm11)
- a2(psrlq xmm10, 51)
- a2(psrlq xmm11, 51)
- a2(psllq xmm8, 13)
- a2(psllq xmm9, 13)
- a2(pxor xmm4, xmm10)
- a2(pxor xmm5, xmm11)
- a2(pxor xmm4, xmm8)
- a2(pxor xmm5, xmm9)
- a2(movdqa xmm10, xmm6)
- a2(movdqa xmm11, xmm7)
- a2(paddq xmm10, xmm4)
- a2(paddq xmm11, xmm5)
- a2(movdqa xmm8, xmm10)
- a2(movdqa xmm9, xmm11)
- a2(psrlq xmm10, 25)
- a2(psrlq xmm11, 25)
- a2(psllq xmm8, 39)
- a2(psllq xmm9, 39)
- a2(pxor xmm2, xmm10)
- a2(pxor xmm3, xmm11)
- a2(pxor xmm2, xmm8)
- a2(pxor xmm3, xmm9)
- a2(movdqa xmm8, xmm4)
- a2(movdqa xmm9, xmm5)
- a2(paddq xmm8, xmm2)
- a2(paddq xmm9, xmm3)
- a3(pshufd xmm8, xmm8, 0xb1)
- a3(pshufd xmm9, xmm9, 0xb1)
- a2(pxor xmm0, xmm8)
- a2(pxor xmm1, xmm9)
- a2(movdqa xmm8, xmm2)
- a2(movdqa xmm9, xmm3)
- a2(movdqa xmm10, xmm6)
- a2(movdqa xmm11, xmm7)
- a2(movdqa xmm2, xmm7)
- a2(movdqa xmm3, xmm6)
- a2(punpcklqdq xmm10, xmm6)
- a2(punpcklqdq xmm11, xmm7)
- a2(movdqa xmm6, xmm8)
- a2(movdqa xmm7, xmm9)
- a2(punpcklqdq xmm9, xmm9)
- a2(punpcklqdq xmm8, xmm8)
- a2(punpckhqdq xmm2, xmm10)
- a2(punpckhqdq xmm3, xmm11)
- a2(punpckhqdq xmm6, xmm9)
- a2(punpckhqdq xmm7, xmm8)
- a2(sub rax, 2)
- a2(movdqa xmm8, xmm0)
- a2(movdqa xmm9, xmm1)
- a2(paddq xmm8, xmm2)
- a2(paddq xmm9, xmm3)
- a3(pshufd xmm8, xmm8, 0xb1)
- a3(pshufd xmm9, xmm9, 0xb1)
- a2(pxor xmm6, xmm8)
- a2(pxor xmm7, xmm9)
- a2(movdqa xmm10, xmm0)
- a2(movdqa xmm11, xmm1)
- a2(paddq xmm10, xmm6)
- a2(paddq xmm11, xmm7)
- a2(movdqa xmm8, xmm10)
- a2(movdqa xmm9, xmm11)
- a2(psrlq xmm10, 51)
- a2(psrlq xmm11, 51)
- a2(psllq xmm8, 13)
- a2(psllq xmm9, 13)
- a2(pxor xmm5, xmm10)
- a2(pxor xmm4, xmm11)
- a2(pxor xmm5, xmm8)
- a2(pxor xmm4, xmm9)
- a2(movdqa xmm10, xmm6)
- a2(movdqa xmm11, xmm7)
- a2(paddq xmm10, xmm5)
- a2(paddq xmm11, xmm4)
- a2(movdqa xmm8, xmm10)
- a2(movdqa xmm9, xmm11)
- a2(psrlq xmm10, 25)
- a2(psrlq xmm11, 25)
- a2(psllq xmm8, 39)
- a2(psllq xmm9, 39)
- a2(pxor xmm2, xmm10)
- a2(pxor xmm3, xmm11)
- a2(pxor xmm2, xmm8)
- a2(pxor xmm3, xmm9)
- a2(movdqa xmm8, xmm5)
- a2(movdqa xmm9, xmm4)
- a2(paddq xmm8, xmm2)
- a2(paddq xmm9, xmm3)
- a3(pshufd xmm8, xmm8, 0xb1)
- a3(pshufd xmm9, xmm9, 0xb1)
- a2(pxor xmm0, xmm8)
- a2(pxor xmm1, xmm9)
- a2(movdqa xmm8, xmm2)
- a2(movdqa xmm9, xmm3)
- a2(movdqa xmm10, xmm6)
- a2(movdqa xmm11, xmm7)
- a2(movdqa xmm2, xmm7)
- a2(movdqa xmm3, xmm6)
- a2(punpcklqdq xmm10, xmm6)
- a2(punpcklqdq xmm11, xmm7)
- a2(movdqa xmm6, xmm8)
- a2(movdqa xmm7, xmm9)
- a2(punpcklqdq xmm9, xmm9)
- a2(punpcklqdq xmm8, xmm8)
- a2(punpckhqdq xmm2, xmm10)
- a2(punpckhqdq xmm3, xmm11)
- a2(punpckhqdq xmm6, xmm9)
- a2(punpckhqdq xmm7, xmm8)
- aj(ja scrypt_salsa64_sse2_loop)
- a2(paddq xmm0,[rsp+0])
- a2(paddq xmm1,[rsp+16])
- a2(paddq xmm2,[rsp+32])
- a2(paddq xmm3,[rsp+48])
- a2(paddq xmm4,[rsp+64])
- a2(paddq xmm5,[rsp+80])
- a2(paddq xmm6,[rsp+96])
- a2(paddq xmm7,[rsp+112])
- a2(lea rax,[r8+r9])
- a2(xor r8,rcx)
- a2(and rax,~0xff)
- a2(add r9,128)
- a2(shr rax,1)
- a2(add rax, rdi)
- a2(cmp r9,rcx)
- a2(movdqa [rax+0],xmm0)
- a2(movdqa [rax+16],xmm1)
- a2(movdqa [rax+32],xmm2)
- a2(movdqa [rax+48],xmm3)
- a2(movdqa [rax+64],xmm4)
- a2(movdqa [rax+80],xmm5)
- a2(movdqa [rax+96],xmm6)
- a2(movdqa [rax+112],xmm7)
- aj(jne scrypt_ChunkMix_sse2_loop)
- a2(mov rsp, rbp)
- a1(pop rbp)
- a1(ret)
-asm_naked_fn_end(scrypt_ChunkMix_sse2)
-
-#endif
-
-
-/* intrinsic */
-#if defined(X86_INTRINSIC_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED))
-
-#define SCRYPT_SALSA64_SSE2
-
-static void asm_calling_convention
-scrypt_ChunkMix_sse2(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) {
- uint32_t i, blocksPerChunk = r * 2, half = 0;
- xmmi *xmmp,x0,x1,x2,x3,x4,x5,x6,x7,t0,t1,t2,t3,t4,t5,t6,t7,z0,z1,z2,z3;
- size_t rounds;
-
- /* 1: X = B_{2r - 1} */
- xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
- x0 = xmmp[0];
- x1 = xmmp[1];
- x2 = xmmp[2];
- x3 = xmmp[3];
- x4 = xmmp[4];
- x5 = xmmp[5];
- x6 = xmmp[6];
- x7 = xmmp[7];
-
- if (Bxor) {
- xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
- x0 = _mm_xor_si128(x0, xmmp[0]);
- x1 = _mm_xor_si128(x1, xmmp[1]);
- x2 = _mm_xor_si128(x2, xmmp[2]);
- x3 = _mm_xor_si128(x3, xmmp[3]);
- x4 = _mm_xor_si128(x4, xmmp[4]);
- x5 = _mm_xor_si128(x5, xmmp[5]);
- x6 = _mm_xor_si128(x6, xmmp[6]);
- x7 = _mm_xor_si128(x7, xmmp[7]);
- }
-
- /* 2: for i = 0 to 2r - 1 do */
- for (i = 0; i < blocksPerChunk; i++, half ^= r) {
- /* 3: X = H(X ^ B_i) */
- xmmp = (xmmi *)scrypt_block(Bin, i);
- x0 = _mm_xor_si128(x0, xmmp[0]);
- x1 = _mm_xor_si128(x1, xmmp[1]);
- x2 = _mm_xor_si128(x2, xmmp[2]);
- x3 = _mm_xor_si128(x3, xmmp[3]);
- x4 = _mm_xor_si128(x4, xmmp[4]);
- x5 = _mm_xor_si128(x5, xmmp[5]);
- x6 = _mm_xor_si128(x6, xmmp[6]);
- x7 = _mm_xor_si128(x7, xmmp[7]);
-
- if (Bxor) {
- xmmp = (xmmi *)scrypt_block(Bxor, i);
- x0 = _mm_xor_si128(x0, xmmp[0]);
- x1 = _mm_xor_si128(x1, xmmp[1]);
- x2 = _mm_xor_si128(x2, xmmp[2]);
- x3 = _mm_xor_si128(x3, xmmp[3]);
- x4 = _mm_xor_si128(x4, xmmp[4]);
- x5 = _mm_xor_si128(x5, xmmp[5]);
- x6 = _mm_xor_si128(x6, xmmp[6]);
- x7 = _mm_xor_si128(x7, xmmp[7]);
- }
-
- t0 = x0;
- t1 = x1;
- t2 = x2;
- t3 = x3;
- t4 = x4;
- t5 = x5;
- t6 = x6;
- t7 = x7;
-
- for (rounds = 8; rounds; rounds -= 2) {
- z0 = _mm_add_epi64(x0, x2);
- z1 = _mm_add_epi64(x1, x3);
- z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
- z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
- x6 = _mm_xor_si128(x6, z0);
- x7 = _mm_xor_si128(x7, z1);
-
- z0 = _mm_add_epi64(x6, x0);
- z1 = _mm_add_epi64(x7, x1);
- z2 = _mm_srli_epi64(z0, 64-13);
- z3 = _mm_srli_epi64(z1, 64-13);
- z0 = _mm_slli_epi64(z0, 13);
- z1 = _mm_slli_epi64(z1, 13);
- x4 = _mm_xor_si128(x4, z2);
- x5 = _mm_xor_si128(x5, z3);
- x4 = _mm_xor_si128(x4, z0);
- x5 = _mm_xor_si128(x5, z1);
-
- z0 = _mm_add_epi64(x4, x6);
- z1 = _mm_add_epi64(x5, x7);
- z2 = _mm_srli_epi64(z0, 64-39);
- z3 = _mm_srli_epi64(z1, 64-39);
- z0 = _mm_slli_epi64(z0, 39);
- z1 = _mm_slli_epi64(z1, 39);
- x2 = _mm_xor_si128(x2, z2);
- x3 = _mm_xor_si128(x3, z3);
- x2 = _mm_xor_si128(x2, z0);
- x3 = _mm_xor_si128(x3, z1);
-
- z0 = _mm_add_epi64(x2, x4);
- z1 = _mm_add_epi64(x3, x5);
- z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
- z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
- x0 = _mm_xor_si128(x0, z0);
- x1 = _mm_xor_si128(x1, z1);
-
- z0 = x4;
- z1 = x5;
- z2 = x2;
- z3 = x3;
- x4 = z1;
- x5 = z0;
- x2 = _mm_unpackhi_epi64(x7, _mm_unpacklo_epi64(x6, x6));
- x3 = _mm_unpackhi_epi64(x6, _mm_unpacklo_epi64(x7, x7));
- x6 = _mm_unpackhi_epi64(z2, _mm_unpacklo_epi64(z3, z3));
- x7 = _mm_unpackhi_epi64(z3, _mm_unpacklo_epi64(z2, z2));
-
- z0 = _mm_add_epi64(x0, x2);
- z1 = _mm_add_epi64(x1, x3);
- z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
- z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
- x6 = _mm_xor_si128(x6, z0);
- x7 = _mm_xor_si128(x7, z1);
-
- z0 = _mm_add_epi64(x6, x0);
- z1 = _mm_add_epi64(x7, x1);
- z2 = _mm_srli_epi64(z0, 64-13);
- z3 = _mm_srli_epi64(z1, 64-13);
- z0 = _mm_slli_epi64(z0, 13);
- z1 = _mm_slli_epi64(z1, 13);
- x4 = _mm_xor_si128(x4, z2);
- x5 = _mm_xor_si128(x5, z3);
- x4 = _mm_xor_si128(x4, z0);
- x5 = _mm_xor_si128(x5, z1);
-
- z0 = _mm_add_epi64(x4, x6);
- z1 = _mm_add_epi64(x5, x7);
- z2 = _mm_srli_epi64(z0, 64-39);
- z3 = _mm_srli_epi64(z1, 64-39);
- z0 = _mm_slli_epi64(z0, 39);
- z1 = _mm_slli_epi64(z1, 39);
- x2 = _mm_xor_si128(x2, z2);
- x3 = _mm_xor_si128(x3, z3);
- x2 = _mm_xor_si128(x2, z0);
- x3 = _mm_xor_si128(x3, z1);
-
- z0 = _mm_add_epi64(x2, x4);
- z1 = _mm_add_epi64(x3, x5);
- z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
- z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
- x0 = _mm_xor_si128(x0, z0);
- x1 = _mm_xor_si128(x1, z1);
-
- z0 = x4;
- z1 = x5;
- z2 = x2;
- z3 = x3;
- x4 = z1;
- x5 = z0;
- x2 = _mm_unpackhi_epi64(x7, _mm_unpacklo_epi64(x6, x6));
- x3 = _mm_unpackhi_epi64(x6, _mm_unpacklo_epi64(x7, x7));
- x6 = _mm_unpackhi_epi64(z2, _mm_unpacklo_epi64(z3, z3));
- x7 = _mm_unpackhi_epi64(z3, _mm_unpacklo_epi64(z2, z2));
- }
-
- x0 = _mm_add_epi64(x0, t0);
- x1 = _mm_add_epi64(x1, t1);
- x2 = _mm_add_epi64(x2, t2);
- x3 = _mm_add_epi64(x3, t3);
- x4 = _mm_add_epi64(x4, t4);
- x5 = _mm_add_epi64(x5, t5);
- x6 = _mm_add_epi64(x6, t6);
- x7 = _mm_add_epi64(x7, t7);
-
- /* 4: Y_i = X */
- /* 6: B'[0..r-1] = Y_even */
- /* 6: B'[r..2r-1] = Y_odd */
- xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
- xmmp[0] = x0;
- xmmp[1] = x1;
- xmmp[2] = x2;
- xmmp[3] = x3;
- xmmp[4] = x4;
- xmmp[5] = x5;
- xmmp[6] = x6;
- xmmp[7] = x7;
- }
-}
-
-#endif
-
-#if defined(SCRYPT_SALSA64_SSE2)
- #undef SCRYPT_MIX
- #define SCRYPT_MIX "Salsa64/8-SSE2"
- #undef SCRYPT_SALSA64_INCLUDED
- #define SCRYPT_SALSA64_INCLUDED
-#endif
-
-/* sse3/avx use this as well */
-#if defined(SCRYPT_SALSA64_INCLUDED)
- /*
- Default layout:
- 0 1 2 3
- 4 5 6 7
- 8 9 10 11
- 12 13 14 15
-
- SSE2 layout:
- 0 5 10 15
- 12 1 6 11
- 8 13 2 7
- 4 9 14 3
- */
-
-
- static void asm_calling_convention
- salsa64_core_tangle_sse2(uint64_t *blocks, size_t count) {
- uint64_t t;
- while (count--) {
- t = blocks[1]; blocks[1] = blocks[5]; blocks[5] = t;
- t = blocks[2]; blocks[2] = blocks[10]; blocks[10] = t;
- t = blocks[3]; blocks[3] = blocks[15]; blocks[15] = t;
- t = blocks[4]; blocks[4] = blocks[12]; blocks[12] = t;
- t = blocks[7]; blocks[7] = blocks[11]; blocks[11] = t;
- t = blocks[9]; blocks[9] = blocks[13]; blocks[13] = t;
- blocks += 16;
- }
- }
-#endif
\ No newline at end of file
diff --git a/stratum/algos/ar2/sj/scrypt-jane-mix_salsa64-ssse3.h b/stratum/algos/ar2/sj/scrypt-jane-mix_salsa64-ssse3.h
deleted file mode 100644
index 21e94c99a..000000000
--- a/stratum/algos/ar2/sj/scrypt-jane-mix_salsa64-ssse3.h
+++ /dev/null
@@ -1,399 +0,0 @@
-/* x64 */
-#if defined(X86_64ASM_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
-
-#define SCRYPT_SALSA64_SSSE3
-
-asm_naked_fn_proto(void, scrypt_ChunkMix_ssse3)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r)
-asm_naked_fn(scrypt_ChunkMix_ssse3)
- a1(push rbp)
- a2(mov rbp, rsp)
- a2(and rsp, ~63)
- a2(sub rsp, 128)
- a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
- a2(shl rcx,7)
- a2(lea r9,[rcx-128])
- a2(lea rax,[rsi+r9])
- a2(lea r9,[rdx+r9])
- a2(and rdx, rdx)
- a2(movdqa xmm0,[rax+0])
- a2(movdqa xmm1,[rax+16])
- a2(movdqa xmm2,[rax+32])
- a2(movdqa xmm3,[rax+48])
- a2(movdqa xmm4,[rax+64])
- a2(movdqa xmm5,[rax+80])
- a2(movdqa xmm6,[rax+96])
- a2(movdqa xmm7,[rax+112])
- aj(jz scrypt_ChunkMix_ssse3_no_xor1)
- a2(pxor xmm0,[r9+0])
- a2(pxor xmm1,[r9+16])
- a2(pxor xmm2,[r9+32])
- a2(pxor xmm3,[r9+48])
- a2(pxor xmm4,[r9+64])
- a2(pxor xmm5,[r9+80])
- a2(pxor xmm6,[r9+96])
- a2(pxor xmm7,[r9+112])
- a1(scrypt_ChunkMix_ssse3_no_xor1:)
- a2(xor r9,r9)
- a2(xor r8,r8)
- a1(scrypt_ChunkMix_ssse3_loop:)
- a2(and rdx, rdx)
- a2(pxor xmm0,[rsi+r9+0])
- a2(pxor xmm1,[rsi+r9+16])
- a2(pxor xmm2,[rsi+r9+32])
- a2(pxor xmm3,[rsi+r9+48])
- a2(pxor xmm4,[rsi+r9+64])
- a2(pxor xmm5,[rsi+r9+80])
- a2(pxor xmm6,[rsi+r9+96])
- a2(pxor xmm7,[rsi+r9+112])
- aj(jz scrypt_ChunkMix_ssse3_no_xor2)
- a2(pxor xmm0,[rdx+r9+0])
- a2(pxor xmm1,[rdx+r9+16])
- a2(pxor xmm2,[rdx+r9+32])
- a2(pxor xmm3,[rdx+r9+48])
- a2(pxor xmm4,[rdx+r9+64])
- a2(pxor xmm5,[rdx+r9+80])
- a2(pxor xmm6,[rdx+r9+96])
- a2(pxor xmm7,[rdx+r9+112])
- a1(scrypt_ChunkMix_ssse3_no_xor2:)
- a2(movdqa [rsp+0],xmm0)
- a2(movdqa [rsp+16],xmm1)
- a2(movdqa [rsp+32],xmm2)
- a2(movdqa [rsp+48],xmm3)
- a2(movdqa [rsp+64],xmm4)
- a2(movdqa [rsp+80],xmm5)
- a2(movdqa [rsp+96],xmm6)
- a2(movdqa [rsp+112],xmm7)
- a2(mov rax,8)
- a1(scrypt_salsa64_ssse3_loop: )
- a2(movdqa xmm8, xmm0)
- a2(movdqa xmm9, xmm1)
- a2(paddq xmm8, xmm2)
- a2(paddq xmm9, xmm3)
- a3(pshufd xmm8, xmm8, 0xb1)
- a3(pshufd xmm9, xmm9, 0xb1)
- a2(pxor xmm6, xmm8)
- a2(pxor xmm7, xmm9)
- a2(movdqa xmm10, xmm0)
- a2(movdqa xmm11, xmm1)
- a2(paddq xmm10, xmm6)
- a2(paddq xmm11, xmm7)
- a2(movdqa xmm8, xmm10)
- a2(movdqa xmm9, xmm11)
- a2(psrlq xmm10, 51)
- a2(psrlq xmm11, 51)
- a2(psllq xmm8, 13)
- a2(psllq xmm9, 13)
- a2(pxor xmm4, xmm10)
- a2(pxor xmm5, xmm11)
- a2(pxor xmm4, xmm8)
- a2(pxor xmm5, xmm9)
- a2(movdqa xmm10, xmm6)
- a2(movdqa xmm11, xmm7)
- a2(paddq xmm10, xmm4)
- a2(paddq xmm11, xmm5)
- a2(movdqa xmm8, xmm10)
- a2(movdqa xmm9, xmm11)
- a2(psrlq xmm10, 25)
- a2(psrlq xmm11, 25)
- a2(psllq xmm8, 39)
- a2(psllq xmm9, 39)
- a2(pxor xmm2, xmm10)
- a2(pxor xmm3, xmm11)
- a2(pxor xmm2, xmm8)
- a2(pxor xmm3, xmm9)
- a2(movdqa xmm8, xmm4)
- a2(movdqa xmm9, xmm5)
- a2(paddq xmm8, xmm2)
- a2(paddq xmm9, xmm3)
- a3(pshufd xmm8, xmm8, 0xb1)
- a3(pshufd xmm9, xmm9, 0xb1)
- a2(pxor xmm0, xmm8)
- a2(pxor xmm1, xmm9)
- a2(movdqa xmm10, xmm2)
- a2(movdqa xmm11, xmm3)
- a2(movdqa xmm2, xmm6)
- a2(movdqa xmm3, xmm7)
- a3(palignr xmm2, xmm7, 8)
- a3(palignr xmm3, xmm6, 8)
- a2(movdqa xmm6, xmm11)
- a2(movdqa xmm7, xmm10)
- a3(palignr xmm6, xmm10, 8)
- a3(palignr xmm7, xmm11, 8)
- a2(sub rax, 2)
- a2(movdqa xmm8, xmm0)
- a2(movdqa xmm9, xmm1)
- a2(paddq xmm8, xmm2)
- a2(paddq xmm9, xmm3)
- a3(pshufd xmm8, xmm8, 0xb1)
- a3(pshufd xmm9, xmm9, 0xb1)
- a2(pxor xmm6, xmm8)
- a2(pxor xmm7, xmm9)
- a2(movdqa xmm10, xmm0)
- a2(movdqa xmm11, xmm1)
- a2(paddq xmm10, xmm6)
- a2(paddq xmm11, xmm7)
- a2(movdqa xmm8, xmm10)
- a2(movdqa xmm9, xmm11)
- a2(psrlq xmm10, 51)
- a2(psrlq xmm11, 51)
- a2(psllq xmm8, 13)
- a2(psllq xmm9, 13)
- a2(pxor xmm5, xmm10)
- a2(pxor xmm4, xmm11)
- a2(pxor xmm5, xmm8)
- a2(pxor xmm4, xmm9)
- a2(movdqa xmm10, xmm6)
- a2(movdqa xmm11, xmm7)
- a2(paddq xmm10, xmm5)
- a2(paddq xmm11, xmm4)
- a2(movdqa xmm8, xmm10)
- a2(movdqa xmm9, xmm11)
- a2(psrlq xmm10, 25)
- a2(psrlq xmm11, 25)
- a2(psllq xmm8, 39)
- a2(psllq xmm9, 39)
- a2(pxor xmm2, xmm10)
- a2(pxor xmm3, xmm11)
- a2(pxor xmm2, xmm8)
- a2(pxor xmm3, xmm9)
- a2(movdqa xmm8, xmm5)
- a2(movdqa xmm9, xmm4)
- a2(paddq xmm8, xmm2)
- a2(paddq xmm9, xmm3)
- a3(pshufd xmm8, xmm8, 0xb1)
- a3(pshufd xmm9, xmm9, 0xb1)
- a2(pxor xmm0, xmm8)
- a2(pxor xmm1, xmm9)
- a2(movdqa xmm10, xmm2)
- a2(movdqa xmm11, xmm3)
- a2(movdqa xmm2, xmm6)
- a2(movdqa xmm3, xmm7)
- a3(palignr xmm2, xmm7, 8)
- a3(palignr xmm3, xmm6, 8)
- a2(movdqa xmm6, xmm11)
- a2(movdqa xmm7, xmm10)
- a3(palignr xmm6, xmm10, 8)
- a3(palignr xmm7, xmm11, 8)
- aj(ja scrypt_salsa64_ssse3_loop)
- a2(paddq xmm0,[rsp+0])
- a2(paddq xmm1,[rsp+16])
- a2(paddq xmm2,[rsp+32])
- a2(paddq xmm3,[rsp+48])
- a2(paddq xmm4,[rsp+64])
- a2(paddq xmm5,[rsp+80])
- a2(paddq xmm6,[rsp+96])
- a2(paddq xmm7,[rsp+112])
- a2(lea rax,[r8+r9])
- a2(xor r8,rcx)
- a2(and rax,~0xff)
- a2(add r9,128)
- a2(shr rax,1)
- a2(add rax, rdi)
- a2(cmp r9,rcx)
- a2(movdqa [rax+0],xmm0)
- a2(movdqa [rax+16],xmm1)
- a2(movdqa [rax+32],xmm2)
- a2(movdqa [rax+48],xmm3)
- a2(movdqa [rax+64],xmm4)
- a2(movdqa [rax+80],xmm5)
- a2(movdqa [rax+96],xmm6)
- a2(movdqa [rax+112],xmm7)
- aj(jne scrypt_ChunkMix_ssse3_loop)
- a2(mov rsp, rbp)
- a1(pop rbp)
- a1(ret)
-asm_naked_fn_end(scrypt_ChunkMix_ssse3)
-
-#endif
-
-
-/* intrinsic */
-#if defined(X86_INTRINSIC_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED))
-
-#define SCRYPT_SALSA64_SSSE3
-
-static void asm_calling_convention
-scrypt_ChunkMix_ssse3(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) {
- uint32_t i, blocksPerChunk = r * 2, half = 0;
- xmmi *xmmp,x0,x1,x2,x3,x4,x5,x6,x7,t0,t1,t2,t3,t4,t5,t6,t7,z0,z1,z2,z3;
- size_t rounds;
-
- /* 1: X = B_{2r - 1} */
- xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
- x0 = xmmp[0];
- x1 = xmmp[1];
- x2 = xmmp[2];
- x3 = xmmp[3];
- x4 = xmmp[4];
- x5 = xmmp[5];
- x6 = xmmp[6];
- x7 = xmmp[7];
-
- if (Bxor) {
- xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
- x0 = _mm_xor_si128(x0, xmmp[0]);
- x1 = _mm_xor_si128(x1, xmmp[1]);
- x2 = _mm_xor_si128(x2, xmmp[2]);
- x3 = _mm_xor_si128(x3, xmmp[3]);
- x4 = _mm_xor_si128(x4, xmmp[4]);
- x5 = _mm_xor_si128(x5, xmmp[5]);
- x6 = _mm_xor_si128(x6, xmmp[6]);
- x7 = _mm_xor_si128(x7, xmmp[7]);
- }
-
- /* 2: for i = 0 to 2r - 1 do */
- for (i = 0; i < blocksPerChunk; i++, half ^= r) {
- /* 3: X = H(X ^ B_i) */
- xmmp = (xmmi *)scrypt_block(Bin, i);
- x0 = _mm_xor_si128(x0, xmmp[0]);
- x1 = _mm_xor_si128(x1, xmmp[1]);
- x2 = _mm_xor_si128(x2, xmmp[2]);
- x3 = _mm_xor_si128(x3, xmmp[3]);
- x4 = _mm_xor_si128(x4, xmmp[4]);
- x5 = _mm_xor_si128(x5, xmmp[5]);
- x6 = _mm_xor_si128(x6, xmmp[6]);
- x7 = _mm_xor_si128(x7, xmmp[7]);
-
- if (Bxor) {
- xmmp = (xmmi *)scrypt_block(Bxor, i);
- x0 = _mm_xor_si128(x0, xmmp[0]);
- x1 = _mm_xor_si128(x1, xmmp[1]);
- x2 = _mm_xor_si128(x2, xmmp[2]);
- x3 = _mm_xor_si128(x3, xmmp[3]);
- x4 = _mm_xor_si128(x4, xmmp[4]);
- x5 = _mm_xor_si128(x5, xmmp[5]);
- x6 = _mm_xor_si128(x6, xmmp[6]);
- x7 = _mm_xor_si128(x7, xmmp[7]);
- }
-
- t0 = x0;
- t1 = x1;
- t2 = x2;
- t3 = x3;
- t4 = x4;
- t5 = x5;
- t6 = x6;
- t7 = x7;
-
- for (rounds = 8; rounds; rounds -= 2) {
- z0 = _mm_add_epi64(x0, x2);
- z1 = _mm_add_epi64(x1, x3);
- z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
- z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
- x6 = _mm_xor_si128(x6, z0);
- x7 = _mm_xor_si128(x7, z1);
-
- z0 = _mm_add_epi64(x6, x0);
- z1 = _mm_add_epi64(x7, x1);
- z2 = _mm_srli_epi64(z0, 64-13);
- z3 = _mm_srli_epi64(z1, 64-13);
- z0 = _mm_slli_epi64(z0, 13);
- z1 = _mm_slli_epi64(z1, 13);
- x4 = _mm_xor_si128(x4, z2);
- x5 = _mm_xor_si128(x5, z3);
- x4 = _mm_xor_si128(x4, z0);
- x5 = _mm_xor_si128(x5, z1);
-
- z0 = _mm_add_epi64(x4, x6);
- z1 = _mm_add_epi64(x5, x7);
- z2 = _mm_srli_epi64(z0, 64-39);
- z3 = _mm_srli_epi64(z1, 64-39);
- z0 = _mm_slli_epi64(z0, 39);
- z1 = _mm_slli_epi64(z1, 39);
- x2 = _mm_xor_si128(x2, z2);
- x3 = _mm_xor_si128(x3, z3);
- x2 = _mm_xor_si128(x2, z0);
- x3 = _mm_xor_si128(x3, z1);
-
- z0 = _mm_add_epi64(x2, x4);
- z1 = _mm_add_epi64(x3, x5);
- z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
- z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
- x0 = _mm_xor_si128(x0, z0);
- x1 = _mm_xor_si128(x1, z1);
-
- z0 = x2;
- z1 = x3;
- x2 = _mm_alignr_epi8(x6, x7, 8);
- x3 = _mm_alignr_epi8(x7, x6, 8);
- x6 = _mm_alignr_epi8(z1, z0, 8);
- x7 = _mm_alignr_epi8(z0, z1, 8);
-
- z0 = _mm_add_epi64(x0, x2);
- z1 = _mm_add_epi64(x1, x3);
- z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
- z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
- x6 = _mm_xor_si128(x6, z0);
- x7 = _mm_xor_si128(x7, z1);
-
- z0 = _mm_add_epi64(x6, x0);
- z1 = _mm_add_epi64(x7, x1);
- z2 = _mm_srli_epi64(z0, 64-13);
- z3 = _mm_srli_epi64(z1, 64-13);
- z0 = _mm_slli_epi64(z0, 13);
- z1 = _mm_slli_epi64(z1, 13);
- x5 = _mm_xor_si128(x5, z2);
- x4 = _mm_xor_si128(x4, z3);
- x5 = _mm_xor_si128(x5, z0);
- x4 = _mm_xor_si128(x4, z1);
-
- z0 = _mm_add_epi64(x5, x6);
- z1 = _mm_add_epi64(x4, x7);
- z2 = _mm_srli_epi64(z0, 64-39);
- z3 = _mm_srli_epi64(z1, 64-39);
- z0 = _mm_slli_epi64(z0, 39);
- z1 = _mm_slli_epi64(z1, 39);
- x2 = _mm_xor_si128(x2, z2);
- x3 = _mm_xor_si128(x3, z3);
- x2 = _mm_xor_si128(x2, z0);
- x3 = _mm_xor_si128(x3, z1);
-
- z0 = _mm_add_epi64(x2, x5);
- z1 = _mm_add_epi64(x3, x4);
- z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
- z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
- x0 = _mm_xor_si128(x0, z0);
- x1 = _mm_xor_si128(x1, z1);
-
- z0 = x2;
- z1 = x3;
- x2 = _mm_alignr_epi8(x6, x7, 8);
- x3 = _mm_alignr_epi8(x7, x6, 8);
- x6 = _mm_alignr_epi8(z1, z0, 8);
- x7 = _mm_alignr_epi8(z0, z1, 8);
- }
-
- x0 = _mm_add_epi64(x0, t0);
- x1 = _mm_add_epi64(x1, t1);
- x2 = _mm_add_epi64(x2, t2);
- x3 = _mm_add_epi64(x3, t3);
- x4 = _mm_add_epi64(x4, t4);
- x5 = _mm_add_epi64(x5, t5);
- x6 = _mm_add_epi64(x6, t6);
- x7 = _mm_add_epi64(x7, t7);
-
- /* 4: Y_i = X */
- /* 6: B'[0..r-1] = Y_even */
- /* 6: B'[r..2r-1] = Y_odd */
- xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
- xmmp[0] = x0;
- xmmp[1] = x1;
- xmmp[2] = x2;
- xmmp[3] = x3;
- xmmp[4] = x4;
- xmmp[5] = x5;
- xmmp[6] = x6;
- xmmp[7] = x7;
- }
-}
-
-#endif
-
-#if defined(SCRYPT_SALSA64_SSSE3)
- /* uses salsa64_core_tangle_sse2 */
-
- #undef SCRYPT_MIX
- #define SCRYPT_MIX "Salsa64/8-SSSE3"
- #undef SCRYPT_SALSA64_INCLUDED
- #define SCRYPT_SALSA64_INCLUDED
-#endif
diff --git a/stratum/algos/ar2/sj/scrypt-jane-mix_salsa64-xop.h b/stratum/algos/ar2/sj/scrypt-jane-mix_salsa64-xop.h
deleted file mode 100644
index 34f1b4029..000000000
--- a/stratum/algos/ar2/sj/scrypt-jane-mix_salsa64-xop.h
+++ /dev/null
@@ -1,335 +0,0 @@
-/* x64 */
-#if defined(X86_64ASM_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
-
-#define SCRYPT_SALSA64_XOP
-
-asm_naked_fn_proto(void, scrypt_ChunkMix_xop)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r)
-asm_naked_fn(scrypt_ChunkMix_xop)
- a1(push rbp)
- a2(mov rbp, rsp)
- a2(and rsp, ~63)
- a2(sub rsp, 128)
- a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
- a2(shl rcx,7)
- a2(lea r9,[rcx-128])
- a2(lea rax,[rsi+r9])
- a2(lea r9,[rdx+r9])
- a2(and rdx, rdx)
- a2(vmovdqa xmm0,[rax+0])
- a2(vmovdqa xmm1,[rax+16])
- a2(vmovdqa xmm2,[rax+32])
- a2(vmovdqa xmm3,[rax+48])
- a2(vmovdqa xmm4,[rax+64])
- a2(vmovdqa xmm5,[rax+80])
- a2(vmovdqa xmm6,[rax+96])
- a2(vmovdqa xmm7,[rax+112])
- aj(jz scrypt_ChunkMix_xop_no_xor1)
- a3(vpxor xmm0,xmm0,[r9+0])
- a3(vpxor xmm1,xmm1,[r9+16])
- a3(vpxor xmm2,xmm2,[r9+32])
- a3(vpxor xmm3,xmm3,[r9+48])
- a3(vpxor xmm4,xmm4,[r9+64])
- a3(vpxor xmm5,xmm5,[r9+80])
- a3(vpxor xmm6,xmm6,[r9+96])
- a3(vpxor xmm7,xmm7,[r9+112])
- a1(scrypt_ChunkMix_xop_no_xor1:)
- a2(xor r9,r9)
- a2(xor r8,r8)
- a1(scrypt_ChunkMix_xop_loop:)
- a2(and rdx, rdx)
- a3(vpxor xmm0,xmm0,[rsi+r9+0])
- a3(vpxor xmm1,xmm1,[rsi+r9+16])
- a3(vpxor xmm2,xmm2,[rsi+r9+32])
- a3(vpxor xmm3,xmm3,[rsi+r9+48])
- a3(vpxor xmm4,xmm4,[rsi+r9+64])
- a3(vpxor xmm5,xmm5,[rsi+r9+80])
- a3(vpxor xmm6,xmm6,[rsi+r9+96])
- a3(vpxor xmm7,xmm7,[rsi+r9+112])
- aj(jz scrypt_ChunkMix_xop_no_xor2)
- a3(vpxor xmm0,xmm0,[rdx+r9+0])
- a3(vpxor xmm1,xmm1,[rdx+r9+16])
- a3(vpxor xmm2,xmm2,[rdx+r9+32])
- a3(vpxor xmm3,xmm3,[rdx+r9+48])
- a3(vpxor xmm4,xmm4,[rdx+r9+64])
- a3(vpxor xmm5,xmm5,[rdx+r9+80])
- a3(vpxor xmm6,xmm6,[rdx+r9+96])
- a3(vpxor xmm7,xmm7,[rdx+r9+112])
- a1(scrypt_ChunkMix_xop_no_xor2:)
- a2(vmovdqa [rsp+0],xmm0)
- a2(vmovdqa [rsp+16],xmm1)
- a2(vmovdqa [rsp+32],xmm2)
- a2(vmovdqa [rsp+48],xmm3)
- a2(vmovdqa [rsp+64],xmm4)
- a2(vmovdqa [rsp+80],xmm5)
- a2(vmovdqa [rsp+96],xmm6)
- a2(vmovdqa [rsp+112],xmm7)
- a2(mov rax,8)
- a1(scrypt_salsa64_xop_loop: )
- a3(vpaddq xmm8, xmm0, xmm2)
- a3(vpaddq xmm9, xmm1, xmm3)
- a3(vpshufd xmm8, xmm8, 0xb1)
- a3(vpshufd xmm9, xmm9, 0xb1)
- a3(vpxor xmm6, xmm6, xmm8)
- a3(vpxor xmm7, xmm7, xmm9)
- a3(vpaddq xmm10, xmm0, xmm6)
- a3(vpaddq xmm11, xmm1, xmm7)
- a3(vprotq xmm10, xmm10, 13)
- a3(vprotq xmm11, xmm11, 13)
- a3(vpxor xmm4, xmm4, xmm10)
- a3(vpxor xmm5, xmm5, xmm11)
- a3(vpaddq xmm8, xmm6, xmm4)
- a3(vpaddq xmm9, xmm7, xmm5)
- a3(vprotq xmm8, xmm8, 39)
- a3(vprotq xmm9, xmm9, 39)
- a3(vpxor xmm2, xmm2, xmm8)
- a3(vpxor xmm3, xmm3, xmm9)
- a3(vpaddq xmm10, xmm4, xmm2)
- a3(vpaddq xmm11, xmm5, xmm3)
- a3(vpshufd xmm10, xmm10, 0xb1)
- a3(vpshufd xmm11, xmm11, 0xb1)
- a3(vpxor xmm0, xmm0, xmm10)
- a3(vpxor xmm1, xmm1, xmm11)
- a2(vmovdqa xmm8, xmm2)
- a2(vmovdqa xmm9, xmm3)
- a4(vpalignr xmm2, xmm6, xmm7, 8)
- a4(vpalignr xmm3, xmm7, xmm6, 8)
- a4(vpalignr xmm6, xmm9, xmm8, 8)
- a4(vpalignr xmm7, xmm8, xmm9, 8)
- a3(vpaddq xmm10, xmm0, xmm2)
- a3(vpaddq xmm11, xmm1, xmm3)
- a3(vpshufd xmm10, xmm10, 0xb1)
- a3(vpshufd xmm11, xmm11, 0xb1)
- a3(vpxor xmm6, xmm6, xmm10)
- a3(vpxor xmm7, xmm7, xmm11)
- a3(vpaddq xmm8, xmm0, xmm6)
- a3(vpaddq xmm9, xmm1, xmm7)
- a3(vprotq xmm8, xmm8, 13)
- a3(vprotq xmm9, xmm9, 13)
- a3(vpxor xmm5, xmm5, xmm8)
- a3(vpxor xmm4, xmm4, xmm9)
- a3(vpaddq xmm10, xmm6, xmm5)
- a3(vpaddq xmm11, xmm7, xmm4)
- a3(vprotq xmm10, xmm10, 39)
- a3(vprotq xmm11, xmm11, 39)
- a3(vpxor xmm2, xmm2, xmm10)
- a3(vpxor xmm3, xmm3, xmm11)
- a3(vpaddq xmm8, xmm5, xmm2)
- a3(vpaddq xmm9, xmm4, xmm3)
- a3(vpshufd xmm8, xmm8, 0xb1)
- a3(vpshufd xmm9, xmm9, 0xb1)
- a3(vpxor xmm0, xmm0, xmm8)
- a3(vpxor xmm1, xmm1, xmm9)
- a2(vmovdqa xmm10, xmm2)
- a2(vmovdqa xmm11, xmm3)
- a4(vpalignr xmm2, xmm6, xmm7, 8)
- a4(vpalignr xmm3, xmm7, xmm6, 8)
- a4(vpalignr xmm6, xmm11, xmm10, 8)
- a4(vpalignr xmm7, xmm10, xmm11, 8)
- a2(sub rax, 2)
- aj(ja scrypt_salsa64_xop_loop)
- a3(vpaddq xmm0,xmm0,[rsp+0])
- a3(vpaddq xmm1,xmm1,[rsp+16])
- a3(vpaddq xmm2,xmm2,[rsp+32])
- a3(vpaddq xmm3,xmm3,[rsp+48])
- a3(vpaddq xmm4,xmm4,[rsp+64])
- a3(vpaddq xmm5,xmm5,[rsp+80])
- a3(vpaddq xmm6,xmm6,[rsp+96])
- a3(vpaddq xmm7,xmm7,[rsp+112])
- a2(lea rax,[r8+r9])
- a2(xor r8,rcx)
- a2(and rax,~0xff)
- a2(add r9,128)
- a2(shr rax,1)
- a2(add rax, rdi)
- a2(cmp r9,rcx)
- a2(vmovdqa [rax+0],xmm0)
- a2(vmovdqa [rax+16],xmm1)
- a2(vmovdqa [rax+32],xmm2)
- a2(vmovdqa [rax+48],xmm3)
- a2(vmovdqa [rax+64],xmm4)
- a2(vmovdqa [rax+80],xmm5)
- a2(vmovdqa [rax+96],xmm6)
- a2(vmovdqa [rax+112],xmm7)
- aj(jne scrypt_ChunkMix_xop_loop)
- a2(mov rsp, rbp)
- a1(pop rbp)
- a1(ret)
-asm_naked_fn_end(scrypt_ChunkMix_xop)
-
-#endif
-
-
-/* intrinsic */
-#if defined(X86_INTRINSIC_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED))
-
-#define SCRYPT_SALSA64_XOP
-
-static void asm_calling_convention
-scrypt_ChunkMix_xop(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) {
- uint32_t i, blocksPerChunk = r * 2, half = 0;
- xmmi *xmmp,x0,x1,x2,x3,x4,x5,x6,x7,t0,t1,t2,t3,t4,t5,t6,t7,z0,z1,z2,z3;
- size_t rounds;
-
- /* 1: X = B_{2r - 1} */
- xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
- x0 = xmmp[0];
- x1 = xmmp[1];
- x2 = xmmp[2];
- x3 = xmmp[3];
- x4 = xmmp[4];
- x5 = xmmp[5];
- x6 = xmmp[6];
- x7 = xmmp[7];
-
- if (Bxor) {
- xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
- x0 = _mm_xor_si128(x0, xmmp[0]);
- x1 = _mm_xor_si128(x1, xmmp[1]);
- x2 = _mm_xor_si128(x2, xmmp[2]);
- x3 = _mm_xor_si128(x3, xmmp[3]);
- x4 = _mm_xor_si128(x4, xmmp[4]);
- x5 = _mm_xor_si128(x5, xmmp[5]);
- x6 = _mm_xor_si128(x6, xmmp[6]);
- x7 = _mm_xor_si128(x7, xmmp[7]);
- }
-
- /* 2: for i = 0 to 2r - 1 do */
- for (i = 0; i < blocksPerChunk; i++, half ^= r) {
- /* 3: X = H(X ^ B_i) */
- xmmp = (xmmi *)scrypt_block(Bin, i);
- x0 = _mm_xor_si128(x0, xmmp[0]);
- x1 = _mm_xor_si128(x1, xmmp[1]);
- x2 = _mm_xor_si128(x2, xmmp[2]);
- x3 = _mm_xor_si128(x3, xmmp[3]);
- x4 = _mm_xor_si128(x4, xmmp[4]);
- x5 = _mm_xor_si128(x5, xmmp[5]);
- x6 = _mm_xor_si128(x6, xmmp[6]);
- x7 = _mm_xor_si128(x7, xmmp[7]);
-
- if (Bxor) {
- xmmp = (xmmi *)scrypt_block(Bxor, i);
- x0 = _mm_xor_si128(x0, xmmp[0]);
- x1 = _mm_xor_si128(x1, xmmp[1]);
- x2 = _mm_xor_si128(x2, xmmp[2]);
- x3 = _mm_xor_si128(x3, xmmp[3]);
- x4 = _mm_xor_si128(x4, xmmp[4]);
- x5 = _mm_xor_si128(x5, xmmp[5]);
- x6 = _mm_xor_si128(x6, xmmp[6]);
- x7 = _mm_xor_si128(x7, xmmp[7]);
- }
-
- t0 = x0;
- t1 = x1;
- t2 = x2;
- t3 = x3;
- t4 = x4;
- t5 = x5;
- t6 = x6;
- t7 = x7;
-
- for (rounds = 8; rounds; rounds -= 2) {
- z0 = _mm_add_epi64(x0, x2);
- z1 = _mm_add_epi64(x1, x3);
- z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
- z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
- x6 = _mm_xor_si128(x6, z0);
- x7 = _mm_xor_si128(x7, z1);
-
- z0 = _mm_add_epi64(x6, x0);
- z1 = _mm_add_epi64(x7, x1);
- z0 = _mm_roti_epi64(z0, 13);
- z1 = _mm_roti_epi64(z1, 13);
- x4 = _mm_xor_si128(x4, z0);
- x5 = _mm_xor_si128(x5, z1);
-
- z0 = _mm_add_epi64(x4, x6);
- z1 = _mm_add_epi64(x5, x7);
- z0 = _mm_roti_epi64(z0, 39);
- z1 = _mm_roti_epi64(z1, 39);
- x2 = _mm_xor_si128(x2, z0);
- x3 = _mm_xor_si128(x3, z1);
-
- z0 = _mm_add_epi64(x2, x4);
- z1 = _mm_add_epi64(x3, x5);
- z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
- z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
- x0 = _mm_xor_si128(x0, z0);
- x1 = _mm_xor_si128(x1, z1);
-
- z0 = x2;
- z1 = x3;
- x2 = _mm_alignr_epi8(x6, x7, 8);
- x3 = _mm_alignr_epi8(x7, x6, 8);
- x6 = _mm_alignr_epi8(z1, z0, 8);
- x7 = _mm_alignr_epi8(z0, z1, 8);
-
- z0 = _mm_add_epi64(x0, x2);
- z1 = _mm_add_epi64(x1, x3);
- z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
- z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
- x6 = _mm_xor_si128(x6, z0);
- x7 = _mm_xor_si128(x7, z1);
-
- z0 = _mm_add_epi64(x6, x0);
- z1 = _mm_add_epi64(x7, x1);
- z0 = _mm_roti_epi64(z0, 13);
- z1 = _mm_roti_epi64(z1, 13);
- x5 = _mm_xor_si128(x5, z0);
- x4 = _mm_xor_si128(x4, z1);
-
- z0 = _mm_add_epi64(x5, x6);
- z1 = _mm_add_epi64(x4, x7);
- z0 = _mm_roti_epi64(z0, 39);
- z1 = _mm_roti_epi64(z1, 39);
- x2 = _mm_xor_si128(x2, z0);
- x3 = _mm_xor_si128(x3, z1);
-
- z0 = _mm_add_epi64(x2, x5);
- z1 = _mm_add_epi64(x3, x4);
- z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
- z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
- x0 = _mm_xor_si128(x0, z0);
- x1 = _mm_xor_si128(x1, z1);
-
- z0 = x2;
- z1 = x3;
- x2 = _mm_alignr_epi8(x6, x7, 8);
- x3 = _mm_alignr_epi8(x7, x6, 8);
- x6 = _mm_alignr_epi8(z1, z0, 8);
- x7 = _mm_alignr_epi8(z0, z1, 8);
- }
-
- x0 = _mm_add_epi64(x0, t0);
- x1 = _mm_add_epi64(x1, t1);
- x2 = _mm_add_epi64(x2, t2);
- x3 = _mm_add_epi64(x3, t3);
- x4 = _mm_add_epi64(x4, t4);
- x5 = _mm_add_epi64(x5, t5);
- x6 = _mm_add_epi64(x6, t6);
- x7 = _mm_add_epi64(x7, t7);
-
- /* 4: Y_i = X */
- /* 6: B'[0..r-1] = Y_even */
- /* 6: B'[r..2r-1] = Y_odd */
- xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
- xmmp[0] = x0;
- xmmp[1] = x1;
- xmmp[2] = x2;
- xmmp[3] = x3;
- xmmp[4] = x4;
- xmmp[5] = x5;
- xmmp[6] = x6;
- xmmp[7] = x7;
- }
-}
-
-#endif
-
-#if defined(SCRYPT_SALSA64_XOP)
- /* uses salsa64_core_tangle_sse2 */
-
- #undef SCRYPT_MIX
- #define SCRYPT_MIX "Salsa64/8-XOP"
- #undef SCRYPT_SALSA64_INCLUDED
- #define SCRYPT_SALSA64_INCLUDED
-#endif
diff --git a/stratum/algos/ar2/sj/scrypt-jane-mix_salsa64.h b/stratum/algos/ar2/sj/scrypt-jane-mix_salsa64.h
deleted file mode 100644
index 2aec04f33..000000000
--- a/stratum/algos/ar2/sj/scrypt-jane-mix_salsa64.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)
-
-#undef SCRYPT_MIX
-#define SCRYPT_MIX "Salsa64/8 Ref"
-
-#undef SCRYPT_SALSA64_INCLUDED
-#define SCRYPT_SALSA64_INCLUDED
-#define SCRYPT_SALSA64_BASIC
-
-static void
-salsa64_core_basic(uint64_t state[16]) {
- const size_t rounds = 8;
- uint64_t v[16], t;
- size_t i;
-
- for (i = 0; i < 16; i++) v[i] = state[i];
-
- #define G(a,b,c,d) \
- t = v[a]+v[d]; t = ROTL64(t, 32); v[b] ^= t; \
- t = v[b]+v[a]; t = ROTL64(t, 13); v[c] ^= t; \
- t = v[c]+v[b]; t = ROTL64(t, 39); v[d] ^= t; \
- t = v[d]+v[c]; t = ROTL64(t, 32); v[a] ^= t; \
-
- for (i = 0; i < rounds; i += 2) {
- G( 0, 4, 8,12);
- G( 5, 9,13, 1);
- G(10,14, 2, 6);
- G(15, 3, 7,11);
- G( 0, 1, 2, 3);
- G( 5, 6, 7, 4);
- G(10,11, 8, 9);
- G(15,12,13,14);
- }
-
- for (i = 0; i < 16; i++) state[i] += v[i];
-
- #undef G
-}
-
-#endif
-
diff --git a/stratum/algos/ar2/sj/scrypt-jane-pbkdf2.h b/stratum/algos/ar2/sj/scrypt-jane-pbkdf2.h
deleted file mode 100644
index ddd8742b3..000000000
--- a/stratum/algos/ar2/sj/scrypt-jane-pbkdf2.h
+++ /dev/null
@@ -1,112 +0,0 @@
-typedef struct scrypt_hmac_state_t {
- scrypt_hash_state inner, outer;
-} scrypt_hmac_state;
-
-
-static void
-scrypt_hash(scrypt_hash_digest hash, const uint8_t *m, size_t mlen) {
- scrypt_hash_state st;
- scrypt_hash_init(&st);
- scrypt_hash_update(&st, m, mlen);
- scrypt_hash_finish(&st, hash);
-}
-
-/* hmac */
-static void
-scrypt_hmac_init(scrypt_hmac_state *st, const uint8_t *key, size_t keylen) {
- uint8_t pad[SCRYPT_HASH_BLOCK_SIZE] = {0};
- size_t i;
-
- scrypt_hash_init(&st->inner);
- scrypt_hash_init(&st->outer);
-
- if (keylen <= SCRYPT_HASH_BLOCK_SIZE) {
- /* use the key directly if it's <= blocksize bytes */
- memcpy(pad, key, keylen);
- } else {
- /* if it's > blocksize bytes, hash it */
- scrypt_hash(pad, key, keylen);
- }
-
- /* inner = (key ^ 0x36) */
- /* h(inner || ...) */
- for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++)
- pad[i] ^= 0x36;
- scrypt_hash_update(&st->inner, pad, SCRYPT_HASH_BLOCK_SIZE);
-
- /* outer = (key ^ 0x5c) */
- /* h(outer || ...) */
- for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++)
- pad[i] ^= (0x5c ^ 0x36);
- scrypt_hash_update(&st->outer, pad, SCRYPT_HASH_BLOCK_SIZE);
-
- scrypt_ensure_zero(pad, sizeof(pad));
-}
-
-static void
-scrypt_hmac_update(scrypt_hmac_state *st, const uint8_t *m, size_t mlen) {
- /* h(inner || m...) */
- scrypt_hash_update(&st->inner, m, mlen);
-}
-
-static void
-scrypt_hmac_finish(scrypt_hmac_state *st, scrypt_hash_digest mac) {
- /* h(inner || m) */
- scrypt_hash_digest innerhash;
- scrypt_hash_finish(&st->inner, innerhash);
-
- /* h(outer || h(inner || m)) */
- scrypt_hash_update(&st->outer, innerhash, sizeof(innerhash));
- scrypt_hash_finish(&st->outer, mac);
-
- scrypt_ensure_zero(st, sizeof(*st));
-}
-
-static void
-scrypt_pbkdf2(const uint8_t *password, size_t password_len, const uint8_t *salt, size_t salt_len, uint64_t N, uint8_t *out, size_t bytes) {
- scrypt_hmac_state hmac_pw, hmac_pw_salt, work;
- scrypt_hash_digest ti, u;
- uint8_t be[4];
- uint32_t i, j, blocks;
- uint64_t c;
-
- /* bytes must be <= (0xffffffff - (SCRYPT_HASH_DIGEST_SIZE - 1)), which they will always be under scrypt */
-
- /* hmac(password, ...) */
- scrypt_hmac_init(&hmac_pw, password, password_len);
-
- /* hmac(password, salt...) */
- hmac_pw_salt = hmac_pw;
- scrypt_hmac_update(&hmac_pw_salt, salt, salt_len);
-
- blocks = ((uint32_t)bytes + (SCRYPT_HASH_DIGEST_SIZE - 1)) / SCRYPT_HASH_DIGEST_SIZE;
- for (i = 1; i <= blocks; i++) {
- /* U1 = hmac(password, salt || be(i)) */
- U32TO8_BE(be, i);
- work = hmac_pw_salt;
- scrypt_hmac_update(&work, be, 4);
- scrypt_hmac_finish(&work, ti);
- memcpy(u, ti, sizeof(u));
-
- /* T[i] = U1 ^ U2 ^ U3... */
- for (c = 0; c < N - 1; c++) {
- /* UX = hmac(password, U{X-1}) */
- work = hmac_pw;
- scrypt_hmac_update(&work, u, SCRYPT_HASH_DIGEST_SIZE);
- scrypt_hmac_finish(&work, u);
-
- /* T[i] ^= UX */
- for (j = 0; j < sizeof(u); j++)
- ti[j] ^= u[j];
- }
-
- memcpy(out, ti, (bytes > SCRYPT_HASH_DIGEST_SIZE) ? SCRYPT_HASH_DIGEST_SIZE : bytes);
- out += SCRYPT_HASH_DIGEST_SIZE;
- bytes -= SCRYPT_HASH_DIGEST_SIZE;
- }
-
- scrypt_ensure_zero(ti, sizeof(ti));
- scrypt_ensure_zero(u, sizeof(u));
- scrypt_ensure_zero(&hmac_pw, sizeof(hmac_pw));
- scrypt_ensure_zero(&hmac_pw_salt, sizeof(hmac_pw_salt));
-}
diff --git a/stratum/algos/ar2/sj/scrypt-jane-portable-x86.h b/stratum/algos/ar2/sj/scrypt-jane-portable-x86.h
deleted file mode 100644
index 5be2db94b..000000000
--- a/stratum/algos/ar2/sj/scrypt-jane-portable-x86.h
+++ /dev/null
@@ -1,462 +0,0 @@
-#if defined(CPU_X86) && (defined(COMPILER_MSVC) || defined(COMPILER_GCC))
- #define X86ASM
-
- /* gcc 2.95 royally screws up stack alignments on variables */
- #if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= COMPILER_MSVC_VS6PP)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 30000)))
- #define X86ASM_SSE
- #define X86ASM_SSE2
- #endif
- #if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= COMPILER_MSVC_VS2005)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 40102)))
- #define X86ASM_SSSE3
- #endif
- #if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= COMPILER_MSVC_VS2010SP1)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 40400)))
- #define X86ASM_AVX
- #define X86ASM_XOP
- #endif
- #if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= COMPILER_MSVC_VS2012)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 40700)))
- #define X86ASM_AVX2
- #endif
-#endif
-
-#if defined(CPU_X86_64) && defined(COMPILER_GCC)
- #define X86_64ASM
- #define X86_64ASM_SSE2
- #if (COMPILER_GCC >= 40102)
- #define X86_64ASM_SSSE3
- #endif
- #if (COMPILER_GCC >= 40400)
- #define X86_64ASM_AVX
- #define X86_64ASM_XOP
- #endif
- #if (COMPILER_GCC >= 40700)
- #define X86_64ASM_AVX2
- #endif
-#endif
-
-#if defined(COMPILER_MSVC) && (defined(CPU_X86_FORCE_INTRINSICS) || defined(CPU_X86_64))
- #define X86_INTRINSIC
- #if defined(CPU_X86_64) || defined(X86ASM_SSE)
- #define X86_INTRINSIC_SSE
- #endif
- #if defined(CPU_X86_64) || defined(X86ASM_SSE2)
- #define X86_INTRINSIC_SSE2
- #endif
- #if (COMPILER_MSVC >= COMPILER_MSVC_VS2005)
- #define X86_INTRINSIC_SSSE3
- #endif
- #if (COMPILER_MSVC >= COMPILER_MSVC_VS2010SP1)
- #define X86_INTRINSIC_AVX
- #define X86_INTRINSIC_XOP
- #endif
- #if (COMPILER_MSVC >= COMPILER_MSVC_VS2012)
- #define X86_INTRINSIC_AVX2
- #endif
-#endif
-
-#if defined(COMPILER_GCC) && defined(CPU_X86_FORCE_INTRINSICS)
- #define X86_INTRINSIC
- #if defined(__SSE__)
- #define X86_INTRINSIC_SSE
- #endif
- #if defined(__SSE2__)
- #define X86_INTRINSIC_SSE2
- #endif
- #if defined(__SSSE3__)
- #define X86_INTRINSIC_SSSE3
- #endif
- #if defined(__AVX__)
- #define X86_INTRINSIC_AVX
- #endif
- #if defined(__XOP__)
- #define X86_INTRINSIC_XOP
- #endif
- #if defined(__AVX2__)
- #define X86_INTRINSIC_AVX2
- #endif
-#endif
-
-/* only use simd on windows (or SSE2 on gcc)! */
-#if defined(CPU_X86_FORCE_INTRINSICS) || defined(X86_INTRINSIC)
- #if defined(X86_INTRINSIC_SSE)
- #include
- #include
- typedef __m64 qmm;
- typedef __m128 xmm;
- typedef __m128d xmmd;
- #endif
- #if defined(X86_INTRINSIC_SSE2)
- #include
- typedef __m128i xmmi;
- #endif
- #if defined(X86_INTRINSIC_SSSE3)
- #include
- #endif
- #if defined(X86_INTRINSIC_AVX)
- #include
- #endif
- #if defined(X86_INTRINSIC_XOP)
- #if defined(COMPILER_MSVC)
- #include
- #else
- #include
- #endif
- #endif
- #if defined(X86_INTRINSIC_AVX2)
- typedef __m256i ymmi;
- #endif
-#endif
-
-#if defined(X86_INTRINSIC_SSE2)
- typedef union packedelem8_t {
- uint8_t u[16];
- xmmi v;
- } packedelem8;
-
- typedef union packedelem32_t {
- uint32_t u[4];
- xmmi v;
- } packedelem32;
-
- typedef union packedelem64_t {
- uint64_t u[2];
- xmmi v;
- } packedelem64;
-#else
- typedef union packedelem8_t {
- uint8_t u[16];
- uint32_t dw[4];
- } packedelem8;
-
- typedef union packedelem32_t {
- uint32_t u[4];
- uint8_t b[16];
- } packedelem32;
-
- typedef union packedelem64_t {
- uint64_t u[2];
- uint8_t b[16];
- } packedelem64;
-#endif
-
-#if defined(X86_INTRINSIC_SSSE3)
- static const packedelem8 ALIGN(16) ssse3_rotl16_32bit = {{2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13}};
- static const packedelem8 ALIGN(16) ssse3_rotl8_32bit = {{3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14}};
-#endif
-
-/*
- x86 inline asm for gcc/msvc. usage:
-
- asm_naked_fn_proto(return_type, name) (type parm1, type parm2..)
- asm_naked_fn(name)
- a1(..)
- a2(.., ..)
- a3(.., .., ..)
- 64bit OR 0 paramters: a1(ret)
- 32bit AND n parameters: aret(4n), eg aret(16) for 4 parameters
- asm_naked_fn_end(name)
-*/
-
-#if defined(X86ASM) || defined(X86_64ASM)
-
-#if defined(COMPILER_MSVC)
- #pragma warning(disable : 4731) /* frame pointer modified by inline assembly */
- #define a1(x) __asm {x}
- #define a2(x, y) __asm {x, y}
- #define a3(x, y, z) __asm {x, y, z}
- #define a4(x, y, z, w) __asm {x, y, z, w}
- #define aj(x) __asm {x}
- #define asm_align8 a1(ALIGN 8)
- #define asm_align16 a1(ALIGN 16)
-
- #define asm_calling_convention STDCALL
- #define aret(n) a1(ret n)
- #define asm_naked_fn_proto(type, fn) static NAKED type asm_calling_convention fn
- #define asm_naked_fn(fn) {
- #define asm_naked_fn_end(fn) }
-#elif defined(COMPILER_GCC)
- #define GNU_AS1(x) #x ";\n"
- #define GNU_AS2(x, y) #x ", " #y ";\n"
- #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";\n"
- #define GNU_AS4(x, y, z, w) #x ", " #y ", " #z ", " #w ";\n"
- #define GNU_ASFN(x) "\n_" #x ":\n" #x ":\n"
- #define GNU_ASJ(x) ".att_syntax prefix\n" #x "\n.intel_syntax noprefix\n"
-
- #define a1(x) GNU_AS1(x)
- #define a2(x, y) GNU_AS2(x, y)
- #define a3(x, y, z) GNU_AS3(x, y, z)
- #define a4(x, y, z, w) GNU_AS4(x, y, z, w)
- #define aj(x) GNU_ASJ(x)
- #define asm_align8 ".p2align 3,,7"
- #define asm_align16 ".p2align 4,,15"
-
- #if defined(OS_WINDOWS)
- #define asm_calling_convention CDECL
- #define aret(n) a1(ret)
-
- #if defined(X86_64ASM)
- #define asm_naked_fn(fn) ; __asm__ ( \
- ".text\n" \
- asm_align16 GNU_ASFN(fn) \
- "subq $136, %rsp;" \
- "movdqa %xmm6, 0(%rsp);" \
- "movdqa %xmm7, 16(%rsp);" \
- "movdqa %xmm8, 32(%rsp);" \
- "movdqa %xmm9, 48(%rsp);" \
- "movdqa %xmm10, 64(%rsp);" \
- "movdqa %xmm11, 80(%rsp);" \
- "movdqa %xmm12, 96(%rsp);" \
- "movq %rdi, 112(%rsp);" \
- "movq %rsi, 120(%rsp);" \
- "movq %rcx, %rdi;" \
- "movq %rdx, %rsi;" \
- "movq %r8, %rdx;" \
- "movq %r9, %rcx;" \
- "call 1f;" \
- "movdqa 0(%rsp), %xmm6;" \
- "movdqa 16(%rsp), %xmm7;" \
- "movdqa 32(%rsp), %xmm8;" \
- "movdqa 48(%rsp), %xmm9;" \
- "movdqa 64(%rsp), %xmm10;" \
- "movdqa 80(%rsp), %xmm11;" \
- "movdqa 96(%rsp), %xmm12;" \
- "movq 112(%rsp), %rdi;" \
- "movq 120(%rsp), %rsi;" \
- "addq $136, %rsp;" \
- "ret;" \
- ".intel_syntax noprefix;" \
- ".p2align 4,,15;" \
- "1:;"
- #else
- #define asm_naked_fn(fn) ; __asm__ (".intel_syntax noprefix;\n.text\n" asm_align16 GNU_ASFN(fn)
- #endif
- #else
- #define asm_calling_convention STDCALL
- #define aret(n) a1(ret n)
- #define asm_naked_fn(fn) ; __asm__ (".intel_syntax noprefix;\n.text\n" asm_align16 GNU_ASFN(fn)
- #endif
-
- #define asm_naked_fn_proto(type, fn) extern type asm_calling_convention fn
- #define asm_naked_fn_end(fn) ".att_syntax prefix;\n" );
-
- #define asm_gcc() __asm__ __volatile__(".intel_syntax noprefix;\n"
- #define asm_gcc_parms() ".att_syntax prefix;"
- #define asm_gcc_trashed() __asm__ __volatile__("" :::
- #define asm_gcc_end() );
-#else
- need x86 asm
-#endif
-
-#endif /* X86ASM || X86_64ASM */
-
-
-#if defined(CPU_X86) || defined(CPU_X86_64)
-
-typedef enum cpu_flags_x86_t {
- cpu_mmx = 1 << 0,
- cpu_sse = 1 << 1,
- cpu_sse2 = 1 << 2,
- cpu_sse3 = 1 << 3,
- cpu_ssse3 = 1 << 4,
- cpu_sse4_1 = 1 << 5,
- cpu_sse4_2 = 1 << 6,
- cpu_avx = 1 << 7,
- cpu_xop = 1 << 8,
- cpu_avx2 = 1 << 9
-} cpu_flags_x86;
-
-typedef enum cpu_vendors_x86_t {
- cpu_nobody,
- cpu_intel,
- cpu_amd
-} cpu_vendors_x86;
-
-typedef struct x86_regs_t {
- uint32_t eax, ebx, ecx, edx;
-} x86_regs;
-
-#if defined(X86ASM)
-asm_naked_fn_proto(int, has_cpuid)(void)
-asm_naked_fn(has_cpuid)
- a1(pushfd)
- a1(pop eax)
- a2(mov ecx, eax)
- a2(xor eax, 0x200000)
- a1(push eax)
- a1(popfd)
- a1(pushfd)
- a1(pop eax)
- a2(xor eax, ecx)
- a2(shr eax, 21)
- a2(and eax, 1)
- a1(push ecx)
- a1(popfd)
- a1(ret)
-asm_naked_fn_end(has_cpuid)
-#endif /* X86ASM */
-
-
-static void NOINLINE
-get_cpuid(x86_regs *regs, uint32_t flags) {
-#if defined(COMPILER_MSVC)
- __cpuid((int *)regs, (int)flags);
-#else
- #if defined(CPU_X86_64)
- #define cpuid_bx rbx
- #else
- #define cpuid_bx ebx
- #endif
-
- asm_gcc()
- a1(push cpuid_bx)
- a2(xor ecx, ecx)
- a1(cpuid)
- a2(mov [%1 + 0], eax)
- a2(mov [%1 + 4], ebx)
- a2(mov [%1 + 8], ecx)
- a2(mov [%1 + 12], edx)
- a1(pop cpuid_bx)
- asm_gcc_parms() : "+a"(flags) : "S"(regs) : "%ecx", "%edx", "cc"
- asm_gcc_end()
-#endif
-}
-
-#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX)
-static uint64_t NOINLINE
-get_xgetbv(uint32_t flags) {
-#if defined(COMPILER_MSVC)
- return _xgetbv(flags);
-#else
- uint32_t lo, hi;
- asm_gcc()
- a1(xgetbv)
- asm_gcc_parms() : "+c"(flags), "=a" (lo), "=d" (hi)
- asm_gcc_end()
- return ((uint64_t)lo | ((uint64_t)hi << 32));
-#endif
-}
-#endif // AVX support
-
-#if defined(SCRYPT_TEST_SPEED)
-size_t cpu_detect_mask = (size_t)-1;
-#endif
-
-static size_t
-detect_cpu(void) {
- union { uint8_t s[12]; uint32_t i[3]; } vendor_string;
- //cpu_vendors_x86 vendor = cpu_nobody;
- x86_regs regs;
- uint32_t max_level, max_ext_level;
- size_t cpu_flags = 0;
-#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX)
- uint64_t xgetbv_flags;
-#endif
-
-#if defined(CPU_X86)
- if (!has_cpuid())
- return cpu_flags;
-#endif
-
- get_cpuid(®s, 0);
- max_level = regs.eax;
- vendor_string.i[0] = regs.ebx;
- vendor_string.i[1] = regs.edx;
- vendor_string.i[2] = regs.ecx;
-
- //if (scrypt_verify(vendor_string.s, (const uint8_t *)"GenuineIntel", 12))
- // vendor = cpu_intel;
- //else if (scrypt_verify(vendor_string.s, (const uint8_t *)"AuthenticAMD", 12))
- // vendor = cpu_amd;
-
- if (max_level & 0x00000500) {
- /* "Intel P5 pre-B0" */
- cpu_flags |= cpu_mmx;
- return cpu_flags;
- }
-
- if (max_level < 1)
- return cpu_flags;
-
- get_cpuid(®s, 1);
-#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX)
- /* xsave/xrestore */
- if (regs.ecx & (1 << 27)) {
- xgetbv_flags = get_xgetbv(0);
- if ((regs.ecx & (1 << 28)) && (xgetbv_flags & 0x6)) cpu_flags |= cpu_avx;
- }
-#endif
- if (regs.ecx & (1 << 20)) cpu_flags |= cpu_sse4_2;
- if (regs.ecx & (1 << 19)) cpu_flags |= cpu_sse4_2;
- if (regs.ecx & (1 << 9)) cpu_flags |= cpu_ssse3;
- if (regs.ecx & (1 )) cpu_flags |= cpu_sse3;
- if (regs.edx & (1 << 26)) cpu_flags |= cpu_sse2;
- if (regs.edx & (1 << 25)) cpu_flags |= cpu_sse;
- if (regs.edx & (1 << 23)) cpu_flags |= cpu_mmx;
-
- if (cpu_flags & cpu_avx) {
- if (max_level >= 7) {
- get_cpuid(®s, 7);
- if (regs.ebx & (1 << 5)) cpu_flags |= cpu_avx2;
- }
-
- get_cpuid(®s, 0x80000000);
- max_ext_level = regs.eax;
- if (max_ext_level >= 0x80000001) {
- get_cpuid(®s, 0x80000001);
- if (regs.ecx & (1 << 11)) cpu_flags |= cpu_xop;
- }
- }
-
-
-#if defined(SCRYPT_TEST_SPEED)
- cpu_flags &= cpu_detect_mask;
-#endif
-
- return cpu_flags;
-}
-
-#if defined(SCRYPT_TEST_SPEED)
-static const char *
-get_top_cpuflag_desc(size_t flag) {
- if (flag & cpu_avx2) return "AVX2";
- else if (flag & cpu_xop) return "XOP";
- else if (flag & cpu_avx) return "AVX";
- else if (flag & cpu_sse4_2) return "SSE4.2";
- else if (flag & cpu_sse4_1) return "SSE4.1";
- else if (flag & cpu_ssse3) return "SSSE3";
- else if (flag & cpu_sse2) return "SSE2";
- else if (flag & cpu_sse) return "SSE";
- else if (flag & cpu_mmx) return "MMX";
- else return "Basic";
-}
-#endif
-
-/* enable the highest system-wide option */
-#if defined(SCRYPT_CHOOSE_COMPILETIME)
- #if !defined(__AVX2__)
- #undef X86_64ASM_AVX2
- #undef X86ASM_AVX2
- #undef X86_INTRINSIC_AVX2
- #endif
- #if !defined(__XOP__)
- #undef X86_64ASM_XOP
- #undef X86ASM_XOP
- #undef X86_INTRINSIC_XOP
- #endif
- #if !defined(__AVX__)
- #undef X86_64ASM_AVX
- #undef X86ASM_AVX
- #undef X86_INTRINSIC_AVX
- #endif
- #if !defined(__SSSE3__)
- #undef X86_64ASM_SSSE3
- #undef X86ASM_SSSE3
- #undef X86_INTRINSIC_SSSE3
- #endif
- #if !defined(__SSE2__)
- #undef X86_64ASM_SSE2
- #undef X86ASM_SSE2
- #undef X86_INTRINSIC_SSE2
- #endif
-#endif
-
-#endif /* defined(CPU_X86) || defined(CPU_X86_64) */
diff --git a/stratum/algos/ar2/sj/scrypt-jane-portable.h b/stratum/algos/ar2/sj/scrypt-jane-portable.h
deleted file mode 100644
index f1c2d26f5..000000000
--- a/stratum/algos/ar2/sj/scrypt-jane-portable.h
+++ /dev/null
@@ -1,307 +0,0 @@
-/* determine os */
-#if defined(_WIN32) || defined(_WIN64) || defined(__TOS_WIN__) || defined(__WINDOWS__)
- #include
- #include
- #define OS_WINDOWS
-#elif defined(sun) || defined(__sun) || defined(__SVR4) || defined(__svr4__)
- #include
- #include
- #include
-
- #define OS_SOLARIS
-#else
- #include
- #include
- #include /* need this to define BSD */
- #include
- #include
-
- #define OS_NIX
- #if defined(__linux__)
- #include
- #define OS_LINUX
- #elif defined(BSD)
- #define OS_BSD
-
- #if defined(MACOS_X) || (defined(__APPLE__) & defined(__MACH__))
- #define OS_OSX
- #elif defined(macintosh) || defined(Macintosh)
- #define OS_MAC
- #elif defined(__OpenBSD__)
- #define OS_OPENBSD
- #endif
- #endif
-#endif
-
-
-/* determine compiler */
-#if defined(_MSC_VER)
- #define COMPILER_MSVC_VS6 120000000
- #define COMPILER_MSVC_VS6PP 121000000
- #define COMPILER_MSVC_VS2002 130000000
- #define COMPILER_MSVC_VS2003 131000000
- #define COMPILER_MSVC_VS2005 140050727
- #define COMPILER_MSVC_VS2008 150000000
- #define COMPILER_MSVC_VS2008SP1 150030729
- #define COMPILER_MSVC_VS2010 160000000
- #define COMPILER_MSVC_VS2010SP1 160040219
- #define COMPILER_MSVC_VS2012RC 170000000
- #define COMPILER_MSVC_VS2012 170050727
-
- #if _MSC_FULL_VER > 100000000
- #define COMPILER_MSVC (_MSC_FULL_VER)
- #else
- #define COMPILER_MSVC (_MSC_FULL_VER * 10)
- #endif
-
- #if ((_MSC_VER == 1200) && defined(_mm_free))
- #undef COMPILER_MSVC
- #define COMPILER_MSVC COMPILER_MSVC_VS6PP
- #endif
-
- #pragma warning(disable : 4127) /* conditional expression is constant */
- #pragma warning(disable : 4100) /* unreferenced formal parameter */
-
- #define _CRT_SECURE_NO_WARNINGS
- #include
- #include /* _rotl */
- #include
-
- typedef unsigned char uint8_t;
- typedef unsigned short uint16_t;
- typedef unsigned int uint32_t;
- typedef signed int int32_t;
- typedef unsigned __int64 uint64_t;
- typedef signed __int64 int64_t;
-
- #define ROTL32(a,b) _rotl(a,b)
- #define ROTR32(a,b) _rotr(a,b)
- #define ROTL64(a,b) _rotl64(a,b)
- #define ROTR64(a,b) _rotr64(a,b)
- #undef NOINLINE
- #define NOINLINE __declspec(noinline)
- #undef NORETURN
- #define NORETURN
- #undef INLINE
- #define INLINE __forceinline
- #undef FASTCALL
- #define FASTCALL __fastcall
- #undef CDECL
- #define CDECL __cdecl
- #undef STDCALL
- #define STDCALL __stdcall
- #undef NAKED
- #define NAKED __declspec(naked)
- #define ALIGN(n) __declspec(align(n))
-#endif
-#if defined(__ICC)
- #define COMPILER_INTEL
-#endif
-#if defined(__GNUC__)
- #if (__GNUC__ >= 3)
- #define COMPILER_GCC_PATCHLEVEL __GNUC_PATCHLEVEL__
- #else
- #define COMPILER_GCC_PATCHLEVEL 0
- #endif
- #define COMPILER_GCC (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + COMPILER_GCC_PATCHLEVEL)
- #define ROTL32(a,b) (((a) << (b)) | ((a) >> (32 - b)))
- #define ROTR32(a,b) (((a) >> (b)) | ((a) << (32 - b)))
- #define ROTL64(a,b) (((a) << (b)) | ((a) >> (64 - b)))
- #define ROTR64(a,b) (((a) >> (b)) | ((a) << (64 - b)))
- #undef NOINLINE
- #if (COMPILER_GCC >= 30000)
- #define NOINLINE __attribute__((noinline))
- #else
- #define NOINLINE
- #endif
- #undef NORETURN
- #if (COMPILER_GCC >= 30000)
- #define NORETURN __attribute__((noreturn))
- #else
- #define NORETURN
- #endif
- #undef INLINE
- #if (COMPILER_GCC >= 30000)
- #define INLINE __attribute__((always_inline))
- #else
- #define INLINE inline
- #endif
- #undef FASTCALL
- #if (COMPILER_GCC >= 30400)
- #define FASTCALL __attribute__((fastcall))
- #else
- #define FASTCALL
- #endif
- #undef CDECL
- #define CDECL __attribute__((cdecl))
- #undef STDCALL
- #define STDCALL __attribute__((stdcall))
- #define ALIGN(n) __attribute__((aligned(n)))
- #include
-#endif
-#if defined(__MINGW32__) || defined(__MINGW64__)
- #define COMPILER_MINGW
-#endif
-#if defined(__PATHCC__)
- #define COMPILER_PATHCC
-#endif
-
-#define OPTIONAL_INLINE
-#if defined(OPTIONAL_INLINE)
- #undef OPTIONAL_INLINE
- #define OPTIONAL_INLINE INLINE
-#else
- #define OPTIONAL_INLINE
-#endif
-
-#define CRYPTO_FN NOINLINE STDCALL
-
-/* determine cpu */
-#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__ ) || defined(_M_X64)
- #define CPU_X86_64
-#elif defined(__i586__) || defined(__i686__) || (defined(_M_IX86) && (_M_IX86 >= 500))
- #define CPU_X86 500
-#elif defined(__i486__) || (defined(_M_IX86) && (_M_IX86 >= 400))
- #define CPU_X86 400
-#elif defined(__i386__) || (defined(_M_IX86) && (_M_IX86 >= 300)) || defined(__X86__) || defined(_X86_) || defined(__I86__)
- #define CPU_X86 300
-#elif defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(_M_IA64) || defined(__ia64)
- #define CPU_IA64
-#endif
-
-#if defined(__sparc__) || defined(__sparc) || defined(__sparcv9)
- #define CPU_SPARC
- #if defined(__sparcv9)
- #define CPU_SPARC64
- #endif
-#endif
-
-#if defined(CPU_X86_64) || defined(CPU_IA64) || defined(CPU_SPARC64) || defined(__64BIT__) || defined(__LP64__) || defined(_LP64) || (defined(_MIPS_SZLONG) && (_MIPS_SZLONG == 64))
- #define CPU_64BITS
- #undef FASTCALL
- #define FASTCALL
- #undef CDECL
- #define CDECL
- #undef STDCALL
- #define STDCALL
-#endif
-
-#if defined(powerpc) || defined(__PPC__) || defined(__ppc__) || defined(_ARCH_PPC) || defined(__powerpc__) || defined(__powerpc) || defined(POWERPC) || defined(_M_PPC)
- #define CPU_PPC
- #if defined(_ARCH_PWR7)
- #define CPU_POWER7
- #elif defined(__64BIT__)
- #define CPU_PPC64
- #else
- #define CPU_PPC32
- #endif
-#endif
-
-#if defined(__hppa__) || defined(__hppa)
- #define CPU_HPPA
-#endif
-
-#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA)
- #define CPU_ALPHA
-#endif
-
-/* endian */
-
-#if ((defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN)) || \
- (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)) || \
- (defined(CPU_X86) || defined(CPU_X86_64)) || \
- (defined(vax) || defined(MIPSEL) || defined(_MIPSEL)))
-#define CPU_LE
-#elif ((defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN)) || \
- (defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)) || \
- (defined(CPU_SPARC) || defined(CPU_PPC) || defined(mc68000) || defined(sel)) || defined(_MIPSEB))
-#define CPU_BE
-#else
- /* unknown endian! */
-#endif
-
-
-#define U8TO32_BE(p) \
- (((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) | \
- ((uint32_t)((p)[2]) << 8) | ((uint32_t)((p)[3]) ))
-
-#define U8TO32_LE(p) \
- (((uint32_t)((p)[0]) ) | ((uint32_t)((p)[1]) << 8) | \
- ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24))
-
-#define U32TO8_BE(p, v) \
- (p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \
- (p)[2] = (uint8_t)((v) >> 8); (p)[3] = (uint8_t)((v) );
-
-#define U32TO8_LE(p, v) \
- (p)[0] = (uint8_t)((v) ); (p)[1] = (uint8_t)((v) >> 8); \
- (p)[2] = (uint8_t)((v) >> 16); (p)[3] = (uint8_t)((v) >> 24);
-
-#define U8TO64_BE(p) \
- (((uint64_t)U8TO32_BE(p) << 32) | (uint64_t)U8TO32_BE((p) + 4))
-
-#define U8TO64_LE(p) \
- (((uint64_t)U8TO32_LE(p)) | ((uint64_t)U8TO32_LE((p) + 4) << 32))
-
-#define U64TO8_BE(p, v) \
- U32TO8_BE((p), (uint32_t)((v) >> 32)); \
- U32TO8_BE((p) + 4, (uint32_t)((v) ));
-
-#define U64TO8_LE(p, v) \
- U32TO8_LE((p), (uint32_t)((v) )); \
- U32TO8_LE((p) + 4, (uint32_t)((v) >> 32));
-
-#define U32_SWAP(v) { \
- (v) = (((v) << 8) & 0xFF00FF00 ) | (((v) >> 8) & 0xFF00FF ); \
- (v) = ((v) << 16) | ((v) >> 16); \
-}
-
-#define U64_SWAP(v) { \
- (v) = (((v) << 8) & 0xFF00FF00FF00FF00ull ) | (((v) >> 8) & 0x00FF00FF00FF00FFull ); \
- (v) = (((v) << 16) & 0xFFFF0000FFFF0000ull ) | (((v) >> 16) & 0x0000FFFF0000FFFFull ); \
- (v) = ((v) << 32) | ((v) >> 32); \
-}
-
-static int
-scrypt_verify(const uint8_t *x, const uint8_t *y, size_t len) {
- uint32_t differentbits = 0;
- while (len--)
- differentbits |= (*x++ ^ *y++);
- return (1 & ((differentbits - 1) >> 8));
-}
-
-static void
-scrypt_ensure_zero(void *p, size_t len) {
-#if ((defined(CPU_X86) || defined(CPU_X86_64)) && defined(COMPILER_MSVC))
- __stosb((unsigned char *)p, 0, len);
-#elif (defined(CPU_X86) && defined(COMPILER_GCC))
- __asm__ __volatile__(
- "pushl %%edi;\n"
- "pushl %%ecx;\n"
- "rep stosb;\n"
- "popl %%ecx;\n"
- "popl %%edi;\n"
- :: "a"(0), "D"(p), "c"(len) : "cc", "memory"
- );
-#elif (defined(CPU_X86_64) && defined(COMPILER_GCC))
- __asm__ __volatile__(
- "pushq %%rdi;\n"
- "pushq %%rcx;\n"
- "rep stosb;\n"
- "popq %%rcx;\n"
- "popq %%rdi;\n"
- :: "a"(0), "D"(p), "c"(len) : "cc", "memory"
- );
-#else
- volatile uint8_t *b = (volatile uint8_t *)p;
- size_t i;
- for (i = 0; i < len; i++)
- b[i] = 0;
-#endif
-}
-
-#include "scrypt-jane-portable-x86.h"
-
-#if !defined(asm_calling_convention)
-#define asm_calling_convention
-#endif
diff --git a/stratum/algos/ar2/sj/scrypt-jane-romix-basic.h b/stratum/algos/ar2/sj/scrypt-jane-romix-basic.h
deleted file mode 100644
index 57ba649f9..000000000
--- a/stratum/algos/ar2/sj/scrypt-jane-romix-basic.h
+++ /dev/null
@@ -1,74 +0,0 @@
-#if !defined(SCRYPT_CHOOSE_COMPILETIME)
-/* function type returned by scrypt_getROMix, used with cpu detection */
-typedef void (FASTCALL *scrypt_ROMixfn)(scrypt_mix_word_t *X/*[chunkWords]*/, scrypt_mix_word_t *Y/*[chunkWords]*/, scrypt_mix_word_t *V/*[chunkWords * N]*/, uint32_t N, uint32_t r);
-#endif
-
-/* romix pre/post nop function */
-static void asm_calling_convention
-scrypt_romix_nop(scrypt_mix_word_t *blocks, size_t nblocks) {
- (void)blocks; (void)nblocks;
-}
-
-/* romix pre/post endian conversion function */
-static void asm_calling_convention
-scrypt_romix_convert_endian(scrypt_mix_word_t *blocks, size_t nblocks) {
-#if !defined(CPU_LE)
- static const union { uint8_t b[2]; uint16_t w; } endian_test = {{1,0}};
- size_t i;
- if (endian_test.w == 0x100) {
- nblocks *= SCRYPT_BLOCK_WORDS;
- for (i = 0; i < nblocks; i++) {
- SCRYPT_WORD_ENDIAN_SWAP(blocks[i]);
- }
- }
-#else
- (void)blocks; (void)nblocks;
-#endif
-}
-
-/* chunkmix test function */
-typedef void (asm_calling_convention *chunkmixfn)(scrypt_mix_word_t *Bout/*[chunkWords]*/, scrypt_mix_word_t *Bin/*[chunkWords]*/, scrypt_mix_word_t *Bxor/*[chunkWords]*/, uint32_t r);
-typedef void (asm_calling_convention *blockfixfn)(scrypt_mix_word_t *blocks, size_t nblocks);
-
-static int
-scrypt_test_mix_instance(chunkmixfn mixfn, blockfixfn prefn, blockfixfn postfn, const uint8_t expected[16]) {
- /* r = 2, (2 * r) = 4 blocks in a chunk, 4 * SCRYPT_BLOCK_WORDS total */
- const uint32_t r = 2, blocks = 2 * r, words = blocks * SCRYPT_BLOCK_WORDS;
-#if (defined(X86ASM_AVX2) || defined(X86_64ASM_AVX2) || defined(X86_INTRINSIC_AVX2))
- scrypt_mix_word_t ALIGN(32) chunk[2][4 * SCRYPT_BLOCK_WORDS], v;
-#else
- scrypt_mix_word_t ALIGN(16) chunk[2][4 * SCRYPT_BLOCK_WORDS], v;
-#endif
- uint8_t final[16];
- size_t i;
-
- for (i = 0; i < words; i++) {
- v = (scrypt_mix_word_t)i;
- v = (v << 8) | v;
- v = (v << 16) | v;
- chunk[0][i] = v;
- }
-
- prefn(chunk[0], blocks);
- mixfn(chunk[1], chunk[0], NULL, r);
- postfn(chunk[1], blocks);
-
- /* grab the last 16 bytes of the final block */
- for (i = 0; i < 16; i += sizeof(scrypt_mix_word_t)) {
- SCRYPT_WORDTO8_LE(final + i, chunk[1][words - (16 / sizeof(scrypt_mix_word_t)) + (i / sizeof(scrypt_mix_word_t))]);
- }
-
- return scrypt_verify(expected, final, 16);
-}
-
-/* returns a pointer to item i, where item is len scrypt_mix_word_t's long */
-static scrypt_mix_word_t *
-scrypt_item(scrypt_mix_word_t *base, scrypt_mix_word_t i, scrypt_mix_word_t len) {
- return base + (i * len);
-}
-
-/* returns a pointer to block i */
-static scrypt_mix_word_t *
-scrypt_block(scrypt_mix_word_t *base, scrypt_mix_word_t i) {
- return base + (i * SCRYPT_BLOCK_WORDS);
-}
diff --git a/stratum/algos/ar2/sj/scrypt-jane-romix-template.h b/stratum/algos/ar2/sj/scrypt-jane-romix-template.h
deleted file mode 100644
index 373ae6048..000000000
--- a/stratum/algos/ar2/sj/scrypt-jane-romix-template.h
+++ /dev/null
@@ -1,122 +0,0 @@
-#if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_HAVE_ROMIX)
-
-#if defined(SCRYPT_CHOOSE_COMPILETIME)
-#undef SCRYPT_ROMIX_FN
-#define SCRYPT_ROMIX_FN scrypt_ROMix
-#endif
-
-#undef SCRYPT_HAVE_ROMIX
-#define SCRYPT_HAVE_ROMIX
-
-#if !defined(SCRYPT_CHUNKMIX_FN)
-
-#define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_basic
-
-/*
- Bout = ChunkMix(Bin)
-
- 2*r: number of blocks in the chunk
-*/
-static void asm_calling_convention
-SCRYPT_CHUNKMIX_FN(scrypt_mix_word_t *Bout/*[chunkWords]*/, scrypt_mix_word_t *Bin/*[chunkWords]*/, scrypt_mix_word_t *Bxor/*[chunkWords]*/, uint32_t r) {
-#if (defined(X86ASM_AVX2) || defined(X86_64ASM_AVX2) || defined(X86_INTRINSIC_AVX2))
- scrypt_mix_word_t ALIGN(32) X[SCRYPT_BLOCK_WORDS], *block;
-#else
- scrypt_mix_word_t ALIGN(16) X[SCRYPT_BLOCK_WORDS], *block;
-#endif
- uint32_t i, j, blocksPerChunk = /*r * 2*/2, half = 0;
-
- /* 1: X = B_{2r - 1} */
- block = scrypt_block(Bin, blocksPerChunk - 1);
- for (i = 0; i < SCRYPT_BLOCK_WORDS; i++)
- X[i] = block[i];
-
- if (Bxor) {
- block = scrypt_block(Bxor, blocksPerChunk - 1);
- for (i = 0; i < SCRYPT_BLOCK_WORDS; i++)
- X[i] ^= block[i];
- }
-
- /* 2: for i = 0 to 2r - 1 do */
- for (i = 0; i < blocksPerChunk; i++, half ^= /*r*/1) {
- /* 3: X = H(X ^ B_i) */
- block = scrypt_block(Bin, i);
- for (j = 0; j < SCRYPT_BLOCK_WORDS; j++)
- X[j] ^= block[j];
-
- if (Bxor) {
- block = scrypt_block(Bxor, i);
- for (j = 0; j < SCRYPT_BLOCK_WORDS; j++)
- X[j] ^= block[j];
- }
- SCRYPT_MIX_FN(X);
-
- /* 4: Y_i = X */
- /* 6: B'[0..r-1] = Y_even */
- /* 6: B'[r..2r-1] = Y_odd */
- block = scrypt_block(Bout, (i / 2) + half);
- for (j = 0; j < SCRYPT_BLOCK_WORDS; j++)
- block[j] = X[j];
- }
-}
-#endif
-
-/*
- X = ROMix(X)
-
- X: chunk to mix
- Y: scratch chunk
- N: number of rounds
- V[N]: array of chunks to randomly index in to
- 2*r: number of blocks in a chunk
-*/
-
-static void NOINLINE FASTCALL
-SCRYPT_ROMIX_FN(scrypt_mix_word_t *X/*[chunkWords]*/, scrypt_mix_word_t *Y/*[chunkWords]*/, scrypt_mix_word_t *V/*[N * chunkWords]*/, uint32_t N, uint32_t r) {
- uint32_t i, j, chunkWords = (uint32_t)(SCRYPT_BLOCK_WORDS * 2);
- scrypt_mix_word_t *block = V;
-
- SCRYPT_ROMIX_TANGLE_FN(X, 2);
-
- /* 1: X = B */
- /* implicit */
-
- /* 2: for i = 0 to N - 1 do */
- memcpy(block, X, chunkWords * sizeof(scrypt_mix_word_t));
- for (i = 0; i < /*N - 1*/511; i++, block += chunkWords) {
- /* 3: V_i = X */
- /* 4: X = H(X) */
- SCRYPT_CHUNKMIX_FN(block + chunkWords, block, NULL, /*r*/1);
- }
- SCRYPT_CHUNKMIX_FN(X, block, NULL, 1);
-
- /* 6: for i = 0 to N - 1 do */
- for (i = 0; i < /*N*/512; i += 2) {
- /* 7: j = Integerify(X) % N */
- j = X[chunkWords - SCRYPT_BLOCK_WORDS] & /*(N - 1)*/511;
-
- /* 8: X = H(Y ^ V_j) */
- SCRYPT_CHUNKMIX_FN(Y, X, scrypt_item(V, j, chunkWords), 1);
-
- /* 7: j = Integerify(Y) % N */
- j = Y[chunkWords - SCRYPT_BLOCK_WORDS] & /*(N - 1)*/511;
-
- /* 8: X = H(Y ^ V_j) */
- SCRYPT_CHUNKMIX_FN(X, Y, scrypt_item(V, j, chunkWords), 1);
- }
-
- /* 10: B' = X */
- /* implicit */
-
- SCRYPT_ROMIX_UNTANGLE_FN(X, 2);
-}
-
-#endif /* !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_HAVE_ROMIX) */
-
-
-#undef SCRYPT_CHUNKMIX_FN
-#undef SCRYPT_ROMIX_FN
-#undef SCRYPT_MIX_FN
-#undef SCRYPT_ROMIX_TANGLE_FN
-#undef SCRYPT_ROMIX_UNTANGLE_FN
-
diff --git a/stratum/algos/ar2/sj/scrypt-jane-romix.h b/stratum/algos/ar2/sj/scrypt-jane-romix.h
deleted file mode 100644
index cf4ac2f9e..000000000
--- a/stratum/algos/ar2/sj/scrypt-jane-romix.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifdef SCRYPT_SALSA64
-#include "scrypt-jane-salsa64.h"
-#else
- #define SCRYPT_MIX_BASE "ERROR"
- typedef uint32_t scrypt_mix_word_t;
- #define SCRYPT_WORDTO8_LE U32TO8_LE
- #define SCRYPT_WORD_ENDIAN_SWAP U32_SWAP
- #define SCRYPT_BLOCK_BYTES 64
- #define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t))
- #if !defined(SCRYPT_CHOOSE_COMPILETIME)
- static void FASTCALL scrypt_ROMix_error(scrypt_mix_word_t *X/*[chunkWords]*/, scrypt_mix_word_t *Y/*[chunkWords]*/, scrypt_mix_word_t *V/*[chunkWords * N]*/, uint32_t N, uint32_t r) {}
- static scrypt_ROMixfn scrypt_getROMix(void) { return scrypt_ROMix_error; }
- #else
- static void FASTCALL scrypt_ROMix(scrypt_mix_word_t *X, scrypt_mix_word_t *Y, scrypt_mix_word_t *V, uint32_t N, uint32_t r) {}
- #endif
- static int scrypt_test_mix(void) { return 0; }
- #error must define a mix function!
-#endif
-
-#if !defined(SCRYPT_CHOOSE_COMPILETIME)
-#undef SCRYPT_MIX
-#define SCRYPT_MIX SCRYPT_MIX_BASE
-#endif
diff --git a/stratum/algos/ar2/sj/scrypt-jane-salsa64.h b/stratum/algos/ar2/sj/scrypt-jane-salsa64.h
deleted file mode 100644
index 96b781360..000000000
--- a/stratum/algos/ar2/sj/scrypt-jane-salsa64.h
+++ /dev/null
@@ -1,183 +0,0 @@
-#define SCRYPT_MIX_BASE "Salsa64/8"
-
-typedef uint64_t scrypt_mix_word_t;
-
-#define SCRYPT_WORDTO8_LE U64TO8_LE
-#define SCRYPT_WORD_ENDIAN_SWAP U64_SWAP
-
-#define SCRYPT_BLOCK_BYTES 128
-#define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t))
-
-/* must have these here in case block bytes is ever != 64 */
-#include "scrypt-jane-romix-basic.h"
-
-#include "scrypt-jane-mix_salsa64-avx2.h"
-#include "scrypt-jane-mix_salsa64-xop.h"
-#include "scrypt-jane-mix_salsa64-avx.h"
-#include "scrypt-jane-mix_salsa64-ssse3.h"
-#include "scrypt-jane-mix_salsa64-sse2.h"
-#include "scrypt-jane-mix_salsa64.h"
-
-#if defined(SCRYPT_SALSA64_AVX2)
- #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_avx2
- #define SCRYPT_ROMIX_FN scrypt_ROMix_avx2
- #define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2
- #define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2
- #include "scrypt-jane-romix-template.h"
-#endif
-
-#if defined(SCRYPT_SALSA64_XOP)
- #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_xop
- #define SCRYPT_ROMIX_FN scrypt_ROMix_xop
- #define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2
- #define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2
- #include "scrypt-jane-romix-template.h"
-#endif
-
-#if defined(SCRYPT_SALSA64_AVX)
- #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_avx
- #define SCRYPT_ROMIX_FN scrypt_ROMix_avx
- #define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2
- #define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2
- #include "scrypt-jane-romix-template.h"
-#endif
-
-#if defined(SCRYPT_SALSA64_SSSE3)
- #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_ssse3
- #define SCRYPT_ROMIX_FN scrypt_ROMix_ssse3
- #define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2
- #define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2
- #include "scrypt-jane-romix-template.h"
-#endif
-
-#if defined(SCRYPT_SALSA64_SSE2)
- #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_sse2
- #define SCRYPT_ROMIX_FN scrypt_ROMix_sse2
- #define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2
- #define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2
- #include "scrypt-jane-romix-template.h"
-#endif
-
-/* cpu agnostic */
-#define SCRYPT_ROMIX_FN scrypt_ROMix_basic
-#define SCRYPT_MIX_FN salsa64_core_basic
-#define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_convert_endian
-#define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_convert_endian
-#include "scrypt-jane-romix-template.h"
-
-#if !defined(SCRYPT_CHOOSE_COMPILETIME)
-static scrypt_ROMixfn
-scrypt_getROMix(void) {
- size_t cpuflags = detect_cpu();
-
-#if defined(SCRYPT_SALSA64_AVX2)
- if (cpuflags & cpu_avx2)
- return scrypt_ROMix_avx2;
- else
-#endif
-
-#if defined(SCRYPT_SALSA64_XOP)
- if (cpuflags & cpu_xop)
- return scrypt_ROMix_xop;
- else
-#endif
-
-#if defined(SCRYPT_SALSA64_AVX)
- if (cpuflags & cpu_avx)
- return scrypt_ROMix_avx;
- else
-#endif
-
-#if defined(SCRYPT_SALSA64_SSSE3)
- if (cpuflags & cpu_ssse3)
- return scrypt_ROMix_ssse3;
- else
-#endif
-
-#if defined(SCRYPT_SALSA64_SSE2)
- if (cpuflags & cpu_sse2)
- return scrypt_ROMix_sse2;
- else
-#endif
-
- return scrypt_ROMix_basic;
-}
-#endif
-
-
-#if defined(SCRYPT_TEST_SPEED)
-static size_t
-available_implementations(void) {
- size_t cpuflags = detect_cpu();
- size_t flags = 0;
-
-#if defined(SCRYPT_SALSA64_AVX2)
- if (cpuflags & cpu_avx2)
- flags |= cpu_avx2;
-#endif
-
-#if defined(SCRYPT_SALSA64_XOP)
- if (cpuflags & cpu_xop)
- flags |= cpu_xop;
-#endif
-
-#if defined(SCRYPT_SALSA64_AVX)
- if (cpuflags & cpu_avx)
- flags |= cpu_avx;
-#endif
-
-#if defined(SCRYPT_SALSA64_SSSE3)
- if (cpuflags & cpu_ssse3)
- flags |= cpu_ssse3;
-#endif
-
-#if defined(SCRYPT_SALSA64_SSE2)
- if (cpuflags & cpu_sse2)
- flags |= cpu_sse2;
-#endif
-
- return flags;
-}
-#endif
-
-static int
-scrypt_test_mix(void) {
- static const uint8_t expected[16] = {
- 0xf8,0x92,0x9b,0xf8,0xcc,0x1d,0xce,0x2e,0x13,0x82,0xac,0x96,0xb2,0x6c,0xee,0x2c,
- };
-
- int ret = 1;
- size_t cpuflags = detect_cpu();
-
-#if defined(SCRYPT_SALSA64_AVX2)
- if (cpuflags & cpu_avx2)
- ret &= scrypt_test_mix_instance(scrypt_ChunkMix_avx2, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected);
-#endif
-
-#if defined(SCRYPT_SALSA64_XOP)
- if (cpuflags & cpu_xop)
- ret &= scrypt_test_mix_instance(scrypt_ChunkMix_xop, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected);
-#endif
-
-#if defined(SCRYPT_SALSA64_AVX)
- if (cpuflags & cpu_avx)
- ret &= scrypt_test_mix_instance(scrypt_ChunkMix_avx, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected);
-#endif
-
-#if defined(SCRYPT_SALSA64_SSSE3)
- if (cpuflags & cpu_ssse3)
- ret &= scrypt_test_mix_instance(scrypt_ChunkMix_ssse3, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected);
-#endif
-
-#if defined(SCRYPT_SALSA64_SSE2)
- if (cpuflags & cpu_sse2)
- ret &= scrypt_test_mix_instance(scrypt_ChunkMix_sse2, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected);
-#endif
-
-#if defined(SCRYPT_SALSA64_BASIC)
- ret &= scrypt_test_mix_instance(scrypt_ChunkMix_basic, scrypt_romix_convert_endian, scrypt_romix_convert_endian, expected);
-#endif
-
- return ret;
-}
-
diff --git a/stratum/algos/ar2/sj/scrypt-jane-test-vectors.h b/stratum/algos/ar2/sj/scrypt-jane-test-vectors.h
deleted file mode 100644
index 20fd0cf23..000000000
--- a/stratum/algos/ar2/sj/scrypt-jane-test-vectors.h
+++ /dev/null
@@ -1,28 +0,0 @@
-typedef struct scrypt_test_setting_t {
- const char *pw, *salt;
- uint8_t Nfactor, rfactor, pfactor;
-} scrypt_test_setting;
-
-static const scrypt_test_setting post_settings[] = {
- {"", "", 3, 0, 0},
- {"password", "NaCl", 9, 3, 4},
- {0, 0, 0, 0, 0}
-};
-
-#if defined(SCRYPT_SKEIN512)
- #ifdef SCRYPT_SALSA64
- static const uint8_t post_vectors[][64] = {
- {0xd2,0xad,0x32,0x05,0xee,0x80,0xe3,0x44,0x70,0xc6,0x34,0xde,0x05,0xb6,0xcf,0x60,
- 0x89,0x98,0x70,0xc0,0xb8,0xf5,0x54,0xf1,0xa6,0xb2,0xc8,0x76,0x34,0xec,0xc4,0x59,
- 0x8e,0x64,0x42,0xd0,0xa9,0xed,0xe7,0x19,0xb2,0x8a,0x11,0xc6,0xa6,0xbf,0xa7,0xa9,
- 0x4e,0x44,0x32,0x7e,0x12,0x91,0x9d,0xfe,0x52,0x48,0xa8,0x27,0xb3,0xfc,0xb1,0x89},
- {0xd6,0x67,0xd2,0x3e,0x30,0x1e,0x9d,0xe2,0x55,0x68,0x17,0x3d,0x2b,0x75,0x5a,0xe5,
- 0x04,0xfb,0x3d,0x0e,0x86,0xe0,0xaa,0x1d,0xd4,0x72,0xda,0xb0,0x79,0x41,0xb7,0x99,
- 0x68,0xe5,0xd9,0x55,0x79,0x7d,0xc3,0xd1,0xa6,0x56,0xc1,0xbe,0x0b,0x6c,0x62,0x23,
- 0x66,0x67,0x91,0x47,0x99,0x13,0x6b,0xe3,0xda,0x59,0x55,0x18,0x67,0x8f,0x2e,0x3b}
- };
- #endif
-#else
- static const uint8_t post_vectors[][64] = {{0}};
-#endif
-
diff --git a/stratum/algos/argon2a.c b/stratum/algos/argon2a.c
deleted file mode 100644
index e32b0b8f1..000000000
--- a/stratum/algos/argon2a.c
+++ /dev/null
@@ -1,45 +0,0 @@
-#include
-#include
-#include
-#include
-
-#include "sysendian.h"
-
-#include "argon2a.h"
-#include "ar2/argon2.h"
-#include "ar2/core.h"
-#include "ar2/ar2-scrypt-jane.h"
-
-#define _ALIGN(x) __attribute__ ((aligned(x)))
-
-#define T_COSTS 2
-#define M_COSTS 16
-#define MASK 8
-#define ZERO 0
-
-inline void argon_call(void *out, void *in, void *salt, int type)
-{
- argon2_context context = { 0 };
-
- context.out = (uint8_t *)out;
- context.pwd = (uint8_t *)in;
- context.salt = (uint8_t *)salt;
-
- argon2_ctx(&context, type);
-}
-
-void argon2a_hash(const char* input, char* output, uint32_t len)
-{
- uint32_t _ALIGN(32) hashA[8], hashB[8];
-
- my_scrypt((unsigned char *)input, len,
- (unsigned char *)input, len,
- (unsigned char *)hashA);
-
- argon_call(hashB, hashA, hashA, (hashA[0] & MASK) == ZERO);
-
- my_scrypt((const unsigned char *)hashB, 32,
- (const unsigned char *)hashB, 32,
- (unsigned char *)output);
-}
-
diff --git a/stratum/algos/argon2d-dyn.c b/stratum/algos/argon2d-dyn.c
deleted file mode 100644
index c9c6a5677..000000000
--- a/stratum/algos/argon2d-dyn.c
+++ /dev/null
@@ -1,43 +0,0 @@
-#include
-#include
-#include
-#include
-
-#include "sysendian.h"
-
-#include "ar2/argon2.h"
-#include "ar2/core.h"
-
-static const size_t INPUT_BYTES = 80; // Lenth of a block header in bytes. Input Length = Salt Length (salt = input)
-static const size_t OUTPUT_BYTES = 32; // Length of output needed for a 256-bit hash
-static const unsigned int DEFAULT_ARGON2_FLAG = 2; //Same as ARGON2_DEFAULT_FLAGS
-
-void argon2d_call(const void *input, void *output)
-{
- argon2_context context;
- context.out = (uint8_t *)output;
- context.outlen = (uint32_t)OUTPUT_BYTES;
- context.pwd = (uint8_t *)input;
- context.pwdlen = (uint32_t)INPUT_BYTES;
- context.salt = (uint8_t *)input; //salt = input
- context.saltlen = (uint32_t)INPUT_BYTES;
- context.secret = NULL;
- context.secretlen = 0;
- context.ad = NULL;
- context.adlen = 0;
- context.allocate_cbk = NULL;
- context.free_cbk = NULL;
- context.flags = DEFAULT_ARGON2_FLAG; // = ARGON2_DEFAULT_FLAGS
- // main configurable Argon2 hash parameters
- context.m_cost = 500; // Memory in KiB (512KB)
- context.lanes = 8; // Degree of Parallelism
- context.threads = 1; // Threads
- context.t_cost = 2; // Iterations
-
- argon2_ctx(&context, Argon2_d);
-}
-
-void argon2d_dyn_hash(const unsigned char* input, unsigned char* output, unsigned int len)
-{
- argon2d_call(input, output);
-}
\ No newline at end of file
diff --git a/stratum/algos/argon2d.c b/stratum/algos/argon2d.c
new file mode 100644
index 000000000..d9782791a
--- /dev/null
+++ b/stratum/algos/argon2d.c
@@ -0,0 +1,103 @@
+#include
+#include
+#include
+#include
+
+#include "sysendian.h"
+
+#include "ar2/argon2.h"
+#include "ar2/core.h"
+
+static const size_t INPUT_BYTES = 80; // Lenth of a block header in bytes. Input Length = Salt Length (salt = input)
+static const size_t OUTPUT_BYTES = 32; // Length of output needed for a 256-bit hash
+static const unsigned int DEFAULT_ARGON2_FLAG = 2; //Same as ARGON2_DEFAULT_FLAGS
+
+void argon2d_crds_call(const void *input, void *output)
+{
+ argon2_context context;
+ context.out = (uint8_t *)output;
+ context.outlen = (uint32_t)OUTPUT_BYTES;
+ context.pwd = (uint8_t *)input;
+ context.pwdlen = (uint32_t)INPUT_BYTES;
+ context.salt = (uint8_t *)input; //salt = input
+ context.saltlen = (uint32_t)INPUT_BYTES;
+ context.secret = NULL;
+ context.secretlen = 0;
+ context.ad = NULL;
+ context.adlen = 0;
+ context.allocate_cbk = NULL;
+ context.free_cbk = NULL;
+ context.flags = DEFAULT_ARGON2_FLAG; // = ARGON2_DEFAULT_FLAGS
+ // main configurable Argon2 hash parameters
+ context.m_cost = 250; // Memory in KiB (250KB)
+ context.lanes = 4; // Degree of Parallelism
+ context.threads = 1; // Threads
+ context.t_cost = 1; // Iterations
+
+ argon2_ctx(&context, Argon2_d);
+}
+void argon2d_dyn_call(const void *input, void *output)
+{
+ argon2_context context;
+ context.out = (uint8_t *)output;
+ context.outlen = (uint32_t)OUTPUT_BYTES;
+ context.pwd = (uint8_t *)input;
+ context.pwdlen = (uint32_t)INPUT_BYTES;
+ context.salt = (uint8_t *)input; //salt = input
+ context.saltlen = (uint32_t)INPUT_BYTES;
+ context.secret = NULL;
+ context.secretlen = 0;
+ context.ad = NULL;
+ context.adlen = 0;
+ context.allocate_cbk = NULL;
+ context.free_cbk = NULL;
+ context.flags = DEFAULT_ARGON2_FLAG; // = ARGON2_DEFAULT_FLAGS
+ // main configurable Argon2 hash parameters
+ context.m_cost = 500; // Memory in KiB (512KB)
+ context.lanes = 8; // Degree of Parallelism
+ context.threads = 1; // Threads
+ context.t_cost = 2; // Iterations
+
+ argon2_ctx(&context, Argon2_d);
+}
+void argon2d16000_call(const void *input, void *output)
+{
+ argon2_context context;
+ context.out = (uint8_t *)output;
+ context.outlen = (uint32_t)OUTPUT_BYTES;
+ context.pwd = (uint8_t *)input;
+ context.pwdlen = (uint32_t)INPUT_BYTES;
+ context.salt = (uint8_t *)input; //salt = input
+ context.saltlen = (uint32_t)INPUT_BYTES;
+ context.secret = NULL;
+ context.secretlen = 0;
+ context.ad = NULL;
+ context.adlen = 0;
+ context.allocate_cbk = NULL;
+ context.free_cbk = NULL;
+ context.flags = DEFAULT_ARGON2_FLAG; // = ARGON2_DEFAULT_FLAGS
+ // main configurable Argon2 hash parameters
+ context.m_cost = 16000; // Memory in KiB (~16384KB)
+ context.lanes = 1; // Degree of Parallelism
+ context.threads = 1; // Threads
+ context.t_cost = 1; // Iterations
+
+
+ argon2_ctx(&context, Argon2_d);
+}
+
+void argon2d_crds_hash(const unsigned char* input, unsigned char* output, unsigned int len)
+{
+ argon2d_crds_call(input, output);
+}
+
+void argon2d_dyn_hash(const unsigned char* input, unsigned char* output, unsigned int len)
+{
+ argon2d_dyn_call(input, output);
+}
+
+void argon2d16000_hash(const unsigned char* input, unsigned char* output, unsigned int len)
+{
+ argon2d16000_call(input, output);
+}
+
diff --git a/stratum/algos/argon2d-dyn.h b/stratum/algos/argon2d.h
similarity index 58%
rename from stratum/algos/argon2d-dyn.h
rename to stratum/algos/argon2d.h
index 1d5f99ca6..5f330a700 100644
--- a/stratum/algos/argon2d-dyn.h
+++ b/stratum/algos/argon2d.h
@@ -7,7 +7,9 @@ extern "C" {
#include
+void argon2d_crds_hash(const char* input, char* output, unsigned int len);
void argon2d_dyn_hash(const char* input, char* output, unsigned int len);
+void argon2d16000_hash(const char* input, char* output, unsigned int len);
#ifdef __cplusplus
}
diff --git a/stratum/algos/bcd.c b/stratum/algos/bcd.c
new file mode 100644
index 000000000..4d2220d30
--- /dev/null
+++ b/stratum/algos/bcd.c
@@ -0,0 +1,99 @@
+#include "bcd.h"
+#include
+#include
+#include
+#include
+
+#include "../sha3/sph_blake.h"
+#include "../sha3/sph_bmw.h"
+#include "../sha3/sph_groestl.h"
+#include "../sha3/sph_jh.h"
+#include "../sha3/sph_keccak.h"
+#include "../sha3/sph_skein.h"
+#include "../sha3/sph_luffa.h"
+#include "../sha3/sph_cubehash.h"
+#include "../sha3/sph_shavite.h"
+#include "../sha3/sph_simd.h"
+#include "../sha3/sph_echo.h"
+#include "../sha3/sph_hamsi.h"
+#include "../sha3/sph_fugue.h"
+#include "../sha3/sph_sm3.h"
+
+#include "common.h"
+
+void bcd_hash(const char* input, char* output, uint32_t len)
+{
+ sph_blake512_context ctx_blake;
+ sph_bmw512_context ctx_bmw;
+ sph_groestl512_context ctx_groestl;
+ sph_skein512_context ctx_skein;
+ sph_jh512_context ctx_jh;
+ sph_keccak512_context ctx_keccak;
+ sm3_ctx_t ctx_sm3;
+ sph_cubehash512_context ctx_cubehash1;
+ sph_shavite512_context ctx_shavite1;
+ sph_simd512_context ctx_simd1;
+ sph_echo512_context ctx_echo1;
+ sph_hamsi512_context ctx_hamsi1;
+ sph_fugue512_context ctx_fugue1;
+
+ uint32_t hashA[16], hashB[16];
+
+ sph_blake512_init(&ctx_blake);
+ sph_blake512 (&ctx_blake, input, len);
+ sph_blake512_close (&ctx_blake, hashA);
+
+ sph_bmw512_init(&ctx_bmw);
+ sph_bmw512 (&ctx_bmw, hashA, 64);
+ sph_bmw512_close(&ctx_bmw, hashB);
+
+ sph_groestl512_init(&ctx_groestl);
+ sph_groestl512 (&ctx_groestl, hashB, 64);
+ sph_groestl512_close(&ctx_groestl, hashA);
+
+ sph_skein512_init(&ctx_skein);
+ sph_skein512 (&ctx_skein, hashA, 64);
+ sph_skein512_close (&ctx_skein, hashB);
+
+ sph_jh512_init(&ctx_jh);
+ sph_jh512 (&ctx_jh, hashB, 64);
+ sph_jh512_close(&ctx_jh, hashA);
+
+ sph_keccak512_init(&ctx_keccak);
+ sph_keccak512 (&ctx_keccak, hashA, 64);
+ sph_keccak512_close(&ctx_keccak, hashB);
+
+ memset(hashA, 0, sizeof(hashA));
+ sm3_init(&ctx_sm3);
+ sph_sm3(&ctx_sm3, hashB, 64);
+ sph_sm3_close(&ctx_sm3, hashA);
+
+ sph_cubehash512_init (&ctx_cubehash1);
+ sph_cubehash512 (&ctx_cubehash1, hashA, 64);
+ sph_cubehash512_close(&ctx_cubehash1, hashB);
+
+ sph_shavite512_init (&ctx_shavite1);
+ sph_shavite512 (&ctx_shavite1, hashB, 64);
+ sph_shavite512_close(&ctx_shavite1, hashA);
+
+ sph_simd512_init (&ctx_simd1);
+ sph_simd512 (&ctx_simd1, hashA, 64);
+ sph_simd512_close(&ctx_simd1, hashB);
+
+ sph_echo512_init (&ctx_echo1);
+ sph_echo512 (&ctx_echo1, hashB, 64);
+ sph_echo512_close(&ctx_echo1, hashA);
+
+ sph_hamsi512_init (&ctx_hamsi1);
+ sph_hamsi512 (&ctx_hamsi1, hashA, 64);
+ sph_hamsi512_close(&ctx_hamsi1, hashB);
+
+ sph_fugue512_init (&ctx_fugue1);
+ sph_fugue512 (&ctx_fugue1, hashB, 64);
+ sph_fugue512_close(&ctx_fugue1, hashA);
+
+
+
+ memcpy(output, hashA, 32);
+
+}
\ No newline at end of file
diff --git a/stratum/algos/bcd.h b/stratum/algos/bcd.h
new file mode 100644
index 000000000..89ec03c23
--- /dev/null
+++ b/stratum/algos/bcd.h
@@ -0,0 +1,16 @@
+#ifndef BCD_H
+#define BCD_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include
+
+void bcd_hash(const char* input, char* output, uint32_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
\ No newline at end of file
diff --git a/stratum/algos/beenode.c b/stratum/algos/beenode.c
new file mode 100644
index 000000000..ff072c3bb
--- /dev/null
+++ b/stratum/algos/beenode.c
@@ -0,0 +1,80 @@
+#include "beenode.h"
+#include
+#include
+#include
+#include
+
+
+#include "honeycomb/facet_one.h"
+#include "honeycomb/facet_two.h"
+#include "honeycomb/facet_three.h"
+#include "honeycomb/facet_four.h"
+#include "honeycomb/facet_five.h"
+#include "honeycomb/facet_six.h"
+
+
+
+void HoneyBee( const unsigned char *in, unsigned int sz, unsigned char *out ){
+ memcpy( &out[ 0], &in[0], 36 );
+ memcpy( &out[36], &in[sz-28], 28 );
+}
+
+void xor64byte( unsigned char *a, unsigned char *b, unsigned char *out ){
+ for( int i = 0; i < 64; i++){
+ out[i] = a[i] ^ b[i];
+ }
+}
+
+void beenode_hash(const char* input, char* output, unsigned int len){
+
+ facet_one_context ctx_one;
+ facet_two_context ctx_two;
+ facet_three_context ctx_three;
+ facet_four_context ctx_four;
+ facet_five_context ctx_five;
+ facet_six_context ctx_six;
+
+ unsigned char honey[64];
+
+ unsigned char hash0[64];
+ unsigned char hash1[64];
+ unsigned char hash2[64];
+ unsigned char hash3[64];
+ unsigned char hash4[64];
+ unsigned char hash5[64];
+ unsigned char hash6[64];
+ unsigned char hash7[64];
+ unsigned char hash8[64];
+ unsigned char hash9[64];
+ unsigned char hash10[64];
+ unsigned char hash11[64];
+
+ HoneyBee( (const unsigned char*)input, len, honey );
+ facet_one_init(&ctx_one);
+ facet_one(&ctx_one, input, len );
+ facet_one_close(&ctx_one, hash0 );
+ facet_four_init(&ctx_four);
+ facet_four(&ctx_four, input, len );
+ facet_four_close(&ctx_four, hash1 );
+ xor64byte( honey, hash1, hash2 );
+ xor64byte( hash0, hash2, hash3 );
+ facet_two_init( &ctx_two );
+ facet_two( &ctx_two, hash3, 64 );
+ facet_two_close( &ctx_two, hash4 );
+ facet_five_init(&ctx_five);
+ facet_five (&ctx_five, input, len );
+ facet_five_close(&ctx_five, hash5 );
+ xor64byte( honey, hash5, hash6 );
+ xor64byte( hash4, hash6, hash7 );
+ facet_three_init( &ctx_three );
+ facet_three ( &ctx_three, hash7, 64 );
+ facet_three_close( &ctx_three, hash8 );
+ facet_six_init(&ctx_six);
+ facet_six( &ctx_six, input, len );
+ facet_six_close(&ctx_six, hash9);
+ xor64byte( honey, hash9, hash10 );
+ xor64byte( hash8, hash10, hash11 );
+
+ memcpy(output, hash11, 32);
+}
+
diff --git a/stratum/algos/beenode.h b/stratum/algos/beenode.h
new file mode 100644
index 000000000..975d50e26
--- /dev/null
+++ b/stratum/algos/beenode.h
@@ -0,0 +1,14 @@
+#ifndef BEENODE_H
+#define BEENODE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void beenode_hash(const char* input, char* output, unsigned int len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/stratum/algos/blake2-ref/blake2-impl.h b/stratum/algos/blake2-ref/blake2-impl.h
new file mode 100644
index 000000000..ace753107
--- /dev/null
+++ b/stratum/algos/blake2-ref/blake2-impl.h
@@ -0,0 +1,187 @@
+/*
+ * Argon2 reference source code package - reference C implementations
+ *
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+ *
+ * You may use this work under the terms of a Creative Commons CC0 1.0
+ * License/Waiver or the Apache Public License 2.0, at your option. The terms of
+ * these licenses can be found at:
+ *
+ * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+ * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * You should have received a copy of both of these licenses along with this
+ * software. If not, they may be obtained at the above URLs.
+ */
+
+#ifndef PORTABLE_BLAKE2_IMPL_H
+#define PORTABLE_BLAKE2_IMPL_H
+
+#include
+#include
+
+#if defined(_MSC_VER)
+#define BLAKE2_INLINE __inline
+#elif defined(__GNUC__) || defined(__clang__)
+#define BLAKE2_INLINE __inline__
+#else
+#define BLAKE2_INLINE
+#endif
+
+/* Argon2 Team - Begin Code */
+/*
+ Not an exhaustive list, but should cover the majority of modern platforms
+ Additionally, the code will always be correct---this is only a performance
+ tweak.
+*/
+#if (defined(__BYTE_ORDER__) && \
+ (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \
+ defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \
+ defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) || \
+ defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || \
+ defined(_M_ARM)
+#define NATIVE_LITTLE_ENDIAN
+#endif
+/* Argon2 Team - End Code */
+
+static BLAKE2_INLINE uint32_t load32(const void *src) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+ uint32_t w;
+ memcpy(&w, src, sizeof w);
+ return w;
+#else
+ const uint8_t *p = (const uint8_t *)src;
+ uint32_t w = *p++;
+ w |= (uint32_t)(*p++) << 8;
+ w |= (uint32_t)(*p++) << 16;
+ w |= (uint32_t)(*p++) << 24;
+ return w;
+#endif
+}
+
+static BLAKE2_INLINE uint64_t load64(const void *src) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+ uint64_t w;
+ memcpy(&w, src, sizeof w);
+ return w;
+#else
+ const uint8_t *p = (const uint8_t *)src;
+ uint64_t w = *p++;
+ w |= (uint64_t)(*p++) << 8;
+ w |= (uint64_t)(*p++) << 16;
+ w |= (uint64_t)(*p++) << 24;
+ w |= (uint64_t)(*p++) << 32;
+ w |= (uint64_t)(*p++) << 40;
+ w |= (uint64_t)(*p++) << 48;
+ w |= (uint64_t)(*p++) << 56;
+ return w;
+#endif
+}
+
+static BLAKE2_INLINE uint16_t load16( const void *src )
+{
+#if defined(NATIVE_LITTLE_ENDIAN)
+ uint16_t w;
+ memcpy(&w, src, sizeof w);
+ return w;
+#else
+ const uint8_t *p = ( const uint8_t * )src;
+ return (( uint16_t )( p[0] ) << 0) |
+ (( uint16_t )( p[1] ) << 8) ;
+#endif
+}
+
+static BLAKE2_INLINE void store16( void *dst, uint16_t w )
+{
+#if defined(NATIVE_LITTLE_ENDIAN)
+ memcpy(dst, &w, sizeof w);
+#else
+ uint8_t *p = ( uint8_t * )dst;
+ *p++ = ( uint8_t )w; w >>= 8;
+ *p++ = ( uint8_t )w;
+#endif
+}
+
+static BLAKE2_INLINE void store32(void *dst, uint32_t w) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+ memcpy(dst, &w, sizeof w);
+#else
+ uint8_t *p = (uint8_t *)dst;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+#endif
+}
+
+static BLAKE2_INLINE void store64(void *dst, uint64_t w) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+ memcpy(dst, &w, sizeof w);
+#else
+ uint8_t *p = (uint8_t *)dst;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+#endif
+}
+
+static BLAKE2_INLINE uint64_t load48(const void *src) {
+ const uint8_t *p = (const uint8_t *)src;
+ uint64_t w = *p++;
+ w |= (uint64_t)(*p++) << 8;
+ w |= (uint64_t)(*p++) << 16;
+ w |= (uint64_t)(*p++) << 24;
+ w |= (uint64_t)(*p++) << 32;
+ w |= (uint64_t)(*p++) << 40;
+ return w;
+}
+
+static BLAKE2_INLINE void store48(void *dst, uint64_t w) {
+ uint8_t *p = (uint8_t *)dst;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+ w >>= 8;
+ *p++ = (uint8_t)w;
+}
+
+static BLAKE2_INLINE uint32_t rotr32(const uint32_t w, const unsigned c) {
+ return (w >> c) | (w << (32 - c));
+}
+
+static BLAKE2_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) {
+ return (w >> c) | (w << (64 - c));
+}
+
+/* prevents compiler optimizing out memset() */
+static BLAKE2_INLINE void secure_zero_memory(void *v, size_t n)
+{
+ static void *(*const volatile memset_v)(void *, int, size_t) = &memset;
+ memset_v(v, 0, n);
+}
+
+void clear_internal_memory(void *v, size_t n);
+
+#endif
\ No newline at end of file
diff --git a/stratum/algos/blake2-ref/blake2.h b/stratum/algos/blake2-ref/blake2.h
new file mode 100644
index 000000000..685257af9
--- /dev/null
+++ b/stratum/algos/blake2-ref/blake2.h
@@ -0,0 +1,192 @@
+/*
+ BLAKE2 reference source code package - reference C implementations
+ Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
+ terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
+ your option. The terms of these licenses can be found at:
+ - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+ - OpenSSL license : https://www.openssl.org/source/license.html
+ - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
+ More information about the BLAKE2 hash function can be found at
+ https://blake2.net.
+*/
+#ifndef BLAKE2_H
+#define BLAKE2_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+#if defined(_MSC_VER)
+#define BLAKE2_PACKED(x) __pragma(pack(push, 1)) x __pragma(pack(pop))
+#else
+#define BLAKE2_PACKED(x) x __attribute__((packed))
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+ enum blake2s_constant
+ {
+ BLAKE2S_BLOCKBYTES = 64,
+ BLAKE2S_OUTBYTES = 32,
+ BLAKE2S_KEYBYTES = 32,
+ BLAKE2S_SALTBYTES = 8,
+ BLAKE2S_PERSONALBYTES = 8
+ };
+
+ enum blake2b_constant
+ {
+ BLAKE2B_BLOCKBYTES = 128,
+ BLAKE2B_OUTBYTES = 64,
+ BLAKE2B_KEYBYTES = 64,
+ BLAKE2B_SALTBYTES = 16,
+ BLAKE2B_PERSONALBYTES = 16
+ };
+
+ typedef struct blake2s_state__
+ {
+ uint32_t h[8];
+ uint32_t t[2];
+ uint32_t f[2];
+ uint8_t buf[BLAKE2S_BLOCKBYTES];
+ size_t buflen;
+ size_t outlen;
+ uint8_t last_node;
+ } blake2s_state;
+
+ typedef struct blake2b_state__
+ {
+ uint64_t h[8];
+ uint64_t t[2];
+ uint64_t f[2];
+ uint8_t buf[BLAKE2B_BLOCKBYTES];
+ size_t buflen;
+ size_t outlen;
+ uint8_t last_node;
+ } blake2b_state;
+
+ typedef struct blake2sp_state__
+ {
+ blake2s_state S[8][1];
+ blake2s_state R[1];
+ uint8_t buf[8 * BLAKE2S_BLOCKBYTES];
+ size_t buflen;
+ size_t outlen;
+ } blake2sp_state;
+
+ typedef struct blake2bp_state__
+ {
+ blake2b_state S[4][1];
+ blake2b_state R[1];
+ uint8_t buf[4 * BLAKE2B_BLOCKBYTES];
+ size_t buflen;
+ size_t outlen;
+ } blake2bp_state;
+
+
+ BLAKE2_PACKED(struct blake2s_param__
+ {
+ uint8_t digest_length; /* 1 */
+ uint8_t key_length; /* 2 */
+ uint8_t fanout; /* 3 */
+ uint8_t depth; /* 4 */
+ uint32_t leaf_length; /* 8 */
+ uint32_t node_offset; /* 12 */
+ uint16_t xof_length; /* 14 */
+ uint8_t node_depth; /* 15 */
+ uint8_t inner_length; /* 16 */
+ /* uint8_t reserved[0]; */
+ uint8_t salt[BLAKE2S_SALTBYTES]; /* 24 */
+ uint8_t personal[BLAKE2S_PERSONALBYTES]; /* 32 */
+ });
+
+ typedef struct blake2s_param__ blake2s_param;
+
+ BLAKE2_PACKED(struct blake2b_param__
+ {
+ uint8_t digest_length; /* 1 */
+ uint8_t key_length; /* 2 */
+ uint8_t fanout; /* 3 */
+ uint8_t depth; /* 4 */
+ uint32_t leaf_length; /* 8 */
+ uint32_t node_offset; /* 12 */
+ uint32_t xof_length; /* 16 */
+ uint8_t node_depth; /* 17 */
+ uint8_t inner_length; /* 18 */
+ uint8_t reserved[14]; /* 32 */
+ uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
+ uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
+ });
+
+ typedef struct blake2b_param__ blake2b_param;
+
+ typedef struct blake2xs_state__
+ {
+ blake2s_state S[1];
+ blake2s_param P[1];
+ } blake2xs_state;
+
+ typedef struct blake2xb_state__
+ {
+ blake2b_state S[1];
+ blake2b_param P[1];
+ } blake2xb_state;
+
+ /* Padded structs result in a compile-time error */
+ enum {
+ BLAKE2_DUMMY_1 = 1/(sizeof(blake2s_param) == BLAKE2S_OUTBYTES),
+ BLAKE2_DUMMY_2 = 1/(sizeof(blake2b_param) == BLAKE2B_OUTBYTES)
+ };
+
+ /* Streaming API */
+ int blake2s_init( blake2s_state *S, size_t outlen );
+ int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
+ int blake2s_init_param( blake2s_state *S, const blake2s_param *P );
+ int blake2s_update( blake2s_state *S, const void *in, size_t inlen );
+ int blake2s_final( blake2s_state *S, void *out, size_t outlen );
+
+ int blake2b_init( blake2b_state *S, size_t outlen );
+ int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
+ int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
+ int blake2b_update( blake2b_state *S, const void *in, size_t inlen );
+ int blake2b_final( blake2b_state *S, void *out, size_t outlen );
+
+ int blake2sp_init( blake2sp_state *S, size_t outlen );
+ int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen );
+ int blake2sp_update( blake2sp_state *S, const void *in, size_t inlen );
+ int blake2sp_final( blake2sp_state *S, void *out, size_t outlen );
+
+ int blake2bp_init( blake2bp_state *S, size_t outlen );
+ int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen );
+ int blake2bp_update( blake2bp_state *S, const void *in, size_t inlen );
+ int blake2bp_final( blake2bp_state *S, void *out, size_t outlen );
+
+ /* Variable output length API */
+ int blake2xs_init( blake2xs_state *S, const size_t outlen );
+ int blake2xs_init_key( blake2xs_state *S, const size_t outlen, const void *key, size_t keylen );
+ int blake2xs_update( blake2xs_state *S, const void *in, size_t inlen );
+ int blake2xs_final(blake2xs_state *S, void *out, size_t outlen);
+
+ int blake2xb_init( blake2xb_state *S, const size_t outlen );
+ int blake2xb_init_key( blake2xb_state *S, const size_t outlen, const void *key, size_t keylen );
+ int blake2xb_update( blake2xb_state *S, const void *in, size_t inlen );
+ int blake2xb_final(blake2xb_state *S, void *out, size_t outlen);
+
+ /* Simple API */
+ int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
+ int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
+
+ int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
+ int blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
+
+ int blake2xs( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
+ int blake2xb( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
+
+ /* This is simply an alias for blake2b */
+ int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
\ No newline at end of file
diff --git a/stratum/algos/blake2-ref/blake2b.c b/stratum/algos/blake2-ref/blake2b.c
new file mode 100644
index 000000000..ca05df598
--- /dev/null
+++ b/stratum/algos/blake2-ref/blake2b.c
@@ -0,0 +1,390 @@
+/*
+ * Argon2 reference source code package - reference C implementations
+ *
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+ *
+ * You may use this work under the terms of a Creative Commons CC0 1.0
+ * License/Waiver or the Apache Public License 2.0, at your option. The terms of
+ * these licenses can be found at:
+ *
+ * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+ * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * You should have received a copy of both of these licenses along with this
+ * software. If not, they may be obtained at the above URLs.
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "blake2.h"
+#include "blake2-impl.h"
+
+static const uint64_t blake2b_IV[8] = {
+ UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
+ UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
+ UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
+ UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179)};
+
+static const unsigned int blake2b_sigma[12][16] = {
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+ {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+ {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
+ {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
+ {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
+ {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
+ {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
+ {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
+ {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
+ {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+ {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+};
+
+/* Mark the state as the last node of a tree layer: f[1] = all-ones. */
+static BLAKE2_INLINE void blake2b_set_lastnode(blake2b_state *S) {
+ S->f[1] = (uint64_t)-1;
+}
+
+/* Flag the final block (f[0] = all-ones) before the last compression;
+ also propagates the last-node flag when tree hashing was requested. */
+static BLAKE2_INLINE void blake2b_set_lastblock(blake2b_state *S) {
+ if (S->last_node) {
+ blake2b_set_lastnode(S);
+ }
+ S->f[0] = (uint64_t)-1;
+}
+
+/* Add inc to the 128-bit message byte counter t; t[1] takes the carry. */
+static BLAKE2_INLINE void blake2b_increment_counter(blake2b_state *S,
+ uint64_t inc) {
+ S->t[0] += inc;
+ S->t[1] += (S->t[0] < inc);
+}
+
+/* Wipe the state and poison it so later update/final calls return -1. */
+static BLAKE2_INLINE void blake2b_invalidate_state(blake2b_state *S) {
+ clear_internal_memory(S, sizeof(*S)); /* wipe */
+ blake2b_set_lastblock(S); /* invalidate for further use */
+}
+
+/* Zero the whole state and load the BLAKE2b IV into the chaining value h. */
+static BLAKE2_INLINE void blake2b_init0(blake2b_state *S) {
+ memset(S, 0, sizeof(*S));
+ memcpy(S->h, blake2b_IV, sizeof(S->h));
+}
+
+/* Initialize S from a fully-specified parameter block P: h = IV XOR P,
+ interpreted as eight little-endian 64-bit words.
+ Returns 0 on success, -1 if S or P is NULL. */
+int blake2b_init_param(blake2b_state *S, const blake2b_param *P) {
+ const unsigned char *p = (const unsigned char *)P;
+ unsigned int i;
+
+ if (NULL == P || NULL == S) {
+ return -1;
+ }
+
+ blake2b_init0(S);
+ /* IV XOR Parameter Block */
+ for (i = 0; i < 8; ++i) {
+ S->h[i] ^= load64(&p[i * sizeof(S->h[i])]);
+ }
+ S->outlen = P->digest_length;
+ return 0;
+}
+
+/* Sequential blake2b initialization */
+/* Unkeyed init for outlen in 1..BLAKE2B_OUTBYTES. Every field of the local
+ parameter block is zeroed explicitly so no stack garbage reaches
+ blake2b_init_param. Returns 0 on success, -1 on bad arguments (the state
+ is invalidated in that case). */
+int blake2b_init(blake2b_state *S, size_t outlen) {
+ blake2b_param P;
+
+ if (S == NULL) {
+ return -1;
+ }
+
+ if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) {
+ blake2b_invalidate_state(S);
+ return -1;
+ }
+
+ /* Setup Parameter Block for unkeyed BLAKE2 */
+ P.digest_length = (uint8_t)outlen;
+ P.key_length = 0;
+ P.fanout = 1;
+ P.depth = 1;
+ P.leaf_length = 0;
+ P.node_offset = 0;
+ P.xof_length = 0; /* BUGFIX: was never set; blake2.h's param block has this
+ field, so uninitialized stack bytes were XORed into h */
+ P.node_depth = 0;
+ P.inner_length = 0;
+ memset(P.reserved, 0, sizeof(P.reserved));
+ memset(P.salt, 0, sizeof(P.salt));
+ memset(P.personal, 0, sizeof(P.personal));
+
+ return blake2b_init_param(S, &P);
+}
+
+/* Keyed init: outlen in 1..BLAKE2B_OUTBYTES, keylen in 1..BLAKE2B_KEYBYTES.
+ The key is absorbed as a zero-padded first block and then burned from the
+ stack. Returns 0 on success, -1 on bad arguments (state invalidated). */
+int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
+ size_t keylen) {
+ blake2b_param P;
+
+ if (S == NULL) {
+ return -1;
+ }
+
+ if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) {
+ blake2b_invalidate_state(S);
+ return -1;
+ }
+
+ if ((key == 0) || (keylen == 0) || (keylen > BLAKE2B_KEYBYTES)) {
+ blake2b_invalidate_state(S);
+ return -1;
+ }
+
+ /* Setup Parameter Block for keyed BLAKE2 */
+ P.digest_length = (uint8_t)outlen;
+ P.key_length = (uint8_t)keylen;
+ P.fanout = 1;
+ P.depth = 1;
+ P.leaf_length = 0;
+ P.node_offset = 0;
+ P.xof_length = 0; /* BUGFIX: was never set; blake2.h's param block has this
+ field, so uninitialized stack bytes were XORed into h */
+ P.node_depth = 0;
+ P.inner_length = 0;
+ memset(P.reserved, 0, sizeof(P.reserved));
+ memset(P.salt, 0, sizeof(P.salt));
+ memset(P.personal, 0, sizeof(P.personal));
+
+ if (blake2b_init_param(S, &P) < 0) {
+ blake2b_invalidate_state(S);
+ return -1;
+ }
+
+ {
+ uint8_t block[BLAKE2B_BLOCKBYTES];
+ memset(block, 0, BLAKE2B_BLOCKBYTES);
+ memcpy(block, key, keylen);
+ blake2b_update(S, block, BLAKE2B_BLOCKBYTES);
+ /* Burn the key from stack */
+ clear_internal_memory(block, BLAKE2B_BLOCKBYTES);
+ }
+ return 0;
+}
+
+/* Compression function F: absorbs one 128-byte block into the chaining
+ value S->h using 12 rounds of the G mixing function over the sigma
+ message schedule, then feeds forward h[i] ^= v[i] ^ v[i + 8]. */
+static void blake2b_compress(blake2b_state *S, const uint8_t *block) {
+ uint64_t m[16];
+ uint64_t v[16];
+ unsigned int i, r;
+
+ for (i = 0; i < 16; ++i) {
+ m[i] = load64(block + i * sizeof(m[i]));
+ }
+
+ for (i = 0; i < 8; ++i) {
+ v[i] = S->h[i];
+ }
+
+ v[8] = blake2b_IV[0];
+ v[9] = blake2b_IV[1];
+ v[10] = blake2b_IV[2];
+ v[11] = blake2b_IV[3];
+ v[12] = blake2b_IV[4] ^ S->t[0]; /* byte counter, low word */
+ v[13] = blake2b_IV[5] ^ S->t[1]; /* byte counter, high word */
+ v[14] = blake2b_IV[6] ^ S->f[0]; /* finalization flag */
+ v[15] = blake2b_IV[7] ^ S->f[1]; /* last-node flag (tree mode) */
+
+#define G(r, i, a, b, c, d) \
+ do { \
+ a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \
+ d = rotr64(d ^ a, 32); \
+ c = c + d; \
+ b = rotr64(b ^ c, 24); \
+ a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \
+ d = rotr64(d ^ a, 16); \
+ c = c + d; \
+ b = rotr64(b ^ c, 63); \
+ } while ((void)0, 0)
+
+#define ROUND(r) \
+ do { \
+ G(r, 0, v[0], v[4], v[8], v[12]); \
+ G(r, 1, v[1], v[5], v[9], v[13]); \
+ G(r, 2, v[2], v[6], v[10], v[14]); \
+ G(r, 3, v[3], v[7], v[11], v[15]); \
+ G(r, 4, v[0], v[5], v[10], v[15]); \
+ G(r, 5, v[1], v[6], v[11], v[12]); \
+ G(r, 6, v[2], v[7], v[8], v[13]); \
+ G(r, 7, v[3], v[4], v[9], v[14]); \
+ } while ((void)0, 0)
+
+ for (r = 0; r < 12; ++r) {
+ ROUND(r);
+ }
+
+ for (i = 0; i < 8; ++i) {
+ S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
+ }
+
+#undef G
+#undef ROUND
+}
+
+/* Absorbs inlen bytes into the state. Partial blocks are buffered; full
+ blocks are compressed directly from the caller's memory when possible.
+ Returns 0 on success, -1 on NULL arguments or an already-finalized state. */
+int blake2b_update(blake2b_state *S, const void *in, size_t inlen) {
+ const uint8_t *pin = (const uint8_t *)in;
+
+ if (inlen == 0) {
+ return 0;
+ }
+
+ /* Sanity check */
+ if (S == NULL || in == NULL) {
+ return -1;
+ }
+
+ /* Is this a reused state? */
+ if (S->f[0] != 0) {
+ return -1;
+ }
+
+ if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) {
+ /* Complete current block */
+ size_t left = S->buflen;
+ size_t fill = BLAKE2B_BLOCKBYTES - left;
+ memcpy(&S->buf[left], pin, fill);
+ blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
+ blake2b_compress(S, S->buf);
+ S->buflen = 0;
+ inlen -= fill;
+ pin += fill;
+ /* Avoid buffer copies when possible */
+ while (inlen > BLAKE2B_BLOCKBYTES) {
+ blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
+ blake2b_compress(S, pin);
+ inlen -= BLAKE2B_BLOCKBYTES;
+ pin += BLAKE2B_BLOCKBYTES;
+ }
+ }
+ /* Remainder (at most one block) stays buffered for the next call/final */
+ memcpy(&S->buf[S->buflen], pin, inlen);
+ S->buflen += (unsigned int)inlen;
+ return 0;
+}
+
+/* Finalizes the hash: zero-pads the buffered tail, compresses it with the
+ finalization flag set, and writes S->outlen bytes to out (outlen must be
+ at least S->outlen). Wipes the sensitive parts of the state afterwards.
+ Returns 0 on success, -1 on bad arguments or a reused state. */
+int blake2b_final(blake2b_state *S, void *out, size_t outlen) {
+ uint8_t buffer[BLAKE2B_OUTBYTES] = {0};
+ unsigned int i;
+
+ /* Sanity checks */
+ if (S == NULL || out == NULL || outlen < S->outlen) {
+ return -1;
+ }
+
+ /* Is this a reused state? */
+ if (S->f[0] != 0) {
+ return -1;
+ }
+
+ blake2b_increment_counter(S, S->buflen);
+ blake2b_set_lastblock(S);
+ memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
+ blake2b_compress(S, S->buf);
+
+ for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */
+ store64(buffer + sizeof(S->h[i]) * i, S->h[i]);
+ }
+
+ memcpy(out, buffer, S->outlen);
+ clear_internal_memory(buffer, sizeof(buffer));
+ clear_internal_memory(S->buf, sizeof(S->buf));
+ clear_internal_memory(S->h, sizeof(S->h));
+ return 0;
+}
+
+/* One-shot BLAKE2b: hashes inlen bytes of in (keyed iff keylen > 0) into
+ out (outlen bytes, 1..BLAKE2B_OUTBYTES). Returns 0 on success, -1 on bad
+ parameters or internal failure; the state is wiped on every path. */
+int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
+ const void *key, size_t keylen) {
+ blake2b_state S;
+ int ret = -1;
+
+ /* Verify parameters */
+ if (NULL == in && inlen > 0) {
+ goto fail;
+ }
+
+ if (NULL == out || outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
+ goto fail;
+ }
+
+ if ((NULL == key && keylen > 0) || keylen > BLAKE2B_KEYBYTES) {
+ goto fail;
+ }
+
+ if (keylen > 0) {
+ if (blake2b_init_key(&S, outlen, key, keylen) < 0) {
+ goto fail;
+ }
+ } else {
+ if (blake2b_init(&S, outlen) < 0) {
+ goto fail;
+ }
+ }
+
+ if (blake2b_update(&S, in, inlen) < 0) {
+ goto fail;
+ }
+ ret = blake2b_final(&S, out, outlen);
+
+fail:
+ clear_internal_memory(&S, sizeof(S));
+ return ret;
+}
+
+/* Argon2 Team - Begin Code */
+/* Variable-length BLAKE2b (Argon2's H' construction): prefixes the input
+ with the little-endian 32-bit output length; for outlen <= 64 it is a
+ single hash, otherwise 64-byte blocks are chained, each emitting 32
+ output bytes, until a final block of `toproduce` bytes. */
+int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
+ uint8_t *out = (uint8_t *)pout;
+ blake2b_state blake_state;
+ uint8_t outlen_bytes[sizeof(uint32_t)] = {0};
+ int ret = -1;
+
+ if (outlen > UINT32_MAX) {
+ goto fail;
+ }
+
+ /* Ensure little-endian byte order! */
+ store32(outlen_bytes, (uint32_t)outlen);
+
+#define TRY(statement) \
+ do { \
+ ret = statement; \
+ if (ret < 0) { \
+ goto fail; \
+ } \
+ } while ((void)0, 0)
+
+ if (outlen <= BLAKE2B_OUTBYTES) {
+ TRY(blake2b_init(&blake_state, outlen));
+ TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
+ TRY(blake2b_update(&blake_state, in, inlen));
+ TRY(blake2b_final(&blake_state, out, outlen));
+ } else {
+ uint32_t toproduce;
+ uint8_t out_buffer[BLAKE2B_OUTBYTES];
+ uint8_t in_buffer[BLAKE2B_OUTBYTES];
+ TRY(blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
+ TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
+ TRY(blake2b_update(&blake_state, in, inlen));
+ TRY(blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
+ /* Each iteration re-hashes the previous block and keeps its first half */
+ memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
+ out += BLAKE2B_OUTBYTES / 2;
+ toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;
+
+ while (toproduce > BLAKE2B_OUTBYTES) {
+ memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
+ TRY(blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer,
+ BLAKE2B_OUTBYTES, NULL, 0));
+ memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
+ out += BLAKE2B_OUTBYTES / 2;
+ toproduce -= BLAKE2B_OUTBYTES / 2;
+ }
+
+ memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
+ TRY(blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL,
+ 0));
+ memcpy(out, out_buffer, toproduce);
+ }
+fail:
+ clear_internal_memory(&blake_state, sizeof(blake_state));
+ return ret;
+#undef TRY
+}
diff --git a/stratum/algos/blake2-ref/blake2s.c b/stratum/algos/blake2-ref/blake2s.c
new file mode 100644
index 000000000..482310881
--- /dev/null
+++ b/stratum/algos/blake2-ref/blake2s.c
@@ -0,0 +1,364 @@
+/*
+ BLAKE2 reference source code package - reference C implementations
+ Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
+ terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
+ your option. The terms of these licenses can be found at:
+ - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+ - OpenSSL license : https://www.openssl.org/source/license.html
+ - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
+ More information about the BLAKE2 hash function can be found at
+ https://blake2.net.
+*/
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "blake2.h"
+#include "blake2-impl.h"
+
+static const uint32_t blake2s_IV[8] =
+{
+ 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
+ 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
+};
+
+static const uint8_t blake2s_sigma[10][16] =
+{
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
+ { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
+ { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
+ { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
+ { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
+ { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
+ { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
+ { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
+ { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
+};
+
+/* Mark the state as the last node of a tree layer: f[1] = all-ones. */
+static void blake2s_set_lastnode( blake2s_state *S )
+{
+ S->f[1] = (uint32_t)-1;
+}
+
+/* Some helper functions, not necessarily useful */
+/* Nonzero once the final block has been compressed (state consumed). */
+static int blake2s_is_lastblock( const blake2s_state *S )
+{
+ return S->f[0] != 0;
+}
+
+/* Flag the final block (f[0] = all-ones); also propagates the last-node
+ flag when tree hashing was requested. */
+static void blake2s_set_lastblock( blake2s_state *S )
+{
+ if( S->last_node ) blake2s_set_lastnode( S );
+
+ S->f[0] = (uint32_t)-1;
+}
+
+/* Add inc to the 64-bit message byte counter t; t[1] takes the carry. */
+static void blake2s_increment_counter( blake2s_state *S, const uint32_t inc )
+{
+ S->t[0] += inc;
+ S->t[1] += ( S->t[0] < inc );
+}
+
+/* Zero the whole state and load the BLAKE2s IV into the chaining value h. */
+static void blake2s_init0( blake2s_state *S )
+{
+ size_t i;
+ memset( S, 0, sizeof( blake2s_state ) );
+
+ for( i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i];
+}
+
+/* init2 xors IV with input parameter block */
+/* h = IV XOR P, read as eight little-endian 32-bit words. Unlike the
+ blake2b variant there are no NULL checks here; always returns 0. */
+int blake2s_init_param( blake2s_state *S, const blake2s_param *P )
+{
+ const unsigned char *p = ( const unsigned char * )( P );
+ size_t i;
+
+ blake2s_init0( S );
+
+ /* IV XOR ParamBlock */
+ for( i = 0; i < 8; ++i )
+ S->h[i] ^= load32( &p[i * 4] );
+
+ S->outlen = P->digest_length;
+ return 0;
+}
+
+
+/* Sequential blake2s initialization */
+/* Unkeyed init for outlen in 1..BLAKE2S_OUTBYTES; returns 0 or -1.
+ NOTE(review): store16 is assumed to come from blake2-impl.h (it is not
+ visible in this chunk) — confirm it exists there. */
+int blake2s_init( blake2s_state *S, size_t outlen )
+{
+ blake2s_param P[1];
+
+ /* Move interval verification here? */
+ if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
+
+ P->digest_length = (uint8_t)outlen;
+ P->key_length = 0;
+ P->fanout = 1;
+ P->depth = 1;
+ store32( &P->leaf_length, 0 );
+ store32( &P->node_offset, 0 );
+ store16( &P->xof_length, 0 );
+ P->node_depth = 0;
+ P->inner_length = 0;
+ /* memset(P->reserved, 0, sizeof(P->reserved) ); */
+ memset( P->salt, 0, sizeof( P->salt ) );
+ memset( P->personal, 0, sizeof( P->personal ) );
+ return blake2s_init_param( S, P );
+}
+
+/* Keyed init: outlen in 1..BLAKE2S_OUTBYTES, keylen in 1..BLAKE2S_KEYBYTES.
+ The key is absorbed as a zero-padded first block and then burned from the
+ stack. Returns 0 on success, -1 on bad arguments. */
+int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen )
+{
+ blake2s_param P[1];
+
+ if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
+
+ if ( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1;
+
+ P->digest_length = (uint8_t)outlen;
+ P->key_length = (uint8_t)keylen;
+ P->fanout = 1;
+ P->depth = 1;
+ store32( &P->leaf_length, 0 );
+ store32( &P->node_offset, 0 );
+ store16( &P->xof_length, 0 );
+ P->node_depth = 0;
+ P->inner_length = 0;
+ /* memset(P->reserved, 0, sizeof(P->reserved) ); */
+ memset( P->salt, 0, sizeof( P->salt ) );
+ memset( P->personal, 0, sizeof( P->personal ) );
+
+ if( blake2s_init_param( S, P ) < 0 ) return -1;
+
+ {
+ uint8_t block[BLAKE2S_BLOCKBYTES];
+ memset( block, 0, BLAKE2S_BLOCKBYTES );
+ memcpy( block, key, keylen );
+ blake2s_update( S, block, BLAKE2S_BLOCKBYTES );
+ secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
+ }
+ return 0;
+}
+
+/* BLAKE2s quarter-round: mixes two message words (chosen by sigma[r]) into
+ the four working words a,b,c,d with 16/12/8/7-bit right rotations.
+ ROUND applies G to the four columns, then the four diagonals, of v. */
+#define G(r,i,a,b,c,d) \
+ do { \
+ a = a + b + m[blake2s_sigma[r][2*i+0]]; \
+ d = rotr32(d ^ a, 16); \
+ c = c + d; \
+ b = rotr32(b ^ c, 12); \
+ a = a + b + m[blake2s_sigma[r][2*i+1]]; \
+ d = rotr32(d ^ a, 8); \
+ c = c + d; \
+ b = rotr32(b ^ c, 7); \
+ } while(0)
+
+#define ROUND(r) \
+ do { \
+ G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
+ G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
+ G(r,2,v[ 2],v[ 6],v[10],v[14]); \
+ G(r,3,v[ 3],v[ 7],v[11],v[15]); \
+ G(r,4,v[ 0],v[ 5],v[10],v[15]); \
+ G(r,5,v[ 1],v[ 6],v[11],v[12]); \
+ G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
+ G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
+ } while(0)
+
+/* Compression function F: absorbs one 64-byte block into the chaining
+ value S->h using 10 rounds over the sigma schedule, then feeds forward
+ h[i] ^= v[i] ^ v[i+8]. */
+static void blake2s_compress( blake2s_state *S, const uint8_t in[BLAKE2S_BLOCKBYTES] )
+{
+ uint32_t m[16];
+ uint32_t v[16];
+ size_t i;
+
+ for( i = 0; i < 16; ++i ) {
+ m[i] = load32( in + i * sizeof( m[i] ) );
+ }
+
+ for( i = 0; i < 8; ++i ) {
+ v[i] = S->h[i];
+ }
+
+ v[ 8] = blake2s_IV[0];
+ v[ 9] = blake2s_IV[1];
+ v[10] = blake2s_IV[2];
+ v[11] = blake2s_IV[3];
+ v[12] = S->t[0] ^ blake2s_IV[4]; /* byte counter, low word */
+ v[13] = S->t[1] ^ blake2s_IV[5]; /* byte counter, high word */
+ v[14] = S->f[0] ^ blake2s_IV[6]; /* finalization flag */
+ v[15] = S->f[1] ^ blake2s_IV[7]; /* last-node flag (tree mode) */
+
+ ROUND( 0 );
+ ROUND( 1 );
+ ROUND( 2 );
+ ROUND( 3 );
+ ROUND( 4 );
+ ROUND( 5 );
+ ROUND( 6 );
+ ROUND( 7 );
+ ROUND( 8 );
+ ROUND( 9 );
+
+ for( i = 0; i < 8; ++i ) {
+ S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
+ }
+}
+
+#undef G
+#undef ROUND
+
+/* Absorbs inlen bytes into the state. Partial blocks are buffered; full
+ blocks are compressed directly from the caller's memory when possible.
+ Always returns 0 (no NULL/reuse checks in this variant). */
+int blake2s_update( blake2s_state *S, const void *pin, size_t inlen )
+{
+ const unsigned char * in = (const unsigned char *)pin;
+ if( inlen > 0 )
+ {
+ size_t left = S->buflen;
+ size_t fill = BLAKE2S_BLOCKBYTES - left;
+ if( inlen > fill )
+ {
+ S->buflen = 0;
+ memcpy( S->buf + left, in, fill ); /* Fill buffer */
+ blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
+ blake2s_compress( S, S->buf ); /* Compress */
+ in += fill; inlen -= fill;
+ while(inlen > BLAKE2S_BLOCKBYTES) {
+ blake2s_increment_counter(S, BLAKE2S_BLOCKBYTES);
+ blake2s_compress( S, in );
+ in += BLAKE2S_BLOCKBYTES;
+ inlen -= BLAKE2S_BLOCKBYTES;
+ }
+ }
+ /* Remainder (at most one block) stays buffered for the next call/final */
+ memcpy( S->buf + S->buflen, in, inlen );
+ S->buflen += inlen;
+ }
+ return 0;
+}
+
+/* Finalizes the hash: zero-pads the buffered tail, compresses it with the
+ finalization flag set, and writes outlen bytes to out. Returns 0/-1.
+ NOTE(review): copies `outlen` bytes from a BLAKE2S_OUTBYTES temp buffer;
+ callers must keep outlen <= BLAKE2S_OUTBYTES or this reads past it —
+ the one-shot blake2s() wrapper enforces that. */
+int blake2s_final( blake2s_state *S, void *out, size_t outlen )
+{
+ uint8_t buffer[BLAKE2S_OUTBYTES] = {0};
+ size_t i;
+
+ if( out == NULL || outlen < S->outlen )
+ return -1;
+
+ if( blake2s_is_lastblock( S ) )
+ return -1;
+
+ blake2s_increment_counter( S, ( uint32_t )S->buflen );
+ blake2s_set_lastblock( S );
+ memset( S->buf + S->buflen, 0, BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */
+ blake2s_compress( S, S->buf );
+
+ for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
+ store32( buffer + sizeof( S->h[i] ) * i, S->h[i] );
+
+ memcpy( out, buffer, outlen );
+ secure_zero_memory(buffer, sizeof(buffer));
+ return 0;
+}
+
+/* One-shot BLAKE2s: hashes inlen bytes of in (keyed iff keylen > 0) into
+ out (outlen bytes, 1..BLAKE2S_OUTBYTES). Returns 0 on success, -1 on
+ bad parameters or init failure. */
+int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
+{
+ blake2s_state S[1];
+
+ /* Verify parameters */
+ if ( NULL == in && inlen > 0 ) return -1;
+
+ if ( NULL == out ) return -1;
+
+ if ( NULL == key && keylen > 0) return -1;
+
+ if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
+
+ if( keylen > BLAKE2S_KEYBYTES ) return -1;
+
+ if( keylen > 0 )
+ {
+ if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1;
+ }
+ else
+ {
+ if( blake2s_init( S, outlen ) < 0 ) return -1;
+ }
+
+ blake2s_update( S, ( const uint8_t * )in, inlen );
+ blake2s_final( S, out, outlen );
+ return 0;
+}
+
+#if defined(SUPERCOP)
+/* SUPERCOP benchmarking entry point: unkeyed 32-byte BLAKE2s. */
+int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
+{
+ return blake2s( out, BLAKE2S_OUTBYTES, in, inlen, NULL, 0 );
+}
+#endif
+
+#if defined(BLAKE2S_SELFTEST)
+#include <string.h>
+#include "blake2-kat.h"
+int main( void )
+{
+ uint8_t key[BLAKE2S_KEYBYTES];
+ uint8_t buf[BLAKE2_KAT_LENGTH];
+ size_t i, step;
+
+ for( i = 0; i < BLAKE2S_KEYBYTES; ++i )
+ key[i] = ( uint8_t )i;
+
+ for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
+ buf[i] = ( uint8_t )i;
+
+ /* Test simple API */
+ for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
+ {
+ uint8_t hash[BLAKE2S_OUTBYTES];
+ blake2s( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES );
+
+ if( 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) )
+ {
+ goto fail;
+ }
+ }
+
+ /* Test streaming API */
+ for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) {
+ for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) {
+ uint8_t hash[BLAKE2S_OUTBYTES];
+ blake2s_state S;
+ uint8_t * p = buf;
+ size_t mlen = i;
+ int err = 0;
+
+ if( (err = blake2s_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) {
+ goto fail;
+ }
+
+ while (mlen >= step) {
+ if ( (err = blake2s_update(&S, p, step)) < 0 ) {
+ goto fail;
+ }
+ mlen -= step;
+ p += step;
+ }
+ if ( (err = blake2s_update(&S, p, mlen)) < 0) {
+ goto fail;
+ }
+ if ( (err = blake2s_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) {
+ goto fail;
+ }
+
+ if (0 != memcmp(hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES)) {
+ goto fail;
+ }
+ }
+ }
+
+ puts( "ok" );
+ return 0;
+fail:
+ puts("error");
+ return -1;
+}
+#endif
\ No newline at end of file
diff --git a/stratum/algos/blake2-ref/blamka-round-opt.h b/stratum/algos/blake2-ref/blamka-round-opt.h
new file mode 100644
index 000000000..faf96662e
--- /dev/null
+++ b/stratum/algos/blake2-ref/blamka-round-opt.h
@@ -0,0 +1,476 @@
+/*
+ * Argon2 reference source code package - reference C implementations
+ *
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+ *
+ * You may use this work under the terms of a Creative Commons CC0 1.0
+ * License/Waiver or the Apache Public License 2.0, at your option. The terms of
+ * these licenses can be found at:
+ *
+ * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+ * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * You should have received a copy of both of these licenses along with this
+ * software. If not, they may be obtained at the above URLs.
+ */
+
+#ifndef BLAKE_ROUND_MKA_OPT_H
+#define BLAKE_ROUND_MKA_OPT_H
+
+#if defined(HAVE_CONFIG_H)
+#include "config/dynamic-config.h"
+#endif
+
+#include "blake2-impl.h"
+
+#include <emmintrin.h>
+#if defined(__SSSE3__)
+#include <tmmintrin.h> /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */
+#endif
+
+#if defined(__XOP__) && (defined(__GNUC__) || defined(__clang__))
+#include <x86intrin.h>
+#endif
+
+#if !defined(__AVX512F__)
+#if !defined(__AVX2__)
+#if !defined(__XOP__)
+#if defined(__SSSE3__)
+#define r16 \
+ (_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
+#define r24 \
+ (_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
+#define _mm_roti_epi64(x, c) \
+ (-(c) == 32) \
+ ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
+ : (-(c) == 24) \
+ ? _mm_shuffle_epi8((x), r24) \
+ : (-(c) == 16) \
+ ? _mm_shuffle_epi8((x), r16) \
+ : (-(c) == 63) \
+ ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
+ _mm_add_epi64((x), (x))) \
+ : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
+ _mm_slli_epi64((x), 64 - (-(c))))
+#else /* defined(__SSE2__) */
+#define _mm_roti_epi64(r, c) \
+ _mm_xor_si128(_mm_srli_epi64((r), -(c)), _mm_slli_epi64((r), 64 - (-(c))))
+#endif
+#else
+#endif
+
+/* BlaMka permutation primitive used by Argon2: x + y + 2 * (lo32(x) * lo32(y)),
+ computed per 64-bit lane (_mm_mul_epu32 multiplies the low 32 bits of each
+ lane; z + z supplies the factor of two). */
+static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
+ const __m128i z = _mm_mul_epu32(x, y);
+ return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z));
+}
+
+#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
+ do { \
+ A0 = fBlaMka(A0, B0); \
+ A1 = fBlaMka(A1, B1); \
+ \
+ D0 = _mm_xor_si128(D0, A0); \
+ D1 = _mm_xor_si128(D1, A1); \
+ \
+ D0 = _mm_roti_epi64(D0, -32); \
+ D1 = _mm_roti_epi64(D1, -32); \
+ \
+ C0 = fBlaMka(C0, D0); \
+ C1 = fBlaMka(C1, D1); \
+ \
+ B0 = _mm_xor_si128(B0, C0); \
+ B1 = _mm_xor_si128(B1, C1); \
+ \
+ B0 = _mm_roti_epi64(B0, -24); \
+ B1 = _mm_roti_epi64(B1, -24); \
+ } while ((void)0, 0)
+
+#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
+ do { \
+ A0 = fBlaMka(A0, B0); \
+ A1 = fBlaMka(A1, B1); \
+ \
+ D0 = _mm_xor_si128(D0, A0); \
+ D1 = _mm_xor_si128(D1, A1); \
+ \
+ D0 = _mm_roti_epi64(D0, -16); \
+ D1 = _mm_roti_epi64(D1, -16); \
+ \
+ C0 = fBlaMka(C0, D0); \
+ C1 = fBlaMka(C1, D1); \
+ \
+ B0 = _mm_xor_si128(B0, C0); \
+ B1 = _mm_xor_si128(B1, C1); \
+ \
+ B0 = _mm_roti_epi64(B0, -63); \
+ B1 = _mm_roti_epi64(B1, -63); \
+ } while ((void)0, 0)
+
+#if defined(__SSSE3__)
+#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+ do { \
+ __m128i t0 = _mm_alignr_epi8(B1, B0, 8); \
+ __m128i t1 = _mm_alignr_epi8(B0, B1, 8); \
+ B0 = t0; \
+ B1 = t1; \
+ \
+ t0 = C0; \
+ C0 = C1; \
+ C1 = t0; \
+ \
+ t0 = _mm_alignr_epi8(D1, D0, 8); \
+ t1 = _mm_alignr_epi8(D0, D1, 8); \
+ D0 = t1; \
+ D1 = t0; \
+ } while ((void)0, 0)
+
+#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+ do { \
+ __m128i t0 = _mm_alignr_epi8(B0, B1, 8); \
+ __m128i t1 = _mm_alignr_epi8(B1, B0, 8); \
+ B0 = t0; \
+ B1 = t1; \
+ \
+ t0 = C0; \
+ C0 = C1; \
+ C1 = t0; \
+ \
+ t0 = _mm_alignr_epi8(D0, D1, 8); \
+ t1 = _mm_alignr_epi8(D1, D0, 8); \
+ D0 = t1; \
+ D1 = t0; \
+ } while ((void)0, 0)
+#else /* SSE2 */
+#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+ do { \
+ __m128i t0 = D0; \
+ __m128i t1 = B0; \
+ D0 = C0; \
+ C0 = C1; \
+ C1 = D0; \
+ D0 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t0, t0)); \
+ D1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(D1, D1)); \
+ B0 = _mm_unpackhi_epi64(B0, _mm_unpacklo_epi64(B1, B1)); \
+ B1 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(t1, t1)); \
+ } while ((void)0, 0)
+
+#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+ do { \
+ __m128i t0, t1; \
+ t0 = C0; \
+ C0 = C1; \
+ C1 = t0; \
+ t0 = B0; \
+ t1 = D0; \
+ B0 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(B0, B0)); \
+ B1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(B1, B1)); \
+ D0 = _mm_unpackhi_epi64(D0, _mm_unpacklo_epi64(D1, D1)); \
+ D1 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t1, t1)); \
+ } while ((void)0, 0)
+#endif
+
+#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \
+ do { \
+ G1(A0, B0, C0, D0, A1, B1, C1, D1); \
+ G2(A0, B0, C0, D0, A1, B1, C1, D1); \
+ \
+ DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
+ \
+ G1(A0, B0, C0, D0, A1, B1, C1, D1); \
+ G2(A0, B0, C0, D0, A1, B1, C1, D1); \
+ \
+ UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
+ } while ((void)0, 0)
+
+#else /* __AVX2__ */
+
+#include <immintrin.h>
+
+#define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
+#define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
+#define rotr16(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
+#define rotr63(x) _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x)))
+
+#define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ do { \
+ __m256i ml = _mm256_mul_epu32(A0, B0); \
+ ml = _mm256_add_epi64(ml, ml); \
+ A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
+ D0 = _mm256_xor_si256(D0, A0); \
+ D0 = rotr32(D0); \
+ \
+ ml = _mm256_mul_epu32(C0, D0); \
+ ml = _mm256_add_epi64(ml, ml); \
+ C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
+ \
+ B0 = _mm256_xor_si256(B0, C0); \
+ B0 = rotr24(B0); \
+ \
+ ml = _mm256_mul_epu32(A1, B1); \
+ ml = _mm256_add_epi64(ml, ml); \
+ A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
+ D1 = _mm256_xor_si256(D1, A1); \
+ D1 = rotr32(D1); \
+ \
+ ml = _mm256_mul_epu32(C1, D1); \
+ ml = _mm256_add_epi64(ml, ml); \
+ C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
+ \
+ B1 = _mm256_xor_si256(B1, C1); \
+ B1 = rotr24(B1); \
+ } while((void)0, 0);
+
+#define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ do { \
+ __m256i ml = _mm256_mul_epu32(A0, B0); \
+ ml = _mm256_add_epi64(ml, ml); \
+ A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
+ D0 = _mm256_xor_si256(D0, A0); \
+ D0 = rotr16(D0); \
+ \
+ ml = _mm256_mul_epu32(C0, D0); \
+ ml = _mm256_add_epi64(ml, ml); \
+ C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
+ B0 = _mm256_xor_si256(B0, C0); \
+ B0 = rotr63(B0); \
+ \
+ ml = _mm256_mul_epu32(A1, B1); \
+ ml = _mm256_add_epi64(ml, ml); \
+ A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
+ D1 = _mm256_xor_si256(D1, A1); \
+ D1 = rotr16(D1); \
+ \
+ ml = _mm256_mul_epu32(C1, D1); \
+ ml = _mm256_add_epi64(ml, ml); \
+ C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
+ B1 = _mm256_xor_si256(B1, C1); \
+ B1 = rotr63(B1); \
+ } while((void)0, 0);
+
+#define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+ do { \
+ B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
+ C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
+ D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
+ \
+ B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
+ C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
+ D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
+ } while((void)0, 0);
+
+#define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ do { \
+ __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
+ __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
+ B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+ B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+ \
+ tmp1 = C0; \
+ C0 = C1; \
+ C1 = tmp1; \
+ \
+ tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
+ tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
+ D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+ D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+ } while(0);
+
+#define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+ do { \
+ B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
+ C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
+ D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
+ \
+ B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
+ C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
+ D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
+ } while((void)0, 0);
+
+#define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ do { \
+ __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
+ __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
+ B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+ B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+ \
+ tmp1 = C0; \
+ C0 = C1; \
+ C1 = tmp1; \
+ \
+ tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \
+ tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \
+ D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+ D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+ } while((void)0, 0);
+
+#define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \
+ do{ \
+ G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ \
+ DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+ \
+ G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ \
+ UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+ } while((void)0, 0);
+
+#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ do{ \
+ G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ \
+ DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ \
+ G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ \
+ UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ } while((void)0, 0);
+
+#endif /* __AVX2__ */
+
+#else /* __AVX512F__ */
+
+#include <immintrin.h>
+
+#define ror64(x, n) _mm512_ror_epi64((x), (n))
+
+static __m512i muladd(__m512i x, __m512i y)
+{
+ __m512i z = _mm512_mul_epu32(x, y);
+ return _mm512_add_epi64(_mm512_add_epi64(x, y), _mm512_add_epi64(z, z));
+}
+
+#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
+ do { \
+ A0 = muladd(A0, B0); \
+ A1 = muladd(A1, B1); \
+\
+ D0 = _mm512_xor_si512(D0, A0); \
+ D1 = _mm512_xor_si512(D1, A1); \
+\
+ D0 = ror64(D0, 32); \
+ D1 = ror64(D1, 32); \
+\
+ C0 = muladd(C0, D0); \
+ C1 = muladd(C1, D1); \
+\
+ B0 = _mm512_xor_si512(B0, C0); \
+ B1 = _mm512_xor_si512(B1, C1); \
+\
+ B0 = ror64(B0, 24); \
+ B1 = ror64(B1, 24); \
+ } while ((void)0, 0)
+
+#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
+ do { \
+ A0 = muladd(A0, B0); \
+ A1 = muladd(A1, B1); \
+\
+ D0 = _mm512_xor_si512(D0, A0); \
+ D1 = _mm512_xor_si512(D1, A1); \
+\
+ D0 = ror64(D0, 16); \
+ D1 = ror64(D1, 16); \
+\
+ C0 = muladd(C0, D0); \
+ C1 = muladd(C1, D1); \
+\
+ B0 = _mm512_xor_si512(B0, C0); \
+ B1 = _mm512_xor_si512(B1, C1); \
+\
+ B0 = ror64(B0, 63); \
+ B1 = ror64(B1, 63); \
+ } while ((void)0, 0)
+
+#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+ do { \
+ B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
+ B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
+\
+ C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
+ C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
+\
+ D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
+ D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
+ } while ((void)0, 0)
+
+#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+ do { \
+ B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
+ B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
+\
+ C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
+ C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
+\
+ D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
+ D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
+ } while ((void)0, 0)
+
+#define BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1) \
+ do { \
+ G1(A0, B0, C0, D0, A1, B1, C1, D1); \
+ G2(A0, B0, C0, D0, A1, B1, C1, D1); \
+\
+ DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
+\
+ G1(A0, B0, C0, D0, A1, B1, C1, D1); \
+ G2(A0, B0, C0, D0, A1, B1, C1, D1); \
+\
+ UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
+ } while ((void)0, 0)
+
+#define SWAP_HALVES(A0, A1) \
+ do { \
+ __m512i t0, t1; \
+ t0 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \
+ t1 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \
+ A0 = t0; \
+ A1 = t1; \
+ } while((void)0, 0)
+
+#define SWAP_QUARTERS(A0, A1) \
+ do { \
+ SWAP_HALVES(A0, A1); \
+ A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
+ A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
+ } while((void)0, 0)
+
+#define UNSWAP_QUARTERS(A0, A1) \
+ do { \
+ A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
+ A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
+ SWAP_HALVES(A0, A1); \
+ } while((void)0, 0)
+
+#define BLAKE2_ROUND_1(A0, C0, B0, D0, A1, C1, B1, D1) \
+ do { \
+ SWAP_HALVES(A0, B0); \
+ SWAP_HALVES(C0, D0); \
+ SWAP_HALVES(A1, B1); \
+ SWAP_HALVES(C1, D1); \
+ BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
+ SWAP_HALVES(A0, B0); \
+ SWAP_HALVES(C0, D0); \
+ SWAP_HALVES(A1, B1); \
+ SWAP_HALVES(C1, D1); \
+ } while ((void)0, 0)
+
+#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+ do { \
+ SWAP_QUARTERS(A0, A1); \
+ SWAP_QUARTERS(B0, B1); \
+ SWAP_QUARTERS(C0, C1); \
+ SWAP_QUARTERS(D0, D1); \
+ BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
+ UNSWAP_QUARTERS(A0, A1); \
+ UNSWAP_QUARTERS(B0, B1); \
+ UNSWAP_QUARTERS(C0, C1); \
+ UNSWAP_QUARTERS(D0, D1); \
+ } while ((void)0, 0)
+
+#endif /* __AVX512F__ */
+#endif /* BLAKE_ROUND_MKA_OPT_H */
\ No newline at end of file
diff --git a/stratum/algos/blake2-ref/blamka-round-ref.h b/stratum/algos/blake2-ref/blamka-round-ref.h
new file mode 100644
index 000000000..2238959e1
--- /dev/null
+++ b/stratum/algos/blake2-ref/blamka-round-ref.h
@@ -0,0 +1,56 @@
+/*
+ * Argon2 reference source code package - reference C implementations
+ *
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+ *
+ * You may use this work under the terms of a Creative Commons CC0 1.0
+ * License/Waiver or the Apache Public License 2.0, at your option. The terms of
+ * these licenses can be found at:
+ *
+ * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+ * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * You should have received a copy of both of these licenses along with this
+ * software. If not, they may be obtained at the above URLs.
+ */
+
+#ifndef BLAKE_ROUND_MKA_H
+#define BLAKE_ROUND_MKA_H
+
+#include "blake2.h"
+#include "blake2-impl.h"
+
+/*designed by the Lyra PHC team */
+static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
+ const uint64_t m = UINT64_C(0xFFFFFFFF);
+ const uint64_t xy = (x & m) * (y & m);
+ return x + y + 2 * xy;
+}
+
+#define G(a, b, c, d) \
+ do { \
+ a = fBlaMka(a, b); \
+ d = rotr64(d ^ a, 32); \
+ c = fBlaMka(c, d); \
+ b = rotr64(b ^ c, 24); \
+ a = fBlaMka(a, b); \
+ d = rotr64(d ^ a, 16); \
+ c = fBlaMka(c, d); \
+ b = rotr64(b ^ c, 63); \
+ } while ((void)0, 0)
+
+#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, \
+ v12, v13, v14, v15) \
+ do { \
+ G(v0, v4, v8, v12); \
+ G(v1, v5, v9, v13); \
+ G(v2, v6, v10, v14); \
+ G(v3, v7, v11, v15); \
+ G(v0, v5, v10, v15); \
+ G(v1, v6, v11, v12); \
+ G(v2, v7, v8, v13); \
+ G(v3, v4, v9, v14); \
+ } while ((void)0, 0)
+
+#endif
\ No newline at end of file
diff --git a/stratum/algos/bmw512.c b/stratum/algos/bmw512.c
new file mode 100644
index 000000000..54e18cdba
--- /dev/null
+++ b/stratum/algos/bmw512.c
@@ -0,0 +1,18 @@
+#include "bmw512.h"
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include "../sha3/sph_bmw.h"
+
+ void bmw512_hash(const char* input, char* output, uint32_t len)
+{
+ sph_bmw512_context ctx_bmw;
+ uint32_t hashA[16];
+
+ sph_bmw512_init(&ctx_bmw);
+ sph_bmw512 (&ctx_bmw, input, len);
+ sph_bmw512_close(&ctx_bmw, hashA);
+
+ memcpy(output, hashA, 32);
+}
\ No newline at end of file
diff --git a/stratum/algos/bmw512.h b/stratum/algos/bmw512.h
new file mode 100644
index 000000000..e58f0ced4
--- /dev/null
+++ b/stratum/algos/bmw512.h
@@ -0,0 +1,11 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ #include <stdint.h>
+
+ void bmw512_hash(const char* input, char* output, uint32_t len);
+
+ #ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/stratum/algos/cosa.c b/stratum/algos/cosa.c
new file mode 100644
index 000000000..91abca80e
--- /dev/null
+++ b/stratum/algos/cosa.c
@@ -0,0 +1,130 @@
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "../sha3/sph_blake.h"
+#include "../sha3/sph_bmw.h"
+#include "../sha3/sph_groestl.h"
+#include "../sha3/sph_jh.h"
+#include "../sha3/sph_keccak.h"
+#include "../sha3/sph_skein.h"
+#include "../sha3/sph_luffa.h"
+#include "../sha3/sph_cubehash.h"
+#include "../sha3/sph_shavite.h"
+#include "../sha3/sph_simd.h"
+#include "../sha3/sph_echo.h"
+#include "../sha3/sph_hamsi.h"
+#include "../sha3/sph_fugue.h"
+#include "../sha3/sph_shabal.h"
+#include "../sha3/sph_whirlpool.h"
+#include "../sha3/sph_sha2.h"
+#include "../sha3/sph_haval.h"
+#include "../sha3/sph_streebog.h"
+#include "Lyra2-z.h"
+
+#define _ALIGN(x) __attribute__ ((aligned(x)))
+
+void cosa_hash(const char* input, char* output, uint32_t len)
+{
+ unsigned char _ALIGN(128) hash[128],hashB[128],hashC[128],hashD[128];
+
+ sph_blake512_context ctx_blake;
+ sph_bmw512_context ctx_bmw;
+ sph_groestl512_context ctx_groestl;
+ sph_jh512_context ctx_jh;
+ sph_keccak512_context ctx_keccak;
+ sph_skein512_context ctx_skein;
+ sph_luffa512_context ctx_luffa;
+ sph_cubehash512_context ctx_cubehash;
+ sph_shavite512_context ctx_shavite;
+ sph_simd512_context ctx_simd;
+ sph_echo512_context ctx_echo;
+ sph_hamsi512_context ctx_hamsi;
+ sph_fugue512_context ctx_fugue;
+ sph_shabal512_context ctx_shabal;
+ sph_whirlpool_context ctx_whirlpool;
+ sph_sha512_context ctx_sha512;
+ sph_haval256_5_context ctx_haval;
+ sph_gost512_context ctx_gost;
+
+ sph_blake512_init(&ctx_blake);
+ sph_blake512(&ctx_blake, input, len);
+ sph_blake512_close(&ctx_blake, hash);
+
+ sph_bmw512_init(&ctx_bmw);
+ sph_bmw512(&ctx_bmw, hash, 64);
+ sph_bmw512_close(&ctx_bmw, hash);
+
+ sph_groestl512_init(&ctx_groestl);
+ sph_groestl512(&ctx_groestl, hash, 64);
+ sph_groestl512_close(&ctx_groestl, hash);
+
+ sph_skein512_init(&ctx_skein);
+ sph_skein512(&ctx_skein, hash, 64);
+ sph_skein512_close(&ctx_skein, hash);
+
+ sph_jh512_init(&ctx_jh);
+ sph_jh512(&ctx_jh, hash, 64);
+ sph_jh512_close(&ctx_jh, hash);
+
+ sph_keccak512_init(&ctx_keccak);
+ sph_keccak512(&ctx_keccak, hash, 64);
+ sph_keccak512_close(&ctx_keccak, hash);
+
+ sph_luffa512_init(&ctx_luffa);
+ sph_luffa512(&ctx_luffa, hash, 64);
+ sph_luffa512_close(&ctx_luffa, hash);
+
+ sph_cubehash512_init(&ctx_cubehash);
+ sph_cubehash512(&ctx_cubehash, hash, 64);
+ sph_cubehash512_close(&ctx_cubehash, hash);
+
+ sph_shavite512_init(&ctx_shavite);
+ sph_shavite512(&ctx_shavite, hash, 64);
+ sph_shavite512_close(&ctx_shavite, hash);
+
+ sph_simd512_init(&ctx_simd);
+ sph_simd512(&ctx_simd, hash, 64);
+ sph_simd512_close(&ctx_simd, hash);
+
+ sph_echo512_init(&ctx_echo);
+ sph_echo512(&ctx_echo, hash, 64);
+ sph_echo512_close(&ctx_echo, hash);
+
+ sph_hamsi512_init(&ctx_hamsi);
+ sph_hamsi512(&ctx_hamsi, hash, 64);
+ sph_hamsi512_close(&ctx_hamsi, hash);
+
+ sph_fugue512_init(&ctx_fugue);
+ sph_fugue512(&ctx_fugue, hash, 64);
+ sph_fugue512_close(&ctx_fugue, hash);
+
+ sph_shabal512_init(&ctx_shabal);
+ sph_shabal512(&ctx_shabal, hash, 64);
+ sph_shabal512_close(&ctx_shabal, hash);
+
+ sph_whirlpool_init(&ctx_whirlpool);
+ sph_whirlpool(&ctx_whirlpool, hash, 64);
+ sph_whirlpool_close(&ctx_whirlpool, hash);
+
+ sph_sha512_init(&ctx_sha512);
+ sph_sha512(&ctx_sha512,(const void*) hash, 64);
+ sph_sha512_close(&ctx_sha512,(void*) hash);
+
+ memset (hashB,0x0,128);
+ memset (hashC,0x0,128);
+ memset (hashD,0x0,128);
+
+ sph_haval256_5_init(&ctx_haval);
+ sph_haval256_5(&ctx_haval,(const void*) hash, 64);
+ sph_haval256_5_close(&ctx_haval, hashB);
+
+ sph_gost512_init(&ctx_gost);
+ sph_gost512(&ctx_gost, (const void*) hashB, 64);
+ sph_gost512_close(&ctx_gost, (void*) hashC);
+
+ LYRA2z(hashD, 32, hashC, 80, hashC, 80, 2, 66, 66);
+
+ memcpy(output, hashD, 32);
+}
\ No newline at end of file
diff --git a/stratum/algos/cosa.h b/stratum/algos/cosa.h
new file mode 100644
index 000000000..c6e2a4124
--- /dev/null
+++ b/stratum/algos/cosa.h
@@ -0,0 +1,16 @@
+#ifndef COSA_H
+#define COSA_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+void cosa_hash(const char* input, char* output, uint32_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/stratum/algos/cryptonote/crypto/aesb.c b/stratum/algos/cryptonote/crypto/aesb.c
new file mode 100644
index 000000000..ebe70cdca
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/aesb.c
@@ -0,0 +1,177 @@
+/*
+---------------------------------------------------------------------------
+Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved.
+
+The redistribution and use of this software (with or without changes)
+is allowed without the payment of fees or royalties provided that:
+
+ source code distributions include the above copyright notice, this
+ list of conditions and the following disclaimer;
+
+ binary distributions include the above copyright notice, this list
+ of conditions and the following disclaimer in their documentation.
+
+This software is provided 'as is' with no explicit or implied warranties
+in respect of its operation, including, but not limited to, correctness
+and fitness for purpose.
+---------------------------------------------------------------------------
+Issue Date: 20/12/2007
+*/
+
+#include <stdint.h>
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+#define TABLE_ALIGN 32
+#define WPOLY 0x011b
+#define N_COLS 4
+#define AES_BLOCK_SIZE 16
+#define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2))
+
+#if defined(_MSC_VER)
+#define ALIGN __declspec(align(TABLE_ALIGN))
+#elif defined(__GNUC__)
+#define ALIGN __attribute__ ((aligned(16)))
+#else
+#define ALIGN
+#endif
+
+#define rf1(r,c) (r)
+#define word_in(x,c) (*((uint32_t*)(x)+(c)))
+#define word_out(x,c,v) (*((uint32_t*)(x)+(c)) = (v))
+
+#define s(x,c) x[c]
+#define si(y,x,c) (s(y,c) = word_in(x, c))
+#define so(y,x,c) word_out(y, c, s(x,c))
+#define state_in(y,x) si(y,x,0); si(y,x,1); si(y,x,2); si(y,x,3)
+#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
+#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)
+#define to_byte(x) ((x) & 0xff)
+#define bval(x,n) to_byte((x) >> (8 * (n)))
+
+#define fwd_var(x,r,c)\
+ ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
+ : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\
+ : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
+ : ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))
+
+#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c))
+
+#define sb_data(w) {\
+ w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
+ w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
+ w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
+ w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
+ w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
+ w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
+ w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
+ w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
+ w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
+ w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
+ w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
+ w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
+ w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
+ w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
+ w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
+ w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
+ w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
+ w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
+ w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
+ w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
+ w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
+ w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
+ w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
+ w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
+ w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
+ w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
+ w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
+ w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
+ w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
+ w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
+ w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
+ w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
+
+#define rc_data(w) {\
+ w(0x01), w(0x02), w(0x04), w(0x08), w(0x10),w(0x20), w(0x40), w(0x80),\
+ w(0x1b), w(0x36) }
+
+#define bytes2word(b0, b1, b2, b3) (((uint32_t)(b3) << 24) | \
+ ((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0))
+
+#define h0(x) (x)
+#define w0(p) bytes2word(p, 0, 0, 0)
+#define w1(p) bytes2word(0, p, 0, 0)
+#define w2(p) bytes2word(0, 0, p, 0)
+#define w3(p) bytes2word(0, 0, 0, p)
+
+#define u0(p) bytes2word(f2(p), p, p, f3(p))
+#define u1(p) bytes2word(f3(p), f2(p), p, p)
+#define u2(p) bytes2word(p, f3(p), f2(p), p)
+#define u3(p) bytes2word(p, p, f3(p), f2(p))
+
+#define v0(p) bytes2word(fe(p), f9(p), fd(p), fb(p))
+#define v1(p) bytes2word(fb(p), fe(p), f9(p), fd(p))
+#define v2(p) bytes2word(fd(p), fb(p), fe(p), f9(p))
+#define v3(p) bytes2word(f9(p), fd(p), fb(p), fe(p))
+
+#define f2(x) ((x<<1) ^ (((x>>7) & 1) * WPOLY))
+#define f4(x) ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY))
+#define f8(x) ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) ^ (((x>>5) & 4) * WPOLY))
+#define f3(x) (f2(x) ^ x)
+#define f9(x) (f8(x) ^ x)
+#define fb(x) (f8(x) ^ f2(x) ^ x)
+#define fd(x) (f8(x) ^ f4(x) ^ x)
+#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
+
+#define t_dec(m,n) t_##m##n
+#define t_set(m,n) t_##m##n
+#define t_use(m,n) t_##m##n
+
+#define d_4(t,n,b,e,f,g,h) ALIGN const t n[4][256] = { b(e), b(f), b(g), b(h) }
+
+#define four_tables(x,tab,vf,rf,c) \
+ (tab[0][bval(vf(x,0,c),rf(0,c))] \
+ ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
+ ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
+ ^ tab[3][bval(vf(x,3,c),rf(3,c))])
+
+d_4(uint32_t, t_dec(f,n), sb_data, u0, u1, u2, u3);
+
+void aesb_single_round(const uint8_t *in, uint8_t *out, uint8_t *expandedKey)
+{
+ uint32_t b0[4], b1[4];
+ const uint32_t *kp = (uint32_t *) expandedKey;
+ state_in(b0, in);
+
+ round(fwd_rnd, b1, b0, kp);
+
+ state_out(out, b1);
+}
+
+void aesb_pseudo_round(const uint8_t *in, uint8_t *out, uint8_t *expandedKey)
+{
+ uint32_t b0[4], b1[4];
+ const uint32_t *kp = (uint32_t *) expandedKey;
+ state_in(b0, in);
+
+ round(fwd_rnd, b1, b0, kp);
+ round(fwd_rnd, b0, b1, kp + 1 * N_COLS);
+ round(fwd_rnd, b1, b0, kp + 2 * N_COLS);
+ round(fwd_rnd, b0, b1, kp + 3 * N_COLS);
+ round(fwd_rnd, b1, b0, kp + 4 * N_COLS);
+ round(fwd_rnd, b0, b1, kp + 5 * N_COLS);
+ round(fwd_rnd, b1, b0, kp + 6 * N_COLS);
+ round(fwd_rnd, b0, b1, kp + 7 * N_COLS);
+ round(fwd_rnd, b1, b0, kp + 8 * N_COLS);
+ round(fwd_rnd, b0, b1, kp + 9 * N_COLS);
+
+ state_out(out, b0);
+}
+
+
+#if defined(__cplusplus)
+}
+#endif
diff --git a/stratum/algos/cryptonote/crypto/c_blake256.c b/stratum/algos/cryptonote/crypto/c_blake256.c
new file mode 100644
index 000000000..c0e4f336c
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/c_blake256.c
@@ -0,0 +1,326 @@
+/*
+ * The blake256_* and blake224_* functions are largely copied from
+ * blake256_light.c and blake224_light.c from the BLAKE website:
+ *
+ * http://131002.net/blake/
+ *
+ * The hmac_* functions implement HMAC-BLAKE-256 and HMAC-BLAKE-224.
+ * HMAC is specified by RFC 2104.
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include "c_blake256.h"
+
+#define U8TO32(p) \
+ (((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) | \
+ ((uint32_t)((p)[2]) << 8) | ((uint32_t)((p)[3]) ))
+#define U32TO8(p, v) \
+ (p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \
+ (p)[2] = (uint8_t)((v) >> 8); (p)[3] = (uint8_t)((v) );
+
+const uint8_t sigma[][16] = {
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
+ {14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3},
+ {11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4},
+ { 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8},
+ { 9, 0, 5, 7, 2, 4,10,15,14, 1,11,12, 6, 8, 3,13},
+ { 2,12, 6,10, 0,11, 8, 3, 4,13, 7, 5,15,14, 1, 9},
+ {12, 5, 1,15,14,13, 4,10, 0, 7, 6, 3, 9, 2, 8,11},
+ {13,11, 7,14,12, 1, 3, 9, 5, 0,15, 4, 8, 6, 2,10},
+ { 6,15,14, 9,11, 3, 0, 8,12, 2,13, 7, 1, 4,10, 5},
+ {10, 2, 8, 4, 7, 6, 1, 5,15,11, 9,14, 3,12,13, 0},
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
+ {14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3},
+ {11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4},
+ { 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8}
+};
+
+const uint32_t cst[16] = {
+ 0x243F6A88, 0x85A308D3, 0x13198A2E, 0x03707344,
+ 0xA4093822, 0x299F31D0, 0x082EFA98, 0xEC4E6C89,
+ 0x452821E6, 0x38D01377, 0xBE5466CF, 0x34E90C6C,
+ 0xC0AC29B7, 0xC97C50DD, 0x3F84D5B5, 0xB5470917
+};
+
+static const uint8_t padding[] = {
+ 0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+};
+
+
+void blake256_compress(state *S, const uint8_t *block) {
+ uint32_t v[16], m[16], i;
+
+#define ROT(x,n) (((x)<<(32-n))|((x)>>(n)))
+#define G(a,b,c,d,e) \
+ v[a] += (m[sigma[i][e]] ^ cst[sigma[i][e+1]]) + v[b]; \
+ v[d] = ROT(v[d] ^ v[a],16); \
+ v[c] += v[d]; \
+ v[b] = ROT(v[b] ^ v[c],12); \
+ v[a] += (m[sigma[i][e+1]] ^ cst[sigma[i][e]])+v[b]; \
+ v[d] = ROT(v[d] ^ v[a], 8); \
+ v[c] += v[d]; \
+ v[b] = ROT(v[b] ^ v[c], 7);
+
+ for (i = 0; i < 16; ++i) m[i] = U8TO32(block + i * 4);
+ for (i = 0; i < 8; ++i) v[i] = S->h[i];
+ v[ 8] = S->s[0] ^ 0x243F6A88;
+ v[ 9] = S->s[1] ^ 0x85A308D3;
+ v[10] = S->s[2] ^ 0x13198A2E;
+ v[11] = S->s[3] ^ 0x03707344;
+ v[12] = 0xA4093822;
+ v[13] = 0x299F31D0;
+ v[14] = 0x082EFA98;
+ v[15] = 0xEC4E6C89;
+
+ if (S->nullt == 0) {
+ v[12] ^= S->t[0];
+ v[13] ^= S->t[0];
+ v[14] ^= S->t[1];
+ v[15] ^= S->t[1];
+ }
+
+ for (i = 0; i < 14; ++i) {
+ G(0, 4, 8, 12, 0);
+ G(1, 5, 9, 13, 2);
+ G(2, 6, 10, 14, 4);
+ G(3, 7, 11, 15, 6);
+ G(3, 4, 9, 14, 14);
+ G(2, 7, 8, 13, 12);
+ G(0, 5, 10, 15, 8);
+ G(1, 6, 11, 12, 10);
+ }
+
+ for (i = 0; i < 16; ++i) S->h[i % 8] ^= v[i];
+ for (i = 0; i < 8; ++i) S->h[i] ^= S->s[i % 4];
+}
+
+void blake256_init(state *S) {
+ S->h[0] = 0x6A09E667;
+ S->h[1] = 0xBB67AE85;
+ S->h[2] = 0x3C6EF372;
+ S->h[3] = 0xA54FF53A;
+ S->h[4] = 0x510E527F;
+ S->h[5] = 0x9B05688C;
+ S->h[6] = 0x1F83D9AB;
+ S->h[7] = 0x5BE0CD19;
+ S->t[0] = S->t[1] = S->buflen = S->nullt = 0;
+ S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0;
+}
+
+void blake224_init(state *S) {
+ S->h[0] = 0xC1059ED8;
+ S->h[1] = 0x367CD507;
+ S->h[2] = 0x3070DD17;
+ S->h[3] = 0xF70E5939;
+ S->h[4] = 0xFFC00B31;
+ S->h[5] = 0x68581511;
+ S->h[6] = 0x64F98FA7;
+ S->h[7] = 0xBEFA4FA4;
+ S->t[0] = S->t[1] = S->buflen = S->nullt = 0;
+ S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0;
+}
+
+// datalen = number of bits
+void blake256_update(state *S, const uint8_t *data, uint64_t datalen) {
+ int left = S->buflen >> 3;
+ int fill = 64 - left;
+
+ if (left && (((datalen >> 3) & 0x3F) >= (unsigned) fill)) {
+ memcpy((void *) (S->buf + left), (void *) data, fill);
+ S->t[0] += 512;
+ if (S->t[0] == 0) S->t[1]++;
+ blake256_compress(S, S->buf);
+ data += fill;
+ datalen -= (fill << 3);
+ left = 0;
+ }
+
+ while (datalen >= 512) {
+ S->t[0] += 512;
+ if (S->t[0] == 0) S->t[1]++;
+ blake256_compress(S, data);
+ data += 64;
+ datalen -= 512;
+ }
+
+ if (datalen > 0) {
+ memcpy((void *) (S->buf + left), (void *) data, datalen >> 3);
+ S->buflen = (left << 3) + (int)datalen;
+ } else {
+ S->buflen = 0;
+ }
+}
+
+// datalen = number of bits
+void blake224_update(state *S, const uint8_t *data, uint64_t datalen) {
+ blake256_update(S, data, datalen);
+}
+
+void blake256_final_h(state *S, uint8_t *digest, uint8_t pa, uint8_t pb) {
+ uint8_t msglen[8];
+ uint32_t lo = S->t[0] + S->buflen, hi = S->t[1];
+ if (lo < (unsigned) S->buflen) hi++;
+ U32TO8(msglen + 0, hi);
+ U32TO8(msglen + 4, lo);
+
+ if (S->buflen == 440) { /* one padding byte */
+ S->t[0] -= 8;
+ blake256_update(S, &pa, 8);
+ } else {
+ if (S->buflen < 440) { /* enough space to fill the block */
+ if (S->buflen == 0) S->nullt = 1;
+ S->t[0] -= 440 - S->buflen;
+ blake256_update(S, padding, 440 - S->buflen);
+ } else { /* need 2 compressions */
+ S->t[0] -= 512 - S->buflen;
+ blake256_update(S, padding, 512 - S->buflen);
+ S->t[0] -= 440;
+ blake256_update(S, padding + 1, 440);
+ S->nullt = 1;
+ }
+ blake256_update(S, &pb, 8);
+ S->t[0] -= 8;
+ }
+ S->t[0] -= 64;
+ blake256_update(S, msglen, 64);
+
+ U32TO8(digest + 0, S->h[0]);
+ U32TO8(digest + 4, S->h[1]);
+ U32TO8(digest + 8, S->h[2]);
+ U32TO8(digest + 12, S->h[3]);
+ U32TO8(digest + 16, S->h[4]);
+ U32TO8(digest + 20, S->h[5]);
+ U32TO8(digest + 24, S->h[6]);
+ U32TO8(digest + 28, S->h[7]);
+}
+
+void blake256_final(state *S, uint8_t *digest) {
+ blake256_final_h(S, digest, 0x81, 0x01);
+}
+
+void blake224_final(state *S, uint8_t *digest) {
+ blake256_final_h(S, digest, 0x80, 0x00);
+}
+
+// inlen = number of bytes
+void blake256_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) {
+ state S;
+ blake256_init(&S);
+ blake256_update(&S, in, inlen * 8);
+ blake256_final(&S, out);
+}
+
+// inlen = number of bytes
+void blake224_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) {
+ state S;
+ blake224_init(&S);
+ blake224_update(&S, in, inlen * 8);
+ blake224_final(&S, out);
+}
+
+// keylen = number of bytes
+void hmac_blake256_init(hmac_state *S, const uint8_t *_key, uint64_t keylen) {
+ const uint8_t *key = _key;
+ uint8_t keyhash[32];
+ uint8_t pad[64];
+ uint64_t i;
+
+ if (keylen > 64) {
+ blake256_hash(keyhash, key, keylen);
+ key = keyhash;
+ keylen = 32;
+ }
+
+ blake256_init(&S->inner);
+ memset(pad, 0x36, 64);
+ for (i = 0; i < keylen; ++i) {
+ pad[i] ^= key[i];
+ }
+ blake256_update(&S->inner, pad, 512);
+
+ blake256_init(&S->outer);
+ memset(pad, 0x5c, 64);
+ for (i = 0; i < keylen; ++i) {
+ pad[i] ^= key[i];
+ }
+ blake256_update(&S->outer, pad, 512);
+
+ memset(keyhash, 0, 32);
+}
+
+// keylen = number of bytes
+void hmac_blake224_init(hmac_state *S, const uint8_t *_key, uint64_t keylen) {
+ const uint8_t *key = _key;
+ uint8_t keyhash[32];
+ uint8_t pad[64];
+ uint64_t i;
+
+ if (keylen > 64) {
+ blake256_hash(keyhash, key, keylen);
+ key = keyhash;
+ keylen = 28;
+ }
+
+ blake224_init(&S->inner);
+ memset(pad, 0x36, 64);
+ for (i = 0; i < keylen; ++i) {
+ pad[i] ^= key[i];
+ }
+ blake224_update(&S->inner, pad, 512);
+
+ blake224_init(&S->outer);
+ memset(pad, 0x5c, 64);
+ for (i = 0; i < keylen; ++i) {
+ pad[i] ^= key[i];
+ }
+ blake224_update(&S->outer, pad, 512);
+
+ memset(keyhash, 0, 32);
+}
+
+// datalen = number of bits
+void hmac_blake256_update(hmac_state *S, const uint8_t *data, uint64_t datalen) {
+ // update the inner state
+ blake256_update(&S->inner, data, datalen);
+}
+
+// datalen = number of bits
+void hmac_blake224_update(hmac_state *S, const uint8_t *data, uint64_t datalen) {
+ // update the inner state
+ blake224_update(&S->inner, data, datalen);
+}
+
+void hmac_blake256_final(hmac_state *S, uint8_t *digest) {
+ uint8_t ihash[32];
+ blake256_final(&S->inner, ihash);
+ blake256_update(&S->outer, ihash, 256);
+ blake256_final(&S->outer, digest);
+ memset(ihash, 0, 32);
+}
+
+void hmac_blake224_final(hmac_state *S, uint8_t *digest) {
+ uint8_t ihash[32];
+ blake224_final(&S->inner, ihash);
+ blake224_update(&S->outer, ihash, 224);
+ blake224_final(&S->outer, digest);
+ memset(ihash, 0, 32);
+}
+
+// keylen = number of bytes; inlen = number of bytes
+void hmac_blake256_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint64_t inlen) {
+ hmac_state S;
+ hmac_blake256_init(&S, key, keylen);
+ hmac_blake256_update(&S, in, inlen * 8);
+ hmac_blake256_final(&S, out);
+}
+
+// keylen = number of bytes; inlen = number of bytes
+void hmac_blake224_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint64_t inlen) {
+ hmac_state S;
+ hmac_blake224_init(&S, key, keylen);
+ hmac_blake224_update(&S, in, inlen * 8);
+ hmac_blake224_final(&S, out);
+}
diff --git a/stratum/algos/cryptonote/crypto/c_blake256.h b/stratum/algos/cryptonote/crypto/c_blake256.h
new file mode 100644
index 000000000..b9c2aad0d
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/c_blake256.h
@@ -0,0 +1,43 @@
+#ifndef _BLAKE256_H_
+#define _BLAKE256_H_
+
+#include <stdint.h>
+
+typedef struct {
+ uint32_t h[8], s[4], t[2];
+ int buflen, nullt;
+ uint8_t buf[64];
+} state;
+
+typedef struct {
+ state inner;
+ state outer;
+} hmac_state;
+
+void blake256_init(state *);
+void blake224_init(state *);
+
+void blake256_update(state *, const uint8_t *, uint64_t);
+void blake224_update(state *, const uint8_t *, uint64_t);
+
+void blake256_final(state *, uint8_t *);
+void blake224_final(state *, uint8_t *);
+
+void blake256_hash(uint8_t *, const uint8_t *, uint64_t);
+void blake224_hash(uint8_t *, const uint8_t *, uint64_t);
+
+/* HMAC functions: */
+
+void hmac_blake256_init(hmac_state *, const uint8_t *, uint64_t);
+void hmac_blake224_init(hmac_state *, const uint8_t *, uint64_t);
+
+void hmac_blake256_update(hmac_state *, const uint8_t *, uint64_t);
+void hmac_blake224_update(hmac_state *, const uint8_t *, uint64_t);
+
+void hmac_blake256_final(hmac_state *, uint8_t *);
+void hmac_blake224_final(hmac_state *, uint8_t *);
+
+void hmac_blake256_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t);
+void hmac_blake224_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t);
+
+#endif /* _BLAKE256_H_ */
diff --git a/stratum/algos/cryptonote/crypto/c_groestl.c b/stratum/algos/cryptonote/crypto/c_groestl.c
new file mode 100644
index 000000000..94f95566d
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/c_groestl.c
@@ -0,0 +1,360 @@
+/* hash.c April 2012
+ * Groestl ANSI C code optimised for 32-bit machines
+ * Author: Thomas Krinninger
+ *
+ * This work is based on the implementation of
+ * Soeren S. Thomsen and Krystian Matusiewicz
+ *
+ *
+ */
+
+#include "c_groestl.h"
+#include "groestl_tables.h"
+
+#define P_TYPE 0
+#define Q_TYPE 1
+
+const uint8_t shift_Values[2][8] = {{0,1,2,3,4,5,6,7},{1,3,5,7,0,2,4,6}};
+
+const uint8_t indices_cyclic[15] = {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6};
+
+
+#define ROTATE_COLUMN_DOWN(v1, v2, amount_bytes, temp_var) {temp_var = (v1<<(8*amount_bytes))|(v2>>(8*(4-amount_bytes))); \
+ v2 = (v2<<(8*amount_bytes))|(v1>>(8*(4-amount_bytes))); \
+ v1 = temp_var;}
+
+
+#define COLUMN(x,y,i,c0,c1,c2,c3,c4,c5,c6,c7,tv1,tv2,tu,tl,t) \
+ tu = T[2*(uint32_t)x[4*c0+0]]; \
+ tl = T[2*(uint32_t)x[4*c0+0]+1]; \
+ tv1 = T[2*(uint32_t)x[4*c1+1]]; \
+ tv2 = T[2*(uint32_t)x[4*c1+1]+1]; \
+ ROTATE_COLUMN_DOWN(tv1,tv2,1,t) \
+ tu ^= tv1; \
+ tl ^= tv2; \
+ tv1 = T[2*(uint32_t)x[4*c2+2]]; \
+ tv2 = T[2*(uint32_t)x[4*c2+2]+1]; \
+ ROTATE_COLUMN_DOWN(tv1,tv2,2,t) \
+ tu ^= tv1; \
+ tl ^= tv2; \
+ tv1 = T[2*(uint32_t)x[4*c3+3]]; \
+ tv2 = T[2*(uint32_t)x[4*c3+3]+1]; \
+ ROTATE_COLUMN_DOWN(tv1,tv2,3,t) \
+ tu ^= tv1; \
+ tl ^= tv2; \
+ tl ^= T[2*(uint32_t)x[4*c4+0]]; \
+ tu ^= T[2*(uint32_t)x[4*c4+0]+1]; \
+ tv1 = T[2*(uint32_t)x[4*c5+1]]; \
+ tv2 = T[2*(uint32_t)x[4*c5+1]+1]; \
+ ROTATE_COLUMN_DOWN(tv1,tv2,1,t) \
+ tl ^= tv1; \
+ tu ^= tv2; \
+ tv1 = T[2*(uint32_t)x[4*c6+2]]; \
+ tv2 = T[2*(uint32_t)x[4*c6+2]+1]; \
+ ROTATE_COLUMN_DOWN(tv1,tv2,2,t) \
+ tl ^= tv1; \
+ tu ^= tv2; \
+ tv1 = T[2*(uint32_t)x[4*c7+3]]; \
+ tv2 = T[2*(uint32_t)x[4*c7+3]+1]; \
+ ROTATE_COLUMN_DOWN(tv1,tv2,3,t) \
+ tl ^= tv1; \
+ tu ^= tv2; \
+ y[i] = tu; \
+ y[i+1] = tl;
+
+
+/* compute one round of P (short variants) */
+static void RND512P(uint8_t *x, uint32_t *y, uint32_t r) {
+ uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp;
+ uint32_t* x32 = (uint32_t*)x;
+ x32[ 0] ^= 0x00000000^r;
+ x32[ 2] ^= 0x00000010^r;
+ x32[ 4] ^= 0x00000020^r;
+ x32[ 6] ^= 0x00000030^r;
+ x32[ 8] ^= 0x00000040^r;
+ x32[10] ^= 0x00000050^r;
+ x32[12] ^= 0x00000060^r;
+ x32[14] ^= 0x00000070^r;
+ COLUMN(x,y, 0, 0, 2, 4, 6, 9, 11, 13, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ COLUMN(x,y, 2, 2, 4, 6, 8, 11, 13, 15, 1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ COLUMN(x,y, 4, 4, 6, 8, 10, 13, 15, 1, 3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ COLUMN(x,y, 6, 6, 8, 10, 12, 15, 1, 3, 5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ COLUMN(x,y, 8, 8, 10, 12, 14, 1, 3, 5, 7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ COLUMN(x,y,10, 10, 12, 14, 0, 3, 5, 7, 9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ COLUMN(x,y,12, 12, 14, 0, 2, 5, 7, 9, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ COLUMN(x,y,14, 14, 0, 2, 4, 7, 9, 11, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+}
+
+/* compute one round of Q (short variants) */
+static void RND512Q(uint8_t *x, uint32_t *y, uint32_t r) {
+ uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp;
+ uint32_t* x32 = (uint32_t*)x;
+ x32[ 0] = ~x32[ 0];
+ x32[ 1] ^= 0xffffffff^r;
+ x32[ 2] = ~x32[ 2];
+ x32[ 3] ^= 0xefffffff^r;
+ x32[ 4] = ~x32[ 4];
+ x32[ 5] ^= 0xdfffffff^r;
+ x32[ 6] = ~x32[ 6];
+ x32[ 7] ^= 0xcfffffff^r;
+ x32[ 8] = ~x32[ 8];
+ x32[ 9] ^= 0xbfffffff^r;
+ x32[10] = ~x32[10];
+ x32[11] ^= 0xafffffff^r;
+ x32[12] = ~x32[12];
+ x32[13] ^= 0x9fffffff^r;
+ x32[14] = ~x32[14];
+ x32[15] ^= 0x8fffffff^r;
+ COLUMN(x,y, 0, 2, 6, 10, 14, 1, 5, 9, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ COLUMN(x,y, 2, 4, 8, 12, 0, 3, 7, 11, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ COLUMN(x,y, 4, 6, 10, 14, 2, 5, 9, 13, 1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ COLUMN(x,y, 6, 8, 12, 0, 4, 7, 11, 15, 3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ COLUMN(x,y, 8, 10, 14, 2, 6, 9, 13, 1, 5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ COLUMN(x,y,10, 12, 0, 4, 8, 11, 15, 3, 7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ COLUMN(x,y,12, 14, 2, 6, 10, 13, 1, 5, 9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+ COLUMN(x,y,14, 0, 4, 8, 12, 15, 3, 7, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+}
+
+/* compute compression function (short variants) */
+static void F512(uint32_t *h, const uint32_t *m) {
+ int i;
+ uint32_t Ptmp[2*COLS512];
+ uint32_t Qtmp[2*COLS512];
+ uint32_t y[2*COLS512];
+ uint32_t z[2*COLS512];
+
+ for (i = 0; i < 2*COLS512; i++) {
+ z[i] = m[i];
+ Ptmp[i] = h[i]^m[i];
+ }
+
+ /* compute Q(m) */
+ RND512Q((uint8_t*)z, y, 0x00000000);
+ RND512Q((uint8_t*)y, z, 0x01000000);
+ RND512Q((uint8_t*)z, y, 0x02000000);
+ RND512Q((uint8_t*)y, z, 0x03000000);
+ RND512Q((uint8_t*)z, y, 0x04000000);
+ RND512Q((uint8_t*)y, z, 0x05000000);
+ RND512Q((uint8_t*)z, y, 0x06000000);
+ RND512Q((uint8_t*)y, z, 0x07000000);
+ RND512Q((uint8_t*)z, y, 0x08000000);
+ RND512Q((uint8_t*)y, Qtmp, 0x09000000);
+
+ /* compute P(h+m) */
+ RND512P((uint8_t*)Ptmp, y, 0x00000000);
+ RND512P((uint8_t*)y, z, 0x00000001);
+ RND512P((uint8_t*)z, y, 0x00000002);
+ RND512P((uint8_t*)y, z, 0x00000003);
+ RND512P((uint8_t*)z, y, 0x00000004);
+ RND512P((uint8_t*)y, z, 0x00000005);
+ RND512P((uint8_t*)z, y, 0x00000006);
+ RND512P((uint8_t*)y, z, 0x00000007);
+ RND512P((uint8_t*)z, y, 0x00000008);
+ RND512P((uint8_t*)y, Ptmp, 0x00000009);
+
+ /* compute P(h+m) + Q(m) + h */
+ for (i = 0; i < 2*COLS512; i++) {
+ h[i] ^= Ptmp[i]^Qtmp[i];
+ }
+}
+
+
+/* digest up to msglen bytes of input (full blocks only) */
+static void Transform(hashState *ctx,
+ const uint8_t *input,
+ int msglen) {
+
+ /* digest message, one block at a time */
+ for (; msglen >= SIZE512;
+ msglen -= SIZE512, input += SIZE512) {
+ F512(ctx->chaining,(uint32_t*)input);
+
+ /* increment block counter */
+ ctx->block_counter1++;
+ if (ctx->block_counter1 == 0) ctx->block_counter2++;
+ }
+}
+
+/* given state h, do h <- P(h)+h */
+static void OutputTransformation(hashState *ctx) {
+ int j;
+ uint32_t temp[2*COLS512];
+ uint32_t y[2*COLS512];
+ uint32_t z[2*COLS512];
+
+
+
+ for (j = 0; j < 2*COLS512; j++) {
+ temp[j] = ctx->chaining[j];
+ }
+ RND512P((uint8_t*)temp, y, 0x00000000);
+ RND512P((uint8_t*)y, z, 0x00000001);
+ RND512P((uint8_t*)z, y, 0x00000002);
+ RND512P((uint8_t*)y, z, 0x00000003);
+ RND512P((uint8_t*)z, y, 0x00000004);
+ RND512P((uint8_t*)y, z, 0x00000005);
+ RND512P((uint8_t*)z, y, 0x00000006);
+ RND512P((uint8_t*)y, z, 0x00000007);
+ RND512P((uint8_t*)z, y, 0x00000008);
+ RND512P((uint8_t*)y, temp, 0x00000009);
+ for (j = 0; j < 2*COLS512; j++) {
+ ctx->chaining[j] ^= temp[j];
+ }
+}
+
+/* initialise context */
+static void Init(hashState* ctx) {
+ uint32_t i = 0;
+ /* allocate memory for state and data buffer */
+
+ for(;i<(SIZE512/sizeof(uint32_t));i++)
+ {
+ ctx->chaining[i] = 0;
+ }
+
+ /* set initial value */
+ ctx->chaining[2*COLS512-1] = u32BIG((uint32_t)HASH_BIT_LEN);
+
+ /* set other variables */
+ ctx->buf_ptr = 0;
+ ctx->block_counter1 = 0;
+ ctx->block_counter2 = 0;
+ ctx->bits_in_last_byte = 0;
+}
+
+/* update state with databitlen bits of input */
+static void Update(hashState* ctx,
+ const BitSequence* input,
+ DataLength databitlen) {
+ int index = 0;
+ int msglen = (int)(databitlen/8);
+ int rem = (int)(databitlen%8);
+
+ /* if the buffer contains data that has not yet been digested, first
+ add data to buffer until full */
+ if (ctx->buf_ptr) {
+ while (ctx->buf_ptr < SIZE512 && index < msglen) {
+ ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
+ }
+ if (ctx->buf_ptr < SIZE512) {
+ /* buffer still not full, return */
+ if (rem) {
+ ctx->bits_in_last_byte = rem;
+ ctx->buffer[(int)ctx->buf_ptr++] = input[index];
+ }
+ return;
+ }
+
+ /* digest buffer */
+ ctx->buf_ptr = 0;
+ Transform(ctx, ctx->buffer, SIZE512);
+ }
+
+ /* digest bulk of message */
+ Transform(ctx, input+index, msglen-index);
+ index += ((msglen-index)/SIZE512)*SIZE512;
+
+ /* store remaining data in buffer */
+ while (index < msglen) {
+ ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
+ }
+
+ /* if non-integral number of bytes have been supplied, store
+ remaining bits in last byte, together with information about
+ number of bits */
+ if (rem) {
+ ctx->bits_in_last_byte = rem;
+ ctx->buffer[(int)ctx->buf_ptr++] = input[index];
+ }
+}
+
+#define BILB ctx->bits_in_last_byte
+
+/* finalise: process remaining data (including padding), perform
+ output transformation, and write hash result to 'output' */
+static void Final(hashState* ctx,
+ BitSequence* output) {
+ int i, j = 0, hashbytelen = HASH_BIT_LEN/8;
+ uint8_t *s = (BitSequence*)ctx->chaining;
+
+ /* pad with '1'-bit and first few '0'-bits */
+ if (BILB) {
+ ctx->buffer[(int)ctx->buf_ptr-1] &= ((1<<BILB)-1)<<(8-BILB);
+ ctx->buffer[(int)ctx->buf_ptr-1] ^= 0x1<<(7-BILB);
+ BILB = 0;
+ }
+ else ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
+
+ /* pad with '0'-bits */
+ if (ctx->buf_ptr > SIZE512-LENGTHFIELDLEN) {
+ /* padding requires two blocks */
+ while (ctx->buf_ptr < SIZE512) {
+ ctx->buffer[(int)ctx->buf_ptr++] = 0;
+ }
+ /* digest first padding block */
+ Transform(ctx, ctx->buffer, SIZE512);
+ ctx->buf_ptr = 0;
+ }
+ while (ctx->buf_ptr < SIZE512-LENGTHFIELDLEN) {
+ ctx->buffer[(int)ctx->buf_ptr++] = 0;
+ }
+
+ /* length padding */
+ ctx->block_counter1++;
+ if (ctx->block_counter1 == 0) ctx->block_counter2++;
+ ctx->buf_ptr = SIZE512;
+
+ while (ctx->buf_ptr > SIZE512-(int)sizeof(uint32_t)) {
+ ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter1;
+ ctx->block_counter1 >>= 8;
+ }
+ while (ctx->buf_ptr > SIZE512-LENGTHFIELDLEN) {
+ ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter2;
+ ctx->block_counter2 >>= 8;
+ }
+ /* digest final padding block */
+ Transform(ctx, ctx->buffer, SIZE512);
+ /* perform output transformation */
+ OutputTransformation(ctx);
+
+ /* store hash result in output */
+ for (i = SIZE512-hashbytelen; i < SIZE512; i++,j++) {
+ output[j] = s[i];
+ }
+
+ /* zeroise relevant variables and deallocate memory */
+ for (i = 0; i < COLS512; i++) {
+ ctx->chaining[i] = 0;
+ }
+ for (i = 0; i < SIZE512; i++) {
+ ctx->buffer[i] = 0;
+ }
+}
+
+/* hash bit sequence */
+void groestl(const BitSequence* data,
+ DataLength databitlen,
+ BitSequence* hashval) {
+
+ hashState context;
+
+ /* initialise */
+ Init(&context);
+
+
+ /* process message */
+ Update(&context, data, databitlen);
+
+ /* finalise */
+ Final(&context, hashval);
+}
+/*
+static int crypto_hash(unsigned char *out,
+ const unsigned char *in,
+ unsigned long long len)
+{
+ groestl(in, 8*len, out);
+ return 0;
+}
+
+*/
diff --git a/stratum/algos/cryptonote/crypto/c_groestl.h b/stratum/algos/cryptonote/crypto/c_groestl.h
new file mode 100644
index 000000000..21069093f
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/c_groestl.h
@@ -0,0 +1,56 @@
+#pragma once
+/*
+#include "crypto_uint8.h"
+#include "crypto_uint32.h"
+#include "crypto_uint64.h"
+#include "crypto_hash.h"
+
+typedef crypto_uint8 uint8_t;
+typedef crypto_uint32 uint32_t;
+typedef crypto_uint64 uint64_t;
+*/
+#include <stdint.h>
+#include "hash.h"
+
+/* some sizes (number of bytes) */
+#define ROWS 8
+#define LENGTHFIELDLEN ROWS
+#define COLS512 8
+
+#define SIZE512 (ROWS*COLS512)
+
+#define ROUNDS512 10
+#define HASH_BIT_LEN 256
+
+#define ROTL32(v, n) ((((v)<<(n))|((v)>>(32-(n))))&li_32(ffffffff))
+
+
+#define li_32(h) 0x##h##u
+#define EXT_BYTE(var,n) ((uint8_t)((uint32_t)(var) >> (8*n)))
+#define u32BIG(a) \
+ ((ROTL32(a,8) & li_32(00FF00FF)) | \
+ (ROTL32(a,24) & li_32(FF00FF00)))
+
+
+/* NIST API begin */
+typedef struct {
+ uint32_t chaining[SIZE512/sizeof(uint32_t)]; /* actual state */
+ uint32_t block_counter1,
+ block_counter2; /* message block counter(s) */
+ BitSequence buffer[SIZE512]; /* data buffer */
+ int buf_ptr; /* data buffer pointer */
+ int bits_in_last_byte; /* no. of message bits in last byte of
+ data buffer */
+} hashState;
+
+/*void Init(hashState*);
+void Update(hashState*, const BitSequence*, DataLength);
+void Final(hashState*, BitSequence*); */
+void groestl(const BitSequence*, DataLength, BitSequence*);
+/* NIST API end */
+
+/*
+int crypto_hash(unsigned char *out,
+ const unsigned char *in,
+ unsigned long long len);
+*/
diff --git a/stratum/algos/cryptonote/crypto/c_jh.c b/stratum/algos/cryptonote/crypto/c_jh.c
new file mode 100644
index 000000000..a7c9ede5d
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/c_jh.c
@@ -0,0 +1,367 @@
+/*This program gives the 64-bit optimized bitslice implementation of JH using ANSI C
+
+ --------------------------------
+ Performance
+
+ Microprocessor: Intel CORE 2 processor (Core 2 Duo Mobile T6600 2.2GHz)
+ Operating System: 64-bit Ubuntu 10.04 (Linux kernel 2.6.32-22-generic)
+ Speed for long message:
+ 1) 45.8 cycles/byte compiler: Intel C++ Compiler 11.1 compilation option: icc -O2
+ 2) 56.8 cycles/byte compiler: gcc 4.4.3 compilation option: gcc -O3
+
+ --------------------------------
+ Last Modified: January 16, 2011
+*/
+
+#include "c_jh.h"
+
+#include <stdint.h>
+#include <string.h>
+
+/*typedef unsigned long long uint64;*/
+typedef uint64_t uint64;
+
+/*define data alignment for different C compilers*/
+#if defined(__GNUC__)
+ #define DATA_ALIGN16(x) x __attribute__ ((aligned(16)))
+#else
+ #define DATA_ALIGN16(x) __declspec(align(16)) x
+#endif
+
+
+typedef struct {
+ int hashbitlen; /*the message digest size*/
+ unsigned long long databitlen; /*the message size in bits*/
+ unsigned long long datasize_in_buffer; /*the size of the message remained in buffer; assumed to be multiple of 8bits except for the last partial block at the end of the message*/
+ DATA_ALIGN16(uint64 x[8][2]); /*the 1024-bit state, ( x[i][0] || x[i][1] ) is the ith row of the state in the pseudocode*/
+ unsigned char buffer[64]; /*the 512-bit message block to be hashed;*/
+} hashState;
+
+
+/*The initial hash value H(0)*/
+const unsigned char JH224_H0[128]={0x2d,0xfe,0xdd,0x62,0xf9,0x9a,0x98,0xac,0xae,0x7c,0xac,0xd6,0x19,0xd6,0x34,0xe7,0xa4,0x83,0x10,0x5,0xbc,0x30,0x12,0x16,0xb8,0x60,0x38,0xc6,0xc9,0x66,0x14,0x94,0x66,0xd9,0x89,0x9f,0x25,0x80,0x70,0x6f,0xce,0x9e,0xa3,0x1b,0x1d,0x9b,0x1a,0xdc,0x11,0xe8,0x32,0x5f,0x7b,0x36,0x6e,0x10,0xf9,0x94,0x85,0x7f,0x2,0xfa,0x6,0xc1,0x1b,0x4f,0x1b,0x5c,0xd8,0xc8,0x40,0xb3,0x97,0xf6,0xa1,0x7f,0x6e,0x73,0x80,0x99,0xdc,0xdf,0x93,0xa5,0xad,0xea,0xa3,0xd3,0xa4,0x31,0xe8,0xde,0xc9,0x53,0x9a,0x68,0x22,0xb4,0xa9,0x8a,0xec,0x86,0xa1,0xe4,0xd5,0x74,0xac,0x95,0x9c,0xe5,0x6c,0xf0,0x15,0x96,0xd,0xea,0xb5,0xab,0x2b,0xbf,0x96,0x11,0xdc,0xf0,0xdd,0x64,0xea,0x6e};
+const unsigned char JH256_H0[128]={0xeb,0x98,0xa3,0x41,0x2c,0x20,0xd3,0xeb,0x92,0xcd,0xbe,0x7b,0x9c,0xb2,0x45,0xc1,0x1c,0x93,0x51,0x91,0x60,0xd4,0xc7,0xfa,0x26,0x0,0x82,0xd6,0x7e,0x50,0x8a,0x3,0xa4,0x23,0x9e,0x26,0x77,0x26,0xb9,0x45,0xe0,0xfb,0x1a,0x48,0xd4,0x1a,0x94,0x77,0xcd,0xb5,0xab,0x26,0x2,0x6b,0x17,0x7a,0x56,0xf0,0x24,0x42,0xf,0xff,0x2f,0xa8,0x71,0xa3,0x96,0x89,0x7f,0x2e,0x4d,0x75,0x1d,0x14,0x49,0x8,0xf7,0x7d,0xe2,0x62,0x27,0x76,0x95,0xf7,0x76,0x24,0x8f,0x94,0x87,0xd5,0xb6,0x57,0x47,0x80,0x29,0x6c,0x5c,0x5e,0x27,0x2d,0xac,0x8e,0xd,0x6c,0x51,0x84,0x50,0xc6,0x57,0x5,0x7a,0xf,0x7b,0xe4,0xd3,0x67,0x70,0x24,0x12,0xea,0x89,0xe3,0xab,0x13,0xd3,0x1c,0xd7,0x69};
+const unsigned char JH384_H0[128]={0x48,0x1e,0x3b,0xc6,0xd8,0x13,0x39,0x8a,0x6d,0x3b,0x5e,0x89,0x4a,0xde,0x87,0x9b,0x63,0xfa,0xea,0x68,0xd4,0x80,0xad,0x2e,0x33,0x2c,0xcb,0x21,0x48,0xf,0x82,0x67,0x98,0xae,0xc8,0x4d,0x90,0x82,0xb9,0x28,0xd4,0x55,0xea,0x30,0x41,0x11,0x42,0x49,0x36,0xf5,0x55,0xb2,0x92,0x48,0x47,0xec,0xc7,0x25,0xa,0x93,0xba,0xf4,0x3c,0xe1,0x56,0x9b,0x7f,0x8a,0x27,0xdb,0x45,0x4c,0x9e,0xfc,0xbd,0x49,0x63,0x97,0xaf,0xe,0x58,0x9f,0xc2,0x7d,0x26,0xaa,0x80,0xcd,0x80,0xc0,0x8b,0x8c,0x9d,0xeb,0x2e,0xda,0x8a,0x79,0x81,0xe8,0xf8,0xd5,0x37,0x3a,0xf4,0x39,0x67,0xad,0xdd,0xd1,0x7a,0x71,0xa9,0xb4,0xd3,0xbd,0xa4,0x75,0xd3,0x94,0x97,0x6c,0x3f,0xba,0x98,0x42,0x73,0x7f};
+const unsigned char JH512_H0[128]={0x6f,0xd1,0x4b,0x96,0x3e,0x0,0xaa,0x17,0x63,0x6a,0x2e,0x5,0x7a,0x15,0xd5,0x43,0x8a,0x22,0x5e,0x8d,0xc,0x97,0xef,0xb,0xe9,0x34,0x12,0x59,0xf2,0xb3,0xc3,0x61,0x89,0x1d,0xa0,0xc1,0x53,0x6f,0x80,0x1e,0x2a,0xa9,0x5,0x6b,0xea,0x2b,0x6d,0x80,0x58,0x8e,0xcc,0xdb,0x20,0x75,0xba,0xa6,0xa9,0xf,0x3a,0x76,0xba,0xf8,0x3b,0xf7,0x1,0x69,0xe6,0x5,0x41,0xe3,0x4a,0x69,0x46,0xb5,0x8a,0x8e,0x2e,0x6f,0xe6,0x5a,0x10,0x47,0xa7,0xd0,0xc1,0x84,0x3c,0x24,0x3b,0x6e,0x71,0xb1,0x2d,0x5a,0xc1,0x99,0xcf,0x57,0xf6,0xec,0x9d,0xb1,0xf8,0x56,0xa7,0x6,0x88,0x7c,0x57,0x16,0xb1,0x56,0xe3,0xc2,0xfc,0xdf,0xe6,0x85,0x17,0xfb,0x54,0x5a,0x46,0x78,0xcc,0x8c,0xdd,0x4b};
+
+/*42 round constants, each round constant is 32-byte (256-bit)*/
+const unsigned char E8_bitslice_roundconstant[42][32]={
+{0x72,0xd5,0xde,0xa2,0xdf,0x15,0xf8,0x67,0x7b,0x84,0x15,0xa,0xb7,0x23,0x15,0x57,0x81,0xab,0xd6,0x90,0x4d,0x5a,0x87,0xf6,0x4e,0x9f,0x4f,0xc5,0xc3,0xd1,0x2b,0x40},
+{0xea,0x98,0x3a,0xe0,0x5c,0x45,0xfa,0x9c,0x3,0xc5,0xd2,0x99,0x66,0xb2,0x99,0x9a,0x66,0x2,0x96,0xb4,0xf2,0xbb,0x53,0x8a,0xb5,0x56,0x14,0x1a,0x88,0xdb,0xa2,0x31},
+{0x3,0xa3,0x5a,0x5c,0x9a,0x19,0xe,0xdb,0x40,0x3f,0xb2,0xa,0x87,0xc1,0x44,0x10,0x1c,0x5,0x19,0x80,0x84,0x9e,0x95,0x1d,0x6f,0x33,0xeb,0xad,0x5e,0xe7,0xcd,0xdc},
+{0x10,0xba,0x13,0x92,0x2,0xbf,0x6b,0x41,0xdc,0x78,0x65,0x15,0xf7,0xbb,0x27,0xd0,0xa,0x2c,0x81,0x39,0x37,0xaa,0x78,0x50,0x3f,0x1a,0xbf,0xd2,0x41,0x0,0x91,0xd3},
+{0x42,0x2d,0x5a,0xd,0xf6,0xcc,0x7e,0x90,0xdd,0x62,0x9f,0x9c,0x92,0xc0,0x97,0xce,0x18,0x5c,0xa7,0xb,0xc7,0x2b,0x44,0xac,0xd1,0xdf,0x65,0xd6,0x63,0xc6,0xfc,0x23},
+{0x97,0x6e,0x6c,0x3,0x9e,0xe0,0xb8,0x1a,0x21,0x5,0x45,0x7e,0x44,0x6c,0xec,0xa8,0xee,0xf1,0x3,0xbb,0x5d,0x8e,0x61,0xfa,0xfd,0x96,0x97,0xb2,0x94,0x83,0x81,0x97},
+{0x4a,0x8e,0x85,0x37,0xdb,0x3,0x30,0x2f,0x2a,0x67,0x8d,0x2d,0xfb,0x9f,0x6a,0x95,0x8a,0xfe,0x73,0x81,0xf8,0xb8,0x69,0x6c,0x8a,0xc7,0x72,0x46,0xc0,0x7f,0x42,0x14},
+{0xc5,0xf4,0x15,0x8f,0xbd,0xc7,0x5e,0xc4,0x75,0x44,0x6f,0xa7,0x8f,0x11,0xbb,0x80,0x52,0xde,0x75,0xb7,0xae,0xe4,0x88,0xbc,0x82,0xb8,0x0,0x1e,0x98,0xa6,0xa3,0xf4},
+{0x8e,0xf4,0x8f,0x33,0xa9,0xa3,0x63,0x15,0xaa,0x5f,0x56,0x24,0xd5,0xb7,0xf9,0x89,0xb6,0xf1,0xed,0x20,0x7c,0x5a,0xe0,0xfd,0x36,0xca,0xe9,0x5a,0x6,0x42,0x2c,0x36},
+{0xce,0x29,0x35,0x43,0x4e,0xfe,0x98,0x3d,0x53,0x3a,0xf9,0x74,0x73,0x9a,0x4b,0xa7,0xd0,0xf5,0x1f,0x59,0x6f,0x4e,0x81,0x86,0xe,0x9d,0xad,0x81,0xaf,0xd8,0x5a,0x9f},
+{0xa7,0x5,0x6,0x67,0xee,0x34,0x62,0x6a,0x8b,0xb,0x28,0xbe,0x6e,0xb9,0x17,0x27,0x47,0x74,0x7,0x26,0xc6,0x80,0x10,0x3f,0xe0,0xa0,0x7e,0x6f,0xc6,0x7e,0x48,0x7b},
+{0xd,0x55,0xa,0xa5,0x4a,0xf8,0xa4,0xc0,0x91,0xe3,0xe7,0x9f,0x97,0x8e,0xf1,0x9e,0x86,0x76,0x72,0x81,0x50,0x60,0x8d,0xd4,0x7e,0x9e,0x5a,0x41,0xf3,0xe5,0xb0,0x62},
+{0xfc,0x9f,0x1f,0xec,0x40,0x54,0x20,0x7a,0xe3,0xe4,0x1a,0x0,0xce,0xf4,0xc9,0x84,0x4f,0xd7,0x94,0xf5,0x9d,0xfa,0x95,0xd8,0x55,0x2e,0x7e,0x11,0x24,0xc3,0x54,0xa5},
+{0x5b,0xdf,0x72,0x28,0xbd,0xfe,0x6e,0x28,0x78,0xf5,0x7f,0xe2,0xf,0xa5,0xc4,0xb2,0x5,0x89,0x7c,0xef,0xee,0x49,0xd3,0x2e,0x44,0x7e,0x93,0x85,0xeb,0x28,0x59,0x7f},
+{0x70,0x5f,0x69,0x37,0xb3,0x24,0x31,0x4a,0x5e,0x86,0x28,0xf1,0x1d,0xd6,0xe4,0x65,0xc7,0x1b,0x77,0x4,0x51,0xb9,0x20,0xe7,0x74,0xfe,0x43,0xe8,0x23,0xd4,0x87,0x8a},
+{0x7d,0x29,0xe8,0xa3,0x92,0x76,0x94,0xf2,0xdd,0xcb,0x7a,0x9,0x9b,0x30,0xd9,0xc1,0x1d,0x1b,0x30,0xfb,0x5b,0xdc,0x1b,0xe0,0xda,0x24,0x49,0x4f,0xf2,0x9c,0x82,0xbf},
+{0xa4,0xe7,0xba,0x31,0xb4,0x70,0xbf,0xff,0xd,0x32,0x44,0x5,0xde,0xf8,0xbc,0x48,0x3b,0xae,0xfc,0x32,0x53,0xbb,0xd3,0x39,0x45,0x9f,0xc3,0xc1,0xe0,0x29,0x8b,0xa0},
+{0xe5,0xc9,0x5,0xfd,0xf7,0xae,0x9,0xf,0x94,0x70,0x34,0x12,0x42,0x90,0xf1,0x34,0xa2,0x71,0xb7,0x1,0xe3,0x44,0xed,0x95,0xe9,0x3b,0x8e,0x36,0x4f,0x2f,0x98,0x4a},
+{0x88,0x40,0x1d,0x63,0xa0,0x6c,0xf6,0x15,0x47,0xc1,0x44,0x4b,0x87,0x52,0xaf,0xff,0x7e,0xbb,0x4a,0xf1,0xe2,0xa,0xc6,0x30,0x46,0x70,0xb6,0xc5,0xcc,0x6e,0x8c,0xe6},
+{0xa4,0xd5,0xa4,0x56,0xbd,0x4f,0xca,0x0,0xda,0x9d,0x84,0x4b,0xc8,0x3e,0x18,0xae,0x73,0x57,0xce,0x45,0x30,0x64,0xd1,0xad,0xe8,0xa6,0xce,0x68,0x14,0x5c,0x25,0x67},
+{0xa3,0xda,0x8c,0xf2,0xcb,0xe,0xe1,0x16,0x33,0xe9,0x6,0x58,0x9a,0x94,0x99,0x9a,0x1f,0x60,0xb2,0x20,0xc2,0x6f,0x84,0x7b,0xd1,0xce,0xac,0x7f,0xa0,0xd1,0x85,0x18},
+{0x32,0x59,0x5b,0xa1,0x8d,0xdd,0x19,0xd3,0x50,0x9a,0x1c,0xc0,0xaa,0xa5,0xb4,0x46,0x9f,0x3d,0x63,0x67,0xe4,0x4,0x6b,0xba,0xf6,0xca,0x19,0xab,0xb,0x56,0xee,0x7e},
+{0x1f,0xb1,0x79,0xea,0xa9,0x28,0x21,0x74,0xe9,0xbd,0xf7,0x35,0x3b,0x36,0x51,0xee,0x1d,0x57,0xac,0x5a,0x75,0x50,0xd3,0x76,0x3a,0x46,0xc2,0xfe,0xa3,0x7d,0x70,0x1},
+{0xf7,0x35,0xc1,0xaf,0x98,0xa4,0xd8,0x42,0x78,0xed,0xec,0x20,0x9e,0x6b,0x67,0x79,0x41,0x83,0x63,0x15,0xea,0x3a,0xdb,0xa8,0xfa,0xc3,0x3b,0x4d,0x32,0x83,0x2c,0x83},
+{0xa7,0x40,0x3b,0x1f,0x1c,0x27,0x47,0xf3,0x59,0x40,0xf0,0x34,0xb7,0x2d,0x76,0x9a,0xe7,0x3e,0x4e,0x6c,0xd2,0x21,0x4f,0xfd,0xb8,0xfd,0x8d,0x39,0xdc,0x57,0x59,0xef},
+{0x8d,0x9b,0xc,0x49,0x2b,0x49,0xeb,0xda,0x5b,0xa2,0xd7,0x49,0x68,0xf3,0x70,0xd,0x7d,0x3b,0xae,0xd0,0x7a,0x8d,0x55,0x84,0xf5,0xa5,0xe9,0xf0,0xe4,0xf8,0x8e,0x65},
+{0xa0,0xb8,0xa2,0xf4,0x36,0x10,0x3b,0x53,0xc,0xa8,0x7,0x9e,0x75,0x3e,0xec,0x5a,0x91,0x68,0x94,0x92,0x56,0xe8,0x88,0x4f,0x5b,0xb0,0x5c,0x55,0xf8,0xba,0xbc,0x4c},
+{0xe3,0xbb,0x3b,0x99,0xf3,0x87,0x94,0x7b,0x75,0xda,0xf4,0xd6,0x72,0x6b,0x1c,0x5d,0x64,0xae,0xac,0x28,0xdc,0x34,0xb3,0x6d,0x6c,0x34,0xa5,0x50,0xb8,0x28,0xdb,0x71},
+{0xf8,0x61,0xe2,0xf2,0x10,0x8d,0x51,0x2a,0xe3,0xdb,0x64,0x33,0x59,0xdd,0x75,0xfc,0x1c,0xac,0xbc,0xf1,0x43,0xce,0x3f,0xa2,0x67,0xbb,0xd1,0x3c,0x2,0xe8,0x43,0xb0},
+{0x33,0xa,0x5b,0xca,0x88,0x29,0xa1,0x75,0x7f,0x34,0x19,0x4d,0xb4,0x16,0x53,0x5c,0x92,0x3b,0x94,0xc3,0xe,0x79,0x4d,0x1e,0x79,0x74,0x75,0xd7,0xb6,0xee,0xaf,0x3f},
+{0xea,0xa8,0xd4,0xf7,0xbe,0x1a,0x39,0x21,0x5c,0xf4,0x7e,0x9,0x4c,0x23,0x27,0x51,0x26,0xa3,0x24,0x53,0xba,0x32,0x3c,0xd2,0x44,0xa3,0x17,0x4a,0x6d,0xa6,0xd5,0xad},
+{0xb5,0x1d,0x3e,0xa6,0xaf,0xf2,0xc9,0x8,0x83,0x59,0x3d,0x98,0x91,0x6b,0x3c,0x56,0x4c,0xf8,0x7c,0xa1,0x72,0x86,0x60,0x4d,0x46,0xe2,0x3e,0xcc,0x8,0x6e,0xc7,0xf6},
+{0x2f,0x98,0x33,0xb3,0xb1,0xbc,0x76,0x5e,0x2b,0xd6,0x66,0xa5,0xef,0xc4,0xe6,0x2a,0x6,0xf4,0xb6,0xe8,0xbe,0xc1,0xd4,0x36,0x74,0xee,0x82,0x15,0xbc,0xef,0x21,0x63},
+{0xfd,0xc1,0x4e,0xd,0xf4,0x53,0xc9,0x69,0xa7,0x7d,0x5a,0xc4,0x6,0x58,0x58,0x26,0x7e,0xc1,0x14,0x16,0x6,0xe0,0xfa,0x16,0x7e,0x90,0xaf,0x3d,0x28,0x63,0x9d,0x3f},
+{0xd2,0xc9,0xf2,0xe3,0x0,0x9b,0xd2,0xc,0x5f,0xaa,0xce,0x30,0xb7,0xd4,0xc,0x30,0x74,0x2a,0x51,0x16,0xf2,0xe0,0x32,0x98,0xd,0xeb,0x30,0xd8,0xe3,0xce,0xf8,0x9a},
+{0x4b,0xc5,0x9e,0x7b,0xb5,0xf1,0x79,0x92,0xff,0x51,0xe6,0x6e,0x4,0x86,0x68,0xd3,0x9b,0x23,0x4d,0x57,0xe6,0x96,0x67,0x31,0xcc,0xe6,0xa6,0xf3,0x17,0xa,0x75,0x5},
+{0xb1,0x76,0x81,0xd9,0x13,0x32,0x6c,0xce,0x3c,0x17,0x52,0x84,0xf8,0x5,0xa2,0x62,0xf4,0x2b,0xcb,0xb3,0x78,0x47,0x15,0x47,0xff,0x46,0x54,0x82,0x23,0x93,0x6a,0x48},
+{0x38,0xdf,0x58,0x7,0x4e,0x5e,0x65,0x65,0xf2,0xfc,0x7c,0x89,0xfc,0x86,0x50,0x8e,0x31,0x70,0x2e,0x44,0xd0,0xb,0xca,0x86,0xf0,0x40,0x9,0xa2,0x30,0x78,0x47,0x4e},
+{0x65,0xa0,0xee,0x39,0xd1,0xf7,0x38,0x83,0xf7,0x5e,0xe9,0x37,0xe4,0x2c,0x3a,0xbd,0x21,0x97,0xb2,0x26,0x1,0x13,0xf8,0x6f,0xa3,0x44,0xed,0xd1,0xef,0x9f,0xde,0xe7},
+{0x8b,0xa0,0xdf,0x15,0x76,0x25,0x92,0xd9,0x3c,0x85,0xf7,0xf6,0x12,0xdc,0x42,0xbe,0xd8,0xa7,0xec,0x7c,0xab,0x27,0xb0,0x7e,0x53,0x8d,0x7d,0xda,0xaa,0x3e,0xa8,0xde},
+{0xaa,0x25,0xce,0x93,0xbd,0x2,0x69,0xd8,0x5a,0xf6,0x43,0xfd,0x1a,0x73,0x8,0xf9,0xc0,0x5f,0xef,0xda,0x17,0x4a,0x19,0xa5,0x97,0x4d,0x66,0x33,0x4c,0xfd,0x21,0x6a},
+{0x35,0xb4,0x98,0x31,0xdb,0x41,0x15,0x70,0xea,0x1e,0xf,0xbb,0xed,0xcd,0x54,0x9b,0x9a,0xd0,0x63,0xa1,0x51,0x97,0x40,0x72,0xf6,0x75,0x9d,0xbf,0x91,0x47,0x6f,0xe2}};
+
+
+static void E8(hashState *state); /*The bijective function E8, in bitslice form*/
+static void F8(hashState *state); /*The compression function F8 */
+
+/*The API functions*/
+static HashReturn Init(hashState *state, int hashbitlen);
+static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen);
+static HashReturn Final(hashState *state, BitSequence *hashval);
+HashReturn jh_hash(int hashbitlen, const BitSequence *data,DataLength databitlen, BitSequence *hashval);
+
+/*swapping bit 2i with bit 2i+1 of 64-bit x*/
+#define SWAP1(x) (x) = ((((x) & 0x5555555555555555ULL) << 1) | (((x) & 0xaaaaaaaaaaaaaaaaULL) >> 1));
+/*swapping bits 4i||4i+1 with bits 4i+2||4i+3 of 64-bit x*/
+#define SWAP2(x) (x) = ((((x) & 0x3333333333333333ULL) << 2) | (((x) & 0xccccccccccccccccULL) >> 2));
+/*swapping bits 8i||8i+1||8i+2||8i+3 with bits 8i+4||8i+5||8i+6||8i+7 of 64-bit x*/
+#define SWAP4(x) (x) = ((((x) & 0x0f0f0f0f0f0f0f0fULL) << 4) | (((x) & 0xf0f0f0f0f0f0f0f0ULL) >> 4));
+/*swapping bits 16i||16i+1||......||16i+7 with bits 16i+8||16i+9||......||16i+15 of 64-bit x*/
+#define SWAP8(x) (x) = ((((x) & 0x00ff00ff00ff00ffULL) << 8) | (((x) & 0xff00ff00ff00ff00ULL) >> 8));
+/*swapping bits 32i||32i+1||......||32i+15 with bits 32i+16||32i+17||......||32i+31 of 64-bit x*/
+#define SWAP16(x) (x) = ((((x) & 0x0000ffff0000ffffULL) << 16) | (((x) & 0xffff0000ffff0000ULL) >> 16));
+/*swapping bits 64i||64i+1||......||64i+31 with bits 64i+32||64i+33||......||64i+63 of 64-bit x*/
+#define SWAP32_JH(x) (x) = (((x) << 32) | ((x) >> 32));
+
+/*The MDS transform*/
+#define L(m0,m1,m2,m3,m4,m5,m6,m7) \
+ (m4) ^= (m1); \
+ (m5) ^= (m2); \
+ (m6) ^= (m0) ^ (m3); \
+ (m7) ^= (m0); \
+ (m0) ^= (m5); \
+ (m1) ^= (m6); \
+ (m2) ^= (m4) ^ (m7); \
+ (m3) ^= (m4);
+
+/*Two Sboxes are computed in parallel, each Sbox implements S0 and S1, selected by a constant bit*/
+/*The reason to compute two Sboxes in parallel is to try to fully utilize the parallel processing power*/
+#define SS(m0,m1,m2,m3,m4,m5,m6,m7,cc0,cc1) \
+ m3 = ~(m3); \
+ m7 = ~(m7); \
+ m0 ^= ((~(m2)) & (cc0)); \
+ m4 ^= ((~(m6)) & (cc1)); \
+ temp0 = (cc0) ^ ((m0) & (m1));\
+ temp1 = (cc1) ^ ((m4) & (m5));\
+ m0 ^= ((m2) & (m3)); \
+ m4 ^= ((m6) & (m7)); \
+ m3 ^= ((~(m1)) & (m2)); \
+ m7 ^= ((~(m5)) & (m6)); \
+ m1 ^= ((m0) & (m2)); \
+ m5 ^= ((m4) & (m6)); \
+ m2 ^= ((m0) & (~(m3))); \
+ m6 ^= ((m4) & (~(m7))); \
+ m0 ^= ((m1) | (m3)); \
+ m4 ^= ((m5) | (m7)); \
+ m3 ^= ((m1) & (m2)); \
+ m7 ^= ((m5) & (m6)); \
+ m1 ^= (temp0 & (m0)); \
+ m5 ^= (temp1 & (m4)); \
+ m2 ^= temp0; \
+ m6 ^= temp1;
+
+/*The bijective function E8, in bitslice form*/
+static void E8(hashState *state)
+{
+ uint64 i,roundnumber,temp0,temp1;
+
+ for (roundnumber = 0; roundnumber < 42; roundnumber = roundnumber+7) {
+ /*round 7*roundnumber+0: Sbox, MDS and Swapping layers*/
+ for (i = 0; i < 2; i++) {
+ SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+0])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+0])[i+2] );
+ L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
+ SWAP1(state->x[1][i]); SWAP1(state->x[3][i]); SWAP1(state->x[5][i]); SWAP1(state->x[7][i]);
+ }
+
+ /*round 7*roundnumber+1: Sbox, MDS and Swapping layers*/
+ for (i = 0; i < 2; i++) {
+ SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+1])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+1])[i+2] );
+ L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
+ SWAP2(state->x[1][i]); SWAP2(state->x[3][i]); SWAP2(state->x[5][i]); SWAP2(state->x[7][i]);
+ }
+
+ /*round 7*roundnumber+2: Sbox, MDS and Swapping layers*/
+ for (i = 0; i < 2; i++) {
+ SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+2])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+2])[i+2] );
+ L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
+ SWAP4(state->x[1][i]); SWAP4(state->x[3][i]); SWAP4(state->x[5][i]); SWAP4(state->x[7][i]);
+ }
+
+ /*round 7*roundnumber+3: Sbox, MDS and Swapping layers*/
+ for (i = 0; i < 2; i++) {
+ SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+3])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+3])[i+2] );
+ L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
+ SWAP8(state->x[1][i]); SWAP8(state->x[3][i]); SWAP8(state->x[5][i]); SWAP8(state->x[7][i]);
+ }
+
+ /*round 7*roundnumber+4: Sbox, MDS and Swapping layers*/
+ for (i = 0; i < 2; i++) {
+ SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+4])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+4])[i+2] );
+ L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
+ SWAP16(state->x[1][i]); SWAP16(state->x[3][i]); SWAP16(state->x[5][i]); SWAP16(state->x[7][i]);
+ }
+
+ /*round 7*roundnumber+5: Sbox, MDS and Swapping layers*/
+ for (i = 0; i < 2; i++) {
+ SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+5])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+5])[i+2] );
+ L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
+ SWAP32_JH(state->x[1][i]); SWAP32_JH(state->x[3][i]); SWAP32_JH(state->x[5][i]); SWAP32_JH(state->x[7][i]);
+ }
+
+ /*round 7*roundnumber+6: Sbox and MDS layers*/
+ for (i = 0; i < 2; i++) {
+ SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+6])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+6])[i+2] );
+ L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
+ }
+ /*round 7*roundnumber+6: swapping layer*/
+ for (i = 1; i < 8; i = i+2) {
+ temp0 = state->x[i][0]; state->x[i][0] = state->x[i][1]; state->x[i][1] = temp0;
+ }
+ }
+
+}
+
+/*The compression function F8 */
+static void F8(hashState *state)
+{
+ uint64 i;
+
+ /*xor the 512-bit message with the fist half of the 1024-bit hash state*/
+ for (i = 0; i < 8; i++) state->x[i >> 1][i & 1] ^= ((uint64*)state->buffer)[i];
+
+ /*the bijective function E8 */
+ E8(state);
+
+ /*xor the 512-bit message with the second half of the 1024-bit hash state*/
+ for (i = 0; i < 8; i++) state->x[(8+i) >> 1][(8+i) & 1] ^= ((uint64*)state->buffer)[i];
+}
+
+/*before hashing a message, initialize the hash state as H0 */
+static HashReturn Init(hashState *state, int hashbitlen)
+{
+ state->databitlen = 0;
+ state->datasize_in_buffer = 0;
+
+ /*initialize the initial hash value of JH*/
+ state->hashbitlen = hashbitlen;
+
+ /*load the intital hash value into state*/
+ switch (hashbitlen)
+ {
+ case 224: memcpy(state->x,JH224_H0,128); break;
+ case 256: memcpy(state->x,JH256_H0,128); break;
+ case 384: memcpy(state->x,JH384_H0,128); break;
+ case 512: memcpy(state->x,JH512_H0,128); break;
+ }
+
+ return(SUCCESS);
+}
+
+
+/*hash each 512-bit message block, except the last partial block*/
+static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen)
+{
+ DataLength index; /*the starting address of the data to be compressed*/
+
+ state->databitlen += databitlen;
+ index = 0;
+
+ /*if there is remaining data in the buffer, fill it to a full message block first*/
+ /*we assume that the size of the data in the buffer is the multiple of 8 bits if it is not at the end of a message*/
+
+ /*There is data in the buffer, but the incoming data is insufficient for a full block*/
+ if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) < 512) ) {
+ if ( (databitlen & 7) == 0 ) {
+ memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3)) ;
+ }
+ else memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3)+1) ;
+ state->datasize_in_buffer += databitlen;
+ databitlen = 0;
+ }
+
+ /*There is data in the buffer, and the incoming data is sufficient for a full block*/
+ if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) >= 512) ) {
+ memcpy( state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3) ) ;
+ index = 64-(state->datasize_in_buffer >> 3);
+ databitlen = databitlen - (512 - state->datasize_in_buffer);
+ F8(state);
+ state->datasize_in_buffer = 0;
+ }
+
+ /*hash the remaining full message blocks*/
+ for ( ; databitlen >= 512; index = index+64, databitlen = databitlen - 512) {
+ memcpy(state->buffer, data+index, 64);
+ F8(state);
+ }
+
+ /*store the partial block into buffer, assume that -- if part of the last byte is not part of the message, then that part consists of 0 bits*/
+ if ( databitlen > 0) {
+ if ((databitlen & 7) == 0)
+ memcpy(state->buffer, data+index, (databitlen & 0x1ff) >> 3);
+ else
+ memcpy(state->buffer, data+index, ((databitlen & 0x1ff) >> 3)+1);
+ state->datasize_in_buffer = databitlen;
+ }
+
+ return(SUCCESS);
+}
+
+/*pad the message, process the padded block(s), truncate the hash value H to obtain the message digest*/
+static HashReturn Final(hashState *state, BitSequence *hashval)
+{
+ unsigned int i;
+
+ if ( (state->databitlen & 0x1ff) == 0 ) {
+ /*pad the message when databitlen is multiple of 512 bits, then process the padded block*/
+ memset(state->buffer, 0, 64);
+ state->buffer[0] = 0x80;
+ state->buffer[63] = state->databitlen & 0xff;
+ state->buffer[62] = (state->databitlen >> 8) & 0xff;
+ state->buffer[61] = (state->databitlen >> 16) & 0xff;
+ state->buffer[60] = (state->databitlen >> 24) & 0xff;
+ state->buffer[59] = (state->databitlen >> 32) & 0xff;
+ state->buffer[58] = (state->databitlen >> 40) & 0xff;
+ state->buffer[57] = (state->databitlen >> 48) & 0xff;
+ state->buffer[56] = (state->databitlen >> 56) & 0xff;
+ F8(state);
+ }
+ else {
+ /*set the rest of the bytes in the buffer to 0*/
+ if ( (state->datasize_in_buffer & 7) == 0)
+ for (i = (state->databitlen & 0x1ff) >> 3; i < 64; i++) state->buffer[i] = 0;
+ else
+ for (i = ((state->databitlen & 0x1ff) >> 3)+1; i < 64; i++) state->buffer[i] = 0;
+
+ /*pad and process the partial block when databitlen is not multiple of 512 bits, then hash the padded blocks*/
+ state->buffer[((state->databitlen & 0x1ff) >> 3)] |= 1 << (7- (state->databitlen & 7));
+
+ F8(state);
+ memset(state->buffer, 0, 64);
+ state->buffer[63] = state->databitlen & 0xff;
+ state->buffer[62] = (state->databitlen >> 8) & 0xff;
+ state->buffer[61] = (state->databitlen >> 16) & 0xff;
+ state->buffer[60] = (state->databitlen >> 24) & 0xff;
+ state->buffer[59] = (state->databitlen >> 32) & 0xff;
+ state->buffer[58] = (state->databitlen >> 40) & 0xff;
+ state->buffer[57] = (state->databitlen >> 48) & 0xff;
+ state->buffer[56] = (state->databitlen >> 56) & 0xff;
+ F8(state);
+ }
+
+ /*truncating the final hash value to generate the message digest*/
+ switch(state->hashbitlen) {
+ case 224: memcpy(hashval,(unsigned char*)state->x+64+36,28); break;
+ case 256: memcpy(hashval,(unsigned char*)state->x+64+32,32); break;
+ case 384: memcpy(hashval,(unsigned char*)state->x+64+16,48); break;
+ case 512: memcpy(hashval,(unsigned char*)state->x+64,64); break;
+ }
+
+ return(SUCCESS);
+}
+
+/* hash a message,
+ three inputs: message digest size in bits (hashbitlen); message (data); message length in bits (databitlen)
+ one output: message digest (hashval)
+*/
+HashReturn jh_hash(int hashbitlen, const BitSequence *data,DataLength databitlen, BitSequence *hashval)
+{
+ hashState state;
+
+ if ( hashbitlen == 224 || hashbitlen == 256 || hashbitlen == 384 || hashbitlen == 512 ) {
+ Init(&state, hashbitlen);
+ Update(&state, data, databitlen);
+ Final(&state, hashval);
+ return SUCCESS;
+ }
+ else
+ return(BAD_HASHLEN);
+}
diff --git a/stratum/algos/cryptonote/crypto/c_jh.h b/stratum/algos/cryptonote/crypto/c_jh.h
new file mode 100644
index 000000000..8084ec72b
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/c_jh.h
@@ -0,0 +1,20 @@
+/*This program gives the 64-bit optimized bitslice implementation of JH using ANSI C
+
+ --------------------------------
+ Performance
+
+ Microprocessor: Intel CORE 2 processor (Core 2 Duo Mobile T6600 2.2GHz)
+ Operating System: 64-bit Ubuntu 10.04 (Linux kernel 2.6.32-22-generic)
+ Speed for long message:
+ 1) 45.8 cycles/byte compiler: Intel C++ Compiler 11.1 compilation option: icc -O2
+ 2) 56.8 cycles/byte compiler: gcc 4.4.3 compilation option: gcc -O3
+
+ --------------------------------
+ Last Modified: January 16, 2011
+*/
+#pragma once
+#include "hash.h"
+
+typedef enum {SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2} HashReturn;
+
+HashReturn jh_hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval);
diff --git a/stratum/algos/cryptonote/crypto/c_keccak.c b/stratum/algos/cryptonote/crypto/c_keccak.c
new file mode 100644
index 000000000..a7e5bb400
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/c_keccak.c
@@ -0,0 +1,112 @@
+// keccak.c
+// 19-Nov-11 Markku-Juhani O. Saarinen
+// A baseline Keccak (3rd round) implementation.
+
+#include "hash-ops.h"
+#include "c_keccak.h"
+
+const uint64_t keccakf_rndc[24] =
+{
+ 0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
+ 0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
+ 0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
+ 0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
+ 0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
+ 0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
+ 0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
+ 0x8000000000008080, 0x0000000080000001, 0x8000000080008008
+};
+
+const int keccakf_rotc[24] =
+{
+ 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
+ 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44
+};
+
+const int keccakf_piln[24] =
+{
+ 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
+ 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1
+};
+
+// update the state with given number of rounds
+
+void keccakf(uint64_t st[25], int rounds)
+{
+ int i, j, round;
+ uint64_t t, bc[5];
+
+ for (round = 0; round < rounds; round++) {
+
+ // Theta
+ for (i = 0; i < 5; i++)
+ bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] ^ st[i + 20];
+
+ for (i = 0; i < 5; i++) {
+ t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
+ for (j = 0; j < 25; j += 5)
+ st[j + i] ^= t;
+ }
+
+ // Rho Pi
+ t = st[1];
+ for (i = 0; i < 24; i++) {
+ j = keccakf_piln[i];
+ bc[0] = st[j];
+ st[j] = ROTL64(t, keccakf_rotc[i]);
+ t = bc[0];
+ }
+
+ // Chi
+ for (j = 0; j < 25; j += 5) {
+ for (i = 0; i < 5; i++)
+ bc[i] = st[j + i];
+ for (i = 0; i < 5; i++)
+ st[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5];
+ }
+
+ // Iota
+ st[0] ^= keccakf_rndc[round];
+ }
+}
+
+// compute a keccak hash (md) of given byte length from "in"
+typedef uint64_t state_t[25];
+
+int keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen)
+{
+ state_t st;
+ uint8_t temp[144];
+ int i, rsiz, rsizw;
+
+ rsiz = sizeof(state_t) == mdlen ? HASH_DATA_AREA : 200 - 2 * mdlen;
+ rsizw = rsiz / 8;
+
+ memset(st, 0, sizeof(st));
+
+ for ( ; inlen >= rsiz; inlen -= rsiz, in += rsiz) {
+ for (i = 0; i < rsizw; i++)
+ st[i] ^= ((uint64_t *) in)[i];
+ keccakf(st, KECCAK_ROUNDS);
+ }
+
+ // last block and padding
+ memcpy(temp, in, inlen);
+ temp[inlen++] = 1;
+ memset(temp + inlen, 0, rsiz - inlen);
+ temp[rsiz - 1] |= 0x80;
+
+ for (i = 0; i < rsizw; i++)
+ st[i] ^= ((uint64_t *) temp)[i];
+
+ keccakf(st, KECCAK_ROUNDS);
+
+ memcpy(md, st, mdlen);
+
+ return 0;
+}
+
+void keccak1600(const uint8_t *in, int inlen, uint8_t *md)
+{
+ keccak(in, inlen, md, sizeof(state_t));
+}
diff --git a/stratum/algos/cryptonote/crypto/c_keccak.h b/stratum/algos/cryptonote/crypto/c_keccak.h
new file mode 100644
index 000000000..4f7f85729
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/c_keccak.h
@@ -0,0 +1,26 @@
+// keccak.h
+// 19-Nov-11 Markku-Juhani O. Saarinen
+
+#ifndef KECCAK_H
+#define KECCAK_H
+
+#include <stdint.h>
+#include <string.h>
+
+#ifndef KECCAK_ROUNDS
+#define KECCAK_ROUNDS 24
+#endif
+
+#ifndef ROTL64
+#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
+#endif
+
+// compute a keccak hash (md) of given byte length from "in"
+int keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen);
+
+// update the state
+void keccakf(uint64_t st[25], int norounds);
+
+void keccak1600(const uint8_t *in, int inlen, uint8_t *md);
+
+#endif
diff --git a/stratum/algos/cryptonote/crypto/c_skein.c b/stratum/algos/cryptonote/crypto/c_skein.c
new file mode 100644
index 000000000..6f3946fbb
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/c_skein.c
@@ -0,0 +1,2036 @@
+/***********************************************************************
+**
+** Implementation of the Skein hash function.
+**
+** Source code author: Doug Whiting, 2008.
+**
+** This algorithm and source code is released to the public domain.
+**
+************************************************************************/
+
+#define SKEIN_PORT_CODE /* instantiate any code in skein_port.h */
+
+#include <stddef.h> /* get size_t definition */
+#include <string.h> /* get the memcpy/memset functions */
+#include "c_skein.h" /* get the Skein API definitions */
+
+#define DISABLE_UNUSED 0
+
+#ifndef SKEIN_256_NIST_MAX_HASHBITS
+#define SKEIN_256_NIST_MAX_HASHBITS (0)
+#endif
+
+#ifndef SKEIN_512_NIST_MAX_HASHBITS
+#define SKEIN_512_NIST_MAX_HASHBITS (512)
+#endif
+
+#define SKEIN_MODIFIER_WORDS ( 2) /* number of modifier (tweak) words */
+
+#define SKEIN_256_STATE_WORDS ( 4)
+#define SKEIN_512_STATE_WORDS ( 8)
+#define SKEIN1024_STATE_WORDS (16)
+#define SKEIN_MAX_STATE_WORDS (16)
+
+#define SKEIN_256_STATE_BYTES ( 8*SKEIN_256_STATE_WORDS)
+#define SKEIN_512_STATE_BYTES ( 8*SKEIN_512_STATE_WORDS)
+#define SKEIN1024_STATE_BYTES ( 8*SKEIN1024_STATE_WORDS)
+
+#define SKEIN_256_STATE_BITS (64*SKEIN_256_STATE_WORDS)
+#define SKEIN_512_STATE_BITS (64*SKEIN_512_STATE_WORDS)
+#define SKEIN1024_STATE_BITS (64*SKEIN1024_STATE_WORDS)
+
+#define SKEIN_256_BLOCK_BYTES ( 8*SKEIN_256_STATE_WORDS)
+#define SKEIN_512_BLOCK_BYTES ( 8*SKEIN_512_STATE_WORDS)
+#define SKEIN1024_BLOCK_BYTES ( 8*SKEIN1024_STATE_WORDS)
+
+#define SKEIN_RND_SPECIAL (1000u)
+#define SKEIN_RND_KEY_INITIAL (SKEIN_RND_SPECIAL+0u)
+#define SKEIN_RND_KEY_INJECT (SKEIN_RND_SPECIAL+1u)
+#define SKEIN_RND_FEED_FWD (SKEIN_RND_SPECIAL+2u)
+
+typedef struct
+{
+ size_t hashBitLen; /* size of hash result, in bits */
+ size_t bCnt; /* current byte count in buffer b[] */
+ u64b_t T[SKEIN_MODIFIER_WORDS]; /* tweak words: T[0]=byte cnt, T[1]=flags */
+} Skein_Ctxt_Hdr_t;
+
+typedef struct /* 256-bit Skein hash context structure */
+{
+ Skein_Ctxt_Hdr_t h; /* common header context variables */
+ u64b_t X[SKEIN_256_STATE_WORDS]; /* chaining variables */
+ u08b_t b[SKEIN_256_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */
+} Skein_256_Ctxt_t;
+
+typedef struct /* 512-bit Skein hash context structure */
+{
+ Skein_Ctxt_Hdr_t h; /* common header context variables */
+ u64b_t X[SKEIN_512_STATE_WORDS]; /* chaining variables */
+ u08b_t b[SKEIN_512_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */
+} Skein_512_Ctxt_t;
+
+typedef struct /* 1024-bit Skein hash context structure */
+{
+ Skein_Ctxt_Hdr_t h; /* common header context variables */
+ u64b_t X[SKEIN1024_STATE_WORDS]; /* chaining variables */
+ u08b_t b[SKEIN1024_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */
+} Skein1024_Ctxt_t;
+
+/* Skein APIs for (incremental) "straight hashing" */
+#if SKEIN_256_NIST_MAX_HASH_BITS
+static int Skein_256_Init (Skein_256_Ctxt_t *ctx, size_t hashBitLen);
+#endif
+static int Skein_512_Init (Skein_512_Ctxt_t *ctx, size_t hashBitLen);
+static int Skein1024_Init (Skein1024_Ctxt_t *ctx, size_t hashBitLen);
+
+static int Skein_256_Update(Skein_256_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt);
+static int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt);
+static int Skein1024_Update(Skein1024_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt);
+
+static int Skein_256_Final (Skein_256_Ctxt_t *ctx, u08b_t * hashVal);
+static int Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
+static int Skein1024_Final (Skein1024_Ctxt_t *ctx, u08b_t * hashVal);
+
+/*
+** Skein APIs for "extended" initialization: MAC keys, tree hashing.
+** After an InitExt() call, just use Update/Final calls as with Init().
+**
+** Notes: Same parameters as _Init() calls, plus treeInfo/key/keyBytes.
+** When keyBytes == 0 and treeInfo == SKEIN_SEQUENTIAL,
+** the results of InitExt() are identical to calling Init().
+** The function Init() may be called once to "precompute" the IV for
+** a given hashBitLen value, then by saving a copy of the context
+** the IV computation may be avoided in later calls.
+** Similarly, the function InitExt() may be called once per MAC key
+** to precompute the MAC IV, then a copy of the context saved and
+** reused for each new MAC computation.
+**/
+#if 0
+static int Skein_256_InitExt(Skein_256_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes);
+static int Skein_512_InitExt(Skein_512_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes);
+static int Skein1024_InitExt(Skein1024_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes);
+#endif
+
+/*
+** Skein APIs for MAC and tree hash:
+** Final_Pad: pad, do final block, but no OUTPUT type
+** Output: do just the output stage
+*/
+#if 0
+static int Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, u08b_t * hashVal);
+static int Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
+static int Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, u08b_t * hashVal);
+#endif
+
+#ifndef SKEIN_TREE_HASH
+#define SKEIN_TREE_HASH (1)
+#endif
+#if 0
+#if SKEIN_TREE_HASH
+static int Skein_256_Output (Skein_256_Ctxt_t *ctx, u08b_t * hashVal);
+static int Skein_512_Output (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
+static int Skein1024_Output (Skein1024_Ctxt_t *ctx, u08b_t * hashVal);
+#endif
+#endif
+
+/*****************************************************************
+** "Internal" Skein definitions
+** -- not needed for sequential hashing API, but will be
+** helpful for other uses of Skein (e.g., tree hash mode).
+** -- included here so that they can be shared between
+** reference and optimized code.
+******************************************************************/
+
+/* tweak word T[1]: bit field starting positions */
+#define SKEIN_T1_BIT(BIT) ((BIT) - 64) /* offset 64 because it's the second word */
+
+#define SKEIN_T1_POS_TREE_LVL SKEIN_T1_BIT(112) /* bits 112..118: level in hash tree */
+#define SKEIN_T1_POS_BIT_PAD SKEIN_T1_BIT(119) /* bit 119 : partial final input byte */
+#define SKEIN_T1_POS_BLK_TYPE SKEIN_T1_BIT(120) /* bits 120..125: type field */
+#define SKEIN_T1_POS_FIRST SKEIN_T1_BIT(126) /* bits 126 : first block flag */
+#define SKEIN_T1_POS_FINAL SKEIN_T1_BIT(127) /* bit 127 : final block flag */
+
+/* tweak word T[1]: flag bit definition(s) */
+#define SKEIN_T1_FLAG_FIRST (((u64b_t) 1 ) << SKEIN_T1_POS_FIRST)
+#define SKEIN_T1_FLAG_FINAL (((u64b_t) 1 ) << SKEIN_T1_POS_FINAL)
+#define SKEIN_T1_FLAG_BIT_PAD (((u64b_t) 1 ) << SKEIN_T1_POS_BIT_PAD)
+
+/* tweak word T[1]: tree level bit field mask */
+#define SKEIN_T1_TREE_LVL_MASK (((u64b_t)0x7F) << SKEIN_T1_POS_TREE_LVL)
+#define SKEIN_T1_TREE_LEVEL(n) (((u64b_t) (n)) << SKEIN_T1_POS_TREE_LVL)
+
+/* tweak word T[1]: block type field */
+#define SKEIN_BLK_TYPE_KEY ( 0) /* key, for MAC and KDF */
+#define SKEIN_BLK_TYPE_CFG ( 4) /* configuration block */
+#define SKEIN_BLK_TYPE_PERS ( 8) /* personalization string */
+#define SKEIN_BLK_TYPE_PK (12) /* public key (for digital signature hashing) */
+#define SKEIN_BLK_TYPE_KDF (16) /* key identifier for KDF */
+#define SKEIN_BLK_TYPE_NONCE (20) /* nonce for PRNG */
+#define SKEIN_BLK_TYPE_MSG (48) /* message processing */
+#define SKEIN_BLK_TYPE_OUT (63) /* output stage */
+#define SKEIN_BLK_TYPE_MASK (63) /* bit field mask */
+
+#define SKEIN_T1_BLK_TYPE(T) (((u64b_t) (SKEIN_BLK_TYPE_##T)) << SKEIN_T1_POS_BLK_TYPE)
+#define SKEIN_T1_BLK_TYPE_KEY SKEIN_T1_BLK_TYPE(KEY) /* key, for MAC and KDF */
+#define SKEIN_T1_BLK_TYPE_CFG SKEIN_T1_BLK_TYPE(CFG) /* configuration block */
+#define SKEIN_T1_BLK_TYPE_PERS SKEIN_T1_BLK_TYPE(PERS) /* personalization string */
+#define SKEIN_T1_BLK_TYPE_PK SKEIN_T1_BLK_TYPE(PK) /* public key (for digital signature hashing) */
+#define SKEIN_T1_BLK_TYPE_KDF SKEIN_T1_BLK_TYPE(KDF) /* key identifier for KDF */
+#define SKEIN_T1_BLK_TYPE_NONCE SKEIN_T1_BLK_TYPE(NONCE)/* nonce for PRNG */
+#define SKEIN_T1_BLK_TYPE_MSG SKEIN_T1_BLK_TYPE(MSG) /* message processing */
+#define SKEIN_T1_BLK_TYPE_OUT SKEIN_T1_BLK_TYPE(OUT) /* output stage */
+#define SKEIN_T1_BLK_TYPE_MASK SKEIN_T1_BLK_TYPE(MASK) /* field bit mask */
+
+#define SKEIN_T1_BLK_TYPE_CFG_FINAL (SKEIN_T1_BLK_TYPE_CFG | SKEIN_T1_FLAG_FINAL)
+#define SKEIN_T1_BLK_TYPE_OUT_FINAL (SKEIN_T1_BLK_TYPE_OUT | SKEIN_T1_FLAG_FINAL)
+
+#define SKEIN_VERSION (1)
+
+#ifndef SKEIN_ID_STRING_LE /* allow compile-time personalization */
+#define SKEIN_ID_STRING_LE (0x33414853) /* "SHA3" (little-endian)*/
+#endif
+
+#define SKEIN_MK_64(hi32,lo32) ((lo32) + (((u64b_t) (hi32)) << 32))
+#define SKEIN_SCHEMA_VER SKEIN_MK_64(SKEIN_VERSION,SKEIN_ID_STRING_LE)
+#define SKEIN_KS_PARITY SKEIN_MK_64(0x1BD11BDA,0xA9FC1A22)
+
+#define SKEIN_CFG_STR_LEN (4*8)
+
+/* bit field definitions in config block treeInfo word */
+#define SKEIN_CFG_TREE_LEAF_SIZE_POS ( 0)
+#define SKEIN_CFG_TREE_NODE_SIZE_POS ( 8)
+#define SKEIN_CFG_TREE_MAX_LEVEL_POS (16)
+
+#define SKEIN_CFG_TREE_LEAF_SIZE_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_LEAF_SIZE_POS)
+#define SKEIN_CFG_TREE_NODE_SIZE_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_NODE_SIZE_POS)
+#define SKEIN_CFG_TREE_MAX_LEVEL_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_MAX_LEVEL_POS)
+
+#define SKEIN_CFG_TREE_INFO(leaf,node,maxLvl) \
+ ( (((u64b_t)(leaf )) << SKEIN_CFG_TREE_LEAF_SIZE_POS) | \
+ (((u64b_t)(node )) << SKEIN_CFG_TREE_NODE_SIZE_POS) | \
+ (((u64b_t)(maxLvl)) << SKEIN_CFG_TREE_MAX_LEVEL_POS) )
+
+#define SKEIN_CFG_TREE_INFO_SEQUENTIAL SKEIN_CFG_TREE_INFO(0,0,0) /* use as treeInfo in InitExt() call for sequential processing */
+
+/*
+** Skein macros for getting/setting tweak words, etc.
+** These are useful for partial input bytes, hash tree init/update, etc.
+**/
+#define Skein_Get_Tweak(ctxPtr,TWK_NUM) ((ctxPtr)->h.T[TWK_NUM])
+#define Skein_Set_Tweak(ctxPtr,TWK_NUM,tVal) {(ctxPtr)->h.T[TWK_NUM] = (tVal);}
+
+#define Skein_Get_T0(ctxPtr) Skein_Get_Tweak(ctxPtr,0)
+#define Skein_Get_T1(ctxPtr) Skein_Get_Tweak(ctxPtr,1)
+#define Skein_Set_T0(ctxPtr,T0) Skein_Set_Tweak(ctxPtr,0,T0)
+#define Skein_Set_T1(ctxPtr,T1) Skein_Set_Tweak(ctxPtr,1,T1)
+
+/* set both tweak words at once */
+#define Skein_Set_T0_T1(ctxPtr,T0,T1) \
+{ \
+ Skein_Set_T0(ctxPtr,(T0)); \
+ Skein_Set_T1(ctxPtr,(T1)); \
+}
+
+#define Skein_Set_Type(ctxPtr,BLK_TYPE) \
+ Skein_Set_T1(ctxPtr,SKEIN_T1_BLK_TYPE_##BLK_TYPE)
+
+/* set up for starting with a new type: h.T[0]=0; h.T[1] = NEW_TYPE; h.bCnt=0; */
+#define Skein_Start_New_Type(ctxPtr,BLK_TYPE) \
+{ Skein_Set_T0_T1(ctxPtr,0,SKEIN_T1_FLAG_FIRST | SKEIN_T1_BLK_TYPE_##BLK_TYPE); (ctxPtr)->h.bCnt=0; }
+
+#define Skein_Clear_First_Flag(hdr) { (hdr).T[1] &= ~SKEIN_T1_FLAG_FIRST; }
+#define Skein_Set_Bit_Pad_Flag(hdr) { (hdr).T[1] |= SKEIN_T1_FLAG_BIT_PAD; }
+
+#define Skein_Set_Tree_Level(hdr,height) { (hdr).T[1] |= SKEIN_T1_TREE_LEVEL(height);}
+
+/*****************************************************************
+** "Internal" Skein definitions for debugging and error checking
+******************************************************************/
+#define Skein_Show_Block(bits,ctx,X,blkPtr,wPtr,ksEvenPtr,ksOddPtr)
+#define Skein_Show_Round(bits,ctx,r,X)
+#define Skein_Show_R_Ptr(bits,ctx,r,X_ptr)
+#define Skein_Show_Final(bits,ctx,cnt,outPtr)
+#define Skein_Show_Key(bits,ctx,key,keyBytes)
+
+
+#ifndef SKEIN_ERR_CHECK /* run-time checks (e.g., bad params, uninitialized context)? */
+#define Skein_Assert(x,retCode)/* default: ignore all Asserts, for performance */
+#define Skein_assert(x)
+#elif defined(SKEIN_ASSERT)
+#include <assert.h>
+#define Skein_Assert(x,retCode) assert(x)
+#define Skein_assert(x) assert(x)
+#else
+#include <assert.h>
+#define Skein_Assert(x,retCode) { if (!(x)) return retCode; } /* caller error */
+#define Skein_assert(x) assert(x) /* internal error */
+#endif
+
+/*****************************************************************
+** Skein block function constants (shared across Ref and Opt code)
+******************************************************************/
+enum
+{
+ /* Skein_256 round rotation constants */
+ R_256_0_0=14, R_256_0_1=16,
+ R_256_1_0=52, R_256_1_1=57,
+ R_256_2_0=23, R_256_2_1=40,
+ R_256_3_0= 5, R_256_3_1=37,
+ R_256_4_0=25, R_256_4_1=33,
+ R_256_5_0=46, R_256_5_1=12,
+ R_256_6_0=58, R_256_6_1=22,
+ R_256_7_0=32, R_256_7_1=32,
+
+ /* Skein_512 round rotation constants */
+ R_512_0_0=46, R_512_0_1=36, R_512_0_2=19, R_512_0_3=37,
+ R_512_1_0=33, R_512_1_1=27, R_512_1_2=14, R_512_1_3=42,
+ R_512_2_0=17, R_512_2_1=49, R_512_2_2=36, R_512_2_3=39,
+ R_512_3_0=44, R_512_3_1= 9, R_512_3_2=54, R_512_3_3=56,
+ R_512_4_0=39, R_512_4_1=30, R_512_4_2=34, R_512_4_3=24,
+ R_512_5_0=13, R_512_5_1=50, R_512_5_2=10, R_512_5_3=17,
+ R_512_6_0=25, R_512_6_1=29, R_512_6_2=39, R_512_6_3=43,
+ R_512_7_0= 8, R_512_7_1=35, R_512_7_2=56, R_512_7_3=22,
+
+ /* Skein1024 round rotation constants */
+ R1024_0_0=24, R1024_0_1=13, R1024_0_2= 8, R1024_0_3=47, R1024_0_4= 8, R1024_0_5=17, R1024_0_6=22, R1024_0_7=37,
+ R1024_1_0=38, R1024_1_1=19, R1024_1_2=10, R1024_1_3=55, R1024_1_4=49, R1024_1_5=18, R1024_1_6=23, R1024_1_7=52,
+ R1024_2_0=33, R1024_2_1= 4, R1024_2_2=51, R1024_2_3=13, R1024_2_4=34, R1024_2_5=41, R1024_2_6=59, R1024_2_7=17,
+ R1024_3_0= 5, R1024_3_1=20, R1024_3_2=48, R1024_3_3=41, R1024_3_4=47, R1024_3_5=28, R1024_3_6=16, R1024_3_7=25,
+ R1024_4_0=41, R1024_4_1= 9, R1024_4_2=37, R1024_4_3=31, R1024_4_4=12, R1024_4_5=47, R1024_4_6=44, R1024_4_7=30,
+ R1024_5_0=16, R1024_5_1=34, R1024_5_2=56, R1024_5_3=51, R1024_5_4= 4, R1024_5_5=53, R1024_5_6=42, R1024_5_7=41,
+ R1024_6_0=31, R1024_6_1=44, R1024_6_2=47, R1024_6_3=46, R1024_6_4=19, R1024_6_5=42, R1024_6_6=44, R1024_6_7=25,
+ R1024_7_0= 9, R1024_7_1=48, R1024_7_2=35, R1024_7_3=52, R1024_7_4=23, R1024_7_5=31, R1024_7_6=37, R1024_7_7=20
+};
+
+#ifndef SKEIN_ROUNDS
+#define SKEIN_256_ROUNDS_TOTAL (72) /* number of rounds for the different block sizes */
+#define SKEIN_512_ROUNDS_TOTAL (72)
+#define SKEIN1024_ROUNDS_TOTAL (80)
+#else /* allow command-line define in range 8*(5..14) */
+#define SKEIN_256_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS/100) + 5) % 10) + 5))
+#define SKEIN_512_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS/ 10) + 5) % 10) + 5))
+#define SKEIN1024_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS ) + 5) % 10) + 5))
+#endif
+
+
+/*
+***************** Pre-computed Skein IVs *******************
+**
+** NOTE: these values are not "magic" constants, but
+** are generated using the Threefish block function.
+** They are pre-computed here only for speed; i.e., to
+** avoid the need for a Threefish call during Init().
+**
+** The IV for any fixed hash length may be pre-computed.
+** Only the most common values are included here.
+**
+************************************************************
+**/
+
+#define MK_64 SKEIN_MK_64
+
+/* blkSize = 256 bits. hashSize = 128 bits */
+const u64b_t SKEIN_256_IV_128[] =
+ {
+ MK_64(0xE1111906,0x964D7260),
+ MK_64(0x883DAAA7,0x7C8D811C),
+ MK_64(0x10080DF4,0x91960F7A),
+ MK_64(0xCCF7DDE5,0xB45BC1C2)
+ };
+
+/* blkSize = 256 bits. hashSize = 160 bits */
+const u64b_t SKEIN_256_IV_160[] =
+ {
+ MK_64(0x14202314,0x72825E98),
+ MK_64(0x2AC4E9A2,0x5A77E590),
+ MK_64(0xD47A5856,0x8838D63E),
+ MK_64(0x2DD2E496,0x8586AB7D)
+ };
+
+/* blkSize = 256 bits. hashSize = 224 bits */
+const u64b_t SKEIN_256_IV_224[] =
+ {
+ MK_64(0xC6098A8C,0x9AE5EA0B),
+ MK_64(0x876D5686,0x08C5191C),
+ MK_64(0x99CB88D7,0xD7F53884),
+ MK_64(0x384BDDB1,0xAEDDB5DE)
+ };
+
+/* blkSize = 256 bits. hashSize = 256 bits */
+const u64b_t SKEIN_256_IV_256[] =
+ {
+ MK_64(0xFC9DA860,0xD048B449),
+ MK_64(0x2FCA6647,0x9FA7D833),
+ MK_64(0xB33BC389,0x6656840F),
+ MK_64(0x6A54E920,0xFDE8DA69)
+ };
+
+/* blkSize = 512 bits. hashSize = 128 bits */
+const u64b_t SKEIN_512_IV_128[] =
+ {
+ MK_64(0xA8BC7BF3,0x6FBF9F52),
+ MK_64(0x1E9872CE,0xBD1AF0AA),
+ MK_64(0x309B1790,0xB32190D3),
+ MK_64(0xBCFBB854,0x3F94805C),
+ MK_64(0x0DA61BCD,0x6E31B11B),
+ MK_64(0x1A18EBEA,0xD46A32E3),
+ MK_64(0xA2CC5B18,0xCE84AA82),
+ MK_64(0x6982AB28,0x9D46982D)
+ };
+
+/* blkSize = 512 bits. hashSize = 160 bits */
+const u64b_t SKEIN_512_IV_160[] =
+ {
+ MK_64(0x28B81A2A,0xE013BD91),
+ MK_64(0xC2F11668,0xB5BDF78F),
+ MK_64(0x1760D8F3,0xF6A56F12),
+ MK_64(0x4FB74758,0x8239904F),
+ MK_64(0x21EDE07F,0x7EAF5056),
+ MK_64(0xD908922E,0x63ED70B8),
+ MK_64(0xB8EC76FF,0xECCB52FA),
+ MK_64(0x01A47BB8,0xA3F27A6E)
+ };
+
+/* blkSize = 512 bits. hashSize = 224 bits */
+const u64b_t SKEIN_512_IV_224[] =
+ {
+ MK_64(0xCCD06162,0x48677224),
+ MK_64(0xCBA65CF3,0xA92339EF),
+ MK_64(0x8CCD69D6,0x52FF4B64),
+ MK_64(0x398AED7B,0x3AB890B4),
+ MK_64(0x0F59D1B1,0x457D2BD0),
+ MK_64(0x6776FE65,0x75D4EB3D),
+ MK_64(0x99FBC70E,0x997413E9),
+ MK_64(0x9E2CFCCF,0xE1C41EF7)
+ };
+
+/* blkSize = 512 bits. hashSize = 256 bits */
+const u64b_t SKEIN_512_IV_256[] =
+ {
+ MK_64(0xCCD044A1,0x2FDB3E13),
+ MK_64(0xE8359030,0x1A79A9EB),
+ MK_64(0x55AEA061,0x4F816E6F),
+ MK_64(0x2A2767A4,0xAE9B94DB),
+ MK_64(0xEC06025E,0x74DD7683),
+ MK_64(0xE7A436CD,0xC4746251),
+ MK_64(0xC36FBAF9,0x393AD185),
+ MK_64(0x3EEDBA18,0x33EDFC13)
+ };
+
+/* blkSize = 512 bits. hashSize = 384 bits */
+const u64b_t SKEIN_512_IV_384[] =
+ {
+ MK_64(0xA3F6C6BF,0x3A75EF5F),
+ MK_64(0xB0FEF9CC,0xFD84FAA4),
+ MK_64(0x9D77DD66,0x3D770CFE),
+ MK_64(0xD798CBF3,0xB468FDDA),
+ MK_64(0x1BC4A666,0x8A0E4465),
+ MK_64(0x7ED7D434,0xE5807407),
+ MK_64(0x548FC1AC,0xD4EC44D6),
+ MK_64(0x266E1754,0x6AA18FF8)
+ };
+
+/* blkSize = 512 bits. hashSize = 512 bits */
+const u64b_t SKEIN_512_IV_512[] =
+ {
+ MK_64(0x4903ADFF,0x749C51CE),
+ MK_64(0x0D95DE39,0x9746DF03),
+ MK_64(0x8FD19341,0x27C79BCE),
+ MK_64(0x9A255629,0xFF352CB1),
+ MK_64(0x5DB62599,0xDF6CA7B0),
+ MK_64(0xEABE394C,0xA9D5C3F4),
+ MK_64(0x991112C7,0x1A75B523),
+ MK_64(0xAE18A40B,0x660FCC33)
+ };
+
+/* blkSize = 1024 bits. hashSize = 384 bits */
+const u64b_t SKEIN1024_IV_384[] =
+ {
+ MK_64(0x5102B6B8,0xC1894A35),
+ MK_64(0xFEEBC9E3,0xFE8AF11A),
+ MK_64(0x0C807F06,0xE32BED71),
+ MK_64(0x60C13A52,0xB41A91F6),
+ MK_64(0x9716D35D,0xD4917C38),
+ MK_64(0xE780DF12,0x6FD31D3A),
+ MK_64(0x797846B6,0xC898303A),
+ MK_64(0xB172C2A8,0xB3572A3B),
+ MK_64(0xC9BC8203,0xA6104A6C),
+ MK_64(0x65909338,0xD75624F4),
+ MK_64(0x94BCC568,0x4B3F81A0),
+ MK_64(0x3EBBF51E,0x10ECFD46),
+ MK_64(0x2DF50F0B,0xEEB08542),
+ MK_64(0x3B5A6530,0x0DBC6516),
+ MK_64(0x484B9CD2,0x167BBCE1),
+ MK_64(0x2D136947,0xD4CBAFEA)
+ };
+
+/* blkSize = 1024 bits. hashSize = 512 bits */
+const u64b_t SKEIN1024_IV_512[] =
+ {
+ MK_64(0xCAEC0E5D,0x7C1B1B18),
+ MK_64(0xA01B0E04,0x5F03E802),
+ MK_64(0x33840451,0xED912885),
+ MK_64(0x374AFB04,0xEAEC2E1C),
+ MK_64(0xDF25A0E2,0x813581F7),
+ MK_64(0xE4004093,0x8B12F9D2),
+ MK_64(0xA662D539,0xC2ED39B6),
+ MK_64(0xFA8B85CF,0x45D8C75A),
+ MK_64(0x8316ED8E,0x29EDE796),
+ MK_64(0x053289C0,0x2E9F91B8),
+ MK_64(0xC3F8EF1D,0x6D518B73),
+ MK_64(0xBDCEC3C4,0xD5EF332E),
+ MK_64(0x549A7E52,0x22974487),
+ MK_64(0x67070872,0x5B749816),
+ MK_64(0xB9CD28FB,0xF0581BD1),
+ MK_64(0x0E2940B8,0x15804974)
+ };
+
+/* blkSize = 1024 bits. hashSize = 1024 bits */
+const u64b_t SKEIN1024_IV_1024[] =
+ {
+ MK_64(0xD593DA07,0x41E72355),
+ MK_64(0x15B5E511,0xAC73E00C),
+ MK_64(0x5180E5AE,0xBAF2C4F0),
+ MK_64(0x03BD41D3,0xFCBCAFAF),
+ MK_64(0x1CAEC6FD,0x1983A898),
+ MK_64(0x6E510B8B,0xCDD0589F),
+ MK_64(0x77E2BDFD,0xC6394ADA),
+ MK_64(0xC11E1DB5,0x24DCB0A3),
+ MK_64(0xD6D14AF9,0xC6329AB5),
+ MK_64(0x6A9B0BFC,0x6EB67E0D),
+ MK_64(0x9243C60D,0xCCFF1332),
+ MK_64(0x1A1F1DDE,0x743F02D4),
+ MK_64(0x0996753C,0x10ED0BB8),
+ MK_64(0x6572DD22,0xF2B4969A),
+ MK_64(0x61FD3062,0xD00A579A),
+ MK_64(0x1DE0536E,0x8682E539)
+ };
+
+
+#ifndef SKEIN_USE_ASM
+#define SKEIN_USE_ASM (0) /* default is all C code (no ASM) */
+#endif
+
+#ifndef SKEIN_LOOP
+#define SKEIN_LOOP 001 /* default: unroll 256 and 512, but not 1024 */
+#endif
+
+#define BLK_BITS (WCNT*64) /* some useful definitions for code here */
+#define KW_TWK_BASE (0)
+#define KW_KEY_BASE (3)
+#define ks (kw + KW_KEY_BASE)
+#define ts (kw + KW_TWK_BASE)
+
+#ifdef SKEIN_DEBUG
+#define DebugSaveTweak(ctx) { ctx->h.T[0] = ts[0]; ctx->h.T[1] = ts[1]; }
+#else
+#define DebugSaveTweak(ctx)
+#endif
+
+/***************************** Skein_256 ******************************/
+#if !(SKEIN_USE_ASM & 256)
+/* Process blkCnt consecutive SKEIN_256_BLOCK_BYTES input blocks at blkPtr
+ * through Threefish-256 in Skein's UBI chaining mode: build the key schedule
+ * from ctx->X, run the rounds with periodic key injections, then feedforward
+ * XOR the plaintext back into ctx->X. byteCntAdd is added to the tweak byte
+ * counter ts[0] once per block. Precondition: blkCnt != 0 (asserted). */
+static void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd)
+ { /* do it in C */
+ enum
+ {
+ WCNT = SKEIN_256_STATE_WORDS
+ };
+#undef RCNT
+#define RCNT (SKEIN_256_ROUNDS_TOTAL/8)
+
+#ifdef SKEIN_LOOP /* configure how much to unroll the loop */
+#define SKEIN_UNROLL_256 (((SKEIN_LOOP)/100)%10)
+#else
+#define SKEIN_UNROLL_256 (0)
+#endif
+
+#if SKEIN_UNROLL_256
+#if (RCNT % SKEIN_UNROLL_256)
+#error "Invalid SKEIN_UNROLL_256" /* sanity check on unroll count */
+#endif
+ size_t r;
+ u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/
+#else
+ u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */
+#endif
+ u64b_t X0,X1,X2,X3; /* local copy of context vars, for speed */
+ u64b_t w [WCNT]; /* local copy of input block */
+#ifdef SKEIN_DEBUG
+ const u64b_t *Xptr[4]; /* use for debugging (help compiler put Xn in registers) */
+ Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3;
+#endif
+ Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
+ ts[0] = ctx->h.T[0];
+ ts[1] = ctx->h.T[1];
+ do {
+ /* this implementation only supports 2**64 input bytes (no carry out here) */
+ ts[0] += byteCntAdd; /* update processed length */
+
+ /* precompute the key schedule for this block */
+ ks[0] = ctx->X[0];
+ ks[1] = ctx->X[1];
+ ks[2] = ctx->X[2];
+ ks[3] = ctx->X[3];
+ /* ks[4] is the Threefish key-parity word (XOR of all key words and the constant) */
+ ks[4] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ SKEIN_KS_PARITY;
+
+ ts[2] = ts[0] ^ ts[1];
+
+ Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */
+ DebugSaveTweak(ctx);
+ Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts);
+
+ X0 = w[0] + ks[0]; /* do the first full key injection */
+ X1 = w[1] + ks[1] + ts[0];
+ X2 = w[2] + ks[2] + ts[1];
+ X3 = w[3] + ks[3];
+
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr); /* show starting state values */
+
+ blkPtr += SKEIN_256_BLOCK_BYTES;
+
+ /* run the rounds */
+
+/* one MIX double-column: add, rotate-left by the table constant, XOR */
+#define Round256(p0,p1,p2,p3,ROT,rNum) \
+ X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \
+ X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \
+
+#if SKEIN_UNROLL_256 == 0
+#define R256(p0,p1,p2,p3,ROT,rNum) /* fully unrolled */ \
+ Round256(p0,p1,p2,p3,ROT,rNum) \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr);
+
+/* unrolled key injection: subkey indices rotate mod 5 (keys) and mod 3 (tweak) */
+#define I256(R) \
+ X0 += ks[((R)+1) % 5]; /* inject the key schedule value */ \
+ X1 += ks[((R)+2) % 5] + ts[((R)+1) % 3]; \
+ X2 += ks[((R)+3) % 5] + ts[((R)+2) % 3]; \
+ X3 += ks[((R)+4) % 5] + (R)+1; \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
+#else /* looping version */
+#define R256(p0,p1,p2,p3,ROT,rNum) \
+ Round256(p0,p1,p2,p3,ROT,rNum) \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rNum,Xptr);
+
+/* looping key injection: instead of mod-indexing, copy the schedule forward
+ * in kw[] so plain r+(R) offsets stay valid across loop iterations */
+#define I256(R) \
+ X0 += ks[r+(R)+0]; /* inject the key schedule value */ \
+ X1 += ks[r+(R)+1] + ts[r+(R)+0]; \
+ X2 += ks[r+(R)+2] + ts[r+(R)+1]; \
+ X3 += ks[r+(R)+3] + r+(R) ; \
+ ks[r + (R)+4 ] = ks[r+(R)-1]; /* rotate key schedule */\
+ ts[r + (R)+2 ] = ts[r+(R)-1]; \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
+
+ for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_256) /* loop thru it */
+#endif
+ {
+#define R256_8_rounds(R) \
+ R256(0,1,2,3,R_256_0,8*(R) + 1); \
+ R256(0,3,2,1,R_256_1,8*(R) + 2); \
+ R256(0,1,2,3,R_256_2,8*(R) + 3); \
+ R256(0,3,2,1,R_256_3,8*(R) + 4); \
+ I256(2*(R)); \
+ R256(0,1,2,3,R_256_4,8*(R) + 5); \
+ R256(0,3,2,1,R_256_5,8*(R) + 6); \
+ R256(0,1,2,3,R_256_6,8*(R) + 7); \
+ R256(0,3,2,1,R_256_7,8*(R) + 8); \
+ I256(2*(R)+1);
+
+ R256_8_rounds( 0);
+
+/* emit further 8-round groups only while they are needed by the round count
+ * (unrolled build) or by the chosen unroll factor (looping build) */
+#define R256_Unroll_R(NN) ((SKEIN_UNROLL_256 == 0 && SKEIN_256_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_256 > (NN)))
+
+ #if R256_Unroll_R( 1)
+ R256_8_rounds( 1);
+ #endif
+ #if R256_Unroll_R( 2)
+ R256_8_rounds( 2);
+ #endif
+ #if R256_Unroll_R( 3)
+ R256_8_rounds( 3);
+ #endif
+ #if R256_Unroll_R( 4)
+ R256_8_rounds( 4);
+ #endif
+ #if R256_Unroll_R( 5)
+ R256_8_rounds( 5);
+ #endif
+ #if R256_Unroll_R( 6)
+ R256_8_rounds( 6);
+ #endif
+ #if R256_Unroll_R( 7)
+ R256_8_rounds( 7);
+ #endif
+ #if R256_Unroll_R( 8)
+ R256_8_rounds( 8);
+ #endif
+ #if R256_Unroll_R( 9)
+ R256_8_rounds( 9);
+ #endif
+ #if R256_Unroll_R(10)
+ R256_8_rounds(10);
+ #endif
+ #if R256_Unroll_R(11)
+ R256_8_rounds(11);
+ #endif
+ #if R256_Unroll_R(12)
+ R256_8_rounds(12);
+ #endif
+ #if R256_Unroll_R(13)
+ R256_8_rounds(13);
+ #endif
+ #if R256_Unroll_R(14)
+ R256_8_rounds(14);
+ #endif
+ #if (SKEIN_UNROLL_256 > 14)
+#error "need more unrolling in Skein_256_Process_Block"
+ #endif
+ }
+ /* do the final "feedforward" xor, update context chaining vars */
+ ctx->X[0] = X0 ^ w[0];
+ ctx->X[1] = X1 ^ w[1];
+ ctx->X[2] = X2 ^ w[2];
+ ctx->X[3] = X3 ^ w[3];
+
+ Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X);
+
+ /* after the first block of a UBI call, clear the FIRST flag in the tweak */
+ ts[1] &= ~SKEIN_T1_FLAG_FIRST;
+ }
+ while (--blkCnt);
+ ctx->h.T[0] = ts[0];
+ ctx->h.T[1] = ts[1];
+ }
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+/* NOTE(review): measures code size by subtracting function addresses --
+ * relies on link order and is not defined by the C standard; kept as-is
+ * because it is only built for code-size/perf instrumentation. */
+static size_t Skein_256_Process_Block_CodeSize(void)
+ {
+ return ((u08b_t *) Skein_256_Process_Block_CodeSize) -
+ ((u08b_t *) Skein_256_Process_Block);
+ }
+static uint_t Skein_256_Unroll_Cnt(void)
+ {
+ return SKEIN_UNROLL_256;
+ }
+#endif
+#endif
+
+/***************************** Skein_512 ******************************/
+#if !(SKEIN_USE_ASM & 512)
+/* Process blkCnt consecutive SKEIN_512_BLOCK_BYTES input blocks at blkPtr
+ * through Threefish-512 in Skein's UBI chaining mode: key schedule from
+ * ctx->X (8 words + parity), rounds with periodic key injections, then the
+ * feedforward XOR back into ctx->X. byteCntAdd is added to the tweak byte
+ * counter ts[0] once per block. Precondition: blkCnt != 0 (asserted). */
+static void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd)
+ { /* do it in C */
+ enum
+ {
+ WCNT = SKEIN_512_STATE_WORDS
+ };
+#undef RCNT
+#define RCNT (SKEIN_512_ROUNDS_TOTAL/8)
+
+#ifdef SKEIN_LOOP /* configure how much to unroll the loop */
+#define SKEIN_UNROLL_512 (((SKEIN_LOOP)/10)%10)
+#else
+#define SKEIN_UNROLL_512 (0)
+#endif
+
+#if SKEIN_UNROLL_512
+#if (RCNT % SKEIN_UNROLL_512)
+#error "Invalid SKEIN_UNROLL_512" /* sanity check on unroll count */
+#endif
+ size_t r;
+ u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/
+#else
+ u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */
+#endif
+ u64b_t X0,X1,X2,X3,X4,X5,X6,X7; /* local copy of vars, for speed */
+ u64b_t w [WCNT]; /* local copy of input block */
+#ifdef SKEIN_DEBUG
+ const u64b_t *Xptr[8]; /* use for debugging (help compiler put Xn in registers) */
+ Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3;
+ Xptr[4] = &X4; Xptr[5] = &X5; Xptr[6] = &X6; Xptr[7] = &X7;
+#endif
+
+ Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
+ ts[0] = ctx->h.T[0];
+ ts[1] = ctx->h.T[1];
+ do {
+ /* this implementation only supports 2**64 input bytes (no carry out here) */
+ ts[0] += byteCntAdd; /* update processed length */
+
+ /* precompute the key schedule for this block */
+ ks[0] = ctx->X[0];
+ ks[1] = ctx->X[1];
+ ks[2] = ctx->X[2];
+ ks[3] = ctx->X[3];
+ ks[4] = ctx->X[4];
+ ks[5] = ctx->X[5];
+ ks[6] = ctx->X[6];
+ ks[7] = ctx->X[7];
+ /* ks[8] is the Threefish key-parity word (XOR of all key words and the constant) */
+ ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
+ ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY;
+
+ ts[2] = ts[0] ^ ts[1];
+
+ Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */
+ DebugSaveTweak(ctx);
+ Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts);
+
+ X0 = w[0] + ks[0]; /* do the first full key injection */
+ X1 = w[1] + ks[1];
+ X2 = w[2] + ks[2];
+ X3 = w[3] + ks[3];
+ X4 = w[4] + ks[4];
+ X5 = w[5] + ks[5] + ts[0];
+ X6 = w[6] + ks[6] + ts[1];
+ X7 = w[7] + ks[7];
+
+ blkPtr += SKEIN_512_BLOCK_BYTES;
+
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr);
+ /* run the rounds */
+/* one MIX quad-column: add, rotate-left by the table constant, XOR */
+#define Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \
+ X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \
+ X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \
+ X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \
+ X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \
+
+#if SKEIN_UNROLL_512 == 0
+#define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) /* unrolled */ \
+ Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr);
+
+/* unrolled key injection: subkey indices rotate mod 9 (keys) and mod 3 (tweak) */
+#define I512(R) \
+ X0 += ks[((R)+1) % 9]; /* inject the key schedule value */ \
+ X1 += ks[((R)+2) % 9]; \
+ X2 += ks[((R)+3) % 9]; \
+ X3 += ks[((R)+4) % 9]; \
+ X4 += ks[((R)+5) % 9]; \
+ X5 += ks[((R)+6) % 9] + ts[((R)+1) % 3]; \
+ X6 += ks[((R)+7) % 9] + ts[((R)+2) % 3]; \
+ X7 += ks[((R)+8) % 9] + (R)+1; \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
+#else /* looping version */
+#define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \
+ Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rNum,Xptr);
+
+/* looping key injection: copy the schedule forward in kw[] so plain r+(R)
+ * offsets stay valid across loop iterations (instead of mod-indexing) */
+#define I512(R) \
+ X0 += ks[r+(R)+0]; /* inject the key schedule value */ \
+ X1 += ks[r+(R)+1]; \
+ X2 += ks[r+(R)+2]; \
+ X3 += ks[r+(R)+3]; \
+ X4 += ks[r+(R)+4]; \
+ X5 += ks[r+(R)+5] + ts[r+(R)+0]; \
+ X6 += ks[r+(R)+6] + ts[r+(R)+1]; \
+ X7 += ks[r+(R)+7] + r+(R) ; \
+ ks[r + (R)+8] = ks[r+(R)-1]; /* rotate key schedule */ \
+ ts[r + (R)+2] = ts[r+(R)-1]; \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
+
+ for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_512) /* loop thru it */
+#endif /* end of looped code definitions */
+ {
+#define R512_8_rounds(R) /* do 8 full rounds */ \
+ R512(0,1,2,3,4,5,6,7,R_512_0,8*(R)+ 1); \
+ R512(2,1,4,7,6,5,0,3,R_512_1,8*(R)+ 2); \
+ R512(4,1,6,3,0,5,2,7,R_512_2,8*(R)+ 3); \
+ R512(6,1,0,7,2,5,4,3,R_512_3,8*(R)+ 4); \
+ I512(2*(R)); \
+ R512(0,1,2,3,4,5,6,7,R_512_4,8*(R)+ 5); \
+ R512(2,1,4,7,6,5,0,3,R_512_5,8*(R)+ 6); \
+ R512(4,1,6,3,0,5,2,7,R_512_6,8*(R)+ 7); \
+ R512(6,1,0,7,2,5,4,3,R_512_7,8*(R)+ 8); \
+ I512(2*(R)+1); /* and key injection */
+
+ R512_8_rounds( 0);
+
+/* emit further 8-round groups only while required by the round count
+ * (unrolled build) or the chosen unroll factor (looping build) */
+#define R512_Unroll_R(NN) ((SKEIN_UNROLL_512 == 0 && SKEIN_512_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_512 > (NN)))
+
+ #if R512_Unroll_R( 1)
+ R512_8_rounds( 1);
+ #endif
+ #if R512_Unroll_R( 2)
+ R512_8_rounds( 2);
+ #endif
+ #if R512_Unroll_R( 3)
+ R512_8_rounds( 3);
+ #endif
+ #if R512_Unroll_R( 4)
+ R512_8_rounds( 4);
+ #endif
+ #if R512_Unroll_R( 5)
+ R512_8_rounds( 5);
+ #endif
+ #if R512_Unroll_R( 6)
+ R512_8_rounds( 6);
+ #endif
+ #if R512_Unroll_R( 7)
+ R512_8_rounds( 7);
+ #endif
+ #if R512_Unroll_R( 8)
+ R512_8_rounds( 8);
+ #endif
+ #if R512_Unroll_R( 9)
+ R512_8_rounds( 9);
+ #endif
+ #if R512_Unroll_R(10)
+ R512_8_rounds(10);
+ #endif
+ #if R512_Unroll_R(11)
+ R512_8_rounds(11);
+ #endif
+ #if R512_Unroll_R(12)
+ R512_8_rounds(12);
+ #endif
+ #if R512_Unroll_R(13)
+ R512_8_rounds(13);
+ #endif
+ #if R512_Unroll_R(14)
+ R512_8_rounds(14);
+ #endif
+ #if (SKEIN_UNROLL_512 > 14)
+#error "need more unrolling in Skein_512_Process_Block"
+ #endif
+ }
+
+ /* do the final "feedforward" xor, update context chaining vars */
+ ctx->X[0] = X0 ^ w[0];
+ ctx->X[1] = X1 ^ w[1];
+ ctx->X[2] = X2 ^ w[2];
+ ctx->X[3] = X3 ^ w[3];
+ ctx->X[4] = X4 ^ w[4];
+ ctx->X[5] = X5 ^ w[5];
+ ctx->X[6] = X6 ^ w[6];
+ ctx->X[7] = X7 ^ w[7];
+ Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X);
+
+ /* after the first block of a UBI call, clear the FIRST flag in the tweak */
+ ts[1] &= ~SKEIN_T1_FLAG_FIRST;
+ }
+ while (--blkCnt);
+ ctx->h.T[0] = ts[0];
+ ctx->h.T[1] = ts[1];
+ }
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+/* NOTE(review): function-pointer subtraction for code-size measurement is
+ * link-order dependent and not standard C; instrumentation-only code. */
+static size_t Skein_512_Process_Block_CodeSize(void)
+ {
+ return ((u08b_t *) Skein_512_Process_Block_CodeSize) -
+ ((u08b_t *) Skein_512_Process_Block);
+ }
+static uint_t Skein_512_Unroll_Cnt(void)
+ {
+ return SKEIN_UNROLL_512;
+ }
+#endif
+#endif
+
+/***************************** Skein1024 ******************************/
+#if !(SKEIN_USE_ASM & 1024)
+/* Process blkCnt consecutive SKEIN1024_BLOCK_BYTES input blocks at blkPtr
+ * through Threefish-1024 in Skein's UBI chaining mode: 16-word key schedule
+ * from ctx->X plus parity word, rounds with periodic key injections, then
+ * the feedforward XOR back into ctx->X. byteCntAdd is added to the tweak
+ * byte counter ts[0] once per block. Precondition: blkCnt != 0 (asserted). */
+static void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd)
+ { /* do it in C, always looping (unrolled is bigger AND slower!) */
+ enum
+ {
+ WCNT = SKEIN1024_STATE_WORDS
+ };
+#undef RCNT
+#define RCNT (SKEIN1024_ROUNDS_TOTAL/8)
+
+#ifdef SKEIN_LOOP /* configure how much to unroll the loop */
+#define SKEIN_UNROLL_1024 ((SKEIN_LOOP)%10)
+#else
+#define SKEIN_UNROLL_1024 (0)
+#endif
+
+#if (SKEIN_UNROLL_1024 != 0)
+#if (RCNT % SKEIN_UNROLL_1024)
+#error "Invalid SKEIN_UNROLL_1024" /* sanity check on unroll count */
+#endif
+ size_t r;
+ u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/
+#else
+ u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */
+#endif
+
+ u64b_t X00,X01,X02,X03,X04,X05,X06,X07, /* local copy of vars, for speed */
+ X08,X09,X10,X11,X12,X13,X14,X15;
+ u64b_t w [WCNT]; /* local copy of input block */
+#ifdef SKEIN_DEBUG
+ const u64b_t *Xptr[16]; /* use for debugging (help compiler put Xn in registers) */
+ Xptr[ 0] = &X00; Xptr[ 1] = &X01; Xptr[ 2] = &X02; Xptr[ 3] = &X03;
+ Xptr[ 4] = &X04; Xptr[ 5] = &X05; Xptr[ 6] = &X06; Xptr[ 7] = &X07;
+ Xptr[ 8] = &X08; Xptr[ 9] = &X09; Xptr[10] = &X10; Xptr[11] = &X11;
+ Xptr[12] = &X12; Xptr[13] = &X13; Xptr[14] = &X14; Xptr[15] = &X15;
+#endif
+
+ Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
+ ts[0] = ctx->h.T[0];
+ ts[1] = ctx->h.T[1];
+ do {
+ /* this implementation only supports 2**64 input bytes (no carry out here) */
+ ts[0] += byteCntAdd; /* update processed length */
+
+ /* precompute the key schedule for this block */
+ ks[ 0] = ctx->X[ 0];
+ ks[ 1] = ctx->X[ 1];
+ ks[ 2] = ctx->X[ 2];
+ ks[ 3] = ctx->X[ 3];
+ ks[ 4] = ctx->X[ 4];
+ ks[ 5] = ctx->X[ 5];
+ ks[ 6] = ctx->X[ 6];
+ ks[ 7] = ctx->X[ 7];
+ ks[ 8] = ctx->X[ 8];
+ ks[ 9] = ctx->X[ 9];
+ ks[10] = ctx->X[10];
+ ks[11] = ctx->X[11];
+ ks[12] = ctx->X[12];
+ ks[13] = ctx->X[13];
+ ks[14] = ctx->X[14];
+ ks[15] = ctx->X[15];
+ /* ks[16] is the Threefish key-parity word (XOR of all key words and the constant) */
+ ks[16] = ks[ 0] ^ ks[ 1] ^ ks[ 2] ^ ks[ 3] ^
+ ks[ 4] ^ ks[ 5] ^ ks[ 6] ^ ks[ 7] ^
+ ks[ 8] ^ ks[ 9] ^ ks[10] ^ ks[11] ^
+ ks[12] ^ ks[13] ^ ks[14] ^ ks[15] ^ SKEIN_KS_PARITY;
+
+ ts[2] = ts[0] ^ ts[1];
+
+ Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */
+ DebugSaveTweak(ctx);
+ Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts);
+
+ X00 = w[ 0] + ks[ 0]; /* do the first full key injection */
+ X01 = w[ 1] + ks[ 1];
+ X02 = w[ 2] + ks[ 2];
+ X03 = w[ 3] + ks[ 3];
+ X04 = w[ 4] + ks[ 4];
+ X05 = w[ 5] + ks[ 5];
+ X06 = w[ 6] + ks[ 6];
+ X07 = w[ 7] + ks[ 7];
+ X08 = w[ 8] + ks[ 8];
+ X09 = w[ 9] + ks[ 9];
+ X10 = w[10] + ks[10];
+ X11 = w[11] + ks[11];
+ X12 = w[12] + ks[12];
+ X13 = w[13] + ks[13] + ts[0];
+ X14 = w[14] + ks[14] + ts[1];
+ X15 = w[15] + ks[15];
+
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr);
+
+/* one MIX layer over all eight word pairs: add, rotate-left, XOR */
+#define Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rNum) \
+ X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \
+ X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \
+ X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \
+ X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \
+ X##p8 += X##p9; X##p9 = RotL_64(X##p9,ROT##_4); X##p9 ^= X##p8; \
+ X##pA += X##pB; X##pB = RotL_64(X##pB,ROT##_5); X##pB ^= X##pA; \
+ X##pC += X##pD; X##pD = RotL_64(X##pD,ROT##_6); X##pD ^= X##pC; \
+ X##pE += X##pF; X##pF = RotL_64(X##pF,ROT##_7); X##pF ^= X##pE; \
+
+#if SKEIN_UNROLL_1024 == 0
+#define R1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
+ Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rn,Xptr);
+
+/* unrolled key injection: subkey indices rotate mod 17 (keys) and mod 3 (tweak) */
+#define I1024(R) \
+ X00 += ks[((R)+ 1) % 17]; /* inject the key schedule value */ \
+ X01 += ks[((R)+ 2) % 17]; \
+ X02 += ks[((R)+ 3) % 17]; \
+ X03 += ks[((R)+ 4) % 17]; \
+ X04 += ks[((R)+ 5) % 17]; \
+ X05 += ks[((R)+ 6) % 17]; \
+ X06 += ks[((R)+ 7) % 17]; \
+ X07 += ks[((R)+ 8) % 17]; \
+ X08 += ks[((R)+ 9) % 17]; \
+ X09 += ks[((R)+10) % 17]; \
+ X10 += ks[((R)+11) % 17]; \
+ X11 += ks[((R)+12) % 17]; \
+ X12 += ks[((R)+13) % 17]; \
+ X13 += ks[((R)+14) % 17] + ts[((R)+1) % 3]; \
+ X14 += ks[((R)+15) % 17] + ts[((R)+2) % 3]; \
+ X15 += ks[((R)+16) % 17] + (R)+1; \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
+#else /* looping version */
+#define R1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
+ Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rn,Xptr);
+
+/* looping key injection: copy the schedule forward in kw[] so plain r+(R)
+ * offsets stay valid across loop iterations (instead of mod-indexing) */
+#define I1024(R) \
+ X00 += ks[r+(R)+ 0]; /* inject the key schedule value */ \
+ X01 += ks[r+(R)+ 1]; \
+ X02 += ks[r+(R)+ 2]; \
+ X03 += ks[r+(R)+ 3]; \
+ X04 += ks[r+(R)+ 4]; \
+ X05 += ks[r+(R)+ 5]; \
+ X06 += ks[r+(R)+ 6]; \
+ X07 += ks[r+(R)+ 7]; \
+ X08 += ks[r+(R)+ 8]; \
+ X09 += ks[r+(R)+ 9]; \
+ X10 += ks[r+(R)+10]; \
+ X11 += ks[r+(R)+11]; \
+ X12 += ks[r+(R)+12]; \
+ X13 += ks[r+(R)+13] + ts[r+(R)+0]; \
+ X14 += ks[r+(R)+14] + ts[r+(R)+1]; \
+ X15 += ks[r+(R)+15] + r+(R) ; \
+ ks[r + (R)+16] = ks[r+(R)-1]; /* rotate key schedule */ \
+ ts[r + (R)+ 2] = ts[r+(R)-1]; \
+ Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
+
+ for (r=1;r <= 2*RCNT;r+=2*SKEIN_UNROLL_1024) /* loop thru it */
+#endif
+ {
+#define R1024_8_rounds(R) /* do 8 full rounds */ \
+ R1024(00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,R1024_0,8*(R) + 1); \
+ R1024(00,09,02,13,06,11,04,15,10,07,12,03,14,05,08,01,R1024_1,8*(R) + 2); \
+ R1024(00,07,02,05,04,03,06,01,12,15,14,13,08,11,10,09,R1024_2,8*(R) + 3); \
+ R1024(00,15,02,11,06,13,04,09,14,01,08,05,10,03,12,07,R1024_3,8*(R) + 4); \
+ I1024(2*(R)); \
+ R1024(00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,R1024_4,8*(R) + 5); \
+ R1024(00,09,02,13,06,11,04,15,10,07,12,03,14,05,08,01,R1024_5,8*(R) + 6); \
+ R1024(00,07,02,05,04,03,06,01,12,15,14,13,08,11,10,09,R1024_6,8*(R) + 7); \
+ R1024(00,15,02,11,06,13,04,09,14,01,08,05,10,03,12,07,R1024_7,8*(R) + 8); \
+ I1024(2*(R)+1);
+
+ R1024_8_rounds( 0);
+
+/* emit further 8-round groups only while required by the round count
+ * (unrolled build) or the chosen unroll factor (looping build) */
+#define R1024_Unroll_R(NN) ((SKEIN_UNROLL_1024 == 0 && SKEIN1024_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_1024 > (NN)))
+
+ #if R1024_Unroll_R( 1)
+ R1024_8_rounds( 1);
+ #endif
+ #if R1024_Unroll_R( 2)
+ R1024_8_rounds( 2);
+ #endif
+ #if R1024_Unroll_R( 3)
+ R1024_8_rounds( 3);
+ #endif
+ #if R1024_Unroll_R( 4)
+ R1024_8_rounds( 4);
+ #endif
+ #if R1024_Unroll_R( 5)
+ R1024_8_rounds( 5);
+ #endif
+ #if R1024_Unroll_R( 6)
+ R1024_8_rounds( 6);
+ #endif
+ #if R1024_Unroll_R( 7)
+ R1024_8_rounds( 7);
+ #endif
+ #if R1024_Unroll_R( 8)
+ R1024_8_rounds( 8);
+ #endif
+ #if R1024_Unroll_R( 9)
+ R1024_8_rounds( 9);
+ #endif
+ #if R1024_Unroll_R(10)
+ R1024_8_rounds(10);
+ #endif
+ #if R1024_Unroll_R(11)
+ R1024_8_rounds(11);
+ #endif
+ #if R1024_Unroll_R(12)
+ R1024_8_rounds(12);
+ #endif
+ #if R1024_Unroll_R(13)
+ R1024_8_rounds(13);
+ #endif
+ #if R1024_Unroll_R(14)
+ R1024_8_rounds(14);
+ #endif
+ #if (SKEIN_UNROLL_1024 > 14)
+#error "need more unrolling in Skein_1024_Process_Block"
+ #endif
+ }
+ /* do the final "feedforward" xor, update context chaining vars */
+
+ ctx->X[ 0] = X00 ^ w[ 0];
+ ctx->X[ 1] = X01 ^ w[ 1];
+ ctx->X[ 2] = X02 ^ w[ 2];
+ ctx->X[ 3] = X03 ^ w[ 3];
+ ctx->X[ 4] = X04 ^ w[ 4];
+ ctx->X[ 5] = X05 ^ w[ 5];
+ ctx->X[ 6] = X06 ^ w[ 6];
+ ctx->X[ 7] = X07 ^ w[ 7];
+ ctx->X[ 8] = X08 ^ w[ 8];
+ ctx->X[ 9] = X09 ^ w[ 9];
+ ctx->X[10] = X10 ^ w[10];
+ ctx->X[11] = X11 ^ w[11];
+ ctx->X[12] = X12 ^ w[12];
+ ctx->X[13] = X13 ^ w[13];
+ ctx->X[14] = X14 ^ w[14];
+ ctx->X[15] = X15 ^ w[15];
+
+ Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X);
+
+ /* after the first block of a UBI call, clear the FIRST flag in the tweak;
+ note this variant advances blkPtr here rather than before the rounds */
+ ts[1] &= ~SKEIN_T1_FLAG_FIRST;
+ blkPtr += SKEIN1024_BLOCK_BYTES;
+ }
+ while (--blkCnt);
+ ctx->h.T[0] = ts[0];
+ ctx->h.T[1] = ts[1];
+ }
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+/* NOTE(review): function-pointer subtraction for code-size measurement is
+ * link-order dependent and not standard C; instrumentation-only code. */
+static size_t Skein1024_Process_Block_CodeSize(void)
+ {
+ return ((u08b_t *) Skein1024_Process_Block_CodeSize) -
+ ((u08b_t *) Skein1024_Process_Block);
+ }
+static uint_t Skein1024_Unroll_Cnt(void)
+ {
+ return SKEIN_UNROLL_1024;
+ }
+#endif
+#endif
+
+
+#if 0
+/*****************************************************************/
+/* 256-bit Skein */
+/*****************************************************************/
+/* NOTE: this "#if 0" region (Skein_256_Init and Skein_256_InitExt below)
+ * is compiled out in this build. */
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* init the context for a straight hashing operation */
+/* Sets ctx->X either from a precomputed IV (common hashBitLen values)
+ * or by processing a freshly built CONFIG block, then arms the context
+ * for MSG input. Returns SKEIN_SUCCESS. */
+static int Skein_256_Init(Skein_256_Ctxt_t *ctx, size_t hashBitLen)
+ {
+ union
+ {
+ u08b_t b[SKEIN_256_STATE_BYTES];
+ u64b_t w[SKEIN_256_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+
+ switch (hashBitLen)
+ { /* use pre-computed values, where available */
+#ifndef SKEIN_NO_PRECOMP
+ case 256: memcpy(ctx->X,SKEIN_256_IV_256,sizeof(ctx->X)); break;
+ case 224: memcpy(ctx->X,SKEIN_256_IV_224,sizeof(ctx->X)); break;
+ case 160: memcpy(ctx->X,SKEIN_256_IV_160,sizeof(ctx->X)); break;
+ case 128: memcpy(ctx->X,SKEIN_256_IV_128,sizeof(ctx->X)); break;
+#endif
+ default:
+ /* here if there is no precomputed IV value available */
+ /* build/process the config block, type == CONFIG (could be precomputed) */
+ Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */
+
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */
+ cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
+ cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
+ memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */
+
+ /* compute the initial chaining values from config block */
+ memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */
+ Skein_256_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
+ break;
+ }
+ /* The chaining vars ctx->X are now initialized for the given hashBitLen. */
+ /* Set up to process the data message portion of the hash (default) */
+ Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type */
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* init the context for a MAC and/or tree hash operation */
+/* [identical to Skein_256_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */
+/* Pre-hashes the key (if any) into ctx->X, then processes a CONFIG block
+ * carrying hashBitLen and treeInfo, and arms the context for MSG input.
+ * Returns SKEIN_SUCCESS. (Compiled out: enclosing "#if 0".) */
+static int Skein_256_InitExt(Skein_256_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes)
+ {
+ union
+ {
+ u08b_t b[SKEIN_256_STATE_BYTES];
+ u64b_t w[SKEIN_256_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
+ Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL);
+
+ /* compute the initial chaining values ctx->X[], based on key */
+ if (keyBytes == 0) /* is there a key? */
+ {
+ memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */
+ }
+ else /* here to pre-process a key */
+ {
+ Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X));
+ /* do a mini-Init right here */
+ ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */
+ Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */
+ memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */
+ Skein_256_Update(ctx,key,keyBytes); /* hash the key */
+ Skein_256_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */
+ memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */
+#if SKEIN_NEED_SWAP
+ {
+ uint_t i;
+ /* FIX: this loop header had been corrupted ("for (i=0;iX[i] = ...") --
+ * the text between '<' and '>' was stripped. Restored per the Skein
+ * reference code: byte-swap each chaining word on big-endian builds. */
+ for (i=0;i<SKEIN_256_STATE_WORDS;i++)
+ ctx->X[i] = Skein_Swap64(ctx->X[i]);
+ }
+#endif
+ }
+ /* build/process the config block, type == CONFIG (could be precomputed for each key) */
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+ Skein_Start_New_Type(ctx,CFG_FINAL);
+
+ memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+ cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
+ cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
+
+ Skein_Show_Key(256,&ctx->h,key,keyBytes);
+
+ /* compute the initial chaining values from config block */
+ Skein_256_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
+
+ /* The chaining vars ctx->X are now initialized */
+ /* Set up to process the data message portion of the hash (default) */
+ ctx->h.bCnt = 0; /* buffer b[] starts out empty */
+ Skein_Start_New_Type(ctx,MSG);
+
+ return SKEIN_SUCCESS;
+ }
+#endif
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* process the input bytes */
+/* Buffers partial blocks in ctx->b; full blocks go straight through
+ * Skein_256_Process_Block. Always keeps at least one byte buffered so the
+ * final block can be marked FINAL later. Returns SKEIN_SUCCESS. */
+static int Skein_256_Update(Skein_256_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt)
+ {
+ size_t n;
+
+ Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ /* process full blocks, if any */
+ if (msgByteCnt + ctx->h.bCnt > SKEIN_256_BLOCK_BYTES)
+ {
+ if (ctx->h.bCnt) /* finish up any buffered message data */
+ {
+ n = SKEIN_256_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */
+ if (n)
+ {
+ Skein_assert(n < msgByteCnt); /* check on our logic here */
+ memcpy(&ctx->b[ctx->h.bCnt],msg,n);
+ msgByteCnt -= n;
+ msg += n;
+ ctx->h.bCnt += n;
+ }
+ Skein_assert(ctx->h.bCnt == SKEIN_256_BLOCK_BYTES);
+ Skein_256_Process_Block(ctx,ctx->b,1,SKEIN_256_BLOCK_BYTES);
+ ctx->h.bCnt = 0;
+ }
+ /* now process any remaining full blocks, directly from input message data */
+ /* ">" (not ">=") and "(msgByteCnt-1)" keep the last block buffered */
+ if (msgByteCnt > SKEIN_256_BLOCK_BYTES)
+ {
+ n = (msgByteCnt-1) / SKEIN_256_BLOCK_BYTES; /* number of full blocks to process */
+ Skein_256_Process_Block(ctx,msg,n,SKEIN_256_BLOCK_BYTES);
+ msgByteCnt -= n * SKEIN_256_BLOCK_BYTES;
+ msg += n * SKEIN_256_BLOCK_BYTES;
+ }
+ Skein_assert(ctx->h.bCnt == 0);
+ }
+
+ /* copy any remaining source message data bytes into b[] */
+ if (msgByteCnt)
+ {
+ Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES);
+ memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt);
+ ctx->h.bCnt += msgByteCnt;
+ }
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* finalize the hash computation and output the result */
+/* Processes the buffered final block with the FINAL tweak flag set, then
+ * runs Threefish in "counter mode" (one OUT_FINAL block per output block)
+ * to produce byteCnt = ceil(hashBitLen/8) bytes into hashVal. */
+static int Skein_256_Final(Skein_256_Ctxt_t *ctx, u08b_t *hashVal)
+ {
+ size_t i,n,byteCnt;
+ u64b_t X[SKEIN_256_STATE_WORDS];
+ Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES) /* zero pad b[] if necessary */
+ memset(&ctx->b[ctx->h.bCnt],0,SKEIN_256_BLOCK_BYTES - ctx->h.bCnt);
+
+ Skein_256_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */
+
+ /* now output the result */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */
+
+ /* run Threefish in "counter mode" to generate output */
+ memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */
+ memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */
+ for (i=0;i*SKEIN_256_BLOCK_BYTES < byteCnt;i++)
+ {
+ /* NOTE(review): cast assumes ctx->b is u64b_t-aligned -- presumably
+ * guaranteed by the context layout; confirm against the header */
+ ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */
+ Skein_Start_New_Type(ctx,OUT_FINAL);
+ Skein_256_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
+ n = byteCnt - i*SKEIN_256_BLOCK_BYTES; /* number of output bytes left to go */
+ if (n >= SKEIN_256_BLOCK_BYTES)
+ n = SKEIN_256_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal+i*SKEIN_256_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(256,&ctx->h,n,hashVal+i*SKEIN_256_BLOCK_BYTES);
+ memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */
+ }
+ return SKEIN_SUCCESS;
+ }
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+/* instrumentation-only: non-portable function-address arithmetic */
+static size_t Skein_256_API_CodeSize(void)
+ {
+ return ((u08b_t *) Skein_256_API_CodeSize) -
+ ((u08b_t *) Skein_256_Init);
+ }
+#endif
+
+/*****************************************************************/
+/* 512-bit Skein */
+/*****************************************************************/
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* init the context for a straight hashing operation */
+/* Sets ctx->X either from a precomputed IV (common hashBitLen values)
+ * or by processing a freshly built CONFIG block, then arms the context
+ * for MSG input. Returns SKEIN_SUCCESS. */
+static int Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen)
+ {
+ union
+ {
+ u08b_t b[SKEIN_512_STATE_BYTES];
+ u64b_t w[SKEIN_512_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+
+ switch (hashBitLen)
+ { /* use pre-computed values, where available */
+#ifndef SKEIN_NO_PRECOMP
+ case 512: memcpy(ctx->X,SKEIN_512_IV_512,sizeof(ctx->X)); break;
+ case 384: memcpy(ctx->X,SKEIN_512_IV_384,sizeof(ctx->X)); break;
+ case 256: memcpy(ctx->X,SKEIN_512_IV_256,sizeof(ctx->X)); break;
+ case 224: memcpy(ctx->X,SKEIN_512_IV_224,sizeof(ctx->X)); break;
+#endif
+ default:
+ /* here if there is no precomputed IV value available */
+ /* build/process the config block, type == CONFIG (could be precomputed) */
+ Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */
+
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */
+ cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
+ cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
+ memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */
+
+ /* compute the initial chaining values from config block */
+ memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */
+ Skein_512_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
+ break;
+ }
+
+ /* The chaining vars ctx->X are now initialized for the given hashBitLen. */
+ /* Set up to process the data message portion of the hash (default) */
+ Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type */
+
+ return SKEIN_SUCCESS;
+ }
+
+#if 0
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* init the context for a MAC and/or tree hash operation */
+/* [identical to Skein_512_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */
+/* Pre-hashes the key (if any) into ctx->X, then processes a CONFIG block
+ * carrying hashBitLen and treeInfo, and arms the context for MSG input.
+ * Returns SKEIN_SUCCESS. (Compiled out: enclosing "#if 0".) */
+static int Skein_512_InitExt(Skein_512_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes)
+ {
+ union
+ {
+ u08b_t b[SKEIN_512_STATE_BYTES];
+ u64b_t w[SKEIN_512_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
+ Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL);
+
+ /* compute the initial chaining values ctx->X[], based on key */
+ if (keyBytes == 0) /* is there a key? */
+ {
+ memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */
+ }
+ else /* here to pre-process a key */
+ {
+ Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X));
+ /* do a mini-Init right here */
+ ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */
+ Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */
+ memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */
+ Skein_512_Update(ctx,key,keyBytes); /* hash the key */
+ Skein_512_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */
+ memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */
+#if SKEIN_NEED_SWAP
+ {
+ uint_t i;
+ /* FIX: this loop header had been corrupted ("for (i=0;iX[i] = ...") --
+ * the text between '<' and '>' was stripped. Restored per the Skein
+ * reference code: byte-swap each chaining word on big-endian builds. */
+ for (i=0;i<SKEIN_512_STATE_WORDS;i++)
+ ctx->X[i] = Skein_Swap64(ctx->X[i]);
+ }
+#endif
+ }
+ /* build/process the config block, type == CONFIG (could be precomputed for each key) */
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+ Skein_Start_New_Type(ctx,CFG_FINAL);
+
+ memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+ cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
+ cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
+
+ Skein_Show_Key(512,&ctx->h,key,keyBytes);
+
+ /* compute the initial chaining values from config block */
+ Skein_512_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
+
+ /* The chaining vars ctx->X are now initialized */
+ /* Set up to process the data message portion of the hash (default) */
+ ctx->h.bCnt = 0; /* buffer b[] starts out empty */
+ Skein_Start_New_Type(ctx,MSG);
+
+ return SKEIN_SUCCESS;
+ }
+#endif
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* process the input bytes */
+/* Buffers partial blocks in ctx->b; full blocks go straight through
+ * Skein_512_Process_Block. Always keeps at least one byte buffered so the
+ * final block can be marked FINAL later. Returns SKEIN_SUCCESS. */
+static int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt)
+ {
+ size_t n;
+
+ Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ /* process full blocks, if any */
+ if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES)
+ {
+ if (ctx->h.bCnt) /* finish up any buffered message data */
+ {
+ n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */
+ if (n)
+ {
+ Skein_assert(n < msgByteCnt); /* check on our logic here */
+ memcpy(&ctx->b[ctx->h.bCnt],msg,n);
+ msgByteCnt -= n;
+ msg += n;
+ ctx->h.bCnt += n;
+ }
+ Skein_assert(ctx->h.bCnt == SKEIN_512_BLOCK_BYTES);
+ Skein_512_Process_Block(ctx,ctx->b,1,SKEIN_512_BLOCK_BYTES);
+ ctx->h.bCnt = 0;
+ }
+ /* now process any remaining full blocks, directly from input message data */
+ /* ">" (not ">=") and "(msgByteCnt-1)" keep the last block buffered */
+ if (msgByteCnt > SKEIN_512_BLOCK_BYTES)
+ {
+ n = (msgByteCnt-1) / SKEIN_512_BLOCK_BYTES; /* number of full blocks to process */
+ Skein_512_Process_Block(ctx,msg,n,SKEIN_512_BLOCK_BYTES);
+ msgByteCnt -= n * SKEIN_512_BLOCK_BYTES;
+ msg += n * SKEIN_512_BLOCK_BYTES;
+ }
+ Skein_assert(ctx->h.bCnt == 0);
+ }
+
+ /* copy any remaining source message data bytes into b[] */
+ if (msgByteCnt)
+ {
+ Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES);
+ memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt);
+ ctx->h.bCnt += msgByteCnt;
+ }
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* finalize the hash computation and output the result */
+/* Pads and hashes the last buffered block with the FINAL tweak flag,
+** then runs Threefish in "counter mode" over the chaining value to
+** squeeze out hashBitLen bits (possibly more than one output block). */
+static int Skein_512_Final(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
+ {
+ size_t i,n,byteCnt;
+ u64b_t X[SKEIN_512_STATE_WORDS];
+ Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) /* zero pad b[] if necessary */
+ memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
+
+ Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */
+
+ /* now output the result */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */
+
+ /* run Threefish in "counter mode" to generate output */
+ memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */
+ memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */
+ for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++)
+ {
+ ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */
+ Skein_Start_New_Type(ctx,OUT_FINAL);
+ Skein_512_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
+ n = byteCnt - i*SKEIN_512_BLOCK_BYTES; /* number of output bytes left to go */
+ if (n >= SKEIN_512_BLOCK_BYTES)
+ n = SKEIN_512_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(512,&ctx->h,n,hashVal+i*SKEIN_512_BLOCK_BYTES);
+ memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */
+ }
+ return SKEIN_SUCCESS;
+ }
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+/* Rough code-size metric: byte distance between two function entry
+** points; only meaningful when the linker keeps them adjacent. */
+static size_t Skein_512_API_CodeSize(void)
+ {
+ return ((u08b_t *) Skein_512_API_CodeSize) -
+ ((u08b_t *) Skein_512_Init);
+ }
+#endif
+
+/*****************************************************************/
+/* 1024-bit Skein */
+/*****************************************************************/
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* init the context for a straight hashing operation */
+/* Uses a precomputed IV for the common output sizes; otherwise builds
+** and hashes a CONFIG block to derive the initial chaining values. */
+static int Skein1024_Init(Skein1024_Ctxt_t *ctx, size_t hashBitLen)
+ {
+ union
+ {
+ u08b_t b[SKEIN1024_STATE_BYTES];
+ u64b_t w[SKEIN1024_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+
+ switch (hashBitLen)
+ { /* use pre-computed values, where available */
+#ifndef SKEIN_NO_PRECOMP
+ case 512: memcpy(ctx->X,SKEIN1024_IV_512 ,sizeof(ctx->X)); break;
+ case 384: memcpy(ctx->X,SKEIN1024_IV_384 ,sizeof(ctx->X)); break;
+ case 1024: memcpy(ctx->X,SKEIN1024_IV_1024,sizeof(ctx->X)); break;
+#endif
+ default:
+ /* here if there is no precomputed IV value available */
+ /* build/process the config block, type == CONFIG (could be precomputed) */
+ Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */
+
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */
+ cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
+ cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
+ memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */
+
+ /* compute the initial chaining values from config block */
+ memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */
+ Skein1024_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
+ break;
+ }
+
+ /* The chaining vars ctx->X are now initialized for the given hashBitLen. */
+ /* Set up to process the data message portion of the hash (default) */
+ Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type */
+
+ return SKEIN_SUCCESS;
+ }
+
+#if 0
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* init the context for a MAC and/or tree hash operation */
+/* [identical to Skein1024_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */
+/* NOTE(review): this function is compiled out (#if 0). The byte-swap
+** loop below had been corrupted in this copy ("for (i=0;iX[i] = ...");
+** it is restored here to the Skein reference-implementation form. */
+static int Skein1024_InitExt(Skein1024_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes)
+ {
+ union
+ {
+ u08b_t b[SKEIN1024_STATE_BYTES];
+ u64b_t w[SKEIN1024_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
+ Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL);
+
+ /* compute the initial chaining values ctx->X[], based on key */
+ if (keyBytes == 0) /* is there a key? */
+ {
+ memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */
+ }
+ else /* here to pre-process a key */
+ {
+ Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X));
+ /* do a mini-Init right here */
+ ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */
+ Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */
+ memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */
+ Skein1024_Update(ctx,key,keyBytes); /* hash the key */
+ Skein1024_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */
+ memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */
+#if SKEIN_NEED_SWAP
+ {
+ uint_t i;
+ for (i=0;i<SKEIN1024_STATE_WORDS;i++) /* convert key bytes to context words */
+ ctx->X[i] = Skein_Swap64(ctx->X[i]);
+ }
+#endif
+ }
+ /* build/process the config block, type == CONFIG (could be precomputed for each key) */
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+ Skein_Start_New_Type(ctx,CFG_FINAL);
+
+ memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+ cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
+ cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
+
+ Skein_Show_Key(1024,&ctx->h,key,keyBytes);
+
+ /* compute the initial chaining values from config block */
+ Skein1024_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
+
+ /* The chaining vars ctx->X are now initialized */
+ /* Set up to process the data message portion of the hash (default) */
+ ctx->h.bCnt = 0; /* buffer b[] starts out empty */
+ Skein_Start_New_Type(ctx,MSG);
+
+ return SKEIN_SUCCESS;
+ }
+#endif
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* process the input bytes */
+/* Same buffering scheme as Skein_512_Update, with the 1024-bit block
+** size: at least one byte is always held back for Skein1024_Final. */
+static int Skein1024_Update(Skein1024_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt)
+ {
+ size_t n;
+
+ Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ /* process full blocks, if any */
+ if (msgByteCnt + ctx->h.bCnt > SKEIN1024_BLOCK_BYTES)
+ {
+ if (ctx->h.bCnt) /* finish up any buffered message data */
+ {
+ n = SKEIN1024_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */
+ if (n)
+ {
+ Skein_assert(n < msgByteCnt); /* check on our logic here */
+ memcpy(&ctx->b[ctx->h.bCnt],msg,n);
+ msgByteCnt -= n;
+ msg += n;
+ ctx->h.bCnt += n;
+ }
+ Skein_assert(ctx->h.bCnt == SKEIN1024_BLOCK_BYTES);
+ Skein1024_Process_Block(ctx,ctx->b,1,SKEIN1024_BLOCK_BYTES);
+ ctx->h.bCnt = 0;
+ }
+ /* now process any remaining full blocks, directly from input message data */
+ if (msgByteCnt > SKEIN1024_BLOCK_BYTES)
+ {
+ n = (msgByteCnt-1) / SKEIN1024_BLOCK_BYTES; /* number of full blocks to process */
+ Skein1024_Process_Block(ctx,msg,n,SKEIN1024_BLOCK_BYTES);
+ msgByteCnt -= n * SKEIN1024_BLOCK_BYTES;
+ msg += n * SKEIN1024_BLOCK_BYTES;
+ }
+ Skein_assert(ctx->h.bCnt == 0);
+ }
+
+ /* copy any remaining source message data bytes into b[] */
+ if (msgByteCnt)
+ {
+ Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES);
+ memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt);
+ ctx->h.bCnt += msgByteCnt;
+ }
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* finalize the hash computation and output the result */
+/* Same structure as Skein_512_Final: FINAL-tagged last block, then a
+** Threefish counter-mode squeeze of hashBitLen bits. */
+static int Skein1024_Final(Skein1024_Ctxt_t *ctx, u08b_t *hashVal)
+ {
+ size_t i,n,byteCnt;
+ u64b_t X[SKEIN1024_STATE_WORDS];
+ Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES) /* zero pad b[] if necessary */
+ memset(&ctx->b[ctx->h.bCnt],0,SKEIN1024_BLOCK_BYTES - ctx->h.bCnt);
+
+ Skein1024_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */
+
+ /* now output the result */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */
+
+ /* run Threefish in "counter mode" to generate output */
+ memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */
+ memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */
+ for (i=0;i*SKEIN1024_BLOCK_BYTES < byteCnt;i++)
+ {
+ ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */
+ Skein_Start_New_Type(ctx,OUT_FINAL);
+ Skein1024_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
+ n = byteCnt - i*SKEIN1024_BLOCK_BYTES; /* number of output bytes left to go */
+ if (n >= SKEIN1024_BLOCK_BYTES)
+ n = SKEIN1024_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal+i*SKEIN1024_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(1024,&ctx->h,n,hashVal+i*SKEIN1024_BLOCK_BYTES);
+ memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */
+ }
+ return SKEIN_SUCCESS;
+ }
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+/* Rough code-size metric for the 1024-bit API; see note on the 512-bit
+** variant - valid only if the linker keeps these functions adjacent. */
+static size_t Skein1024_API_CodeSize(void)
+ {
+ return ((u08b_t *) Skein1024_API_CodeSize) -
+ ((u08b_t *) Skein1024_Init);
+ }
+#endif
+
+/**************** Functions to support MAC/tree hashing ***************/
+/* (this code is identical for Optimized and Reference versions) */
+
+#if 0
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* finalize the hash computation and output the block, no OUTPUT stage */
+/* Emits the raw chaining value (no counter-mode squeeze); used as the
+** intermediate step for MAC/tree hashing. NOTE: this whole MAC/tree
+** section is currently compiled out (#if 0). */
+static int Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, u08b_t *hashVal)
+ {
+ Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES) /* zero pad b[] if necessary */
+ memset(&ctx->b[ctx->h.bCnt],0,SKEIN_256_BLOCK_BYTES - ctx->h.bCnt);
+ Skein_256_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */
+
+ Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN_256_BLOCK_BYTES); /* "output" the state bytes */
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* finalize the hash computation and output the block, no OUTPUT stage */
+/* 512-bit variant of Final_Pad: raw chaining value out, for MAC/tree use. */
+static int Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
+ {
+ Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) /* zero pad b[] if necessary */
+ memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
+ Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */
+
+ Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN_512_BLOCK_BYTES); /* "output" the state bytes */
+
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* finalize the hash computation and output the block, no OUTPUT stage */
+/* 1024-bit variant of Final_Pad: raw chaining value out, for MAC/tree use. */
+static int Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, u08b_t *hashVal)
+ {
+ Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES) /* zero pad b[] if necessary */
+ memset(&ctx->b[ctx->h.bCnt],0,SKEIN1024_BLOCK_BYTES - ctx->h.bCnt);
+ Skein1024_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */
+
+ Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN1024_BLOCK_BYTES); /* "output" the state bytes */
+
+ return SKEIN_SUCCESS;
+ }
+
+
+#if SKEIN_TREE_HASH
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* just do the OUTPUT stage */
+/* Counter-mode squeeze only; assumes the FINAL data block was already
+** processed (e.g. via Final_Pad above). */
+static int Skein_256_Output(Skein_256_Ctxt_t *ctx, u08b_t *hashVal)
+ {
+ size_t i,n,byteCnt;
+ u64b_t X[SKEIN_256_STATE_WORDS];
+ Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ /* now output the result */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */
+
+ /* run Threefish in "counter mode" to generate output */
+ memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */
+ memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */
+ for (i=0;i*SKEIN_256_BLOCK_BYTES < byteCnt;i++)
+ {
+ ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */
+ Skein_Start_New_Type(ctx,OUT_FINAL);
+ Skein_256_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
+ n = byteCnt - i*SKEIN_256_BLOCK_BYTES; /* number of output bytes left to go */
+ if (n >= SKEIN_256_BLOCK_BYTES)
+ n = SKEIN_256_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal+i*SKEIN_256_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(256,&ctx->h,n,hashVal+i*SKEIN_256_BLOCK_BYTES);
+ memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */
+ }
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* just do the OUTPUT stage */
+/* Counter-mode squeeze only; assumes the FINAL data block was already
+** processed. Fix: the debug callout below reported width 256 (copy/
+** paste from Skein_256_Output); it now reports 512. This only affects
+** SKEIN_DEBUG builds' display, not the hash values. */
+static int Skein_512_Output(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
+ {
+ size_t i,n,byteCnt;
+ u64b_t X[SKEIN_512_STATE_WORDS];
+ Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ /* now output the result */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */
+
+ /* run Threefish in "counter mode" to generate output */
+ memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */
+ memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */
+ for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++)
+ {
+ ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */
+ Skein_Start_New_Type(ctx,OUT_FINAL);
+ Skein_512_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
+ n = byteCnt - i*SKEIN_512_BLOCK_BYTES; /* number of output bytes left to go */
+ if (n >= SKEIN_512_BLOCK_BYTES)
+ n = SKEIN_512_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(512,&ctx->h,n,hashVal+i*SKEIN_512_BLOCK_BYTES); /* was 256: copy/paste error */
+ memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */
+ }
+ return SKEIN_SUCCESS;
+ }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* just do the OUTPUT stage */
+/* Counter-mode squeeze only; assumes the FINAL data block was already
+** processed. Fix: the debug callout below reported width 256 (copy/
+** paste from Skein_256_Output); it now reports 1024. This only affects
+** SKEIN_DEBUG builds' display, not the hash values. */
+static int Skein1024_Output(Skein1024_Ctxt_t *ctx, u08b_t *hashVal)
+ {
+ size_t i,n,byteCnt;
+ u64b_t X[SKEIN1024_STATE_WORDS];
+ Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */
+
+ /* now output the result */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */
+
+ /* run Threefish in "counter mode" to generate output */
+ memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */
+ memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */
+ for (i=0;i*SKEIN1024_BLOCK_BYTES < byteCnt;i++)
+ {
+ ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */
+ Skein_Start_New_Type(ctx,OUT_FINAL);
+ Skein1024_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
+ n = byteCnt - i*SKEIN1024_BLOCK_BYTES; /* number of output bytes left to go */
+ if (n >= SKEIN1024_BLOCK_BYTES)
+ n = SKEIN1024_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal+i*SKEIN1024_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(1024,&ctx->h,n,hashVal+i*SKEIN1024_BLOCK_BYTES); /* was 256: copy/paste error */
+ memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */
+ }
+ return SKEIN_SUCCESS;
+ }
+#endif
+#endif
+
+/* NIST-style wrapper context: statebits records which union member is
+** live, so Update/Final can dispatch to the right state size. */
+typedef struct
+{
+ uint_t statebits; /* 256, 512, or 1024 */
+ union
+ {
+ Skein_Ctxt_Hdr_t h; /* common header "overlay" */
+ Skein_256_Ctxt_t ctx_256;
+ Skein_512_Ctxt_t ctx_512;
+ Skein1024_Ctxt_t ctx1024;
+ } u;
+}
+hashState;
+
+/* "incremental" hashing API */
+static SkeinHashReturn Init (hashState *state, int hashbitlen);
+static SkeinHashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen);
+static SkeinHashReturn Final (hashState *state, BitSequence *hashval);
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* select the context size and init the context */
+/* Picks the smallest Skein state that covers hashbitlen; statebits then
+** drives the (statebits>>8)&3 dispatch used in Update/Final
+** (1 -> 256-bit, 2 -> 512-bit, 0 -> 1024-bit). */
+static SkeinHashReturn Init(hashState *state, int hashbitlen)
+{
+#if SKEIN_256_NIST_MAX_HASH_BITS
+ if (hashbitlen <= SKEIN_256_NIST_MAX_HASHBITS)
+ {
+ Skein_Assert(hashbitlen > 0,BAD_HASHLEN);
+ state->statebits = 64*SKEIN_256_STATE_WORDS;
+ return Skein_256_Init(&state->u.ctx_256,(size_t) hashbitlen);
+ }
+#endif
+ if (hashbitlen <= SKEIN_512_NIST_MAX_HASHBITS)
+ {
+ state->statebits = 64*SKEIN_512_STATE_WORDS;
+ return Skein_512_Init(&state->u.ctx_512,(size_t) hashbitlen);
+ }
+ else
+ {
+ state->statebits = 64*SKEIN1024_STATE_WORDS;
+ return Skein1024_Init(&state->u.ctx1024,(size_t) hashbitlen);
+ }
+}
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* process data to be hashed */
+/* Dispatches on the state size chosen by Init(). Bit lengths that are
+** not a multiple of 8 are handled by masking/padding the final partial
+** byte and setting the BIT_PAD tweak flag - after that, only a
+** zero-length Update is permitted (first assert below). */
+static SkeinHashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen)
+{
+ /* only the final Update() call is allowed do partial bytes, else assert an error */
+ Skein_Assert((state->u.h.T[1] & SKEIN_T1_FLAG_BIT_PAD) == 0 || databitlen == 0, SKEIN_FAIL);
+
+ Skein_Assert(state->statebits % 256 == 0 && (state->statebits-256) < 1024,SKEIN_FAIL);
+ if ((databitlen & 7) == 0) /* partial bytes? */
+ {
+ switch ((state->statebits >> 8) & 3)
+ {
+ case 2: return Skein_512_Update(&state->u.ctx_512,data,databitlen >> 3);
+ case 1: return Skein_256_Update(&state->u.ctx_256,data,databitlen >> 3);
+ case 0: return Skein1024_Update(&state->u.ctx1024,data,databitlen >> 3);
+ default: return SKEIN_FAIL;
+ }
+ }
+ else
+ { /* handle partial final byte */
+ size_t bCnt = (databitlen >> 3) + 1; /* number of bytes to handle (nonzero here!) */
+ u08b_t b,mask;
+
+ mask = (u08b_t) (1u << (7 - (databitlen & 7))); /* partial byte bit mask */
+ b = (u08b_t) ((data[bCnt-1] & (0-mask)) | mask); /* apply bit padding on final byte */
+
+ switch ((state->statebits >> 8) & 3)
+ {
+ case 2: Skein_512_Update(&state->u.ctx_512,data,bCnt-1); /* process all but the final byte */
+ Skein_512_Update(&state->u.ctx_512,&b , 1 ); /* process the (masked) partial byte */
+ break;
+ case 1: Skein_256_Update(&state->u.ctx_256,data,bCnt-1); /* process all but the final byte */
+ Skein_256_Update(&state->u.ctx_256,&b , 1 ); /* process the (masked) partial byte */
+ break;
+ case 0: Skein1024_Update(&state->u.ctx1024,data,bCnt-1); /* process all but the final byte */
+ Skein1024_Update(&state->u.ctx1024,&b , 1 ); /* process the (masked) partial byte */
+ break;
+ default: return SKEIN_FAIL;
+ }
+ Skein_Set_Bit_Pad_Flag(state->u.h); /* set tweak flag for the final call */
+
+ return SKEIN_SUCCESS;
+ }
+}
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* finalize hash computation and output the result (hashbitlen bits) */
+/* Dispatches to the size-specific Final for the live union member. */
+static SkeinHashReturn Final(hashState *state, BitSequence *hashval)
+{
+ Skein_Assert(state->statebits % 256 == 0 && (state->statebits-256) < 1024,FAIL);
+ switch ((state->statebits >> 8) & 3)
+ {
+ case 2: return Skein_512_Final(&state->u.ctx_512,hashval);
+ case 1: return Skein_256_Final(&state->u.ctx_256,hashval);
+ case 0: return Skein1024_Final(&state->u.ctx1024,hashval);
+ default: return SKEIN_FAIL;
+ }
+}
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* all-in-one hash function */
+/* Convenience wrapper: Init + Update + Final over a single buffer.
+** Returns the Init/Update status; Final's return value is ignored on
+** purpose (per the comment below it does not fail when called properly). */
+SkeinHashReturn c_skein_hash(int hashbitlen, const BitSequence *data, /* all-in-one call */
+ DataLength databitlen,BitSequence *hashval)
+{
+ hashState state;
+ SkeinHashReturn r = Init(&state,hashbitlen);
+ if (r == SKEIN_SUCCESS)
+ { /* these calls do not fail when called properly */
+ r = Update(&state,data,databitlen);
+ Final(&state,hashval);
+ }
+ return r;
+}
diff --git a/stratum/algos/cryptonote/crypto/c_skein.h b/stratum/algos/cryptonote/crypto/c_skein.h
new file mode 100644
index 000000000..256cc4d59
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/c_skein.h
@@ -0,0 +1,45 @@
+#ifndef _SKEIN_H_
+#define _SKEIN_H_ 1
+/**************************************************************************
+**
+** Interface declarations and internal definitions for Skein hashing.
+**
+** Source code author: Doug Whiting, 2008.
+**
+** This algorithm and source code is released to the public domain.
+**
+***************************************************************************
+**
+** The following compile-time switches may be defined to control some
+** tradeoffs between speed, code size, error checking, and security.
+**
+** The "default" note explains what happens when the switch is not defined.
+**
+** SKEIN_DEBUG -- make callouts from inside Skein code
+** to examine/display intermediate values.
+** [default: no callouts (no overhead)]
+**
+** SKEIN_ERR_CHECK -- how error checking is handled inside Skein
+** code. If not defined, most error checking
+** is disabled (for performance). Otherwise,
+** the switch value is interpreted as:
+** 0: use assert() to flag errors
+** 1: return SKEIN_FAIL to flag errors
+**
+***************************************************************************/
+#include "skein_port.h" /* get platform-specific definitions */
+#include "hash.h"
+
+/* Status codes for all Skein API calls. */
+/* NOTE(review): the guard macro _SKEIN_H_ uses a leading underscore,
+** which is in the compiler-reserved identifier space (C11 7.1.3). */
+typedef enum
+{
+ SKEIN_SUCCESS = 0, /* return codes from Skein calls */
+ SKEIN_FAIL = 1,
+ SKEIN_BAD_HASHLEN = 2
+}
+SkeinHashReturn;
+
+/* "all-in-one" call */
+SkeinHashReturn c_skein_hash(int hashbitlen, const BitSequence *data,
+ DataLength databitlen, BitSequence *hashval);
+
+#endif /* ifndef _SKEIN_H_ */
diff --git a/stratum/algos/cryptonote/crypto/crypto.h b/stratum/algos/cryptonote/crypto/crypto.h
new file mode 100644
index 000000000..61641fbcf
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/crypto.h
@@ -0,0 +1,186 @@
+// Copyright (c) 2012-2013 The Cryptonote developers
+// Distributed under the MIT/X11 software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#pragma once
+
+// NOTE(review): the three #include lines below had lost their <...>
+// arguments (angle-bracket text stripped); restored from the names this
+// header actually uses (std::size_t, std::mutex/std::lock_guard,
+// std::vector), plus <type_traits> for the enable_if/is_pod in rand<T>().
+#include <cstddef>
+#include <mutex>
+#include <vector>
+#include <type_traits>
+
+#include "common/pod-class.h"
+#include "generic-ops.h"
+#include "hash.h"
+
+namespace crypto {
+
+ extern "C" {
+#include "random.h"
+ }
+
+ // Serializes access to the (non-thread-safe) C random generator.
+ extern std::mutex random_lock;
+
+#pragma pack(push, 1)
+ /* 32-byte wire-format curve types. pack(1) removes padding so the
+ static_assert sizes below hold. POD_CLASS comes from
+ common/pod-class.h - presumably a class/struct keyword shim; verify. */
+ POD_CLASS ec_point {
+ char data[32];
+ };
+
+ POD_CLASS ec_scalar {
+ char data[32];
+ };
+
+ POD_CLASS public_key: ec_point {
+ friend class crypto_ops;
+ };
+
+ POD_CLASS secret_key: ec_scalar {
+ friend class crypto_ops;
+ };
+
+ POD_CLASS key_derivation: ec_point {
+ friend class crypto_ops;
+ };
+
+ POD_CLASS key_image: ec_point {
+ friend class crypto_ops;
+ };
+
+ /* (c, r) Schnorr-style pair; 64 bytes total. */
+ POD_CLASS signature {
+ ec_scalar c, r;
+ friend class crypto_ops;
+ };
+#pragma pack(pop)
+
+ static_assert(sizeof(ec_point) == 32 && sizeof(ec_scalar) == 32 &&
+ sizeof(public_key) == 32 && sizeof(secret_key) == 32 &&
+ sizeof(key_derivation) == 32 && sizeof(key_image) == 32 &&
+ sizeof(signature) == 64, "Invalid structure size");
+
+ /* Non-instantiable holder for the implementation of the EC operations
+ (ctor/dtor/copy are private and never defined). The friend free
+ functions declared alongside each static form the public API; see
+ the inline wrappers below for usage documentation. */
+ class crypto_ops {
+ crypto_ops();
+ crypto_ops(const crypto_ops &);
+ void operator=(const crypto_ops &);
+ ~crypto_ops();
+
+ static void generate_keys(public_key &, secret_key &);
+ friend void generate_keys(public_key &, secret_key &);
+ static bool check_key(const public_key &);
+ friend bool check_key(const public_key &);
+ static bool secret_key_to_public_key(const secret_key &, public_key &);
+ friend bool secret_key_to_public_key(const secret_key &, public_key &);
+ static bool generate_key_derivation(const public_key &, const secret_key &, key_derivation &);
+ friend bool generate_key_derivation(const public_key &, const secret_key &, key_derivation &);
+ static bool derive_public_key(const key_derivation &, std::size_t, const public_key &, public_key &);
+ friend bool derive_public_key(const key_derivation &, std::size_t, const public_key &, public_key &);
+ static void derive_secret_key(const key_derivation &, std::size_t, const secret_key &, secret_key &);
+ friend void derive_secret_key(const key_derivation &, std::size_t, const secret_key &, secret_key &);
+ static void generate_signature(const hash &, const public_key &, const secret_key &, signature &);
+ friend void generate_signature(const hash &, const public_key &, const secret_key &, signature &);
+ static bool check_signature(const hash &, const public_key &, const signature &);
+ friend bool check_signature(const hash &, const public_key &, const signature &);
+ static void generate_key_image(const public_key &, const secret_key &, key_image &);
+ friend void generate_key_image(const public_key &, const secret_key &, key_image &);
+ static void generate_ring_signature(const hash &, const key_image &,
+ const public_key *const *, std::size_t, const secret_key &, std::size_t, signature *);
+ friend void generate_ring_signature(const hash &, const key_image &,
+ const public_key *const *, std::size_t, const secret_key &, std::size_t, signature *);
+ static bool check_ring_signature(const hash &, const key_image &,
+ const public_key *const *, std::size_t, const signature *);
+ friend bool check_ring_signature(const hash &, const key_image &,
+ const public_key *const *, std::size_t, const signature *);
+ };
+
+ /* Generate a value filled with random bytes.
+ * Restricted to POD types via SFINAE (std::is_pod), and serialized by
+ * random_lock since the C generate_random_bytes() is shared state.
+ * NOTE(review): the template/enable_if angle brackets were stripped in
+ * this copy; reconstructed from upstream CryptoNote crypto.h. */
+ template<typename T>
+ typename std::enable_if<std::is_pod<T>::value, T>::type rand() {
+ typename std::remove_cv<T>::type res;
+ std::lock_guard<std::mutex> lock(random_lock);
+ generate_random_bytes(sizeof(T), &res);
+ return res;
+ }
+
+ /* Generate a new key pair
+ */
+ inline void generate_keys(public_key &pub, secret_key &sec) {
+ crypto_ops::generate_keys(pub, sec);
+ }
+
+ /* Check a public key. Returns true if it is valid, false otherwise.
+ */
+ inline bool check_key(const public_key &key) {
+ return crypto_ops::check_key(key);
+ }
+
+ /* Checks a private key and computes the corresponding public key.
+ */
+ inline bool secret_key_to_public_key(const secret_key &sec, public_key &pub) {
+ return crypto_ops::secret_key_to_public_key(sec, pub);
+ }
+
+ /* To generate an ephemeral key used to send money to:
+ * * The sender generates a new key pair, which becomes the transaction key. The public transaction key is included in "extra" field.
+ * * Both the sender and the receiver generate key derivation from the transaction key, the receivers' "view" key and the output index.
+ * * The sender uses key derivation and the receivers' "spend" key to derive an ephemeral public key.
+ * * The receiver can either derive the public key (to check that the transaction is addressed to him) or the private key (to spend the money).
+ */
+ inline bool generate_key_derivation(const public_key &key1, const secret_key &key2, key_derivation &derivation) {
+ return crypto_ops::generate_key_derivation(key1, key2, derivation);
+ }
+ inline bool derive_public_key(const key_derivation &derivation, std::size_t output_index,
+ const public_key &base, public_key &derived_key) {
+ return crypto_ops::derive_public_key(derivation, output_index, base, derived_key);
+ }
+ inline void derive_secret_key(const key_derivation &derivation, std::size_t output_index,
+ const secret_key &base, secret_key &derived_key) {
+ crypto_ops::derive_secret_key(derivation, output_index, base, derived_key);
+ }
+
+ /* Generation and checking of a standard signature.
+ */
+ inline void generate_signature(const hash &prefix_hash, const public_key &pub, const secret_key &sec, signature &sig) {
+ crypto_ops::generate_signature(prefix_hash, pub, sec, sig);
+ }
+ inline bool check_signature(const hash &prefix_hash, const public_key &pub, const signature &sig) {
+ return crypto_ops::check_signature(prefix_hash, pub, sig);
+ }
+
+ /* To send money to a key:
+ * * The sender generates an ephemeral key and includes it in transaction output.
+ * * To spend the money, the receiver generates a key image from it.
+ * * Then he selects a bunch of outputs, including the one he spends, and uses them to generate a ring signature.
+ * To check the signature, it is necessary to collect all the keys that were used to generate it. To detect double spends, it is necessary to check that each key image is used at most once.
+ */
+ inline void generate_key_image(const public_key &pub, const secret_key &sec, key_image &image) {
+ crypto_ops::generate_key_image(pub, sec, image);
+ }
+ /* sec_index selects which entry of pubs[] the real secret key signs for. */
+ inline void generate_ring_signature(const hash &prefix_hash, const key_image &image,
+ const public_key *const *pubs, std::size_t pubs_count,
+ const secret_key &sec, std::size_t sec_index,
+ signature *sig) {
+ crypto_ops::generate_ring_signature(prefix_hash, image, pubs, pubs_count, sec, sec_index, sig);
+ }
+ /* sig must point to pubs_count signature entries. */
+ inline bool check_ring_signature(const hash &prefix_hash, const key_image &image,
+ const public_key *const *pubs, std::size_t pubs_count,
+ const signature *sig) {
+ return crypto_ops::check_ring_signature(prefix_hash, image, pubs, pubs_count, sig);
+ }
+
+ /* Variants with vector parameters.
+ * NOTE(review): std::vector had lost its element type in this copy
+ * (angle-bracket text stripped); restored to const public_key *, which
+ * is what pubs.data() must yield for the pointer-array overloads above.
+ */
+ inline void generate_ring_signature(const hash &prefix_hash, const key_image &image,
+ const std::vector<const public_key *> &pubs,
+ const secret_key &sec, std::size_t sec_index,
+ signature *sig) {
+ generate_ring_signature(prefix_hash, image, pubs.data(), pubs.size(), sec, sec_index, sig);
+ }
+ inline bool check_ring_signature(const hash &prefix_hash, const key_image &image,
+ const std::vector<const public_key *> &pubs,
+ const signature *sig) {
+ return check_ring_signature(prefix_hash, image, pubs.data(), pubs.size(), sig);
+ }
+}
+
+/* Project macros from generic-ops.h - presumably generate comparison
+** operators / std::hash support for the POD types; verify there. */
+CRYPTO_MAKE_COMPARABLE(public_key)
+CRYPTO_MAKE_HASHABLE(key_image)
+CRYPTO_MAKE_COMPARABLE(signature)
diff --git a/stratum/algos/cryptonote/crypto/groestl_tables.h b/stratum/algos/cryptonote/crypto/groestl_tables.h
new file mode 100644
index 000000000..a23295c35
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/groestl_tables.h
@@ -0,0 +1,38 @@
+#ifndef __tables_h
+#define __tables_h
+
+
+const uint32_t T[512] = {0xa5f432c6, 0xc6a597f4, 0x84976ff8, 0xf884eb97, 0x99b05eee, 0xee99c7b0, 0x8d8c7af6, 0xf68df78c, 0xd17e8ff, 0xff0de517, 0xbddc0ad6, 0xd6bdb7dc, 0xb1c816de, 0xdeb1a7c8, 0x54fc6d91, 0x915439fc
+, 0x50f09060, 0x6050c0f0, 0x3050702, 0x2030405, 0xa9e02ece, 0xcea987e0, 0x7d87d156, 0x567dac87, 0x192bcce7, 0xe719d52b, 0x62a613b5, 0xb56271a6, 0xe6317c4d, 0x4de69a31, 0x9ab559ec, 0xec9ac3b5
+, 0x45cf408f, 0x8f4505cf, 0x9dbca31f, 0x1f9d3ebc, 0x40c04989, 0x894009c0, 0x879268fa, 0xfa87ef92, 0x153fd0ef, 0xef15c53f, 0xeb2694b2, 0xb2eb7f26, 0xc940ce8e, 0x8ec90740, 0xb1de6fb, 0xfb0bed1d
+, 0xec2f6e41, 0x41ec822f, 0x67a91ab3, 0xb3677da9, 0xfd1c435f, 0x5ffdbe1c, 0xea256045, 0x45ea8a25, 0xbfdaf923, 0x23bf46da, 0xf7025153, 0x53f7a602, 0x96a145e4, 0xe496d3a1, 0x5bed769b, 0x9b5b2ded
+, 0xc25d2875, 0x75c2ea5d, 0x1c24c5e1, 0xe11cd924, 0xaee9d43d, 0x3dae7ae9, 0x6abef24c, 0x4c6a98be, 0x5aee826c, 0x6c5ad8ee, 0x41c3bd7e, 0x7e41fcc3, 0x206f3f5, 0xf502f106, 0x4fd15283, 0x834f1dd1
+, 0x5ce48c68, 0x685cd0e4, 0xf4075651, 0x51f4a207, 0x345c8dd1, 0xd134b95c, 0x818e1f9, 0xf908e918, 0x93ae4ce2, 0xe293dfae, 0x73953eab, 0xab734d95, 0x53f59762, 0x6253c4f5, 0x3f416b2a, 0x2a3f5441
+, 0xc141c08, 0x80c1014, 0x52f66395, 0x955231f6, 0x65afe946, 0x46658caf, 0x5ee27f9d, 0x9d5e21e2, 0x28784830, 0x30286078, 0xa1f8cf37, 0x37a16ef8, 0xf111b0a, 0xa0f1411, 0xb5c4eb2f, 0x2fb55ec4
+, 0x91b150e, 0xe091c1b, 0x365a7e24, 0x2436485a, 0x9bb6ad1b, 0x1b9b36b6, 0x3d4798df, 0xdf3da547, 0x266aa7cd, 0xcd26816a, 0x69bbf54e, 0x4e699cbb, 0xcd4c337f, 0x7fcdfe4c, 0x9fba50ea, 0xea9fcfba
+, 0x1b2d3f12, 0x121b242d, 0x9eb9a41d, 0x1d9e3ab9, 0x749cc458, 0x5874b09c, 0x2e724634, 0x342e6872, 0x2d774136, 0x362d6c77, 0xb2cd11dc, 0xdcb2a3cd, 0xee299db4, 0xb4ee7329, 0xfb164d5b, 0x5bfbb616
+, 0xf601a5a4, 0xa4f65301, 0x4dd7a176, 0x764decd7, 0x61a314b7, 0xb76175a3, 0xce49347d, 0x7dcefa49, 0x7b8ddf52, 0x527ba48d, 0x3e429fdd, 0xdd3ea142, 0x7193cd5e, 0x5e71bc93, 0x97a2b113, 0x139726a2
+, 0xf504a2a6, 0xa6f55704, 0x68b801b9, 0xb96869b8, 0x0, 0x0, 0x2c74b5c1, 0xc12c9974, 0x60a0e040, 0x406080a0, 0x1f21c2e3, 0xe31fdd21, 0xc8433a79, 0x79c8f243, 0xed2c9ab6, 0xb6ed772c
+, 0xbed90dd4, 0xd4beb3d9, 0x46ca478d, 0x8d4601ca, 0xd9701767, 0x67d9ce70, 0x4bddaf72, 0x724be4dd, 0xde79ed94, 0x94de3379, 0xd467ff98, 0x98d42b67, 0xe82393b0, 0xb0e87b23, 0x4ade5b85, 0x854a11de
+, 0x6bbd06bb, 0xbb6b6dbd, 0x2a7ebbc5, 0xc52a917e, 0xe5347b4f, 0x4fe59e34, 0x163ad7ed, 0xed16c13a, 0xc554d286, 0x86c51754, 0xd762f89a, 0x9ad72f62, 0x55ff9966, 0x6655ccff, 0x94a7b611, 0x119422a7
+, 0xcf4ac08a, 0x8acf0f4a, 0x1030d9e9, 0xe910c930, 0x60a0e04, 0x406080a, 0x819866fe, 0xfe81e798, 0xf00baba0, 0xa0f05b0b, 0x44ccb478, 0x7844f0cc, 0xbad5f025, 0x25ba4ad5, 0xe33e754b, 0x4be3963e
+, 0xf30eaca2, 0xa2f35f0e, 0xfe19445d, 0x5dfeba19, 0xc05bdb80, 0x80c01b5b, 0x8a858005, 0x58a0a85, 0xadecd33f, 0x3fad7eec, 0xbcdffe21, 0x21bc42df, 0x48d8a870, 0x7048e0d8, 0x40cfdf1, 0xf104f90c
+, 0xdf7a1963, 0x63dfc67a, 0xc1582f77, 0x77c1ee58, 0x759f30af, 0xaf75459f, 0x63a5e742, 0x426384a5, 0x30507020, 0x20304050, 0x1a2ecbe5, 0xe51ad12e, 0xe12effd, 0xfd0ee112, 0x6db708bf, 0xbf6d65b7
+, 0x4cd45581, 0x814c19d4, 0x143c2418, 0x1814303c, 0x355f7926, 0x26354c5f, 0x2f71b2c3, 0xc32f9d71, 0xe13886be, 0xbee16738, 0xa2fdc835, 0x35a26afd, 0xcc4fc788, 0x88cc0b4f, 0x394b652e, 0x2e395c4b
+, 0x57f96a93, 0x93573df9, 0xf20d5855, 0x55f2aa0d, 0x829d61fc, 0xfc82e39d, 0x47c9b37a, 0x7a47f4c9, 0xacef27c8, 0xc8ac8bef, 0xe73288ba, 0xbae76f32, 0x2b7d4f32, 0x322b647d, 0x95a442e6, 0xe695d7a4
+, 0xa0fb3bc0, 0xc0a09bfb, 0x98b3aa19, 0x199832b3, 0xd168f69e, 0x9ed12768, 0x7f8122a3, 0xa37f5d81, 0x66aaee44, 0x446688aa, 0x7e82d654, 0x547ea882, 0xabe6dd3b, 0x3bab76e6, 0x839e950b, 0xb83169e
+, 0xca45c98c, 0x8cca0345, 0x297bbcc7, 0xc729957b, 0xd36e056b, 0x6bd3d66e, 0x3c446c28, 0x283c5044, 0x798b2ca7, 0xa779558b, 0xe23d81bc, 0xbce2633d, 0x1d273116, 0x161d2c27, 0x769a37ad, 0xad76419a
+, 0x3b4d96db, 0xdb3bad4d, 0x56fa9e64, 0x6456c8fa, 0x4ed2a674, 0x744ee8d2, 0x1e223614, 0x141e2822, 0xdb76e492, 0x92db3f76, 0xa1e120c, 0xc0a181e, 0x6cb4fc48, 0x486c90b4, 0xe4378fb8, 0xb8e46b37
+, 0x5de7789f, 0x9f5d25e7, 0x6eb20fbd, 0xbd6e61b2, 0xef2a6943, 0x43ef862a, 0xa6f135c4, 0xc4a693f1, 0xa8e3da39, 0x39a872e3, 0xa4f7c631, 0x31a462f7, 0x37598ad3, 0xd337bd59, 0x8b8674f2, 0xf28bff86
+, 0x325683d5, 0xd532b156, 0x43c54e8b, 0x8b430dc5, 0x59eb856e, 0x6e59dceb, 0xb7c218da, 0xdab7afc2, 0x8c8f8e01, 0x18c028f, 0x64ac1db1, 0xb16479ac, 0xd26df19c, 0x9cd2236d, 0xe03b7249, 0x49e0923b
+, 0xb4c71fd8, 0xd8b4abc7, 0xfa15b9ac, 0xacfa4315, 0x709faf3, 0xf307fd09, 0x256fa0cf, 0xcf25856f, 0xafea20ca, 0xcaaf8fea, 0x8e897df4, 0xf48ef389, 0xe9206747, 0x47e98e20, 0x18283810, 0x10182028
+, 0xd5640b6f, 0x6fd5de64, 0x888373f0, 0xf088fb83, 0x6fb1fb4a, 0x4a6f94b1, 0x7296ca5c, 0x5c72b896, 0x246c5438, 0x3824706c, 0xf1085f57, 0x57f1ae08, 0xc7522173, 0x73c7e652, 0x51f36497, 0x975135f3
+, 0x2365aecb, 0xcb238d65, 0x7c8425a1, 0xa17c5984, 0x9cbf57e8, 0xe89ccbbf, 0x21635d3e, 0x3e217c63, 0xdd7cea96, 0x96dd377c, 0xdc7f1e61, 0x61dcc27f, 0x86919c0d, 0xd861a91, 0x85949b0f, 0xf851e94
+, 0x90ab4be0, 0xe090dbab, 0x42c6ba7c, 0x7c42f8c6, 0xc4572671, 0x71c4e257, 0xaae529cc, 0xccaa83e5, 0xd873e390, 0x90d83b73, 0x50f0906, 0x6050c0f, 0x103f4f7, 0xf701f503, 0x12362a1c, 0x1c123836
+, 0xa3fe3cc2, 0xc2a39ffe, 0x5fe18b6a, 0x6a5fd4e1, 0xf910beae, 0xaef94710, 0xd06b0269, 0x69d0d26b, 0x91a8bf17, 0x17912ea8, 0x58e87199, 0x995829e8, 0x2769533a, 0x3a277469, 0xb9d0f727, 0x27b94ed0
+, 0x384891d9, 0xd938a948, 0x1335deeb, 0xeb13cd35, 0xb3cee52b, 0x2bb356ce, 0x33557722, 0x22334455, 0xbbd604d2, 0xd2bbbfd6, 0x709039a9, 0xa9704990, 0x89808707, 0x7890e80, 0xa7f2c133, 0x33a766f2
+, 0xb6c1ec2d, 0x2db65ac1, 0x22665a3c, 0x3c227866, 0x92adb815, 0x15922aad, 0x2060a9c9, 0xc9208960, 0x49db5c87, 0x874915db, 0xff1ab0aa, 0xaaff4f1a, 0x7888d850, 0x5078a088, 0x7a8e2ba5, 0xa57a518e
+, 0x8f8a8903, 0x38f068a, 0xf8134a59, 0x59f8b213, 0x809b9209, 0x980129b, 0x1739231a, 0x1a173439, 0xda751065, 0x65daca75, 0x315384d7, 0xd731b553, 0xc651d584, 0x84c61351, 0xb8d303d0, 0xd0b8bbd3
+, 0xc35edc82, 0x82c31f5e, 0xb0cbe229, 0x29b052cb, 0x7799c35a, 0x5a77b499, 0x11332d1e, 0x1e113c33, 0xcb463d7b, 0x7bcbf646, 0xfc1fb7a8, 0xa8fc4b1f, 0xd6610c6d, 0x6dd6da61, 0x3a4e622c, 0x2c3a584e};
+
+#endif /* __tables_h */
diff --git a/stratum/algos/cryptonote/crypto/hash-ops.h b/stratum/algos/cryptonote/crypto/hash-ops.h
new file mode 100644
index 000000000..b0a26b87e
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/hash-ops.h
@@ -0,0 +1,57 @@
+// Copyright (c) 2012-2013 The Cryptonote developers
+// Distributed under the MIT/X11 software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#pragma once
+
+#if !defined(__cplusplus)
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "int-util.h"
+
+static inline void *padd(void *p, size_t i) {
+ return (char *) p + i;
+}
+
+static inline const void *cpadd(const void *p, size_t i) {
+ return (const char *) p + i;
+}
+
+static inline void place_length(uint8_t *buffer, size_t bufsize, size_t length) {
+ if (sizeof(size_t) == 4) {
+ *(uint32_t *) padd(buffer, bufsize - 4) = swap32be(length);
+ } else {
+ *(uint64_t *) padd(buffer, bufsize - 8) = swap64be(length);
+ }
+}
+
+#pragma pack(push, 1)
+union hash_state {
+ uint8_t b[200];
+ uint64_t w[25];
+};
+#pragma pack(pop)
+
+void hash_permutation(union hash_state *state);
+void hash_process(union hash_state *state, const uint8_t *buf, size_t count);
+
+#endif
+
+enum {
+ HASH_SIZE = 32,
+ HASH_DATA_AREA = 136
+};
+
+void cn_fast_hash(const void *data, size_t length, char *hash);
+void cn_slow_hash(const void *data, size_t length, char *hash);
+
+void hash_extra_blake(const void *data, size_t length, char *hash);
+void hash_extra_groestl(const void *data, size_t length, char *hash);
+void hash_extra_jh(const void *data, size_t length, char *hash);
+void hash_extra_skein(const void *data, size_t length, char *hash);
+
+void tree_hash(const char (*hashes)[HASH_SIZE], size_t count, char *root_hash);
diff --git a/stratum/algos/cryptonote/crypto/hash.c b/stratum/algos/cryptonote/crypto/hash.c
new file mode 100644
index 000000000..f3a16f0c1
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/hash.c
@@ -0,0 +1,24 @@
+// Copyright (c) 2012-2013 The Cryptonote developers
+// Distributed under the MIT/X11 software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "hash-ops.h"
+#include "c_keccak.h"
+
+void hash_permutation(union hash_state *state) {
+ keccakf((uint64_t*)state, 24);
+}
+
+void hash_process(union hash_state *state, const uint8_t *buf, size_t count) {
+ keccak1600(buf, count, (uint8_t*)state);
+}
+
+void cn_fast_hash(const void *data, size_t length, char *hash) {
+ union hash_state state;
+ hash_process(&state, data, length);
+ memcpy(hash, &state, HASH_SIZE);
+}
diff --git a/stratum/algos/cryptonote/crypto/hash.h b/stratum/algos/cryptonote/crypto/hash.h
new file mode 100644
index 000000000..b5ee22114
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/hash.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include "hash-ops.h"
+
+typedef unsigned char BitSequence;
+typedef unsigned long long DataLength;
+
+#ifdef __cplusplus
+
+#include <string>
+
+typedef std::string blobdata;
+
+namespace crypto {
+#pragma pack(push, 1)
+ class hash {
+ char data[HASH_SIZE];
+ };
+#pragma pack(pop)
+}
+
+#endif
diff --git a/stratum/algos/cryptonote/crypto/int-util.h b/stratum/algos/cryptonote/crypto/int-util.h
new file mode 100644
index 000000000..90980ba86
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/int-util.h
@@ -0,0 +1,230 @@
+// Copyright (c) 2012-2013 The Cryptonote developers
+// Distributed under the MIT/X11 software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#pragma once
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+
+/*
+ * Create GNU compatible endian macros. We use the values for __LITTLE_ENDIAN
+ * and __BIG_ENDIAN based on endian.h.
+ */
+#ifdef __sun
+#include <sys/isa_defs.h>
+#define LITTLE_ENDIAN 1234
+#define BIG_ENDIAN 4321
+#ifdef _LITTLE_ENDIAN
+#define BYTE_ORDER LITTLE_ENDIAN
+#else
+#define BYTE_ORDER BIG_ENDIAN
+#endif /* _LITTLE_ENDIAN */
+#endif /* __sun */
+
+#if defined(_MSC_VER)
+#include <stdlib.h>
+
+//instead of #include <endian.h>
+// assume little-endian on Windows
+#define LITTLE_ENDIAN 1234
+#define BIG_ENDIAN 4321
+#define BYTE_ORDER LITTLE_ENDIAN
+
+static inline uint32_t rol32(uint32_t x, int r) {
+ static_assert(sizeof(uint32_t) == sizeof(unsigned int), "this code assumes 32-bit integers");
+ return _rotl(x, r);
+}
+
+static inline uint64_t rol64(uint64_t x, int r) {
+ return _rotl64(x, r);
+}
+
+#else
+#include <sys/param.h>
+
+static inline uint32_t rol32(uint32_t x, int r) {
+ return (x << (r & 31)) | (x >> (-r & 31));
+}
+
+static inline uint64_t rol64(uint64_t x, int r) {
+ return (x << (r & 63)) | (x >> (-r & 63));
+}
+
+#endif
+
+static inline uint64_t hi_dword(uint64_t val) {
+ return val >> 32;
+}
+
+static inline uint64_t lo_dword(uint64_t val) {
+ return val & 0xFFFFFFFF;
+}
+
+static inline uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi) {
+ // multiplier = ab = a * 2^32 + b
+ // multiplicand = cd = c * 2^32 + d
+ // ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
+ uint64_t a = hi_dword(multiplier);
+ uint64_t b = lo_dword(multiplier);
+ uint64_t c = hi_dword(multiplicand);
+ uint64_t d = lo_dword(multiplicand);
+
+ uint64_t ac = a * c;
+ uint64_t ad = a * d;
+ uint64_t bc = b * c;
+ uint64_t bd = b * d;
+
+ uint64_t adbc = ad + bc;
+ uint64_t adbc_carry = adbc < ad ? 1 : 0;
+
+ // multiplier * multiplicand = product_hi * 2^64 + product_lo
+ uint64_t product_lo = bd + (adbc << 32);
+ uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
+ *product_hi = ac + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
+ assert(ac <= *product_hi);
+
+ return product_lo;
+}
+
+static inline uint64_t div_with_reminder(uint64_t dividend, uint32_t divisor, uint32_t* remainder) {
+ dividend |= ((uint64_t)*remainder) << 32;
+ *remainder = dividend % divisor;
+ return dividend / divisor;
+}
+
+// Long division with 2^32 base
+static inline uint32_t div128_32(uint64_t dividend_hi, uint64_t dividend_lo, uint32_t divisor, uint64_t* quotient_hi, uint64_t* quotient_lo) {
+ uint64_t dividend_dwords[4];
+ uint32_t remainder = 0;
+
+ dividend_dwords[3] = hi_dword(dividend_hi);
+ dividend_dwords[2] = lo_dword(dividend_hi);
+ dividend_dwords[1] = hi_dword(dividend_lo);
+ dividend_dwords[0] = lo_dword(dividend_lo);
+
+ *quotient_hi = div_with_reminder(dividend_dwords[3], divisor, &remainder) << 32;
+ *quotient_hi |= div_with_reminder(dividend_dwords[2], divisor, &remainder);
+ *quotient_lo = div_with_reminder(dividend_dwords[1], divisor, &remainder) << 32;
+ *quotient_lo |= div_with_reminder(dividend_dwords[0], divisor, &remainder);
+
+ return remainder;
+}
+
+#define IDENT32(x) ((uint32_t) (x))
+#define IDENT64(x) ((uint64_t) (x))
+
+#define SWAP32(x) ((((uint32_t) (x) & 0x000000ff) << 24) | \
+ (((uint32_t) (x) & 0x0000ff00) << 8) | \
+ (((uint32_t) (x) & 0x00ff0000) >> 8) | \
+ (((uint32_t) (x) & 0xff000000) >> 24))
+#define SWAP64(x) ((((uint64_t) (x) & 0x00000000000000ff) << 56) | \
+ (((uint64_t) (x) & 0x000000000000ff00) << 40) | \
+ (((uint64_t) (x) & 0x0000000000ff0000) << 24) | \
+ (((uint64_t) (x) & 0x00000000ff000000) << 8) | \
+ (((uint64_t) (x) & 0x000000ff00000000) >> 8) | \
+ (((uint64_t) (x) & 0x0000ff0000000000) >> 24) | \
+ (((uint64_t) (x) & 0x00ff000000000000) >> 40) | \
+ (((uint64_t) (x) & 0xff00000000000000) >> 56))
+
+static inline uint32_t ident32(uint32_t x) { return x; }
+static inline uint64_t ident64(uint64_t x) { return x; }
+
+static inline uint32_t swap32(uint32_t x) {
+ x = ((x & 0x00ff00ff) << 8) | ((x & 0xff00ff00) >> 8);
+ return (x << 16) | (x >> 16);
+}
+static inline uint64_t swap64(uint64_t x) {
+ x = ((x & 0x00ff00ff00ff00ff) << 8) | ((x & 0xff00ff00ff00ff00) >> 8);
+ x = ((x & 0x0000ffff0000ffff) << 16) | ((x & 0xffff0000ffff0000) >> 16);
+ return (x << 32) | (x >> 32);
+}
+
+#if defined(__GNUC__)
+#define UNUSED __attribute__((unused))
+#else
+#define UNUSED
+#endif
+static inline void mem_inplace_ident(void *mem UNUSED, size_t n UNUSED) { }
+#undef UNUSED
+
+static inline void mem_inplace_swap32(void *mem, size_t n) {
+ size_t i;
+ for (i = 0; i < n; i++) {
+ ((uint32_t *) mem)[i] = swap32(((const uint32_t *) mem)[i]);
+ }
+}
+static inline void mem_inplace_swap64(void *mem, size_t n) {
+ size_t i;
+ for (i = 0; i < n; i++) {
+ ((uint64_t *) mem)[i] = swap64(((const uint64_t *) mem)[i]);
+ }
+}
+
+static inline void memcpy_ident32(void *dst, const void *src, size_t n) {
+ memcpy(dst, src, 4 * n);
+}
+static inline void memcpy_ident64(void *dst, const void *src, size_t n) {
+ memcpy(dst, src, 8 * n);
+}
+
+static inline void memcpy_swap32(void *dst, const void *src, size_t n) {
+ size_t i;
+ for (i = 0; i < n; i++) {
+ ((uint32_t *) dst)[i] = swap32(((const uint32_t *) src)[i]);
+ }
+}
+static inline void memcpy_swap64(void *dst, const void *src, size_t n) {
+ size_t i;
+ for (i = 0; i < n; i++) {
+ ((uint64_t *) dst)[i] = swap64(((const uint64_t *) src)[i]);
+ }
+}
+
+#if !defined(BYTE_ORDER) || !defined(LITTLE_ENDIAN) || !defined(BIG_ENDIAN)
+#if __STDC_VERSION__ - 0 >= 201112L
+static_assert(false, "BYTE_ORDER is undefined. Perhaps, GNU extensions are not enabled");
+#else
+#error "BYTE_ORDER is undefined. Perhaps, GNU extensions are not enabled"
+#endif
+#endif
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define SWAP32LE IDENT32
+#define SWAP32BE SWAP32
+#define swap32le ident32
+#define swap32be swap32
+#define mem_inplace_swap32le mem_inplace_ident
+#define mem_inplace_swap32be mem_inplace_swap32
+#define memcpy_swap32le memcpy_ident32
+#define memcpy_swap32be memcpy_swap32
+#define SWAP64LE IDENT64
+#define SWAP64BE SWAP64
+#define swap64le ident64
+#define swap64be swap64
+#define mem_inplace_swap64le mem_inplace_ident
+#define mem_inplace_swap64be mem_inplace_swap64
+#define memcpy_swap64le memcpy_ident64
+#define memcpy_swap64be memcpy_swap64
+#endif
+
+#if BYTE_ORDER == BIG_ENDIAN
+#define SWAP32BE IDENT32
+#define SWAP32LE SWAP32
+#define swap32be ident32
+#define swap32le swap32
+#define mem_inplace_swap32be mem_inplace_ident
+#define mem_inplace_swap32le mem_inplace_swap32
+#define memcpy_swap32be memcpy_ident32
+#define memcpy_swap32le memcpy_swap32
+#define SWAP64BE IDENT64
+#define SWAP64LE SWAP64
+#define swap64be ident64
+#define swap64le swap64
+#define mem_inplace_swap64be mem_inplace_ident
+#define mem_inplace_swap64le mem_inplace_swap64
+#define memcpy_swap64be memcpy_ident64
+#define memcpy_swap64le memcpy_swap64
+#endif
diff --git a/stratum/algos/cryptonote/crypto/oaes_config.h b/stratum/algos/cryptonote/crypto/oaes_config.h
new file mode 100644
index 000000000..3fc0e1be5
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/oaes_config.h
@@ -0,0 +1,50 @@
+/*
+ * ---------------------------------------------------------------------------
+ * OpenAES License
+ * ---------------------------------------------------------------------------
+ * Copyright (c) 2012, Nabil S. Al Ramli, www.nalramli.com
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------------
+ */
+
+#ifndef _OAES_CONFIG_H
+#define _OAES_CONFIG_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//#ifndef OAES_HAVE_ISAAC
+//#define OAES_HAVE_ISAAC 1
+//#endif // OAES_HAVE_ISAAC
+
+//#ifndef OAES_DEBUG
+//#define OAES_DEBUG 0
+//#endif // OAES_DEBUG
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _OAES_CONFIG_H
diff --git a/stratum/algos/cryptonote/crypto/oaes_lib.c b/stratum/algos/cryptonote/crypto/oaes_lib.c
new file mode 100644
index 000000000..29559afc6
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/oaes_lib.c
@@ -0,0 +1,1468 @@
+/*
+ * ---------------------------------------------------------------------------
+ * OpenAES License
+ * ---------------------------------------------------------------------------
+ * Copyright (c) 2012, Nabil S. Al Ramli, www.nalramli.com
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------------
+ */
+
+static const char _NR[] = {
+ 0x4e,0x61,0x62,0x69,0x6c,0x20,0x53,0x2e,0x20,
+ 0x41,0x6c,0x20,0x52,0x61,0x6d,0x6c,0x69,0x00
+};
+
+#include <stddef.h>
+#include <time.h>
+#include <string.h>
+#ifdef __APPLE__
+#include <sys/time.h>
+#else
+#include <sys/timeb.h>
+#endif
+#include <sys/types.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#ifdef WIN32
+#include <process.h>
+#else
+#include <unistd.h>
+#include <sys/types.h>
+#endif
+
+#include "oaes_config.h"
+#include "oaes_lib.h"
+
+#ifdef OAES_HAVE_ISAAC
+#include "rand.h"
+#endif // OAES_HAVE_ISAAC
+
+#define OAES_RKEY_LEN 4
+#define OAES_COL_LEN 4
+#define OAES_ROUND_BASE 7
+
+// the block is padded
+#define OAES_FLAG_PAD 0x01
+
+#ifndef min
+# define min(a,b) (((a)<(b)) ? (a) : (b))
+#endif /* min */
+
+// "OAES<8-bit header version><8-bit type><16-bit options><8-bit flags><56-bit reserved>"
+static uint8_t oaes_header[OAES_BLOCK_SIZE] = {
+ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f,
+ /*0*/ 0x4f, 0x41, 0x45, 0x53, 0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+static uint8_t oaes_gf_8[] = {
+ 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 };
+
+static uint8_t oaes_sub_byte_value[16][16] = {
+ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f,
+ /*0*/ { 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 },
+ /*1*/ { 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 },
+ /*2*/ { 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 },
+ /*3*/ { 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 },
+ /*4*/ { 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 },
+ /*5*/ { 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf },
+ /*6*/ { 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 },
+ /*7*/ { 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 },
+ /*8*/ { 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 },
+ /*9*/ { 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb },
+ /*a*/ { 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 },
+ /*b*/ { 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 },
+ /*c*/ { 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a },
+ /*d*/ { 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e },
+ /*e*/ { 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf },
+ /*f*/ { 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 },
+};
+
+static uint8_t oaes_inv_sub_byte_value[16][16] = {
+ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f,
+ /*0*/ { 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb },
+ /*1*/ { 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb },
+ /*2*/ { 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e },
+ /*3*/ { 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 },
+ /*4*/ { 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 },
+ /*5*/ { 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 },
+ /*6*/ { 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 },
+ /*7*/ { 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b },
+ /*8*/ { 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 },
+ /*9*/ { 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e },
+ /*a*/ { 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b },
+ /*b*/ { 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 },
+ /*c*/ { 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f },
+ /*d*/ { 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef },
+ /*e*/ { 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 },
+ /*f*/ { 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d },
+};
+
+static uint8_t oaes_gf_mul_2[16][16] = {
+ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f,
+ /*0*/ { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+ /*1*/ { 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e },
+ /*2*/ { 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e },
+ /*3*/ { 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e },
+ /*4*/ { 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e },
+ /*5*/ { 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe },
+ /*6*/ { 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde },
+ /*7*/ { 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe },
+ /*8*/ { 0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05 },
+ /*9*/ { 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25 },
+ /*a*/ { 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45 },
+ /*b*/ { 0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65 },
+ /*c*/ { 0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85 },
+ /*d*/ { 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5 },
+ /*e*/ { 0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5 },
+ /*f*/ { 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5 },
+};
+
+static uint8_t oaes_gf_mul_3[16][16] = {
+ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f,
+ /*0*/ { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+ /*1*/ { 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21 },
+ /*2*/ { 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71 },
+ /*3*/ { 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41 },
+ /*4*/ { 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1 },
+ /*5*/ { 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1 },
+ /*6*/ { 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1 },
+ /*7*/ { 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81 },
+ /*8*/ { 0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a },
+ /*9*/ { 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba },
+ /*a*/ { 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea },
+ /*b*/ { 0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda },
+ /*c*/ { 0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a },
+ /*d*/ { 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a },
+ /*e*/ { 0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a },
+ /*f*/ { 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a },
+};
+
+static uint8_t oaes_gf_mul_9[16][16] = {
+ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f,
+ /*0*/ { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+ /*1*/ { 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7 },
+ /*2*/ { 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c },
+ /*3*/ { 0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc },
+ /*4*/ { 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01 },
+ /*5*/ { 0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91 },
+ /*6*/ { 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a },
+ /*7*/ { 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa },
+ /*8*/ { 0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b },
+ /*9*/ { 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b },
+ /*a*/ { 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0 },
+ /*b*/ { 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30 },
+ /*c*/ { 0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed },
+ /*d*/ { 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d },
+ /*e*/ { 0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6 },
+ /*f*/ { 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46 },
+};
+
+static uint8_t oaes_gf_mul_b[16][16] = {
+ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f,
+ /*0*/ { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+ /*1*/ { 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9 },
+ /*2*/ { 0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12 },
+ /*3*/ { 0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2 },
+ /*4*/ { 0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f },
+ /*5*/ { 0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f },
+ /*6*/ { 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4 },
+ /*7*/ { 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54 },
+ /*8*/ { 0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e },
+ /*9*/ { 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e },
+ /*a*/ { 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5 },
+ /*b*/ { 0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55 },
+ /*c*/ { 0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68 },
+ /*d*/ { 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8 },
+ /*e*/ { 0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13 },
+ /*f*/ { 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3 },
+};
+
+static uint8_t oaes_gf_mul_d[16][16] = {
+ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f,
+ /*0*/ { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+ /*1*/ { 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b },
+ /*2*/ { 0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0 },
+ /*3*/ { 0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20 },
+ /*4*/ { 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26 },
+ /*5*/ { 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6 },
+ /*6*/ { 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d },
+ /*7*/ { 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d },
+ /*8*/ { 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91 },
+ /*9*/ { 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41 },
+ /*a*/ { 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a },
+ /*b*/ { 0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa },
+ /*c*/ { 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc },
+ /*d*/ { 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c },
+ /*e*/ { 0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47 },
+ /*f*/ { 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97 },
+};
+
+static uint8_t oaes_gf_mul_e[16][16] = {
+ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f,
+ /*0*/ { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a },
+ /*1*/ { 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba },
+ /*2*/ { 0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81 },
+ /*3*/ { 0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61 },
+ /*4*/ { 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7 },
+ /*5*/ { 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17 },
+ /*6*/ { 0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c },
+ /*7*/ { 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc },
+ /*8*/ { 0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b },
+ /*9*/ { 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb },
+ /*a*/ { 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0 },
+ /*b*/ { 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20 },
+ /*c*/ { 0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6 },
+ /*d*/ { 0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56 },
+ /*e*/ { 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d },
+ /*f*/ { 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d },
+};
+
+static OAES_RET oaes_sub_byte( uint8_t * byte )
+{
+ size_t _x, _y;
+
+ if( NULL == byte )
+ return OAES_RET_ARG1;
+
+ _x = _y = *byte;
+ _x &= 0x0f;
+ _y &= 0xf0;
+ _y >>= 4;
+ *byte = oaes_sub_byte_value[_y][_x];
+
+ return OAES_RET_SUCCESS;
+}
+
+static OAES_RET oaes_inv_sub_byte( uint8_t * byte )
+{
+ size_t _x, _y;
+
+ if( NULL == byte )
+ return OAES_RET_ARG1;
+
+ _x = _y = *byte;
+ _x &= 0x0f;
+ _y &= 0xf0;
+ _y >>= 4;
+ *byte = oaes_inv_sub_byte_value[_y][_x];
+
+ return OAES_RET_SUCCESS;
+}
+/*
+static OAES_RET oaes_word_rot_right( uint8_t word[OAES_COL_LEN] )
+{
+ uint8_t _temp[OAES_COL_LEN];
+
+ if( NULL == word )
+ return OAES_RET_ARG1;
+
+ memcpy( _temp + 1, word, OAES_COL_LEN - 1 );
+ _temp[0] = word[OAES_COL_LEN - 1];
+ memcpy( word, _temp, OAES_COL_LEN );
+
+ return OAES_RET_SUCCESS;
+}
+*/
+static OAES_RET oaes_word_rot_left( uint8_t word[OAES_COL_LEN] )
+{
+ uint8_t _temp[OAES_COL_LEN];
+
+ if( NULL == word )
+ return OAES_RET_ARG1;
+
+ memcpy( _temp, word + 1, OAES_COL_LEN - 1 );
+ _temp[OAES_COL_LEN - 1] = word[0];
+ memcpy( word, _temp, OAES_COL_LEN );
+
+ return OAES_RET_SUCCESS;
+}
+
+static OAES_RET oaes_shift_rows( uint8_t block[OAES_BLOCK_SIZE] )
+{
+ uint8_t _temp[OAES_BLOCK_SIZE];
+
+ if( NULL == block )
+ return OAES_RET_ARG1;
+
+ _temp[0x00] = block[0x00];
+ _temp[0x01] = block[0x05];
+ _temp[0x02] = block[0x0a];
+ _temp[0x03] = block[0x0f];
+ _temp[0x04] = block[0x04];
+ _temp[0x05] = block[0x09];
+ _temp[0x06] = block[0x0e];
+ _temp[0x07] = block[0x03];
+ _temp[0x08] = block[0x08];
+ _temp[0x09] = block[0x0d];
+ _temp[0x0a] = block[0x02];
+ _temp[0x0b] = block[0x07];
+ _temp[0x0c] = block[0x0c];
+ _temp[0x0d] = block[0x01];
+ _temp[0x0e] = block[0x06];
+ _temp[0x0f] = block[0x0b];
+ memcpy( block, _temp, OAES_BLOCK_SIZE );
+
+ return OAES_RET_SUCCESS;
+}
+
+static OAES_RET oaes_inv_shift_rows( uint8_t block[OAES_BLOCK_SIZE] )
+{
+ uint8_t _temp[OAES_BLOCK_SIZE];
+
+ if( NULL == block )
+ return OAES_RET_ARG1;
+
+ _temp[0x00] = block[0x00];
+ _temp[0x01] = block[0x0d];
+ _temp[0x02] = block[0x0a];
+ _temp[0x03] = block[0x07];
+ _temp[0x04] = block[0x04];
+ _temp[0x05] = block[0x01];
+ _temp[0x06] = block[0x0e];
+ _temp[0x07] = block[0x0b];
+ _temp[0x08] = block[0x08];
+ _temp[0x09] = block[0x05];
+ _temp[0x0a] = block[0x02];
+ _temp[0x0b] = block[0x0f];
+ _temp[0x0c] = block[0x0c];
+ _temp[0x0d] = block[0x09];
+ _temp[0x0e] = block[0x06];
+ _temp[0x0f] = block[0x03];
+ memcpy( block, _temp, OAES_BLOCK_SIZE );
+
+ return OAES_RET_SUCCESS;
+}
+
+static uint8_t oaes_gf_mul(uint8_t left, uint8_t right)
+{
+ size_t _x, _y;
+
+ _x = _y = left;
+ _x &= 0x0f;
+ _y &= 0xf0;
+ _y >>= 4;
+
+ switch( right )
+ {
+ case 0x02:
+ return oaes_gf_mul_2[_y][_x];
+ break;
+ case 0x03:
+ return oaes_gf_mul_3[_y][_x];
+ break;
+ case 0x09:
+ return oaes_gf_mul_9[_y][_x];
+ break;
+ case 0x0b:
+ return oaes_gf_mul_b[_y][_x];
+ break;
+ case 0x0d:
+ return oaes_gf_mul_d[_y][_x];
+ break;
+ case 0x0e:
+ return oaes_gf_mul_e[_y][_x];
+ break;
+ default:
+ return left;
+ break;
+ }
+}
+
+static OAES_RET oaes_mix_cols( uint8_t word[OAES_COL_LEN] )
+{
+ uint8_t _temp[OAES_COL_LEN];
+
+ if( NULL == word )
+ return OAES_RET_ARG1;
+
+ _temp[0] = oaes_gf_mul(word[0], 0x02) ^ oaes_gf_mul( word[1], 0x03 ) ^
+ word[2] ^ word[3];
+ _temp[1] = word[0] ^ oaes_gf_mul( word[1], 0x02 ) ^
+ oaes_gf_mul( word[2], 0x03 ) ^ word[3];
+ _temp[2] = word[0] ^ word[1] ^
+ oaes_gf_mul( word[2], 0x02 ) ^ oaes_gf_mul( word[3], 0x03 );
+ _temp[3] = oaes_gf_mul( word[0], 0x03 ) ^ word[1] ^
+ word[2] ^ oaes_gf_mul( word[3], 0x02 );
+ memcpy( word, _temp, OAES_COL_LEN );
+
+ return OAES_RET_SUCCESS;
+}
+
+static OAES_RET oaes_inv_mix_cols( uint8_t word[OAES_COL_LEN] )
+{
+ uint8_t _temp[OAES_COL_LEN];
+
+ if( NULL == word )
+ return OAES_RET_ARG1;
+
+ _temp[0] = oaes_gf_mul( word[0], 0x0e ) ^ oaes_gf_mul( word[1], 0x0b ) ^
+ oaes_gf_mul( word[2], 0x0d ) ^ oaes_gf_mul( word[3], 0x09 );
+ _temp[1] = oaes_gf_mul( word[0], 0x09 ) ^ oaes_gf_mul( word[1], 0x0e ) ^
+ oaes_gf_mul( word[2], 0x0b ) ^ oaes_gf_mul( word[3], 0x0d );
+ _temp[2] = oaes_gf_mul( word[0], 0x0d ) ^ oaes_gf_mul( word[1], 0x09 ) ^
+ oaes_gf_mul( word[2], 0x0e ) ^ oaes_gf_mul( word[3], 0x0b );
+ _temp[3] = oaes_gf_mul( word[0], 0x0b ) ^ oaes_gf_mul( word[1], 0x0d ) ^
+ oaes_gf_mul( word[2], 0x09 ) ^ oaes_gf_mul( word[3], 0x0e );
+ memcpy( word, _temp, OAES_COL_LEN );
+
+ return OAES_RET_SUCCESS;
+}
+
+OAES_RET oaes_sprintf(
+ char * buf, size_t * buf_len, const uint8_t * data, size_t data_len )
+{
+ size_t _i, _buf_len_in;
+ char _temp[4];
+
+ if( NULL == buf_len )
+ return OAES_RET_ARG2;
+
+ _buf_len_in = *buf_len;
+ *buf_len = data_len * 3 + data_len / OAES_BLOCK_SIZE + 1;
+
+ if( NULL == buf )
+ return OAES_RET_SUCCESS;
+
+ if( *buf_len > _buf_len_in )
+ return OAES_RET_BUF;
+
+ if( NULL == data )
+ return OAES_RET_ARG3;
+
+ strcpy( buf, "" );
+
+ for( _i = 0; _i < data_len; _i++ )
+ {
+ sprintf( _temp, "%02x ", data[_i] );
+ strcat( buf, _temp );
+ if( _i && 0 == ( _i + 1 ) % OAES_BLOCK_SIZE )
+ strcat( buf, "\n" );
+ }
+
+ return OAES_RET_SUCCESS;
+}
+
+#ifdef OAES_HAVE_ISAAC
+static void oaes_get_seed( char buf[RANDSIZ + 1] )
+{
+ struct timeb timer;
+ struct tm *gmTimer;
+ char * _test = NULL;
+
+ ftime (&timer);
+ gmTimer = gmtime( &timer.time );
+ _test = (char *) calloc( sizeof( char ), timer.millitm );
+ sprintf( buf, "%04d%02d%02d%02d%02d%02d%03d%p%d",
+ gmTimer->tm_year + 1900, gmTimer->tm_mon + 1, gmTimer->tm_mday,
+ gmTimer->tm_hour, gmTimer->tm_min, gmTimer->tm_sec, timer.millitm,
+ _test + timer.millitm, getpid() );
+
+ if( _test )
+ free( _test );
+}
+#else
+static uint32_t oaes_get_seed(void)
+{
+ struct timeb timer;
+ struct tm *gmTimer;
+ char * _test = NULL;
+ uint32_t _ret = 0;
+
+ ftime (&timer);
+ gmTimer = gmtime( &timer.time );
+ _test = (char *) calloc( sizeof( char ), timer.millitm );
+ _ret = (uint32_t)(gmTimer->tm_year + 1900 + gmTimer->tm_mon + 1 + gmTimer->tm_mday +
+ gmTimer->tm_hour + gmTimer->tm_min + gmTimer->tm_sec + timer.millitm +
+ (uintptr_t) ( _test + timer.millitm ) + getpid());
+
+ if( _test )
+ free( _test );
+
+ return _ret;
+}
+#endif // OAES_HAVE_ISAAC
+
+static OAES_RET oaes_key_destroy( oaes_key ** key )
+{
+ if( NULL == *key )
+ return OAES_RET_SUCCESS;
+
+ if( (*key)->data )
+ {
+ free( (*key)->data );
+ (*key)->data = NULL;
+ }
+
+ if( (*key)->exp_data )
+ {
+ free( (*key)->exp_data );
+ (*key)->exp_data = NULL;
+ }
+
+ (*key)->data_len = 0;
+ (*key)->exp_data_len = 0;
+ (*key)->num_keys = 0;
+ (*key)->key_base = 0;
+ free( *key );
+ *key = NULL;
+
+ return OAES_RET_SUCCESS;
+}
+
+static OAES_RET oaes_key_expand( OAES_CTX * ctx )
+{
+ size_t _i, _j;
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+
+ if( NULL == _ctx )
+ return OAES_RET_ARG1;
+
+ if( NULL == _ctx->key )
+ return OAES_RET_NOKEY;
+
+ _ctx->key->key_base = _ctx->key->data_len / OAES_RKEY_LEN;
+ _ctx->key->num_keys = _ctx->key->key_base + OAES_ROUND_BASE;
+
+ _ctx->key->exp_data_len = _ctx->key->num_keys * OAES_RKEY_LEN * OAES_COL_LEN;
+ _ctx->key->exp_data = (uint8_t *)
+ calloc( _ctx->key->exp_data_len, sizeof( uint8_t ));
+
+ if( NULL == _ctx->key->exp_data )
+ return OAES_RET_MEM;
+
+ // the first _ctx->key->data_len are a direct copy
+ memcpy( _ctx->key->exp_data, _ctx->key->data, _ctx->key->data_len );
+
+ // apply ExpandKey algorithm for remainder
+ for( _i = _ctx->key->key_base; _i < _ctx->key->num_keys * OAES_RKEY_LEN; _i++ )
+ {
+ uint8_t _temp[OAES_COL_LEN];
+
+ memcpy( _temp,
+ _ctx->key->exp_data + ( _i - 1 ) * OAES_RKEY_LEN, OAES_COL_LEN );
+
+ // transform key column
+ if( 0 == _i % _ctx->key->key_base )
+ {
+ oaes_word_rot_left( _temp );
+
+ for( _j = 0; _j < OAES_COL_LEN; _j++ )
+ oaes_sub_byte( _temp + _j );
+
+ _temp[0] = _temp[0] ^ oaes_gf_8[ _i / _ctx->key->key_base - 1 ];
+ }
+ else if( _ctx->key->key_base > 6 && 4 == _i % _ctx->key->key_base )
+ {
+ for( _j = 0; _j < OAES_COL_LEN; _j++ )
+ oaes_sub_byte( _temp + _j );
+ }
+
+ for( _j = 0; _j < OAES_COL_LEN; _j++ )
+ {
+ _ctx->key->exp_data[ _i * OAES_RKEY_LEN + _j ] =
+ _ctx->key->exp_data[ ( _i - _ctx->key->key_base ) *
+ OAES_RKEY_LEN + _j ] ^ _temp[_j];
+ }
+ }
+
+ return OAES_RET_SUCCESS;
+}
+
+static OAES_RET oaes_key_gen( OAES_CTX * ctx, size_t key_size )
+{
+ size_t _i;
+ oaes_key * _key = NULL;
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+ OAES_RET _rc = OAES_RET_SUCCESS;
+
+ if( NULL == _ctx )
+ return OAES_RET_ARG1;
+
+ _key = (oaes_key *) calloc( sizeof( oaes_key ), 1 );
+
+ if( NULL == _key )
+ return OAES_RET_MEM;
+
+ if( _ctx->key )
+ oaes_key_destroy( &(_ctx->key) );
+
+ _key->data_len = key_size;
+ _key->data = (uint8_t *) calloc( key_size, sizeof( uint8_t ));
+
+ if( NULL == _key->data )
+ return OAES_RET_MEM;
+
+ for( _i = 0; _i < key_size; _i++ )
+#ifdef OAES_HAVE_ISAAC
+ _key->data[_i] = (uint8_t) rand( _ctx->rctx );
+#else
+ _key->data[_i] = (uint8_t) rand();
+#endif // OAES_HAVE_ISAAC
+
+ _ctx->key = _key;
+ _rc = _rc || oaes_key_expand( ctx );
+
+ if( _rc != OAES_RET_SUCCESS )
+ {
+ oaes_key_destroy( &(_ctx->key) );
+ return _rc;
+ }
+
+ return OAES_RET_SUCCESS;
+}
+
+OAES_RET oaes_key_gen_128( OAES_CTX * ctx )
+{
+ return oaes_key_gen( ctx, 16 );
+}
+
+OAES_RET oaes_key_gen_192( OAES_CTX * ctx )
+{
+ return oaes_key_gen( ctx, 24 );
+}
+
+OAES_RET oaes_key_gen_256( OAES_CTX * ctx )
+{
+ return oaes_key_gen( ctx, 32 );
+}
+
+OAES_RET oaes_key_export( OAES_CTX * ctx,
+ uint8_t * data, size_t * data_len )
+{
+ size_t _data_len_in;
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+
+ if( NULL == _ctx )
+ return OAES_RET_ARG1;
+
+ if( NULL == _ctx->key )
+ return OAES_RET_NOKEY;
+
+ if( NULL == data_len )
+ return OAES_RET_ARG3;
+
+ _data_len_in = *data_len;
+ // data + header
+ *data_len = _ctx->key->data_len + OAES_BLOCK_SIZE;
+
+ if( NULL == data )
+ return OAES_RET_SUCCESS;
+
+ if( _data_len_in < *data_len )
+ return OAES_RET_BUF;
+
+ // header
+ memcpy( data, oaes_header, OAES_BLOCK_SIZE );
+ data[5] = 0x01;
+ data[7] = (uint8_t)_ctx->key->data_len;
+ memcpy( data + OAES_BLOCK_SIZE, _ctx->key->data, _ctx->key->data_len );
+
+ return OAES_RET_SUCCESS;
+}
+
+OAES_RET oaes_key_export_data( OAES_CTX * ctx,
+ uint8_t * data, size_t * data_len )
+{
+ size_t _data_len_in;
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+
+ if( NULL == _ctx )
+ return OAES_RET_ARG1;
+
+ if( NULL == _ctx->key )
+ return OAES_RET_NOKEY;
+
+ if( NULL == data_len )
+ return OAES_RET_ARG3;
+
+ _data_len_in = *data_len;
+ *data_len = _ctx->key->data_len;
+
+ if( NULL == data )
+ return OAES_RET_SUCCESS;
+
+ if( _data_len_in < *data_len )
+ return OAES_RET_BUF;
+
+ memcpy( data, _ctx->key->data, *data_len );
+
+ return OAES_RET_SUCCESS;
+}
+
+OAES_RET oaes_key_import( OAES_CTX * ctx,
+ const uint8_t * data, size_t data_len )
+{
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+ OAES_RET _rc = OAES_RET_SUCCESS;
+ int _key_length;
+
+ if( NULL == _ctx )
+ return OAES_RET_ARG1;
+
+ if( NULL == data )
+ return OAES_RET_ARG2;
+
+ switch( data_len )
+ {
+ case 16 + OAES_BLOCK_SIZE:
+ case 24 + OAES_BLOCK_SIZE:
+ case 32 + OAES_BLOCK_SIZE:
+ break;
+ default:
+ return OAES_RET_ARG3;
+ }
+
+ // header
+ if( 0 != memcmp( data, oaes_header, 4 ) )
+ return OAES_RET_HEADER;
+
+ // header version
+ switch( data[4] )
+ {
+ case 0x01:
+ break;
+ default:
+ return OAES_RET_HEADER;
+ }
+
+ // header type
+ switch( data[5] )
+ {
+ case 0x01:
+ break;
+ default:
+ return OAES_RET_HEADER;
+ }
+
+ // options
+ _key_length = data[7];
+ switch( _key_length )
+ {
+ case 16:
+ case 24:
+ case 32:
+ break;
+ default:
+ return OAES_RET_HEADER;
+ }
+
+ if( (int)data_len != _key_length + OAES_BLOCK_SIZE )
+ return OAES_RET_ARG3;
+
+ if( _ctx->key )
+ oaes_key_destroy( &(_ctx->key) );
+
+ _ctx->key = (oaes_key *) calloc( sizeof( oaes_key ), 1 );
+
+ if( NULL == _ctx->key )
+ return OAES_RET_MEM;
+
+ _ctx->key->data_len = _key_length;
+ _ctx->key->data = (uint8_t *)
+ calloc( _key_length, sizeof( uint8_t ));
+
+ if( NULL == _ctx->key->data )
+ {
+ oaes_key_destroy( &(_ctx->key) );
+ return OAES_RET_MEM;
+ }
+
+ memcpy( _ctx->key->data, data + OAES_BLOCK_SIZE, _key_length );
+ _rc = _rc || oaes_key_expand( ctx );
+
+ if( _rc != OAES_RET_SUCCESS )
+ {
+ oaes_key_destroy( &(_ctx->key) );
+ return _rc;
+ }
+
+ return OAES_RET_SUCCESS;
+}
+
+OAES_RET oaes_key_import_data( OAES_CTX * ctx,
+ const uint8_t * data, size_t data_len )
+{
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+ OAES_RET _rc = OAES_RET_SUCCESS;
+
+ if( NULL == _ctx )
+ return OAES_RET_ARG1;
+
+ if( NULL == data )
+ return OAES_RET_ARG2;
+
+ switch( data_len )
+ {
+ case 16:
+ case 24:
+ case 32:
+ break;
+ default:
+ return OAES_RET_ARG3;
+ }
+
+ if( _ctx->key )
+ oaes_key_destroy( &(_ctx->key) );
+
+ _ctx->key = (oaes_key *) calloc( sizeof( oaes_key ), 1 );
+
+ if( NULL == _ctx->key )
+ return OAES_RET_MEM;
+
+ _ctx->key->data_len = data_len;
+ _ctx->key->data = (uint8_t *)
+ calloc( data_len, sizeof( uint8_t ));
+
+ if( NULL == _ctx->key->data )
+ {
+ oaes_key_destroy( &(_ctx->key) );
+ return OAES_RET_MEM;
+ }
+
+ memcpy( _ctx->key->data, data, data_len );
+ _rc = _rc || oaes_key_expand( ctx );
+
+ if( _rc != OAES_RET_SUCCESS )
+ {
+ oaes_key_destroy( &(_ctx->key) );
+ return _rc;
+ }
+
+ return OAES_RET_SUCCESS;
+}
+
+OAES_CTX * oaes_alloc(void)
+{
+ oaes_ctx * _ctx = (oaes_ctx *) calloc( sizeof( oaes_ctx ), 1 );
+
+ if( NULL == _ctx )
+ return NULL;
+
+#ifdef OAES_HAVE_ISAAC
+ {
+ ub4 _i = 0;
+ char _seed[RANDSIZ + 1];
+
+ _ctx->rctx = (randctx *) calloc( sizeof( randctx ), 1 );
+
+ if( NULL == _ctx->rctx )
+ {
+ free( _ctx );
+ return NULL;
+ }
+
+ oaes_get_seed( _seed );
+ memset( _ctx->rctx->randrsl, 0, RANDSIZ );
+ memcpy( _ctx->rctx->randrsl, _seed, RANDSIZ );
+ randinit( _ctx->rctx, TRUE);
+ }
+#else
+ srand( oaes_get_seed() );
+#endif // OAES_HAVE_ISAAC
+
+ _ctx->key = NULL;
+ oaes_set_option( _ctx, OAES_OPTION_CBC, NULL );
+
+#ifdef OAES_DEBUG
+ _ctx->step_cb = NULL;
+ oaes_set_option( _ctx, OAES_OPTION_STEP_OFF, NULL );
+#endif // OAES_DEBUG
+
+ return (OAES_CTX *) _ctx;
+}
+
+OAES_RET oaes_free( OAES_CTX ** ctx )
+{
+ oaes_ctx ** _ctx = (oaes_ctx **) ctx;
+
+ if( NULL == _ctx )
+ return OAES_RET_ARG1;
+
+ if( NULL == *_ctx )
+ return OAES_RET_SUCCESS;
+
+ if( (*_ctx)->key )
+ oaes_key_destroy( &((*_ctx)->key) );
+
+#ifdef OAES_HAVE_ISAAC
+ if( (*_ctx)->rctx )
+ {
+ free( (*_ctx)->rctx );
+ (*_ctx)->rctx = NULL;
+ }
+#endif // OAES_HAVE_ISAAC
+
+ free( *_ctx );
+ *_ctx = NULL;
+
+ return OAES_RET_SUCCESS;
+}
+
+// Set a mode option on the context. ECB and CBC are mutually exclusive:
+// selecting one clears the other. For OAES_OPTION_CBC, `value` may point
+// to a uint8_t[OAES_BLOCK_SIZE] IV; when NULL, a random IV is generated.
+// NOTE(review): the non-ISAAC IV path uses libc rand(), which is not a
+// cryptographically secure source — confirm acceptable for callers.
+// Returns OAES_RET_ARG2 for an unknown option.
+OAES_RET oaes_set_option( OAES_CTX * ctx,
+ OAES_OPTION option, const void * value )
+{
+ size_t _i;
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+
+ if( NULL == _ctx )
+ return OAES_RET_ARG1;
+
+ switch( option )
+ {
+ case OAES_OPTION_ECB:
+ _ctx->options &= ~OAES_OPTION_CBC;
+ // ECB uses no IV; clear it so stale CBC state cannot leak in.
+ memset( _ctx->iv, 0, OAES_BLOCK_SIZE );
+ break;
+
+ case OAES_OPTION_CBC:
+ _ctx->options &= ~OAES_OPTION_ECB;
+ if( value )
+ memcpy( _ctx->iv, value, OAES_BLOCK_SIZE );
+ else
+ {
+ for( _i = 0; _i < OAES_BLOCK_SIZE; _i++ )
+#ifdef OAES_HAVE_ISAAC
+ _ctx->iv[_i] = (uint8_t) rand( _ctx->rctx );
+#else
+ _ctx->iv[_i] = (uint8_t) rand();
+#endif // OAES_HAVE_ISAAC
+ }
+ break;
+
+#ifdef OAES_DEBUG
+
+ case OAES_OPTION_STEP_ON:
+ if( value )
+ {
+ _ctx->options &= ~OAES_OPTION_STEP_OFF;
+ _ctx->step_cb = value;
+ }
+ else
+ {
+ // A callback is mandatory for step mode; fall back to STEP_OFF.
+ _ctx->options &= ~OAES_OPTION_STEP_ON;
+ _ctx->options |= OAES_OPTION_STEP_OFF;
+ _ctx->step_cb = NULL;
+ return OAES_RET_ARG3;
+ }
+ break;
+
+ case OAES_OPTION_STEP_OFF:
+ _ctx->options &= ~OAES_OPTION_STEP_ON;
+ _ctx->step_cb = NULL;
+ break;
+
+#endif // OAES_DEBUG
+
+ default:
+ return OAES_RET_ARG2;
+ }
+
+ _ctx->options |= option;
+
+ return OAES_RET_SUCCESS;
+}
+
+// Encrypt a single 16-byte block in place using the expanded key schedule
+// in _ctx->key->exp_data (FIPS-197 cipher: initial AddRoundKey, Nr-1 full
+// rounds of SubBytes/ShiftRows/MixColumns/AddRoundKey, then a final round
+// without MixColumns). c_len must equal OAES_BLOCK_SIZE and a key must
+// already be set, otherwise an OAES_RET_* argument/NOKEY error is returned.
+static OAES_RET oaes_encrypt_block(
+ OAES_CTX * ctx, uint8_t * c, size_t c_len )
+{
+ size_t _i, _j;
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+
+ if( NULL == _ctx )
+ return OAES_RET_ARG1;
+
+ if( NULL == c )
+ return OAES_RET_ARG2;
+
+ if( c_len != OAES_BLOCK_SIZE )
+ return OAES_RET_ARG3;
+
+ if( NULL == _ctx->key )
+ return OAES_RET_NOKEY;
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "input", 1, NULL );
+#endif // OAES_DEBUG
+
+ // AddRoundKey(State, K0)
+ for( _i = 0; _i < c_len; _i++ )
+ c[_i] = c[_i] ^ _ctx->key->exp_data[_i];
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ {
+ _ctx->step_cb( _ctx->key->exp_data, "k_sch", 1, NULL );
+ _ctx->step_cb( c, "k_add", 1, NULL );
+ }
+#endif // OAES_DEBUG
+
+ // for round = 1 step 1 to Nr–1
+ for( _i = 1; _i < _ctx->key->num_keys - 1; _i++ )
+ {
+ // SubBytes(state)
+ for( _j = 0; _j < c_len; _j++ )
+ oaes_sub_byte( c + _j );
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "s_box", _i, NULL );
+#endif // OAES_DEBUG
+
+ // ShiftRows(state)
+ oaes_shift_rows( c );
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "s_row", _i, NULL );
+#endif // OAES_DEBUG
+
+ // MixColumns(state), one 4-byte column at a time
+ oaes_mix_cols( c );
+ oaes_mix_cols( c + 4 );
+ oaes_mix_cols( c + 8 );
+ oaes_mix_cols( c + 12 );
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "m_col", _i, NULL );
+#endif // OAES_DEBUG
+
+ // AddRoundKey(state, w[round*Nb, (round+1)*Nb-1])
+ for( _j = 0; _j < c_len; _j++ )
+ c[_j] = c[_j] ^
+ _ctx->key->exp_data[_i * OAES_RKEY_LEN * OAES_COL_LEN + _j];
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ {
+ _ctx->step_cb( _ctx->key->exp_data + _i * OAES_RKEY_LEN * OAES_COL_LEN,
+ "k_sch", _i, NULL );
+ _ctx->step_cb( c, "k_add", _i, NULL );
+ }
+#endif // OAES_DEBUG
+
+ }
+
+ // Final round: SubBytes(state) — no MixColumns per FIPS-197
+ for( _i = 0; _i < c_len; _i++ )
+ oaes_sub_byte( c + _i );
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "s_box", _ctx->key->num_keys - 1, NULL );
+#endif // OAES_DEBUG
+
+ // ShiftRows(state)
+ oaes_shift_rows( c );
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "s_row", _ctx->key->num_keys - 1, NULL );
+#endif // OAES_DEBUG
+
+ // AddRoundKey(state, w[Nr*Nb, (Nr+1)*Nb-1])
+ for( _i = 0; _i < c_len; _i++ )
+ c[_i] = c[_i] ^ _ctx->key->exp_data[
+ ( _ctx->key->num_keys - 1 ) * OAES_RKEY_LEN * OAES_COL_LEN + _i ];
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ {
+ _ctx->step_cb( _ctx->key->exp_data +
+ ( _ctx->key->num_keys - 1 ) * OAES_RKEY_LEN * OAES_COL_LEN,
+ "k_sch", _ctx->key->num_keys - 1, NULL );
+ _ctx->step_cb( c, "output", _ctx->key->num_keys - 1, NULL );
+ }
+#endif // OAES_DEBUG
+
+ return OAES_RET_SUCCESS;
+}
+
+// Decrypt a single 16-byte block in place — the exact inverse of
+// oaes_encrypt_block (FIPS-197 InvCipher: rounds run high-to-low with
+// InvShiftRows/InvSubBytes/AddRoundKey/InvMixColumns, and the final round
+// omits InvMixColumns). Same argument checks as the encrypt path.
+static OAES_RET oaes_decrypt_block(
+ OAES_CTX * ctx, uint8_t * c, size_t c_len )
+{
+ size_t _i, _j;
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+
+ if( NULL == _ctx )
+ return OAES_RET_ARG1;
+
+ if( NULL == c )
+ return OAES_RET_ARG2;
+
+ if( c_len != OAES_BLOCK_SIZE )
+ return OAES_RET_ARG3;
+
+ if( NULL == _ctx->key )
+ return OAES_RET_NOKEY;
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "iinput", _ctx->key->num_keys - 1, NULL );
+#endif // OAES_DEBUG
+
+ // AddRoundKey(state, w[Nr*Nb, (Nr+1)*Nb-1]) — undo the final round key
+ for( _i = 0; _i < c_len; _i++ )
+ c[_i] = c[_i] ^ _ctx->key->exp_data[
+ ( _ctx->key->num_keys - 1 ) * OAES_RKEY_LEN * OAES_COL_LEN + _i ];
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ {
+ _ctx->step_cb( _ctx->key->exp_data +
+ ( _ctx->key->num_keys - 1 ) * OAES_RKEY_LEN * OAES_COL_LEN,
+ "ik_sch", _ctx->key->num_keys - 1, NULL );
+ _ctx->step_cb( c, "ik_add", _ctx->key->num_keys - 1, NULL );
+ }
+#endif // OAES_DEBUG
+
+ // Middle rounds in reverse order (Nr-1 down to 1)
+ for( _i = _ctx->key->num_keys - 2; _i > 0; _i-- )
+ {
+ // InvShiftRows(state)
+ oaes_inv_shift_rows( c );
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "is_row", _i, NULL );
+#endif // OAES_DEBUG
+
+ // InvSubBytes(state)
+ for( _j = 0; _j < c_len; _j++ )
+ oaes_inv_sub_byte( c + _j );
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "is_box", _i, NULL );
+#endif // OAES_DEBUG
+
+ // AddRoundKey(state, w[round*Nb, (round+1)*Nb-1])
+ for( _j = 0; _j < c_len; _j++ )
+ c[_j] = c[_j] ^
+ _ctx->key->exp_data[_i * OAES_RKEY_LEN * OAES_COL_LEN + _j];
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ {
+ _ctx->step_cb( _ctx->key->exp_data + _i * OAES_RKEY_LEN * OAES_COL_LEN,
+ "ik_sch", _i, NULL );
+ _ctx->step_cb( c, "ik_add", _i, NULL );
+ }
+#endif // OAES_DEBUG
+
+ // InvMixColums(state), one 4-byte column at a time
+ oaes_inv_mix_cols( c );
+ oaes_inv_mix_cols( c + 4 );
+ oaes_inv_mix_cols( c + 8 );
+ oaes_inv_mix_cols( c + 12 );
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "im_col", _i, NULL );
+#endif // OAES_DEBUG
+
+ }
+
+ // Final (round-0 side) InvShiftRows(state)
+ oaes_inv_shift_rows( c );
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "is_row", 1, NULL );
+#endif // OAES_DEBUG
+
+ // InvSubBytes(state)
+ for( _i = 0; _i < c_len; _i++ )
+ oaes_inv_sub_byte( c + _i );
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ _ctx->step_cb( c, "is_box", 1, NULL );
+#endif // OAES_DEBUG
+
+ // AddRoundKey(state, w[0, Nb-1])
+ for( _i = 0; _i < c_len; _i++ )
+ c[_i] = c[_i] ^ _ctx->key->exp_data[_i];
+
+#ifdef OAES_DEBUG
+ if( _ctx->step_cb )
+ {
+ _ctx->step_cb( _ctx->key->exp_data, "ik_sch", 1, NULL );
+ _ctx->step_cb( c, "ioutput", 1, NULL );
+ }
+#endif // OAES_DEBUG
+
+ return OAES_RET_SUCCESS;
+}
+
+// Encrypt message m (m_len bytes) into c using the OpenAES wire format:
+// [16-byte header | 16-byte IV | data padded to a block multiple].
+// Two-phase contract: call with c == NULL to receive the required output
+// size in *c_len (returns success); call again with a buffer of at least
+// that size to encrypt. Padding bytes are 1,2,...,pad_len (recorded via
+// OAES_FLAG_PAD in the header). In CBC mode the context IV is advanced
+// to the last ciphertext block, chaining successive calls.
+OAES_RET oaes_encrypt( OAES_CTX * ctx,
+ const uint8_t * m, size_t m_len, uint8_t * c, size_t * c_len )
+{
+ size_t _i, _j, _c_len_in, _c_data_len;
+ size_t _pad_len = m_len % OAES_BLOCK_SIZE == 0 ?
+ 0 : OAES_BLOCK_SIZE - m_len % OAES_BLOCK_SIZE;
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+ OAES_RET _rc = OAES_RET_SUCCESS;
+ uint8_t _flags = _pad_len ? OAES_FLAG_PAD : 0;
+
+ if( NULL == _ctx )
+ return OAES_RET_ARG1;
+
+ if( NULL == m )
+ return OAES_RET_ARG2;
+
+ if( NULL == c_len )
+ return OAES_RET_ARG5;
+
+ _c_len_in = *c_len;
+ // data + pad
+ _c_data_len = m_len + _pad_len;
+ // header + iv + data + pad
+ *c_len = 2 * OAES_BLOCK_SIZE + m_len + _pad_len;
+
+ // Size-query mode: report required length only.
+ if( NULL == c )
+ return OAES_RET_SUCCESS;
+
+ if( _c_len_in < *c_len )
+ return OAES_RET_BUF;
+
+ if( NULL == _ctx->key )
+ return OAES_RET_NOKEY;
+
+ // header: magic, then options at offset 6 and flags at offset 8
+ memcpy(c, oaes_header, OAES_BLOCK_SIZE );
+ memcpy(c + 6, &_ctx->options, sizeof(_ctx->options));
+ memcpy(c + 8, &_flags, sizeof(_flags));
+ // iv
+ memcpy(c + OAES_BLOCK_SIZE, _ctx->iv, OAES_BLOCK_SIZE );
+ // data
+ memcpy(c + 2 * OAES_BLOCK_SIZE, m, m_len );
+
+ for( _i = 0; _i < _c_data_len; _i += OAES_BLOCK_SIZE )
+ {
+ uint8_t _block[OAES_BLOCK_SIZE];
+ size_t _block_size = min( m_len - _i, OAES_BLOCK_SIZE );
+
+ memcpy( _block, c + 2 * OAES_BLOCK_SIZE + _i, _block_size );
+
+ // insert pad (only the final, partial block gets one)
+ for( _j = 0; _j < OAES_BLOCK_SIZE - _block_size; _j++ )
+ _block[ _block_size + _j ] = (uint8_t)(_j + 1);
+
+ // CBC: XOR plaintext block with previous ciphertext block (or IV)
+ if( _ctx->options & OAES_OPTION_CBC )
+ {
+ for( _j = 0; _j < OAES_BLOCK_SIZE; _j++ )
+ _block[_j] = _block[_j] ^ _ctx->iv[_j];
+ }
+
+ // NOTE(review): '||' collapses any failure to 1 (OAES_RET_UNKNOWN),
+ // losing the specific error code — long-standing upstream OpenAES quirk.
+ _rc = _rc ||
+ oaes_encrypt_block( ctx, _block, OAES_BLOCK_SIZE );
+ memcpy( c + 2 * OAES_BLOCK_SIZE + _i, _block, OAES_BLOCK_SIZE );
+
+ if( _ctx->options & OAES_OPTION_CBC )
+ memcpy( _ctx->iv, _block, OAES_BLOCK_SIZE );
+ }
+
+ return _rc;
+}
+
+// Decrypt an OpenAES-format buffer c (header | IV | ciphertext) into m.
+// Two-phase contract like oaes_encrypt: m == NULL reports the required
+// *m_len and returns success. Validates header magic/version/type, the
+// recorded options (ECB xor CBC, non-empty) and flags, then decrypts
+// block-by-block and strips the 1,2,...,n pad when OAES_FLAG_PAD is set.
+// NOTE(review): c_len is only checked to be a block multiple; c_len <
+// 2*OAES_BLOCK_SIZE would make *m_len underflow — confirm callers
+// guarantee a minimum length before trusting external input here.
+OAES_RET oaes_decrypt( OAES_CTX * ctx,
+ const uint8_t * c, size_t c_len, uint8_t * m, size_t * m_len )
+{
+ size_t _i, _j, _m_len_in;
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+ OAES_RET _rc = OAES_RET_SUCCESS;
+ uint8_t _iv[OAES_BLOCK_SIZE];
+ uint8_t _flags;
+ OAES_OPTION _options;
+
+ if( NULL == ctx )
+ return OAES_RET_ARG1;
+
+ if( NULL == c )
+ return OAES_RET_ARG2;
+
+ if( c_len % OAES_BLOCK_SIZE )
+ return OAES_RET_ARG3;
+
+ if( NULL == m_len )
+ return OAES_RET_ARG5;
+
+ _m_len_in = *m_len;
+ // plaintext length = total minus header and IV blocks
+ *m_len = c_len - 2 * OAES_BLOCK_SIZE;
+
+ // Size-query mode: report required length only.
+ if( NULL == m )
+ return OAES_RET_SUCCESS;
+
+ if( _m_len_in < *m_len )
+ return OAES_RET_BUF;
+
+ if( NULL == _ctx->key )
+ return OAES_RET_NOKEY;
+
+ // header
+ if( 0 != memcmp( c, oaes_header, 4 ) )
+ return OAES_RET_HEADER;
+
+ // header version
+ switch( c[4] )
+ {
+ case 0x01:
+ break;
+ default:
+ return OAES_RET_HEADER;
+ }
+
+ // header type
+ switch( c[5] )
+ {
+ case 0x02:
+ break;
+ default:
+ return OAES_RET_HEADER;
+ }
+
+ // options
+ memcpy(&_options, c + 6, sizeof(_options));
+ // validate that all options are valid
+ if( _options & ~(
+ OAES_OPTION_ECB
+ | OAES_OPTION_CBC
+#ifdef OAES_DEBUG
+ | OAES_OPTION_STEP_ON
+ | OAES_OPTION_STEP_OFF
+#endif // OAES_DEBUG
+ ) )
+ return OAES_RET_HEADER;
+ // ECB and CBC are mutually exclusive, and at least one must be set
+ if( ( _options & OAES_OPTION_ECB ) &&
+ ( _options & OAES_OPTION_CBC ) )
+ return OAES_RET_HEADER;
+ if( _options == OAES_OPTION_NONE )
+ return OAES_RET_HEADER;
+
+ // flags
+ memcpy(&_flags, c + 8, sizeof(_flags));
+ // validate that all flags are valid
+ if( _flags & ~(
+ OAES_FLAG_PAD
+ ) )
+ return OAES_RET_HEADER;
+
+ // iv
+ memcpy( _iv, c + OAES_BLOCK_SIZE, OAES_BLOCK_SIZE);
+ // data + pad
+ memcpy( m, c + 2 * OAES_BLOCK_SIZE, *m_len );
+
+ for( _i = 0; _i < *m_len; _i += OAES_BLOCK_SIZE )
+ {
+ // CBC: chain against the previous ciphertext block (IV for block 0)
+ if( ( _options & OAES_OPTION_CBC ) && _i > 0 )
+ memcpy( _iv, c + OAES_BLOCK_SIZE + _i, OAES_BLOCK_SIZE );
+
+ // NOTE(review): '||' collapses any failure to 1 (OAES_RET_UNKNOWN),
+ // losing the specific error code — long-standing upstream OpenAES quirk.
+ _rc = _rc ||
+ oaes_decrypt_block( ctx, m + _i, min( *m_len - _i, OAES_BLOCK_SIZE ) );
+
+ // CBC
+ if( _options & OAES_OPTION_CBC )
+ {
+ for( _j = 0; _j < OAES_BLOCK_SIZE; _j++ )
+ m[ _i + _j ] = m[ _i + _j ] ^ _iv[_j];
+ }
+ }
+
+ // remove pad: last byte holds the pad length; bytes count down n..1
+ if( _flags & OAES_FLAG_PAD )
+ {
+ int _is_pad = 1;
+ size_t _temp = (size_t) m[*m_len - 1];
+
+ // valid pad length is 1..15 (a full-block pad is never written)
+ if( _temp <= 0x00 || _temp > 0x0f )
+ return OAES_RET_HEADER;
+ for( _i = 0; _i < _temp; _i++ )
+ if( m[*m_len - 1 - _i] != _temp - _i )
+ _is_pad = 0;
+ if( _is_pad )
+ {
+ memset( m + *m_len - _temp, 0, _temp );
+ *m_len -= _temp;
+ }
+ else
+ return OAES_RET_HEADER;
+ }
+
+ return OAES_RET_SUCCESS;
+}
+
+
+// Apply one full AES round (SubBytes, ShiftRows, MixColumns, AddRoundKey)
+// to the 16-byte state c, using the 16-byte round key `key` directly —
+// no key schedule or context required. Exposed for CryptoNight, which
+// drives single AES rounds itself.
+OAES_API OAES_RET oaes_encryption_round( const uint8_t * key, uint8_t * c )
+{
+ size_t _i;
+
+ if( NULL == key )
+ return OAES_RET_ARG1;
+
+ if( NULL == c )
+ return OAES_RET_ARG2;
+
+ // SubBytes(state)
+ for( _i = 0; _i < OAES_BLOCK_SIZE; _i++ )
+ oaes_sub_byte( c + _i );
+
+ // ShiftRows(state)
+ oaes_shift_rows( c );
+
+ // MixColumns(state), one 4-byte column at a time
+ oaes_mix_cols( c );
+ oaes_mix_cols( c + 4 );
+ oaes_mix_cols( c + 8 );
+ oaes_mix_cols( c + 12 );
+
+ // AddRoundKey(State, key)
+ for( _i = 0; _i < OAES_BLOCK_SIZE; _i++ )
+ c[_i] ^= key[_i];
+
+ return OAES_RET_SUCCESS;
+}
+
+// Run 10 consecutive full AES rounds over block c, taking each round key
+// from the context's expanded key schedule. "Pseudo" ECB: unlike standard
+// AES-ECB there is no initial AddRoundKey and no final round without
+// MixColumns — this is the exact transform CryptoNight expects.
+OAES_API OAES_RET oaes_pseudo_encrypt_ecb( OAES_CTX * ctx, uint8_t * c )
+{
+ size_t _i;
+ oaes_ctx * _ctx = (oaes_ctx *) ctx;
+
+ if( NULL == _ctx )
+ return OAES_RET_ARG1;
+
+ if( NULL == c )
+ return OAES_RET_ARG2;
+
+ if( NULL == _ctx->key )
+ return OAES_RET_NOKEY;
+
+ for ( _i = 0; _i < 10; ++_i )
+ {
+ // round key _i lives at offset _i * OAES_RKEY_LEN * OAES_COL_LEN
+ oaes_encryption_round( &_ctx->key->exp_data[_i * OAES_RKEY_LEN * OAES_COL_LEN], c );
+ }
+
+ return OAES_RET_SUCCESS;
+}
diff --git a/stratum/algos/cryptonote/crypto/oaes_lib.h b/stratum/algos/cryptonote/crypto/oaes_lib.h
new file mode 100644
index 000000000..fd1942822
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/oaes_lib.h
@@ -0,0 +1,215 @@
+/*
+ * ---------------------------------------------------------------------------
+ * OpenAES License
+ * ---------------------------------------------------------------------------
+ * Copyright (c) 2012, Nabil S. Al Ramli, www.nalramli.com
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ * ---------------------------------------------------------------------------
+ */
+
+#ifndef _OAES_LIB_H
+#define _OAES_LIB_H
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _WIN32
+# ifdef OAES_SHARED
+# ifdef oaes_lib_EXPORTS
+# define OAES_API __declspec(dllexport)
+# else
+# define OAES_API __declspec(dllimport)
+# endif
+# else
+# define OAES_API
+# endif
+#else
+# define OAES_API
+#endif // WIN32
+
+#define OAES_VERSION "0.8.1"
+#define OAES_BLOCK_SIZE 16
+
+typedef void OAES_CTX;
+
+typedef enum
+{
+ OAES_RET_FIRST = 0,
+ OAES_RET_SUCCESS = 0,
+ OAES_RET_UNKNOWN,
+ OAES_RET_ARG1,
+ OAES_RET_ARG2,
+ OAES_RET_ARG3,
+ OAES_RET_ARG4,
+ OAES_RET_ARG5,
+ OAES_RET_NOKEY,
+ OAES_RET_MEM,
+ OAES_RET_BUF,
+ OAES_RET_HEADER,
+ OAES_RET_COUNT
+} OAES_RET;
+
+/*
+ * oaes_set_option() takes one of these values for its [option] parameter
+ * some options accept either an optional or a required [value] parameter
+ */
+// no option
+#define OAES_OPTION_NONE 0
+// enable ECB mode, disable CBC mode
+#define OAES_OPTION_ECB 1
+// enable CBC mode, disable ECB mode
+// value is optional, may pass uint8_t iv[OAES_BLOCK_SIZE] to specify
+// the value of the initialization vector, iv
+#define OAES_OPTION_CBC 2
+
+#ifdef OAES_DEBUG
+typedef int ( * oaes_step_cb ) (
+ const uint8_t state[OAES_BLOCK_SIZE],
+ const char * step_name,
+ int step_count,
+ void * user_data );
+// enable state stepping mode
+// value is required, must pass oaes_step_cb to receive the state at each step
+#define OAES_OPTION_STEP_ON 4
+// disable state stepping mode
+#define OAES_OPTION_STEP_OFF 8
+#endif // OAES_DEBUG
+
+typedef uint16_t OAES_OPTION;
+
+typedef struct _oaes_key
+{
+ size_t data_len;
+ uint8_t *data;
+ size_t exp_data_len;
+ uint8_t *exp_data;
+ size_t num_keys;
+ size_t key_base;
+} oaes_key;
+
+typedef struct _oaes_ctx
+{
+#ifdef OAES_HAVE_ISAAC
+ randctx * rctx;
+#endif // OAES_HAVE_ISAAC
+
+#ifdef OAES_DEBUG
+ oaes_step_cb step_cb;
+#endif // OAES_DEBUG
+
+ oaes_key * key;
+ OAES_OPTION options;
+ uint8_t iv[OAES_BLOCK_SIZE];
+} oaes_ctx;
+/*
+ * // usage:
+ *
+ * OAES_CTX * ctx = oaes_alloc();
+ * .
+ * .
+ * .
+ * {
+ * oaes_gen_key_xxx( ctx );
+ * {
+ * oaes_key_export( ctx, _buf, &_buf_len );
+ * // or
+ * oaes_key_export_data( ctx, _buf, &_buf_len );\
+ * }
+ * }
+ * // or
+ * {
+ * oaes_key_import( ctx, _buf, _buf_len );
+ * // or
+ * oaes_key_import_data( ctx, _buf, _buf_len );
+ * }
+ * .
+ * .
+ * .
+ * oaes_encrypt( ctx, m, m_len, c, &c_len );
+ * .
+ * .
+ * .
+ * oaes_decrypt( ctx, c, c_len, m, &m_len );
+ * .
+ * .
+ * .
+ * oaes_free( &ctx );
+ */
+
+OAES_API OAES_CTX * oaes_alloc(void);
+
+OAES_API OAES_RET oaes_free( OAES_CTX ** ctx );
+
+OAES_API OAES_RET oaes_set_option( OAES_CTX * ctx,
+ OAES_OPTION option, const void * value );
+
+OAES_API OAES_RET oaes_key_gen_128( OAES_CTX * ctx );
+
+OAES_API OAES_RET oaes_key_gen_192( OAES_CTX * ctx );
+
+OAES_API OAES_RET oaes_key_gen_256( OAES_CTX * ctx );
+
+// export key with header information
+// set data == NULL to get the required data_len
+OAES_API OAES_RET oaes_key_export( OAES_CTX * ctx,
+ uint8_t * data, size_t * data_len );
+
+// directly export the data from key
+// set data == NULL to get the required data_len
+OAES_API OAES_RET oaes_key_export_data( OAES_CTX * ctx,
+ uint8_t * data, size_t * data_len );
+
+// import key with header information
+OAES_API OAES_RET oaes_key_import( OAES_CTX * ctx,
+ const uint8_t * data, size_t data_len );
+
+// directly import data into key
+OAES_API OAES_RET oaes_key_import_data( OAES_CTX * ctx,
+ const uint8_t * data, size_t data_len );
+
+// set c == NULL to get the required c_len
+OAES_API OAES_RET oaes_encrypt( OAES_CTX * ctx,
+ const uint8_t * m, size_t m_len, uint8_t * c, size_t * c_len );
+
+// set m == NULL to get the required m_len
+OAES_API OAES_RET oaes_decrypt( OAES_CTX * ctx,
+ const uint8_t * c, size_t c_len, uint8_t * m, size_t * m_len );
+
+// set buf == NULL to get the required buf_len
+OAES_API OAES_RET oaes_sprintf(
+ char * buf, size_t * buf_len, const uint8_t * data, size_t data_len );
+
+OAES_API OAES_RET oaes_encryption_round( const uint8_t * key, uint8_t * c );
+
+OAES_API OAES_RET oaes_pseudo_encrypt_ecb( OAES_CTX * ctx, uint8_t * c );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _OAES_LIB_H
diff --git a/stratum/algos/cryptonote/crypto/skein_port.h b/stratum/algos/cryptonote/crypto/skein_port.h
new file mode 100644
index 000000000..36394683e
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/skein_port.h
@@ -0,0 +1,190 @@
+#ifndef _SKEIN_PORT_H_
+#define _SKEIN_PORT_H_
+
+#include <limits.h>
+#include <stdint.h>
+
+#ifndef RETURN_VALUES
+# define RETURN_VALUES
+# if defined( DLL_EXPORT )
+# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
+# define VOID_RETURN __declspec( dllexport ) void __stdcall
+# define INT_RETURN __declspec( dllexport ) int __stdcall
+# elif defined( __GNUC__ )
+# define VOID_RETURN __declspec( __dllexport__ ) void
+# define INT_RETURN __declspec( __dllexport__ ) int
+# else
+# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
+# endif
+# elif defined( DLL_IMPORT )
+# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
+# define VOID_RETURN __declspec( dllimport ) void __stdcall
+# define INT_RETURN __declspec( dllimport ) int __stdcall
+# elif defined( __GNUC__ )
+# define VOID_RETURN __declspec( __dllimport__ ) void
+# define INT_RETURN __declspec( __dllimport__ ) int
+# else
+# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
+# endif
+# elif defined( __WATCOMC__ )
+# define VOID_RETURN void __cdecl
+# define INT_RETURN int __cdecl
+# else
+# define VOID_RETURN void
+# define INT_RETURN int
+# endif
+#endif
+
+/* These defines are used to declare buffers in a way that allows
+ faster operations on longer variables to be used. In all these
+ defines 'size' must be a power of 2 and >= 8
+
+ dec_unit_type(size,x) declares a variable 'x' of length
+ 'size' bits
+
+ dec_bufr_type(size,bsize,x) declares a buffer 'x' of length 'bsize'
+ bytes defined as an array of variables
+ each of 'size' bits (bsize must be a
+ multiple of size / 8)
+
+ ptr_cast(x,size) casts a pointer to a pointer to a
+ varaiable of length 'size' bits
+*/
+
+#define ui_type(size) uint##size##_t
+#define dec_unit_type(size,x) typedef ui_type(size) x
+#define dec_bufr_type(size,bsize,x) typedef ui_type(size) x[bsize / (size >> 3)]
+#define ptr_cast(x,size) ((ui_type(size)*)(x))
+
+typedef unsigned int uint_t; /* native unsigned integer */
+typedef uint8_t u08b_t; /* 8-bit unsigned integer */
+typedef uint64_t u64b_t; /* 64-bit unsigned integer */
+
+#ifndef RotL_64
+#define RotL_64(x,N) (((x) << (N)) | ((x) >> (64-(N))))
+#endif
+
+/*
+ * Skein is "natively" little-endian (unlike SHA-xxx), for optimal
+ * performance on x86 CPUs. The Skein code requires the following
+ * definitions for dealing with endianness:
+ *
+ * SKEIN_NEED_SWAP: 0 for little-endian, 1 for big-endian
+ * Skein_Put64_LSB_First
+ * Skein_Get64_LSB_First
+ * Skein_Swap64
+ *
+ * If SKEIN_NEED_SWAP is defined at compile time, it is used here
+ * along with the portable versions of Put64/Get64/Swap64, which
+ * are slow in general.
+ *
+ * Otherwise, an "auto-detect" of endianness is attempted below.
+ * If the default handling doesn't work well, the user may insert
+ * platform-specific code instead (e.g., for big-endian CPUs).
+ *
+ */
+#ifndef SKEIN_NEED_SWAP /* compile-time "override" for endianness? */
+
+
+#include "int-util.h"
+
+#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
+#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+
+#if BYTE_ORDER == BIG_ENDIAN
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
+#endif
+
+/* special handler for IA64, which may be either endianness (?) */
+/* here we assume little-endian, but this may need to be changed */
+#if defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
+# define PLATFORM_MUST_ALIGN (1)
+#ifndef PLATFORM_BYTE_ORDER
+# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#endif
+#endif
+
+#ifndef PLATFORM_MUST_ALIGN
+# define PLATFORM_MUST_ALIGN (0)
+#endif
+
+
+#if PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN
+ /* here for big-endian CPUs */
+#define SKEIN_NEED_SWAP (1)
+#elif PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
+ /* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */
+#define SKEIN_NEED_SWAP (0)
+#if PLATFORM_MUST_ALIGN == 0 /* ok to use "fast" versions? */
+#define Skein_Put64_LSB_First(dst08,src64,bCnt) memcpy(dst08,src64,bCnt)
+#define Skein_Get64_LSB_First(dst64,src08,wCnt) memcpy(dst64,src08,8*(wCnt))
+#endif
+#else
+#error "Skein needs endianness setting!"
+#endif
+
+#endif /* ifndef SKEIN_NEED_SWAP */
+
+/*
+ ******************************************************************
+ * Provide any definitions still needed.
+ ******************************************************************
+ */
+#ifndef Skein_Swap64 /* swap for big-endian, nop for little-endian */
+#if SKEIN_NEED_SWAP
+#define Skein_Swap64(w64) \
+ ( (( ((u64b_t)(w64)) & 0xFF) << 56) | \
+ (((((u64b_t)(w64)) >> 8) & 0xFF) << 48) | \
+ (((((u64b_t)(w64)) >>16) & 0xFF) << 40) | \
+ (((((u64b_t)(w64)) >>24) & 0xFF) << 32) | \
+ (((((u64b_t)(w64)) >>32) & 0xFF) << 24) | \
+ (((((u64b_t)(w64)) >>40) & 0xFF) << 16) | \
+ (((((u64b_t)(w64)) >>48) & 0xFF) << 8) | \
+ (((((u64b_t)(w64)) >>56) & 0xFF) ) )
+#else
+#define Skein_Swap64(w64) (w64)
+#endif
+#endif /* ifndef Skein_Swap64 */
+
+
+#ifndef Skein_Put64_LSB_First
+// Portable little-endian store: write bCnt bytes of the u64 array src
+// into dst, least-significant byte first. The loop body here was mangled
+// by HTML extraction ("for (n=0;n>3] ..."); restored from the Skein
+// reference implementation's portable version.
+void Skein_Put64_LSB_First(u08b_t *dst,const u64b_t *src,size_t bCnt)
+#ifdef SKEIN_PORT_CODE /* instantiate the function code here? */
+ { /* this version is fully portable (big-endian or little-endian), but slow */
+ size_t n;
+
+ for (n=0;n<bCnt;n++)
+ dst[n] = (u08b_t)(src[n>>3] >> (8*(n&7)));
+ }
+#else
+ ; /* output only the function prototype */
+#endif
+#endif /* ifndef Skein_Put64_LSB_First */
+
+
+#ifndef Skein_Get64_LSB_First
+// Portable little-endian load: assemble wCnt u64 words in dst from the
+// byte stream src, least-significant byte first (inverse of
+// Skein_Put64_LSB_First). Compiled only when SKEIN_PORT_CODE is defined;
+// otherwise this emits just the prototype.
+void Skein_Get64_LSB_First(u64b_t *dst,const u08b_t *src,size_t wCnt)
+#ifdef SKEIN_PORT_CODE /* instantiate the function code here? */
+ { /* this version is fully portable (big-endian or little-endian), but slow */
+ size_t n;
+
+ for (n=0;n<8*wCnt;n+=8)
+ dst[n/8] = (((u64b_t) src[n ]) ) +
+ (((u64b_t) src[n+1]) << 8) +
+ (((u64b_t) src[n+2]) << 16) +
+ (((u64b_t) src[n+3]) << 24) +
+ (((u64b_t) src[n+4]) << 32) +
+ (((u64b_t) src[n+5]) << 40) +
+ (((u64b_t) src[n+6]) << 48) +
+ (((u64b_t) src[n+7]) << 56) ;
+ }
+#else
+ ; /* output only the function prototype */
+#endif
+#endif /* ifndef Skein_Get64_LSB_First */
+
+#endif /* ifndef _SKEIN_PORT_H_ */
diff --git a/stratum/algos/cryptonote/crypto/variant2_int_sqrt.h b/stratum/algos/cryptonote/crypto/variant2_int_sqrt.h
new file mode 100644
index 000000000..ba5b431ae
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/variant2_int_sqrt.h
@@ -0,0 +1,168 @@
+#ifndef VARIANT2_INT_SQRT_H
+#define VARIANT2_INT_SQRT_H
+
+#include <math.h>
+#include <float.h>
+
+#define VARIANT2_INTEGER_MATH_SQRT_STEP_SSE2() \
+ do { \
+ const __m128i exp_double_bias = _mm_set_epi64x(0, 1023ULL << 52); \
+ __m128d x = _mm_castsi128_pd(_mm_add_epi64(_mm_cvtsi64_si128(sqrt_input >> 12), exp_double_bias)); \
+ x = _mm_sqrt_sd(_mm_setzero_pd(), x); \
+ sqrt_result = (uint64_t)(_mm_cvtsi128_si64(_mm_sub_epi64(_mm_castpd_si128(x), exp_double_bias))) >> 19; \
+ } while(0)
+
+#define VARIANT2_INTEGER_MATH_SQRT_STEP_FP64() \
+ do { \
+ sqrt_result = sqrt(sqrt_input + 18446744073709551616.0) * 2.0 - 8589934592.0; \
+ } while(0)
+
+//#define VARIANT2_INTEGER_MATH_SQRT_STEP_REF() \
+// sqrt_result = integer_square_root_v2(sqrt_input)
+
+// Reference implementation of the integer square root for Cryptonight variant 2
+// Computes integer part of "sqrt(2^64 + n) * 2 - 2^33"
+//
+// In other words, given 64-bit unsigned integer n:
+// 1) Write it as x = 1.NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN000... in binary (1 <= x < 2, all 64 bits of n are used)
+// 2) Calculate sqrt(x) = 1.0RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR... (1 <= sqrt(x) < sqrt(2), so it will always start with "1.0" in binary)
+// 3) Take 32 bits that come after "1.0" and return them as a 32-bit unsigned integer, discard all remaining bits
+//
+// Some sample inputs and outputs:
+//
+// Input | Output | Exact value of "sqrt(2^64 + n) * 2 - 2^33"
+// -----------------|------------|-------------------------------------------
+// 0 | 0 | 0
+// 2^32 | 0 | 0.99999999994179233909330885695244...
+// 2^32 + 1 | 1 | 1.0000000001746229827200734316305...
+// 2^50 | 262140 | 262140.00012206565608606978175873...
+// 2^55 + 20963331 | 8384515 | 8384515.9999999997673963974959744...
+// 2^55 + 20963332 | 8384516 | 8384516
+// 2^62 + 26599786 | 1013904242 | 1013904242.9999999999479374853545...
+// 2^62 + 26599787 | 1013904243 | 1013904243.0000000001561875439364...
+// 2^64 - 1 | 3558067407 | 3558067407.9041987696409179931096...
+
+// The reference implementation as it is now uses only unsigned int64 arithmetic, so it can't have undefined behavior
+// It was tested once for all edge cases and confirmed correct
+//
+// !!! Note: if you're modifying this code, uncomment the test in monero/tests/hash/main.cpp !!!
+//
+/*
+static inline uint64_t integer_square_root_v2(uint64_t n)
+{
+ uint64_t r = 1ULL << 63;
+
+ for (uint64_t bit = 1ULL << 60; bit; bit >>= 2)
+ {
+ const bool b = (n < r + bit);
+ const uint64_t n_next = n - (r + bit);
+ const uint64_t r_next = r + bit * 2;
+ n = b ? n : n_next;
+ r = b ? r : r_next;
+ r >>= 1;
+ }
+
+ return r * 2 + ((n > r) ? 1 : 0);
+}
+*/
+
+/*
+VARIANT2_INTEGER_MATH_SQRT_FIXUP checks that "r" is an integer part of "sqrt(2^64 + sqrt_input) * 2 - 2^33" and adds or subtracts 1 if needed
+It's hard to understand how it works, so here is a full calculation of formulas used in VARIANT2_INTEGER_MATH_SQRT_FIXUP
+
+The following inequalities must hold for r if it's an integer part of "sqrt(2^64 + sqrt_input) * 2 - 2^33":
+1) r <= sqrt(2^64 + sqrt_input) * 2 - 2^33
+2) r + 1 > sqrt(2^64 + sqrt_input) * 2 - 2^33
+
+We need to check them using only unsigned integer arithmetic to avoid rounding errors and undefined behavior
+
+First inequality: r <= sqrt(2^64 + sqrt_input) * 2 - 2^33
+-----------------------------------------------------------------------------------
+r <= sqrt(2^64 + sqrt_input) * 2 - 2^33
+r + 2^33 <= sqrt(2^64 + sqrt_input) * 2
+r/2 + 2^32 <= sqrt(2^64 + sqrt_input)
+(r/2 + 2^32)^2 <= 2^64 + sqrt_input
+
+Rewrite r as r = s * 2 + b (s = trunc(r/2), b is 0 or 1)
+
+((s*2+b)/2 + 2^32)^2 <= 2^64 + sqrt_input
+(s*2+b)^2/4 + 2*2^32*(s*2+b)/2 + 2^64 <= 2^64 + sqrt_input
+(s*2+b)^2/4 + 2*2^32*(s*2+b)/2 <= sqrt_input
+(s*2+b)^2/4 + 2^32*r <= sqrt_input
+(s^2*4+2*s*2*b+b^2)/4 + 2^32*r <= sqrt_input
+s^2+s*b+b^2/4 + 2^32*r <= sqrt_input
+s*(s+b) + b^2/4 + 2^32*r <= sqrt_input
+
+Let r2 = s*(s+b) + r*2^32
+r2 + b^2/4 <= sqrt_input
+
+If this inequality doesn't hold, then we must decrement r: IF "r2 + b^2/4 > sqrt_input" THEN r = r - 1
+
+b can be 0 or 1
+If b is 0 then we need to compare "r2 > sqrt_input"
+If b is 1 then b^2/4 = 0.25, so we need to compare "r2 + 0.25 > sqrt_input"
+Since both r2 and sqrt_input are integers, we can safely replace it with "r2 + 1 > sqrt_input"
+-----------------------------------------------------------------------------------
+Both cases can be merged to a single expression "r2 + b > sqrt_input"
+-----------------------------------------------------------------------------------
+There will be no overflow when calculating "r2 + b", so it's safe to compare with sqrt_input:
+r2 + b = s*(s+b) + r*2^32 + b
+The largest value s, b and r can have is s = 1779033703, b = 1, r = 3558067407 when sqrt_input = 2^64 - 1
+r2 + b <= 1779033703*1779033704 + 3558067407*2^32 + 1 = 18446744068217447385 < 2^64
+
+Second inequality: r + 1 > sqrt(2^64 + sqrt_input) * 2 - 2^33
+-----------------------------------------------------------------------------------
+r + 1 > sqrt(2^64 + sqrt_input) * 2 - 2^33
+r + 1 + 2^33 > sqrt(2^64 + sqrt_input) * 2
+((r+1)/2 + 2^32)^2 > 2^64 + sqrt_input
+
+Rewrite r as r = s * 2 + b (s = trunc(r/2), b is 0 or 1)
+
+((s*2+b+1)/2 + 2^32)^2 > 2^64 + sqrt_input
+(s*2+b+1)^2/4 + 2*(s*2+b+1)/2*2^32 + 2^64 > 2^64 + sqrt_input
+(s*2+b+1)^2/4 + (s*2+b+1)*2^32 > sqrt_input
+(s*2+b+1)^2/4 + (r+1)*2^32 > sqrt_input
+(s*2+(b+1))^2/4 + r*2^32 + 2^32 > sqrt_input
+(s^2*4+2*s*2*(b+1)+(b+1)^2)/4 + r*2^32 + 2^32 > sqrt_input
+s^2+s*(b+1)+(b+1)^2/4 + r*2^32 + 2^32 > sqrt_input
+s*(s+b) + s + (b+1)^2/4 + r*2^32 + 2^32 > sqrt_input
+
+Let r2 = s*(s+b) + r*2^32
+
+r2 + s + (b+1)^2/4 + 2^32 > sqrt_input
+r2 + 2^32 + (b+1)^2/4 > sqrt_input - s
+
+If this inequality doesn't hold, then we must decrement r: IF "r2 + 2^32 + (b+1)^2/4 <= sqrt_input - s" THEN r = r - 1
+b can be 0 or 1
+If b is 0 then we need to compare "r2 + 2^32 + 1/4 <= sqrt_input - s" which is equal to "r2 + 2^32 < sqrt_input - s" because all numbers here are integers
+If b is 1 then (b+1)^2/4 = 1, so we need to compare "r2 + 2^32 + 1 <= sqrt_input - s" which is also equal to "r2 + 2^32 < sqrt_input - s"
+-----------------------------------------------------------------------------------
+Both cases can be merged to a single expression "r2 + 2^32 < sqrt_input - s"
+-----------------------------------------------------------------------------------
+There will be no overflow when calculating "r2 + 2^32":
+r2 + 2^32 = s*(s+b) + r*2^32 + 2^32 = s*(s+b) + (r+1)*2^32
+The largest value s, b and r can have is s = 1779033703, b = 1, r = 3558067407 when sqrt_input = 2^64 - 1
+r2 + b <= 1779033703*1779033704 + 3558067408*2^32 = 18446744072512414680 < 2^64
+
+There will be no integer overflow when calculating "sqrt_input - s", i.e. "sqrt_input >= s" at all times:
+s = trunc(r/2) = trunc(sqrt(2^64 + sqrt_input) - 2^32) < sqrt(2^64 + sqrt_input) - 2^32 + 1
+sqrt_input > sqrt(2^64 + sqrt_input) - 2^32 + 1
+sqrt_input + 2^32 - 1 > sqrt(2^64 + sqrt_input)
+(sqrt_input + 2^32 - 1)^2 > sqrt_input + 2^64
+sqrt_input^2 + 2*sqrt_input*(2^32 - 1) + (2^32-1)^2 > sqrt_input + 2^64
+sqrt_input^2 + sqrt_input*(2^33 - 2) + (2^32-1)^2 > sqrt_input + 2^64
+sqrt_input^2 + sqrt_input*(2^33 - 3) + (2^32-1)^2 > 2^64
+sqrt_input^2 + sqrt_input*(2^33 - 3) + 2^64-2^33+1 > 2^64
+sqrt_input^2 + sqrt_input*(2^33 - 3) - 2^33 + 1 > 0
+This inequality is true if sqrt_input > 1 and it's easy to check that s = 0 if sqrt_input is 0 or 1, so there will be no integer overflow
+*/
+
+#define VARIANT2_INTEGER_MATH_SQRT_FIXUP(r) \
+ do { \
+ const uint64_t s = r >> 1; \
+ const uint64_t b = r & 1; \
+ const uint64_t r2 = (uint64_t)(s) * (s + b) + (r << 32); \
+ r += ((r2 + b > sqrt_input) ? -1 : 0) + ((r2 + (1ULL << 32) < sqrt_input - s) ? 1 : 0); \
+ } while(0)
+
+#endif
diff --git a/stratum/algos/cryptonote/crypto/wild_keccak.cpp b/stratum/algos/cryptonote/crypto/wild_keccak.cpp
new file mode 100644
index 000000000..569b4f51b
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/wild_keccak.cpp
@@ -0,0 +1,119 @@
+// keccak.c
+// 19-Nov-11 Markku-Juhani O. Saarinen
+// A baseline Keccak (3rd round) implementation.
+
+// Memory-hard extension of keccak for PoW
+// Copyright (c) 2014 The Boolberry developers
+// Distributed under the MIT/X11 software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+
+#include "wild_keccak.h"
+namespace crypto
+{
+
+ const uint64_t keccakf_rndc[24] =
+ {
+ 0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
+ 0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
+ 0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
+ 0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
+ 0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
+ 0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
+ 0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
+ 0x8000000000008080, 0x0000000080000001, 0x8000000080008008
+ };
+
+ const int keccakf_rotc[24] =
+ {
+ 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
+ 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44
+ };
+
+ const int keccakf_piln[24] =
+ {
+ 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
+ 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1
+ };
+
+ // update the state with given number of rounds
+ void regular_f::keccakf(uint64_t st[25], int rounds)
+ {
+ int i, j, round;
+ uint64_t t, bc[5];
+
+ for (round = 0; round < rounds; round++) {
+
+ // Theta
+ for (i = 0; i < 5; i++)
+ bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] ^ st[i + 20];
+
+ for (i = 0; i < 5; i++) {
+ t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
+ for (j = 0; j < 25; j += 5)
+ st[j + i] ^= t;
+ }
+
+ // Rho Pi
+ t = st[1];
+ for (i = 0; i < 24; i++) {
+ j = keccakf_piln[i];
+ bc[0] = st[j];
+ st[j] = ROTL64(t, keccakf_rotc[i]);
+ t = bc[0];
+ }
+
+ // Chi
+ for (j = 0; j < 25; j += 5) {
+ for (i = 0; i < 5; i++)
+ bc[i] = st[j + i];
+ for (i = 0; i < 5; i++)
+ st[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5];
+ }
+
+ // Iota
+ st[0] ^= keccakf_rndc[round];
+ }
+ }
+
+ void mul_f::keccakf(uint64_t st[25], int rounds)
+ {
+ int i, j, round;
+ uint64_t t, bc[5];
+
+ for (round = 0; round < rounds; round++) {
+
+ // Theta
+ for (i = 0; i < 5; i++)
+ {
+ bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] * st[i + 15] * st[i + 20]; // NOTE: multiplication (not XOR) is intentional here — this is the modified "wild" theta step of mul_f
+ }
+
+ for (i = 0; i < 5; i++) {
+ t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
+ for (j = 0; j < 25; j += 5)
+ st[j + i] ^= t;
+ }
+
+ // Rho Pi
+ t = st[1];
+ for (i = 0; i < 24; i++) {
+ j = keccakf_piln[i];
+ bc[0] = st[j];
+ st[j] = ROTL64(t, keccakf_rotc[i]);
+ t = bc[0];
+ }
+
+ // Chi
+ for (j = 0; j < 25; j += 5) {
+ for (i = 0; i < 5; i++)
+ bc[i] = st[j + i];
+ for (i = 0; i < 5; i++)
+ st[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5];
+ }
+
+ // Iota
+ st[0] ^= keccakf_rndc[round];
+ }
+ }
+}
\ No newline at end of file
diff --git a/stratum/algos/cryptonote/crypto/wild_keccak.h b/stratum/algos/cryptonote/crypto/wild_keccak.h
new file mode 100644
index 000000000..963da70e6
--- /dev/null
+++ b/stratum/algos/cryptonote/crypto/wild_keccak.h
@@ -0,0 +1,168 @@
+// keccak.h
+// 19-Nov-11 Markku-Juhani O. Saarinen
+
+// Copyright (c) 2014 The Boolberry developers
+// Distributed under the MIT/X11 software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+
+#pragma once
+
+#include <stdint.h>
+#include <string.h>
+#include "hash.h"
+
+#ifndef KECCAK_ROUNDS
+#define KECCAK_ROUNDS 24
+#endif
+
+#ifndef ROTL64
+#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
+#endif
+
+// compute a keccak hash (md) of given byte length from "in"
+
+#define KK_MIXIN_SIZE 24
+
+namespace crypto
+{
+ template<class pod_operand_a, class pod_operand_b>
+ pod_operand_a xor_pod(const pod_operand_a& a, const pod_operand_b& b)
+ {
+ static_assert(sizeof(pod_operand_a) == sizeof(pod_operand_b), "invalid xor_h usage: different sizes");
+ static_assert(sizeof(pod_operand_a)%8 == 0, "invalid xor_h usage: wrong size");
+
+ hash r;
+ for(size_t i = 0; i != 4; i++)
+ {
+ ((uint64_t*)&r)[i] = ((const uint64_t*)&a)[i] ^ ((const uint64_t*)&b)[i];
+ }
+ return r;
+ }
+
+#define XOR_2(A, B) crypto::xor_pod(A, B)
+#define XOR_3(A, B, C) crypto::xor_pod(A, XOR_2(B, C))
+#define XOR_4(A, B, C, D) crypto::xor_pod(A, XOR_3(B, C, D))
+#define XOR_5(A, B, C, D, E) crypto::xor_pod(A, XOR_4(B, C, D, E))
+#define XOR_8(A, B, C, D, F, G, H, I) crypto::xor_pod(XOR_4(A, B, C, D), XOR_4(F, G, H, I))
+
+
+
+
+ typedef uint64_t state_t_m[25];
+ typedef uint64_t mixin_t[KK_MIXIN_SIZE];
+
+ //with multiplication, for tests
+ template
+ int keccak_generic(const uint8_t *in, size_t inlen, uint8_t *md, size_t mdlen)
+ {
+ state_t_m st;
+ uint8_t temp[144];
+ size_t i, rsiz, rsizw;
+
+ rsiz = sizeof(state_t_m) == mdlen ? HASH_DATA_AREA : 200 - 2 * mdlen;
+ rsizw = rsiz / 8;
+
+ memset(st, 0, sizeof(st));
+
+ for ( ; inlen >= rsiz; inlen -= rsiz, in += rsiz) {
+ for (i = 0; i < rsizw; i++)
+ st[i] ^= ((uint64_t *) in)[i];
+ f_traits::keccakf(st, KECCAK_ROUNDS);
+ }
+
+
+ // last block and padding
+ memcpy(temp, in, inlen);
+ temp[inlen++] = 1;
+ memset(temp + inlen, 0, rsiz - inlen);
+ temp[rsiz - 1] |= 0x80;
+
+ for (i = 0; i < rsizw; i++)
+ st[i] ^= ((uint64_t *) temp)[i];
+
+ f_traits::keccakf(st, KECCAK_ROUNDS);
+
+ memcpy(md, st, mdlen);
+
+ return 0;
+ }
+
+ template<class f_traits, class callback_t>
+ int wild_keccak(const uint8_t *in, size_t inlen, uint8_t *md, size_t mdlen, callback_t cb)
+ {
+ state_t_m st;
+ uint8_t temp[144];
+ uint64_t rsiz, rsizw;
+
+ rsiz = sizeof(state_t_m) == mdlen ? HASH_DATA_AREA : 200 - 2 * mdlen;
+ rsizw = rsiz / 8;
+ memset(&st[0], 0, 25*sizeof(st[0]));
+
+
+ for ( ; inlen >= rsiz; inlen -= rsiz, in += rsiz)
+ {
+ for (size_t i = 0; i < rsizw; i++)
+ st[i] ^= ((uint64_t *) in)[i];
+
+ for(size_t ll = 0; ll != KECCAK_ROUNDS; ll++)
+ {
+ if(ll != 0)
+ {//skip first round
+ mixin_t mix_in;
+ cb(st, mix_in);
+ for (size_t k = 0; k < KK_MIXIN_SIZE; k++)
+ st[k] ^= mix_in[k];
+ }
+ f_traits::keccakf(st, 1);
+ }
+ }
+
+ // last block and padding
+ memcpy(temp, in, inlen);
+ temp[inlen++] = 1;
+ memset(temp + inlen, 0, rsiz - inlen);
+ temp[rsiz - 1] |= 0x80;
+
+ for (size_t i = 0; i < rsizw; i++)
+ st[i] ^= ((uint64_t *) temp)[i];
+
+ for(size_t ll = 0; ll != KECCAK_ROUNDS; ll++)
+ {
+ if(ll != 0)
+ {//skip first round
+ mixin_t mix_in;
+ cb(st, mix_in);
+ for (size_t k = 0; k < KK_MIXIN_SIZE; k++)
+ st[k] ^= mix_in[k];
+ }
+ f_traits::keccakf(st, 1);
+ }
+
+ memcpy(md, st, mdlen);
+
+ return 0;
+ }
+
+ template<class f_traits, class callback_t>
+ int wild_keccak_dbl(const uint8_t *in, size_t inlen, uint8_t *md, size_t mdlen, callback_t cb)
+ {
+ //Satoshi's classic
+ wild_keccak(in, inlen, md, mdlen, cb);
+ wild_keccak(md, mdlen, md, mdlen, cb);
+ return 0;
+ }
+
+ class regular_f
+ {
+ public:
+ static void keccakf(uint64_t st[25], int rounds);
+ };
+
+ class mul_f
+ {
+ public:
+ static void keccakf(uint64_t st[25], int rounds);
+ };
+}
+
diff --git a/stratum/algos/cryptonote/cryptonight.c b/stratum/algos/cryptonote/cryptonight.c
new file mode 100644
index 000000000..a80d61de1
--- /dev/null
+++ b/stratum/algos/cryptonote/cryptonight.c
@@ -0,0 +1,300 @@
+// Copyright (c) 2012-2013 The Cryptonote developers
+// Distributed under the MIT/X11 software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+// Portions Copyright (c) 2018 The Monero developers
+// Portions Copyright (c) 2018 The TurtleCoin Developers
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "crypto/oaes_lib.h"
+#include "crypto/c_keccak.h"
+#include "crypto/c_groestl.h"
+#include "crypto/c_blake256.h"
+#include "crypto/c_jh.h"
+#include "crypto/c_skein.h"
+#include "crypto/int-util.h"
+#include "crypto/hash-ops.h"
+#include "crypto/variant2_int_sqrt.h"
+
+#if defined(_MSC_VER)
+#include <malloc.h>
+#endif
+
+#define MEMORY 2097152 /* 2 MiB 2^21 */
+#define ITER 1048576 /* 2^20 */
+#define ITER_DIV 524288 /* 2^19 */
+#define AES_BLOCK_SIZE 16
+#define AES_KEY_SIZE 32 /*16*/
+#define INIT_SIZE_BLK 8
+#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
+#define CN_INIT (MEMORY / INIT_SIZE_BYTE)
+#define CN_AES_INIT (MEMORY / AES_BLOCK_SIZE)
+
+#define VARIANT1_1(p) \
+ do if (variant == 1) \
+ { \
+ const uint8_t tmp = ((const uint8_t*)(p))[11]; \
+ static const uint32_t table = 0x75310; \
+ const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \
+ ((uint8_t*)(p))[11] = tmp ^ ((table >> index) & 0x30); \
+ } while(0)
+
+#define VARIANT1_2(p) \
+ do if (variant == 1) \
+ { \
+ ((uint64_t*)p)[1] ^= tweak1_2; \
+ } while(0)
+
+#define VARIANT1_INIT() \
+ if (variant == 1 && len < 43) \
+ { \
+ fprintf(stderr, "Cryptonight variant 1 needs at least 43 bytes of data"); \
+ _exit(1); \
+ } \
+ const uint64_t tweak1_2 = (variant == 1) ? *(const uint64_t*)(((const uint8_t*)input)+35) ^ ctx->state.hs.w[24] : 0
+
+#define U64(p) ((uint64_t*)(p))
+
+#define VARIANT2_INIT(b, state) \
+ uint64_t division_result; \
+ uint64_t sqrt_result; \
+ do if (variant >= 2) \
+ { \
+ U64(b)[2] = state.hs.w[8] ^ state.hs.w[10]; \
+ U64(b)[3] = state.hs.w[9] ^ state.hs.w[11]; \
+ division_result = state.hs.w[12]; \
+ sqrt_result = state.hs.w[13]; \
+ } while (0)
+
+#define VARIANT2_SHUFFLE_ADD(base_ptr, offset, a, b) \
+ do if (variant >= 2) \
+ { \
+ uint64_t* chunk1 = U64((base_ptr) + ((offset) ^ 0x10)); \
+ uint64_t* chunk2 = U64((base_ptr) + ((offset) ^ 0x20)); \
+ uint64_t* chunk3 = U64((base_ptr) + ((offset) ^ 0x30)); \
+ \
+ const uint64_t chunk1_old[2] = { chunk1[0], chunk1[1] }; \
+ \
+ chunk1[0] = chunk3[0] + U64(b + 16)[0]; \
+ chunk1[1] = chunk3[1] + U64(b + 16)[1]; \
+ \
+ chunk3[0] = chunk2[0] + U64(a)[0]; \
+ chunk3[1] = chunk2[1] + U64(a)[1]; \
+ \
+ chunk2[0] = chunk1_old[0] + U64(b)[0]; \
+ chunk2[1] = chunk1_old[1] + U64(b)[1]; \
+ } while (0)
+
+#define VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr) \
+ ((uint64_t*)(b))[0] ^= division_result ^ (sqrt_result << 32); \
+ { \
+ const uint64_t dividend = ((uint64_t*)(ptr))[1]; \
+ const uint32_t divisor = (((uint32_t*)(ptr))[0] + (uint32_t)(sqrt_result << 1)) | 0x80000001UL; \
+ division_result = ((uint32_t)(dividend / divisor)) + \
+ (((uint64_t)(dividend % divisor)) << 32); \
+ } \
+ const uint64_t sqrt_input = ((uint64_t*)(ptr))[0] + division_result
+
+#define VARIANT2_INTEGER_MATH(b, ptr) \
+ do if (variant >= 2) \
+ { \
+ VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr); \
+ VARIANT2_INTEGER_MATH_SQRT_STEP_FP64(); \
+ VARIANT2_INTEGER_MATH_SQRT_FIXUP(sqrt_result); \
+ } while (0)
+
+#define VARIANT2_2() \
+ do if (variant >= 2) { \
+ ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[0] ^= hi; \
+ ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[1] ^= lo; \
+ hi ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[0]; \
+ lo ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[1]; \
+ } while (0)
+
+#pragma pack(push, 1)
+union cn_slow_hash_state {
+ union hash_state hs;
+ struct {
+ uint8_t k[64];
+ uint8_t init[INIT_SIZE_BYTE];
+ };
+};
+#pragma pack(pop)
+
+static void do_blake_hash(const void* input, size_t len, char* output) {
+ blake256_hash((uint8_t*)output, input, len);
+}
+
+void do_groestl_hash(const void* input, size_t len, char* output) {
+ groestl(input, len * 8, (uint8_t*)output);
+}
+
+static void do_jh_hash(const void* input, size_t len, char* output) {
+ int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output);
+ assert(SUCCESS == r);
+}
+
+static void do_skein_hash(const void* input, size_t len, char* output) {
+ int r = c_skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
+ assert(SKEIN_SUCCESS == r);
+}
+
+static void (* const extra_hashes[4])(const void *, size_t, char *) = {
+ do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash
+};
+
+extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
+extern int aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey);
+
+static inline size_t e2i(const uint8_t* a) {
+ return (*((uint64_t*) a) / AES_BLOCK_SIZE) & (CN_AES_INIT - 1);
+}
+
+static void mul(const uint8_t* a, const uint8_t* b, uint8_t* res) {
+ ((uint64_t*) res)[1] = mul128(((uint64_t*) a)[0], ((uint64_t*) b)[0], (uint64_t*) res);
+}
+
+static void sum_half_blocks(uint8_t* a, const uint8_t* b) {
+ uint64_t a0, a1, b0, b1;
+
+ a0 = SWAP64LE(((uint64_t*) a)[0]);
+ a1 = SWAP64LE(((uint64_t*) a)[1]);
+ b0 = SWAP64LE(((uint64_t*) b)[0]);
+ b1 = SWAP64LE(((uint64_t*) b)[1]);
+ a0 += b0;
+ a1 += b1;
+ ((uint64_t*) a)[0] = SWAP64LE(a0);
+ ((uint64_t*) a)[1] = SWAP64LE(a1);
+}
+
+static inline void copy_block(uint8_t* dst, const uint8_t* src) {
+ ((uint64_t*) dst)[0] = ((uint64_t*) src)[0];
+ ((uint64_t*) dst)[1] = ((uint64_t*) src)[1];
+}
+
+static void swap_blocks(uint8_t* a, uint8_t* b) {
+ size_t i;
+ uint8_t t;
+ for (i = 0; i < AES_BLOCK_SIZE; i++) {
+ t = a[i];
+ a[i] = b[i];
+ b[i] = t;
+ }
+}
+
+static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
+ ((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
+ ((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
+}
+
+static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
+ ((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
+ ((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
+}
+
+struct cryptonight_ctx {
+ uint8_t long_state[MEMORY];
+ union cn_slow_hash_state state;
+ uint8_t text[INIT_SIZE_BYTE];
+ uint8_t a[AES_BLOCK_SIZE];
+ uint8_t b[AES_BLOCK_SIZE * 2];
+ uint8_t c[AES_BLOCK_SIZE];
+ uint8_t aes_key[AES_KEY_SIZE];
+ oaes_ctx* aes_ctx;
+};
+
+void cryptonight_hash(const char* input, char* output, uint32_t len, int variant) {
+#if defined(_MSC_VER)
+ struct cryptonight_ctx *ctx = _malloca(sizeof(struct cryptonight_ctx));
+#else
+ struct cryptonight_ctx *ctx = alloca(sizeof(struct cryptonight_ctx));
+#endif
+ hash_process(&ctx->state.hs, (const uint8_t*) input, len);
+ memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
+ memcpy(ctx->aes_key, ctx->state.hs.b, AES_KEY_SIZE);
+ ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
+ size_t i, j;
+
+ VARIANT1_INIT();
+ VARIANT2_INIT(ctx->b, ctx->state);
+
+ oaes_key_import_data(ctx->aes_ctx, ctx->aes_key, AES_KEY_SIZE);
+ for (i = 0; i < CN_INIT; i++) {
+ for (j = 0; j < INIT_SIZE_BLK; j++) {
+ aesb_pseudo_round(&ctx->text[AES_BLOCK_SIZE * j],
+ &ctx->text[AES_BLOCK_SIZE * j],
+ ctx->aes_ctx->key->exp_data);
+ }
+ memcpy(&ctx->long_state[i * INIT_SIZE_BYTE], ctx->text, INIT_SIZE_BYTE);
+ }
+
+ for (i = 0; i < 16; i++) {
+ ctx->a[i] = ctx->state.k[i] ^ ctx->state.k[32 + i];
+ ctx->b[i] = ctx->state.k[16 + i] ^ ctx->state.k[48 + i];
+ }
+
+ for (i = 0; i < ITER_DIV; i++) {
+ /* Dependency chain: address -> read value ------+
+ * written value <-+ hard function (AES or MUL) <+
+ * next address <-+
+ */
+ /* Iteration 1 */
+ j = e2i(ctx->a);
+ aesb_single_round(&ctx->long_state[j * AES_BLOCK_SIZE], ctx->c, ctx->a);
+ VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b);
+ xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j * AES_BLOCK_SIZE]);
+ VARIANT1_1((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]);
+ /* Iteration 2 */
+ j = e2i(ctx->c);
+
+ uint64_t* dst = (uint64_t*)&ctx->long_state[j * AES_BLOCK_SIZE];
+
+ uint64_t t[2];
+ t[0] = dst[0];
+ t[1] = dst[1];
+
+ VARIANT2_INTEGER_MATH(t, ctx->c);
+
+ uint64_t hi;
+ uint64_t lo = mul128(((uint64_t*)ctx->c)[0], t[0], &hi);
+
+ VARIANT2_2();
+ VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b);
+
+ ((uint64_t*)ctx->a)[0] += hi;
+ ((uint64_t*)ctx->a)[1] += lo;
+
+ dst[0] = ((uint64_t*)ctx->a)[0];
+ dst[1] = ((uint64_t*)ctx->a)[1];
+
+ ((uint64_t*)ctx->a)[0] ^= t[0];
+ ((uint64_t*)ctx->a)[1] ^= t[1];
+
+ VARIANT1_2((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]);
+ copy_block(ctx->b + AES_BLOCK_SIZE, ctx->b);
+ copy_block(ctx->b, ctx->c);
+ }
+
+ memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
+ oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
+ for (i = 0; i < CN_INIT; i++) {
+ for (j = 0; j < INIT_SIZE_BLK; j++) {
+ xor_blocks(&ctx->text[j * AES_BLOCK_SIZE],
+ &ctx->long_state[i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE]);
+ aesb_pseudo_round(&ctx->text[j * AES_BLOCK_SIZE],
+ &ctx->text[j * AES_BLOCK_SIZE],
+ ctx->aes_ctx->key->exp_data);
+ }
+ }
+ memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
+ hash_permutation(&ctx->state.hs);
+ /*memcpy(hash, &state, 32);*/
+ extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
+ oaes_free((OAES_CTX **) &ctx->aes_ctx);
+}
+
+void cryptonight_fast_hash(const char* input, char* output, uint32_t len) {
+ union hash_state state;
+ hash_process(&state, (const uint8_t*) input, len);
+ memcpy(output, &state, HASH_SIZE);
+}
diff --git a/stratum/algos/cryptonote/cryptonight.h b/stratum/algos/cryptonote/cryptonight.h
new file mode 100644
index 000000000..7d99ec072
--- /dev/null
+++ b/stratum/algos/cryptonote/cryptonight.h
@@ -0,0 +1,17 @@
+#ifndef CRYPTONIGHT_H
+#define CRYPTONIGHT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+void cryptonight_hash(const char* input, char* output, uint32_t len, int variant);
+void cryptonight_fast_hash(const char* input, char* output, uint32_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/stratum/algos/cryptonote/cryptonight_dark.c b/stratum/algos/cryptonote/cryptonight_dark.c
new file mode 100644
index 000000000..9c8af9210
--- /dev/null
+++ b/stratum/algos/cryptonote/cryptonight_dark.c
@@ -0,0 +1,300 @@
+// Copyright (c) 2012-2013 The Cryptonote developers
+// Distributed under the MIT/X11 software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+// Portions Copyright (c) 2018 The Monero developers
+// Portions Copyright (c) 2018 The TurtleCoin Developers
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "crypto/oaes_lib.h"
+#include "crypto/c_keccak.h"
+#include "crypto/c_groestl.h"
+#include "crypto/c_blake256.h"
+#include "crypto/c_jh.h"
+#include "crypto/c_skein.h"
+#include "crypto/int-util.h"
+#include "crypto/hash-ops.h"
+#include "crypto/variant2_int_sqrt.h"
+
+#if defined(_MSC_VER)
+#include <malloc.h>
+#endif
+
+#define MEMORY 524288 /* 512KB - 2^19 */
+#define ITER 262144 /* 2^18 */
+#define ITER_DIV 131072 /* 2^17 */
+#define AES_BLOCK_SIZE 16
+#define AES_KEY_SIZE 32 /*16*/
+#define INIT_SIZE_BLK 8
+#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
+#define CN_INIT (MEMORY / INIT_SIZE_BYTE)
+#define CN_AES_INIT (MEMORY / AES_BLOCK_SIZE)
+
+#define VARIANT1_1(p) \
+ do if (variant == 1) \
+ { \
+ const uint8_t tmp = ((const uint8_t*)(p))[11]; \
+ static const uint32_t table = 0x75310; \
+ const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \
+ ((uint8_t*)(p))[11] = tmp ^ ((table >> index) & 0x30); \
+ } while(0)
+
+#define VARIANT1_2(p) \
+ do if (variant == 1) \
+ { \
+ ((uint64_t*)p)[1] ^= tweak1_2; \
+ } while(0)
+
+#define VARIANT1_INIT() \
+ if (variant == 1 && len < 43) \
+ { \
+ fprintf(stderr, "Cryptonight variant 1 needs at least 43 bytes of data"); \
+ _exit(1); \
+ } \
+ const uint64_t tweak1_2 = (variant == 1) ? *(const uint64_t*)(((const uint8_t*)input)+35) ^ ctx->state.hs.w[24] : 0
+
+#define U64(p) ((uint64_t*)(p))
+
+#define VARIANT2_INIT(b, state) \
+ uint64_t division_result; \
+ uint64_t sqrt_result; \
+ do if (variant >= 2) \
+ { \
+ U64(b)[2] = state.hs.w[8] ^ state.hs.w[10]; \
+ U64(b)[3] = state.hs.w[9] ^ state.hs.w[11]; \
+ division_result = state.hs.w[12]; \
+ sqrt_result = state.hs.w[13]; \
+ } while (0)
+
+#define VARIANT2_SHUFFLE_ADD(base_ptr, offset, a, b) \
+ do if (variant >= 2) \
+ { \
+ uint64_t* chunk1 = U64((base_ptr) + ((offset) ^ 0x10)); \
+ uint64_t* chunk2 = U64((base_ptr) + ((offset) ^ 0x20)); \
+ uint64_t* chunk3 = U64((base_ptr) + ((offset) ^ 0x30)); \
+ \
+ const uint64_t chunk1_old[2] = { chunk1[0], chunk1[1] }; \
+ \
+ chunk1[0] = chunk3[0] + U64(b + 16)[0]; \
+ chunk1[1] = chunk3[1] + U64(b + 16)[1]; \
+ \
+ chunk3[0] = chunk2[0] + U64(a)[0]; \
+ chunk3[1] = chunk2[1] + U64(a)[1]; \
+ \
+ chunk2[0] = chunk1_old[0] + U64(b)[0]; \
+ chunk2[1] = chunk1_old[1] + U64(b)[1]; \
+ } while (0)
+
+#define VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr) \
+ ((uint64_t*)(b))[0] ^= division_result ^ (sqrt_result << 32); \
+ { \
+ const uint64_t dividend = ((uint64_t*)(ptr))[1]; \
+ const uint32_t divisor = (((uint32_t*)(ptr))[0] + (uint32_t)(sqrt_result << 1)) | 0x80000001UL; \
+ division_result = ((uint32_t)(dividend / divisor)) + \
+ (((uint64_t)(dividend % divisor)) << 32); \
+ } \
+ const uint64_t sqrt_input = ((uint64_t*)(ptr))[0] + division_result
+
+#define VARIANT2_INTEGER_MATH(b, ptr) \
+ do if (variant >= 2) \
+ { \
+ VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr); \
+ VARIANT2_INTEGER_MATH_SQRT_STEP_FP64(); \
+ VARIANT2_INTEGER_MATH_SQRT_FIXUP(sqrt_result); \
+ } while (0)
+
+#define VARIANT2_2() \
+ do if (variant >= 2) { \
+ ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[0] ^= hi; \
+ ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[1] ^= lo; \
+ hi ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[0]; \
+ lo ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[1]; \
+ } while (0)
+
+#pragma pack(push, 1)
+union cn_slow_hash_state {
+ union hash_state hs;
+ struct {
+ uint8_t k[64];
+ uint8_t init[INIT_SIZE_BYTE];
+ };
+};
+#pragma pack(pop)
+
+static void do_dark_blake_hash(const void* input, size_t len, char* output) {
+ blake256_hash((uint8_t*)output, input, len);
+}
+
+void do_dark_groestl_hash(const void* input, size_t len, char* output) {
+ groestl(input, len * 8, (uint8_t*)output);
+}
+
+static void do_dark_jh_hash(const void* input, size_t len, char* output) {
+ int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output);
+ assert(SUCCESS == r);
+}
+
+static void do_dark_skein_hash(const void* input, size_t len, char* output) {
+ int r = c_skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
+ assert(SKEIN_SUCCESS == r);
+}
+
+static void (* const extra_hashes[4])(const void *, size_t, char *) = {
+ do_dark_blake_hash, do_dark_groestl_hash, do_dark_jh_hash, do_dark_skein_hash
+};
+
+extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
+extern int aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey);
+
+static inline size_t e2i(const uint8_t* a) {
+ return (*((uint64_t*) a) / AES_BLOCK_SIZE) & (CN_AES_INIT - 1);
+}
+
+static void mul(const uint8_t* a, const uint8_t* b, uint8_t* res) {
+ ((uint64_t*) res)[1] = mul128(((uint64_t*) a)[0], ((uint64_t*) b)[0], (uint64_t*) res);
+}
+
+static void sum_half_blocks(uint8_t* a, const uint8_t* b) {
+ uint64_t a0, a1, b0, b1;
+
+ a0 = SWAP64LE(((uint64_t*) a)[0]);
+ a1 = SWAP64LE(((uint64_t*) a)[1]);
+ b0 = SWAP64LE(((uint64_t*) b)[0]);
+ b1 = SWAP64LE(((uint64_t*) b)[1]);
+ a0 += b0;
+ a1 += b1;
+ ((uint64_t*) a)[0] = SWAP64LE(a0);
+ ((uint64_t*) a)[1] = SWAP64LE(a1);
+}
+
+static inline void copy_block(uint8_t* dst, const uint8_t* src) {
+ ((uint64_t*) dst)[0] = ((uint64_t*) src)[0];
+ ((uint64_t*) dst)[1] = ((uint64_t*) src)[1];
+}
+
+static void swap_blocks(uint8_t* a, uint8_t* b) {
+ size_t i;
+ uint8_t t;
+ for (i = 0; i < AES_BLOCK_SIZE; i++) {
+ t = a[i];
+ a[i] = b[i];
+ b[i] = t;
+ }
+}
+
+static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
+ ((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
+ ((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
+}
+
+static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
+ ((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
+ ((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
+}
+
+struct cryptonightdark_ctx {
+ uint8_t long_state[MEMORY];
+ union cn_slow_hash_state state;
+ uint8_t text[INIT_SIZE_BYTE];
+ uint8_t a[AES_BLOCK_SIZE];
+ uint8_t b[AES_BLOCK_SIZE * 2];
+ uint8_t c[AES_BLOCK_SIZE];
+ uint8_t aes_key[AES_KEY_SIZE];
+ oaes_ctx* aes_ctx;
+};
+
+void cryptonightdark_hash(const char* input, char* output, uint32_t len, int variant) {
+#if defined(_MSC_VER)
+ struct cryptonightdark_ctx *ctx = _malloca(sizeof(struct cryptonightdark_ctx));
+#else
+ struct cryptonightdark_ctx *ctx = alloca(sizeof(struct cryptonightdark_ctx));
+#endif
+ hash_process(&ctx->state.hs, (const uint8_t*) input, len);
+ memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
+ memcpy(ctx->aes_key, ctx->state.hs.b, AES_KEY_SIZE);
+ ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
+ size_t i, j;
+
+ VARIANT1_INIT();
+ VARIANT2_INIT(ctx->b, ctx->state);
+
+ oaes_key_import_data(ctx->aes_ctx, ctx->aes_key, AES_KEY_SIZE);
+ for (i = 0; i < CN_INIT; i++) {
+ for (j = 0; j < INIT_SIZE_BLK; j++) {
+ aesb_pseudo_round(&ctx->text[AES_BLOCK_SIZE * j],
+ &ctx->text[AES_BLOCK_SIZE * j],
+ ctx->aes_ctx->key->exp_data);
+ }
+ memcpy(&ctx->long_state[i * INIT_SIZE_BYTE], ctx->text, INIT_SIZE_BYTE);
+ }
+
+ for (i = 0; i < 16; i++) {
+ ctx->a[i] = ctx->state.k[i] ^ ctx->state.k[32 + i];
+ ctx->b[i] = ctx->state.k[16 + i] ^ ctx->state.k[48 + i];
+ }
+
+ for (i = 0; i < ITER_DIV; i++) {
+ /* Dependency chain: address -> read value ------+
+ * written value <-+ hard function (AES or MUL) <+
+ * next address <-+
+ */
+ /* Iteration 1 */
+ j = e2i(ctx->a);
+ aesb_single_round(&ctx->long_state[j * AES_BLOCK_SIZE], ctx->c, ctx->a);
+ VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b);
+ xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j * AES_BLOCK_SIZE]);
+ VARIANT1_1((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]);
+ /* Iteration 2 */
+ j = e2i(ctx->c);
+
+ uint64_t* dst = (uint64_t*)&ctx->long_state[j * AES_BLOCK_SIZE];
+
+ uint64_t t[2];
+ t[0] = dst[0];
+ t[1] = dst[1];
+
+ VARIANT2_INTEGER_MATH(t, ctx->c);
+
+ uint64_t hi;
+ uint64_t lo = mul128(((uint64_t*)ctx->c)[0], t[0], &hi);
+
+ VARIANT2_2();
+ VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b);
+
+ ((uint64_t*)ctx->a)[0] += hi;
+ ((uint64_t*)ctx->a)[1] += lo;
+
+ dst[0] = ((uint64_t*)ctx->a)[0];
+ dst[1] = ((uint64_t*)ctx->a)[1];
+
+ ((uint64_t*)ctx->a)[0] ^= t[0];
+ ((uint64_t*)ctx->a)[1] ^= t[1];
+
+ VARIANT1_2((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]);
+ copy_block(ctx->b + AES_BLOCK_SIZE, ctx->b);
+ copy_block(ctx->b, ctx->c);
+ }
+
+ memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
+ oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
+ for (i = 0; i < CN_INIT; i++) {
+ for (j = 0; j < INIT_SIZE_BLK; j++) {
+ xor_blocks(&ctx->text[j * AES_BLOCK_SIZE],
+ &ctx->long_state[i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE]);
+ aesb_pseudo_round(&ctx->text[j * AES_BLOCK_SIZE],
+ &ctx->text[j * AES_BLOCK_SIZE],
+ ctx->aes_ctx->key->exp_data);
+ }
+ }
+ memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
+ hash_permutation(&ctx->state.hs);
+ /*memcpy(hash, &state, 32);*/
+ extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
+ oaes_free((OAES_CTX **) &ctx->aes_ctx);
+}
+
+void cryptonightdark_fast_hash(const char* input, char* output, uint32_t len) {
+ union hash_state state;
+ hash_process(&state, (const uint8_t*) input, len);
+ memcpy(output, &state, HASH_SIZE);
+}
diff --git a/stratum/algos/cryptonote/cryptonight_dark.h b/stratum/algos/cryptonote/cryptonight_dark.h
new file mode 100644
index 000000000..c636e336d
--- /dev/null
+++ b/stratum/algos/cryptonote/cryptonight_dark.h
@@ -0,0 +1,17 @@
+#ifndef CRYPTONIGHTDARK_H
+#define CRYPTONIGHTDARK_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+void cryptonightdark_hash(const char* input, char* output, uint32_t len, int variant);
+void cryptonightdark_fast_hash(const char* input, char* output, uint32_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/stratum/algos/cryptonote/cryptonight_dark_lite.c b/stratum/algos/cryptonote/cryptonight_dark_lite.c
new file mode 100644
index 000000000..15b3e09ea
--- /dev/null
+++ b/stratum/algos/cryptonote/cryptonight_dark_lite.c
@@ -0,0 +1,300 @@
+// Copyright (c) 2012-2013 The Cryptonote developers
+// Distributed under the MIT/X11 software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+// Portions Copyright (c) 2018 The Monero developers
+// Portions Copyright (c) 2018 The darkCoin Developers
+
+#include <stddef.h>
+#include <stdint.h>
+#include "crypto/oaes_lib.h"
+#include "crypto/c_keccak.h"
+#include "crypto/c_groestl.h"
+#include "crypto/c_blake256.h"
+#include "crypto/c_jh.h"
+#include "crypto/c_skein.h"
+#include "crypto/int-util.h"
+#include "crypto/hash-ops.h"
+#include "crypto/variant2_int_sqrt.h"
+
+#if defined(_MSC_VER)
+#include <malloc.h>
+#endif
+
+#define MEMORY 524288 /* 512KB - 2^19 */
+#define ITER 262144 /* 2^18 */
+#define ITER_DIV 131072 /* 2^17 */
+#define AES_BLOCK_SIZE 16
+#define AES_KEY_SIZE 32 /*16*/
+#define INIT_SIZE_BLK 8
+#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
+#define CN_INIT (MEMORY / INIT_SIZE_BYTE)
+#define CN_AES_INIT (MEMORY / AES_BLOCK_SIZE) / 2
+
+#define VARIANT1_1(p) \
+ do if (variant == 1) \
+ { \
+ const uint8_t tmp = ((const uint8_t*)(p))[11]; \
+ static const uint32_t table = 0x75310; \
+ const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \
+ ((uint8_t*)(p))[11] = tmp ^ ((table >> index) & 0x30); \
+ } while(0)
+
+#define VARIANT1_2(p) \
+ do if (variant == 1) \
+ { \
+ ((uint64_t*)p)[1] ^= tweak1_2; \
+ } while(0)
+
+#define VARIANT1_INIT() \
+ if (variant == 1 && len < 43) \
+ { \
+ fprintf(stderr, "Cryptonight variant 1 needs at least 43 bytes of data"); \
+ _exit(1); \
+ } \
+ const uint64_t tweak1_2 = (variant == 1) ? *(const uint64_t*)(((const uint8_t*)input)+35) ^ ctx->state.hs.w[24] : 0
+
+#define U64(p) ((uint64_t*)(p))
+
+#define VARIANT2_INIT(b, state) \
+ uint64_t division_result; \
+ uint64_t sqrt_result; \
+ do if (variant >= 2) \
+ { \
+ U64(b)[2] = state.hs.w[8] ^ state.hs.w[10]; \
+ U64(b)[3] = state.hs.w[9] ^ state.hs.w[11]; \
+ division_result = state.hs.w[12]; \
+ sqrt_result = state.hs.w[13]; \
+ } while (0)
+
+#define VARIANT2_SHUFFLE_ADD(base_ptr, offset, a, b) \
+ do if (variant >= 2) \
+ { \
+ uint64_t* chunk1 = U64((base_ptr) + ((offset) ^ 0x10)); \
+ uint64_t* chunk2 = U64((base_ptr) + ((offset) ^ 0x20)); \
+ uint64_t* chunk3 = U64((base_ptr) + ((offset) ^ 0x30)); \
+ \
+ const uint64_t chunk1_old[2] = { chunk1[0], chunk1[1] }; \
+ \
+ chunk1[0] = chunk3[0] + U64(b + 16)[0]; \
+ chunk1[1] = chunk3[1] + U64(b + 16)[1]; \
+ \
+ chunk3[0] = chunk2[0] + U64(a)[0]; \
+ chunk3[1] = chunk2[1] + U64(a)[1]; \
+ \
+ chunk2[0] = chunk1_old[0] + U64(b)[0]; \
+ chunk2[1] = chunk1_old[1] + U64(b)[1]; \
+ } while (0)
+
+#define VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr) \
+ ((uint64_t*)(b))[0] ^= division_result ^ (sqrt_result << 32); \
+ { \
+ const uint64_t dividend = ((uint64_t*)(ptr))[1]; \
+ const uint32_t divisor = (((uint32_t*)(ptr))[0] + (uint32_t)(sqrt_result << 1)) | 0x80000001UL; \
+ division_result = ((uint32_t)(dividend / divisor)) + \
+ (((uint64_t)(dividend % divisor)) << 32); \
+ } \
+ const uint64_t sqrt_input = ((uint64_t*)(ptr))[0] + division_result
+
+#define VARIANT2_INTEGER_MATH(b, ptr) \
+ do if (variant >= 2) \
+ { \
+ VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr); \
+ VARIANT2_INTEGER_MATH_SQRT_STEP_FP64(); \
+ VARIANT2_INTEGER_MATH_SQRT_FIXUP(sqrt_result); \
+ } while (0)
+
+#define VARIANT2_2() \
+ do if (variant >= 2) { \
+ ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[0] ^= hi; \
+ ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[1] ^= lo; \
+ hi ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[0]; \
+ lo ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[1]; \
+ } while (0)
+
+#pragma pack(push, 1)
+union cn_slow_hash_state {
+ union hash_state hs;
+ struct {
+ uint8_t k[64];
+ uint8_t init[INIT_SIZE_BYTE];
+ };
+};
+#pragma pack(pop)
+
+static void do_dark_lite_blake_hash(const void* input, size_t len, char* output) {
+ blake256_hash((uint8_t*)output, input, len);
+}
+
+void do_dark_lite_groestl_hash(const void* input, size_t len, char* output) {
+ groestl(input, len * 8, (uint8_t*)output);
+}
+
+static void do_dark_lite_jh_hash(const void* input, size_t len, char* output) {
+ int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output);
+ assert(SUCCESS == r);
+}
+
+static void do_dark_lite_skein_hash(const void* input, size_t len, char* output) {
+ int r = c_skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
+ assert(SKEIN_SUCCESS == r);
+}
+
+static void (* const extra_hashes[4])(const void *, size_t, char *) = {
+ do_dark_lite_blake_hash, do_dark_lite_groestl_hash, do_dark_lite_jh_hash, do_dark_lite_skein_hash
+};
+
+extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
+extern int aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey);
+
+static inline size_t e2i(const uint8_t* a) {
+ return (*((uint64_t*) a) / AES_BLOCK_SIZE) & (CN_AES_INIT - 1);
+}
+
+static void mul(const uint8_t* a, const uint8_t* b, uint8_t* res) {
+ ((uint64_t*) res)[1] = mul128(((uint64_t*) a)[0], ((uint64_t*) b)[0], (uint64_t*) res);
+}
+
+static void sum_half_blocks(uint8_t* a, const uint8_t* b) {
+ uint64_t a0, a1, b0, b1;
+
+ a0 = SWAP64LE(((uint64_t*) a)[0]);
+ a1 = SWAP64LE(((uint64_t*) a)[1]);
+ b0 = SWAP64LE(((uint64_t*) b)[0]);
+ b1 = SWAP64LE(((uint64_t*) b)[1]);
+ a0 += b0;
+ a1 += b1;
+ ((uint64_t*) a)[0] = SWAP64LE(a0);
+ ((uint64_t*) a)[1] = SWAP64LE(a1);
+}
+
+static inline void copy_block(uint8_t* dst, const uint8_t* src) {
+ ((uint64_t*) dst)[0] = ((uint64_t*) src)[0];
+ ((uint64_t*) dst)[1] = ((uint64_t*) src)[1];
+}
+
+static void swap_blocks(uint8_t* a, uint8_t* b) {
+ size_t i;
+ uint8_t t;
+ for (i = 0; i < AES_BLOCK_SIZE; i++) {
+ t = a[i];
+ a[i] = b[i];
+ b[i] = t;
+ }
+}
+
+static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
+ ((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
+ ((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
+}
+
+static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
+ ((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
+ ((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
+}
+
+struct cryptonightdarklite_ctx {
+ uint8_t long_state[MEMORY];
+ union cn_slow_hash_state state;
+ uint8_t text[INIT_SIZE_BYTE];
+ uint8_t a[AES_BLOCK_SIZE];
+ uint8_t b[AES_BLOCK_SIZE * 2];
+ uint8_t c[AES_BLOCK_SIZE];
+ uint8_t aes_key[AES_KEY_SIZE];
+ oaes_ctx* aes_ctx;
+};
+
+void cryptonightdarklite_hash(const char* input, char* output, uint32_t len, int variant) {
+#if defined(_MSC_VER)
+ struct cryptonightdarklite_ctx *ctx = _malloca(sizeof(struct cryptonightdarklite_ctx));
+#else
+ struct cryptonightdarklite_ctx *ctx = alloca(sizeof(struct cryptonightdarklite_ctx));
+#endif
+ hash_process(&ctx->state.hs, (const uint8_t*) input, len);
+ memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
+ memcpy(ctx->aes_key, ctx->state.hs.b, AES_KEY_SIZE);
+ ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
+ size_t i, j;
+
+ VARIANT1_INIT();
+ VARIANT2_INIT(ctx->b, ctx->state);
+
+ oaes_key_import_data(ctx->aes_ctx, ctx->aes_key, AES_KEY_SIZE);
+ for (i = 0; i < CN_INIT; i++) {
+ for (j = 0; j < INIT_SIZE_BLK; j++) {
+ aesb_pseudo_round(&ctx->text[AES_BLOCK_SIZE * j],
+ &ctx->text[AES_BLOCK_SIZE * j],
+ ctx->aes_ctx->key->exp_data);
+ }
+ memcpy(&ctx->long_state[i * INIT_SIZE_BYTE], ctx->text, INIT_SIZE_BYTE);
+ }
+
+ for (i = 0; i < 16; i++) {
+ ctx->a[i] = ctx->state.k[i] ^ ctx->state.k[32 + i];
+ ctx->b[i] = ctx->state.k[16 + i] ^ ctx->state.k[48 + i];
+ }
+
+ for (i = 0; i < ITER_DIV; i++) {
+ /* Dependency chain: address -> read value ------+
+ * written value <-+ hard function (AES or MUL) <+
+ * next address <-+
+ */
+ /* Iteration 1 */
+ j = e2i(ctx->a);
+ aesb_single_round(&ctx->long_state[j * AES_BLOCK_SIZE], ctx->c, ctx->a);
+ VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b);
+ xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j * AES_BLOCK_SIZE]);
+ VARIANT1_1((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]);
+ /* Iteration 2 */
+ j = e2i(ctx->c);
+
+ uint64_t* dst = (uint64_t*)&ctx->long_state[j * AES_BLOCK_SIZE];
+
+ uint64_t t[2];
+ t[0] = dst[0];
+ t[1] = dst[1];
+
+ VARIANT2_INTEGER_MATH(t, ctx->c);
+
+ uint64_t hi;
+ uint64_t lo = mul128(((uint64_t*)ctx->c)[0], t[0], &hi);
+
+ VARIANT2_2();
+ VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b);
+
+ ((uint64_t*)ctx->a)[0] += hi;
+ ((uint64_t*)ctx->a)[1] += lo;
+
+ dst[0] = ((uint64_t*)ctx->a)[0];
+ dst[1] = ((uint64_t*)ctx->a)[1];
+
+ ((uint64_t*)ctx->a)[0] ^= t[0];
+ ((uint64_t*)ctx->a)[1] ^= t[1];
+
+ VARIANT1_2((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]);
+ copy_block(ctx->b + AES_BLOCK_SIZE, ctx->b);
+ copy_block(ctx->b, ctx->c);
+ }
+
+ memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
+ oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
+ for (i = 0; i < CN_INIT; i++) {
+ for (j = 0; j < INIT_SIZE_BLK; j++) {
+ xor_blocks(&ctx->text[j * AES_BLOCK_SIZE],
+ &ctx->long_state[i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE]);
+ aesb_pseudo_round(&ctx->text[j * AES_BLOCK_SIZE],
+ &ctx->text[j * AES_BLOCK_SIZE],
+ ctx->aes_ctx->key->exp_data);
+ }
+ }
+ memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
+ hash_permutation(&ctx->state.hs);
+ /*memcpy(hash, &state, 32);*/
+ extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
+ oaes_free((OAES_CTX **) &ctx->aes_ctx);
+}
+
+void cryptonightdarklite_fast_hash(const char* input, char* output, uint32_t len) {
+ union hash_state state;
+ hash_process(&state, (const uint8_t*) input, len);
+ memcpy(output, &state, HASH_SIZE);
+}
diff --git a/stratum/algos/cryptonote/cryptonight_dark_lite.h b/stratum/algos/cryptonote/cryptonight_dark_lite.h
new file mode 100644
index 000000000..1dd6eddf6
--- /dev/null
+++ b/stratum/algos/cryptonote/cryptonight_dark_lite.h
@@ -0,0 +1,17 @@
+#ifndef CRYPTONIGHTDARKLITE_H
+#define CRYPTONIGHTDARKLITE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+void cryptonightdarklite_hash(const char* input, char* output, uint32_t len, int variant);
+void cryptonightdarklite_fast_hash(const char* input, char* output, uint32_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/stratum/algos/cryptonote/cryptonight_fast.c b/stratum/algos/cryptonote/cryptonight_fast.c
new file mode 100644
index 000000000..8145f76b7
--- /dev/null
+++ b/stratum/algos/cryptonote/cryptonight_fast.c
@@ -0,0 +1,297 @@
+// Copyright (c) 2012-2013 The Cryptonote developers
+// Distributed under the MIT/X11 software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+// Portions Copyright (c) 2018 The Monero developers
+// Portions Copyright (c) 2018 The TurtleCoin Developers
+
+#include <stddef.h>
+#include <stdint.h>
+#include "crypto/oaes_lib.h"
+#include "crypto/c_keccak.h"
+#include "crypto/c_groestl.h"
+#include "crypto/c_blake256.h"
+#include "crypto/c_jh.h"
+#include "crypto/c_skein.h"
+#include "crypto/int-util.h"
+#include "crypto/hash-ops.h"
+#include "crypto/variant2_int_sqrt.h"
+
+#if defined(_MSC_VER)
+#include <malloc.h>
+#endif
+
+#define MEMORY 2097152 /* 2 MiB 2^21 */
+#define ITER 524288 /* 2^19 */
+#define ITER_DIV 262144 /* 2^18 */
+#define AES_BLOCK_SIZE 16
+#define AES_KEY_SIZE 32 /*16*/
+#define INIT_SIZE_BLK 8
+#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
+#define CN_INIT (MEMORY / INIT_SIZE_BYTE)
+#define CN_AES_INIT (MEMORY / AES_BLOCK_SIZE)
+
+#define VARIANT1_1(p) \
+ do if (variant == 1) \
+ { \
+ const uint8_t tmp = ((const uint8_t*)(p))[11]; \
+ static const uint32_t table = 0x75310; \
+ const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \
+ ((uint8_t*)(p))[11] = tmp ^ ((table >> index) & 0x30); \
+ } while(0)
+
+#define VARIANT1_2(p) \
+ do if (variant == 1) \
+ { \
+ ((uint64_t*)p)[1] ^= tweak1_2; \
+ } while(0)
+
+#define VARIANT1_INIT() \
+ if (variant == 1 && len < 43) \
+ { \
+ fprintf(stderr, "Cryptonight variant 1 needs at least 43 bytes of data"); \
+ _exit(1); \
+ } \
+ const uint64_t tweak1_2 = (variant == 1) ? *(const uint64_t*)(((const uint8_t*)input)+35) ^ ctx->state.hs.w[24] : 0
+
+#define U64(p) ((uint64_t*)(p))
+
+#define VARIANT2_INIT(b, state) \
+ uint64_t division_result; \
+ uint64_t sqrt_result; \
+ do if (variant >= 2) \
+ { \
+ U64(b)[2] = state.hs.w[8] ^ state.hs.w[10]; \
+ U64(b)[3] = state.hs.w[9] ^ state.hs.w[11]; \
+ division_result = state.hs.w[12]; \
+ sqrt_result = state.hs.w[13]; \
+ } while (0)
+
+#define VARIANT2_SHUFFLE_ADD(base_ptr, offset, a, b) \
+ do if (variant >= 2) \
+ { \
+ uint64_t* chunk1 = U64((base_ptr) + ((offset) ^ 0x10)); \
+ uint64_t* chunk2 = U64((base_ptr) + ((offset) ^ 0x20)); \
+ uint64_t* chunk3 = U64((base_ptr) + ((offset) ^ 0x30)); \
+ \
+ const uint64_t chunk1_old[2] = { chunk1[0], chunk1[1] }; \
+ \
+ chunk1[0] = chunk3[0] + U64(b + 16)[0]; \
+ chunk1[1] = chunk3[1] + U64(b + 16)[1]; \
+ \
+ chunk3[0] = chunk2[0] + U64(a)[0]; \
+ chunk3[1] = chunk2[1] + U64(a)[1]; \
+ \
+ chunk2[0] = chunk1_old[0] + U64(b)[0]; \
+ chunk2[1] = chunk1_old[1] + U64(b)[1]; \
+ } while (0)
+
+#define VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr) \
+ ((uint64_t*)(b))[0] ^= division_result ^ (sqrt_result << 32); \
+ { \
+ const uint64_t dividend = ((uint64_t*)(ptr))[1]; \
+ const uint32_t divisor = (((uint32_t*)(ptr))[0] + (uint32_t)(sqrt_result << 1)) | 0x80000001UL; \
+ division_result = ((uint32_t)(dividend / divisor)) + \
+ (((uint64_t)(dividend % divisor)) << 32); \
+ } \
+ const uint64_t sqrt_input = ((uint64_t*)(ptr))[0] + division_result
+
+#define VARIANT2_INTEGER_MATH(b, ptr) \
+ do if (variant >= 2) \
+ { \
+ VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr); \
+ VARIANT2_INTEGER_MATH_SQRT_STEP_FP64(); \
+ VARIANT2_INTEGER_MATH_SQRT_FIXUP(sqrt_result); \
+ } while (0)
+
+#define VARIANT2_2() \
+ do if (variant >= 2) { \
+ ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[0] ^= hi; \
+ ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[1] ^= lo; \
+ hi ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[0]; \
+ lo ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[1]; \
+ } while (0)
+
+#pragma pack(push, 1)
+union cn_slow_hash_state {
+ union hash_state hs;
+ struct {
+ uint8_t k[64];
+ uint8_t init[INIT_SIZE_BYTE];
+ };
+};
+#pragma pack(pop)
+
+static void do_fast_blake_hash(const void* input, size_t len, char* output) {
+ blake256_hash((uint8_t*)output, input, len);
+}
+
+void do_fast_groestl_hash(const void* input, size_t len, char* output) {
+ groestl(input, len * 8, (uint8_t*)output);
+}
+
+static void do_fast_jh_hash(const void* input, size_t len, char* output) {
+ int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output);
+ assert(SUCCESS == r);
+}
+
+static void do_fast_skein_hash(const void* input, size_t len, char* output) {
+ int r = c_skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
+ assert(SKEIN_SUCCESS == r);
+}
+
+static void (* const extra_hashes[4])(const void *, size_t, char *) = {
+ do_fast_blake_hash, do_fast_groestl_hash, do_fast_jh_hash, do_fast_skein_hash
+};
+
+extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
+extern int aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey);
+
+static inline size_t e2i(const uint8_t* a) {
+ return (*((uint64_t*) a) / AES_BLOCK_SIZE) & (CN_AES_INIT - 1);
+}
+
+static void mul(const uint8_t* a, const uint8_t* b, uint8_t* res) {
+ ((uint64_t*) res)[1] = mul128(((uint64_t*) a)[0], ((uint64_t*) b)[0], (uint64_t*) res);
+}
+
+static void sum_half_blocks(uint8_t* a, const uint8_t* b) {
+ uint64_t a0, a1, b0, b1;
+
+ a0 = SWAP64LE(((uint64_t*) a)[0]);
+ a1 = SWAP64LE(((uint64_t*) a)[1]);
+ b0 = SWAP64LE(((uint64_t*) b)[0]);
+ b1 = SWAP64LE(((uint64_t*) b)[1]);
+ a0 += b0;
+ a1 += b1;
+ ((uint64_t*) a)[0] = SWAP64LE(a0);
+ ((uint64_t*) a)[1] = SWAP64LE(a1);
+}
+
+static inline void copy_block(uint8_t* dst, const uint8_t* src) {
+ ((uint64_t*) dst)[0] = ((uint64_t*) src)[0];
+ ((uint64_t*) dst)[1] = ((uint64_t*) src)[1];
+}
+
+static void swap_blocks(uint8_t* a, uint8_t* b) {
+ size_t i;
+ uint8_t t;
+ for (i = 0; i < AES_BLOCK_SIZE; i++) {
+ t = a[i];
+ a[i] = b[i];
+ b[i] = t;
+ }
+}
+
+static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
+ ((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
+ ((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
+}
+
+static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
+ ((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
+ ((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
+}
+
+struct cryptonightfast_ctx {
+ uint8_t long_state[MEMORY];
+ union cn_slow_hash_state state;
+ uint8_t text[INIT_SIZE_BYTE];
+ uint8_t a[AES_BLOCK_SIZE];
+ uint8_t b[AES_BLOCK_SIZE * 2];
+ uint8_t c[AES_BLOCK_SIZE];
+ uint8_t aes_key[AES_KEY_SIZE];
+ oaes_ctx* aes_ctx;
+};
+
+void cryptonightfast_hash(const char* input, char* output, uint32_t len, int variant) {
+ struct cryptonightfast_ctx *ctx = malloc(sizeof(struct cryptonightfast_ctx));
+ hash_process(&ctx->state.hs, (const uint8_t*) input, len);
+ memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
+ memcpy(ctx->aes_key, ctx->state.hs.b, AES_KEY_SIZE);
+ ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
+ size_t i, j;
+
+ VARIANT1_INIT();
+ VARIANT2_INIT(ctx->b, ctx->state);
+
+ oaes_key_import_data(ctx->aes_ctx, ctx->aes_key, AES_KEY_SIZE);
+ for (i = 0; i < CN_INIT; i++) {
+ for (j = 0; j < INIT_SIZE_BLK; j++) {
+ aesb_pseudo_round(&ctx->text[AES_BLOCK_SIZE * j],
+ &ctx->text[AES_BLOCK_SIZE * j],
+ ctx->aes_ctx->key->exp_data);
+ }
+ memcpy(&ctx->long_state[i * INIT_SIZE_BYTE], ctx->text, INIT_SIZE_BYTE);
+ }
+
+ for (i = 0; i < 16; i++) {
+ ctx->a[i] = ctx->state.k[i] ^ ctx->state.k[32 + i];
+ ctx->b[i] = ctx->state.k[16 + i] ^ ctx->state.k[48 + i];
+ }
+
+ for (i = 0; i < ITER_DIV; i++) {
+ /* Dependency chain: address -> read value ------+
+ * written value <-+ hard function (AES or MUL) <+
+ * next address <-+
+ */
+ /* Iteration 1 */
+ j = e2i(ctx->a);
+ aesb_single_round(&ctx->long_state[j * AES_BLOCK_SIZE], ctx->c, ctx->a);
+ VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b);
+ xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j * AES_BLOCK_SIZE]);
+ VARIANT1_1((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]);
+ /* Iteration 2 */
+ j = e2i(ctx->c);
+
+ uint64_t* dst = (uint64_t*)&ctx->long_state[j * AES_BLOCK_SIZE];
+
+ uint64_t t[2];
+ t[0] = dst[0];
+ t[1] = dst[1];
+
+ VARIANT2_INTEGER_MATH(t, ctx->c);
+
+ uint64_t hi;
+ uint64_t lo = mul128(((uint64_t*)ctx->c)[0], t[0], &hi);
+
+ VARIANT2_2();
+ VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b);
+
+ ((uint64_t*)ctx->a)[0] += hi;
+ ((uint64_t*)ctx->a)[1] += lo;
+
+ dst[0] = ((uint64_t*)ctx->a)[0];
+ dst[1] = ((uint64_t*)ctx->a)[1];
+
+ ((uint64_t*)ctx->a)[0] ^= t[0];
+ ((uint64_t*)ctx->a)[1] ^= t[1];
+
+ VARIANT1_2((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]);
+ copy_block(ctx->b + AES_BLOCK_SIZE, ctx->b);
+ copy_block(ctx->b, ctx->c);
+ }
+
+ memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
+ oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
+ for (i = 0; i < CN_INIT; i++) {
+ for (j = 0; j < INIT_SIZE_BLK; j++) {
+ xor_blocks(&ctx->text[j * AES_BLOCK_SIZE],
+ &ctx->long_state[i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE]);
+ aesb_pseudo_round(&ctx->text[j * AES_BLOCK_SIZE],
+ &ctx->text[j * AES_BLOCK_SIZE],
+ ctx->aes_ctx->key->exp_data);
+ }
+ }
+ memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
+ hash_permutation(&ctx->state.hs);
+ /*memcpy(hash, &state, 32);*/
+ extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
+ oaes_free((OAES_CTX **) &ctx->aes_ctx);
+ free(ctx);
+}
+
+void cryptonightfast_fast_hash(const char* input, char* output, uint32_t len) {
+ union hash_state state;
+ hash_process(&state, (const uint8_t*) input, len);
+ memcpy(output, &state, HASH_SIZE);
+}
diff --git a/stratum/algos/cryptonote/cryptonight_fast.h b/stratum/algos/cryptonote/cryptonight_fast.h
new file mode 100644
index 000000000..897b8ad85
--- /dev/null
+++ b/stratum/algos/cryptonote/cryptonight_fast.h
@@ -0,0 +1,17 @@
+#ifndef CRYPTONIGHTFAST_H
+#define CRYPTONIGHTFAST_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+void cryptonightfast_hash(const char* input, char* output, uint32_t len, int variant);
+void cryptonightfast_fast_hash(const char* input, char* output, uint32_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/stratum/algos/cryptonote/cryptonight_lite.c b/stratum/algos/cryptonote/cryptonight_lite.c
new file mode 100644
index 000000000..d8af93f3e
--- /dev/null
+++ b/stratum/algos/cryptonote/cryptonight_lite.c
@@ -0,0 +1,300 @@
+// Copyright (c) 2012-2013 The Cryptonote developers
+// Distributed under the MIT/X11 software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+// Portions Copyright (c) 2018 The Monero developers
+// Portions Copyright (c) 2018 The TurtleCoin Developers
+
+#include <stddef.h>
+#include <stdint.h>
+#include "crypto/oaes_lib.h"
+#include "crypto/c_keccak.h"
+#include "crypto/c_groestl.h"
+#include "crypto/c_blake256.h"
+#include "crypto/c_jh.h"
+#include "crypto/c_skein.h"
+#include "crypto/int-util.h"
+#include "crypto/hash-ops.h"
+#include "crypto/variant2_int_sqrt.h"
+
+#if defined(_MSC_VER)
+#include <malloc.h>
+#endif
+
+#define MEMORY 1048576 /* 1 MiB - 2^20 */
+#define ITER 524288 /* 2^19 */
+#define ITER_DIV 262144 /* 2^18 */
+#define AES_BLOCK_SIZE 16
+#define AES_KEY_SIZE 32 /*16*/
+#define INIT_SIZE_BLK 8
+#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
+#define CN_INIT (MEMORY / INIT_SIZE_BYTE)
+#define CN_AES_INIT (MEMORY / AES_BLOCK_SIZE)
+
+#define VARIANT1_1(p) \
+ do if (variant == 1) \
+ { \
+ const uint8_t tmp = ((const uint8_t*)(p))[11]; \
+ static const uint32_t table = 0x75310; \
+ const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \
+ ((uint8_t*)(p))[11] = tmp ^ ((table >> index) & 0x30); \
+ } while(0)
+
+#define VARIANT1_2(p) \
+ do if (variant == 1) \
+ { \
+ ((uint64_t*)p)[1] ^= tweak1_2; \
+ } while(0)
+
+#define VARIANT1_INIT() \
+ if (variant == 1 && len < 43) \
+ { \
+ fprintf(stderr, "Cryptonight variant 1 needs at least 43 bytes of data"); \
+ _exit(1); \
+ } \
+ const uint64_t tweak1_2 = (variant == 1) ? *(const uint64_t*)(((const uint8_t*)input)+35) ^ ctx->state.hs.w[24] : 0
+
+#define U64(p) ((uint64_t*)(p))
+
+#define VARIANT2_INIT(b, state) \
+ uint64_t division_result; \
+ uint64_t sqrt_result; \
+ do if (variant >= 2) \
+ { \
+ U64(b)[2] = state.hs.w[8] ^ state.hs.w[10]; \
+ U64(b)[3] = state.hs.w[9] ^ state.hs.w[11]; \
+ division_result = state.hs.w[12]; \
+ sqrt_result = state.hs.w[13]; \
+ } while (0)
+
+#define VARIANT2_SHUFFLE_ADD(base_ptr, offset, a, b) \
+ do if (variant >= 2) \
+ { \
+ uint64_t* chunk1 = U64((base_ptr) + ((offset) ^ 0x10)); \
+ uint64_t* chunk2 = U64((base_ptr) + ((offset) ^ 0x20)); \
+ uint64_t* chunk3 = U64((base_ptr) + ((offset) ^ 0x30)); \
+ \
+ const uint64_t chunk1_old[2] = { chunk1[0], chunk1[1] }; \
+ \
+ chunk1[0] = chunk3[0] + U64(b + 16)[0]; \
+ chunk1[1] = chunk3[1] + U64(b + 16)[1]; \
+ \
+ chunk3[0] = chunk2[0] + U64(a)[0]; \
+ chunk3[1] = chunk2[1] + U64(a)[1]; \
+ \
+ chunk2[0] = chunk1_old[0] + U64(b)[0]; \
+ chunk2[1] = chunk1_old[1] + U64(b)[1]; \
+ } while (0)
+
+#define VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr) \
+ ((uint64_t*)(b))[0] ^= division_result ^ (sqrt_result << 32); \
+ { \
+ const uint64_t dividend = ((uint64_t*)(ptr))[1]; \
+ const uint32_t divisor = (((uint32_t*)(ptr))[0] + (uint32_t)(sqrt_result << 1)) | 0x80000001UL; \
+ division_result = ((uint32_t)(dividend / divisor)) + \
+ (((uint64_t)(dividend % divisor)) << 32); \
+ } \
+ const uint64_t sqrt_input = ((uint64_t*)(ptr))[0] + division_result
+
+#define VARIANT2_INTEGER_MATH(b, ptr) \
+ do if (variant >= 2) \
+ { \
+ VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr); \
+ VARIANT2_INTEGER_MATH_SQRT_STEP_FP64(); \
+ VARIANT2_INTEGER_MATH_SQRT_FIXUP(sqrt_result); \
+ } while (0)
+
+#define VARIANT2_2() \
+ do if (variant >= 2) { \
+ ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[0] ^= hi; \
+ ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[1] ^= lo; \
+ hi ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[0]; \
+ lo ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[1]; \
+ } while (0)
+
+#pragma pack(push, 1)
+union cn_slow_hash_state {
+ union hash_state hs;
+ struct {
+ uint8_t k[64];
+ uint8_t init[INIT_SIZE_BYTE];
+ };
+};
+#pragma pack(pop)
+
+static void do_lite_blake_hash(const void* input, size_t len, char* output) {
+ blake256_hash((uint8_t*)output, input, len);
+}
+
+void do_lite_groestl_hash(const void* input, size_t len, char* output) {
+ groestl(input, len * 8, (uint8_t*)output);
+}
+
+static void do_lite_jh_hash(const void* input, size_t len, char* output) {
+ int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output);
+ assert(SUCCESS == r);
+}
+
+static void do_lite_skein_hash(const void* input, size_t len, char* output) {
+ int r = c_skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
+ assert(SKEIN_SUCCESS == r);
+}
+
+static void (* const extra_hashes[4])(const void *, size_t, char *) = {
+ do_lite_blake_hash, do_lite_groestl_hash, do_lite_jh_hash, do_lite_skein_hash
+};
+
+extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
+extern int aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey);
+
+static inline size_t e2i(const uint8_t* a) {
+ return (*((uint64_t*) a) / AES_BLOCK_SIZE) & (CN_AES_INIT - 1);
+}
+
+static void mul(const uint8_t* a, const uint8_t* b, uint8_t* res) {
+ ((uint64_t*) res)[1] = mul128(((uint64_t*) a)[0], ((uint64_t*) b)[0], (uint64_t*) res);
+}
+
+static void sum_half_blocks(uint8_t* a, const uint8_t* b) {
+ uint64_t a0, a1, b0, b1;
+
+ a0 = SWAP64LE(((uint64_t*) a)[0]);
+ a1 = SWAP64LE(((uint64_t*) a)[1]);
+ b0 = SWAP64LE(((uint64_t*) b)[0]);
+ b1 = SWAP64LE(((uint64_t*) b)[1]);
+ a0 += b0;
+ a1 += b1;
+ ((uint64_t*) a)[0] = SWAP64LE(a0);
+ ((uint64_t*) a)[1] = SWAP64LE(a1);
+}
+
+static inline void copy_block(uint8_t* dst, const uint8_t* src) {
+ ((uint64_t*) dst)[0] = ((uint64_t*) src)[0];
+ ((uint64_t*) dst)[1] = ((uint64_t*) src)[1];
+}
+
+static void swap_blocks(uint8_t* a, uint8_t* b) {
+ size_t i;
+ uint8_t t;
+ for (i = 0; i < AES_BLOCK_SIZE; i++) {
+ t = a[i];
+ a[i] = b[i];
+ b[i] = t;
+ }
+}
+
+static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
+ ((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
+ ((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
+}
+
+static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
+ ((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
+ ((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
+}
+
+struct cryptonightlite_ctx {
+ uint8_t long_state[MEMORY];
+ union cn_slow_hash_state state;
+ uint8_t text[INIT_SIZE_BYTE];
+ uint8_t a[AES_BLOCK_SIZE];
+ uint8_t b[AES_BLOCK_SIZE * 2];
+ uint8_t c[AES_BLOCK_SIZE];
+ uint8_t aes_key[AES_KEY_SIZE];
+ oaes_ctx* aes_ctx;
+};
+
+void cryptonightlite_hash(const char* input, char* output, uint32_t len, int variant) {
+#if defined(_MSC_VER)
+ struct cryptonightlite_ctx *ctx = _malloca(sizeof(struct cryptonightlite_ctx));
+#else
+ struct cryptonightlite_ctx *ctx = alloca(sizeof(struct cryptonightlite_ctx));
+#endif
+ hash_process(&ctx->state.hs, (const uint8_t*) input, len);
+ memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
+ memcpy(ctx->aes_key, ctx->state.hs.b, AES_KEY_SIZE);
+ ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
+ size_t i, j;
+
+ VARIANT1_INIT();
+ VARIANT2_INIT(ctx->b, ctx->state);
+
+ oaes_key_import_data(ctx->aes_ctx, ctx->aes_key, AES_KEY_SIZE);
+ for (i = 0; i < CN_INIT; i++) {
+ for (j = 0; j < INIT_SIZE_BLK; j++) {
+ aesb_pseudo_round(&ctx->text[AES_BLOCK_SIZE * j],
+ &ctx->text[AES_BLOCK_SIZE * j],
+ ctx->aes_ctx->key->exp_data);
+ }
+ memcpy(&ctx->long_state[i * INIT_SIZE_BYTE], ctx->text, INIT_SIZE_BYTE);
+ }
+
+ for (i = 0; i < 16; i++) {
+ ctx->a[i] = ctx->state.k[i] ^ ctx->state.k[32 + i];
+ ctx->b[i] = ctx->state.k[16 + i] ^ ctx->state.k[48 + i];
+ }
+
+ for (i = 0; i < ITER_DIV; i++) {
+ /* Dependency chain: address -> read value ------+
+ * written value <-+ hard function (AES or MUL) <+
+ * next address <-+
+ */
+ /* Iteration 1 */
+ j = e2i(ctx->a);
+ aesb_single_round(&ctx->long_state[j * AES_BLOCK_SIZE], ctx->c, ctx->a);
+ VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b);
+ xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j * AES_BLOCK_SIZE]);
+ VARIANT1_1((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]);
+ /* Iteration 2 */
+ j = e2i(ctx->c);
+
+ uint64_t* dst = (uint64_t*)&ctx->long_state[j * AES_BLOCK_SIZE];
+
+ uint64_t t[2];
+ t[0] = dst[0];
+ t[1] = dst[1];
+
+ VARIANT2_INTEGER_MATH(t, ctx->c);
+
+ uint64_t hi;
+ uint64_t lo = mul128(((uint64_t*)ctx->c)[0], t[0], &hi);
+
+ VARIANT2_2();
+ VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b);
+
+ ((uint64_t*)ctx->a)[0] += hi;
+ ((uint64_t*)ctx->a)[1] += lo;
+
+ dst[0] = ((uint64_t*)ctx->a)[0];
+ dst[1] = ((uint64_t*)ctx->a)[1];
+
+ ((uint64_t*)ctx->a)[0] ^= t[0];
+ ((uint64_t*)ctx->a)[1] ^= t[1];
+
+ VARIANT1_2((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]);
+ copy_block(ctx->b + AES_BLOCK_SIZE, ctx->b);
+ copy_block(ctx->b, ctx->c);
+ }
+
+ memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
+ oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
+ for (i = 0; i < CN_INIT; i++) {
+ for (j = 0; j < INIT_SIZE_BLK; j++) {
+ xor_blocks(&ctx->text[j * AES_BLOCK_SIZE],
+ &ctx->long_state[i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE]);
+ aesb_pseudo_round(&ctx->text[j * AES_BLOCK_SIZE],
+ &ctx->text[j * AES_BLOCK_SIZE],
+ ctx->aes_ctx->key->exp_data);
+ }
+ }
+ memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
+ hash_permutation(&ctx->state.hs);
+ /*memcpy(hash, &state, 32);*/
+ extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
+ oaes_free((OAES_CTX **) &ctx->aes_ctx);
+}
+
+void cryptonightlite_fast_hash(const char* input, char* output, uint32_t len) {
+ union hash_state state;
+ hash_process(&state, (const uint8_t*) input, len);
+ memcpy(output, &state, HASH_SIZE);
+}
diff --git a/stratum/algos/cryptonote/cryptonight_lite.h b/stratum/algos/cryptonote/cryptonight_lite.h
new file mode 100644
index 000000000..ebe1e45fa
--- /dev/null
+++ b/stratum/algos/cryptonote/cryptonight_lite.h
@@ -0,0 +1,17 @@
+#ifndef CRYPTONIGHTLITE_H
+#define CRYPTONIGHTLITE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+void cryptonightlite_hash(const char* input, char* output, uint32_t len, int variant);
+void cryptonightlite_fast_hash(const char* input, char* output, uint32_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/stratum/algos/cryptonote/cryptonight_soft_shell.c b/stratum/algos/cryptonote/cryptonight_soft_shell.c
new file mode 100644
index 000000000..9299b4c8b
--- /dev/null
+++ b/stratum/algos/cryptonote/cryptonight_soft_shell.c
@@ -0,0 +1,298 @@
+// Copyright (c) 2012-2013 The Cryptonote developers
+// Distributed under the MIT/X11 software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+// Portions Copyright (c) 2018 The Monero developers
+// Portions Copyright (c) 2018 The TurtleCoin Developers
+
+#include <stdio.h>  /* fprintf in VARIANT1_INIT (include target was lost in the patch — restored; TODO confirm against upstream) */
+#include <stdlib.h> /* malloc/free of the scratchpad */
+#include "crypto/oaes_lib.h"
+#include "crypto/c_keccak.h"
+#include "crypto/c_groestl.h"
+#include "crypto/c_blake256.h"
+#include "crypto/c_jh.h"
+#include "crypto/c_skein.h"
+#include "crypto/int-util.h"
+#include "crypto/hash-ops.h"
+#include "crypto/variant2_int_sqrt.h"
+
+#if defined(_MSC_VER)
+#include <malloc.h> /* _malloca / _freea */
+#endif
+
+// Standard Crypto Definitions
+#define AES_BLOCK_SIZE 16
+#define AES_KEY_SIZE 32
+#define INIT_SIZE_BLK 8
+#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
+
+#define VARIANT1_1(p) \
+ do if (variant == 1) \
+ { \
+ const uint8_t tmp = ((const uint8_t*)(p))[11]; \
+ static const uint32_t table = 0x75310; \
+ const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \
+ ((uint8_t*)(p))[11] = tmp ^ ((table >> index) & 0x30); \
+ } while(0)
+
+#define VARIANT1_2(p) \
+ do if (variant == 1) \
+ { \
+ ((uint64_t*)p)[1] ^= tweak1_2; \
+ } while(0)
+
+#define VARIANT1_INIT() \
+ if (variant == 1 && len < 43) \
+ { \
+ fprintf(stderr, "Cryptonight variant 1 needs at least 43 bytes of data"); \
+ _exit(1); \
+ } \
+ const uint64_t tweak1_2 = (variant == 1) ? *(const uint64_t*)(((const uint8_t*)input)+35) ^ state.hs.w[24] : 0
+
+#define U64(p) ((uint64_t*)(p))
+
+#define VARIANT2_INIT(b, state) \
+ uint64_t division_result; \
+ uint64_t sqrt_result; \
+ do if (variant >= 2) \
+ { \
+ U64(b)[2] = state.hs.w[8] ^ state.hs.w[10]; \
+ U64(b)[3] = state.hs.w[9] ^ state.hs.w[11]; \
+ division_result = state.hs.w[12]; \
+ sqrt_result = state.hs.w[13]; \
+ } while (0)
+
+#define VARIANT2_SHUFFLE_ADD(base_ptr, offset, a, b) \
+ do if (variant >= 2) \
+ { \
+ uint64_t* chunk1 = U64((base_ptr) + ((offset) ^ 0x10)); \
+ uint64_t* chunk2 = U64((base_ptr) + ((offset) ^ 0x20)); \
+ uint64_t* chunk3 = U64((base_ptr) + ((offset) ^ 0x30)); \
+ \
+ const uint64_t chunk1_old[2] = { chunk1[0], chunk1[1] }; \
+ \
+ chunk1[0] = chunk3[0] + U64(b + 16)[0]; \
+ chunk1[1] = chunk3[1] + U64(b + 16)[1]; \
+ \
+ chunk3[0] = chunk2[0] + U64(a)[0]; \
+ chunk3[1] = chunk2[1] + U64(a)[1]; \
+ \
+ chunk2[0] = chunk1_old[0] + U64(b)[0]; \
+ chunk2[1] = chunk1_old[1] + U64(b)[1]; \
+ } while (0)
+
+#define VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr) \
+ ((uint64_t*)(b))[0] ^= division_result ^ (sqrt_result << 32); \
+ { \
+ const uint64_t dividend = ((uint64_t*)(ptr))[1]; \
+ const uint32_t divisor = (((uint32_t*)(ptr))[0] + (uint32_t)(sqrt_result << 1)) | 0x80000001UL; \
+ division_result = ((uint32_t)(dividend / divisor)) + \
+ (((uint64_t)(dividend % divisor)) << 32); \
+ } \
+ const uint64_t sqrt_input = ((uint64_t*)(ptr))[0] + division_result
+
+#define VARIANT2_INTEGER_MATH(b, ptr) \
+ do if (variant >= 2) \
+ { \
+ VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr); \
+ VARIANT2_INTEGER_MATH_SQRT_STEP_FP64(); \
+ VARIANT2_INTEGER_MATH_SQRT_FIXUP(sqrt_result); \
+ } while (0)
+
+#define VARIANT2_2() \
+ do if (variant >= 2) { \
+ ((uint64_t*)(long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[0] ^= hi; \
+ ((uint64_t*)(long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[1] ^= lo; \
+ hi ^= ((uint64_t*)(long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[0]; \
+ lo ^= ((uint64_t*)(long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[1]; \
+ } while (0)
+
+#pragma pack(push, 1)
+union cn_slow_hash_state {
+  union hash_state hs;            /* raw Keccak state, aliases the fields below */
+  struct {
+    uint8_t k[64];                /* bytes 0..63: AES key material */
+    uint8_t init[INIT_SIZE_BYTE]; /* next INIT_SIZE_BYTE bytes: initial scratchpad text */
+  };
+};
+#pragma pack(pop)
+
+static void do_soft_shell_blake_hash(const void* input, size_t len, char* output) { /* BLAKE-256 finaliser */
+  blake256_hash((uint8_t*)output, input, len);
+}
+
+void do_soft_shell_groestl_hash(const void* input, size_t len, char* output) { /* Groestl finaliser (non-static, unlike its siblings) */
+  groestl(input, len * 8, (uint8_t*)output); /* groestl takes the length in bits */
+}
+
+static void do_soft_shell_jh_hash(const void* input, size_t len, char* output) { /* JH finaliser */
+  int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output);
+  assert(SUCCESS == r);
+}
+
+static void do_soft_shell_skein_hash(const void* input, size_t len, char* output) { /* Skein finaliser */
+  int r = c_skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
+  assert(SKEIN_SUCCESS == r);
+}
+
+static void (* const extra_hashes[4])(const void *, size_t, char *) = { /* indexed by state.hs.b[0] & 3 */
+  do_soft_shell_blake_hash, do_soft_shell_groestl_hash, do_soft_shell_jh_hash, do_soft_shell_skein_hash
+};
+
+extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey); /* one AES round (defined elsewhere) */
+extern int aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey); /* full pseudo-round (defined elsewhere) */
+
+static inline size_t e2i(const uint8_t* a, size_t count) { /* map a block value to a scratchpad block index; count must be a power of two */
+  return (*((uint64_t*) a) / AES_BLOCK_SIZE) & (count - 1);
+}
+
+static void mul(const uint8_t* a, const uint8_t* b, uint8_t* res) { /* 64x64->128 multiply into res (currently unused; loop calls mul128 directly) */
+  ((uint64_t*) res)[1] = mul128(((uint64_t*) a)[0], ((uint64_t*) b)[0], (uint64_t*) res);
+}
+
+static void sum_half_blocks(uint8_t* a, const uint8_t* b) { /* a += b as two little-endian u64 lanes (currently unused) */
+  uint64_t a0, a1, b0, b1;
+
+  a0 = SWAP64LE(((uint64_t*) a)[0]);
+  a1 = SWAP64LE(((uint64_t*) a)[1]);
+  b0 = SWAP64LE(((uint64_t*) b)[0]);
+  b1 = SWAP64LE(((uint64_t*) b)[1]);
+  a0 += b0;
+  a1 += b1;
+  ((uint64_t*) a)[0] = SWAP64LE(a0);
+  ((uint64_t*) a)[1] = SWAP64LE(a1);
+}
+
+static inline void copy_block(uint8_t* dst, const uint8_t* src) { /* 16-byte block copy via two u64 stores */
+  ((uint64_t*) dst)[0] = ((uint64_t*) src)[0];
+  ((uint64_t*) dst)[1] = ((uint64_t*) src)[1];
+}
+
+static void swap_blocks(uint8_t* a, uint8_t* b) { /* byte-wise 16-byte swap (currently unused) */
+  size_t i;
+  uint8_t t;
+  for (i = 0; i < AES_BLOCK_SIZE; i++) {
+    t = a[i];
+    a[i] = b[i];
+    b[i] = t;
+  }
+}
+
+static inline void xor_blocks(uint8_t* a, const uint8_t* b) { /* a ^= b over 16 bytes */
+  ((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
+  ((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
+}
+
+static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) { /* dst = a ^ b over 16 bytes */
+  ((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
+  ((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
+}
+
+/* CryptoNight "Soft Shell" slow hash: caller supplies the scratchpad size and
+ * iteration count; variant selects the v1/v2 tweaks. Writes a 32-byte hash. */
+void cryptonight_soft_shell_hash(const char* input, char* output, uint32_t len, int variant, uint32_t scratchpad, uint32_t iterations) {
+  union cn_slow_hash_state state;
+  uint8_t text[INIT_SIZE_BYTE];
+  uint8_t a[AES_BLOCK_SIZE];
+  uint8_t b[AES_BLOCK_SIZE * 2]; /* fix: copy_block(b+16,..) and VARIANT2_INIT write bytes 16..31 — 16 bytes overflowed the stack */
+  uint8_t c[AES_BLOCK_SIZE];
+  uint8_t aes_key[AES_KEY_SIZE];
+  oaes_ctx* aes_ctx;
+  /* Scratchpad is runtime-sized, so allocate it on the heap for all
+   * compilers and free it before return (it was previously leaked). */
+  uint8_t *long_state = (uint8_t *)malloc(scratchpad);
+  size_t CN_INIT = (scratchpad / INIT_SIZE_BYTE);
+  size_t ITER_DIV = (iterations / 2);
+  size_t CN_AES_INIT = (scratchpad / AES_BLOCK_SIZE) / 2;
+
+  hash_process(&state.hs, (const uint8_t*) input, len); /* Keccak state from input */
+  memcpy(text, state.init, INIT_SIZE_BYTE);
+  memcpy(aes_key, state.hs.b, AES_KEY_SIZE);
+  aes_ctx = (oaes_ctx*) oaes_alloc();
+  size_t i, j;
+
+  VARIANT1_INIT();
+  VARIANT2_INIT(b, state);
+
+  /* Fill the scratchpad, INIT_SIZE_BYTE bytes per step. */
+  oaes_key_import_data(aes_ctx, aes_key, AES_KEY_SIZE);
+  for (i = 0; i < CN_INIT; i++) {
+    for (j = 0; j < INIT_SIZE_BLK; j++) {
+      aesb_pseudo_round(&text[AES_BLOCK_SIZE * j],
+        &text[AES_BLOCK_SIZE * j],
+        aes_ctx->key->exp_data);
+    }
+    memcpy(&long_state[i * INIT_SIZE_BYTE], text, INIT_SIZE_BYTE);
+  }
+
+  for (i = 0; i < 16; i++) {
+    a[i] = state.k[i] ^ state.k[32 + i];
+    b[i] = state.k[16 + i] ^ state.k[48 + i];
+  }
+
+  for (i = 0; i < ITER_DIV; i++) {
+    /* Dependency chain: address -> read value ------+
+     * written value <-+ hard function (AES or MUL) <+
+     * next address  <-+
+     */
+    /* Iteration 1 */
+    j = e2i(a, CN_AES_INIT);
+    aesb_single_round(&long_state[j * AES_BLOCK_SIZE], c, a);
+    VARIANT2_SHUFFLE_ADD(long_state, j * AES_BLOCK_SIZE, a, b);
+    xor_blocks_dst(c, b, &long_state[j * AES_BLOCK_SIZE]);
+    VARIANT1_1((uint8_t*)&long_state[j * AES_BLOCK_SIZE]);
+    /* Iteration 2 */
+    j = e2i(c, CN_AES_INIT);
+
+    uint64_t* dst = (uint64_t*)&long_state[j * AES_BLOCK_SIZE];
+
+    uint64_t t[2];
+    t[0] = dst[0];
+    t[1] = dst[1];
+
+    VARIANT2_INTEGER_MATH(t, c);
+
+    uint64_t hi;
+    uint64_t lo = mul128(((uint64_t*)c)[0], t[0], &hi);
+
+    VARIANT2_2();
+    VARIANT2_SHUFFLE_ADD(long_state, j * AES_BLOCK_SIZE, a, b);
+
+    ((uint64_t*)a)[0] += hi;
+    ((uint64_t*)a)[1] += lo;
+
+    dst[0] = ((uint64_t*)a)[0];
+    dst[1] = ((uint64_t*)a)[1];
+
+    ((uint64_t*)a)[0] ^= t[0];
+    ((uint64_t*)a)[1] ^= t[1];
+
+    VARIANT1_2((uint8_t*)&long_state[j * AES_BLOCK_SIZE]);
+    copy_block(b + AES_BLOCK_SIZE, b); /* remember previous b for the v2 shuffle */
+    copy_block(b, c);
+  }
+
+  /* Re-encrypt the scratchpad into text and fold it back into the state. */
+  memcpy(text, state.init, INIT_SIZE_BYTE);
+  oaes_key_import_data(aes_ctx, &state.hs.b[32], AES_KEY_SIZE);
+  for (i = 0; i < CN_INIT; i++) {
+    for (j = 0; j < INIT_SIZE_BLK; j++) {
+      xor_blocks(&text[j * AES_BLOCK_SIZE],
+        &long_state[i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE]);
+      aesb_pseudo_round(&text[j * AES_BLOCK_SIZE],
+        &text[j * AES_BLOCK_SIZE],
+        aes_ctx->key->exp_data);
+    }
+  }
+  memcpy(state.init, text, INIT_SIZE_BYTE);
+  hash_permutation(&state.hs);
+  extra_hashes[state.hs.b[0] & 3](&state, 200, output); /* final hash selected by low 2 bits of state */
+  oaes_free((OAES_CTX **) &aes_ctx);
+  free(long_state); /* fix: scratchpad was leaked on every call */
+}
+
+void cryptonight_soft_shell_fast_hash(const char* input, char* output, uint32_t len) { /* cn_fast_hash: one Keccak pass, no scratchpad */
+  union hash_state state;                            /* raw Keccak state */
+  hash_process(&state, (const uint8_t*) input, len); /* absorb input */
+  memcpy(output, &state, HASH_SIZE);                 /* first HASH_SIZE bytes of the state are the digest */
+}
diff --git a/stratum/algos/cryptonote/cryptonight_soft_shell.h b/stratum/algos/cryptonote/cryptonight_soft_shell.h
new file mode 100644
index 000000000..d32570ba5
--- /dev/null
+++ b/stratum/algos/cryptonote/cryptonight_soft_shell.h
@@ -0,0 +1,17 @@
+#ifndef CRYPTONIGHT_SOFT_SHELL_H
+#define CRYPTONIGHT_SOFT_SHELL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h> /* uint32_t; the include target was lost in the patch — restored */
+
+void cryptonight_soft_shell_hash(const char* input, char* output, uint32_t len, int variant, uint32_t scratchpad, uint32_t iterations);
+void cryptonight_soft_shell_fast_hash(const char* input, char* output, uint32_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/stratum/algos/cryptonote/cryptonight_turtle.c b/stratum/algos/cryptonote/cryptonight_turtle.c
new file mode 100644
index 000000000..c6705e375
--- /dev/null
+++ b/stratum/algos/cryptonote/cryptonight_turtle.c
@@ -0,0 +1,300 @@
+// Copyright (c) 2012-2013 The Cryptonote developers
+// Distributed under the MIT/X11 software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+// Portions Copyright (c) 2018 The Monero developers
+// Portions Copyright (c) 2018 The TurtleCoin Developers
+
+#include <stdio.h>  /* fprintf in VARIANT1_INIT (include target was lost in the patch — restored; TODO confirm against upstream) */
+#include <stdlib.h> /* alloca (via compiler builtin on glibc/gcc) */
+#include "crypto/oaes_lib.h"
+#include "crypto/c_keccak.h"
+#include "crypto/c_groestl.h"
+#include "crypto/c_blake256.h"
+#include "crypto/c_jh.h"
+#include "crypto/c_skein.h"
+#include "crypto/int-util.h"
+#include "crypto/hash-ops.h"
+#include "crypto/variant2_int_sqrt.h"
+
+#if defined(_MSC_VER)
+#include <malloc.h> /* _malloca / _freea */
+#endif
+
+#define MEMORY 262144 /* 256KB - 2^18 */
+#define ITER 131072 /* 2^17 */
+#define ITER_DIV 65536 /* 2^16 */
+#define AES_BLOCK_SIZE 16
+#define AES_KEY_SIZE 32 /*16*/
+#define INIT_SIZE_BLK 8
+#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
+#define CN_INIT (MEMORY / INIT_SIZE_BYTE)
+#define CN_AES_INIT (MEMORY / AES_BLOCK_SIZE)
+
+#define VARIANT1_1(p) \
+ do if (variant == 1) \
+ { \
+ const uint8_t tmp = ((const uint8_t*)(p))[11]; \
+ static const uint32_t table = 0x75310; \
+ const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \
+ ((uint8_t*)(p))[11] = tmp ^ ((table >> index) & 0x30); \
+ } while(0)
+
+#define VARIANT1_2(p) \
+ do if (variant == 1) \
+ { \
+ ((uint64_t*)p)[1] ^= tweak1_2; \
+ } while(0)
+
+#define VARIANT1_INIT() \
+ if (variant == 1 && len < 43) \
+ { \
+ fprintf(stderr, "Cryptonight variant 1 needs at least 43 bytes of data"); \
+ _exit(1); \
+ } \
+ const uint64_t tweak1_2 = (variant == 1) ? *(const uint64_t*)(((const uint8_t*)input)+35) ^ ctx->state.hs.w[24] : 0
+
+#define U64(p) ((uint64_t*)(p))
+
+#define VARIANT2_INIT(b, state) \
+ uint64_t division_result; \
+ uint64_t sqrt_result; \
+ do if (variant >= 2) \
+ { \
+ U64(b)[2] = state.hs.w[8] ^ state.hs.w[10]; \
+ U64(b)[3] = state.hs.w[9] ^ state.hs.w[11]; \
+ division_result = state.hs.w[12]; \
+ sqrt_result = state.hs.w[13]; \
+ } while (0)
+
+#define VARIANT2_SHUFFLE_ADD(base_ptr, offset, a, b) \
+ do if (variant >= 2) \
+ { \
+ uint64_t* chunk1 = U64((base_ptr) + ((offset) ^ 0x10)); \
+ uint64_t* chunk2 = U64((base_ptr) + ((offset) ^ 0x20)); \
+ uint64_t* chunk3 = U64((base_ptr) + ((offset) ^ 0x30)); \
+ \
+ const uint64_t chunk1_old[2] = { chunk1[0], chunk1[1] }; \
+ \
+ chunk1[0] = chunk3[0] + U64(b + 16)[0]; \
+ chunk1[1] = chunk3[1] + U64(b + 16)[1]; \
+ \
+ chunk3[0] = chunk2[0] + U64(a)[0]; \
+ chunk3[1] = chunk2[1] + U64(a)[1]; \
+ \
+ chunk2[0] = chunk1_old[0] + U64(b)[0]; \
+ chunk2[1] = chunk1_old[1] + U64(b)[1]; \
+ } while (0)
+
+#define VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr) \
+ ((uint64_t*)(b))[0] ^= division_result ^ (sqrt_result << 32); \
+ { \
+ const uint64_t dividend = ((uint64_t*)(ptr))[1]; \
+ const uint32_t divisor = (((uint32_t*)(ptr))[0] + (uint32_t)(sqrt_result << 1)) | 0x80000001UL; \
+ division_result = ((uint32_t)(dividend / divisor)) + \
+ (((uint64_t)(dividend % divisor)) << 32); \
+ } \
+ const uint64_t sqrt_input = ((uint64_t*)(ptr))[0] + division_result
+
+#define VARIANT2_INTEGER_MATH(b, ptr) \
+ do if (variant >= 2) \
+ { \
+ VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr); \
+ VARIANT2_INTEGER_MATH_SQRT_STEP_FP64(); \
+ VARIANT2_INTEGER_MATH_SQRT_FIXUP(sqrt_result); \
+ } while (0)
+
+#define VARIANT2_2() \
+ do if (variant >= 2) { \
+ ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[0] ^= hi; \
+ ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[1] ^= lo; \
+ hi ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[0]; \
+ lo ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[1]; \
+ } while (0)
+
+#pragma pack(push, 1)
+union cn_slow_hash_state {
+ union hash_state hs;
+ struct {
+ uint8_t k[64];
+ uint8_t init[INIT_SIZE_BYTE];
+ };
+};
+#pragma pack(pop)
+
+static void do_turtle_blake_hash(const void* input, size_t len, char* output) {
+ blake256_hash((uint8_t*)output, input, len);
+}
+
+void do_turtle_groestl_hash(const void* input, size_t len, char* output) {
+ groestl(input, len * 8, (uint8_t*)output);
+}
+
+static void do_turtle_jh_hash(const void* input, size_t len, char* output) {
+ int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output);
+ assert(SUCCESS == r);
+}
+
+static void do_turtle_skein_hash(const void* input, size_t len, char* output) {
+ int r = c_skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
+ assert(SKEIN_SUCCESS == r);
+}
+
+static void (* const extra_hashes[4])(const void *, size_t, char *) = {
+ do_turtle_blake_hash, do_turtle_groestl_hash, do_turtle_jh_hash, do_turtle_skein_hash
+};
+
+extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
+extern int aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey);
+
+static inline size_t e2i(const uint8_t* a) {
+ return (*((uint64_t*) a) / AES_BLOCK_SIZE) & (CN_AES_INIT - 1);
+}
+
+static void mul(const uint8_t* a, const uint8_t* b, uint8_t* res) {
+ ((uint64_t*) res)[1] = mul128(((uint64_t*) a)[0], ((uint64_t*) b)[0], (uint64_t*) res);
+}
+
+static void sum_half_blocks(uint8_t* a, const uint8_t* b) {
+ uint64_t a0, a1, b0, b1;
+
+ a0 = SWAP64LE(((uint64_t*) a)[0]);
+ a1 = SWAP64LE(((uint64_t*) a)[1]);
+ b0 = SWAP64LE(((uint64_t*) b)[0]);
+ b1 = SWAP64LE(((uint64_t*) b)[1]);
+ a0 += b0;
+ a1 += b1;
+ ((uint64_t*) a)[0] = SWAP64LE(a0);
+ ((uint64_t*) a)[1] = SWAP64LE(a1);
+}
+
+static inline void copy_block(uint8_t* dst, const uint8_t* src) {
+ ((uint64_t*) dst)[0] = ((uint64_t*) src)[0];
+ ((uint64_t*) dst)[1] = ((uint64_t*) src)[1];
+}
+
+static void swap_blocks(uint8_t* a, uint8_t* b) {
+ size_t i;
+ uint8_t t;
+ for (i = 0; i < AES_BLOCK_SIZE; i++) {
+ t = a[i];
+ a[i] = b[i];
+ b[i] = t;
+ }
+}
+
+static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
+ ((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
+ ((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
+}
+
+static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
+ ((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
+ ((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
+}
+
+struct cryptonightturtle_ctx {
+  uint8_t long_state[MEMORY];     /* 256 KiB scratchpad */
+  union cn_slow_hash_state state; /* Keccak state overlay (k / init fields) */
+  uint8_t text[INIT_SIZE_BYTE];   /* streaming buffer for scratchpad (re)encryption */
+  uint8_t a[AES_BLOCK_SIZE];
+  uint8_t b[AES_BLOCK_SIZE * 2];  /* two blocks: current b plus previous b (needed by variant 2) */
+  uint8_t c[AES_BLOCK_SIZE];
+  uint8_t aes_key[AES_KEY_SIZE];
+  oaes_ctx* aes_ctx;              /* OpenAES context, freed at end of hash */
+};
+
+void cryptonightturtle_hash(const char* input, char* output, uint32_t len, int variant) { /* CN-Turtle slow hash: 256 KiB scratchpad, 2^16 loop iterations; 32-byte output */
+#if defined(_MSC_VER)
+  struct cryptonightturtle_ctx *ctx = _malloca(sizeof(struct cryptonightturtle_ctx)); /* NOTE(review): _malloca needs a matching _freea; missing here — leaks on MSVC when heap-allocated */
+#else
+  struct cryptonightturtle_ctx *ctx = alloca(sizeof(struct cryptonightturtle_ctx)); /* NOTE(review): >256 KiB on the stack — confirm thread stack size */
+#endif
+  hash_process(&ctx->state.hs, (const uint8_t*) input, len); /* Keccak state from input */
+  memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
+  memcpy(ctx->aes_key, ctx->state.hs.b, AES_KEY_SIZE);
+  ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
+  size_t i, j;
+
+  VARIANT1_INIT();
+  VARIANT2_INIT(ctx->b, ctx->state);
+
+  /* Fill the scratchpad, INIT_SIZE_BYTE bytes per step. */
+  oaes_key_import_data(ctx->aes_ctx, ctx->aes_key, AES_KEY_SIZE);
+  for (i = 0; i < CN_INIT; i++) {
+    for (j = 0; j < INIT_SIZE_BLK; j++) {
+      aesb_pseudo_round(&ctx->text[AES_BLOCK_SIZE * j],
+        &ctx->text[AES_BLOCK_SIZE * j],
+        ctx->aes_ctx->key->exp_data);
+    }
+    memcpy(&ctx->long_state[i * INIT_SIZE_BYTE], ctx->text, INIT_SIZE_BYTE);
+  }
+
+  for (i = 0; i < 16; i++) {
+    ctx->a[i] = ctx->state.k[i] ^ ctx->state.k[32 + i];
+    ctx->b[i] = ctx->state.k[16 + i] ^ ctx->state.k[48 + i];
+  }
+
+  for (i = 0; i < ITER_DIV; i++) {
+    /* Dependency chain: address -> read value ------+
+     * written value <-+ hard function (AES or MUL) <+
+     * next address  <-+
+     */
+    /* Iteration 1 */
+    j = e2i(ctx->a);
+    aesb_single_round(&ctx->long_state[j * AES_BLOCK_SIZE], ctx->c, ctx->a);
+    VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b);
+    xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j * AES_BLOCK_SIZE]);
+    VARIANT1_1((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]);
+    /* Iteration 2 */
+    j = e2i(ctx->c);
+
+    uint64_t* dst = (uint64_t*)&ctx->long_state[j * AES_BLOCK_SIZE];
+
+    uint64_t t[2];
+    t[0] = dst[0];
+    t[1] = dst[1];
+
+    VARIANT2_INTEGER_MATH(t, ctx->c);
+
+    uint64_t hi;
+    uint64_t lo = mul128(((uint64_t*)ctx->c)[0], t[0], &hi);
+
+    VARIANT2_2();
+    VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b);
+
+    ((uint64_t*)ctx->a)[0] += hi;
+    ((uint64_t*)ctx->a)[1] += lo;
+
+    dst[0] = ((uint64_t*)ctx->a)[0];
+    dst[1] = ((uint64_t*)ctx->a)[1];
+
+    ((uint64_t*)ctx->a)[0] ^= t[0];
+    ((uint64_t*)ctx->a)[1] ^= t[1];
+
+    VARIANT1_2((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]);
+    copy_block(ctx->b + AES_BLOCK_SIZE, ctx->b); /* remember previous b for the v2 shuffle */
+    copy_block(ctx->b, ctx->c);
+  }
+
+  /* Re-encrypt the scratchpad into text and fold it back into the state. */
+  memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
+  oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
+  for (i = 0; i < CN_INIT; i++) {
+    for (j = 0; j < INIT_SIZE_BLK; j++) {
+      xor_blocks(&ctx->text[j * AES_BLOCK_SIZE],
+        &ctx->long_state[i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE]);
+      aesb_pseudo_round(&ctx->text[j * AES_BLOCK_SIZE],
+        &ctx->text[j * AES_BLOCK_SIZE],
+        ctx->aes_ctx->key->exp_data);
+    }
+  }
+  memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
+  hash_permutation(&ctx->state.hs);
+  /* final hash selected by the low two bits of state byte 0 */
+  extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
+  oaes_free((OAES_CTX **) &ctx->aes_ctx);
+}
+
+void cryptonightturtle_fast_hash(const char* input, char* output, uint32_t len) { /* cn_fast_hash: one Keccak pass, no scratchpad */
+  union hash_state state;                            /* raw Keccak state */
+  hash_process(&state, (const uint8_t*) input, len); /* absorb input */
+  memcpy(output, &state, HASH_SIZE);                 /* first HASH_SIZE bytes of the state are the digest */
+}
diff --git a/stratum/algos/cryptonote/cryptonight_turtle.h b/stratum/algos/cryptonote/cryptonight_turtle.h
new file mode 100644
index 000000000..e9334e449
--- /dev/null
+++ b/stratum/algos/cryptonote/cryptonight_turtle.h
@@ -0,0 +1,17 @@
+#ifndef CRYPTONIGHTTURTLE_H
+#define CRYPTONIGHTTURTLE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h> /* uint32_t; the include target was lost in the patch — restored */
+
+void cryptonightturtle_hash(const char* input, char* output, uint32_t len, int variant);
+void cryptonightturtle_fast_hash(const char* input, char* output, uint32_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/stratum/algos/cryptonote/cryptonight_turtle_lite.c b/stratum/algos/cryptonote/cryptonight_turtle_lite.c
new file mode 100644
index 000000000..4731537e7
--- /dev/null
+++ b/stratum/algos/cryptonote/cryptonight_turtle_lite.c
@@ -0,0 +1,300 @@
+// Copyright (c) 2012-2013 The Cryptonote developers
+// Distributed under the MIT/X11 software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+// Portions Copyright (c) 2018 The Monero developers
+// Portions Copyright (c) 2018 The TurtleCoin Developers
+
+#include <stdio.h>  /* fprintf in VARIANT1_INIT (include target was lost in the patch — restored; TODO confirm against upstream) */
+#include <stdlib.h> /* alloca (via compiler builtin on glibc/gcc) */
+#include "crypto/oaes_lib.h"
+#include "crypto/c_keccak.h"
+#include "crypto/c_groestl.h"
+#include "crypto/c_blake256.h"
+#include "crypto/c_jh.h"
+#include "crypto/c_skein.h"
+#include "crypto/int-util.h"
+#include "crypto/hash-ops.h"
+#include "crypto/variant2_int_sqrt.h"
+
+#if defined(_MSC_VER)
+#include <malloc.h> /* _malloca / _freea */
+#endif
+
+#define MEMORY 262144 /* 256KB - 2^18 */
+#define ITER 131072 /* 2^17 */
+#define ITER_DIV 65536 /* 2^16 */
+#define AES_BLOCK_SIZE 16
+#define AES_KEY_SIZE 32 /*16*/
+#define INIT_SIZE_BLK 8
+#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
+#define CN_INIT (MEMORY / INIT_SIZE_BYTE)
+#define CN_AES_INIT (MEMORY / AES_BLOCK_SIZE) / 2
+
+#define VARIANT1_1(p) \
+ do if (variant == 1) \
+ { \
+ const uint8_t tmp = ((const uint8_t*)(p))[11]; \
+ static const uint32_t table = 0x75310; \
+ const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \
+ ((uint8_t*)(p))[11] = tmp ^ ((table >> index) & 0x30); \
+ } while(0)
+
+#define VARIANT1_2(p) \
+ do if (variant == 1) \
+ { \
+ ((uint64_t*)p)[1] ^= tweak1_2; \
+ } while(0)
+
+#define VARIANT1_INIT() \
+ if (variant == 1 && len < 43) \
+ { \
+ fprintf(stderr, "Cryptonight variant 1 needs at least 43 bytes of data"); \
+ _exit(1); \
+ } \
+ const uint64_t tweak1_2 = (variant == 1) ? *(const uint64_t*)(((const uint8_t*)input)+35) ^ ctx->state.hs.w[24] : 0
+
+#define U64(p) ((uint64_t*)(p))
+
+#define VARIANT2_INIT(b, state) \
+ uint64_t division_result; \
+ uint64_t sqrt_result; \
+ do if (variant >= 2) \
+ { \
+ U64(b)[2] = state.hs.w[8] ^ state.hs.w[10]; \
+ U64(b)[3] = state.hs.w[9] ^ state.hs.w[11]; \
+ division_result = state.hs.w[12]; \
+ sqrt_result = state.hs.w[13]; \
+ } while (0)
+
+#define VARIANT2_SHUFFLE_ADD(base_ptr, offset, a, b) \
+ do if (variant >= 2) \
+ { \
+ uint64_t* chunk1 = U64((base_ptr) + ((offset) ^ 0x10)); \
+ uint64_t* chunk2 = U64((base_ptr) + ((offset) ^ 0x20)); \
+ uint64_t* chunk3 = U64((base_ptr) + ((offset) ^ 0x30)); \
+ \
+ const uint64_t chunk1_old[2] = { chunk1[0], chunk1[1] }; \
+ \
+ chunk1[0] = chunk3[0] + U64(b + 16)[0]; \
+ chunk1[1] = chunk3[1] + U64(b + 16)[1]; \
+ \
+ chunk3[0] = chunk2[0] + U64(a)[0]; \
+ chunk3[1] = chunk2[1] + U64(a)[1]; \
+ \
+ chunk2[0] = chunk1_old[0] + U64(b)[0]; \
+ chunk2[1] = chunk1_old[1] + U64(b)[1]; \
+ } while (0)
+
+#define VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr) \
+ ((uint64_t*)(b))[0] ^= division_result ^ (sqrt_result << 32); \
+ { \
+ const uint64_t dividend = ((uint64_t*)(ptr))[1]; \
+ const uint32_t divisor = (((uint32_t*)(ptr))[0] + (uint32_t)(sqrt_result << 1)) | 0x80000001UL; \
+ division_result = ((uint32_t)(dividend / divisor)) + \
+ (((uint64_t)(dividend % divisor)) << 32); \
+ } \
+ const uint64_t sqrt_input = ((uint64_t*)(ptr))[0] + division_result
+
+#define VARIANT2_INTEGER_MATH(b, ptr) \
+ do if (variant >= 2) \
+ { \
+ VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr); \
+ VARIANT2_INTEGER_MATH_SQRT_STEP_FP64(); \
+ VARIANT2_INTEGER_MATH_SQRT_FIXUP(sqrt_result); \
+ } while (0)
+
+#define VARIANT2_2() \
+ do if (variant >= 2) { \
+ ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[0] ^= hi; \
+ ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[1] ^= lo; \
+ hi ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[0]; \
+ lo ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[1]; \
+ } while (0)
+
+#pragma pack(push, 1)
+union cn_slow_hash_state {
+ union hash_state hs;
+ struct {
+ uint8_t k[64];
+ uint8_t init[INIT_SIZE_BYTE];
+ };
+};
+#pragma pack(pop)
+
+static void do_turtle_lite_blake_hash(const void* input, size_t len, char* output) {
+ blake256_hash((uint8_t*)output, input, len);
+}
+
+void do_turtle_lite_groestl_hash(const void* input, size_t len, char* output) {
+ groestl(input, len * 8, (uint8_t*)output);
+}
+
+static void do_turtle_lite_jh_hash(const void* input, size_t len, char* output) {
+ int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output);
+ assert(SUCCESS == r);
+}
+
+static void do_turtle_lite_skein_hash(const void* input, size_t len, char* output) {
+ int r = c_skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output);
+ assert(SKEIN_SUCCESS == r);
+}
+
+static void (* const extra_hashes[4])(const void *, size_t, char *) = {
+ do_turtle_lite_blake_hash, do_turtle_lite_groestl_hash, do_turtle_lite_jh_hash, do_turtle_lite_skein_hash
+};
+
+extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
+extern int aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey);
+
+static inline size_t e2i(const uint8_t* a) {
+ return (*((uint64_t*) a) / AES_BLOCK_SIZE) & (CN_AES_INIT - 1);
+}
+
+static void mul(const uint8_t* a, const uint8_t* b, uint8_t* res) {
+ ((uint64_t*) res)[1] = mul128(((uint64_t*) a)[0], ((uint64_t*) b)[0], (uint64_t*) res);
+}
+
+static void sum_half_blocks(uint8_t* a, const uint8_t* b) {
+ uint64_t a0, a1, b0, b1;
+
+ a0 = SWAP64LE(((uint64_t*) a)[0]);
+ a1 = SWAP64LE(((uint64_t*) a)[1]);
+ b0 = SWAP64LE(((uint64_t*) b)[0]);
+ b1 = SWAP64LE(((uint64_t*) b)[1]);
+ a0 += b0;
+ a1 += b1;
+ ((uint64_t*) a)[0] = SWAP64LE(a0);
+ ((uint64_t*) a)[1] = SWAP64LE(a1);
+}
+
+static inline void copy_block(uint8_t* dst, const uint8_t* src) {
+ ((uint64_t*) dst)[0] = ((uint64_t*) src)[0];
+ ((uint64_t*) dst)[1] = ((uint64_t*) src)[1];
+}
+
+static void swap_blocks(uint8_t* a, uint8_t* b) {
+ size_t i;
+ uint8_t t;
+ for (i = 0; i < AES_BLOCK_SIZE; i++) {
+ t = a[i];
+ a[i] = b[i];
+ b[i] = t;
+ }
+}
+
+static inline void xor_blocks(uint8_t* a, const uint8_t* b) {
+ ((uint64_t*) a)[0] ^= ((uint64_t*) b)[0];
+ ((uint64_t*) a)[1] ^= ((uint64_t*) b)[1];
+}
+
+static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
+ ((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0];
+ ((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
+}
+
+struct cryptonightturtlelite_ctx {
+  uint8_t long_state[MEMORY];     /* 256 KiB scratchpad; the main loop only addresses the first half (CN_AES_INIT is halved) */
+  union cn_slow_hash_state state; /* Keccak state overlay (k / init fields) */
+  uint8_t text[INIT_SIZE_BYTE];   /* streaming buffer for scratchpad (re)encryption */
+  uint8_t a[AES_BLOCK_SIZE];
+  uint8_t b[AES_BLOCK_SIZE * 2];  /* two blocks: current b plus previous b (needed by variant 2) */
+  uint8_t c[AES_BLOCK_SIZE];
+  uint8_t aes_key[AES_KEY_SIZE];
+  oaes_ctx* aes_ctx;              /* OpenAES context, freed at end of hash */
+};
+
+void cryptonightturtlelite_hash(const char* input, char* output, uint32_t len, int variant) { /* CN-Turtle-Lite: like turtle but e2i masks only half the scratchpad */
+#if defined(_MSC_VER)
+  struct cryptonightturtlelite_ctx *ctx = _malloca(sizeof(struct cryptonightturtlelite_ctx)); /* NOTE(review): _malloca needs a matching _freea; missing here — leaks on MSVC when heap-allocated */
+#else
+  struct cryptonightturtlelite_ctx *ctx = alloca(sizeof(struct cryptonightturtlelite_ctx)); /* NOTE(review): >256 KiB on the stack — confirm thread stack size */
+#endif
+  hash_process(&ctx->state.hs, (const uint8_t*) input, len); /* Keccak state from input */
+  memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
+  memcpy(ctx->aes_key, ctx->state.hs.b, AES_KEY_SIZE);
+  ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
+  size_t i, j;
+
+  VARIANT1_INIT();
+  VARIANT2_INIT(ctx->b, ctx->state);
+
+  /* Fill the whole scratchpad (even though the loop below only reads half). */
+  oaes_key_import_data(ctx->aes_ctx, ctx->aes_key, AES_KEY_SIZE);
+  for (i = 0; i < CN_INIT; i++) {
+    for (j = 0; j < INIT_SIZE_BLK; j++) {
+      aesb_pseudo_round(&ctx->text[AES_BLOCK_SIZE * j],
+        &ctx->text[AES_BLOCK_SIZE * j],
+        ctx->aes_ctx->key->exp_data);
+    }
+    memcpy(&ctx->long_state[i * INIT_SIZE_BYTE], ctx->text, INIT_SIZE_BYTE);
+  }
+
+  for (i = 0; i < 16; i++) {
+    ctx->a[i] = ctx->state.k[i] ^ ctx->state.k[32 + i];
+    ctx->b[i] = ctx->state.k[16 + i] ^ ctx->state.k[48 + i];
+  }
+
+  for (i = 0; i < ITER_DIV; i++) {
+    /* Dependency chain: address -> read value ------+
+     * written value <-+ hard function (AES or MUL) <+
+     * next address  <-+
+     */
+    /* Iteration 1 */
+    j = e2i(ctx->a);
+    aesb_single_round(&ctx->long_state[j * AES_BLOCK_SIZE], ctx->c, ctx->a);
+    VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b);
+    xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j * AES_BLOCK_SIZE]);
+    VARIANT1_1((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]);
+    /* Iteration 2 */
+    j = e2i(ctx->c);
+
+    uint64_t* dst = (uint64_t*)&ctx->long_state[j * AES_BLOCK_SIZE];
+
+    uint64_t t[2];
+    t[0] = dst[0];
+    t[1] = dst[1];
+
+    VARIANT2_INTEGER_MATH(t, ctx->c);
+
+    uint64_t hi;
+    uint64_t lo = mul128(((uint64_t*)ctx->c)[0], t[0], &hi);
+
+    VARIANT2_2();
+    VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b);
+
+    ((uint64_t*)ctx->a)[0] += hi;
+    ((uint64_t*)ctx->a)[1] += lo;
+
+    dst[0] = ((uint64_t*)ctx->a)[0];
+    dst[1] = ((uint64_t*)ctx->a)[1];
+
+    ((uint64_t*)ctx->a)[0] ^= t[0];
+    ((uint64_t*)ctx->a)[1] ^= t[1];
+
+    VARIANT1_2((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]);
+    copy_block(ctx->b + AES_BLOCK_SIZE, ctx->b); /* remember previous b for the v2 shuffle */
+    copy_block(ctx->b, ctx->c);
+  }
+
+  /* Re-encrypt the scratchpad into text and fold it back into the state. */
+  memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
+  oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
+  for (i = 0; i < CN_INIT; i++) {
+    for (j = 0; j < INIT_SIZE_BLK; j++) {
+      xor_blocks(&ctx->text[j * AES_BLOCK_SIZE],
+        &ctx->long_state[i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE]);
+      aesb_pseudo_round(&ctx->text[j * AES_BLOCK_SIZE],
+        &ctx->text[j * AES_BLOCK_SIZE],
+        ctx->aes_ctx->key->exp_data);
+    }
+  }
+  memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
+  hash_permutation(&ctx->state.hs);
+  /* final hash selected by the low two bits of state byte 0 */
+  extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
+  oaes_free((OAES_CTX **) &ctx->aes_ctx);
+}
+
+void cryptonightturtlelite_fast_hash(const char* input, char* output, uint32_t len) { /* cn_fast_hash: one Keccak pass, no scratchpad */
+  union hash_state state;                            /* raw Keccak state */
+  hash_process(&state, (const uint8_t*) input, len); /* absorb input */
+  memcpy(output, &state, HASH_SIZE);                 /* first HASH_SIZE bytes of the state are the digest */
+}
diff --git a/stratum/algos/cryptonote/cryptonight_turtle_lite.h b/stratum/algos/cryptonote/cryptonight_turtle_lite.h
new file mode 100644
index 000000000..28bcd5282
--- /dev/null
+++ b/stratum/algos/cryptonote/cryptonight_turtle_lite.h
@@ -0,0 +1,17 @@
+#ifndef CRYPTONIGHTTURTLELITE_H
+#define CRYPTONIGHTTURTLELITE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h> /* uint32_t; the include target was lost in the patch — restored */
+
+void cryptonightturtlelite_hash(const char* input, char* output, uint32_t len, int variant);
+void cryptonightturtlelite_fast_hash(const char* input, char* output, uint32_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/stratum/algos/curvehash.c b/stratum/algos/curvehash.c
new file mode 100644
index 000000000..ab02fd801
--- /dev/null
+++ b/stratum/algos/curvehash.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright 2011 ArtForz
+ * Copyright 2011-2013 pooler
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version. See COPYING for more details.
+ */
+
+#include "curvehash.h"
+#include "../secp256k1/include/secp256k1.h"
+#include
+#include
+
+#ifdef _MSC_VER
+#define ROTL(a, b) _rotl(a,b)
+#define ROTR(a, b) _rotr(a,b)
+#else
+#define ROTL(a, b) (((a) << b) | ((a) >> (32 - b)))
+#define ROTR(a, b) ((a >> b) | (a << (32 - b)))
+#endif
+#ifndef _MSC_VER
+#define _ALIGN(x) __attribute__ ((aligned(x)))
+#endif
+static const uint32_t sha256_h[8] = {
+ 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
+ 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
+};
+static const uint32_t sha256_k[64] = {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+/* Decode a 32-bit big-endian value from the byte buffer pp. */
+static __inline uint32_t
+be32dec(const void *pp)
+{
+ const uint8_t *p = (uint8_t const *)pp;
+ uint32_t v = 0;
+
+ for (int i = 0; i < 4; i++)
+  v = (v << 8) | (uint32_t)p[i];
+ return v;
+}
+
+/* Encode x into pp as a 32-bit big-endian value. */
+static __inline void
+be32enc(void *pp, uint32_t x)
+{
+ uint8_t * p = (uint8_t *)pp;
+
+ for (int i = 3; i >= 0; i--) {
+  p[i] = (uint8_t)(x & 0xff);
+  x >>= 8;
+ }
+}
+
+/* Elementary functions used by SHA256 */
+#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
+#define Maj(x, y, z) ((x & (y | z)) | (y & z))
+#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
+#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
+#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ (x >> 3))
+#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ (x >> 10))
+
+/* SHA256 round function */
+#define RND(a, b, c, d, e, f, g, h, k) \
+ do { \
+ t0 = h + S1(e) + Ch(e, f, g) + k; \
+ t1 = S0(a) + Maj(a, b, c); \
+ d += t0; \
+ h = t0 + t1; \
+ } while (0)
+
+/* Adjusted round function for rotating state */
+#define RNDr(S, W, i) \
+ RND(S[(64 - i) % 8], S[(65 - i) % 8], \
+ S[(66 - i) % 8], S[(67 - i) % 8], \
+ S[(68 - i) % 8], S[(69 - i) % 8], \
+ S[(70 - i) % 8], S[(71 - i) % 8], \
+ W[i] + sha256_k[i])
+/* Reset a SHA-256 state (8 words) to the standard initial chaining values. */
+void sha256_init_curve(uint32_t *state)
+{
+ memcpy(state, sha256_h, 32);
+}
+/* One SHA-256 compression over a single 64-byte block.
+ * 'block' must be a mutable array of 64 uint32_t: the message schedule
+ * W[16..63] is expanded in place, clobbering the caller's buffer. */
+static inline void sha256_transform_volatile(uint32_t *state, uint32_t *block)
+{
+ uint32_t* W=block; //note: block needs to be a mutable 64 int32_t
+ uint32_t S[8];
+ uint32_t t0, t1;
+ int i;
+
+ /* 1. Expand the message schedule, two words per iteration. */
+ for (i = 16; i < 64; i += 2) {
+ W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
+ W[i+1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15];
+ }
+
+ /* 2. Initialize working variables. */
+ memcpy(S, state, 32);
+
+ /* 3. Mix. (Fully unrolled: RNDr rotates the role of each word via the
+  * (64 - i) % 8 indexing instead of shuffling the variables.) */
+ RNDr(S, W, 0);
+ RNDr(S, W, 1);
+ RNDr(S, W, 2);
+ RNDr(S, W, 3);
+ RNDr(S, W, 4);
+ RNDr(S, W, 5);
+ RNDr(S, W, 6);
+ RNDr(S, W, 7);
+ RNDr(S, W, 8);
+ RNDr(S, W, 9);
+ RNDr(S, W, 10);
+ RNDr(S, W, 11);
+ RNDr(S, W, 12);
+ RNDr(S, W, 13);
+ RNDr(S, W, 14);
+ RNDr(S, W, 15);
+ RNDr(S, W, 16);
+ RNDr(S, W, 17);
+ RNDr(S, W, 18);
+ RNDr(S, W, 19);
+ RNDr(S, W, 20);
+ RNDr(S, W, 21);
+ RNDr(S, W, 22);
+ RNDr(S, W, 23);
+ RNDr(S, W, 24);
+ RNDr(S, W, 25);
+ RNDr(S, W, 26);
+ RNDr(S, W, 27);
+ RNDr(S, W, 28);
+ RNDr(S, W, 29);
+ RNDr(S, W, 30);
+ RNDr(S, W, 31);
+ RNDr(S, W, 32);
+ RNDr(S, W, 33);
+ RNDr(S, W, 34);
+ RNDr(S, W, 35);
+ RNDr(S, W, 36);
+ RNDr(S, W, 37);
+ RNDr(S, W, 38);
+ RNDr(S, W, 39);
+ RNDr(S, W, 40);
+ RNDr(S, W, 41);
+ RNDr(S, W, 42);
+ RNDr(S, W, 43);
+ RNDr(S, W, 44);
+ RNDr(S, W, 45);
+ RNDr(S, W, 46);
+ RNDr(S, W, 47);
+ RNDr(S, W, 48);
+ RNDr(S, W, 49);
+ RNDr(S, W, 50);
+ RNDr(S, W, 51);
+ RNDr(S, W, 52);
+ RNDr(S, W, 53);
+ RNDr(S, W, 54);
+ RNDr(S, W, 55);
+ RNDr(S, W, 56);
+ RNDr(S, W, 57);
+ RNDr(S, W, 58);
+ RNDr(S, W, 59);
+ RNDr(S, W, 60);
+ RNDr(S, W, 61);
+ RNDr(S, W, 62);
+ RNDr(S, W, 63);
+
+ /* 4. Mix local working variables into global state */
+ for (i = 0; i < 8; i++)
+ state[i] += S[i];
+}
+
+/* SHA-256 of 'data' (len bytes); the 32-byte digest is written to 'hash'.
+ * NOTE(review): the const qualifiers look swapped — 'hash' is the OUTPUT
+ * (written via be32enc through a cast) and 'data' is the input. Callers
+ * already cast both; confirm and fix the prototype when touching them. */
+void sha256hash(const char* hash, char* data, uint32_t len)
+{
+ uint32_t _ALIGN(64) S[16];
+ uint32_t _ALIGN(64) T[64];
+ int i, r;
+
+ sha256_init_curve(S);
+ /* Process 64-byte blocks. The loop condition r > -9 guarantees one extra
+  * iteration when needed, so a final block always exists to hold the 0x80
+  * pad byte and the 8-byte-aligned bit-length word. */
+ for (r = len; r > -9; r -= 64) {
+ if (r < 64)
+ memset(T, 0, 64);
+ memcpy(T, data + len - r, r > 64 ? 64 : (r < 0 ? 0 : r));
+ if (r >= 0 && r < 64)
+ ((unsigned char *)T)[r] = 0x80;
+ for (i = 0; i < 16; i++)
+ T[i] = be32dec(T + i);
+ /* Bit length goes in the last word of the final block; T is already in
+  * host order here. Assumes 8*len fits in 32 bits (len < 512 MiB). */
+ if (r < 56)
+ T[15] = 8 * len;
+ //sha256_transform(S, T, 0);
+ sha256_transform_volatile(S, T);
+ }
+ for (i = 0; i < 8; i++)
+ be32enc((uint32_t *)hash + i, S[i]);
+}
+
+/* Curvehash proof-of-work: SHA-256 the input, then 8 rounds of
+ * (treat digest as secp256k1 private key -> derive 65-byte uncompressed
+ * public key -> SHA-256 it). The final digest's 32 bytes are the result. */
+void curve_hash(const char* input, char* output, uint32_t len)
+{
+ uint32_t _ALIGN(128) hash[8];
+
+ // secp256k1 context for PoW
+ // NOTE(review): context is created/destroyed on every call, which is
+ // expensive; a cached (per-thread) context would be faster — confirm
+ // thread-ownership before changing.
+ secp256k1_context *ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN);
+ secp256k1_pubkey pubkey;
+
+ unsigned char pub[65];
+ size_t publen = 65;
+
+
+ // Calculate initial SHA256 hash of blockheader and nonce
+ sha256hash((unsigned char *) hash, (unsigned char *) input, len);
+
+ // 8 rounds of secp256k1 and sha256
+ for(int round=0; round<8; round++)
+ {
+ // Assume SHA256 result as private key and compute uncompressed public key
+ // NOTE(review): return values are unchecked — pubkey_create fails when the
+ // digest is 0 or >= the group order (astronomically rare, but then 'pub'
+ // would carry stale data for this round).
+ secp256k1_ec_pubkey_create(ctx, &pubkey, (unsigned char *) hash);
+ secp256k1_ec_pubkey_serialize(ctx, pub, &publen, &pubkey, SECP256K1_EC_UNCOMPRESSED);
+
+ // Use SHA256 to hash resulting public key
+ sha256hash((unsigned char *) hash, pub, 65);
+ }
+ secp256k1_context_destroy(ctx);
+
+ memcpy(output, hash, 32);
+}
diff --git a/stratum/algos/curvehash.h b/stratum/algos/curvehash.h
new file mode 100644
index 000000000..a9571c21c
--- /dev/null
+++ b/stratum/algos/curvehash.h
@@ -0,0 +1,16 @@
+#ifndef CURVE_H
+#define CURVE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include
+
+void curve_hash(const char* input, char* output, uint32_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/stratum/algos/dedal.c b/stratum/algos/dedal.c
new file mode 100644
index 000000000..f6cdcb34e
--- /dev/null
+++ b/stratum/algos/dedal.c
@@ -0,0 +1,187 @@
+#include
+#include
+#include
+
+#include "dedal.h"
+#include "../sha3/sph_blake.h"
+#include "../sha3/sph_bmw.h"
+#include "../sha3/sph_groestl.h"
+#include "../sha3/sph_jh.h"
+#include "../sha3/sph_keccak.h"
+#include "../sha3/sph_skein.h"
+#include "../sha3/sph_luffa.h"
+#include "../sha3/sph_cubehash.h"
+#include "../sha3/sph_shavite.h"
+#include "../sha3/sph_simd.h"
+#include "../sha3/sph_echo.h"
+#include "../sha3/sph_hamsi.h"
+#include "../sha3/sph_fugue.h"
+#include "../sha3/sph_shabal.h"
+#include "../sha3/sph_whirlpool.h"
+#include "../sha3/sph_sha2.h"
+
+const uint8_t Kspeed[16] = {
+ 200, // BLAKE
+ 236, // BMW
+ 252, // SKEIN
+ 224, // KECCAK
+ 240, // SHA512
+ 230, // SHABAL
+ 79, // WHIRLPOOL
+ 78, // LUFFA
+ 89, // CUBEHASH
+ 62, // SHAVITE
+ 59, // FUGUE
+ 119, // JH
+ 62, // HAMSI
+ 52, // ECHO
+ 22, // SIMD
+ 47 // GROESTL
+};
+
+/* Derive the dedal hash schedule from the previous block hash.
+ * ord[0..5]  = low nibble of bytes 0..5; ord[6..11] = high nibble of bytes 1..6.
+ * Round count = (sum of the 12 selected algos' Kspeed + 920) >> 7; rounds past
+ * the first 12 are selected from further nibbles/bytes. 'output' must hold at
+ * least 32 entries (largest index written is 29); caller zero-initializes it. */
+static void get_hash_order(const uint32_t* prevblock, uint8_t* output, uint8_t* hashrounds)
+{
+ uint8_t* ord = output;
+ uint8_t hr = 0;
+ uint8_t* data = (uint8_t*)prevblock;
+ uint16_t tspeed = 0;
+
+ for (uint8_t i = 0; i < 6; i++) {
+ ord[i] = data[i] % 16;
+ ord[i + 6] = data[i+1] >> 4;
+ tspeed += Kspeed[ord[i]] + Kspeed[ord[i + 6]];
+ }
+ hr = tspeed + 920 >> 7; // '+' binds tighter than '>>': this is (tspeed + 920) >> 7
+
+ int8_t c = hr - 12; // may be <= 0, in which case no extra rounds are appended
+ for (uint8_t i = 0; i < c ; i++) {
+ if (i < 15) {
+ uint8_t j = i >> 1;
+ ord[i + 12] = (i & 1) ? data[j] % 6 : data[j] % 5;
+ } else {
+ ord[i + 12] = data[i - 15] % 4;
+ }
+ }
+ *hashrounds = hr;
+}
+
+/* Dedal: a chained multi-hash whose function sequence and round count are
+ * derived from the previous block hash (words starting at input[4], see
+ * get_hash_order). Each round hashes the previous 64-byte digest with the
+ * next selected function; the first 32 bytes of the final digest are output. */
+void dedal_hash(const char* input, char* output, uint32_t len)
+{
+
+ unsigned char hash[128];
+
+ sph_blake512_context ctx_blake;
+ sph_bmw512_context ctx_bmw;
+ sph_groestl512_context ctx_groestl;
+ sph_jh512_context ctx_jh;
+ sph_keccak512_context ctx_keccak;
+ sph_skein512_context ctx_skein;
+ sph_luffa512_context ctx_luffa;
+ sph_cubehash512_context ctx_cubehash;
+ sph_shavite512_context ctx_shavite;
+ sph_simd512_context ctx_simd;
+ sph_echo512_context ctx_echo;
+ sph_hamsi512_context ctx_hamsi;
+ sph_fugue512_context ctx_fugue;
+ sph_shabal512_context ctx_shabal;
+ sph_whirlpool_context ctx_whirlpool;
+ sph_sha512_context ctx_sha512;
+
+ const void *in = input;
+ int size = len;
+ uint32_t *in32 = (uint32_t*) input;
+ uint8_t hashorder[32] = {}; // zero-filled: entries past hashrounds stay 0
+ uint8_t hashrounds = 0;
+
+ // schedule comes from the prev-block-hash words (skipping the version word)
+ get_hash_order(&in32[1], hashorder, &hashrounds);
+
+ for (int i = 0; i < hashrounds; i++)
+ {
+ // case labels follow the Kspeed table ordering in this file
+ switch (hashorder[i])
+ {
+ case 0:
+ sph_blake512_init(&ctx_blake);
+ sph_blake512(&ctx_blake, in, size);
+ sph_blake512_close(&ctx_blake, hash);
+ break;
+ case 1:
+ sph_bmw512_init(&ctx_bmw);
+ sph_bmw512(&ctx_bmw, in, size);
+ sph_bmw512_close(&ctx_bmw, hash);
+ break;
+ case 2:
+ sph_skein512_init(&ctx_skein);
+ sph_skein512(&ctx_skein, in, size);
+ sph_skein512_close(&ctx_skein, hash);
+ break;
+ case 3:
+ sph_keccak512_init(&ctx_keccak);
+ sph_keccak512(&ctx_keccak, in, size);
+ sph_keccak512_close(&ctx_keccak, hash);
+ break;
+ case 4:
+ sph_sha512_init(&ctx_sha512);
+ sph_sha512(&ctx_sha512, in, size);
+ sph_sha512_close(&ctx_sha512, hash);
+ break;
+ case 5:
+ sph_shabal512_init(&ctx_shabal);
+ sph_shabal512(&ctx_shabal, in, size);
+ sph_shabal512_close(&ctx_shabal, hash);
+ break;
+ case 6:
+ sph_whirlpool_init(&ctx_whirlpool);
+ sph_whirlpool(&ctx_whirlpool, in, size);
+ sph_whirlpool_close(&ctx_whirlpool, hash);
+ break;
+ case 7:
+ sph_luffa512_init(&ctx_luffa);
+ sph_luffa512(&ctx_luffa, in, size);
+ sph_luffa512_close(&ctx_luffa, hash);
+ break;
+ case 8:
+ sph_cubehash512_init(&ctx_cubehash);
+ sph_cubehash512(&ctx_cubehash, in, size);
+ sph_cubehash512_close(&ctx_cubehash, hash);
+ break;
+ case 9:
+ sph_shavite512_init(&ctx_shavite);
+ sph_shavite512(&ctx_shavite, in, size);
+ sph_shavite512_close(&ctx_shavite, hash);
+ break;
+ case 10:
+ sph_fugue512_init(&ctx_fugue);
+ sph_fugue512(&ctx_fugue, in, size);
+ sph_fugue512_close(&ctx_fugue, hash);
+ break;
+ case 11:
+ sph_jh512_init(&ctx_jh);
+ sph_jh512(&ctx_jh, in, size);
+ sph_jh512_close(&ctx_jh, hash);
+ break;
+ case 12:
+ sph_hamsi512_init(&ctx_hamsi);
+ sph_hamsi512(&ctx_hamsi, in, size);
+ sph_hamsi512_close(&ctx_hamsi, hash);
+ break;
+ case 13:
+ sph_echo512_init(&ctx_echo);
+ sph_echo512(&ctx_echo, in, size);
+ sph_echo512_close(&ctx_echo, hash);
+ break;
+ case 14:
+ sph_simd512_init(&ctx_simd);
+ sph_simd512(&ctx_simd, in, size);
+ sph_simd512_close(&ctx_simd, hash);
+ break;
+ case 15:
+ sph_groestl512_init(&ctx_groestl);
+ sph_groestl512(&ctx_groestl, in, size);
+ sph_groestl512_close(&ctx_groestl, hash);
+ break;
+ }
+ // subsequent rounds consume the previous 64-byte digest
+ in = (void*)hash;
+ size = 64;
+ }
+ memcpy(output, hash, 32);
+}
\ No newline at end of file
diff --git a/stratum/algos/dedal.h b/stratum/algos/dedal.h
new file mode 100644
index 000000000..fbffd439e
--- /dev/null
+++ b/stratum/algos/dedal.h
@@ -0,0 +1,16 @@
+#ifndef DEDALHASH_H
+#define DEDALHASH_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include
+
+void dedal_hash(const char* input, char* output, uint32_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // DEDALHASH_H
\ No newline at end of file
diff --git a/stratum/algos/geek.c b/stratum/algos/geek.c
new file mode 100644
index 000000000..6d05a6aff
--- /dev/null
+++ b/stratum/algos/geek.c
@@ -0,0 +1,76 @@
+#include "geek.h"
+#include
+#include
+#include
+#include
+
+#include "../sha3/sph_blake.h"
+#include "../sha3/sph_bmw.h"
+#include "../sha3/sph_groestl.h"
+#include "../sha3/sph_jh.h"
+#include "../sha3/sph_keccak.h"
+#include "../sha3/sph_skein.h"
+#include "../sha3/sph_luffa.h"
+#include "../sha3/sph_cubehash.h"
+#include "../sha3/sph_shavite.h"
+#include "../sha3/sph_simd.h"
+#include "../sha3/sph_echo.h"
+#include "../sha3/sph_hamsi.h"
+#include "../sha3/sph_fugue.h"
+#include "../sha3/sph_shabal.h"
+#include "../sha3/sph_whirlpool.h"
+
+/* GeekCash hash: a fixed 9-stage chain
+ * blake -> bmw -> echo -> shabal -> groestl -> cubehash -> keccak -> hamsi -> simd,
+ * ping-ponging between two 64-byte buffers; first 32 bytes are the result. */
+void geek_hash(const char* input, char* output, uint32_t len)
+{
+ sph_blake512_context ctx_blake;
+ sph_bmw512_context ctx_bmw;
+ sph_groestl512_context ctx_groestl;
+ sph_keccak512_context ctx_keccak;
+ sph_cubehash512_context ctx_cubehash1;
+ sph_echo512_context ctx_echo1;
+ sph_shabal512_context ctx_shabal1;
+ sph_simd512_context ctx_simd1;
+ sph_hamsi512_context ctx_hamsi1;
+
+
+ //these uint512 in the c++ source of the client are backed by an array of uint32
+ uint32_t hashA[16], hashB[16];
+
+ sph_blake512_init(&ctx_blake);
+ sph_blake512 (&ctx_blake, input, len);
+ sph_blake512_close (&ctx_blake, hashA);
+
+ sph_bmw512_init(&ctx_bmw);
+ sph_bmw512 (&ctx_bmw, hashA, 64);
+ sph_bmw512_close(&ctx_bmw, hashB);
+
+ sph_echo512_init (&ctx_echo1);
+ sph_echo512 (&ctx_echo1, hashB, 64);
+ sph_echo512_close(&ctx_echo1, hashA);
+
+ sph_shabal512_init (&ctx_shabal1);
+ sph_shabal512 (&ctx_shabal1, hashA, 64);
+ sph_shabal512_close(&ctx_shabal1, hashB);
+
+ sph_groestl512_init(&ctx_groestl);
+ sph_groestl512 (&ctx_groestl, hashB, 64);
+ sph_groestl512_close(&ctx_groestl, hashA);
+
+ sph_cubehash512_init (&ctx_cubehash1);
+ sph_cubehash512 (&ctx_cubehash1, hashA, 64);
+ sph_cubehash512_close(&ctx_cubehash1, hashB);
+
+ sph_keccak512_init(&ctx_keccak);
+ sph_keccak512 (&ctx_keccak, hashB, 64);
+ sph_keccak512_close(&ctx_keccak, hashA);
+
+ sph_hamsi512_init (&ctx_hamsi1);
+ sph_hamsi512 (&ctx_hamsi1, hashA, 64);
+ sph_hamsi512_close(&ctx_hamsi1, hashB);
+
+ sph_simd512_init (&ctx_simd1);
+ sph_simd512 (&ctx_simd1, hashB, 64);
+ sph_simd512_close(&ctx_simd1, hashA);
+
+ memcpy(output, hashA, 32);
+}
diff --git a/stratum/algos/geek.h b/stratum/algos/geek.h
new file mode 100644
index 000000000..cca287f5f
--- /dev/null
+++ b/stratum/algos/geek.h
@@ -0,0 +1,16 @@
+#ifndef GEEK_H
+#define GEEK_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include
+
+void geek_hash(const char* input, char* output, uint32_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
\ No newline at end of file
diff --git a/stratum/algos/gltalgos.c b/stratum/algos/gltalgos.c
new file mode 100644
index 000000000..955b56d98
--- /dev/null
+++ b/stratum/algos/gltalgos.c
@@ -0,0 +1,470 @@
+#include "gltalgos.h"
+#include
+#include
+#include
+#include
+
+#include "blake2-ref/blake2.h"
+
+#include "../sha3/sph_blake.h"
+#include "../sha3/sph_bmw.h"
+#include "../sha3/sph_groestl.h"
+#include "../sha3/sph_jh.h"
+#include "../sha3/sph_keccak.h"
+#include "../sha3/sph_skein.h"
+#include "../sha3/sph_luffa.h"
+#include "../sha3/sph_cubehash.h"
+#include "../sha3/sph_shavite.h"
+#include "../sha3/sph_simd.h"
+#include "../sha3/sph_echo.h"
+#include "../sha3/sph_hamsi.h"
+#include "../sha3/sph_fugue.h"
+#include "../sha3/sph_shabal.h"
+#include "../sha3/sph_whirlpool.h"
+#include "../sha3/sph_sha2.h"
+#include "../sha3/sph_haval.h"
+#include "../sha3/sph_gost.h"
+
+
+/* GlobalToken "pawelhash": fixed 18-stage chain of 512-bit hashes with one
+ * haval256 stage in the middle, alternating between two 64-byte buffers. */
+void pawelhash_hash(const char* input, char* output, uint32_t len)
+{
+ sph_fugue512_context ctx_fugue;
+ sph_sha512_context ctx_sha2;
+ sph_skein512_context ctx_skein;
+ sph_jh512_context ctx_jh;
+ sph_keccak512_context ctx_keccak;
+ sph_luffa512_context ctx_luffa;
+ sph_whirlpool_context ctx_whirlpool;
+ sph_shabal512_context ctx_shabal;
+ sph_echo512_context ctx_echo;
+ sph_groestl512_context ctx_groestl;
+ sph_haval256_5_context ctx_haval;
+ sph_bmw512_context ctx_bmw;
+ sph_gost512_context ctx_gost;
+
+ //these uint512 in the c++ source of the client are backed by an array of uint32
+ uint32_t hashA[16], hashB[16];
+
+ sph_fugue512_init(&ctx_fugue);
+ sph_fugue512(&ctx_fugue, input, len);
+ sph_fugue512_close(&ctx_fugue, hashA);
+
+ sph_sha512_init(&ctx_sha2);
+ sph_sha512(&ctx_sha2, hashA, 64);
+ sph_sha512_close(&ctx_sha2, hashB);
+
+ sph_skein512_init(&ctx_skein);
+ sph_skein512(&ctx_skein, hashB, 64);
+ sph_skein512_close(&ctx_skein, hashA);
+
+ sph_jh512_init(&ctx_jh);
+ sph_jh512(&ctx_jh, hashA, 64);
+ sph_jh512_close(&ctx_jh, hashB);
+
+ sph_keccak512_init(&ctx_keccak);
+ sph_keccak512(&ctx_keccak, hashB, 64);
+ sph_keccak512_close(&ctx_keccak, hashA);
+
+ sph_luffa512_init(&ctx_luffa);
+ sph_luffa512(&ctx_luffa, hashA, 64);
+ sph_luffa512_close(&ctx_luffa, hashB);
+
+ sph_whirlpool_init(&ctx_whirlpool);
+ sph_whirlpool(&ctx_whirlpool, hashB, 64);
+ sph_whirlpool_close(&ctx_whirlpool, hashA);
+
+ sph_shabal512_init(&ctx_shabal);
+ sph_shabal512(&ctx_shabal, hashA, 64);
+ sph_shabal512_close(&ctx_shabal, hashB);
+
+ sph_echo512_init(&ctx_echo);
+ sph_echo512(&ctx_echo, hashB, 64);
+ sph_echo512_close(&ctx_echo, hashA);
+
+ sph_groestl512_init(&ctx_groestl);
+ sph_groestl512(&ctx_groestl, hashA, 64);
+ sph_groestl512_close(&ctx_groestl, hashB);
+
+ sph_haval256_5_init(&ctx_haval);
+ sph_haval256_5(&ctx_haval, hashB, 64);
+ sph_haval256_5_close(&ctx_haval, hashA);
+
+ // haval256 wrote only 32 bytes; clear the stale upper half of the buffer
+ memset(&hashA[8], 0, 32);
+
+ sph_bmw512_init(&ctx_bmw);
+ sph_bmw512(&ctx_bmw, hashA, 64);
+ sph_bmw512_close(&ctx_bmw, hashB);
+
+ sph_echo512_init(&ctx_echo);
+ sph_echo512(&ctx_echo, hashB, 64);
+ sph_echo512_close(&ctx_echo, hashA);
+
+ sph_fugue512_init(&ctx_fugue);
+ sph_fugue512(&ctx_fugue, hashA, 64);
+ sph_fugue512_close(&ctx_fugue, hashB);
+
+ sph_bmw512_init(&ctx_bmw);
+ sph_bmw512(&ctx_bmw, hashB, 64);
+ sph_bmw512_close(&ctx_bmw, hashA);
+
+ sph_gost512_init(&ctx_gost);
+ sph_gost512(&ctx_gost, hashA, 64);
+ sph_gost512_close(&ctx_gost, hashB);
+
+ sph_shabal512_init(&ctx_shabal);
+ sph_shabal512(&ctx_shabal, hashB, 64);
+ sph_shabal512_close(&ctx_shabal, hashA);
+
+ sph_whirlpool_init(&ctx_whirlpool);
+ sph_whirlpool(&ctx_whirlpool, hashA, 64);
+ sph_whirlpool_close(&ctx_whirlpool, hashB);
+
+ sph_groestl512_init(&ctx_groestl);
+ sph_groestl512 (&ctx_groestl, hashB, 64);
+ sph_groestl512_close(&ctx_groestl, hashA);
+
+ memcpy(output, hashA, 32);
+}
+
+/* GlobalToken "jeonghash": fixed 23-stage palindromic chain of 512-bit hashes
+ * (simd..skein forward, then mirrored back), alternating two 64-byte buffers. */
+void jeonghash_hash(const char* input, char* output, uint32_t len)
+{
+ sph_simd512_context ctx_simd;
+ sph_hamsi512_context ctx_hamsi;
+ sph_shabal512_context ctx_shabal;
+ sph_blake512_context ctx_blake;
+ sph_bmw512_context ctx_bmw;
+ sph_sha512_context ctx_sha2;
+ sph_whirlpool_context ctx_whirlpool;
+ sph_skein512_context ctx_skein;
+
+ //these uint512 in the c++ source of the client are backed by an array of uint32
+ uint32_t hashA[16], hashB[16];
+
+ sph_simd512_init(&ctx_simd);
+ sph_simd512(&ctx_simd, input, len);
+ sph_simd512_close(&ctx_simd, hashA);
+
+ sph_hamsi512_init(&ctx_hamsi);
+ sph_hamsi512(&ctx_hamsi, hashA, 64);
+ sph_hamsi512_close(&ctx_hamsi, hashB);
+
+ sph_shabal512_init(&ctx_shabal);
+ sph_shabal512(&ctx_shabal, hashB, 64);
+ sph_shabal512_close(&ctx_shabal, hashA);
+
+ sph_blake512_init(&ctx_blake);
+ sph_blake512(&ctx_blake, hashA, 64);
+ sph_blake512_close(&ctx_blake, hashB);
+
+ sph_bmw512_init(&ctx_bmw);
+ sph_bmw512(&ctx_bmw, hashB, 64);
+ sph_bmw512_close(&ctx_bmw, hashA);
+
+ sph_sha512_init(&ctx_sha2);
+ sph_sha512(&ctx_sha2, hashA, 64);
+ sph_sha512_close(&ctx_sha2, hashB);
+
+ sph_whirlpool_init(&ctx_whirlpool);
+ sph_whirlpool(&ctx_whirlpool, hashB, 64);
+ sph_whirlpool_close(&ctx_whirlpool, hashA);
+
+ // skein applied twice back-to-back: the turning point of the palindrome
+ sph_skein512_init(&ctx_skein);
+ sph_skein512(&ctx_skein, hashA, 64);
+ sph_skein512_close(&ctx_skein, hashB);
+
+ sph_skein512_init(&ctx_skein);
+ sph_skein512(&ctx_skein, hashB, 64);
+ sph_skein512_close(&ctx_skein, hashA);
+
+ sph_whirlpool_init(&ctx_whirlpool);
+ sph_whirlpool(&ctx_whirlpool, hashA, 64);
+ sph_whirlpool_close(&ctx_whirlpool, hashB);
+
+ sph_sha512_init(&ctx_sha2);
+ sph_sha512(&ctx_sha2, hashB, 64);
+ sph_sha512_close(&ctx_sha2, hashA);
+
+ sph_bmw512_init(&ctx_bmw);
+ sph_bmw512(&ctx_bmw, hashA, 64);
+ sph_bmw512_close(&ctx_bmw, hashB);
+
+ sph_blake512_init(&ctx_blake);
+ sph_blake512(&ctx_blake, hashB, 64);
+ sph_blake512_close(&ctx_blake, hashA);
+
+ sph_shabal512_init(&ctx_shabal);
+ sph_shabal512(&ctx_shabal, hashA, 64);
+ sph_shabal512_close(&ctx_shabal, hashB);
+
+ sph_hamsi512_init(&ctx_hamsi);
+ sph_hamsi512(&ctx_hamsi, hashB, 64);
+ sph_hamsi512_close(&ctx_hamsi, hashA);
+
+ sph_simd512_init(&ctx_simd);
+ sph_simd512(&ctx_simd, hashA, 64);
+ sph_simd512_close(&ctx_simd, hashB);
+
+ sph_simd512_init(&ctx_simd);
+ sph_simd512(&ctx_simd, hashB, 64);
+ sph_simd512_close(&ctx_simd, hashA);
+
+ sph_hamsi512_init(&ctx_hamsi);
+ sph_hamsi512(&ctx_hamsi, hashA, 64);
+ sph_hamsi512_close(&ctx_hamsi, hashB);
+
+ sph_shabal512_init(&ctx_shabal);
+ sph_shabal512(&ctx_shabal, hashB, 64);
+ sph_shabal512_close(&ctx_shabal, hashA);
+
+ sph_blake512_init(&ctx_blake);
+ sph_blake512(&ctx_blake, hashA, 64);
+ sph_blake512_close(&ctx_blake, hashB);
+
+ sph_bmw512_init(&ctx_bmw);
+ sph_bmw512(&ctx_bmw, hashB, 64);
+ sph_bmw512_close(&ctx_bmw, hashA);
+
+ sph_sha512_init(&ctx_sha2);
+ sph_sha512(&ctx_sha2, hashA, 64);
+ sph_sha512_close(&ctx_sha2, hashB);
+
+ sph_whirlpool_init(&ctx_whirlpool);
+ sph_whirlpool(&ctx_whirlpool, hashB, 64);
+ sph_whirlpool_close(&ctx_whirlpool, hashA);
+
+ sph_skein512_init(&ctx_skein);
+ sph_skein512(&ctx_skein, hashA, 64);
+ sph_skein512_close(&ctx_skein, hashB);
+
+ memcpy(output, hashB, 32);
+}
+
+/* GlobalToken "astralhash": fixed 13-stage chain ending in haval256 + sha512,
+ * alternating between two 64-byte buffers; first 32 bytes are the result. */
+void astralhash_hash(const char* input, char* output, uint32_t len)
+{
+ sph_luffa512_context ctx_luffa;
+ sph_skein512_context ctx_skein;
+ sph_echo512_context ctx_echo;
+ sph_whirlpool_context ctx_whirlpool;
+ sph_bmw512_context ctx_bmw;
+ sph_blake512_context ctx_blake;
+ sph_shavite512_context ctx_shavite;
+ sph_fugue512_context ctx_fugue;
+ sph_hamsi512_context ctx_hamsi;
+ sph_haval256_5_context ctx_haval;
+ sph_sha512_context ctx_sha2;
+
+ //these uint512 in the c++ source of the client are backed by an array of uint32
+ uint32_t hashA[16], hashB[16];
+
+ sph_luffa512_init(&ctx_luffa);
+ sph_luffa512(&ctx_luffa, input, len);
+ sph_luffa512_close(&ctx_luffa, hashA);
+
+ sph_skein512_init(&ctx_skein);
+ sph_skein512(&ctx_skein, hashA, 64);
+ sph_skein512_close(&ctx_skein, hashB);
+
+ sph_echo512_init(&ctx_echo);
+ sph_echo512(&ctx_echo, hashB, 64);
+ sph_echo512_close(&ctx_echo, hashA);
+
+ sph_whirlpool_init(&ctx_whirlpool);
+ sph_whirlpool(&ctx_whirlpool, hashA, 64);
+ sph_whirlpool_close(&ctx_whirlpool, hashB);
+
+ sph_bmw512_init(&ctx_bmw);
+ sph_bmw512(&ctx_bmw, hashB, 64);
+ sph_bmw512_close(&ctx_bmw, hashA);
+
+ sph_blake512_init(&ctx_blake);
+ sph_blake512(&ctx_blake, hashA, 64);
+ sph_blake512_close(&ctx_blake, hashB);
+
+ sph_shavite512_init(&ctx_shavite);
+ sph_shavite512(&ctx_shavite, hashB, 64);
+ sph_shavite512_close(&ctx_shavite, hashA);
+
+ sph_skein512_init(&ctx_skein);
+ sph_skein512(&ctx_skein, hashA, 64);
+ sph_skein512_close(&ctx_skein, hashB);
+
+ sph_whirlpool_init(&ctx_whirlpool);
+ sph_whirlpool(&ctx_whirlpool, hashB, 64);
+ sph_whirlpool_close(&ctx_whirlpool, hashA);
+
+ sph_fugue512_init(&ctx_fugue);
+ sph_fugue512(&ctx_fugue, hashA, 64);
+ sph_fugue512_close(&ctx_fugue, hashB);
+
+ sph_hamsi512_init(&ctx_hamsi);
+ sph_hamsi512(&ctx_hamsi, hashB, 64);
+ sph_hamsi512_close(&ctx_hamsi, hashA);
+
+ sph_haval256_5_init(&ctx_haval);
+ sph_haval256_5(&ctx_haval, hashA, 64);
+ sph_haval256_5_close(&ctx_haval, hashB);
+
+ // haval256 wrote only 32 bytes; clear the stale upper half of the buffer
+ memset(&hashB[8], 0, 32);
+
+ sph_sha512_init(&ctx_sha2);
+ sph_sha512(&ctx_sha2, hashB, 64);
+ sph_sha512_close(&ctx_sha2, hashA);
+
+ memcpy(output, hashA, 32);
+}
+
+/* GlobalToken "padihash": long fixed chain of 512-bit hashes with three
+ * haval256 stages (each followed by zero-padding of the upper 32 bytes),
+ * alternating between two 64-byte buffers; first 32 bytes are the result. */
+void padihash_hash(const char* input, char* output, uint32_t len)
+{
+ sph_sha512_context ctx_sha2;
+ sph_jh512_context ctx_jh;
+ sph_luffa512_context ctx_luffa;
+ sph_echo512_context ctx_echo;
+ sph_bmw512_context ctx_bmw;
+ sph_haval256_5_context ctx_haval;
+ sph_cubehash512_context ctx_cubehash;
+ sph_shabal512_context ctx_shabal;
+
+ //these uint512 in the c++ source of the client are backed by an array of uint32
+ uint32_t hashA[16], hashB[16];
+
+ sph_sha512_init(&ctx_sha2);
+ sph_sha512(&ctx_sha2, input, len);
+ sph_sha512_close(&ctx_sha2, hashA);
+
+ sph_jh512_init(&ctx_jh);
+ sph_jh512(&ctx_jh, hashA, 64);
+ sph_jh512_close(&ctx_jh, hashB);
+
+ sph_luffa512_init(&ctx_luffa);
+ sph_luffa512(&ctx_luffa, hashB, 64);
+ sph_luffa512_close(&ctx_luffa, hashA);
+
+ sph_echo512_init(&ctx_echo);
+ sph_echo512(&ctx_echo, hashA, 64);
+ sph_echo512_close(&ctx_echo, hashB);
+
+ sph_bmw512_init(&ctx_bmw);
+ sph_bmw512(&ctx_bmw, hashB, 64);
+ sph_bmw512_close(&ctx_bmw, hashA);
+
+ sph_haval256_5_init(&ctx_haval);
+ sph_haval256_5(&ctx_haval, hashA, 64);
+ sph_haval256_5_close(&ctx_haval, hashB);
+
+ // haval256 wrote only 32 bytes; clear the stale upper half of the buffer
+ memset(&hashB[8], 0, 32);
+
+ sph_cubehash512_init(&ctx_cubehash);
+ sph_cubehash512(&ctx_cubehash, hashB, 64);
+ sph_cubehash512_close(&ctx_cubehash, hashA);
+
+ sph_shabal512_init(&ctx_shabal);
+ sph_shabal512(&ctx_shabal, hashA, 64);
+ sph_shabal512_close(&ctx_shabal, hashB);
+
+ sph_sha512_init(&ctx_sha2);
+ sph_sha512(&ctx_sha2, hashB, 64);
+ sph_sha512_close(&ctx_sha2, hashA);
+
+ sph_jh512_init(&ctx_jh);
+ sph_jh512(&ctx_jh, hashA, 64);
+ sph_jh512_close(&ctx_jh, hashB);
+
+ sph_luffa512_init(&ctx_luffa);
+ sph_luffa512(&ctx_luffa, hashB, 64);
+ sph_luffa512_close(&ctx_luffa, hashA);
+
+ sph_echo512_init(&ctx_echo);
+ sph_echo512(&ctx_echo, hashA, 64);
+ sph_echo512_close(&ctx_echo, hashB);
+
+ sph_bmw512_init(&ctx_bmw);
+ sph_bmw512(&ctx_bmw, hashB, 64);
+ sph_bmw512_close(&ctx_bmw, hashA);
+
+ sph_haval256_5_init(&ctx_haval);
+ sph_haval256_5(&ctx_haval, hashA, 64);
+ sph_haval256_5_close(&ctx_haval, hashB);
+
+ // again: zero the bytes haval256 did not write
+ memset(&hashB[8], 0, 32);
+
+ sph_cubehash512_init(&ctx_cubehash);
+ sph_cubehash512(&ctx_cubehash, hashB, 64);
+ sph_cubehash512_close(&ctx_cubehash, hashA);
+
+ sph_shabal512_init(&ctx_shabal);
+ sph_shabal512(&ctx_shabal, hashA, 64);
+ sph_shabal512_close(&ctx_shabal, hashB);
+
+ sph_shabal512_init(&ctx_shabal);
+ sph_shabal512(&ctx_shabal, hashB, 64);
+ sph_shabal512_close(&ctx_shabal, hashA);
+
+ sph_cubehash512_init(&ctx_cubehash);
+ sph_cubehash512(&ctx_cubehash, hashA, 64);
+ sph_cubehash512_close(&ctx_cubehash, hashB);
+
+ sph_haval256_5_init(&ctx_haval);
+ sph_haval256_5(&ctx_haval, hashB, 64);
+ sph_haval256_5_close(&ctx_haval, hashA);
+
+ // again: zero the bytes haval256 did not write
+ memset(&hashA[8], 0, 32);
+
+ sph_bmw512_init(&ctx_bmw);
+ sph_bmw512(&ctx_bmw, hashA, 64);
+ sph_bmw512_close(&ctx_bmw, hashB);
+
+ sph_echo512_init(&ctx_echo);
+ sph_echo512(&ctx_echo, hashB, 64);
+ sph_echo512_close(&ctx_echo, hashA);
+
+ sph_luffa512_init(&ctx_luffa);
+ sph_luffa512(&ctx_luffa, hashA, 64);
+ sph_luffa512_close(&ctx_luffa, hashB);
+
+ sph_jh512_init(&ctx_jh);
+ sph_jh512(&ctx_jh, hashB, 64);
+ sph_jh512_close(&ctx_jh, hashA);
+
+ sph_sha512_init(&ctx_sha2);
+ sph_sha512(&ctx_sha2, hashA, 64);
+ sph_sha512_close(&ctx_sha2, hashB);
+
+ sph_jh512_init(&ctx_jh);
+ sph_jh512(&ctx_jh, hashB, 64);
+ sph_jh512_close(&ctx_jh, hashA);
+
+ sph_bmw512_init(&ctx_bmw);
+ sph_bmw512(&ctx_bmw, hashA, 64);
+ sph_bmw512_close(&ctx_bmw, hashB);
+
+ memcpy(output, hashB, 32);
+}
+
+/* GlobalToken "globalhash": gost512 -> blake512 -> blake2b -> blake2s.
+ * blake2b writes BLAKE2B_OUTBYTES (64) into hashA; blake2s writes
+ * BLAKE2S_OUTBYTES (32) into finalhash, which becomes the result. */
+void globalhash_hash(const char* input, char* output, uint32_t len)
+{
+ sph_gost512_context ctx_gost;
+ sph_blake512_context ctx_blake;
+ blake2b_state ctx_blake2b[1];
+ blake2s_state ctx_blake2s[1];
+
+ //these uint512 in the c++ source of the client are backed by an array of uint32
+ uint32_t hashA[16], hashB[16], finalhash[8]; // finalhash is a 256 unsigned integer
+
+ sph_gost512_init(&ctx_gost);
+ sph_gost512 (&ctx_gost, input, len);
+ sph_gost512_close(&ctx_gost, hashA);
+
+ sph_blake512_init(&ctx_blake);
+ sph_blake512(&ctx_blake, hashA, 64);
+ sph_blake512_close(&ctx_blake, hashB);
+
+ blake2b_init( ctx_blake2b, BLAKE2B_OUTBYTES );
+ blake2b_update( ctx_blake2b, hashB, 64 );
+ blake2b_final( ctx_blake2b, hashA, BLAKE2B_OUTBYTES );
+
+ blake2s_init( ctx_blake2s, BLAKE2S_OUTBYTES );
+ blake2s_update( ctx_blake2s, hashA, 64);
+ blake2s_final( ctx_blake2s, finalhash, BLAKE2S_OUTBYTES );
+
+ memcpy(output, finalhash, 32);
+}
\ No newline at end of file
diff --git a/stratum/algos/gltalgos.h b/stratum/algos/gltalgos.h
new file mode 100644
index 000000000..50d78a3f3
--- /dev/null
+++ b/stratum/algos/gltalgos.h
@@ -0,0 +1,20 @@
+#ifndef GLTALGOS_H
+#define GLTALGOS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include
+
+void pawelhash_hash(const char* input, char* output, uint32_t len);
+void astralhash_hash(const char* input, char* output, uint32_t len);
+void jeonghash_hash(const char* input, char* output, uint32_t len);
+void padihash_hash(const char* input, char* output, uint32_t len);
+void globalhash_hash(const char* input, char* output, uint32_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
\ No newline at end of file
diff --git a/stratum/algos/gr.c b/stratum/algos/gr.c
new file mode 100644
index 000000000..c2b1454ef
--- /dev/null
+++ b/stratum/algos/gr.c
@@ -0,0 +1,276 @@
+#include
+#include
+#include
+#include
+#include "gr.h"
+#include "../sha3/sph_blake.h"
+#include "../sha3/sph_bmw.h"
+#include "../sha3/sph_groestl.h"
+#include "../sha3/sph_jh.h"
+#include "../sha3/sph_keccak.h"
+#include "../sha3/sph_skein.h"
+#include "../sha3/sph_luffa.h"
+#include "../sha3/sph_cubehash.h"
+#include "../sha3/sph_shavite.h"
+#include "../sha3/sph_simd.h"
+#include "../sha3/sph_echo.h"
+#include "../sha3/sph_hamsi.h"
+#include "../sha3/sph_fugue.h"
+#include "../sha3/sph_shabal.h"
+#include "../sha3/sph_whirlpool.h"
+#include "../sha3/sph_sha2.h"
+#include "../sha3/sph_tiger.h"
+#include "../sha3/sph_haval.h"
+#include "Lyra2.h"
+#include "gost.h"
+#include "cryptonote/cryptonight_dark.h"
+#include "cryptonote/cryptonight_dark_lite.h"
+#include "cryptonote/cryptonight_fast.h"
+#include "cryptonote/cryptonight.h"
+#include "cryptonote/cryptonight_lite.h"
+#include "cryptonote/cryptonight_soft_shell.h"
+#include "cryptonote/cryptonight_turtle.h"
+#include "cryptonote/cryptonight_turtle_lite.h"
+
+enum Algo {
+ BLAKE = 0,
+ BMW,
+ GROESTL,
+ JH,
+ KECCAK,
+ SKEIN,
+ LUFFA,
+ CUBEHASH,
+ SHAVITE,
+ SIMD,
+ ECHO,
+ HAMSI,
+ FUGUE,
+ SHABAL,
+ WHIRLPOOL,
+ HASH_FUNC_COUNT
+};
+
+enum CNAlgo {
+ CNDark = 0,
+ CNDarklite,
+ CNFast,
+ CNLite,
+ CNTurtle,
+ CNTurtlelite,
+ CN_HASH_FUNC_COUNT
+};
+
+/* Examine both nibbles of 'nibble' (low first, then high), map each to an
+ * algo index mod algoCount, and append it to selectedIndex the first time
+ * that index is seen; *currentCount tracks how many have been chosen. */
+static void selectAlgo(unsigned char nibble, bool* selectedAlgos, uint8_t* selectedIndex, int algoCount, int* currentCount) {
+ const uint8_t candidates[2] = {
+  (uint8_t)((nibble & 0x0F) % algoCount), // low nibble first
+  (uint8_t)((nibble >> 4) % algoCount)    // then high nibble
+ };
+ for (int k = 0; k < 2; k++) {
+  const uint8_t algoDigit = candidates[k];
+  if (!selectedAlgos[algoDigit]) {
+   selectedAlgos[algoDigit] = true;
+   selectedIndex[*currentCount] = algoDigit;
+   *currentCount += 1;
+  }
+ }
+}
+
+static void getAlgoString(void *mem, unsigned int size, uint8_t* selectedAlgoOutput, int algoCount) {
+ int i;
+ unsigned char *p = (unsigned char *)mem;
+ unsigned int len = size/2;
+ unsigned char j = 0;
+ bool selectedAlgo[algoCount];
+ for(int z=0; z < algoCount; z++) {
+ selectedAlgo[z] = false;
+ }
+ int selectedCount = 0;
+ for (i=0;i= 0) {
+ algo = selectedAlgoOutput[(uint8_t)coreSelection];
+ } else {
+ algo = 16; // skip core hashing for this loop iteration
+ }
+ if(cnSelection >=0) {
+ cnAlgo = selectedCNAlgoOutput[(uint8_t)cnSelection];
+ } else {
+ cnAlgo = 14; // skip cn hashing for this loop iteration
+ }
+ //selection cnAlgo. if a CN algo is selected then core algo will not be selected
+ switch(cnAlgo)
+ {
+ case CNDark:
+ cryptonightdark_hash(in, hash, size, 1);
+ break;
+ case CNDarklite:
+ cryptonightdarklite_hash(in, hash, size, 1);
+ break;
+ case CNFast:
+ cryptonightfast_hash(in, hash, size, 1);
+ break;
+ case CNLite:
+ cryptonightlite_hash(in, hash, size, 1);
+ break;
+ case CNTurtle:
+ cryptonightturtle_hash(in, hash, size, 1);
+ break;
+ case CNTurtlelite:
+ cryptonightturtlelite_hash(in, hash, size, 1);
+ break;
+ }
+ //selection core algo
+ switch (algo) {
+ case BLAKE:
+ sph_blake512_init(&ctx_blake);
+ sph_blake512(&ctx_blake, in, size);
+ sph_blake512_close(&ctx_blake, hash);
+ break;
+ case BMW:
+ sph_bmw512_init(&ctx_bmw);
+ sph_bmw512(&ctx_bmw, in, size);
+ sph_bmw512_close(&ctx_bmw, hash);
+ break;
+ case GROESTL:
+ sph_groestl512_init(&ctx_groestl);
+ sph_groestl512(&ctx_groestl, in, size);
+ sph_groestl512_close(&ctx_groestl, hash);
+ break;
+ case JH:
+ sph_jh512_init(&ctx_jh);
+ sph_jh512(&ctx_jh, in, size);
+ sph_jh512_close(&ctx_jh, hash);
+ break;
+ case KECCAK:
+ sph_keccak512_init(&ctx_keccak);
+ sph_keccak512(&ctx_keccak, in, size);
+ sph_keccak512_close(&ctx_keccak, hash);
+ break;
+ case SKEIN:
+ sph_skein512_init(&ctx_skein);
+ sph_skein512(&ctx_skein, in, size);
+ sph_skein512_close(&ctx_skein, hash);
+ break;
+ case LUFFA:
+ sph_luffa512_init(&ctx_luffa);
+ sph_luffa512(&ctx_luffa, in, size);
+ sph_luffa512_close(&ctx_luffa, hash);
+ break;
+ case CUBEHASH:
+ sph_cubehash512_init(&ctx_cubehash);
+ sph_cubehash512(&ctx_cubehash, in, size);
+ sph_cubehash512_close(&ctx_cubehash, hash);
+ break;
+ case SHAVITE:
+ sph_shavite512_init(&ctx_shavite);
+ sph_shavite512(&ctx_shavite, in, size);
+ sph_shavite512_close(&ctx_shavite, hash);
+ break;
+ case SIMD:
+ sph_simd512_init(&ctx_simd);
+ sph_simd512(&ctx_simd, in, size);
+ sph_simd512_close(&ctx_simd, hash);
+ break;
+ case ECHO:
+ sph_echo512_init(&ctx_echo);
+ sph_echo512(&ctx_echo, in, size);
+ sph_echo512_close(&ctx_echo, hash);
+ break;
+ case HAMSI:
+ sph_hamsi512_init(&ctx_hamsi);
+ sph_hamsi512(&ctx_hamsi, in, size);
+ sph_hamsi512_close(&ctx_hamsi, hash);
+ break;
+ case FUGUE:
+ sph_fugue512_init(&ctx_fugue);
+ sph_fugue512(&ctx_fugue, in, size);
+ sph_fugue512_close(&ctx_fugue, hash);
+ break;
+ case SHABAL:
+ sph_shabal512_init(&ctx_shabal);
+ sph_shabal512(&ctx_shabal, in, size);
+ sph_shabal512_close(&ctx_shabal, hash);
+ break;
+ case WHIRLPOOL:
+ sph_whirlpool_init(&ctx_whirlpool);
+ sph_whirlpool(&ctx_whirlpool, in, size);
+ sph_whirlpool_close(&ctx_whirlpool, hash);
+ break;
+ }
+ if(cnSelection >= 0) {
+ memset(&hash[8], 0, 32);
+ }
+ in = (void*) hash;
+ size = 64;
+ }
+ memcpy(output, hash, 32);
+}
diff --git a/stratum/algos/gr.h b/stratum/algos/gr.h
new file mode 100644
index 000000000..4a5ad2abf
--- /dev/null
+++ b/stratum/algos/gr.h
@@ -0,0 +1,14 @@
+#ifndef GR_H
+#define GR_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+void gr_hash(const char* input, char* output, uint32_t len);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/stratum/algos/heavyhash/heavyhash.c b/stratum/algos/heavyhash/heavyhash.c
new file mode 100644
index 000000000..e0ca35260
--- /dev/null
+++ b/stratum/algos/heavyhash/heavyhash.c
@@ -0,0 +1,149 @@
+#include "heavyhash.h"
+#include "keccak_tiny.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <math.h>
+
+#define EPS 1e-9
+
+struct xoshiro_state {
+ uint64_t s[4];
+};
+
+static inline uint64_t rotl64(const uint64_t x, int k) {
+ return (x << k) | (x >> (64 - k));
+}
+
+static inline uint64_t xoshiro_gen(struct xoshiro_state *state) {
+ const uint64_t result = rotl64(state->s[0] + state->s[3], 23) + state->s[0];
+
+ const uint64_t t = state->s[1] << 17;
+
+ state->s[2] ^= state->s[0];
+ state->s[3] ^= state->s[1];
+ state->s[1] ^= state->s[2];
+ state->s[0] ^= state->s[3];
+
+ state->s[2] ^= t;
+
+ state->s[3] = rotl64(state->s[3], 45);
+
+ return result;
+}
+
+static inline uint64_t le64dec(const void *pp)
+{
+ const uint8_t *p = (uint8_t const *)pp;
+ return ((uint64_t)(p[0]) | ((uint64_t)(p[1]) << 8) |
+ ((uint64_t)(p[2]) << 16) | ((uint64_t)(p[3]) << 24)) |
+ ((uint64_t)(p[4]) << 32) | ((uint64_t)(p[5]) << 40) |
+ ((uint64_t)(p[6]) << 48) | ((uint64_t)(p[7]) << 56);
+}
+
+static int compute_rank(const uint_fast16_t A[64][64])
+{
+ double B[64][64];
+ for (int i = 0; i < 64; ++i){
+ for(int j = 0; j < 64; ++j){
+ B[i][j] = A[i][j];
+ }
+ }
+
+ int rank = 0;
+ bool row_selected[64] = {};
+
+ for (int i = 0; i < 64; ++i) {
+ int j;
+ for (j = 0; j < 64; ++j) {
+ if (!row_selected[j] && fabs(B[j][i]) > EPS)
+ break;
+ }
+ if (j != 64) {
+ ++rank;
+ row_selected[j] = true;
+ for (int p = i + 1; p < 64; ++p)
+ B[j][p] /= B[j][i];
+ for (int k = 0; k < 64; ++k) {
+ if (k != j && fabs(B[k][i]) > EPS) {
+ for (int p = i + 1; p < 64; ++p)
+ B[k][p] -= B[j][p] * B[k][i];
+ }
+ }
+ }
+ }
+ return rank;
+}
+
+static inline bool is_full_rank(const uint_fast16_t matrix[64][64])
+{
+ return compute_rank(matrix) == 64;
+}
+
+static inline void generate_matrix(uint_fast16_t matrix[64][64], struct xoshiro_state *state) {
+ do {
+ for (int i = 0; i < 64; ++i) {
+ for (int j = 0; j < 64; j += 16) {
+ uint64_t value = xoshiro_gen(state);
+ for (int shift = 0; shift < 16; ++shift) {
+ matrix[i][j + shift] = (value >> (4*shift)) & 0xF;
+ }
+ }
+ }
+ } while (!is_full_rank(matrix));
+}
+
+static void heavyhash(const uint_fast16_t matrix[64][64], void* pdata, size_t pdata_len, void* output)
+{
+ uint8_t hash_first[32] __attribute__((aligned(32)));
+ uint8_t hash_second[32] __attribute__((aligned(32)));
+ uint8_t hash_xored[32] __attribute__((aligned(32)));
+
+ uint_fast16_t vector[64] __attribute__((aligned(64)));
+ uint_fast16_t product[64] __attribute__((aligned(64)));
+
+ sha3_256((uint8_t*) hash_first, 32, pdata, pdata_len);
+
+ for (int i = 0; i < 32; ++i) {
+ vector[2*i] = (hash_first[i] >> 4);
+ vector[2*i+1] = hash_first[i] & 0xF;
+ }
+
+ for (int i = 0; i < 64; ++i) {
+ uint_fast16_t sum = 0;
+ for (int j = 0; j < 64; ++j) {
+ sum += matrix[i][j] * vector[j];
+ }
+ product[i] = (sum >> 10);
+ }
+
+ for (int i = 0; i < 32; ++i) {
+ hash_second[i] = (product[2*i] << 4) | (product[2*i+1]);
+ }
+
+ for (int i = 0; i < 32; ++i) {
+ hash_xored[i] = hash_first[i] ^ hash_second[i];
+ }
+ sha3_256(output, 32, hash_xored, 32);
+}
+
+void heavyhash_hash(const char* input, char* output, uint32_t len)
+{
+ uint_fast16_t matrix[64][64] __attribute__((aligned(64)));
+ uint32_t seed[8] __attribute__((aligned(64)));
+
+ sha3_256((void*)seed, 32, (void*)(input + 4), 32);
+
+ struct xoshiro_state state;
+ for (int i = 0; i < 4; ++i)
+ {
+ state.s[i] = le64dec(seed + 2*i);
+ }
+
+ generate_matrix(matrix, &state);
+
+ heavyhash(matrix, (void*)input, len, output);
+
+}
diff --git a/stratum/algos/heavyhash/heavyhash.h b/stratum/algos/heavyhash/heavyhash.h
new file mode 100644
index 000000000..ea2441fe8
--- /dev/null
+++ b/stratum/algos/heavyhash/heavyhash.h
@@ -0,0 +1,21 @@
+#ifndef OPOWPOOL_HEAVYHASH_H
+#define OPOWPOOL_HEAVYHASH_H
+
+
+
+//void compute_blockheader_heavyhash(uint32_t* block_header, void* output);
+
+// yiimp format
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+void heavyhash_hash(const char* input, char* output, uint32_t len);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //OPOWPOOL_HEAVYHASH_H
\ No newline at end of file
diff --git a/stratum/algos/heavyhash/keccak_tiny.c b/stratum/algos/heavyhash/keccak_tiny.c
new file mode 100644
index 000000000..bbf6e668e
--- /dev/null
+++ b/stratum/algos/heavyhash/keccak_tiny.c
@@ -0,0 +1,155 @@
+#include "keccak_tiny.h"
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/******** The Keccak-f[1600] permutation ********/
+
+/*** Constants. ***/
+static const uint8_t rho[24] = \
+ { 1, 3, 6, 10, 15, 21,
+ 28, 36, 45, 55, 2, 14,
+ 27, 41, 56, 8, 25, 43,
+ 62, 18, 39, 61, 20, 44};
+static const uint8_t pi[24] = \
+ {10, 7, 11, 17, 18, 3,
+ 5, 16, 8, 21, 24, 4,
+ 15, 23, 19, 13, 12, 2,
+ 20, 14, 22, 9, 6, 1};
+static const uint64_t RC[24] = \
+ {1ULL, 0x8082ULL, 0x800000000000808aULL, 0x8000000080008000ULL,
+ 0x808bULL, 0x80000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL,
+ 0x8aULL, 0x88ULL, 0x80008009ULL, 0x8000000aULL,
+ 0x8000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL,
+ 0x8000000000008002ULL, 0x8000000000000080ULL, 0x800aULL, 0x800000008000000aULL,
+ 0x8000000080008081ULL, 0x8000000000008080ULL, 0x80000001ULL, 0x8000000080008008ULL};
+
+/*** Helper macros to unroll the permutation. ***/
+#define rol(x, s) (((x) << s) | ((x) >> (64 - s)))
+#define REPEAT6(e) e e e e e e
+#define REPEAT24(e) REPEAT6(e e e e)
+#define REPEAT5(e) e e e e e
+#define FOR5(v, s, e) \
+ v = 0; \
+ REPEAT5(e; v += s;)
+
+/*** Keccak-f[1600] ***/
+static inline void keccakf(void* state) {
+ uint64_t* a = (uint64_t*)state;
+ uint64_t b[5] = {0};
+ uint64_t t = 0;
+ uint8_t x, y, i = 0;
+
+ REPEAT24(
+ // Theta
+ FOR5(x, 1,
+ b[x] = 0;
+ FOR5(y, 5,
+ b[x] ^= a[x + y]; ))
+ FOR5(x, 1,
+ FOR5(y, 5,
+ a[y + x] ^= b[(x + 4) % 5] ^ rol(b[(x + 1) % 5], 1); ))
+ // Rho and pi
+ t = a[1];
+ x = 0;
+ REPEAT24(b[0] = a[pi[x]];
+ a[pi[x]] = rol(t, rho[x]);
+ t = b[0];
+ x++; )
+ // Chi
+ FOR5(y,
+ 5,
+ FOR5(x, 1,
+ b[x] = a[y + x];)
+ FOR5(x, 1,
+ a[y + x] = b[x] ^ ((~b[(x + 1) % 5]) & b[(x + 2) % 5]); ))
+ // Iota
+ a[0] ^= RC[i];
+ i++; )
+}
+
+/******** The FIPS202-defined functions. ********/
+
+/*** Some helper macros. ***/
+
+#define _(S) do { S } while (0)
+#define FOR(i, ST, L, S) \
+ _(for (size_t i = 0; i < L; i += ST) { S; })
+#define mkapply_ds(NAME, S) \
+ static inline void NAME(uint8_t* dst, \
+ const uint8_t* src, \
+ size_t len) { \
+ FOR(i, 1, len, S); \
+ }
+#define mkapply_sd(NAME, S) \
+ static inline void NAME(const uint8_t* src, \
+ uint8_t* dst, \
+ size_t len) { \
+ FOR(i, 1, len, S); \
+ }
+
+mkapply_ds(xorin, dst[i] ^= src[i]) // xorin
+mkapply_sd(setout, dst[i] = src[i]) // setout
+
+#define P keccakf
+#define Plen 200
+
+// Fold P*F over the full blocks of an input.
+#define foldP(I, L, F) \
+ while (L >= rate) { \
+ F(a, I, rate); \
+ P(a); \
+ I += rate; \
+ L -= rate; \
+ }
+
+/** The sponge-based hash construction. **/
+static inline int hash(uint8_t* out, size_t outlen,
+ const uint8_t* in, size_t inlen,
+ size_t rate, uint8_t delim) {
+ if ((out == NULL) || ((in == NULL) && inlen != 0) || (rate >= Plen)) {
+ return -1;
+ }
+ uint8_t a[Plen] = {0};
+ // Absorb input.
+ foldP(in, inlen, xorin);
+ // Xor in the DS and pad frame.
+ a[inlen] ^= delim;
+ a[rate - 1] ^= 0x80;
+ // Xor in the last block.
+ xorin(a, in, inlen);
+ // Apply P
+ P(a);
+ // Squeeze output.
+ foldP(out, outlen, setout);
+ setout(a, out, outlen);
+ memset(a, 0, 200);
+ return 0;
+}
+
+/*** Helper macros to define SHA3 and SHAKE instances. ***/
+#define defshake(bits) \
+ int shake##bits(uint8_t* out, size_t outlen, \
+ const uint8_t* in, size_t inlen) { \
+ return hash(out, outlen, in, inlen, 200 - (bits / 4), 0x1f); \
+ }
+#define defsha3(bits) \
+ int sha3_##bits(uint8_t* out, size_t outlen, \
+ const uint8_t* in, size_t inlen) { \
+ if (outlen > (bits/8)) { \
+ return -1; \
+ } \
+ return hash(out, outlen, in, inlen, 200 - (bits / 4), 0x06); \
+ }
+
+/*** FIPS202 SHAKE VOFs ***/
+defshake(128)
+defshake(256)
+
+/*** FIPS202 SHA3 FOFs ***/
+defsha3(224)
+defsha3(256)
+defsha3(384)
+defsha3(512)
\ No newline at end of file
diff --git a/stratum/algos/heavyhash/keccak_tiny.h b/stratum/algos/heavyhash/keccak_tiny.h
new file mode 100644
index 000000000..a04867c22
--- /dev/null
+++ b/stratum/algos/heavyhash/keccak_tiny.h
@@ -0,0 +1,29 @@
+#ifndef KECCAK_FIPS202_H
+#define KECCAK_FIPS202_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#include <stdint.h>
+#include <stddef.h>
+
+#define decshake(bits) \
+ int shake##bits(uint8_t*, size_t, const uint8_t*, size_t);
+
+#define decsha3(bits) \
+ int sha3_##bits(uint8_t*, size_t, const uint8_t*, size_t);
+
+decshake(128)
+decshake(256)
+decsha3(224)
+decsha3(256)
+decsha3(384)
+decsha3(512)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
\ No newline at end of file
diff --git a/stratum/algos/honeycomb/facet_five.c b/stratum/algos/honeycomb/facet_five.c
new file mode 100644
index 000000000..f4509a922
--- /dev/null
+++ b/stratum/algos/honeycomb/facet_five.c
@@ -0,0 +1,1042 @@
+
+
+#include <stddef.h>
+#include <string.h>
+#include <limits.h>
+
+#include "facet_five.h"
+
+#ifdef __cplusplus
+ extern "C"{
+#endif
+
+
+#ifdef _MSC_VER
+ #pragma warning (disable: 4146)
+#endif
+
+typedef bee_u32 u32;
+typedef bee_s32 s32;
+#define C32 BEE_C32
+#define T32 BEE_T32
+#define ROL32 BEE_ROTL32
+
+#define XCAT(x, y) XCAT_(x, y)
+#define XCAT_(x, y) x ## y
+
+/*
+ * The powers of 41 modulo 257. We use exponents from 0 to 255, inclusive.
+ */
+static const s32 alpha_tab[] = {
+ 1, 41, 139, 45, 46, 87, 226, 14, 60, 147, 116, 130,
+ 190, 80, 196, 69, 2, 82, 21, 90, 92, 174, 195, 28,
+ 120, 37, 232, 3, 123, 160, 135, 138, 4, 164, 42, 180,
+ 184, 91, 133, 56, 240, 74, 207, 6, 246, 63, 13, 19,
+ 8, 71, 84, 103, 111, 182, 9, 112, 223, 148, 157, 12,
+ 235, 126, 26, 38, 16, 142, 168, 206, 222, 107, 18, 224,
+ 189, 39, 57, 24, 213, 252, 52, 76, 32, 27, 79, 155,
+ 187, 214, 36, 191, 121, 78, 114, 48, 169, 247, 104, 152,
+ 64, 54, 158, 53, 117, 171, 72, 125, 242, 156, 228, 96,
+ 81, 237, 208, 47, 128, 108, 59, 106, 234, 85, 144, 250,
+ 227, 55, 199, 192, 162, 217, 159, 94, 256, 216, 118, 212,
+ 211, 170, 31, 243, 197, 110, 141, 127, 67, 177, 61, 188,
+ 255, 175, 236, 167, 165, 83, 62, 229, 137, 220, 25, 254,
+ 134, 97, 122, 119, 253, 93, 215, 77, 73, 166, 124, 201,
+ 17, 183, 50, 251, 11, 194, 244, 238, 249, 186, 173, 154,
+ 146, 75, 248, 145, 34, 109, 100, 245, 22, 131, 231, 219,
+ 241, 115, 89, 51, 35, 150, 239, 33, 68, 218, 200, 233,
+ 44, 5, 205, 181, 225, 230, 178, 102, 70, 43, 221, 66,
+ 136, 179, 143, 209, 88, 10, 153, 105, 193, 203, 99, 204,
+ 140, 86, 185, 132, 15, 101, 29, 161, 176, 20, 49, 210,
+ 129, 149, 198, 151, 23, 172, 113, 7, 30, 202, 58, 65,
+ 95, 40, 98, 163
+};
+
+/*
+ * Ranges:
+ * REDS1: from -32768..98302 to -383..383
+ * REDS2: from -2^31..2^31-1 to -32768..98302
+ */
+#define REDS1(x) (((x) & 0xFF) - ((x) >> 8))
+#define REDS2(x) (((x) & 0xFFFF) + ((x) >> 16))
+
+/*
+ * If, upon entry, the values of q[] are all in the -N..N range (where
+ * N >= 98302) then the new values of q[] are in the -2N..2N range.
+ *
+ * Since alpha_tab[v] <= 256, maximum allowed range is for N = 8388608.
+ */
+#define FFT_LOOP(rb, hk, as, id) do { \
+ size_t u, v; \
+ s32 m = q[(rb)]; \
+ s32 n = q[(rb) + (hk)]; \
+ q[(rb)] = m + n; \
+ q[(rb) + (hk)] = m - n; \
+ u = v = 0; \
+ goto id; \
+ for (; u < (hk); u += 4, v += 4 * (as)) { \
+ s32 t; \
+ m = q[(rb) + u + 0]; \
+ n = q[(rb) + u + 0 + (hk)]; \
+ t = REDS2(n * alpha_tab[v + 0 * (as)]); \
+ q[(rb) + u + 0] = m + t; \
+ q[(rb) + u + 0 + (hk)] = m - t; \
+ id: \
+ m = q[(rb) + u + 1]; \
+ n = q[(rb) + u + 1 + (hk)]; \
+ t = REDS2(n * alpha_tab[v + 1 * (as)]); \
+ q[(rb) + u + 1] = m + t; \
+ q[(rb) + u + 1 + (hk)] = m - t; \
+ m = q[(rb) + u + 2]; \
+ n = q[(rb) + u + 2 + (hk)]; \
+ t = REDS2(n * alpha_tab[v + 2 * (as)]); \
+ q[(rb) + u + 2] = m + t; \
+ q[(rb) + u + 2 + (hk)] = m - t; \
+ m = q[(rb) + u + 3]; \
+ n = q[(rb) + u + 3 + (hk)]; \
+ t = REDS2(n * alpha_tab[v + 3 * (as)]); \
+ q[(rb) + u + 3] = m + t; \
+ q[(rb) + u + 3 + (hk)] = m - t; \
+ } \
+ } while (0)
+
+/*
+ * Output ranges:
+ * d0: min= 0 max= 1020
+ * d1: min= -67 max= 4587
+ * d2: min=-4335 max= 4335
+ * d3: min=-4147 max= 507
+ * d4: min= -510 max= 510
+ * d5: min= -252 max= 4402
+ * d6: min=-4335 max= 4335
+ * d7: min=-4332 max= 322
+ */
+#define FFT8(xb, xs, d) do { \
+ s32 x0 = x[(xb)]; \
+ s32 x1 = x[(xb) + (xs)]; \
+ s32 x2 = x[(xb) + 2 * (xs)]; \
+ s32 x3 = x[(xb) + 3 * (xs)]; \
+ s32 a0 = x0 + x2; \
+ s32 a1 = x0 + (x2 << 4); \
+ s32 a2 = x0 - x2; \
+ s32 a3 = x0 - (x2 << 4); \
+ s32 b0 = x1 + x3; \
+ s32 b1 = REDS1((x1 << 2) + (x3 << 6)); \
+ s32 b2 = (x1 << 4) - (x3 << 4); \
+ s32 b3 = REDS1((x1 << 6) + (x3 << 2)); \
+ d ## 0 = a0 + b0; \
+ d ## 1 = a1 + b1; \
+ d ## 2 = a2 + b2; \
+ d ## 3 = a3 + b3; \
+ d ## 4 = a0 - b0; \
+ d ## 5 = a1 - b1; \
+ d ## 6 = a2 - b2; \
+ d ## 7 = a3 - b3; \
+ } while (0)
+
+/*
+ * When k=16, we have alpha=2. Multiplication by alpha^i is then reduced
+ * to some shifting.
+ *
+ * Output: within -591471..591723
+ */
+#define FFT16(xb, xs, rb) do { \
+ s32 d1_0, d1_1, d1_2, d1_3, d1_4, d1_5, d1_6, d1_7; \
+ s32 d2_0, d2_1, d2_2, d2_3, d2_4, d2_5, d2_6, d2_7; \
+ FFT8(xb, (xs) << 1, d1_); \
+ FFT8((xb) + (xs), (xs) << 1, d2_); \
+ q[(rb) + 0] = d1_0 + d2_0; \
+ q[(rb) + 1] = d1_1 + (d2_1 << 1); \
+ q[(rb) + 2] = d1_2 + (d2_2 << 2); \
+ q[(rb) + 3] = d1_3 + (d2_3 << 3); \
+ q[(rb) + 4] = d1_4 + (d2_4 << 4); \
+ q[(rb) + 5] = d1_5 + (d2_5 << 5); \
+ q[(rb) + 6] = d1_6 + (d2_6 << 6); \
+ q[(rb) + 7] = d1_7 + (d2_7 << 7); \
+ q[(rb) + 8] = d1_0 - d2_0; \
+ q[(rb) + 9] = d1_1 - (d2_1 << 1); \
+ q[(rb) + 10] = d1_2 - (d2_2 << 2); \
+ q[(rb) + 11] = d1_3 - (d2_3 << 3); \
+ q[(rb) + 12] = d1_4 - (d2_4 << 4); \
+ q[(rb) + 13] = d1_5 - (d2_5 << 5); \
+ q[(rb) + 14] = d1_6 - (d2_6 << 6); \
+ q[(rb) + 15] = d1_7 - (d2_7 << 7); \
+ } while (0)
+
+/*
+ * Output range: |q| <= 1183446
+ */
+#define FFT32(xb, xs, rb, id) do { \
+ FFT16(xb, (xs) << 1, rb); \
+ FFT16((xb) + (xs), (xs) << 1, (rb) + 16); \
+ FFT_LOOP(rb, 16, 8, id); \
+ } while (0)
+
+/*
+ * Output range: |q| <= 2366892
+ */
+#define FFT64(xb, xs, rb, id) do { \
+ FFT32(xb, (xs) << 1, rb, XCAT(id, a)); \
+ FFT32((xb) + (xs), (xs) << 1, (rb) + 32, XCAT(id, b)); \
+ FFT_LOOP(rb, 32, 4, id); \
+ } while (0)
+
+
+/*
+ * Output range: |q| <= 4733784
+ */
+#define FFT128(xb, xs, rb, id) do { \
+ FFT64(xb, (xs) << 1, rb, XCAT(id, a)); \
+ FFT64((xb) + (xs), (xs) << 1, (rb) + 64, XCAT(id, b)); \
+ FFT_LOOP(rb, 64, 2, id); \
+ } while (0)
+
+
+/*
+ * For SIMD-384 / SIMD-512, the fully unrolled FFT yields a compression
+ * function which does not fit in the 32 kB L1 cache of a typical x86
+ * Intel. We therefore add a function call layer at the FFT64 level.
+ */
+
+static void fft64(unsigned char *x, size_t xs, s32 *q)
+{
+ size_t xd;
+
+ xd = xs << 1;
+ FFT32(0, xd, 0, label_a);
+ FFT32(xs, xd, 32, label_b);
+ FFT_LOOP(0, 32, 4, label_);
+}
+
+/*
+ * Output range: |q| <= 9467568
+ */
+#define FFT256(xb, xs, rb, id) do { \
+ fft64(x + (xb) + ((xs) * 0), (xs) << 2, &q[(rb) + 0]); \
+ fft64(x + (xb) + ((xs) * 2), (xs) << 2, &q[(rb) + 64]); \
+ FFT_LOOP(rb, 64, 2, XCAT(id, aa)); \
+ fft64(x + (xb) + ((xs) * 1), (xs) << 2, &q[(rb) + 128]); \
+ fft64(x + (xb) + ((xs) * 3), (xs) << 2, &q[(rb) + 192]); \
+ FFT_LOOP((rb) + 128, 64, 2, XCAT(id, ab)); \
+ FFT_LOOP(rb, 128, 1, XCAT(id, a)); \
+ } while (0)
+
+/*
+ * alpha^(127*i) mod 257
+ */
+static const unsigned short yoff_s_n[] = {
+ 1, 98, 95, 58, 30, 113, 23, 198, 129, 49, 176, 29,
+ 15, 185, 140, 99, 193, 153, 88, 143, 136, 221, 70, 178,
+ 225, 205, 44, 200, 68, 239, 35, 89, 241, 231, 22, 100,
+ 34, 248, 146, 173, 249, 244, 11, 50, 17, 124, 73, 215,
+ 253, 122, 134, 25, 137, 62, 165, 236, 255, 61, 67, 141,
+ 197, 31, 211, 118, 256, 159, 162, 199, 227, 144, 234, 59,
+ 128, 208, 81, 228, 242, 72, 117, 158, 64, 104, 169, 114,
+ 121, 36, 187, 79, 32, 52, 213, 57, 189, 18, 222, 168,
+ 16, 26, 235, 157, 223, 9, 111, 84, 8, 13, 246, 207,
+ 240, 133, 184, 42, 4, 135, 123, 232, 120, 195, 92, 21,
+ 2, 196, 190, 116, 60, 226, 46, 139
+};
+
+/*
+ * alpha^(127*i) + alpha^(125*i) mod 257
+ */
+static const unsigned short yoff_s_f[] = {
+ 2, 156, 118, 107, 45, 212, 111, 162, 97, 249, 211, 3,
+ 49, 101, 151, 223, 189, 178, 253, 204, 76, 82, 232, 65,
+ 96, 176, 161, 47, 189, 61, 248, 107, 0, 131, 133, 113,
+ 17, 33, 12, 111, 251, 103, 57, 148, 47, 65, 249, 143,
+ 189, 8, 204, 230, 205, 151, 187, 227, 247, 111, 140, 6,
+ 77, 10, 21, 149, 255, 101, 139, 150, 212, 45, 146, 95,
+ 160, 8, 46, 254, 208, 156, 106, 34, 68, 79, 4, 53,
+ 181, 175, 25, 192, 161, 81, 96, 210, 68, 196, 9, 150,
+ 0, 126, 124, 144, 240, 224, 245, 146, 6, 154, 200, 109,
+ 210, 192, 8, 114, 68, 249, 53, 27, 52, 106, 70, 30,
+ 10, 146, 117, 251, 180, 247, 236, 108
+};
+
+/*
+ * beta^(255*i) mod 257
+ */
+static const unsigned short yoff_b_n[] = {
+ 1, 163, 98, 40, 95, 65, 58, 202, 30, 7, 113, 172,
+ 23, 151, 198, 149, 129, 210, 49, 20, 176, 161, 29, 101,
+ 15, 132, 185, 86, 140, 204, 99, 203, 193, 105, 153, 10,
+ 88, 209, 143, 179, 136, 66, 221, 43, 70, 102, 178, 230,
+ 225, 181, 205, 5, 44, 233, 200, 218, 68, 33, 239, 150,
+ 35, 51, 89, 115, 241, 219, 231, 131, 22, 245, 100, 109,
+ 34, 145, 248, 75, 146, 154, 173, 186, 249, 238, 244, 194,
+ 11, 251, 50, 183, 17, 201, 124, 166, 73, 77, 215, 93,
+ 253, 119, 122, 97, 134, 254, 25, 220, 137, 229, 62, 83,
+ 165, 167, 236, 175, 255, 188, 61, 177, 67, 127, 141, 110,
+ 197, 243, 31, 170, 211, 212, 118, 216, 256, 94, 159, 217,
+ 162, 192, 199, 55, 227, 250, 144, 85, 234, 106, 59, 108,
+ 128, 47, 208, 237, 81, 96, 228, 156, 242, 125, 72, 171,
+ 117, 53, 158, 54, 64, 152, 104, 247, 169, 48, 114, 78,
+ 121, 191, 36, 214, 187, 155, 79, 27, 32, 76, 52, 252,
+ 213, 24, 57, 39, 189, 224, 18, 107, 222, 206, 168, 142,
+ 16, 38, 26, 126, 235, 12, 157, 148, 223, 112, 9, 182,
+ 111, 103, 84, 71, 8, 19, 13, 63, 246, 6, 207, 74,
+ 240, 56, 133, 91, 184, 180, 42, 164, 4, 138, 135, 160,
+ 123, 3, 232, 37, 120, 28, 195, 174, 92, 90, 21, 82,
+ 2, 69, 196, 80, 190, 130, 116, 147, 60, 14, 226, 87,
+ 46, 45, 139, 41
+};
+
+/*
+ * beta^(255*i) + beta^(253*i) mod 257
+ */
+static const unsigned short yoff_b_f[] = {
+ 2, 203, 156, 47, 118, 214, 107, 106, 45, 93, 212, 20,
+ 111, 73, 162, 251, 97, 215, 249, 53, 211, 19, 3, 89,
+ 49, 207, 101, 67, 151, 130, 223, 23, 189, 202, 178, 239,
+ 253, 127, 204, 49, 76, 236, 82, 137, 232, 157, 65, 79,
+ 96, 161, 176, 130, 161, 30, 47, 9, 189, 247, 61, 226,
+ 248, 90, 107, 64, 0, 88, 131, 243, 133, 59, 113, 115,
+ 17, 236, 33, 213, 12, 191, 111, 19, 251, 61, 103, 208,
+ 57, 35, 148, 248, 47, 116, 65, 119, 249, 178, 143, 40,
+ 189, 129, 8, 163, 204, 227, 230, 196, 205, 122, 151, 45,
+ 187, 19, 227, 72, 247, 125, 111, 121, 140, 220, 6, 107,
+ 77, 69, 10, 101, 21, 65, 149, 171, 255, 54, 101, 210,
+ 139, 43, 150, 151, 212, 164, 45, 237, 146, 184, 95, 6,
+ 160, 42, 8, 204, 46, 238, 254, 168, 208, 50, 156, 190,
+ 106, 127, 34, 234, 68, 55, 79, 18, 4, 130, 53, 208,
+ 181, 21, 175, 120, 25, 100, 192, 178, 161, 96, 81, 127,
+ 96, 227, 210, 248, 68, 10, 196, 31, 9, 167, 150, 193,
+ 0, 169, 126, 14, 124, 198, 144, 142, 240, 21, 224, 44,
+ 245, 66, 146, 238, 6, 196, 154, 49, 200, 222, 109, 9,
+ 210, 141, 192, 138, 8, 79, 114, 217, 68, 128, 249, 94,
+ 53, 30, 27, 61, 52, 135, 106, 212, 70, 238, 30, 185,
+ 10, 132, 146, 136, 117, 37, 251, 150, 180, 188, 247, 156,
+ 236, 192, 108, 86
+};
+
+#define INNER(l, h, mm) (((u32)((l) * (mm)) & 0xFFFFU) \
+ + ((u32)((h) * (mm)) << 16))
+
+#define W_SMALL(sb, o1, o2, mm) \
+ (INNER(q[8 * (sb) + 2 * 0 + o1], q[8 * (sb) + 2 * 0 + o2], mm), \
+ INNER(q[8 * (sb) + 2 * 1 + o1], q[8 * (sb) + 2 * 1 + o2], mm), \
+ INNER(q[8 * (sb) + 2 * 2 + o1], q[8 * (sb) + 2 * 2 + o2], mm), \
+ INNER(q[8 * (sb) + 2 * 3 + o1], q[8 * (sb) + 2 * 3 + o2], mm)
+
+#define WS_0_0 W_SMALL( 4, 0, 1, 185)
+#define WS_0_1 W_SMALL( 6, 0, 1, 185)
+#define WS_0_2 W_SMALL( 0, 0, 1, 185)
+#define WS_0_3 W_SMALL( 2, 0, 1, 185)
+#define WS_0_4 W_SMALL( 7, 0, 1, 185)
+#define WS_0_5 W_SMALL( 5, 0, 1, 185)
+#define WS_0_6 W_SMALL( 3, 0, 1, 185)
+#define WS_0_7 W_SMALL( 1, 0, 1, 185)
+#define WS_1_0 W_SMALL(15, 0, 1, 185)
+#define WS_1_1 W_SMALL(11, 0, 1, 185)
+#define WS_1_2 W_SMALL(12, 0, 1, 185)
+#define WS_1_3 W_SMALL( 8, 0, 1, 185)
+#define WS_1_4 W_SMALL( 9, 0, 1, 185)
+#define WS_1_5 W_SMALL(13, 0, 1, 185)
+#define WS_1_6 W_SMALL(10, 0, 1, 185)
+#define WS_1_7 W_SMALL(14, 0, 1, 185)
+#define WS_2_0 W_SMALL(17, -128, -64, 233)
+#define WS_2_1 W_SMALL(18, -128, -64, 233)
+#define WS_2_2 W_SMALL(23, -128, -64, 233)
+#define WS_2_3 W_SMALL(20, -128, -64, 233)
+#define WS_2_4 W_SMALL(22, -128, -64, 233)
+#define WS_2_5 W_SMALL(21, -128, -64, 233)
+#define WS_2_6 W_SMALL(16, -128, -64, 233)
+#define WS_2_7 W_SMALL(19, -128, -64, 233)
+#define WS_3_0 W_SMALL(30, -191, -127, 233)
+#define WS_3_1 W_SMALL(24, -191, -127, 233)
+#define WS_3_2 W_SMALL(25, -191, -127, 233)
+#define WS_3_3 W_SMALL(31, -191, -127, 233)
+#define WS_3_4 W_SMALL(27, -191, -127, 233)
+#define WS_3_5 W_SMALL(29, -191, -127, 233)
+#define WS_3_6 W_SMALL(28, -191, -127, 233)
+#define WS_3_7 W_SMALL(26, -191, -127, 233)
+
+#define W_BIG(sb, o1, o2, mm) \
+ (INNER(q[16 * (sb) + 2 * 0 + o1], q[16 * (sb) + 2 * 0 + o2], mm), \
+ INNER(q[16 * (sb) + 2 * 1 + o1], q[16 * (sb) + 2 * 1 + o2], mm), \
+ INNER(q[16 * (sb) + 2 * 2 + o1], q[16 * (sb) + 2 * 2 + o2], mm), \
+ INNER(q[16 * (sb) + 2 * 3 + o1], q[16 * (sb) + 2 * 3 + o2], mm), \
+ INNER(q[16 * (sb) + 2 * 4 + o1], q[16 * (sb) + 2 * 4 + o2], mm), \
+ INNER(q[16 * (sb) + 2 * 5 + o1], q[16 * (sb) + 2 * 5 + o2], mm), \
+ INNER(q[16 * (sb) + 2 * 6 + o1], q[16 * (sb) + 2 * 6 + o2], mm), \
+ INNER(q[16 * (sb) + 2 * 7 + o1], q[16 * (sb) + 2 * 7 + o2], mm)
+
+#define WB_0_0 W_BIG( 4, 0, 1, 185)
+#define WB_0_1 W_BIG( 6, 0, 1, 185)
+#define WB_0_2 W_BIG( 0, 0, 1, 185)
+#define WB_0_3 W_BIG( 2, 0, 1, 185)
+#define WB_0_4 W_BIG( 7, 0, 1, 185)
+#define WB_0_5 W_BIG( 5, 0, 1, 185)
+#define WB_0_6 W_BIG( 3, 0, 1, 185)
+#define WB_0_7 W_BIG( 1, 0, 1, 185)
+#define WB_1_0 W_BIG(15, 0, 1, 185)
+#define WB_1_1 W_BIG(11, 0, 1, 185)
+#define WB_1_2 W_BIG(12, 0, 1, 185)
+#define WB_1_3 W_BIG( 8, 0, 1, 185)
+#define WB_1_4 W_BIG( 9, 0, 1, 185)
+#define WB_1_5 W_BIG(13, 0, 1, 185)
+#define WB_1_6 W_BIG(10, 0, 1, 185)
+#define WB_1_7 W_BIG(14, 0, 1, 185)
+#define WB_2_0 W_BIG(17, -256, -128, 233)
+#define WB_2_1 W_BIG(18, -256, -128, 233)
+#define WB_2_2 W_BIG(23, -256, -128, 233)
+#define WB_2_3 W_BIG(20, -256, -128, 233)
+#define WB_2_4 W_BIG(22, -256, -128, 233)
+#define WB_2_5 W_BIG(21, -256, -128, 233)
+#define WB_2_6 W_BIG(16, -256, -128, 233)
+#define WB_2_7 W_BIG(19, -256, -128, 233)
+#define WB_3_0 W_BIG(30, -383, -255, 233)
+#define WB_3_1 W_BIG(24, -383, -255, 233)
+#define WB_3_2 W_BIG(25, -383, -255, 233)
+#define WB_3_3 W_BIG(31, -383, -255, 233)
+#define WB_3_4 W_BIG(27, -383, -255, 233)
+#define WB_3_5 W_BIG(29, -383, -255, 233)
+#define WB_3_6 W_BIG(28, -383, -255, 233)
+#define WB_3_7 W_BIG(26, -383, -255, 233)
+
+#define IF(x, y, z) ((((y) ^ (z)) & (x)) ^ (z))
+#define MAJ(x, y, z) (((x) & (y)) | (((x) | (y)) & (z)))
+
+#define PP4_0_0 1
+#define PP4_0_1 0
+#define PP4_0_2 3
+#define PP4_0_3 2
+#define PP4_1_0 2
+#define PP4_1_1 3
+#define PP4_1_2 0
+#define PP4_1_3 1
+#define PP4_2_0 3
+#define PP4_2_1 2
+#define PP4_2_2 1
+#define PP4_2_3 0
+
+#define PP8_0_0 1
+#define PP8_0_1 0
+#define PP8_0_2 3
+#define PP8_0_3 2
+#define PP8_0_4 5
+#define PP8_0_5 4
+#define PP8_0_6 7
+#define PP8_0_7 6
+
+#define PP8_1_0 6
+#define PP8_1_1 7
+#define PP8_1_2 4
+#define PP8_1_3 5
+#define PP8_1_4 2
+#define PP8_1_5 3
+#define PP8_1_6 0
+#define PP8_1_7 1
+
+#define PP8_2_0 2
+#define PP8_2_1 3
+#define PP8_2_2 0
+#define PP8_2_3 1
+#define PP8_2_4 6
+#define PP8_2_5 7
+#define PP8_2_6 4
+#define PP8_2_7 5
+
+#define PP8_3_0 3
+#define PP8_3_1 2
+#define PP8_3_2 1
+#define PP8_3_3 0
+#define PP8_3_4 7
+#define PP8_3_5 6
+#define PP8_3_6 5
+#define PP8_3_7 4
+
+#define PP8_4_0 5
+#define PP8_4_1 4
+#define PP8_4_2 7
+#define PP8_4_3 6
+#define PP8_4_4 1
+#define PP8_4_5 0
+#define PP8_4_6 3
+#define PP8_4_7 2
+
+#define PP8_5_0 7
+#define PP8_5_1 6
+#define PP8_5_2 5
+#define PP8_5_3 4
+#define PP8_5_4 3
+#define PP8_5_5 2
+#define PP8_5_6 1
+#define PP8_5_7 0
+
+#define PP8_6_0 4
+#define PP8_6_1 5
+#define PP8_6_2 6
+#define PP8_6_3 7
+#define PP8_6_4 0
+#define PP8_6_5 1
+#define PP8_6_6 2
+#define PP8_6_7 3
+
+#if BEE_SIMD_NOCOPY
+
+#define DECL_STATE_SMALL
+#define READ_STATE_SMALL(sc)
+#define WRITE_STATE_SMALL(sc)
+#define DECL_STATE_BIG
+#define READ_STATE_BIG(sc)
+#define WRITE_STATE_BIG(sc)
+
+#else
+
+#define DECL_STATE_SMALL \
+ u32 A0, A1, A2, A3, B0, B1, B2, B3, C0, C1, C2, C3, D0, D1, D2, D3;
+
+#define READ_STATE_SMALL(sc) do { \
+ A0 = (sc)->state[ 0]; \
+ A1 = (sc)->state[ 1]; \
+ A2 = (sc)->state[ 2]; \
+ A3 = (sc)->state[ 3]; \
+ B0 = (sc)->state[ 4]; \
+ B1 = (sc)->state[ 5]; \
+ B2 = (sc)->state[ 6]; \
+ B3 = (sc)->state[ 7]; \
+ C0 = (sc)->state[ 8]; \
+ C1 = (sc)->state[ 9]; \
+ C2 = (sc)->state[10]; \
+ C3 = (sc)->state[11]; \
+ D0 = (sc)->state[12]; \
+ D1 = (sc)->state[13]; \
+ D2 = (sc)->state[14]; \
+ D3 = (sc)->state[15]; \
+ } while (0)
+
+#define WRITE_STATE_SMALL(sc) do { \
+ (sc)->state[ 0] = A0; \
+ (sc)->state[ 1] = A1; \
+ (sc)->state[ 2] = A2; \
+ (sc)->state[ 3] = A3; \
+ (sc)->state[ 4] = B0; \
+ (sc)->state[ 5] = B1; \
+ (sc)->state[ 6] = B2; \
+ (sc)->state[ 7] = B3; \
+ (sc)->state[ 8] = C0; \
+ (sc)->state[ 9] = C1; \
+ (sc)->state[10] = C2; \
+ (sc)->state[11] = C3; \
+ (sc)->state[12] = D0; \
+ (sc)->state[13] = D1; \
+ (sc)->state[14] = D2; \
+ (sc)->state[15] = D3; \
+ } while (0)
+
+#define DECL_STATE_BIG \
+ u32 A0, A1, A2, A3, A4, A5, A6, A7; \
+ u32 B0, B1, B2, B3, B4, B5, B6, B7; \
+ u32 C0, C1, C2, C3, C4, C5, C6, C7; \
+ u32 D0, D1, D2, D3, D4, D5, D6, D7;
+
+#define READ_STATE_BIG(sc) do { \
+ A0 = (sc)->state[ 0]; \
+ A1 = (sc)->state[ 1]; \
+ A2 = (sc)->state[ 2]; \
+ A3 = (sc)->state[ 3]; \
+ A4 = (sc)->state[ 4]; \
+ A5 = (sc)->state[ 5]; \
+ A6 = (sc)->state[ 6]; \
+ A7 = (sc)->state[ 7]; \
+ B0 = (sc)->state[ 8]; \
+ B1 = (sc)->state[ 9]; \
+ B2 = (sc)->state[10]; \
+ B3 = (sc)->state[11]; \
+ B4 = (sc)->state[12]; \
+ B5 = (sc)->state[13]; \
+ B6 = (sc)->state[14]; \
+ B7 = (sc)->state[15]; \
+ C0 = (sc)->state[16]; \
+ C1 = (sc)->state[17]; \
+ C2 = (sc)->state[18]; \
+ C3 = (sc)->state[19]; \
+ C4 = (sc)->state[20]; \
+ C5 = (sc)->state[21]; \
+ C6 = (sc)->state[22]; \
+ C7 = (sc)->state[23]; \
+ D0 = (sc)->state[24]; \
+ D1 = (sc)->state[25]; \
+ D2 = (sc)->state[26]; \
+ D3 = (sc)->state[27]; \
+ D4 = (sc)->state[28]; \
+ D5 = (sc)->state[29]; \
+ D6 = (sc)->state[30]; \
+ D7 = (sc)->state[31]; \
+ } while (0)
+
+#define WRITE_STATE_BIG(sc) do { \
+ (sc)->state[ 0] = A0; \
+ (sc)->state[ 1] = A1; \
+ (sc)->state[ 2] = A2; \
+ (sc)->state[ 3] = A3; \
+ (sc)->state[ 4] = A4; \
+ (sc)->state[ 5] = A5; \
+ (sc)->state[ 6] = A6; \
+ (sc)->state[ 7] = A7; \
+ (sc)->state[ 8] = B0; \
+ (sc)->state[ 9] = B1; \
+ (sc)->state[10] = B2; \
+ (sc)->state[11] = B3; \
+ (sc)->state[12] = B4; \
+ (sc)->state[13] = B5; \
+ (sc)->state[14] = B6; \
+ (sc)->state[15] = B7; \
+ (sc)->state[16] = C0; \
+ (sc)->state[17] = C1; \
+ (sc)->state[18] = C2; \
+ (sc)->state[19] = C3; \
+ (sc)->state[20] = C4; \
+ (sc)->state[21] = C5; \
+ (sc)->state[22] = C6; \
+ (sc)->state[23] = C7; \
+ (sc)->state[24] = D0; \
+ (sc)->state[25] = D1; \
+ (sc)->state[26] = D2; \
+ (sc)->state[27] = D3; \
+ (sc)->state[28] = D4; \
+ (sc)->state[29] = D5; \
+ (sc)->state[30] = D6; \
+ (sc)->state[31] = D7; \
+ } while (0)
+
+#endif
+
+#define STEP_ELT(n, w, fun, s, ppb) do { \
+ u32 tt = T32(D ## n + (w) + fun(A ## n, B ## n, C ## n)); \
+ A ## n = T32(ROL32(tt, s) + XCAT(tA, XCAT(ppb, n))); \
+ D ## n = C ## n; \
+ C ## n = B ## n; \
+ B ## n = tA ## n; \
+ } while (0)
+
+#define STEP_SMALL(w0, w1, w2, w3, fun, r, s, pp4b) do { \
+ u32 tA0 = ROL32(A0, r); \
+ u32 tA1 = ROL32(A1, r); \
+ u32 tA2 = ROL32(A2, r); \
+ u32 tA3 = ROL32(A3, r); \
+ STEP_ELT(0, w0, fun, s, pp4b); \
+ STEP_ELT(1, w1, fun, s, pp4b); \
+ STEP_ELT(2, w2, fun, s, pp4b); \
+ STEP_ELT(3, w3, fun, s, pp4b); \
+ } while (0)
+
+#define STEP_BIG(w0, w1, w2, w3, w4, w5, w6, w7, fun, r, s, pp8b) do { \
+ u32 tA0 = ROL32(A0, r); \
+ u32 tA1 = ROL32(A1, r); \
+ u32 tA2 = ROL32(A2, r); \
+ u32 tA3 = ROL32(A3, r); \
+ u32 tA4 = ROL32(A4, r); \
+ u32 tA5 = ROL32(A5, r); \
+ u32 tA6 = ROL32(A6, r); \
+ u32 tA7 = ROL32(A7, r); \
+ STEP_ELT(0, w0, fun, s, pp8b); \
+ STEP_ELT(1, w1, fun, s, pp8b); \
+ STEP_ELT(2, w2, fun, s, pp8b); \
+ STEP_ELT(3, w3, fun, s, pp8b); \
+ STEP_ELT(4, w4, fun, s, pp8b); \
+ STEP_ELT(5, w5, fun, s, pp8b); \
+ STEP_ELT(6, w6, fun, s, pp8b); \
+ STEP_ELT(7, w7, fun, s, pp8b); \
+ } while (0)
+
+#define M3_0_0 0_
+#define M3_1_0 1_
+#define M3_2_0 2_
+#define M3_3_0 0_
+#define M3_4_0 1_
+#define M3_5_0 2_
+#define M3_6_0 0_
+#define M3_7_0 1_
+
+#define M3_0_1 1_
+#define M3_1_1 2_
+#define M3_2_1 0_
+#define M3_3_1 1_
+#define M3_4_1 2_
+#define M3_5_1 0_
+#define M3_6_1 1_
+#define M3_7_1 2_
+
+#define M3_0_2 2_
+#define M3_1_2 0_
+#define M3_2_2 1_
+#define M3_3_2 2_
+#define M3_4_2 0_
+#define M3_5_2 1_
+#define M3_6_2 2_
+#define M3_7_2 0_
+
+#define STEP_SMALL_(w, fun, r, s, pp4b) STEP_SMALL w, fun, r, s, pp4b)
+
+#define ONE_ROUND_SMALL(ri, isp, p0, p1, p2, p3) do { \
+ STEP_SMALL_(WS_ ## ri ## 0, \
+ IF, p0, p1, XCAT(PP4_, M3_0_ ## isp)); \
+ STEP_SMALL_(WS_ ## ri ## 1, \
+ IF, p1, p2, XCAT(PP4_, M3_1_ ## isp)); \
+ STEP_SMALL_(WS_ ## ri ## 2, \
+ IF, p2, p3, XCAT(PP4_, M3_2_ ## isp)); \
+ STEP_SMALL_(WS_ ## ri ## 3, \
+ IF, p3, p0, XCAT(PP4_, M3_3_ ## isp)); \
+ STEP_SMALL_(WS_ ## ri ## 4, \
+ MAJ, p0, p1, XCAT(PP4_, M3_4_ ## isp)); \
+ STEP_SMALL_(WS_ ## ri ## 5, \
+ MAJ, p1, p2, XCAT(PP4_, M3_5_ ## isp)); \
+ STEP_SMALL_(WS_ ## ri ## 6, \
+ MAJ, p2, p3, XCAT(PP4_, M3_6_ ## isp)); \
+ STEP_SMALL_(WS_ ## ri ## 7, \
+ MAJ, p3, p0, XCAT(PP4_, M3_7_ ## isp)); \
+ } while (0)
+
+#define M7_0_0 0_
+#define M7_1_0 1_
+#define M7_2_0 2_
+#define M7_3_0 3_
+#define M7_4_0 4_
+#define M7_5_0 5_
+#define M7_6_0 6_
+#define M7_7_0 0_
+
+#define M7_0_1 1_
+#define M7_1_1 2_
+#define M7_2_1 3_
+#define M7_3_1 4_
+#define M7_4_1 5_
+#define M7_5_1 6_
+#define M7_6_1 0_
+#define M7_7_1 1_
+
+#define M7_0_2 2_
+#define M7_1_2 3_
+#define M7_2_2 4_
+#define M7_3_2 5_
+#define M7_4_2 6_
+#define M7_5_2 0_
+#define M7_6_2 1_
+#define M7_7_2 2_
+
+#define M7_0_3 3_
+#define M7_1_3 4_
+#define M7_2_3 5_
+#define M7_3_3 6_
+#define M7_4_3 0_
+#define M7_5_3 1_
+#define M7_6_3 2_
+#define M7_7_3 3_
+
+#define STEP_BIG_(w, fun, r, s, pp8b) STEP_BIG w, fun, r, s, pp8b)
+
+#define ONE_ROUND_BIG(ri, isp, p0, p1, p2, p3) do { \
+ STEP_BIG_(WB_ ## ri ## 0, \
+ IF, p0, p1, XCAT(PP8_, M7_0_ ## isp)); \
+ STEP_BIG_(WB_ ## ri ## 1, \
+ IF, p1, p2, XCAT(PP8_, M7_1_ ## isp)); \
+ STEP_BIG_(WB_ ## ri ## 2, \
+ IF, p2, p3, XCAT(PP8_, M7_2_ ## isp)); \
+ STEP_BIG_(WB_ ## ri ## 3, \
+ IF, p3, p0, XCAT(PP8_, M7_3_ ## isp)); \
+ STEP_BIG_(WB_ ## ri ## 4, \
+ MAJ, p0, p1, XCAT(PP8_, M7_4_ ## isp)); \
+ STEP_BIG_(WB_ ## ri ## 5, \
+ MAJ, p1, p2, XCAT(PP8_, M7_5_ ## isp)); \
+ STEP_BIG_(WB_ ## ri ## 6, \
+ MAJ, p2, p3, XCAT(PP8_, M7_6_ ## isp)); \
+ STEP_BIG_(WB_ ## ri ## 7, \
+ MAJ, p3, p0, XCAT(PP8_, M7_7_ ## isp)); \
+ } while (0)
+
+
+
+
+/*
+ * In NOCOPY mode the 32 state words A0..A7, B0..B7, C0..C7, D0..D7 are
+ * aliased directly onto sc->state[0..31], so the compression function
+ * works in place instead of on local copies (see DECL_STATE_BIG /
+ * READ_STATE_BIG / WRITE_STATE_BIG in the non-NOCOPY path).
+ */
+#if BEE_SIMD_NOCOPY
+#define A0 (sc->state[ 0])
+#define A1 (sc->state[ 1])
+#define A2 (sc->state[ 2])
+#define A3 (sc->state[ 3])
+#define A4 (sc->state[ 4])
+#define A5 (sc->state[ 5])
+#define A6 (sc->state[ 6])
+#define A7 (sc->state[ 7])
+#define B0 (sc->state[ 8])
+#define B1 (sc->state[ 9])
+#define B2 (sc->state[10])
+#define B3 (sc->state[11])
+#define B4 (sc->state[12])
+#define B5 (sc->state[13])
+#define B6 (sc->state[14])
+#define B7 (sc->state[15])
+#define C0 (sc->state[16])
+#define C1 (sc->state[17])
+#define C2 (sc->state[18])
+#define C3 (sc->state[19])
+#define C4 (sc->state[20])
+#define C5 (sc->state[21])
+#define C6 (sc->state[22])
+#define C7 (sc->state[23])
+#define D0 (sc->state[24])
+#define D1 (sc->state[25])
+#define D2 (sc->state[26])
+#define D3 (sc->state[27])
+#define D4 (sc->state[28])
+#define D5 (sc->state[29])
+#define D6 (sc->state[30])
+#define D7 (sc->state[31])
+#endif
+
+/*
+ * Facet #5 compression function: absorb the 128-byte block in sc->buf
+ * into the 1024-bit state sc->state.
+ *
+ * 'last' selects the final-block twiddle constants (yoff_b_f) instead
+ * of the normal ones (yoff_b_n); the message is first expanded into
+ * q[256] by the FFT256 macro (defined earlier in this file) and reduced
+ * modulo 257 into the range [-128, 128] via REDS1/REDS2.
+ */
+static void five_compress(facet_five_context *sc, int last)
+{
+ unsigned char *x;
+ s32 q[256];
+ int i;
+ DECL_STATE_BIG
+#if BEE_SIMD_NOCOPY
+ bee_u32 saved[32];
+#endif
+
+#if BEE_SIMD_NOCOPY
+ /* keep a copy of the entry state for the final feed-forward steps */
+ memcpy(saved, sc->state, sizeof saved);
+#endif
+
+ x = sc->buf;
+ FFT256(0, 1, 0, ll);
+ if (last) {
+ for (i = 0; i < 256; i ++) {
+ s32 tq;
+
+ tq = q[i] + yoff_b_f[i];
+ tq = REDS2(tq);
+ tq = REDS1(tq);
+ tq = REDS1(tq);
+ q[i] = (tq <= 128 ? tq : tq - 257);
+ }
+ } else {
+ for (i = 0; i < 256; i ++) {
+ s32 tq;
+
+ tq = q[i] + yoff_b_n[i];
+ tq = REDS2(tq);
+ tq = REDS1(tq);
+ tq = REDS1(tq);
+ q[i] = (tq <= 128 ? tq : tq - 257);
+ }
+ }
+ READ_STATE_BIG(sc);
+ /* XOR the 128-byte message block into the state, little-endian */
+ A0 ^= bee_dec32le_aligned(x + 0);
+ A1 ^= bee_dec32le_aligned(x + 4);
+ A2 ^= bee_dec32le_aligned(x + 8);
+ A3 ^= bee_dec32le_aligned(x + 12);
+ A4 ^= bee_dec32le_aligned(x + 16);
+ A5 ^= bee_dec32le_aligned(x + 20);
+ A6 ^= bee_dec32le_aligned(x + 24);
+ A7 ^= bee_dec32le_aligned(x + 28);
+ B0 ^= bee_dec32le_aligned(x + 32);
+ B1 ^= bee_dec32le_aligned(x + 36);
+ B2 ^= bee_dec32le_aligned(x + 40);
+ B3 ^= bee_dec32le_aligned(x + 44);
+ B4 ^= bee_dec32le_aligned(x + 48);
+ B5 ^= bee_dec32le_aligned(x + 52);
+ B6 ^= bee_dec32le_aligned(x + 56);
+ B7 ^= bee_dec32le_aligned(x + 60);
+ C0 ^= bee_dec32le_aligned(x + 64);
+ C1 ^= bee_dec32le_aligned(x + 68);
+ C2 ^= bee_dec32le_aligned(x + 72);
+ C3 ^= bee_dec32le_aligned(x + 76);
+ C4 ^= bee_dec32le_aligned(x + 80);
+ C5 ^= bee_dec32le_aligned(x + 84);
+ C6 ^= bee_dec32le_aligned(x + 88);
+ C7 ^= bee_dec32le_aligned(x + 92);
+ D0 ^= bee_dec32le_aligned(x + 96);
+ D1 ^= bee_dec32le_aligned(x + 100);
+ D2 ^= bee_dec32le_aligned(x + 104);
+ D3 ^= bee_dec32le_aligned(x + 108);
+ D4 ^= bee_dec32le_aligned(x + 112);
+ D5 ^= bee_dec32le_aligned(x + 116);
+ D6 ^= bee_dec32le_aligned(x + 120);
+ D7 ^= bee_dec32le_aligned(x + 124);
+
+ /* four rounds of eight steps each (rotation amounts per round) */
+ ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27);
+ ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7);
+ ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5);
+ ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25);
+ /* final feed-forward: mix the pre-compression state back in */
+#if BEE_SIMD_NOCOPY
+ STEP_BIG(
+ saved[ 0], saved[ 1], saved[ 2], saved[ 3],
+ saved[ 4], saved[ 5], saved[ 6], saved[ 7],
+ IF, 4, 13, PP8_4_);
+ STEP_BIG(
+ saved[ 8], saved[ 9], saved[10], saved[11],
+ saved[12], saved[13], saved[14], saved[15],
+ IF, 13, 10, PP8_5_);
+ STEP_BIG(
+ saved[16], saved[17], saved[18], saved[19],
+ saved[20], saved[21], saved[22], saved[23],
+ IF, 10, 25, PP8_6_);
+ STEP_BIG(
+ saved[24], saved[25], saved[26], saved[27],
+ saved[28], saved[29], saved[30], saved[31],
+ IF, 25, 4, PP8_0_);
+#else
+ /* here sc->state still holds the entry state (work was done in locals) */
+ STEP_BIG(
+ sc->state[ 0], sc->state[ 1], sc->state[ 2], sc->state[ 3],
+ sc->state[ 4], sc->state[ 5], sc->state[ 6], sc->state[ 7],
+ IF, 4, 13, PP8_4_);
+ STEP_BIG(
+ sc->state[ 8], sc->state[ 9], sc->state[10], sc->state[11],
+ sc->state[12], sc->state[13], sc->state[14], sc->state[15],
+ IF, 13, 10, PP8_5_);
+ STEP_BIG(
+ sc->state[16], sc->state[17], sc->state[18], sc->state[19],
+ sc->state[20], sc->state[21], sc->state[22], sc->state[23],
+ IF, 10, 25, PP8_6_);
+ STEP_BIG(
+ sc->state[24], sc->state[25], sc->state[26], sc->state[27],
+ sc->state[28], sc->state[29], sc->state[30], sc->state[31],
+ IF, 25, 4, PP8_0_);
+ WRITE_STATE_BIG(sc);
+#endif
+}
+
+/* Tear down the NOCOPY state aliases so the names can be reused later. */
+#if BEE_SIMD_NOCOPY
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef A4
+#undef A5
+#undef A6
+#undef A7
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef B4
+#undef B5
+#undef B6
+#undef B7
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#undef D0
+#undef D1
+#undef D2
+#undef D3
+#undef D4
+#undef D5
+#undef D6
+#undef D7
+#endif
+
+
+/* Initial value for the Facet #5 state: 32 x 32-bit words (1024 bits). */
+static const u32 IV512[] = {
+ C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC),
+ C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558),
+ C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F),
+ C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E),
+ C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8),
+ C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257),
+ C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4),
+ C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22)
+};
+
+/*
+ * Reset a Facet #5 context: load the IV into the state and clear the
+ * buffer pointer and the 64-bit block counter. No memory is allocated.
+ */
+static void five_init(void *cc, const u32 *iv)
+{
+ facet_five_context *sc;
+
+ sc = cc;
+ memcpy(sc->state, iv, sizeof sc->state);
+ sc->count_low = sc->count_high = 0;
+ sc->ptr = 0;
+}
+
+
+/*
+ * Absorb 'len' bytes of input. Data is staged in sc->buf; every time the
+ * 128-byte buffer fills, one compression is run and the (low, high)
+ * block counter is incremented with 32-bit carry propagation.
+ */
+static void five_update(void *cc, const void *data, size_t len)
+{
+ facet_five_context *sc;
+
+ sc = cc;
+ while (len > 0) {
+ size_t clen;
+
+ /* copy at most the remaining room in the buffer */
+ clen = (sizeof sc->buf) - sc->ptr;
+ if (clen > len)
+ clen = len;
+ memcpy(sc->buf + sc->ptr, data, clen);
+ data = (const unsigned char *)data + clen;
+ len -= clen;
+ if ((sc->ptr += clen) == sizeof sc->buf) {
+ five_compress(sc, 0);
+ sc->ptr = 0;
+ sc->count_low = T32(sc->count_low + 1);
+ if (sc->count_low == 0)
+ sc->count_high ++;
+ }
+ }
+}
+
+/*
+ * Encode the total message bit length into 8 bytes at 'dst':
+ * (blocks << 10) converts the 128-byte block count to bits, then
+ * (ptr << 3) + n adds the buffered bytes and the trailing extra bits.
+ * Written as two little-endian 32-bit words (low, high).
+ */
+static void five_encode_count(unsigned char *dst,
+ u32 low, u32 high, size_t ptr, unsigned n)
+{
+ low = T32(low << 10);
+ high = T32(high << 10) + (low >> 22);
+ low += (ptr << 3) + n;
+ bee_enc32le(dst, low);
+ bee_enc32le(dst + 4, high);
+}
+
+/*
+ * Finalize: pad/compress any buffered data (with up to n extra bits from
+ * ub), then compress a length block (last=1) and write dst_len 32-bit
+ * state words to dst, little-endian (dst_len = 16 -> 64-byte digest).
+ */
+static void five_finalize(void *cc, unsigned ub, unsigned n, void *dst, size_t dst_len)
+{
+ facet_five_context *sc;
+ unsigned char *d;
+ size_t u;
+
+ sc = cc;
+ if (sc->ptr > 0 || n > 0) {
+ memset(sc->buf + sc->ptr, 0,
+ (sizeof sc->buf) - sc->ptr);
+ /* keep only the top n extra bits of ub (big-endian bit order) */
+ sc->buf[sc->ptr] = ub & (0xFF << (8 - n));
+ five_compress(sc, 0);
+ }
+ memset(sc->buf, 0, sizeof sc->buf);
+ five_encode_count(sc->buf, sc->count_low, sc->count_high, sc->ptr, n);
+ five_compress(sc, 1);
+ d = dst; /* NOTE(review): redundant — reassigned in the for initializer */
+ for (d = dst, u = 0; u < dst_len; u ++)
+ bee_enc32le(d + (u << 2), sc->state[u]);
+}
+
+/* see facet_five.h */
+void facet_five_init(void *cc)
+{
+ five_init(cc, IV512);
+}
+
+/* see facet_five.h */
+void facet_five(void *cc, const void *data, size_t len)
+{
+ five_update(cc, data, len);
+}
+
+/* see facet_five.h */
+void facet_five_close(void *cc, void *dst)
+{
+ facet_five_addbits_and_close(cc, 0, 0, dst);
+}
+
+/* see facet_five.h */
+void facet_five_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+ /* 16 output words = 64 bytes; context reset for the next computation */
+ five_finalize(cc, ub, n, dst, 16);
+ facet_five_init(cc);
+}
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/stratum/algos/honeycomb/facet_five.h b/stratum/algos/honeycomb/facet_five.h
new file mode 100644
index 000000000..67fc69f7d
--- /dev/null
+++ b/stratum/algos/honeycomb/facet_five.h
@@ -0,0 +1,79 @@
+#ifndef FACET_FIVE_H
+#define FACET_FIVE_H
+
+#ifdef __cplusplus
+ extern "C"{
+#endif
+
+#include <stddef.h>
+#include "honeycomb_types.h"
+
+
+//#undef BEE_64 //
+
+
+/**
+ * This structure is a context for HoneyComb Facet #5 computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a HoneyComb Facet #5 computation has been performed, the context can be reused for
+ * another computation. This specific structure is used for HoneyComb Facet #5.
+ *
+ * The contents of this structure are private. A running HoneyComb Facet #5 computation
+ * can be cloned by copying the context (e.g. with a simple memcpy() ).
+ */
+typedef struct {
+ unsigned char buf[128]; /* first field, for alignment */
+ size_t ptr; /* number of bytes currently buffered in buf */
+ bee_u32 state[32]; /* 1024-bit chaining state */
+ bee_u32 count_low, count_high; /* 64-bit count of compressed blocks */
+} facet_five_context;
+
+
+/**
+ * Initialize a HoneyComb Facet #5 context. This process performs no memory allocation.
+ *
+ * @param cc the HoneyComb Facet #5 context (pointer to a facet_five_context)
+ */
+void facet_five_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that len is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc the HoneyComb Facet #5 context
+ * @param data the input data
+ * @param len the input data length (in bytes)
+ */
+void facet_five(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current HoneyComb Facet #5 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is automatically reinitialized.
+ *
+ * @param cc the HoneyComb Facet #5 context
+ * @param dst the destination buffer
+ */
+void facet_five_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (64 bytes). If bit number i
+ * in ub has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc the HoneyComb Facet #5 context
+ * @param ub the extra bits
+ * @param n the number of extra bits (0 to 7)
+ * @param dst the destination buffer
+ */
+void facet_five_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/stratum/algos/honeycomb/facet_four.c b/stratum/algos/honeycomb/facet_four.c
new file mode 100644
index 000000000..f37264b36
--- /dev/null
+++ b/stratum/algos/honeycomb/facet_four.c
@@ -0,0 +1,780 @@
+#include <stddef.h>
+#include <string.h>
+
+#include "facet_four.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+
+#ifdef _MSC_VER
+ #pragma warning (disable: 4146)
+#endif
+
+#define C32 BEE_C32
+
+/*
+ * As of round 2 of the SHA-3 competition, the published reference
+ * implementation and test vectors are wrong, because they use
+ * big-endian AES tables while the internal decoding uses little-endian.
+ * The code below follows the specification. To turn it into a code
+ * which follows the reference implementation (the one called "BugFix"
+ * on the SHAvite-3 web site, published on Nov 23rd, 2009), comment out
+ * the code below (from the '#define AES_BIG_ENDIAN...' to the definition
+ * of the AES_ROUND_NOKEY macro) and replace it with the version which
+ * is commented out afterwards.
+ */
+
+#define AES_BIG_ENDIAN 0
+#include "facets_helper.c"
+
+/* Initial value for the Facet #4 chaining state: 16 x 32-bit words. */
+static const bee_u32 IV512[] = {
+ C32(0x72FCCDD8), C32(0x79CA4727), C32(0x128A077B), C32(0x40D55AEC),
+ C32(0xD1901A06), C32(0x430AE307), C32(0xB29F5CD1), C32(0xDF07FBFC),
+ C32(0x8E45D73D), C32(0x681AB538), C32(0xBDE86578), C32(0xDD577E47),
+ C32(0xE275EADE), C32(0x502D9FCD), C32(0xB9357178), C32(0x022A4B9A)
+};
+
+/*
+ * One keyless AES round on the 128-bit value (x0, x1, x2, x3), in place.
+ * Uses the little-endian tables from facets_helper.c (AES_BIG_ENDIAN 0).
+ */
+#define AES_ROUND_NOKEY(x0, x1, x2, x3) do { \
+ bee_u32 t0 = (x0); \
+ bee_u32 t1 = (x1); \
+ bee_u32 t2 = (x2); \
+ bee_u32 t3 = (x3); \
+ AES_ROUND_NOKEY_LE(t0, t1, t2, t3, x0, x1, x2, x3); \
+ } while (0)
+
+/*
+ * This is the code needed to match the "reference implementation" as
+ * published on Nov 23rd, 2009, instead of the published specification.
+ *
+
+#define AES_BIG_ENDIAN 1
+#include "facets_helper.c"
+
+static const bee_u32 IV512[] = {
+ C32(0xD5652B63), C32(0x25F1E6EA), C32(0xB18F48FA), C32(0xA1EE3A47),
+ C32(0xC8B67B07), C32(0xBDCE48D3), C32(0xE3937B78), C32(0x05DB5186),
+ C32(0x613BE326), C32(0xA11FA303), C32(0x90C833D4), C32(0x79CEE316),
+ C32(0x1E1AF00F), C32(0x2829B165), C32(0x23B25F80), C32(0x21E11499)
+};
+
+#define AES_ROUND_NOKEY(x0, x1, x2, x3) do { \
+ bee_u32 t0 = (x0); \
+ bee_u32 t1 = (x1); \
+ bee_u32 t2 = (x2); \
+ bee_u32 t3 = (x3); \
+ AES_ROUND_NOKEY_BE(t0, t1, t2, t3, x0, x1, x2, x3); \
+ } while (0)
+
+ */
+
+/*
+ * Key-schedule element: apply a keyless AES round to (k1, k2, k3, k0)
+ * and rotate the four words left by one position.
+ */
+#define KEY_EXPAND_ELT(k0, k1, k2, k3) do { \
+ bee_u32 kt; \
+ AES_ROUND_NOKEY(k1, k2, k3, k0); \
+ kt = (k0); \
+ (k0) = (k1); \
+ (k1) = (k2); \
+ (k2) = (k3); \
+ (k3) = kt; \
+ } while (0)
+
+
+
+
+/*
+ * This function assumes that "msg" is aligned for 32-bit access.
+ */
+/*
+ * Facet #4 compression function: one 128-byte message block into the
+ * 512-bit chaining state sc->h (Davies-Meyer style: the permuted halves
+ * are XORed back into sc->h at the end, with a half swap).
+ *
+ * The 14 AES-based rounds are laid out as an explicit round 0, a loop of
+ * three iterations covering rounds 1-12 (the 128-bit block counter
+ * sc->count0..3 is injected at rounds 1, 5/6 region and 11 via the
+ * r == 0/1/2 branches), and an explicit final round 13 where the counter
+ * is mixed into the rk18..rk1B schedule words.
+ */
+static void c512(facet_four_context *sc, const void *msg)
+{
+ bee_u32 p0, p1, p2, p3, p4, p5, p6, p7;
+ bee_u32 p8, p9, pA, pB, pC, pD, pE, pF;
+ bee_u32 x0, x1, x2, x3;
+ bee_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07;
+ bee_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F;
+ bee_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17;
+ bee_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F;
+ int r;
+
+ /* load the chaining state into locals p0..pF */
+ p0 = sc->h[0x0];
+ p1 = sc->h[0x1];
+ p2 = sc->h[0x2];
+ p3 = sc->h[0x3];
+ p4 = sc->h[0x4];
+ p5 = sc->h[0x5];
+ p6 = sc->h[0x6];
+ p7 = sc->h[0x7];
+ p8 = sc->h[0x8];
+ p9 = sc->h[0x9];
+ pA = sc->h[0xA];
+ pB = sc->h[0xB];
+ pC = sc->h[0xC];
+ pD = sc->h[0xD];
+ pE = sc->h[0xE];
+ pF = sc->h[0xF];
+ /* round 0: round keys rk00..rk1F are the raw message words */
+ rk00 = bee_dec32le_aligned((const unsigned char *)msg + 0);
+ x0 = p4 ^ rk00;
+ rk01 = bee_dec32le_aligned((const unsigned char *)msg + 4);
+ x1 = p5 ^ rk01;
+ rk02 = bee_dec32le_aligned((const unsigned char *)msg + 8);
+ x2 = p6 ^ rk02;
+ rk03 = bee_dec32le_aligned((const unsigned char *)msg + 12);
+ x3 = p7 ^ rk03;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ rk04 = bee_dec32le_aligned((const unsigned char *)msg + 16);
+ x0 ^= rk04;
+ rk05 = bee_dec32le_aligned((const unsigned char *)msg + 20);
+ x1 ^= rk05;
+ rk06 = bee_dec32le_aligned((const unsigned char *)msg + 24);
+ x2 ^= rk06;
+ rk07 = bee_dec32le_aligned((const unsigned char *)msg + 28);
+ x3 ^= rk07;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ rk08 = bee_dec32le_aligned((const unsigned char *)msg + 32);
+ x0 ^= rk08;
+ rk09 = bee_dec32le_aligned((const unsigned char *)msg + 36);
+ x1 ^= rk09;
+ rk0A = bee_dec32le_aligned((const unsigned char *)msg + 40);
+ x2 ^= rk0A;
+ rk0B = bee_dec32le_aligned((const unsigned char *)msg + 44);
+ x3 ^= rk0B;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ rk0C = bee_dec32le_aligned((const unsigned char *)msg + 48);
+ x0 ^= rk0C;
+ rk0D = bee_dec32le_aligned((const unsigned char *)msg + 52);
+ x1 ^= rk0D;
+ rk0E = bee_dec32le_aligned((const unsigned char *)msg + 56);
+ x2 ^= rk0E;
+ rk0F = bee_dec32le_aligned((const unsigned char *)msg + 60);
+ x3 ^= rk0F;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ p0 ^= x0;
+ p1 ^= x1;
+ p2 ^= x2;
+ p3 ^= x3;
+ rk10 = bee_dec32le_aligned((const unsigned char *)msg + 64);
+ x0 = pC ^ rk10;
+ rk11 = bee_dec32le_aligned((const unsigned char *)msg + 68);
+ x1 = pD ^ rk11;
+ rk12 = bee_dec32le_aligned((const unsigned char *)msg + 72);
+ x2 = pE ^ rk12;
+ rk13 = bee_dec32le_aligned((const unsigned char *)msg + 76);
+ x3 = pF ^ rk13;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ rk14 = bee_dec32le_aligned((const unsigned char *)msg + 80);
+ x0 ^= rk14;
+ rk15 = bee_dec32le_aligned((const unsigned char *)msg + 84);
+ x1 ^= rk15;
+ rk16 = bee_dec32le_aligned((const unsigned char *)msg + 88);
+ x2 ^= rk16;
+ rk17 = bee_dec32le_aligned((const unsigned char *)msg + 92);
+ x3 ^= rk17;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ rk18 = bee_dec32le_aligned((const unsigned char *)msg + 96);
+ x0 ^= rk18;
+ rk19 = bee_dec32le_aligned((const unsigned char *)msg + 100);
+ x1 ^= rk19;
+ rk1A = bee_dec32le_aligned((const unsigned char *)msg + 104);
+ x2 ^= rk1A;
+ rk1B = bee_dec32le_aligned((const unsigned char *)msg + 108);
+ x3 ^= rk1B;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ rk1C = bee_dec32le_aligned((const unsigned char *)msg + 112);
+ x0 ^= rk1C;
+ rk1D = bee_dec32le_aligned((const unsigned char *)msg + 116);
+ x1 ^= rk1D;
+ rk1E = bee_dec32le_aligned((const unsigned char *)msg + 120);
+ x2 ^= rk1E;
+ rk1F = bee_dec32le_aligned((const unsigned char *)msg + 124);
+ x3 ^= rk1F;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ p8 ^= x0;
+ p9 ^= x1;
+ pA ^= x2;
+ pB ^= x3;
+
+ for (r = 0; r < 3; r ++) {
+ /* round 1, 5, 9 */
+ KEY_EXPAND_ELT(rk00, rk01, rk02, rk03);
+ rk00 ^= rk1C;
+ rk01 ^= rk1D;
+ rk02 ^= rk1E;
+ rk03 ^= rk1F;
+ if (r == 0) {
+ /* counter injection (round 1) */
+ rk00 ^= sc->count0;
+ rk01 ^= sc->count1;
+ rk02 ^= sc->count2;
+ rk03 ^= BEE_T32(~sc->count3);
+ }
+ x0 = p0 ^ rk00;
+ x1 = p1 ^ rk01;
+ x2 = p2 ^ rk02;
+ x3 = p3 ^ rk03;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ KEY_EXPAND_ELT(rk04, rk05, rk06, rk07);
+ rk04 ^= rk00;
+ rk05 ^= rk01;
+ rk06 ^= rk02;
+ rk07 ^= rk03;
+ if (r == 1) {
+ /* counter injection (round 5), reversed word order */
+ rk04 ^= sc->count3;
+ rk05 ^= sc->count2;
+ rk06 ^= sc->count1;
+ rk07 ^= BEE_T32(~sc->count0);
+ }
+ x0 ^= rk04;
+ x1 ^= rk05;
+ x2 ^= rk06;
+ x3 ^= rk07;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ KEY_EXPAND_ELT(rk08, rk09, rk0A, rk0B);
+ rk08 ^= rk04;
+ rk09 ^= rk05;
+ rk0A ^= rk06;
+ rk0B ^= rk07;
+ x0 ^= rk08;
+ x1 ^= rk09;
+ x2 ^= rk0A;
+ x3 ^= rk0B;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ KEY_EXPAND_ELT(rk0C, rk0D, rk0E, rk0F);
+ rk0C ^= rk08;
+ rk0D ^= rk09;
+ rk0E ^= rk0A;
+ rk0F ^= rk0B;
+ x0 ^= rk0C;
+ x1 ^= rk0D;
+ x2 ^= rk0E;
+ x3 ^= rk0F;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ pC ^= x0;
+ pD ^= x1;
+ pE ^= x2;
+ pF ^= x3;
+ KEY_EXPAND_ELT(rk10, rk11, rk12, rk13);
+ rk10 ^= rk0C;
+ rk11 ^= rk0D;
+ rk12 ^= rk0E;
+ rk13 ^= rk0F;
+ x0 = p8 ^ rk10;
+ x1 = p9 ^ rk11;
+ x2 = pA ^ rk12;
+ x3 = pB ^ rk13;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ KEY_EXPAND_ELT(rk14, rk15, rk16, rk17);
+ rk14 ^= rk10;
+ rk15 ^= rk11;
+ rk16 ^= rk12;
+ rk17 ^= rk13;
+ x0 ^= rk14;
+ x1 ^= rk15;
+ x2 ^= rk16;
+ x3 ^= rk17;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ KEY_EXPAND_ELT(rk18, rk19, rk1A, rk1B);
+ rk18 ^= rk14;
+ rk19 ^= rk15;
+ rk1A ^= rk16;
+ rk1B ^= rk17;
+ x0 ^= rk18;
+ x1 ^= rk19;
+ x2 ^= rk1A;
+ x3 ^= rk1B;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ KEY_EXPAND_ELT(rk1C, rk1D, rk1E, rk1F);
+ rk1C ^= rk18;
+ rk1D ^= rk19;
+ rk1E ^= rk1A;
+ rk1F ^= rk1B;
+ if (r == 2) {
+ /* counter injection (round 9 region) */
+ rk1C ^= sc->count2;
+ rk1D ^= sc->count3;
+ rk1E ^= sc->count0;
+ rk1F ^= BEE_T32(~sc->count1);
+ }
+ x0 ^= rk1C;
+ x1 ^= rk1D;
+ x2 ^= rk1E;
+ x3 ^= rk1F;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ p4 ^= x0;
+ p5 ^= x1;
+ p6 ^= x2;
+ p7 ^= x3;
+ /* round 2, 6, 10 */
+ rk00 ^= rk19;
+ x0 = pC ^ rk00;
+ rk01 ^= rk1A;
+ x1 = pD ^ rk01;
+ rk02 ^= rk1B;
+ x2 = pE ^ rk02;
+ rk03 ^= rk1C;
+ x3 = pF ^ rk03;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ rk04 ^= rk1D;
+ x0 ^= rk04;
+ rk05 ^= rk1E;
+ x1 ^= rk05;
+ rk06 ^= rk1F;
+ x2 ^= rk06;
+ rk07 ^= rk00;
+ x3 ^= rk07;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ rk08 ^= rk01;
+ x0 ^= rk08;
+ rk09 ^= rk02;
+ x1 ^= rk09;
+ rk0A ^= rk03;
+ x2 ^= rk0A;
+ rk0B ^= rk04;
+ x3 ^= rk0B;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ rk0C ^= rk05;
+ x0 ^= rk0C;
+ rk0D ^= rk06;
+ x1 ^= rk0D;
+ rk0E ^= rk07;
+ x2 ^= rk0E;
+ rk0F ^= rk08;
+ x3 ^= rk0F;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ p8 ^= x0;
+ p9 ^= x1;
+ pA ^= x2;
+ pB ^= x3;
+ rk10 ^= rk09;
+ x0 = p4 ^ rk10;
+ rk11 ^= rk0A;
+ x1 = p5 ^ rk11;
+ rk12 ^= rk0B;
+ x2 = p6 ^ rk12;
+ rk13 ^= rk0C;
+ x3 = p7 ^ rk13;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ rk14 ^= rk0D;
+ x0 ^= rk14;
+ rk15 ^= rk0E;
+ x1 ^= rk15;
+ rk16 ^= rk0F;
+ x2 ^= rk16;
+ rk17 ^= rk10;
+ x3 ^= rk17;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ rk18 ^= rk11;
+ x0 ^= rk18;
+ rk19 ^= rk12;
+ x1 ^= rk19;
+ rk1A ^= rk13;
+ x2 ^= rk1A;
+ rk1B ^= rk14;
+ x3 ^= rk1B;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ rk1C ^= rk15;
+ x0 ^= rk1C;
+ rk1D ^= rk16;
+ x1 ^= rk1D;
+ rk1E ^= rk17;
+ x2 ^= rk1E;
+ rk1F ^= rk18;
+ x3 ^= rk1F;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ p0 ^= x0;
+ p1 ^= x1;
+ p2 ^= x2;
+ p3 ^= x3;
+ /* round 3, 7, 11 */
+ KEY_EXPAND_ELT(rk00, rk01, rk02, rk03);
+ rk00 ^= rk1C;
+ rk01 ^= rk1D;
+ rk02 ^= rk1E;
+ rk03 ^= rk1F;
+ x0 = p8 ^ rk00;
+ x1 = p9 ^ rk01;
+ x2 = pA ^ rk02;
+ x3 = pB ^ rk03;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ KEY_EXPAND_ELT(rk04, rk05, rk06, rk07);
+ rk04 ^= rk00;
+ rk05 ^= rk01;
+ rk06 ^= rk02;
+ rk07 ^= rk03;
+ x0 ^= rk04;
+ x1 ^= rk05;
+ x2 ^= rk06;
+ x3 ^= rk07;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ KEY_EXPAND_ELT(rk08, rk09, rk0A, rk0B);
+ rk08 ^= rk04;
+ rk09 ^= rk05;
+ rk0A ^= rk06;
+ rk0B ^= rk07;
+ x0 ^= rk08;
+ x1 ^= rk09;
+ x2 ^= rk0A;
+ x3 ^= rk0B;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ KEY_EXPAND_ELT(rk0C, rk0D, rk0E, rk0F);
+ rk0C ^= rk08;
+ rk0D ^= rk09;
+ rk0E ^= rk0A;
+ rk0F ^= rk0B;
+ x0 ^= rk0C;
+ x1 ^= rk0D;
+ x2 ^= rk0E;
+ x3 ^= rk0F;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ p4 ^= x0;
+ p5 ^= x1;
+ p6 ^= x2;
+ p7 ^= x3;
+ KEY_EXPAND_ELT(rk10, rk11, rk12, rk13);
+ rk10 ^= rk0C;
+ rk11 ^= rk0D;
+ rk12 ^= rk0E;
+ rk13 ^= rk0F;
+ x0 = p0 ^ rk10;
+ x1 = p1 ^ rk11;
+ x2 = p2 ^ rk12;
+ x3 = p3 ^ rk13;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ KEY_EXPAND_ELT(rk14, rk15, rk16, rk17);
+ rk14 ^= rk10;
+ rk15 ^= rk11;
+ rk16 ^= rk12;
+ rk17 ^= rk13;
+ x0 ^= rk14;
+ x1 ^= rk15;
+ x2 ^= rk16;
+ x3 ^= rk17;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ KEY_EXPAND_ELT(rk18, rk19, rk1A, rk1B);
+ rk18 ^= rk14;
+ rk19 ^= rk15;
+ rk1A ^= rk16;
+ rk1B ^= rk17;
+ x0 ^= rk18;
+ x1 ^= rk19;
+ x2 ^= rk1A;
+ x3 ^= rk1B;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ KEY_EXPAND_ELT(rk1C, rk1D, rk1E, rk1F);
+ rk1C ^= rk18;
+ rk1D ^= rk19;
+ rk1E ^= rk1A;
+ rk1F ^= rk1B;
+ x0 ^= rk1C;
+ x1 ^= rk1D;
+ x2 ^= rk1E;
+ x3 ^= rk1F;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ pC ^= x0;
+ pD ^= x1;
+ pE ^= x2;
+ pF ^= x3;
+ /* round 4, 8, 12 */
+ rk00 ^= rk19;
+ x0 = p4 ^ rk00;
+ rk01 ^= rk1A;
+ x1 = p5 ^ rk01;
+ rk02 ^= rk1B;
+ x2 = p6 ^ rk02;
+ rk03 ^= rk1C;
+ x3 = p7 ^ rk03;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ rk04 ^= rk1D;
+ x0 ^= rk04;
+ rk05 ^= rk1E;
+ x1 ^= rk05;
+ rk06 ^= rk1F;
+ x2 ^= rk06;
+ rk07 ^= rk00;
+ x3 ^= rk07;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ rk08 ^= rk01;
+ x0 ^= rk08;
+ rk09 ^= rk02;
+ x1 ^= rk09;
+ rk0A ^= rk03;
+ x2 ^= rk0A;
+ rk0B ^= rk04;
+ x3 ^= rk0B;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ rk0C ^= rk05;
+ x0 ^= rk0C;
+ rk0D ^= rk06;
+ x1 ^= rk0D;
+ rk0E ^= rk07;
+ x2 ^= rk0E;
+ rk0F ^= rk08;
+ x3 ^= rk0F;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ p0 ^= x0;
+ p1 ^= x1;
+ p2 ^= x2;
+ p3 ^= x3;
+ rk10 ^= rk09;
+ x0 = pC ^ rk10;
+ rk11 ^= rk0A;
+ x1 = pD ^ rk11;
+ rk12 ^= rk0B;
+ x2 = pE ^ rk12;
+ rk13 ^= rk0C;
+ x3 = pF ^ rk13;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ rk14 ^= rk0D;
+ x0 ^= rk14;
+ rk15 ^= rk0E;
+ x1 ^= rk15;
+ rk16 ^= rk0F;
+ x2 ^= rk16;
+ rk17 ^= rk10;
+ x3 ^= rk17;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ rk18 ^= rk11;
+ x0 ^= rk18;
+ rk19 ^= rk12;
+ x1 ^= rk19;
+ rk1A ^= rk13;
+ x2 ^= rk1A;
+ rk1B ^= rk14;
+ x3 ^= rk1B;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ rk1C ^= rk15;
+ x0 ^= rk1C;
+ rk1D ^= rk16;
+ x1 ^= rk1D;
+ rk1E ^= rk17;
+ x2 ^= rk1E;
+ rk1F ^= rk18;
+ x3 ^= rk1F;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ p8 ^= x0;
+ p9 ^= x1;
+ pA ^= x2;
+ pB ^= x3;
+ }
+ /* round 13 */
+ KEY_EXPAND_ELT(rk00, rk01, rk02, rk03);
+ rk00 ^= rk1C;
+ rk01 ^= rk1D;
+ rk02 ^= rk1E;
+ rk03 ^= rk1F;
+ x0 = p0 ^ rk00;
+ x1 = p1 ^ rk01;
+ x2 = p2 ^ rk02;
+ x3 = p3 ^ rk03;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ KEY_EXPAND_ELT(rk04, rk05, rk06, rk07);
+ rk04 ^= rk00;
+ rk05 ^= rk01;
+ rk06 ^= rk02;
+ rk07 ^= rk03;
+ x0 ^= rk04;
+ x1 ^= rk05;
+ x2 ^= rk06;
+ x3 ^= rk07;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ KEY_EXPAND_ELT(rk08, rk09, rk0A, rk0B);
+ rk08 ^= rk04;
+ rk09 ^= rk05;
+ rk0A ^= rk06;
+ rk0B ^= rk07;
+ x0 ^= rk08;
+ x1 ^= rk09;
+ x2 ^= rk0A;
+ x3 ^= rk0B;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ KEY_EXPAND_ELT(rk0C, rk0D, rk0E, rk0F);
+ rk0C ^= rk08;
+ rk0D ^= rk09;
+ rk0E ^= rk0A;
+ rk0F ^= rk0B;
+ x0 ^= rk0C;
+ x1 ^= rk0D;
+ x2 ^= rk0E;
+ x3 ^= rk0F;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ pC ^= x0;
+ pD ^= x1;
+ pE ^= x2;
+ pF ^= x3;
+ KEY_EXPAND_ELT(rk10, rk11, rk12, rk13);
+ rk10 ^= rk0C;
+ rk11 ^= rk0D;
+ rk12 ^= rk0E;
+ rk13 ^= rk0F;
+ x0 = p8 ^ rk10;
+ x1 = p9 ^ rk11;
+ x2 = pA ^ rk12;
+ x3 = pB ^ rk13;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ KEY_EXPAND_ELT(rk14, rk15, rk16, rk17);
+ rk14 ^= rk10;
+ rk15 ^= rk11;
+ rk16 ^= rk12;
+ rk17 ^= rk13;
+ x0 ^= rk14;
+ x1 ^= rk15;
+ x2 ^= rk16;
+ x3 ^= rk17;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ KEY_EXPAND_ELT(rk18, rk19, rk1A, rk1B);
+ /* final counter injection folded into the schedule */
+ rk18 ^= rk14 ^ sc->count1;
+ rk19 ^= rk15 ^ sc->count0;
+ rk1A ^= rk16 ^ sc->count3;
+ rk1B ^= rk17 ^ BEE_T32(~sc->count2);
+ x0 ^= rk18;
+ x1 ^= rk19;
+ x2 ^= rk1A;
+ x3 ^= rk1B;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ KEY_EXPAND_ELT(rk1C, rk1D, rk1E, rk1F);
+ rk1C ^= rk18;
+ rk1D ^= rk19;
+ rk1E ^= rk1A;
+ rk1F ^= rk1B;
+ x0 ^= rk1C;
+ x1 ^= rk1D;
+ x2 ^= rk1E;
+ x3 ^= rk1F;
+ AES_ROUND_NOKEY(x0, x1, x2, x3);
+ p4 ^= x0;
+ p5 ^= x1;
+ p6 ^= x2;
+ p7 ^= x3;
+ /* feed-forward into the chaining state, with halves swapped */
+ sc->h[0x0] ^= p8;
+ sc->h[0x1] ^= p9;
+ sc->h[0x2] ^= pA;
+ sc->h[0x3] ^= pB;
+ sc->h[0x4] ^= pC;
+ sc->h[0x5] ^= pD;
+ sc->h[0x6] ^= pE;
+ sc->h[0x7] ^= pF;
+ sc->h[0x8] ^= p0;
+ sc->h[0x9] ^= p1;
+ sc->h[0xA] ^= p2;
+ sc->h[0xB] ^= p3;
+ sc->h[0xC] ^= p4;
+ sc->h[0xD] ^= p5;
+ sc->h[0xE] ^= p6;
+ sc->h[0xF] ^= p7;
+}
+
+
+
+/*
+ * Reset a Facet #4 context: load the IV into the chaining state and
+ * clear the buffer pointer and the 128-bit bit counter.
+ */
+static void four_init(facet_four_context *sc, const bee_u32 *iv)
+{
+ memcpy(sc->h, iv, sizeof sc->h);
+ sc->ptr = 0;
+ sc->count0 = 0;
+ sc->count1 = 0;
+ sc->count2 = 0;
+ sc->count3 = 0;
+}
+
+/*
+ * Absorb 'len' bytes of input. Each full 128-byte buffer adds 1024 to
+ * the 128-bit bit counter (count0..count3, with carry propagation)
+ * BEFORE the block is compressed — c512 reads the updated counter.
+ */
+static void four_core(facet_four_context *sc, const void *data, size_t len)
+{
+ unsigned char *buf;
+ size_t ptr;
+
+ buf = sc->buf;
+ ptr = sc->ptr;
+ while (len > 0) {
+ size_t clen;
+
+ clen = (sizeof sc->buf) - ptr;
+ if (clen > len)
+ clen = len;
+ memcpy(buf + ptr, data, clen);
+ data = (const unsigned char *)data + clen;
+ ptr += clen;
+ len -= clen;
+ if (ptr == sizeof sc->buf) {
+ if ((sc->count0 = BEE_T32(sc->count0 + 1024)) == 0) {
+ sc->count1 = BEE_T32(sc->count1 + 1);
+ if (sc->count1 == 0) {
+ sc->count2 = BEE_T32(sc->count2 + 1);
+ if (sc->count2 == 0) {
+ sc->count3 = BEE_T32(
+ sc->count3 + 1);
+ }
+ }
+ }
+ c512(sc, buf);
+ ptr = 0;
+ }
+ }
+ sc->ptr = ptr;
+}
+
+/*
+ * Finalize: pad the last block (0x80-style marker with up to n extra
+ * bits from ub), append the 128-bit bit count at bytes 110..125 and the
+ * output size in bits at bytes 126..127, compress, then write
+ * out_size_w32 little-endian state words to dst (16 -> 64-byte digest).
+ */
+static void four_close(facet_four_context *sc, unsigned ub, unsigned n, void *dst, size_t out_size_w32)
+{
+ unsigned char *buf;
+ size_t ptr, u;
+ unsigned z;
+ bee_u32 count0, count1, count2, count3;
+
+ buf = sc->buf;
+ ptr = sc->ptr;
+ /* snapshot the counter including the trailing partial byte/bits */
+ count0 = (sc->count0 += (ptr << 3) + n);
+ count1 = sc->count1;
+ count2 = sc->count2;
+ count3 = sc->count3;
+ /* padding byte: extra bits of ub followed by a single 1 bit */
+ z = 0x80 >> n;
+ z = ((ub & -z) | z) & 0xFF;
+ if (ptr == 0 && n == 0) {
+ buf[0] = 0x80;
+ memset(buf + 1, 0, 109);
+ sc->count0 = sc->count1 = sc->count2 = sc->count3 = 0;
+ } else if (ptr < 110) {
+ buf[ptr ++] = z;
+ memset(buf + ptr, 0, 110 - ptr);
+ } else {
+ /* no room for the length fields: pad, compress, start a fresh block */
+ buf[ptr ++] = z;
+ memset(buf + ptr, 0, 128 - ptr);
+ c512(sc, buf);
+ memset(buf, 0, 110);
+ sc->count0 = sc->count1 = sc->count2 = sc->count3 = 0;
+ }
+ bee_enc32le(buf + 110, count0);
+ bee_enc32le(buf + 114, count1);
+ bee_enc32le(buf + 118, count2);
+ bee_enc32le(buf + 122, count3);
+ buf[126] = out_size_w32 << 5;
+ buf[127] = out_size_w32 >> 3;
+ c512(sc, buf);
+ for (u = 0; u < out_size_w32; u ++)
+ bee_enc32le((unsigned char *)dst + (u << 2), sc->h[u]);
+}
+
+
+/* see facet_four.h */
+void facet_four_init(void *cc)
+{
+ four_init(cc, IV512);
+}
+
+/* see facet_four.h */
+void facet_four(void *cc, const void *data, size_t len)
+{
+ four_core(cc, data, len);
+}
+
+/* see facet_four.h */
+void facet_four_close(void *cc, void *dst)
+{
+ /* 16 output words = 64 bytes; context reset for the next computation */
+ four_close(cc, 0, 0, dst, 16);
+ four_init(cc, IV512);
+}
+
+/* see facet_four.h */
+void facet_four_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+ four_close(cc, ub, n, dst, 16);
+ four_init(cc, IV512);
+}
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/stratum/algos/honeycomb/facet_four.h b/stratum/algos/honeycomb/facet_four.h
new file mode 100644
index 000000000..36c524823
--- /dev/null
+++ b/stratum/algos/honeycomb/facet_four.h
@@ -0,0 +1,76 @@
+#ifndef FACET_FOUR_H
+#define FACET_FOUR_H
+
+#include <stddef.h>
+#include "honeycomb_types.h"
+
+#ifdef __cplusplus
+ extern "C"{
+#endif
+
+//#undef BEE_64 //
+
+/**
+ * This structure is a context for HoneyComb Facet #4 computations:
+ * it contains the intermediate values and some data from the last
+ * entered block. Once a HoneyComb Facet #4 computation has been performed, the
+ * context can be reused for another computation.
+ *
+ * The contents of this structure are private. A running HoneyComb Facet #4
+ * computation can be cloned by copying the context (e.g. with a simple memcpy() ).
+ */
+typedef struct{
+ unsigned char buf[128]; /* first field, for alignment */
+ size_t ptr; /* number of bytes currently buffered in buf */
+ bee_u32 h[16]; /* 512-bit chaining state */
+ bee_u32 count0, count1, count2, count3; /* 128-bit bit counter, LE word order */
+}facet_four_context;
+
+
+/**
+ * Initialize a HoneyComb Facet #4 context. This process performs no memory allocation.
+ *
+ * @param cc the HoneyComb Facet #4 context (pointer to a facet_four_context)
+ */
+void facet_four_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that len is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc the HoneyComb Facet #4 context
+ * @param data the input data
+ * @param len the input data length (in bytes)
+ */
+void facet_four(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current HoneyComb Facet #4 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is automatically reinitialized.
+ *
+ * @param cc the HoneyComb Facet #4 context
+ * @param dst the destination buffer
+ */
+void facet_four_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (64 bytes). If bit number i
+ * in ub has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte level). The context is automatically reinitialized.
+ *
+ * @param cc the HoneyComb Facet #4 context
+ * @param ub the extra bits
+ * @param n the number of extra bits (0 to 7)
+ * @param dst the destination buffer
+ */
+void facet_four_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/stratum/algos/honeycomb/facet_one.c b/stratum/algos/honeycomb/facet_one.c
new file mode 100644
index 000000000..4e4f50876
--- /dev/null
+++ b/stratum/algos/honeycomb/facet_one.c
@@ -0,0 +1,1702 @@
+
+#include <stddef.h>
+#include <string.h>
+
+#include "facet_one.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+/*
+ * Parameters:
+ *
+ * BEE_KECCAK_64 use a 64-bit type
+ * BEE_KECCAK_UNROLL number of loops to unroll (0/undef for full unroll)
+ * BEE_KECCAK_INTERLEAVE use bit-interleaving (32-bit type only)
+ * BEE_KECCAK_NOCOPY do not copy the state into local variables
+ *
+ * If there is no usable 64-bit type, the code automatically switches
+ * back to the 32-bit implementation.
+ *
+ * Some tests on an Intel Core2 Q6600 (both 64-bit and 32-bit, 32 kB L1
+ * code cache), a PowerPC (G3, 32 kB L1 code cache), an ARM920T core
+ * (16 kB L1 code cache), and a small MIPS-compatible CPU (Broadcom BCM3302,
+ * 8 kB L1 code cache), seem to show that the following are optimal:
+ *
+ * -- x86, 64-bit: use the 64-bit implementation, unroll 8 rounds,
+ * do not copy the state; unrolling 2, 6 or all rounds also provides
+ * near-optimal performance.
+ * -- x86, 32-bit: use the 32-bit implementation, unroll 6 rounds,
+ * interleave, do not copy the state. Unrolling 1, 2, 4 or 8 rounds
+ * also provides near-optimal performance.
+ * -- PowerPC: use the 64-bit implementation, unroll 8 rounds,
+ * copy the state. Unrolling 4 or 6 rounds is near-optimal.
+ * -- ARM: use the 64-bit implementation, unroll 2 or 4 rounds,
+ * copy the state.
+ * -- MIPS: use the 64-bit implementation, unroll 2 rounds, copy
+ * the state. Unrolling only 1 round is also near-optimal.
+ *
+ * Also, interleaving does not always yield actual improvements when
+ * using a 32-bit implementation; in particular when the architecture
+ * does not offer a native rotation opcode (interleaving replaces one
+ * 64-bit rotation with two 32-bit rotations, which is a gain only if
+ * there is a native 32-bit rotation opcode and not a native 64-bit
+ * rotation opcode; also, interleaving implies a small overhead when
+ * processing input words).
+ *
+ * To sum up:
+ * -- when possible, use the 64-bit code
+ * -- exception: on 32-bit x86, use 32-bit code
+ * -- when using 32-bit code, use interleaving
+ * -- copy the state, except on x86
+ * -- unroll 8 rounds on "big" machine, 2 rounds on "small" machines
+ */
+
+#if BEE_SMALL_FOOTPRINT && !defined BEE_SMALL_FOOTPRINT_KECCAK
+ #define BEE_SMALL_FOOTPRINT_KECCAK 1
+#endif
+
+/*
+ * By default, we select the 64-bit implementation if a 64-bit type
+ * is available, unless a 32-bit x86 is detected.
+ */
+#if !defined BEE_KECCAK_64 && BEE_64 && !(defined __i386__ || BEE_I386_GCC || BEE_I386_MSVC )
+ #define BEE_KECCAK_64 1
+#endif
+
+/*
+ * If using a 32-bit implementation, we prefer to interleave.
+ */
+#if !BEE_KECCAK_64 && !defined BEE_KECCAK_INTERLEAVE
+ #define BEE_KECCAK_INTERLEAVE 1
+#endif
+
+/*
+ * Unroll 8 rounds on big systems, 2 rounds on small systems.
+ */
+#ifndef BEE_KECCAK_UNROLL
+ #if BEE_SMALL_FOOTPRINT_KECCAK
+ #define BEE_KECCAK_UNROLL 2
+ #else
+ #define BEE_KECCAK_UNROLL 8
+ #endif
+#endif
+
+/*
+ * We do not want to copy the state to local variables on x86 (32-bit and 64-bit alike).
+ */
+#ifndef BEE_KECCAK_NOCOPY
+ #if defined __i386__ || defined __x86_64 || BEE_I386_MSVC || BEE_I386_GCC
+ #define BEE_KECCAK_NOCOPY 1
+ #else
+ #define BEE_KECCAK_NOCOPY 0
+ #endif
+#endif
+
+#ifdef _MSC_VER
+ #pragma warning (disable: 4146)
+#endif
+
+#if BEE_KECCAK_64
+
+static const bee_u64 RC[] = {
+ BEE_C64(0x0000000000000001), BEE_C64(0x0000000000008082),
+ BEE_C64(0x800000000000808A), BEE_C64(0x8000000080008000),
+ BEE_C64(0x000000000000808B), BEE_C64(0x0000000080000001),
+ BEE_C64(0x8000000080008081), BEE_C64(0x8000000000008009),
+ BEE_C64(0x000000000000008A), BEE_C64(0x0000000000000088),
+ BEE_C64(0x0000000080008009), BEE_C64(0x000000008000000A),
+ BEE_C64(0x000000008000808B), BEE_C64(0x800000000000008B),
+ BEE_C64(0x8000000000008089), BEE_C64(0x8000000000008003),
+ BEE_C64(0x8000000000008002), BEE_C64(0x8000000000000080),
+ BEE_C64(0x000000000000800A), BEE_C64(0x800000008000000A),
+ BEE_C64(0x8000000080008081), BEE_C64(0x8000000000008080),
+ BEE_C64(0x0000000080000001), BEE_C64(0x8000000080008008)
+};
+
+#if BEE_KECCAK_NOCOPY
+
+#define a00 (kc->u.wide[ 0])
+#define a10 (kc->u.wide[ 1])
+#define a20 (kc->u.wide[ 2])
+#define a30 (kc->u.wide[ 3])
+#define a40 (kc->u.wide[ 4])
+#define a01 (kc->u.wide[ 5])
+#define a11 (kc->u.wide[ 6])
+#define a21 (kc->u.wide[ 7])
+#define a31 (kc->u.wide[ 8])
+#define a41 (kc->u.wide[ 9])
+#define a02 (kc->u.wide[10])
+#define a12 (kc->u.wide[11])
+#define a22 (kc->u.wide[12])
+#define a32 (kc->u.wide[13])
+#define a42 (kc->u.wide[14])
+#define a03 (kc->u.wide[15])
+#define a13 (kc->u.wide[16])
+#define a23 (kc->u.wide[17])
+#define a33 (kc->u.wide[18])
+#define a43 (kc->u.wide[19])
+#define a04 (kc->u.wide[20])
+#define a14 (kc->u.wide[21])
+#define a24 (kc->u.wide[22])
+#define a34 (kc->u.wide[23])
+#define a44 (kc->u.wide[24])
+
+#define DECL_STATE
+#define READ_STATE(sc)
+#define WRITE_STATE(sc)
+
+#define INPUT_BUF(size) do { \
+ size_t j; \
+ for (j = 0; j < (size); j += 8) { \
+ kc->u.wide[j >> 3] ^= bee_dec64le_aligned(buf + j); \
+ } \
+ } while (0)
+
+#define INPUT_BUF144 INPUT_BUF(144)
+#define INPUT_BUF136 INPUT_BUF(136)
+#define INPUT_BUF104 INPUT_BUF(104)
+#define INPUT_BUF72 INPUT_BUF(72)
+
+#else
+
+#define DECL_STATE \
+ bee_u64 a00, a01, a02, a03, a04; \
+ bee_u64 a10, a11, a12, a13, a14; \
+ bee_u64 a20, a21, a22, a23, a24; \
+ bee_u64 a30, a31, a32, a33, a34; \
+ bee_u64 a40, a41, a42, a43, a44;
+
+#define READ_STATE(state) do { \
+ a00 = (state)->u.wide[ 0]; \
+ a10 = (state)->u.wide[ 1]; \
+ a20 = (state)->u.wide[ 2]; \
+ a30 = (state)->u.wide[ 3]; \
+ a40 = (state)->u.wide[ 4]; \
+ a01 = (state)->u.wide[ 5]; \
+ a11 = (state)->u.wide[ 6]; \
+ a21 = (state)->u.wide[ 7]; \
+ a31 = (state)->u.wide[ 8]; \
+ a41 = (state)->u.wide[ 9]; \
+ a02 = (state)->u.wide[10]; \
+ a12 = (state)->u.wide[11]; \
+ a22 = (state)->u.wide[12]; \
+ a32 = (state)->u.wide[13]; \
+ a42 = (state)->u.wide[14]; \
+ a03 = (state)->u.wide[15]; \
+ a13 = (state)->u.wide[16]; \
+ a23 = (state)->u.wide[17]; \
+ a33 = (state)->u.wide[18]; \
+ a43 = (state)->u.wide[19]; \
+ a04 = (state)->u.wide[20]; \
+ a14 = (state)->u.wide[21]; \
+ a24 = (state)->u.wide[22]; \
+ a34 = (state)->u.wide[23]; \
+ a44 = (state)->u.wide[24]; \
+ } while (0)
+
+#define WRITE_STATE(state) do { \
+ (state)->u.wide[ 0] = a00; \
+ (state)->u.wide[ 1] = a10; \
+ (state)->u.wide[ 2] = a20; \
+ (state)->u.wide[ 3] = a30; \
+ (state)->u.wide[ 4] = a40; \
+ (state)->u.wide[ 5] = a01; \
+ (state)->u.wide[ 6] = a11; \
+ (state)->u.wide[ 7] = a21; \
+ (state)->u.wide[ 8] = a31; \
+ (state)->u.wide[ 9] = a41; \
+ (state)->u.wide[10] = a02; \
+ (state)->u.wide[11] = a12; \
+ (state)->u.wide[12] = a22; \
+ (state)->u.wide[13] = a32; \
+ (state)->u.wide[14] = a42; \
+ (state)->u.wide[15] = a03; \
+ (state)->u.wide[16] = a13; \
+ (state)->u.wide[17] = a23; \
+ (state)->u.wide[18] = a33; \
+ (state)->u.wide[19] = a43; \
+ (state)->u.wide[20] = a04; \
+ (state)->u.wide[21] = a14; \
+ (state)->u.wide[22] = a24; \
+ (state)->u.wide[23] = a34; \
+ (state)->u.wide[24] = a44; \
+ } while (0)
+
+#define INPUT_BUF144 do { \
+ a00 ^= bee_dec64le_aligned(buf + 0); \
+ a10 ^= bee_dec64le_aligned(buf + 8); \
+ a20 ^= bee_dec64le_aligned(buf + 16); \
+ a30 ^= bee_dec64le_aligned(buf + 24); \
+ a40 ^= bee_dec64le_aligned(buf + 32); \
+ a01 ^= bee_dec64le_aligned(buf + 40); \
+ a11 ^= bee_dec64le_aligned(buf + 48); \
+ a21 ^= bee_dec64le_aligned(buf + 56); \
+ a31 ^= bee_dec64le_aligned(buf + 64); \
+ a41 ^= bee_dec64le_aligned(buf + 72); \
+ a02 ^= bee_dec64le_aligned(buf + 80); \
+ a12 ^= bee_dec64le_aligned(buf + 88); \
+ a22 ^= bee_dec64le_aligned(buf + 96); \
+ a32 ^= bee_dec64le_aligned(buf + 104); \
+ a42 ^= bee_dec64le_aligned(buf + 112); \
+ a03 ^= bee_dec64le_aligned(buf + 120); \
+ a13 ^= bee_dec64le_aligned(buf + 128); \
+ a23 ^= bee_dec64le_aligned(buf + 136); \
+ } while (0)
+
+#define INPUT_BUF136 do { \
+ a00 ^= bee_dec64le_aligned(buf + 0); \
+ a10 ^= bee_dec64le_aligned(buf + 8); \
+ a20 ^= bee_dec64le_aligned(buf + 16); \
+ a30 ^= bee_dec64le_aligned(buf + 24); \
+ a40 ^= bee_dec64le_aligned(buf + 32); \
+ a01 ^= bee_dec64le_aligned(buf + 40); \
+ a11 ^= bee_dec64le_aligned(buf + 48); \
+ a21 ^= bee_dec64le_aligned(buf + 56); \
+ a31 ^= bee_dec64le_aligned(buf + 64); \
+ a41 ^= bee_dec64le_aligned(buf + 72); \
+ a02 ^= bee_dec64le_aligned(buf + 80); \
+ a12 ^= bee_dec64le_aligned(buf + 88); \
+ a22 ^= bee_dec64le_aligned(buf + 96); \
+ a32 ^= bee_dec64le_aligned(buf + 104); \
+ a42 ^= bee_dec64le_aligned(buf + 112); \
+ a03 ^= bee_dec64le_aligned(buf + 120); \
+ a13 ^= bee_dec64le_aligned(buf + 128); \
+ } while (0)
+
+#define INPUT_BUF104 do { \
+ a00 ^= bee_dec64le_aligned(buf + 0); \
+ a10 ^= bee_dec64le_aligned(buf + 8); \
+ a20 ^= bee_dec64le_aligned(buf + 16); \
+ a30 ^= bee_dec64le_aligned(buf + 24); \
+ a40 ^= bee_dec64le_aligned(buf + 32); \
+ a01 ^= bee_dec64le_aligned(buf + 40); \
+ a11 ^= bee_dec64le_aligned(buf + 48); \
+ a21 ^= bee_dec64le_aligned(buf + 56); \
+ a31 ^= bee_dec64le_aligned(buf + 64); \
+ a41 ^= bee_dec64le_aligned(buf + 72); \
+ a02 ^= bee_dec64le_aligned(buf + 80); \
+ a12 ^= bee_dec64le_aligned(buf + 88); \
+ a22 ^= bee_dec64le_aligned(buf + 96); \
+ } while (0)
+
+#define INPUT_BUF72 do { \
+ a00 ^= bee_dec64le_aligned(buf + 0); \
+ a10 ^= bee_dec64le_aligned(buf + 8); \
+ a20 ^= bee_dec64le_aligned(buf + 16); \
+ a30 ^= bee_dec64le_aligned(buf + 24); \
+ a40 ^= bee_dec64le_aligned(buf + 32); \
+ a01 ^= bee_dec64le_aligned(buf + 40); \
+ a11 ^= bee_dec64le_aligned(buf + 48); \
+ a21 ^= bee_dec64le_aligned(buf + 56); \
+ a31 ^= bee_dec64le_aligned(buf + 64); \
+ } while (0)
+
+#define INPUT_BUF(lim) do { \
+ a00 ^= bee_dec64le_aligned(buf + 0); \
+ a10 ^= bee_dec64le_aligned(buf + 8); \
+ a20 ^= bee_dec64le_aligned(buf + 16); \
+ a30 ^= bee_dec64le_aligned(buf + 24); \
+ a40 ^= bee_dec64le_aligned(buf + 32); \
+ a01 ^= bee_dec64le_aligned(buf + 40); \
+ a11 ^= bee_dec64le_aligned(buf + 48); \
+ a21 ^= bee_dec64le_aligned(buf + 56); \
+ a31 ^= bee_dec64le_aligned(buf + 64); \
+ if ((lim) == 72) \
+ break; \
+ a41 ^= bee_dec64le_aligned(buf + 72); \
+ a02 ^= bee_dec64le_aligned(buf + 80); \
+ a12 ^= bee_dec64le_aligned(buf + 88); \
+ a22 ^= bee_dec64le_aligned(buf + 96); \
+ if ((lim) == 104) \
+ break; \
+ a32 ^= bee_dec64le_aligned(buf + 104); \
+ a42 ^= bee_dec64le_aligned(buf + 112); \
+ a03 ^= bee_dec64le_aligned(buf + 120); \
+ a13 ^= bee_dec64le_aligned(buf + 128); \
+ if ((lim) == 136) \
+ break; \
+ a23 ^= bee_dec64le_aligned(buf + 136); \
+ } while (0)
+
+#endif
+
+#define DECL64(x) bee_u64 x
+#define MOV64(d, s) (d = s)
+#define XOR64(d, a, b) (d = a ^ b)
+#define AND64(d, a, b) (d = a & b)
+#define OR64(d, a, b) (d = a | b)
+#define NOT64(d, s) (d = BEE_T64(~s))
+#define ROL64(d, v, n) (d = BEE_ROTL64(v, n))
+#define XOR64_IOTA XOR64
+
+#else
+
+static const struct {
+ bee_u32 high, low;
+} RC[] = {
+#if BEE_KECCAK_INTERLEAVE
+ { BEE_C32(0x00000000), BEE_C32(0x00000001) },
+ { BEE_C32(0x00000089), BEE_C32(0x00000000) },
+ { BEE_C32(0x8000008B), BEE_C32(0x00000000) },
+ { BEE_C32(0x80008080), BEE_C32(0x00000000) },
+ { BEE_C32(0x0000008B), BEE_C32(0x00000001) },
+ { BEE_C32(0x00008000), BEE_C32(0x00000001) },
+ { BEE_C32(0x80008088), BEE_C32(0x00000001) },
+ { BEE_C32(0x80000082), BEE_C32(0x00000001) },
+ { BEE_C32(0x0000000B), BEE_C32(0x00000000) },
+ { BEE_C32(0x0000000A), BEE_C32(0x00000000) },
+ { BEE_C32(0x00008082), BEE_C32(0x00000001) },
+ { BEE_C32(0x00008003), BEE_C32(0x00000000) },
+ { BEE_C32(0x0000808B), BEE_C32(0x00000001) },
+ { BEE_C32(0x8000000B), BEE_C32(0x00000001) },
+ { BEE_C32(0x8000008A), BEE_C32(0x00000001) },
+ { BEE_C32(0x80000081), BEE_C32(0x00000001) },
+ { BEE_C32(0x80000081), BEE_C32(0x00000000) },
+ { BEE_C32(0x80000008), BEE_C32(0x00000000) },
+ { BEE_C32(0x00000083), BEE_C32(0x00000000) },
+ { BEE_C32(0x80008003), BEE_C32(0x00000000) },
+ { BEE_C32(0x80008088), BEE_C32(0x00000001) },
+ { BEE_C32(0x80000088), BEE_C32(0x00000000) },
+ { BEE_C32(0x00008000), BEE_C32(0x00000001) },
+ { BEE_C32(0x80008082), BEE_C32(0x00000000) }
+#else
+ { BEE_C32(0x00000000), BEE_C32(0x00000001) },
+ { BEE_C32(0x00000000), BEE_C32(0x00008082) },
+ { BEE_C32(0x80000000), BEE_C32(0x0000808A) },
+ { BEE_C32(0x80000000), BEE_C32(0x80008000) },
+ { BEE_C32(0x00000000), BEE_C32(0x0000808B) },
+ { BEE_C32(0x00000000), BEE_C32(0x80000001) },
+ { BEE_C32(0x80000000), BEE_C32(0x80008081) },
+ { BEE_C32(0x80000000), BEE_C32(0x00008009) },
+ { BEE_C32(0x00000000), BEE_C32(0x0000008A) },
+ { BEE_C32(0x00000000), BEE_C32(0x00000088) },
+ { BEE_C32(0x00000000), BEE_C32(0x80008009) },
+ { BEE_C32(0x00000000), BEE_C32(0x8000000A) },
+ { BEE_C32(0x00000000), BEE_C32(0x8000808B) },
+ { BEE_C32(0x80000000), BEE_C32(0x0000008B) },
+ { BEE_C32(0x80000000), BEE_C32(0x00008089) },
+ { BEE_C32(0x80000000), BEE_C32(0x00008003) },
+ { BEE_C32(0x80000000), BEE_C32(0x00008002) },
+ { BEE_C32(0x80000000), BEE_C32(0x00000080) },
+ { BEE_C32(0x00000000), BEE_C32(0x0000800A) },
+ { BEE_C32(0x80000000), BEE_C32(0x8000000A) },
+ { BEE_C32(0x80000000), BEE_C32(0x80008081) },
+ { BEE_C32(0x80000000), BEE_C32(0x00008080) },
+ { BEE_C32(0x00000000), BEE_C32(0x80000001) },
+ { BEE_C32(0x80000000), BEE_C32(0x80008008) }
+#endif
+};
+
+#if BEE_KECCAK_INTERLEAVE
+
+#define INTERLEAVE(xl, xh) do { \
+ bee_u32 l, h, t; \
+ l = (xl); h = (xh); \
+ t = (l ^ (l >> 1)) & BEE_C32(0x22222222); l ^= t ^ (t << 1); \
+ t = (h ^ (h >> 1)) & BEE_C32(0x22222222); h ^= t ^ (t << 1); \
+ t = (l ^ (l >> 2)) & BEE_C32(0x0C0C0C0C); l ^= t ^ (t << 2); \
+ t = (h ^ (h >> 2)) & BEE_C32(0x0C0C0C0C); h ^= t ^ (t << 2); \
+ t = (l ^ (l >> 4)) & BEE_C32(0x00F000F0); l ^= t ^ (t << 4); \
+ t = (h ^ (h >> 4)) & BEE_C32(0x00F000F0); h ^= t ^ (t << 4); \
+ t = (l ^ (l >> 8)) & BEE_C32(0x0000FF00); l ^= t ^ (t << 8); \
+ t = (h ^ (h >> 8)) & BEE_C32(0x0000FF00); h ^= t ^ (t << 8); \
+ t = (l ^ BEE_T32(h << 16)) & BEE_C32(0xFFFF0000); \
+ l ^= t; h ^= t >> 16; \
+ (xl) = l; (xh) = h; \
+ } while (0)
+
+#define UNINTERLEAVE(xl, xh) do { \
+ bee_u32 l, h, t; \
+ l = (xl); h = (xh); \
+ t = (l ^ BEE_T32(h << 16)) & BEE_C32(0xFFFF0000); \
+ l ^= t; h ^= t >> 16; \
+ t = (l ^ (l >> 8)) & BEE_C32(0x0000FF00); l ^= t ^ (t << 8); \
+ t = (h ^ (h >> 8)) & BEE_C32(0x0000FF00); h ^= t ^ (t << 8); \
+ t = (l ^ (l >> 4)) & BEE_C32(0x00F000F0); l ^= t ^ (t << 4); \
+ t = (h ^ (h >> 4)) & BEE_C32(0x00F000F0); h ^= t ^ (t << 4); \
+ t = (l ^ (l >> 2)) & BEE_C32(0x0C0C0C0C); l ^= t ^ (t << 2); \
+ t = (h ^ (h >> 2)) & BEE_C32(0x0C0C0C0C); h ^= t ^ (t << 2); \
+ t = (l ^ (l >> 1)) & BEE_C32(0x22222222); l ^= t ^ (t << 1); \
+ t = (h ^ (h >> 1)) & BEE_C32(0x22222222); h ^= t ^ (t << 1); \
+ (xl) = l; (xh) = h; \
+ } while (0)
+
+#else
+
+#define INTERLEAVE(l, h)
+#define UNINTERLEAVE(l, h)
+
+#endif
+
+#if BEE_KECCAK_NOCOPY
+
+#define a00l (kc->u.narrow[2 * 0 + 0])
+#define a00h (kc->u.narrow[2 * 0 + 1])
+#define a10l (kc->u.narrow[2 * 1 + 0])
+#define a10h (kc->u.narrow[2 * 1 + 1])
+#define a20l (kc->u.narrow[2 * 2 + 0])
+#define a20h (kc->u.narrow[2 * 2 + 1])
+#define a30l (kc->u.narrow[2 * 3 + 0])
+#define a30h (kc->u.narrow[2 * 3 + 1])
+#define a40l (kc->u.narrow[2 * 4 + 0])
+#define a40h (kc->u.narrow[2 * 4 + 1])
+#define a01l (kc->u.narrow[2 * 5 + 0])
+#define a01h (kc->u.narrow[2 * 5 + 1])
+#define a11l (kc->u.narrow[2 * 6 + 0])
+#define a11h (kc->u.narrow[2 * 6 + 1])
+#define a21l (kc->u.narrow[2 * 7 + 0])
+#define a21h (kc->u.narrow[2 * 7 + 1])
+#define a31l (kc->u.narrow[2 * 8 + 0])
+#define a31h (kc->u.narrow[2 * 8 + 1])
+#define a41l (kc->u.narrow[2 * 9 + 0])
+#define a41h (kc->u.narrow[2 * 9 + 1])
+#define a02l (kc->u.narrow[2 * 10 + 0])
+#define a02h (kc->u.narrow[2 * 10 + 1])
+#define a12l (kc->u.narrow[2 * 11 + 0])
+#define a12h (kc->u.narrow[2 * 11 + 1])
+#define a22l (kc->u.narrow[2 * 12 + 0])
+#define a22h (kc->u.narrow[2 * 12 + 1])
+#define a32l (kc->u.narrow[2 * 13 + 0])
+#define a32h (kc->u.narrow[2 * 13 + 1])
+#define a42l (kc->u.narrow[2 * 14 + 0])
+#define a42h (kc->u.narrow[2 * 14 + 1])
+#define a03l (kc->u.narrow[2 * 15 + 0])
+#define a03h (kc->u.narrow[2 * 15 + 1])
+#define a13l (kc->u.narrow[2 * 16 + 0])
+#define a13h (kc->u.narrow[2 * 16 + 1])
+#define a23l (kc->u.narrow[2 * 17 + 0])
+#define a23h (kc->u.narrow[2 * 17 + 1])
+#define a33l (kc->u.narrow[2 * 18 + 0])
+#define a33h (kc->u.narrow[2 * 18 + 1])
+#define a43l (kc->u.narrow[2 * 19 + 0])
+#define a43h (kc->u.narrow[2 * 19 + 1])
+#define a04l (kc->u.narrow[2 * 20 + 0])
+#define a04h (kc->u.narrow[2 * 20 + 1])
+#define a14l (kc->u.narrow[2 * 21 + 0])
+#define a14h (kc->u.narrow[2 * 21 + 1])
+#define a24l (kc->u.narrow[2 * 22 + 0])
+#define a24h (kc->u.narrow[2 * 22 + 1])
+#define a34l (kc->u.narrow[2 * 23 + 0])
+#define a34h (kc->u.narrow[2 * 23 + 1])
+#define a44l (kc->u.narrow[2 * 24 + 0])
+#define a44h (kc->u.narrow[2 * 24 + 1])
+
+#define DECL_STATE
+#define READ_STATE(state)
+#define WRITE_STATE(state)
+
+#define INPUT_BUF(size) do { \
+ size_t j; \
+ for (j = 0; j < (size); j += 8) { \
+ bee_u32 tl, th; \
+ tl = bee_dec32le_aligned(buf + j + 0); \
+ th = bee_dec32le_aligned(buf + j + 4); \
+ INTERLEAVE(tl, th); \
+ kc->u.narrow[(j >> 2) + 0] ^= tl; \
+ kc->u.narrow[(j >> 2) + 1] ^= th; \
+ } \
+ } while (0)
+
+#define INPUT_BUF144 INPUT_BUF(144)
+#define INPUT_BUF136 INPUT_BUF(136)
+#define INPUT_BUF104 INPUT_BUF(104)
+#define INPUT_BUF72 INPUT_BUF(72)
+
+#else
+
+#define DECL_STATE \
+ bee_u32 a00l, a00h, a01l, a01h, a02l, a02h, a03l, a03h, a04l, a04h; \
+ bee_u32 a10l, a10h, a11l, a11h, a12l, a12h, a13l, a13h, a14l, a14h; \
+ bee_u32 a20l, a20h, a21l, a21h, a22l, a22h, a23l, a23h, a24l, a24h; \
+ bee_u32 a30l, a30h, a31l, a31h, a32l, a32h, a33l, a33h, a34l, a34h; \
+ bee_u32 a40l, a40h, a41l, a41h, a42l, a42h, a43l, a43h, a44l, a44h;
+
+#define READ_STATE(state) do { \
+ a00l = (state)->u.narrow[2 * 0 + 0]; \
+ a00h = (state)->u.narrow[2 * 0 + 1]; \
+ a10l = (state)->u.narrow[2 * 1 + 0]; \
+ a10h = (state)->u.narrow[2 * 1 + 1]; \
+ a20l = (state)->u.narrow[2 * 2 + 0]; \
+ a20h = (state)->u.narrow[2 * 2 + 1]; \
+ a30l = (state)->u.narrow[2 * 3 + 0]; \
+ a30h = (state)->u.narrow[2 * 3 + 1]; \
+ a40l = (state)->u.narrow[2 * 4 + 0]; \
+ a40h = (state)->u.narrow[2 * 4 + 1]; \
+ a01l = (state)->u.narrow[2 * 5 + 0]; \
+ a01h = (state)->u.narrow[2 * 5 + 1]; \
+ a11l = (state)->u.narrow[2 * 6 + 0]; \
+ a11h = (state)->u.narrow[2 * 6 + 1]; \
+ a21l = (state)->u.narrow[2 * 7 + 0]; \
+ a21h = (state)->u.narrow[2 * 7 + 1]; \
+ a31l = (state)->u.narrow[2 * 8 + 0]; \
+ a31h = (state)->u.narrow[2 * 8 + 1]; \
+ a41l = (state)->u.narrow[2 * 9 + 0]; \
+ a41h = (state)->u.narrow[2 * 9 + 1]; \
+ a02l = (state)->u.narrow[2 * 10 + 0]; \
+ a02h = (state)->u.narrow[2 * 10 + 1]; \
+ a12l = (state)->u.narrow[2 * 11 + 0]; \
+ a12h = (state)->u.narrow[2 * 11 + 1]; \
+ a22l = (state)->u.narrow[2 * 12 + 0]; \
+ a22h = (state)->u.narrow[2 * 12 + 1]; \
+ a32l = (state)->u.narrow[2 * 13 + 0]; \
+ a32h = (state)->u.narrow[2 * 13 + 1]; \
+ a42l = (state)->u.narrow[2 * 14 + 0]; \
+ a42h = (state)->u.narrow[2 * 14 + 1]; \
+ a03l = (state)->u.narrow[2 * 15 + 0]; \
+ a03h = (state)->u.narrow[2 * 15 + 1]; \
+ a13l = (state)->u.narrow[2 * 16 + 0]; \
+ a13h = (state)->u.narrow[2 * 16 + 1]; \
+ a23l = (state)->u.narrow[2 * 17 + 0]; \
+ a23h = (state)->u.narrow[2 * 17 + 1]; \
+ a33l = (state)->u.narrow[2 * 18 + 0]; \
+ a33h = (state)->u.narrow[2 * 18 + 1]; \
+ a43l = (state)->u.narrow[2 * 19 + 0]; \
+ a43h = (state)->u.narrow[2 * 19 + 1]; \
+ a04l = (state)->u.narrow[2 * 20 + 0]; \
+ a04h = (state)->u.narrow[2 * 20 + 1]; \
+ a14l = (state)->u.narrow[2 * 21 + 0]; \
+ a14h = (state)->u.narrow[2 * 21 + 1]; \
+ a24l = (state)->u.narrow[2 * 22 + 0]; \
+ a24h = (state)->u.narrow[2 * 22 + 1]; \
+ a34l = (state)->u.narrow[2 * 23 + 0]; \
+ a34h = (state)->u.narrow[2 * 23 + 1]; \
+ a44l = (state)->u.narrow[2 * 24 + 0]; \
+ a44h = (state)->u.narrow[2 * 24 + 1]; \
+ } while (0)
+
+#define WRITE_STATE(state) do { \
+ (state)->u.narrow[2 * 0 + 0] = a00l; \
+ (state)->u.narrow[2 * 0 + 1] = a00h; \
+ (state)->u.narrow[2 * 1 + 0] = a10l; \
+ (state)->u.narrow[2 * 1 + 1] = a10h; \
+ (state)->u.narrow[2 * 2 + 0] = a20l; \
+ (state)->u.narrow[2 * 2 + 1] = a20h; \
+ (state)->u.narrow[2 * 3 + 0] = a30l; \
+ (state)->u.narrow[2 * 3 + 1] = a30h; \
+ (state)->u.narrow[2 * 4 + 0] = a40l; \
+ (state)->u.narrow[2 * 4 + 1] = a40h; \
+ (state)->u.narrow[2 * 5 + 0] = a01l; \
+ (state)->u.narrow[2 * 5 + 1] = a01h; \
+ (state)->u.narrow[2 * 6 + 0] = a11l; \
+ (state)->u.narrow[2 * 6 + 1] = a11h; \
+ (state)->u.narrow[2 * 7 + 0] = a21l; \
+ (state)->u.narrow[2 * 7 + 1] = a21h; \
+ (state)->u.narrow[2 * 8 + 0] = a31l; \
+ (state)->u.narrow[2 * 8 + 1] = a31h; \
+ (state)->u.narrow[2 * 9 + 0] = a41l; \
+ (state)->u.narrow[2 * 9 + 1] = a41h; \
+ (state)->u.narrow[2 * 10 + 0] = a02l; \
+ (state)->u.narrow[2 * 10 + 1] = a02h; \
+ (state)->u.narrow[2 * 11 + 0] = a12l; \
+ (state)->u.narrow[2 * 11 + 1] = a12h; \
+ (state)->u.narrow[2 * 12 + 0] = a22l; \
+ (state)->u.narrow[2 * 12 + 1] = a22h; \
+ (state)->u.narrow[2 * 13 + 0] = a32l; \
+ (state)->u.narrow[2 * 13 + 1] = a32h; \
+ (state)->u.narrow[2 * 14 + 0] = a42l; \
+ (state)->u.narrow[2 * 14 + 1] = a42h; \
+ (state)->u.narrow[2 * 15 + 0] = a03l; \
+ (state)->u.narrow[2 * 15 + 1] = a03h; \
+ (state)->u.narrow[2 * 16 + 0] = a13l; \
+ (state)->u.narrow[2 * 16 + 1] = a13h; \
+ (state)->u.narrow[2 * 17 + 0] = a23l; \
+ (state)->u.narrow[2 * 17 + 1] = a23h; \
+ (state)->u.narrow[2 * 18 + 0] = a33l; \
+ (state)->u.narrow[2 * 18 + 1] = a33h; \
+ (state)->u.narrow[2 * 19 + 0] = a43l; \
+ (state)->u.narrow[2 * 19 + 1] = a43h; \
+ (state)->u.narrow[2 * 20 + 0] = a04l; \
+ (state)->u.narrow[2 * 20 + 1] = a04h; \
+ (state)->u.narrow[2 * 21 + 0] = a14l; \
+ (state)->u.narrow[2 * 21 + 1] = a14h; \
+ (state)->u.narrow[2 * 22 + 0] = a24l; \
+ (state)->u.narrow[2 * 22 + 1] = a24h; \
+ (state)->u.narrow[2 * 23 + 0] = a34l; \
+ (state)->u.narrow[2 * 23 + 1] = a34h; \
+ (state)->u.narrow[2 * 24 + 0] = a44l; \
+ (state)->u.narrow[2 * 24 + 1] = a44h; \
+ } while (0)
+
+#define READ64(d, off) do { \
+ bee_u32 tl, th; \
+ tl = bee_dec32le_aligned(buf + (off)); \
+ th = bee_dec32le_aligned(buf + (off) + 4); \
+ INTERLEAVE(tl, th); \
+ d ## l ^= tl; \
+ d ## h ^= th; \
+ } while (0)
+
+#define INPUT_BUF144 do { \
+ READ64(a00, 0); \
+ READ64(a10, 8); \
+ READ64(a20, 16); \
+ READ64(a30, 24); \
+ READ64(a40, 32); \
+ READ64(a01, 40); \
+ READ64(a11, 48); \
+ READ64(a21, 56); \
+ READ64(a31, 64); \
+ READ64(a41, 72); \
+ READ64(a02, 80); \
+ READ64(a12, 88); \
+ READ64(a22, 96); \
+ READ64(a32, 104); \
+ READ64(a42, 112); \
+ READ64(a03, 120); \
+ READ64(a13, 128); \
+ READ64(a23, 136); \
+ } while (0)
+
+#define INPUT_BUF136 do { \
+ READ64(a00, 0); \
+ READ64(a10, 8); \
+ READ64(a20, 16); \
+ READ64(a30, 24); \
+ READ64(a40, 32); \
+ READ64(a01, 40); \
+ READ64(a11, 48); \
+ READ64(a21, 56); \
+ READ64(a31, 64); \
+ READ64(a41, 72); \
+ READ64(a02, 80); \
+ READ64(a12, 88); \
+ READ64(a22, 96); \
+ READ64(a32, 104); \
+ READ64(a42, 112); \
+ READ64(a03, 120); \
+ READ64(a13, 128); \
+ } while (0)
+
+#define INPUT_BUF104 do { \
+ READ64(a00, 0); \
+ READ64(a10, 8); \
+ READ64(a20, 16); \
+ READ64(a30, 24); \
+ READ64(a40, 32); \
+ READ64(a01, 40); \
+ READ64(a11, 48); \
+ READ64(a21, 56); \
+ READ64(a31, 64); \
+ READ64(a41, 72); \
+ READ64(a02, 80); \
+ READ64(a12, 88); \
+ READ64(a22, 96); \
+ } while (0)
+
+#define INPUT_BUF72 do { \
+ READ64(a00, 0); \
+ READ64(a10, 8); \
+ READ64(a20, 16); \
+ READ64(a30, 24); \
+ READ64(a40, 32); \
+ READ64(a01, 40); \
+ READ64(a11, 48); \
+ READ64(a21, 56); \
+ READ64(a31, 64); \
+ } while (0)
+
+#define INPUT_BUF(lim) do { \
+ READ64(a00, 0); \
+ READ64(a10, 8); \
+ READ64(a20, 16); \
+ READ64(a30, 24); \
+ READ64(a40, 32); \
+ READ64(a01, 40); \
+ READ64(a11, 48); \
+ READ64(a21, 56); \
+ READ64(a31, 64); \
+ if ((lim) == 72) \
+ break; \
+ READ64(a41, 72); \
+ READ64(a02, 80); \
+ READ64(a12, 88); \
+ READ64(a22, 96); \
+ if ((lim) == 104) \
+ break; \
+ READ64(a32, 104); \
+ READ64(a42, 112); \
+ READ64(a03, 120); \
+ READ64(a13, 128); \
+ if ((lim) == 136) \
+ break; \
+ READ64(a23, 136); \
+ } while (0)
+
+#endif
+
+#define DECL64(x) bee_u64 x ## l, x ## h
+#define MOV64(d, s) (d ## l = s ## l, d ## h = s ## h)
+#define XOR64(d, a, b) (d ## l = a ## l ^ b ## l, d ## h = a ## h ^ b ## h)
+#define AND64(d, a, b) (d ## l = a ## l & b ## l, d ## h = a ## h & b ## h)
+#define OR64(d, a, b) (d ## l = a ## l | b ## l, d ## h = a ## h | b ## h)
+#define NOT64(d, s) (d ## l = BEE_T32(~s ## l), d ## h = BEE_T32(~s ## h))
+#define ROL64(d, v, n) ROL64_ ## n(d, v)
+
+#if BEE_KECCAK_INTERLEAVE
+
+#define ROL64_odd1(d, v) do { \
+ bee_u32 tmp; \
+ tmp = v ## l; \
+ d ## l = BEE_T32(v ## h << 1) | (v ## h >> 31); \
+ d ## h = tmp; \
+ } while (0)
+
+#define ROL64_odd63(d, v) do { \
+ bee_u32 tmp; \
+ tmp = BEE_T32(v ## l << 31) | (v ## l >> 1); \
+ d ## l = v ## h; \
+ d ## h = tmp; \
+ } while (0)
+
+#define ROL64_odd(d, v, n) do { \
+ bee_u32 tmp; \
+ tmp = BEE_T32(v ## l << (n - 1)) | (v ## l >> (33 - n)); \
+ d ## l = BEE_T32(v ## h << n) | (v ## h >> (32 - n)); \
+ d ## h = tmp; \
+ } while (0)
+
+#define ROL64_even(d, v, n) do { \
+ d ## l = BEE_T32(v ## l << n) | (v ## l >> (32 - n)); \
+ d ## h = BEE_T32(v ## h << n) | (v ## h >> (32 - n)); \
+ } while (0)
+
+#define ROL64_0(d, v)
+#define ROL64_1(d, v) ROL64_odd1(d, v)
+#define ROL64_2(d, v) ROL64_even(d, v, 1)
+#define ROL64_3(d, v) ROL64_odd( d, v, 2)
+#define ROL64_4(d, v) ROL64_even(d, v, 2)
+#define ROL64_5(d, v) ROL64_odd( d, v, 3)
+#define ROL64_6(d, v) ROL64_even(d, v, 3)
+#define ROL64_7(d, v) ROL64_odd( d, v, 4)
+#define ROL64_8(d, v) ROL64_even(d, v, 4)
+#define ROL64_9(d, v) ROL64_odd( d, v, 5)
+#define ROL64_10(d, v) ROL64_even(d, v, 5)
+#define ROL64_11(d, v) ROL64_odd( d, v, 6)
+#define ROL64_12(d, v) ROL64_even(d, v, 6)
+#define ROL64_13(d, v) ROL64_odd( d, v, 7)
+#define ROL64_14(d, v) ROL64_even(d, v, 7)
+#define ROL64_15(d, v) ROL64_odd( d, v, 8)
+#define ROL64_16(d, v) ROL64_even(d, v, 8)
+#define ROL64_17(d, v) ROL64_odd( d, v, 9)
+#define ROL64_18(d, v) ROL64_even(d, v, 9)
+#define ROL64_19(d, v) ROL64_odd( d, v, 10)
+#define ROL64_20(d, v) ROL64_even(d, v, 10)
+#define ROL64_21(d, v) ROL64_odd( d, v, 11)
+#define ROL64_22(d, v) ROL64_even(d, v, 11)
+#define ROL64_23(d, v) ROL64_odd( d, v, 12)
+#define ROL64_24(d, v) ROL64_even(d, v, 12)
+#define ROL64_25(d, v) ROL64_odd( d, v, 13)
+#define ROL64_26(d, v) ROL64_even(d, v, 13)
+#define ROL64_27(d, v) ROL64_odd( d, v, 14)
+#define ROL64_28(d, v) ROL64_even(d, v, 14)
+#define ROL64_29(d, v) ROL64_odd( d, v, 15)
+#define ROL64_30(d, v) ROL64_even(d, v, 15)
+#define ROL64_31(d, v) ROL64_odd( d, v, 16)
+#define ROL64_32(d, v) ROL64_even(d, v, 16)
+#define ROL64_33(d, v) ROL64_odd( d, v, 17)
+#define ROL64_34(d, v) ROL64_even(d, v, 17)
+#define ROL64_35(d, v) ROL64_odd( d, v, 18)
+#define ROL64_36(d, v) ROL64_even(d, v, 18)
+#define ROL64_37(d, v) ROL64_odd( d, v, 19)
+#define ROL64_38(d, v) ROL64_even(d, v, 19)
+#define ROL64_39(d, v) ROL64_odd( d, v, 20)
+#define ROL64_40(d, v) ROL64_even(d, v, 20)
+#define ROL64_41(d, v) ROL64_odd( d, v, 21)
+#define ROL64_42(d, v) ROL64_even(d, v, 21)
+#define ROL64_43(d, v) ROL64_odd( d, v, 22)
+#define ROL64_44(d, v) ROL64_even(d, v, 22)
+#define ROL64_45(d, v) ROL64_odd( d, v, 23)
+#define ROL64_46(d, v) ROL64_even(d, v, 23)
+#define ROL64_47(d, v) ROL64_odd( d, v, 24)
+#define ROL64_48(d, v) ROL64_even(d, v, 24)
+#define ROL64_49(d, v) ROL64_odd( d, v, 25)
+#define ROL64_50(d, v) ROL64_even(d, v, 25)
+#define ROL64_51(d, v) ROL64_odd( d, v, 26)
+#define ROL64_52(d, v) ROL64_even(d, v, 26)
+#define ROL64_53(d, v) ROL64_odd( d, v, 27)
+#define ROL64_54(d, v) ROL64_even(d, v, 27)
+#define ROL64_55(d, v) ROL64_odd( d, v, 28)
+#define ROL64_56(d, v) ROL64_even(d, v, 28)
+#define ROL64_57(d, v) ROL64_odd( d, v, 29)
+#define ROL64_58(d, v) ROL64_even(d, v, 29)
+#define ROL64_59(d, v) ROL64_odd( d, v, 30)
+#define ROL64_60(d, v) ROL64_even(d, v, 30)
+#define ROL64_61(d, v) ROL64_odd( d, v, 31)
+#define ROL64_62(d, v) ROL64_even(d, v, 31)
+#define ROL64_63(d, v) ROL64_odd63(d, v)
+
+#else
+
+#define ROL64_small(d, v, n) do { \
+ bee_u32 tmp; \
+ tmp = BEE_T32(v ## l << n) | (v ## h >> (32 - n)); \
+ d ## h = BEE_T32(v ## h << n) | (v ## l >> (32 - n)); \
+ d ## l = tmp; \
+ } while (0)
+
+#define ROL64_0(d, v) 0
+#define ROL64_1(d, v) ROL64_small(d, v, 1)
+#define ROL64_2(d, v) ROL64_small(d, v, 2)
+#define ROL64_3(d, v) ROL64_small(d, v, 3)
+#define ROL64_4(d, v) ROL64_small(d, v, 4)
+#define ROL64_5(d, v) ROL64_small(d, v, 5)
+#define ROL64_6(d, v) ROL64_small(d, v, 6)
+#define ROL64_7(d, v) ROL64_small(d, v, 7)
+#define ROL64_8(d, v) ROL64_small(d, v, 8)
+#define ROL64_9(d, v) ROL64_small(d, v, 9)
+#define ROL64_10(d, v) ROL64_small(d, v, 10)
+#define ROL64_11(d, v) ROL64_small(d, v, 11)
+#define ROL64_12(d, v) ROL64_small(d, v, 12)
+#define ROL64_13(d, v) ROL64_small(d, v, 13)
+#define ROL64_14(d, v) ROL64_small(d, v, 14)
+#define ROL64_15(d, v) ROL64_small(d, v, 15)
+#define ROL64_16(d, v) ROL64_small(d, v, 16)
+#define ROL64_17(d, v) ROL64_small(d, v, 17)
+#define ROL64_18(d, v) ROL64_small(d, v, 18)
+#define ROL64_19(d, v) ROL64_small(d, v, 19)
+#define ROL64_20(d, v) ROL64_small(d, v, 20)
+#define ROL64_21(d, v) ROL64_small(d, v, 21)
+#define ROL64_22(d, v) ROL64_small(d, v, 22)
+#define ROL64_23(d, v) ROL64_small(d, v, 23)
+#define ROL64_24(d, v) ROL64_small(d, v, 24)
+#define ROL64_25(d, v) ROL64_small(d, v, 25)
+#define ROL64_26(d, v) ROL64_small(d, v, 26)
+#define ROL64_27(d, v) ROL64_small(d, v, 27)
+#define ROL64_28(d, v) ROL64_small(d, v, 28)
+#define ROL64_29(d, v) ROL64_small(d, v, 29)
+#define ROL64_30(d, v) ROL64_small(d, v, 30)
+#define ROL64_31(d, v) ROL64_small(d, v, 31)
+
+#define ROL64_32(d, v) do { \
+ bee_u32 tmp; \
+ tmp = v ## l; \
+ d ## l = v ## h; \
+ d ## h = tmp; \
+ } while (0)
+
+#define ROL64_big(d, v, n) do { \
+ bee_u32 trl, trh; \
+ ROL64_small(tr, v, n); \
+ d ## h = trl; \
+ d ## l = trh; \
+ } while (0)
+
+#define ROL64_33(d, v) ROL64_big(d, v, 1)
+#define ROL64_34(d, v) ROL64_big(d, v, 2)
+#define ROL64_35(d, v) ROL64_big(d, v, 3)
+#define ROL64_36(d, v) ROL64_big(d, v, 4)
+#define ROL64_37(d, v) ROL64_big(d, v, 5)
+#define ROL64_38(d, v) ROL64_big(d, v, 6)
+#define ROL64_39(d, v) ROL64_big(d, v, 7)
+#define ROL64_40(d, v) ROL64_big(d, v, 8)
+#define ROL64_41(d, v) ROL64_big(d, v, 9)
+#define ROL64_42(d, v) ROL64_big(d, v, 10)
+#define ROL64_43(d, v) ROL64_big(d, v, 11)
+#define ROL64_44(d, v) ROL64_big(d, v, 12)
+#define ROL64_45(d, v) ROL64_big(d, v, 13)
+#define ROL64_46(d, v) ROL64_big(d, v, 14)
+#define ROL64_47(d, v) ROL64_big(d, v, 15)
+#define ROL64_48(d, v) ROL64_big(d, v, 16)
+#define ROL64_49(d, v) ROL64_big(d, v, 17)
+#define ROL64_50(d, v) ROL64_big(d, v, 18)
+#define ROL64_51(d, v) ROL64_big(d, v, 19)
+#define ROL64_52(d, v) ROL64_big(d, v, 20)
+#define ROL64_53(d, v) ROL64_big(d, v, 21)
+#define ROL64_54(d, v) ROL64_big(d, v, 22)
+#define ROL64_55(d, v) ROL64_big(d, v, 23)
+#define ROL64_56(d, v) ROL64_big(d, v, 24)
+#define ROL64_57(d, v) ROL64_big(d, v, 25)
+#define ROL64_58(d, v) ROL64_big(d, v, 26)
+#define ROL64_59(d, v) ROL64_big(d, v, 27)
+#define ROL64_60(d, v) ROL64_big(d, v, 28)
+#define ROL64_61(d, v) ROL64_big(d, v, 29)
+#define ROL64_62(d, v) ROL64_big(d, v, 30)
+#define ROL64_63(d, v) ROL64_big(d, v, 31)
+
+#endif
+
+#define XOR64_IOTA(d, s, k) \
+ (d ## l = s ## l ^ k.low, d ## h = s ## h ^ k.high)
+
+#endif
+
+
+
+#define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) do { \
+ DECL64(tt0); \
+ DECL64(tt1); \
+ DECL64(tt2); \
+ DECL64(tt3); \
+ XOR64(tt0, d0, d1); \
+ XOR64(tt1, d2, d3); \
+ XOR64(tt0, tt0, d4); \
+ XOR64(tt0, tt0, tt1); \
+ ROL64(tt0, tt0, 1); \
+ XOR64(tt2, c0, c1); \
+ XOR64(tt3, c2, c3); \
+ XOR64(tt0, tt0, c4); \
+ XOR64(tt2, tt2, tt3); \
+ XOR64(t, tt0, tt2); \
+ } while (0)
+
+/*
+ * Keccak theta step over the full 5x5 lane matrix: each column parity
+ * t0..t4 is computed with TH_ELT, then XORed into every lane of the
+ * corresponding column.
+ */
+#define THETA(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
+ b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
+ b40, b41, b42, b43, b44) \
+ do { \
+ DECL64(t0); \
+ DECL64(t1); \
+ DECL64(t2); \
+ DECL64(t3); \
+ DECL64(t4); \
+ TH_ELT(t0, b40, b41, b42, b43, b44, b10, b11, b12, b13, b14); \
+ TH_ELT(t1, b00, b01, b02, b03, b04, b20, b21, b22, b23, b24); \
+ TH_ELT(t2, b10, b11, b12, b13, b14, b30, b31, b32, b33, b34); \
+ TH_ELT(t3, b20, b21, b22, b23, b24, b40, b41, b42, b43, b44); \
+ TH_ELT(t4, b30, b31, b32, b33, b34, b00, b01, b02, b03, b04); \
+ XOR64(b00, b00, t0); \
+ XOR64(b01, b01, t0); \
+ XOR64(b02, b02, t0); \
+ XOR64(b03, b03, t0); \
+ XOR64(b04, b04, t0); \
+ XOR64(b10, b10, t1); \
+ XOR64(b11, b11, t1); \
+ XOR64(b12, b12, t1); \
+ XOR64(b13, b13, t1); \
+ XOR64(b14, b14, t1); \
+ XOR64(b20, b20, t2); \
+ XOR64(b21, b21, t2); \
+ XOR64(b22, b22, t2); \
+ XOR64(b23, b23, t2); \
+ XOR64(b24, b24, t2); \
+ XOR64(b30, b30, t3); \
+ XOR64(b31, b31, t3); \
+ XOR64(b32, b32, t3); \
+ XOR64(b33, b33, t3); \
+ XOR64(b34, b34, t3); \
+ XOR64(b40, b40, t4); \
+ XOR64(b41, b41, t4); \
+ XOR64(b42, b42, t4); \
+ XOR64(b43, b43, t4); \
+ XOR64(b44, b44, t4); \
+ } while (0)
+
+/*
+ * Keccak rho step: rotate each lane left by its fixed per-lane offset
+ * (b00's offset is 0, hence the commented-out no-op line below).
+ */
+#define RHO(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
+ b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
+ b40, b41, b42, b43, b44) \
+ do { \
+ /* ROL64(b00, b00, 0); */ \
+ ROL64(b01, b01, 36); \
+ ROL64(b02, b02, 3); \
+ ROL64(b03, b03, 41); \
+ ROL64(b04, b04, 18); \
+ ROL64(b10, b10, 1); \
+ ROL64(b11, b11, 44); \
+ ROL64(b12, b12, 10); \
+ ROL64(b13, b13, 45); \
+ ROL64(b14, b14, 2); \
+ ROL64(b20, b20, 62); \
+ ROL64(b21, b21, 6); \
+ ROL64(b22, b22, 43); \
+ ROL64(b23, b23, 15); \
+ ROL64(b24, b24, 61); \
+ ROL64(b30, b30, 28); \
+ ROL64(b31, b31, 55); \
+ ROL64(b32, b32, 25); \
+ ROL64(b33, b33, 21); \
+ ROL64(b34, b34, 56); \
+ ROL64(b40, b40, 27); \
+ ROL64(b41, b41, 20); \
+ ROL64(b42, b42, 39); \
+ ROL64(b43, b43, 8); \
+ ROL64(b44, b44, 14); \
+ } while (0)
+
+/*
+ * The KHI macro integrates the "lane complement" optimization. On input,
+ * some words are complemented:
+ * a00 a01 a02 a04 a13 a20 a21 a22 a30 a33 a34 a43
+ * On output, the following words are complemented:
+ * a04 a10 a20 a22 a23 a31
+ *
+ * The (implicit) permutation and the theta expansion will bring back
+ * the input mask for the next round.
+ */
+
+/* d = a ^ (b | c) — chi-step element used where a lane is complemented. */
+#define KHI_XO(d, a, b, c) do { \
+ DECL64(kt); \
+ OR64(kt, b, c); \
+ XOR64(d, a, kt); \
+ } while (0)
+
+/* d = a ^ (b & c) — the plain chi-step element. */
+#define KHI_XA(d, a, b, c) do { \
+ DECL64(kt); \
+ AND64(kt, b, c); \
+ XOR64(d, a, kt); \
+ } while (0)
+
+#define KHI(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
+ b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
+ b40, b41, b42, b43, b44) \
+ do { \
+ DECL64(c0); \
+ DECL64(c1); \
+ DECL64(c2); \
+ DECL64(c3); \
+ DECL64(c4); \
+ DECL64(bnn); \
+ NOT64(bnn, b20); \
+ KHI_XO(c0, b00, b10, b20); \
+ KHI_XO(c1, b10, bnn, b30); \
+ KHI_XA(c2, b20, b30, b40); \
+ KHI_XO(c3, b30, b40, b00); \
+ KHI_XA(c4, b40, b00, b10); \
+ MOV64(b00, c0); \
+ MOV64(b10, c1); \
+ MOV64(b20, c2); \
+ MOV64(b30, c3); \
+ MOV64(b40, c4); \
+ NOT64(bnn, b41); \
+ KHI_XO(c0, b01, b11, b21); \
+ KHI_XA(c1, b11, b21, b31); \
+ KHI_XO(c2, b21, b31, bnn); \
+ KHI_XO(c3, b31, b41, b01); \
+ KHI_XA(c4, b41, b01, b11); \
+ MOV64(b01, c0); \
+ MOV64(b11, c1); \
+ MOV64(b21, c2); \
+ MOV64(b31, c3); \
+ MOV64(b41, c4); \
+ NOT64(bnn, b32); \
+ KHI_XO(c0, b02, b12, b22); \
+ KHI_XA(c1, b12, b22, b32); \
+ KHI_XA(c2, b22, bnn, b42); \
+ KHI_XO(c3, bnn, b42, b02); \
+ KHI_XA(c4, b42, b02, b12); \
+ MOV64(b02, c0); \
+ MOV64(b12, c1); \
+ MOV64(b22, c2); \
+ MOV64(b32, c3); \
+ MOV64(b42, c4); \
+ NOT64(bnn, b33); \
+ KHI_XA(c0, b03, b13, b23); \
+ KHI_XO(c1, b13, b23, b33); \
+ KHI_XO(c2, b23, bnn, b43); \
+ KHI_XA(c3, bnn, b43, b03); \
+ KHI_XO(c4, b43, b03, b13); \
+ MOV64(b03, c0); \
+ MOV64(b13, c1); \
+ MOV64(b23, c2); \
+ MOV64(b33, c3); \
+ MOV64(b43, c4); \
+ NOT64(bnn, b14); \
+ KHI_XA(c0, b04, bnn, b24); \
+ KHI_XO(c1, bnn, b24, b34); \
+ KHI_XA(c2, b24, b34, b44); \
+ KHI_XO(c3, b34, b44, b04); \
+ KHI_XA(c4, b44, b04, b14); \
+ MOV64(b04, c0); \
+ MOV64(b14, c1); \
+ MOV64(b24, c2); \
+ MOV64(b34, c3); \
+ MOV64(b44, c4); \
+ } while (0)
+
+#define IOTA(r) XOR64_IOTA(a00, a00, r)
+
+#define P0 a00, a01, a02, a03, a04, a10, a11, a12, a13, a14, a20, a21, \
+ a22, a23, a24, a30, a31, a32, a33, a34, a40, a41, a42, a43, a44
+#define P1 a00, a30, a10, a40, a20, a11, a41, a21, a01, a31, a22, a02, \
+ a32, a12, a42, a33, a13, a43, a23, a03, a44, a24, a04, a34, a14
+#define P2 a00, a33, a11, a44, a22, a41, a24, a02, a30, a13, a32, a10, \
+ a43, a21, a04, a23, a01, a34, a12, a40, a14, a42, a20, a03, a31
+#define P3 a00, a23, a41, a14, a32, a24, a42, a10, a33, a01, a43, a11, \
+ a34, a02, a20, a12, a30, a03, a21, a44, a31, a04, a22, a40, a13
+#define P4 a00, a12, a24, a31, a43, a42, a04, a11, a23, a30, a34, a41, \
+ a03, a10, a22, a21, a33, a40, a02, a14, a13, a20, a32, a44, a01
+#define P5 a00, a21, a42, a13, a34, a04, a20, a41, a12, a33, a03, a24, \
+ a40, a11, a32, a02, a23, a44, a10, a31, a01, a22, a43, a14, a30
+#define P6 a00, a02, a04, a01, a03, a20, a22, a24, a21, a23, a40, a42, \
+ a44, a41, a43, a10, a12, a14, a11, a13, a30, a32, a34, a31, a33
+#define P7 a00, a10, a20, a30, a40, a22, a32, a42, a02, a12, a44, a04, \
+ a14, a24, a34, a11, a21, a31, a41, a01, a33, a43, a03, a13, a23
+#define P8 a00, a11, a22, a33, a44, a32, a43, a04, a10, a21, a14, a20, \
+ a31, a42, a03, a41, a02, a13, a24, a30, a23, a34, a40, a01, a12
+#define P9 a00, a41, a32, a23, a14, a43, a34, a20, a11, a02, a31, a22, \
+ a13, a04, a40, a24, a10, a01, a42, a33, a12, a03, a44, a30, a21
+#define P10 a00, a24, a43, a12, a31, a34, a03, a22, a41, a10, a13, a32, \
+ a01, a20, a44, a42, a11, a30, a04, a23, a21, a40, a14, a33, a02
+#define P11 a00, a42, a34, a21, a13, a03, a40, a32, a24, a11, a01, a43, \
+ a30, a22, a14, a04, a41, a33, a20, a12, a02, a44, a31, a23, a10
+#define P12 a00, a04, a03, a02, a01, a40, a44, a43, a42, a41, a30, a34, \
+ a33, a32, a31, a20, a24, a23, a22, a21, a10, a14, a13, a12, a11
+#define P13 a00, a20, a40, a10, a30, a44, a14, a34, a04, a24, a33, a03, \
+ a23, a43, a13, a22, a42, a12, a32, a02, a11, a31, a01, a21, a41
+#define P14 a00, a22, a44, a11, a33, a14, a31, a03, a20, a42, a23, a40, \
+ a12, a34, a01, a32, a04, a21, a43, a10, a41, a13, a30, a02, a24
+#define P15 a00, a32, a14, a41, a23, a31, a13, a40, a22, a04, a12, a44, \
+ a21, a03, a30, a43, a20, a02, a34, a11, a24, a01, a33, a10, a42
+#define P16 a00, a43, a31, a24, a12, a13, a01, a44, a32, a20, a21, a14, \
+ a02, a40, a33, a34, a22, a10, a03, a41, a42, a30, a23, a11, a04
+#define P17 a00, a34, a13, a42, a21, a01, a30, a14, a43, a22, a02, a31, \
+ a10, a44, a23, a03, a32, a11, a40, a24, a04, a33, a12, a41, a20
+#define P18 a00, a03, a01, a04, a02, a30, a33, a31, a34, a32, a10, a13, \
+ a11, a14, a12, a40, a43, a41, a44, a42, a20, a23, a21, a24, a22
+#define P19 a00, a40, a30, a20, a10, a33, a23, a13, a03, a43, a11, a01, \
+ a41, a31, a21, a44, a34, a24, a14, a04, a22, a12, a02, a42, a32
+#define P20 a00, a44, a33, a22, a11, a23, a12, a01, a40, a34, a41, a30, \
+ a24, a13, a02, a14, a03, a42, a31, a20, a32, a21, a10, a04, a43
+#define P21 a00, a14, a23, a32, a41, a12, a21, a30, a44, a03, a24, a33, \
+ a42, a01, a10, a31, a40, a04, a13, a22, a43, a02, a11, a20, a34
+#define P22 a00, a31, a12, a43, a24, a21, a02, a33, a14, a40, a42, a23, \
+ a04, a30, a11, a13, a44, a20, a01, a32, a34, a10, a41, a22, a03
+#define P23 a00, a13, a21, a34, a42, a02, a10, a23, a31, a44, a04, a12, \
+ a20, a33, a41, a01, a14, a22, a30, a43, a03, a11, a24, a32, a40
+
+#define P1_TO_P0 do { \
+ DECL64(t); \
+ MOV64(t, a01); \
+ MOV64(a01, a30); \
+ MOV64(a30, a33); \
+ MOV64(a33, a23); \
+ MOV64(a23, a12); \
+ MOV64(a12, a21); \
+ MOV64(a21, a02); \
+ MOV64(a02, a10); \
+ MOV64(a10, a11); \
+ MOV64(a11, a41); \
+ MOV64(a41, a24); \
+ MOV64(a24, a42); \
+ MOV64(a42, a04); \
+ MOV64(a04, a20); \
+ MOV64(a20, a22); \
+ MOV64(a22, a32); \
+ MOV64(a32, a43); \
+ MOV64(a43, a34); \
+ MOV64(a34, a03); \
+ MOV64(a03, a40); \
+ MOV64(a40, a44); \
+ MOV64(a44, a14); \
+ MOV64(a14, a31); \
+ MOV64(a31, a13); \
+ MOV64(a13, t); \
+ } while (0)
+
+#define P2_TO_P0 do { \
+ DECL64(t); \
+ MOV64(t, a01); \
+ MOV64(a01, a33); \
+ MOV64(a33, a12); \
+ MOV64(a12, a02); \
+ MOV64(a02, a11); \
+ MOV64(a11, a24); \
+ MOV64(a24, a04); \
+ MOV64(a04, a22); \
+ MOV64(a22, a43); \
+ MOV64(a43, a03); \
+ MOV64(a03, a44); \
+ MOV64(a44, a31); \
+ MOV64(a31, t); \
+ MOV64(t, a10); \
+ MOV64(a10, a41); \
+ MOV64(a41, a42); \
+ MOV64(a42, a20); \
+ MOV64(a20, a32); \
+ MOV64(a32, a34); \
+ MOV64(a34, a40); \
+ MOV64(a40, a14); \
+ MOV64(a14, a13); \
+ MOV64(a13, a30); \
+ MOV64(a30, a23); \
+ MOV64(a23, a21); \
+ MOV64(a21, t); \
+ } while (0)
+
+#define P4_TO_P0 do { \
+ DECL64(t); \
+ MOV64(t, a01); \
+ MOV64(a01, a12); \
+ MOV64(a12, a11); \
+ MOV64(a11, a04); \
+ MOV64(a04, a43); \
+ MOV64(a43, a44); \
+ MOV64(a44, t); \
+ MOV64(t, a02); \
+ MOV64(a02, a24); \
+ MOV64(a24, a22); \
+ MOV64(a22, a03); \
+ MOV64(a03, a31); \
+ MOV64(a31, a33); \
+ MOV64(a33, t); \
+ MOV64(t, a10); \
+ MOV64(a10, a42); \
+ MOV64(a42, a32); \
+ MOV64(a32, a40); \
+ MOV64(a40, a13); \
+ MOV64(a13, a23); \
+ MOV64(a23, t); \
+ MOV64(t, a14); \
+ MOV64(a14, a30); \
+ MOV64(a30, a21); \
+ MOV64(a21, a41); \
+ MOV64(a41, a20); \
+ MOV64(a20, a34); \
+ MOV64(a34, t); \
+ } while (0)
+
+#define P6_TO_P0 do { \
+ DECL64(t); \
+ MOV64(t, a01); \
+ MOV64(a01, a02); \
+ MOV64(a02, a04); \
+ MOV64(a04, a03); \
+ MOV64(a03, t); \
+ MOV64(t, a10); \
+ MOV64(a10, a20); \
+ MOV64(a20, a40); \
+ MOV64(a40, a30); \
+ MOV64(a30, t); \
+ MOV64(t, a11); \
+ MOV64(a11, a22); \
+ MOV64(a22, a44); \
+ MOV64(a44, a33); \
+ MOV64(a33, t); \
+ MOV64(t, a12); \
+ MOV64(a12, a24); \
+ MOV64(a24, a43); \
+ MOV64(a43, a31); \
+ MOV64(a31, t); \
+ MOV64(t, a13); \
+ MOV64(a13, a21); \
+ MOV64(a21, a42); \
+ MOV64(a42, a34); \
+ MOV64(a34, t); \
+ MOV64(t, a14); \
+ MOV64(a14, a23); \
+ MOV64(a23, a41); \
+ MOV64(a41, a32); \
+ MOV64(a32, t); \
+ } while (0)
+
+#define P8_TO_P0 do { \
+ DECL64(t); \
+ MOV64(t, a01); \
+ MOV64(a01, a11); \
+ MOV64(a11, a43); \
+ MOV64(a43, t); \
+ MOV64(t, a02); \
+ MOV64(a02, a22); \
+ MOV64(a22, a31); \
+ MOV64(a31, t); \
+ MOV64(t, a03); \
+ MOV64(a03, a33); \
+ MOV64(a33, a24); \
+ MOV64(a24, t); \
+ MOV64(t, a04); \
+ MOV64(a04, a44); \
+ MOV64(a44, a12); \
+ MOV64(a12, t); \
+ MOV64(t, a10); \
+ MOV64(a10, a32); \
+ MOV64(a32, a13); \
+ MOV64(a13, t); \
+ MOV64(t, a14); \
+ MOV64(a14, a21); \
+ MOV64(a21, a20); \
+ MOV64(a20, t); \
+ MOV64(t, a23); \
+ MOV64(a23, a42); \
+ MOV64(a42, a40); \
+ MOV64(a40, t); \
+ MOV64(t, a30); \
+ MOV64(a30, a41); \
+ MOV64(a41, a34); \
+ MOV64(a34, t); \
+ } while (0)
+
+#define P12_TO_P0 do { \
+ DECL64(t); \
+ MOV64(t, a01); \
+ MOV64(a01, a04); \
+ MOV64(a04, t); \
+ MOV64(t, a02); \
+ MOV64(a02, a03); \
+ MOV64(a03, t); \
+ MOV64(t, a10); \
+ MOV64(a10, a40); \
+ MOV64(a40, t); \
+ MOV64(t, a11); \
+ MOV64(a11, a44); \
+ MOV64(a44, t); \
+ MOV64(t, a12); \
+ MOV64(a12, a43); \
+ MOV64(a43, t); \
+ MOV64(t, a13); \
+ MOV64(a13, a42); \
+ MOV64(a42, t); \
+ MOV64(t, a14); \
+ MOV64(a14, a41); \
+ MOV64(a41, t); \
+ MOV64(t, a20); \
+ MOV64(a20, a30); \
+ MOV64(a30, t); \
+ MOV64(t, a21); \
+ MOV64(a21, a34); \
+ MOV64(a34, t); \
+ MOV64(t, a22); \
+ MOV64(a22, a33); \
+ MOV64(a33, t); \
+ MOV64(t, a23); \
+ MOV64(a23, a32); \
+ MOV64(a32, t); \
+ MOV64(t, a24); \
+ MOV64(a24, a31); \
+ MOV64(a31, t); \
+ } while (0)
+
+/*
+ * LPAR/RPAR hide the parentheses from the preprocessor so that "P ## r"
+ * is pasted and expanded into the full 25-lane argument list BEFORE
+ * THETA/RHO/KHI are invoked.
+ */
+#define LPAR (
+#define RPAR )
+
+/*
+ * One Keccak round: theta+rho on lane permutation r, chi on the next
+ * permutation s (the pi step is implicit in the P tables), then iota
+ * with round constant k.
+ */
+#define KF_ELT(r, s, k) do { \
+ THETA LPAR P ## r RPAR; \
+ RHO LPAR P ## r RPAR; \
+ KHI LPAR P ## s RPAR; \
+ IOTA(k); \
+ } while (0)
+
+/* Extra expansion level so KECCAK_F_1600_ is fully macro-expanded. */
+#define DO(x) x
+
+#define KECCAK_F_1600 DO(KECCAK_F_1600_)
+
+#if BEE_KECCAK_UNROLL == 1
+
+#define KECCAK_F_1600_ do { \
+ int j; \
+ for (j = 0; j < 24; j ++) { \
+ KF_ELT( 0, 1, RC[j + 0]); \
+ P1_TO_P0; \
+ } \
+ } while (0)
+
+#elif BEE_KECCAK_UNROLL == 2
+
+#define KECCAK_F_1600_ do { \
+ int j; \
+ for (j = 0; j < 24; j += 2) { \
+ KF_ELT( 0, 1, RC[j + 0]); \
+ KF_ELT( 1, 2, RC[j + 1]); \
+ P2_TO_P0; \
+ } \
+ } while (0)
+
+#elif BEE_KECCAK_UNROLL == 4
+
+#define KECCAK_F_1600_ do { \
+ int j; \
+ for (j = 0; j < 24; j += 4) { \
+ KF_ELT( 0, 1, RC[j + 0]); \
+ KF_ELT( 1, 2, RC[j + 1]); \
+ KF_ELT( 2, 3, RC[j + 2]); \
+ KF_ELT( 3, 4, RC[j + 3]); \
+ P4_TO_P0; \
+ } \
+ } while (0)
+
+#elif BEE_KECCAK_UNROLL == 6
+
+#define KECCAK_F_1600_ do { \
+ int j; \
+ for (j = 0; j < 24; j += 6) { \
+ KF_ELT( 0, 1, RC[j + 0]); \
+ KF_ELT( 1, 2, RC[j + 1]); \
+ KF_ELT( 2, 3, RC[j + 2]); \
+ KF_ELT( 3, 4, RC[j + 3]); \
+ KF_ELT( 4, 5, RC[j + 4]); \
+ KF_ELT( 5, 6, RC[j + 5]); \
+ P6_TO_P0; \
+ } \
+ } while (0)
+
+#elif BEE_KECCAK_UNROLL == 8
+
+#define KECCAK_F_1600_ do { \
+ int j; \
+ for (j = 0; j < 24; j += 8) { \
+ KF_ELT( 0, 1, RC[j + 0]); \
+ KF_ELT( 1, 2, RC[j + 1]); \
+ KF_ELT( 2, 3, RC[j + 2]); \
+ KF_ELT( 3, 4, RC[j + 3]); \
+ KF_ELT( 4, 5, RC[j + 4]); \
+ KF_ELT( 5, 6, RC[j + 5]); \
+ KF_ELT( 6, 7, RC[j + 6]); \
+ KF_ELT( 7, 8, RC[j + 7]); \
+ P8_TO_P0; \
+ } \
+ } while (0)
+
+#elif BEE_KECCAK_UNROLL == 12
+
+#define KECCAK_F_1600_ do { \
+ int j; \
+ for (j = 0; j < 24; j += 12) { \
+ KF_ELT( 0, 1, RC[j + 0]); \
+ KF_ELT( 1, 2, RC[j + 1]); \
+ KF_ELT( 2, 3, RC[j + 2]); \
+ KF_ELT( 3, 4, RC[j + 3]); \
+ KF_ELT( 4, 5, RC[j + 4]); \
+ KF_ELT( 5, 6, RC[j + 5]); \
+ KF_ELT( 6, 7, RC[j + 6]); \
+ KF_ELT( 7, 8, RC[j + 7]); \
+ KF_ELT( 8, 9, RC[j + 8]); \
+ KF_ELT( 9, 10, RC[j + 9]); \
+ KF_ELT(10, 11, RC[j + 10]); \
+ KF_ELT(11, 12, RC[j + 11]); \
+ P12_TO_P0; \
+ } \
+ } while (0)
+
+#elif BEE_KECCAK_UNROLL == 0
+
+#define KECCAK_F_1600_ do { \
+ KF_ELT( 0, 1, RC[ 0]); \
+ KF_ELT( 1, 2, RC[ 1]); \
+ KF_ELT( 2, 3, RC[ 2]); \
+ KF_ELT( 3, 4, RC[ 3]); \
+ KF_ELT( 4, 5, RC[ 4]); \
+ KF_ELT( 5, 6, RC[ 5]); \
+ KF_ELT( 6, 7, RC[ 6]); \
+ KF_ELT( 7, 8, RC[ 7]); \
+ KF_ELT( 8, 9, RC[ 8]); \
+ KF_ELT( 9, 10, RC[ 9]); \
+ KF_ELT(10, 11, RC[10]); \
+ KF_ELT(11, 12, RC[11]); \
+ KF_ELT(12, 13, RC[12]); \
+ KF_ELT(13, 14, RC[13]); \
+ KF_ELT(14, 15, RC[14]); \
+ KF_ELT(15, 16, RC[15]); \
+ KF_ELT(16, 17, RC[16]); \
+ KF_ELT(17, 18, RC[17]); \
+ KF_ELT(18, 19, RC[18]); \
+ KF_ELT(19, 20, RC[19]); \
+ KF_ELT(20, 21, RC[20]); \
+ KF_ELT(21, 22, RC[21]); \
+ KF_ELT(22, 23, RC[22]); \
+ KF_ELT(23, 0, RC[23]); \
+ } while (0)
+
+#else
+
+#error Unimplemented unroll count for one.
+
+#endif
+
+/*
+ * Initialize the Keccak-1600 sponge state for an output of out_size bits.
+ * The state is zeroed except for the "lane complement" lanes, which start
+ * all-ones; the sponge rate (block size) is 200 - out_size/4 bytes
+ * (72 bytes for a 512-bit output).
+ */
+static void one_init(facet_one_context *kc, unsigned out_size)
+{
+ int i;
+
+#if BEE_KECCAK_64
+ for (i = 0; i < 25; i ++)
+ kc->u.wide[i] = 0;
+ /*
+ * Initialization for the "lane complement".
+ */
+ kc->u.wide[ 1] = BEE_C64(0xFFFFFFFFFFFFFFFF);
+ kc->u.wide[ 2] = BEE_C64(0xFFFFFFFFFFFFFFFF);
+ kc->u.wide[ 8] = BEE_C64(0xFFFFFFFFFFFFFFFF);
+ kc->u.wide[12] = BEE_C64(0xFFFFFFFFFFFFFFFF);
+ kc->u.wide[17] = BEE_C64(0xFFFFFFFFFFFFFFFF);
+ kc->u.wide[20] = BEE_C64(0xFFFFFFFFFFFFFFFF);
+#else
+
+ /* 32-bit path: the same 1600-bit state as 50 narrow words. */
+ for (i = 0; i < 50; i ++)
+ kc->u.narrow[i] = 0;
+ /*
+ * Initialization for the "lane complement".
+ * Note: since we set to all-one full 64-bit words,
+ * interleaving (if applicable) is a no-op.
+ */
+ kc->u.narrow[ 2] = BEE_C32(0xFFFFFFFF);
+ kc->u.narrow[ 3] = BEE_C32(0xFFFFFFFF);
+ kc->u.narrow[ 4] = BEE_C32(0xFFFFFFFF);
+ kc->u.narrow[ 5] = BEE_C32(0xFFFFFFFF);
+ kc->u.narrow[16] = BEE_C32(0xFFFFFFFF);
+ kc->u.narrow[17] = BEE_C32(0xFFFFFFFF);
+ kc->u.narrow[24] = BEE_C32(0xFFFFFFFF);
+ kc->u.narrow[25] = BEE_C32(0xFFFFFFFF);
+ kc->u.narrow[34] = BEE_C32(0xFFFFFFFF);
+ kc->u.narrow[35] = BEE_C32(0xFFFFFFFF);
+ kc->u.narrow[40] = BEE_C32(0xFFFFFFFF);
+ kc->u.narrow[41] = BEE_C32(0xFFFFFFFF);
+#endif
+ kc->ptr = 0;
+ /* rate = 200 - 2 * (out_size_bits / 8) bytes */
+ kc->lim = 200 - (out_size >> 2);
+}
+
+/*
+ * Absorb len bytes of input into the sponge, lim being the rate in
+ * bytes. Partial blocks are buffered in kc->buf; whenever the buffer
+ * fills, it is XORed into the state (INPUT_BUF) and the Keccak-f[1600]
+ * permutation is applied.
+ */
+static void one_core(facet_one_context *kc, const void *data, size_t len, size_t lim)
+{
+ unsigned char *buf;
+ size_t ptr;
+ DECL_STATE
+
+ buf = kc->buf;
+ ptr = kc->ptr;
+
+ /* Fast path: input fits in the current partial block — just buffer it. */
+ if( len < (lim - ptr) )
+ {
+ memcpy( buf + ptr, data, len );
+ kc->ptr = ptr + len;
+ return;
+ }
+
+ /* Copy the state into locals, process full blocks, then write it back. */
+ READ_STATE(kc);
+ while (len > 0) {
+ size_t clen;
+
+ clen = (lim - ptr);
+ if (clen > len)
+ clen = len;
+ memcpy(buf + ptr, data, clen);
+ ptr += clen;
+ data = (const unsigned char *)data + clen;
+ len -= clen;
+ if (ptr == lim) {
+ INPUT_BUF(lim);
+ KECCAK_F_1600;
+ ptr = 0;
+ }
+ }
+ WRITE_STATE(kc);
+ kc->ptr = ptr;
+}
+
+#if BEE_KECCAK_64
+
+/*
+ * Define one_close<d>() for a d-byte output and a lim-byte rate (64-bit
+ * lane version). It appends the n extra bits in ub, applies the pad10*1
+ * padding (special-cased when only one byte of room is left), runs the
+ * final permutation via one_core, un-complements the "lane complement"
+ * lanes, encodes d little-endian output bytes, and re-initializes the
+ * context for a new computation.
+ */
+#define DEFCLOSE(d, lim) \
+ static void one_close ## d( \
+ facet_one_context *kc, unsigned ub, unsigned n, void *dst) \
+ { \
+ unsigned eb; \
+ union { \
+ unsigned char tmp[lim + 1]; \
+ bee_u64 dummy; /* for alignment */ \
+ } u; \
+ size_t j; \
+ \
+ eb = (0x100 | (ub & 0xFF)) >> (8 - n); \
+ if (kc->ptr == (lim - 1)) { \
+ if (n == 7) { \
+ u.tmp[0] = eb; \
+ memset(u.tmp + 1, 0, lim - 1); \
+ u.tmp[lim] = 0x80; \
+ j = 1 + lim; \
+ } else { \
+ u.tmp[0] = eb | 0x80; \
+ j = 1; \
+ } \
+ } else { \
+ j = lim - kc->ptr; \
+ u.tmp[0] = eb; \
+ memset(u.tmp + 1, 0, j - 2); \
+ u.tmp[j - 1] = 0x80; \
+ } \
+ one_core(kc, u.tmp, j, lim); \
+ /* Finalize the "lane complement" */ \
+ kc->u.wide[ 1] = ~kc->u.wide[ 1]; \
+ kc->u.wide[ 2] = ~kc->u.wide[ 2]; \
+ kc->u.wide[ 8] = ~kc->u.wide[ 8]; \
+ kc->u.wide[12] = ~kc->u.wide[12]; \
+ kc->u.wide[17] = ~kc->u.wide[17]; \
+ kc->u.wide[20] = ~kc->u.wide[20]; \
+ for (j = 0; j < d; j += 8) \
+ bee_enc64le_aligned(u.tmp + j, kc->u.wide[j >> 3]); \
+ memcpy(dst, u.tmp, d); \
+ one_init(kc, (unsigned)d << 3); \
+ } \
+
+#else
+
+#define DEFCLOSE(d, lim) \
+ static void one_close ## d( \
+ facet_one_context *kc, unsigned ub, unsigned n, void *dst) \
+ { \
+ unsigned eb; \
+ union { \
+ unsigned char tmp[lim + 1]; \
+ bee_u64 dummy; /* for alignment */ \
+ } u; \
+ size_t j; \
+ \
+ eb = (0x100 | (ub & 0xFF)) >> (8 - n); \
+ if (kc->ptr == (lim - 1)) { \
+ if (n == 7) { \
+ u.tmp[0] = eb; \
+ memset(u.tmp + 1, 0, lim - 1); \
+ u.tmp[lim] = 0x80; \
+ j = 1 + lim; \
+ } else { \
+ u.tmp[0] = eb | 0x80; \
+ j = 1; \
+ } \
+ } else { \
+ j = lim - kc->ptr; \
+ u.tmp[0] = eb; \
+ memset(u.tmp + 1, 0, j - 2); \
+ u.tmp[j - 1] = 0x80; \
+ } \
+ one_core(kc, u.tmp, j, lim); \
+ /* Finalize the "lane complement" */ \
+ kc->u.narrow[ 2] = ~kc->u.narrow[ 2]; \
+ kc->u.narrow[ 3] = ~kc->u.narrow[ 3]; \
+ kc->u.narrow[ 4] = ~kc->u.narrow[ 4]; \
+ kc->u.narrow[ 5] = ~kc->u.narrow[ 5]; \
+ kc->u.narrow[16] = ~kc->u.narrow[16]; \
+ kc->u.narrow[17] = ~kc->u.narrow[17]; \
+ kc->u.narrow[24] = ~kc->u.narrow[24]; \
+ kc->u.narrow[25] = ~kc->u.narrow[25]; \
+ kc->u.narrow[34] = ~kc->u.narrow[34]; \
+ kc->u.narrow[35] = ~kc->u.narrow[35]; \
+ kc->u.narrow[40] = ~kc->u.narrow[40]; \
+ kc->u.narrow[41] = ~kc->u.narrow[41]; \
+ /* un-interleave */ \
+ for (j = 0; j < 50; j += 2) \
+ UNINTERLEAVE(kc->u.narrow[j], kc->u.narrow[j + 1]); \
+ for (j = 0; j < d; j += 4) \
+ bee_enc32le_aligned(u.tmp + j, kc->u.narrow[j >> 2]); \
+ memcpy(dst, u.tmp, d); \
+ one_init(kc, (unsigned)d << 3); \
+ } \
+
+#endif
+
+DEFCLOSE(64, 72)
+
+
+/* see facet_one.h */
+void facet_one_init(void *cc)
+{
+ /* 512-bit output => 72-byte sponge rate (200 - 512/4). */
+ one_init(cc, 512);
+}
+
+/* see facet_one.h */
+void facet_one(void *cc, const void *data, size_t len)
+{
+ /* Absorb len bytes; 72 is the rate matching the 512-bit init above. */
+ one_core(cc, data, len, 72);
+}
+
+/* see facet_one.h */
+void facet_one_close(void *cc, void *dst)
+{
+ /* No trailing partial byte: finalize directly with zero extra bits. */
+ one_close64(cc, 0, 0, dst);
+}
+
+/* see facet_one.h */
+void facet_one_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+ /* one_close64 (from DEFCLOSE(64, 72)): pad, permute, emit 64 bytes, re-init. */
+ one_close64(cc, ub, n, dst);
+}
+
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/stratum/algos/honeycomb/facet_one.h b/stratum/algos/honeycomb/facet_one.h
new file mode 100644
index 000000000..f64db1e56
--- /dev/null
+++ b/stratum/algos/honeycomb/facet_one.h
@@ -0,0 +1,81 @@
+#ifndef FACET_ONE_H
+#define FACET_ONE_H
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#include <stddef.h>
+#include "honeycomb_types.h"
+
+
+//#undef BEE_64 //
+
+/**
+ * This structure is a context for HoneyComb Facet #1 computations: it contains the
+ * intermediate values and some data from the last entered block. Once a
+ * HoneyComb Facet #1 computation has been performed, the context can be reused for another computation.
+ *
+ * The contents of this structure are private. A running HoneyComb Facet #1 computation
+ * can be cloned by copying the context (e.g. with a simple memcpy() ).
+ */
+typedef struct {
+ unsigned char buf[144]; /* first field, for alignment */
+ size_t ptr, lim; /* ptr: bytes currently buffered; lim: sponge rate in bytes */
+ union
+ {
+#if BEE_64 //FACET_LEN_64
+ bee_u64 wide[25]; /* 1600-bit Keccak state as 25 64-bit lanes */
+#endif
+ bee_u32 narrow[50]; /* same state as 50 32-bit words (32-bit builds) */
+ } u;
+
+} facet_one_context;
+
+/**
+ * Initialize a HoneyComb Facet #1 context. This process performs no memory allocation.
+ *
+ * @param cc the HoneyComb Facet #1 context ( pointer to a facet_one_context )
+ */
+void facet_one_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that len is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc the HoneyComb Facet #1 context
+ * @param data the input data
+ * @param len the input data length (in bytes)
+ */
+void facet_one(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current HoneyComb Facet #1 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (64 bytes). The context is automatically reinitialized.
+ *
+ * @param cc the HoneyComb Facet #1 context
+ * @param dst the destination buffer
+ */
+void facet_one_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (64 bytes). If bit number i
+ * in ub has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc the HoneyComb Facet #1 context
+ * @param ub the extra bits
+ * @param n the number of extra bits (0 to 7)
+ * @param dst the destination buffer
+ */
+void facet_one_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/stratum/algos/honeycomb/facet_six.c b/stratum/algos/honeycomb/facet_six.c
new file mode 100644
index 000000000..edb905217
--- /dev/null
+++ b/stratum/algos/honeycomb/facet_six.c
@@ -0,0 +1,632 @@
+#include <stddef.h>
+#include <string.h>
+#include <limits.h>
+
+#include "facet_six.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+
+/*
+ * Some measures tend to show that the 64-bit implementation offers
+ * better performance only on a "64-bit architectures", those which have actual 64-bit registers.
+ */
+#if !defined BEE_ECHO_64 && BEE_64_TRUE
+ #define BEE_ECHO_64 1
+#endif
+
+/*
+ * We can use a 64-bit implementation only if a 64-bit type is available.
+ */
+#if !BEE_64
+ #undef BEE_ECHO_64
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4146)
+#endif
+
+#define T32 BEE_T32
+#define C32 BEE_C32
+#if BEE_64
+#define C64 BEE_C64
+#endif
+
+#define AES_BIG_ENDIAN 0
+#include "facets_helper.c"
+
+#if BEE_ECHO_64
+
+#define DECL_STATE_SMALL \
+ bee_u64 W[16][2];
+
+#define DECL_STATE_BIG \
+ bee_u64 W[16][2];
+
+#define INPUT_BLOCK_SMALL(sc) do { \
+ unsigned u; \
+ memcpy(W, sc->u.Vb, 8 * sizeof(bee_u64)); \
+ for (u = 0; u < 12; u ++) { \
+ W[u + 4][0] = bee_dec64le_aligned( \
+ sc->buf + 16 * u); \
+ W[u + 4][1] = bee_dec64le_aligned( \
+ sc->buf + 16 * u + 8); \
+ } \
+ } while (0)
+
+#define INPUT_BLOCK_BIG(sc) do { \
+ unsigned u; \
+ memcpy(W, sc->u.Vb, 16 * sizeof(bee_u64)); \
+ for (u = 0; u < 8; u ++) { \
+ W[u + 8][0] = bee_dec64le_aligned( \
+ sc->buf + 16 * u); \
+ W[u + 8][1] = bee_dec64le_aligned( \
+ sc->buf + 16 * u + 8); \
+ } \
+ } while (0)
+
+
+/*
+ * Apply two AES rounds to one 128-bit word X (stored as two bee_u64):
+ * the first round is keyed with the running counter K0..K3, the second
+ * is key-less; afterwards the 128-bit counter K is incremented with
+ * carry propagation across K0..K3.
+ */
+#define AES_2ROUNDS(X) do { \
+ bee_u32 X0 = (bee_u32)(X[0]); \
+ bee_u32 X1 = (bee_u32)(X[0] >> 32); \
+ bee_u32 X2 = (bee_u32)(X[1]); \
+ bee_u32 X3 = (bee_u32)(X[1] >> 32); \
+ bee_u32 Y0, Y1, Y2, Y3; \
+ AES_ROUND_LE(X0, X1, X2, X3, K0, K1, K2, K3, Y0, Y1, Y2, Y3); \
+ AES_ROUND_NOKEY_LE(Y0, Y1, Y2, Y3, X0, X1, X2, X3); \
+ X[0] = (bee_u64)X0 | ((bee_u64)X1 << 32); \
+ X[1] = (bee_u64)X2 | ((bee_u64)X3 << 32); \
+ if ((K0 = T32(K0 + 1)) == 0) { \
+ if ((K1 = T32(K1 + 1)) == 0) \
+ if ((K2 = T32(K2 + 1)) == 0) \
+ K3 = T32(K3 + 1); \
+ } \
+ } while (0)
+
+#define BIG_SUB_WORDS do { \
+ AES_2ROUNDS(W[ 0]); \
+ AES_2ROUNDS(W[ 1]); \
+ AES_2ROUNDS(W[ 2]); \
+ AES_2ROUNDS(W[ 3]); \
+ AES_2ROUNDS(W[ 4]); \
+ AES_2ROUNDS(W[ 5]); \
+ AES_2ROUNDS(W[ 6]); \
+ AES_2ROUNDS(W[ 7]); \
+ AES_2ROUNDS(W[ 8]); \
+ AES_2ROUNDS(W[ 9]); \
+ AES_2ROUNDS(W[10]); \
+ AES_2ROUNDS(W[11]); \
+ AES_2ROUNDS(W[12]); \
+ AES_2ROUNDS(W[13]); \
+ AES_2ROUNDS(W[14]); \
+ AES_2ROUNDS(W[15]); \
+ } while (0)
+
+
+#define SHIFT_ROW1(a, b, c, d) do { \
+ bee_u64 tmp; \
+ tmp = W[a][0]; \
+ W[a][0] = W[b][0]; \
+ W[b][0] = W[c][0]; \
+ W[c][0] = W[d][0]; \
+ W[d][0] = tmp; \
+ tmp = W[a][1]; \
+ W[a][1] = W[b][1]; \
+ W[b][1] = W[c][1]; \
+ W[c][1] = W[d][1]; \
+ W[d][1] = tmp; \
+ } while (0)
+
+#define SHIFT_ROW2(a, b, c, d) do { \
+ bee_u64 tmp; \
+ tmp = W[a][0]; \
+ W[a][0] = W[c][0]; \
+ W[c][0] = tmp; \
+ tmp = W[b][0]; \
+ W[b][0] = W[d][0]; \
+ W[d][0] = tmp; \
+ tmp = W[a][1]; \
+ W[a][1] = W[c][1]; \
+ W[c][1] = tmp; \
+ tmp = W[b][1]; \
+ W[b][1] = W[d][1]; \
+ W[d][1] = tmp; \
+ } while (0)
+
+#define SHIFT_ROW3(a, b, c, d) SHIFT_ROW1(d, c, b, a)
+
+#define BIG_SHIFT_ROWS do { \
+ SHIFT_ROW1(1, 5, 9, 13); \
+ SHIFT_ROW2(2, 6, 10, 14); \
+ SHIFT_ROW3(3, 7, 11, 15); \
+ } while (0)
+
+
+/*
+ * AES MixColumns over packed bytes: abx/bcx/cdx are byte-wise
+ * multiplications by 2 in GF(2^8) — the top bit of each byte selects the
+ * 0x1B (27) reduction polynomial while the low 7 bits shift left.
+ */
+#define MIX_COLUMN1(ia, ib, ic, id, n) do { \
+ bee_u64 a = W[ia][n]; \
+ bee_u64 b = W[ib][n]; \
+ bee_u64 c = W[ic][n]; \
+ bee_u64 d = W[id][n]; \
+ bee_u64 ab = a ^ b; \
+ bee_u64 bc = b ^ c; \
+ bee_u64 cd = c ^ d; \
+ bee_u64 abx = ((ab & C64(0x8080808080808080)) >> 7) * 27U \
+ ^ ((ab & C64(0x7F7F7F7F7F7F7F7F)) << 1); \
+ bee_u64 bcx = ((bc & C64(0x8080808080808080)) >> 7) * 27U \
+ ^ ((bc & C64(0x7F7F7F7F7F7F7F7F)) << 1); \
+ bee_u64 cdx = ((cd & C64(0x8080808080808080)) >> 7) * 27U \
+ ^ ((cd & C64(0x7F7F7F7F7F7F7F7F)) << 1); \
+ W[ia][n] = abx ^ bc ^ d; \
+ W[ib][n] = bcx ^ a ^ cd; \
+ W[ic][n] = cdx ^ ab ^ d; \
+ W[id][n] = abx ^ bcx ^ cdx ^ ab ^ c; \
+ } while (0)
+
+/* Mix one column of four 128-bit words (both 64-bit halves). */
+#define MIX_COLUMN(a, b, c, d) do { \
+ MIX_COLUMN1(a, b, c, d, 0); \
+ MIX_COLUMN1(a, b, c, d, 1); \
+ } while (0)
+
+
+#define BIG_MIX_COLUMNS do { \
+ MIX_COLUMN(0, 1, 2, 3); \
+ MIX_COLUMN(4, 5, 6, 7); \
+ MIX_COLUMN(8, 9, 10, 11); \
+ MIX_COLUMN(12, 13, 14, 15); \
+ } while (0)
+
+#define BIG_ROUND do { \
+ BIG_SUB_WORDS; \
+ BIG_SHIFT_ROWS; \
+ BIG_MIX_COLUMNS; \
+ } while (0)
+
+#define FINAL_SMALL do { \
+ unsigned u; \
+ bee_u64 *VV = &sc->u.Vb[0][0]; \
+ bee_u64 *WW = &W[0][0]; \
+ for (u = 0; u < 8; u ++) { \
+ VV[u] ^= bee_dec64le_aligned(sc->buf + (u * 8)) \
+ ^ bee_dec64le_aligned(sc->buf + (u * 8) + 64) \
+ ^ bee_dec64le_aligned(sc->buf + (u * 8) + 128) \
+ ^ WW[u] ^ WW[u + 8] \
+ ^ WW[u + 16] ^ WW[u + 24]; \
+ } \
+ } while (0)
+
+#define FINAL_BIG do { \
+ unsigned u; \
+ bee_u64 *VV = &sc->u.Vb[0][0]; \
+ bee_u64 *WW = &W[0][0]; \
+ for (u = 0; u < 16; u ++) { \
+ VV[u] ^= bee_dec64le_aligned(sc->buf + (u * 8)) \
+ ^ WW[u] ^ WW[u + 16]; \
+ } \
+ } while (0)
+
+#define COMPRESS_SMALL(sc) do { \
+ bee_u32 K0 = sc->C0; \
+ bee_u32 K1 = sc->C1; \
+ bee_u32 K2 = sc->C2; \
+ bee_u32 K3 = sc->C3; \
+ unsigned u; \
+ INPUT_BLOCK_SMALL(sc); \
+ for (u = 0; u < 8; u ++) { \
+ BIG_ROUND; \
+ } \
+ FINAL_SMALL; \
+ } while (0)
+
+#define COMPRESS_BIG(sc) do { \
+ bee_u32 K0 = sc->C0; \
+ bee_u32 K1 = sc->C1; \
+ bee_u32 K2 = sc->C2; \
+ bee_u32 K3 = sc->C3; \
+ unsigned u; \
+ INPUT_BLOCK_BIG(sc); \
+ for (u = 0; u < 10; u ++) { \
+ BIG_ROUND; \
+ } \
+ FINAL_BIG; \
+ } while (0)
+
+#else
+
+#define DECL_STATE_SMALL \
+ bee_u32 W[16][4];
+
+#define DECL_STATE_BIG \
+ bee_u32 W[16][4];
+
+#define INPUT_BLOCK_SMALL(sc) do { \
+ unsigned u; \
+ memcpy(W, sc->u.Vs, 16 * sizeof(bee_u32)); \
+ for (u = 0; u < 12; u ++) { \
+ W[u + 4][0] = bee_dec32le_aligned( \
+ sc->buf + 16 * u); \
+ W[u + 4][1] = bee_dec32le_aligned( \
+ sc->buf + 16 * u + 4); \
+ W[u + 4][2] = bee_dec32le_aligned( \
+ sc->buf + 16 * u + 8); \
+ W[u + 4][3] = bee_dec32le_aligned( \
+ sc->buf + 16 * u + 12); \
+ } \
+ } while (0)
+
+#define INPUT_BLOCK_BIG(sc) do { \
+ unsigned u; \
+ memcpy(W, sc->u.Vs, 32 * sizeof(bee_u32)); \
+ for (u = 0; u < 8; u ++) { \
+ W[u + 8][0] = bee_dec32le_aligned( \
+ sc->buf + 16 * u); \
+ W[u + 8][1] = bee_dec32le_aligned( \
+ sc->buf + 16 * u + 4); \
+ W[u + 8][2] = bee_dec32le_aligned( \
+ sc->buf + 16 * u + 8); \
+ W[u + 8][3] = bee_dec32le_aligned( \
+ sc->buf + 16 * u + 12); \
+ } \
+ } while (0)
+
+
+#define AES_2ROUNDS(X) do { \
+ bee_u32 Y0, Y1, Y2, Y3; \
+ AES_ROUND_LE(X[0], X[1], X[2], X[3], \
+ K0, K1, K2, K3, Y0, Y1, Y2, Y3); \
+ AES_ROUND_NOKEY_LE(Y0, Y1, Y2, Y3, X[0], X[1], X[2], X[3]); \
+ if ((K0 = T32(K0 + 1)) == 0) { \
+ if ((K1 = T32(K1 + 1)) == 0) \
+ if ((K2 = T32(K2 + 1)) == 0) \
+ K3 = T32(K3 + 1); \
+ } \
+ } while (0)
+
+#define BIG_SUB_WORDS do { \
+ AES_2ROUNDS(W[ 0]); \
+ AES_2ROUNDS(W[ 1]); \
+ AES_2ROUNDS(W[ 2]); \
+ AES_2ROUNDS(W[ 3]); \
+ AES_2ROUNDS(W[ 4]); \
+ AES_2ROUNDS(W[ 5]); \
+ AES_2ROUNDS(W[ 6]); \
+ AES_2ROUNDS(W[ 7]); \
+ AES_2ROUNDS(W[ 8]); \
+ AES_2ROUNDS(W[ 9]); \
+ AES_2ROUNDS(W[10]); \
+ AES_2ROUNDS(W[11]); \
+ AES_2ROUNDS(W[12]); \
+ AES_2ROUNDS(W[13]); \
+ AES_2ROUNDS(W[14]); \
+ AES_2ROUNDS(W[15]); \
+ } while (0)
+
+
+#define SHIFT_ROW1(a, b, c, d) do { \
+ bee_u32 tmp; \
+ tmp = W[a][0]; \
+ W[a][0] = W[b][0]; \
+ W[b][0] = W[c][0]; \
+ W[c][0] = W[d][0]; \
+ W[d][0] = tmp; \
+ tmp = W[a][1]; \
+ W[a][1] = W[b][1]; \
+ W[b][1] = W[c][1]; \
+ W[c][1] = W[d][1]; \
+ W[d][1] = tmp; \
+ tmp = W[a][2]; \
+ W[a][2] = W[b][2]; \
+ W[b][2] = W[c][2]; \
+ W[c][2] = W[d][2]; \
+ W[d][2] = tmp; \
+ tmp = W[a][3]; \
+ W[a][3] = W[b][3]; \
+ W[b][3] = W[c][3]; \
+ W[c][3] = W[d][3]; \
+ W[d][3] = tmp; \
+ } while (0)
+
+#define SHIFT_ROW2(a, b, c, d) do { \
+ bee_u32 tmp; \
+ tmp = W[a][0]; \
+ W[a][0] = W[c][0]; \
+ W[c][0] = tmp; \
+ tmp = W[b][0]; \
+ W[b][0] = W[d][0]; \
+ W[d][0] = tmp; \
+ tmp = W[a][1]; \
+ W[a][1] = W[c][1]; \
+ W[c][1] = tmp; \
+ tmp = W[b][1]; \
+ W[b][1] = W[d][1]; \
+ W[d][1] = tmp; \
+ tmp = W[a][2]; \
+ W[a][2] = W[c][2]; \
+ W[c][2] = tmp; \
+ tmp = W[b][2]; \
+ W[b][2] = W[d][2]; \
+ W[d][2] = tmp; \
+ tmp = W[a][3]; \
+ W[a][3] = W[c][3]; \
+ W[c][3] = tmp; \
+ tmp = W[b][3]; \
+ W[b][3] = W[d][3]; \
+ W[d][3] = tmp; \
+ } while (0)
+
+#define SHIFT_ROW3(a, b, c, d) SHIFT_ROW1(d, c, b, a)
+
+#define BIG_SHIFT_ROWS do { \
+ SHIFT_ROW1(1, 5, 9, 13); \
+ SHIFT_ROW2(2, 6, 10, 14); \
+ SHIFT_ROW3(3, 7, 11, 15); \
+ } while (0)
+
+
+#define MIX_COLUMN1(ia, ib, ic, id, n) do { \
+ bee_u32 a = W[ia][n]; \
+ bee_u32 b = W[ib][n]; \
+ bee_u32 c = W[ic][n]; \
+ bee_u32 d = W[id][n]; \
+ bee_u32 ab = a ^ b; \
+ bee_u32 bc = b ^ c; \
+ bee_u32 cd = c ^ d; \
+ bee_u32 abx = ((ab & C32(0x80808080)) >> 7) * 27U \
+ ^ ((ab & C32(0x7F7F7F7F)) << 1); \
+ bee_u32 bcx = ((bc & C32(0x80808080)) >> 7) * 27U \
+ ^ ((bc & C32(0x7F7F7F7F)) << 1); \
+ bee_u32 cdx = ((cd & C32(0x80808080)) >> 7) * 27U \
+ ^ ((cd & C32(0x7F7F7F7F)) << 1); \
+ W[ia][n] = abx ^ bc ^ d; \
+ W[ib][n] = bcx ^ a ^ cd; \
+ W[ic][n] = cdx ^ ab ^ d; \
+ W[id][n] = abx ^ bcx ^ cdx ^ ab ^ c; \
+ } while (0)
+
+#define MIX_COLUMN(a, b, c, d) do { \
+ MIX_COLUMN1(a, b, c, d, 0); \
+ MIX_COLUMN1(a, b, c, d, 1); \
+ MIX_COLUMN1(a, b, c, d, 2); \
+ MIX_COLUMN1(a, b, c, d, 3); \
+ } while (0)
+
+#define BIG_MIX_COLUMNS do { \
+ MIX_COLUMN(0, 1, 2, 3); \
+ MIX_COLUMN(4, 5, 6, 7); \
+ MIX_COLUMN(8, 9, 10, 11); \
+ MIX_COLUMN(12, 13, 14, 15); \
+ } while (0)
+
+/* One full round: SubWords (AES layer, defined elsewhere), ShiftRows, MixColumns. */
+#define BIG_ROUND do { \
+ BIG_SUB_WORDS; \
+ BIG_SHIFT_ROWS; \
+ BIG_MIX_COLUMNS; \
+ } while (0)
+
+/*
+ * Feed-forward for the small variant: XOR the chaining value with the
+ * three 64-byte message thirds and the four quarters of the permuted
+ * state W.
+ * NOTE(review): this reads sc->buf up to offset 191, i.e. it assumes a
+ * 192-byte buffer; facet_six_context declares buf[128], but only
+ * COMPRESS_BIG/FINAL_BIG are invoked in this file (see six_compress),
+ * so FINAL_SMALL is dead code here — confirm before ever enabling it.
+ */
+#define FINAL_SMALL do { \
+ unsigned u; \
+ bee_u32 *VV = &sc->u.Vs[0][0]; \
+ bee_u32 *WW = &W[0][0]; \
+ for (u = 0; u < 16; u ++) { \
+ VV[u] ^= bee_dec32le_aligned(sc->buf + (u * 4)) \
+ ^ bee_dec32le_aligned(sc->buf + (u * 4) + 64) \
+ ^ bee_dec32le_aligned(sc->buf + (u * 4) + 128) \
+ ^ WW[u] ^ WW[u + 16] \
+ ^ WW[u + 32] ^ WW[u + 48]; \
+ } \
+ } while (0)
+
+/*
+ * Feed-forward for the big variant: XOR the 128-byte chaining value
+ * with the message block and both halves of the permuted state W.
+ */
+#define FINAL_BIG do { \
+ unsigned u; \
+ bee_u32 *VV = &sc->u.Vs[0][0]; \
+ bee_u32 *WW = &W[0][0]; \
+ for (u = 0; u < 32; u ++) { \
+ VV[u] ^= bee_dec32le_aligned(sc->buf + (u * 4)) \
+ ^ WW[u] ^ WW[u + 32]; \
+ } \
+ } while (0)
+
+/*
+ * Small compression function: 8 rounds over the buffered block.
+ * K0..K3 snapshot the 128-bit block counter; they are presumably
+ * consumed as the per-round AES key by BIG_SUB_WORDS (defined
+ * elsewhere in this file) — confirm against that macro.
+ * NOTE(review): unused by this context (six_compress only calls
+ * COMPRESS_BIG); see the FINAL_SMALL buffer-size caveat.
+ */
+#define COMPRESS_SMALL(sc) do { \
+ bee_u32 K0 = sc->C0; \
+ bee_u32 K1 = sc->C1; \
+ bee_u32 K2 = sc->C2; \
+ bee_u32 K3 = sc->C3; \
+ unsigned u; \
+ INPUT_BLOCK_SMALL(sc); \
+ for (u = 0; u < 8; u ++) { \
+ BIG_ROUND; \
+ } \
+ FINAL_SMALL; \
+ } while (0)
+
+/* Big compression function: 10 rounds over the buffered 128-byte block. */
+#define COMPRESS_BIG(sc) do { \
+ bee_u32 K0 = sc->C0; \
+ bee_u32 K1 = sc->C1; \
+ bee_u32 K2 = sc->C2; \
+ bee_u32 K3 = sc->C3; \
+ unsigned u; \
+ INPUT_BLOCK_BIG(sc); \
+ for (u = 0; u < 10; u ++) { \
+ BIG_ROUND; \
+ } \
+ FINAL_BIG; \
+ } while (0)
+
+#endif
+
+/*
+ * Add 'val' (a bit count) to the 128-bit message-length counter
+ * C3:C2:C1:C0, propagating the carry through all four 32-bit limbs.
+ * Overflow of C0 is detected via the unsigned wrap (C0 < val).
+ */
+#define INCR_COUNTER(sc, val) do { \
+ sc->C0 = T32(sc->C0 + (bee_u32)(val)); \
+ if (sc->C0 < (bee_u32)(val)) { \
+ if ((sc->C1 = T32(sc->C1 + 1)) == 0) \
+ if ((sc->C2 = T32(sc->C2 + 1)) == 0) \
+ sc->C3 = T32(sc->C3 + 1); \
+ } \
+ } while (0)
+
+/*
+ * Initialize a Facet #6 context for an out_len-bit digest: each of the
+ * eight 128-bit chaining words is set to the output length (per the
+ * ECHO-style IV convention), the buffer pointer and the 128-bit bit
+ * counter are zeroed.
+ * NOTE(review): the 64-bit path is gated on BEE_ECHO_64 here, while the
+ * header declares u.Vb under BEE_64 (which facet_six.h also #undef's) —
+ * confirm the two macros are kept consistent.
+ */
+static void six_init(facet_six_context *sc, unsigned out_len)
+{
+#if BEE_ECHO_64
+ sc->u.Vb[0][0] = (bee_u64)out_len;
+ sc->u.Vb[0][1] = 0;
+ sc->u.Vb[1][0] = (bee_u64)out_len;
+ sc->u.Vb[1][1] = 0;
+ sc->u.Vb[2][0] = (bee_u64)out_len;
+ sc->u.Vb[2][1] = 0;
+ sc->u.Vb[3][0] = (bee_u64)out_len;
+ sc->u.Vb[3][1] = 0;
+ sc->u.Vb[4][0] = (bee_u64)out_len;
+ sc->u.Vb[4][1] = 0;
+ sc->u.Vb[5][0] = (bee_u64)out_len;
+ sc->u.Vb[5][1] = 0;
+ sc->u.Vb[6][0] = (bee_u64)out_len;
+ sc->u.Vb[6][1] = 0;
+ sc->u.Vb[7][0] = (bee_u64)out_len;
+ sc->u.Vb[7][1] = 0;
+#else
+ sc->u.Vs[0][0] = (bee_u32)out_len;
+ sc->u.Vs[0][1] = sc->u.Vs[0][2] = sc->u.Vs[0][3] = 0;
+ sc->u.Vs[1][0] = (bee_u32)out_len;
+ sc->u.Vs[1][1] = sc->u.Vs[1][2] = sc->u.Vs[1][3] = 0;
+ sc->u.Vs[2][0] = (bee_u32)out_len;
+ sc->u.Vs[2][1] = sc->u.Vs[2][2] = sc->u.Vs[2][3] = 0;
+ sc->u.Vs[3][0] = (bee_u32)out_len;
+ sc->u.Vs[3][1] = sc->u.Vs[3][2] = sc->u.Vs[3][3] = 0;
+ sc->u.Vs[4][0] = (bee_u32)out_len;
+ sc->u.Vs[4][1] = sc->u.Vs[4][2] = sc->u.Vs[4][3] = 0;
+ sc->u.Vs[5][0] = (bee_u32)out_len;
+ sc->u.Vs[5][1] = sc->u.Vs[5][2] = sc->u.Vs[5][3] = 0;
+ sc->u.Vs[6][0] = (bee_u32)out_len;
+ sc->u.Vs[6][1] = sc->u.Vs[6][2] = sc->u.Vs[6][3] = 0;
+ sc->u.Vs[7][0] = (bee_u32)out_len;
+ sc->u.Vs[7][1] = sc->u.Vs[7][2] = sc->u.Vs[7][3] = 0;
+#endif
+ sc->ptr = 0;
+ sc->C0 = sc->C1 = sc->C2 = sc->C3 = 0;
+}
+
+/*
+ * Run the big compression function over the 128-byte block currently
+ * held in sc->buf, updating the chaining value in place. DECL_STATE_BIG
+ * (defined elsewhere) declares the local working state used by
+ * COMPRESS_BIG.
+ */
+static void six_compress(facet_six_context *sc)
+{
+ DECL_STATE_BIG
+
+ COMPRESS_BIG(sc);
+}
+
+/*
+ * Absorb 'len' bytes of message data. Input is buffered in sc->buf;
+ * each time a full 128-byte block accumulates, the bit counter is
+ * advanced by 1024 and the block is compressed. Partial data is left
+ * in the buffer for the next call (or for six_close).
+ */
+static void six_core(facet_six_context *sc, const unsigned char *data, size_t len)
+{
+ unsigned char *buf;
+ size_t ptr;
+
+ buf = sc->buf;
+ ptr = sc->ptr;
+ /* Fast path: input fits in the current partial block — just buffer it. */
+ if (len < (sizeof sc->buf) - ptr) {
+ memcpy(buf + ptr, data, len);
+ ptr += len;
+ sc->ptr = ptr;
+ return;
+ }
+
+ while (len > 0) {
+ size_t clen;
+
+ /* Copy at most what completes the current block. */
+ clen = (sizeof sc->buf) - ptr;
+ if (clen > len)
+ clen = len;
+ memcpy(buf + ptr, data, clen);
+ ptr += clen;
+ data += clen;
+ len -= clen;
+ if (ptr == sizeof sc->buf) {
+ INCR_COUNTER(sc, 1024);
+ six_compress(sc);
+ ptr = 0;
+ }
+ }
+ sc->ptr = ptr;
+}
+
+/*
+ * Finalize the hash: append n extra bits from ub, pad, embed the output
+ * size and the total bit count into the last block, compress, and emit
+ * out_size_w32 32-bit words into dst. The context is re-initialized for
+ * reuse. Layout of the final block: padding byte(s), zeros, then at
+ * offset (blocklen - 18) the 16-bit LE digest size in bits, and in the
+ * last 16 bytes the LE snapshot of the 128-bit message-length counter.
+ */
+static void six_close(facet_six_context *sc, unsigned ub, unsigned n, void *dst, unsigned out_size_w32)
+{
+ unsigned char *buf;
+ size_t ptr;
+ unsigned z;
+ unsigned elen;
+ union {
+ unsigned char tmp[64];
+ bee_u32 dummy;
+#if BEE_ECHO_64
+ bee_u64 dummy2;
+#endif
+ } u;
+#if BEE_ECHO_64
+ bee_u64 *VV;
+#else
+ bee_u32 *VV;
+#endif
+ unsigned k;
+
+ buf = sc->buf;
+ ptr = sc->ptr;
+ /* elen = number of message bits present in this final block. */
+ elen = ((unsigned)ptr << 3) + n;
+ INCR_COUNTER(sc, elen);
+ /* Snapshot the counter now: it goes into the padding, LE-encoded. */
+ bee_enc32le_aligned(u.tmp, sc->C0);
+ bee_enc32le_aligned(u.tmp + 4, sc->C1);
+ bee_enc32le_aligned(u.tmp + 8, sc->C2);
+ bee_enc32le_aligned(u.tmp + 12, sc->C3);
+ /*
+ * If elen is zero, then this block actually contains no message
+ * bit, only the first padding bit.
+ */
+ if (elen == 0) {
+ sc->C0 = sc->C1 = sc->C2 = sc->C3 = 0;
+ }
+ /* Append the extra bits followed by a single '1' bit (0x80 >> n). */
+ z = (unsigned)0x80 >> n;
+ buf[ptr ++] = ((ub & -z) | z) & 0xFF;
+ memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
+ /*
+ * Not enough room left for the 18 trailer bytes: compress this
+ * block, then build an all-zero block (counter contributions are
+ * zeroed per the padding convention).
+ */
+ if (ptr > ((sizeof sc->buf) - 18)) {
+ six_compress(sc);
+ sc->C0 = sc->C1 = sc->C2 = sc->C3 = 0;
+ memset(buf, 0, sizeof sc->buf);
+ }
+ /* Digest size in bits (out_size_w32 words * 32), then the counter. */
+ bee_enc16le(buf + (sizeof sc->buf) - 18, out_size_w32 << 5);
+ memcpy(buf + (sizeof sc->buf) - 16, u.tmp, 16);
+ six_compress(sc);
+ /* Serialize the leading chaining words little-endian into dst. */
+#if BEE_ECHO_64
+ for (VV = &sc->u.Vb[0][0], k = 0; k < ((out_size_w32 + 1) >> 1); k ++)
+ bee_enc64le_aligned(u.tmp + (k << 3), VV[k]);
+#else
+ for (VV = &sc->u.Vs[0][0], k = 0; k < out_size_w32; k ++)
+ bee_enc32le_aligned(u.tmp + (k << 2), VV[k]);
+#endif
+ memcpy(dst, u.tmp, out_size_w32 << 2);
+ six_init(sc, out_size_w32 << 5);
+}
+
+
+/* see facet_six.h -- start a new 512-bit Facet #6 computation. */
+void facet_six_init(void *cc)
+{
+ six_init(cc, 512);
+}
+
+/* see facet_six.h -- absorb len bytes of message data. */
+void facet_six(void *cc, const void *data, size_t len)
+{
+ six_core(cc, data, len);
+}
+
+/* see facet_six.h -- finalize and write the 64-byte (16-word) digest. */
+void facet_six_close(void *cc, void *dst)
+{
+ six_close(cc, 0, 0, dst, 16);
+}
+
+/* see facet_six.h -- finalize with n (0..7) trailing bits from ub. */
+void facet_six_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+ six_close(cc, ub, n, dst, 16);
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/stratum/algos/honeycomb/facet_six.h b/stratum/algos/honeycomb/facet_six.h
new file mode 100644
index 000000000..cd4653e7f
--- /dev/null
+++ b/stratum/algos/honeycomb/facet_six.h
@@ -0,0 +1,82 @@
+#ifndef FACET_SIX_H
+#define FACET_SIX_H
+
+#ifdef __cplusplus
+ extern "C"{
+#endif
+
+#include <stddef.h>
+#include "honeycomb_types.h"
+
+
+#undef BEE_64
+
+/**
+ * This structure is a context for HoneyComb Facet #6 computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * an HoneyComb Facet #6 computation has been performed, the context can be reused for
+ * another computation. This specific structure is used for HoneyComb Facet #6.
+ *
+ * The contents of this structure are private. A running HoneyComb Facet #6 computation
+ * can be cloned by copying the context (e.g. with a simple memcpy()).
+ */
+typedef struct {
+ unsigned char buf[128]; /* first field, for alignment; 128-byte input block buffer */
+ size_t ptr; /* number of bytes currently buffered in buf */
+ union {
+ bee_u32 Vs[8][4]; /* chaining state: eight 128-bit words as 32-bit lanes */
+#if BEE_64
+ /* NOTE(review): BEE_64 is #undef'ed above in this header, so Vb is
+ * never compiled here, while facet_six.c selects it via BEE_ECHO_64
+ * — confirm these two switches cannot diverge. */
+ bee_u64 Vb[8][2]; /* same state viewed as 64-bit lanes */
+#endif
+ } u;
+ bee_u32 C0, C1, C2, C3; /* 128-bit message length counter, in bits */
+} facet_six_context;
+
+
+/**
+ * Initialize an HoneyComb Facet #6 context. This process performs no memory allocation.
+ *
+ * @param cc the HoneyComb Facet #6 context (pointer to a facet_six_context )
+ */
+void facet_six_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that len is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc the HoneyComb Facet #6 context
+ * @param data the input data
+ * @param len the input data length (in bytes)
+ */
+void facet_six(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current HoneyComb Facet #6 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is automatically reinitialized.
+ *
+ * @param cc the HoneyComb Facet #6 context
+ * @param dst the destination buffer
+ */
+void facet_six_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (64 bytes). If bit number i
+ * in ub has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc the HoneyComb Facet #6 context
+ * @param ub the extra bits
+ * @param n the number of extra bits (0 to 7)
+ * @param dst the destination buffer
+ */
+void facet_six_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/stratum/algos/honeycomb/facet_three.c b/stratum/algos/honeycomb/facet_three.c
new file mode 100644
index 000000000..a7e20f762
--- /dev/null
+++ b/stratum/algos/honeycomb/facet_three.c
@@ -0,0 +1,558 @@
+#include <stddef.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#include "facet_three.h"
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4146)
+#endif
+
+/*
+ * Part of this code was automatically generated (the part between
+ * the "BEGIN" and "END" markers).
+ */
+
+/* Number of 32-bit message words per 64-byte block. */
+#define sM 16
+
+#define C32 BEE_C32
+#define T32 BEE_T32
+
+/*
+ * Shabal permutation offsets. The unrolled PERM_STEP_* macros below
+ * hard-code these B-word distances (xb1/xb2/xb3 indices differ from
+ * xb0 by 13, 9 and 6); the symbolic names are kept for reference.
+ */
+#define O1 13
+#define O2 9
+#define O3 6
+
+/*
+ * We copy the state into local variables, so that the compiler knows
+ * that it can optimize them at will.
+ */
+
+/* BEGIN -- automatically generated code. */
+
+/*
+ * Local copies of the whole Shabal state: A[12], B[16], C[16], the
+ * decoded message words M0..MF, and the 64-bit block counter (Wlow,
+ * Whigh), so the compiler can keep them in registers.
+ */
+#define DECL_STATE \
+ bee_u32 A00, A01, A02, A03, A04, A05, A06, A07, \
+ A08, A09, A0A, A0B; \
+ bee_u32 B0, B1, B2, B3, B4, B5, B6, B7, \
+ B8, B9, BA, BB, BC, BD, BE, BF; \
+ bee_u32 C0, C1, C2, C3, C4, C5, C6, C7, \
+ C8, C9, CA, CB, CC, CD, CE, CF; \
+ bee_u32 M0, M1, M2, M3, M4, M5, M6, M7, \
+ M8, M9, MA, MB, MC, MD, ME, MF; \
+ bee_u32 Wlow, Whigh;
+
+/* Load the context's A/B/C arrays and counter into the local variables. */
+#define READ_STATE(state) do { \
+ A00 = (state)->A[0]; \
+ A01 = (state)->A[1]; \
+ A02 = (state)->A[2]; \
+ A03 = (state)->A[3]; \
+ A04 = (state)->A[4]; \
+ A05 = (state)->A[5]; \
+ A06 = (state)->A[6]; \
+ A07 = (state)->A[7]; \
+ A08 = (state)->A[8]; \
+ A09 = (state)->A[9]; \
+ A0A = (state)->A[10]; \
+ A0B = (state)->A[11]; \
+ B0 = (state)->B[0]; \
+ B1 = (state)->B[1]; \
+ B2 = (state)->B[2]; \
+ B3 = (state)->B[3]; \
+ B4 = (state)->B[4]; \
+ B5 = (state)->B[5]; \
+ B6 = (state)->B[6]; \
+ B7 = (state)->B[7]; \
+ B8 = (state)->B[8]; \
+ B9 = (state)->B[9]; \
+ BA = (state)->B[10]; \
+ BB = (state)->B[11]; \
+ BC = (state)->B[12]; \
+ BD = (state)->B[13]; \
+ BE = (state)->B[14]; \
+ BF = (state)->B[15]; \
+ C0 = (state)->C[0]; \
+ C1 = (state)->C[1]; \
+ C2 = (state)->C[2]; \
+ C3 = (state)->C[3]; \
+ C4 = (state)->C[4]; \
+ C5 = (state)->C[5]; \
+ C6 = (state)->C[6]; \
+ C7 = (state)->C[7]; \
+ C8 = (state)->C[8]; \
+ C9 = (state)->C[9]; \
+ CA = (state)->C[10]; \
+ CB = (state)->C[11]; \
+ CC = (state)->C[12]; \
+ CD = (state)->C[13]; \
+ CE = (state)->C[14]; \
+ CF = (state)->C[15]; \
+ Wlow = (state)->Wlow; \
+ Whigh = (state)->Whigh; \
+ } while (0)
+
+/* Store the local variables back into the context (inverse of READ_STATE). */
+#define WRITE_STATE(state) do { \
+ (state)->A[0] = A00; \
+ (state)->A[1] = A01; \
+ (state)->A[2] = A02; \
+ (state)->A[3] = A03; \
+ (state)->A[4] = A04; \
+ (state)->A[5] = A05; \
+ (state)->A[6] = A06; \
+ (state)->A[7] = A07; \
+ (state)->A[8] = A08; \
+ (state)->A[9] = A09; \
+ (state)->A[10] = A0A; \
+ (state)->A[11] = A0B; \
+ (state)->B[0] = B0; \
+ (state)->B[1] = B1; \
+ (state)->B[2] = B2; \
+ (state)->B[3] = B3; \
+ (state)->B[4] = B4; \
+ (state)->B[5] = B5; \
+ (state)->B[6] = B6; \
+ (state)->B[7] = B7; \
+ (state)->B[8] = B8; \
+ (state)->B[9] = B9; \
+ (state)->B[10] = BA; \
+ (state)->B[11] = BB; \
+ (state)->B[12] = BC; \
+ (state)->B[13] = BD; \
+ (state)->B[14] = BE; \
+ (state)->B[15] = BF; \
+ (state)->C[0] = C0; \
+ (state)->C[1] = C1; \
+ (state)->C[2] = C2; \
+ (state)->C[3] = C3; \
+ (state)->C[4] = C4; \
+ (state)->C[5] = C5; \
+ (state)->C[6] = C6; \
+ (state)->C[7] = C7; \
+ (state)->C[8] = C8; \
+ (state)->C[9] = C9; \
+ (state)->C[10] = CA; \
+ (state)->C[11] = CB; \
+ (state)->C[12] = CC; \
+ (state)->C[13] = CD; \
+ (state)->C[14] = CE; \
+ (state)->C[15] = CF; \
+ (state)->Wlow = Wlow; \
+ (state)->Whigh = Whigh; \
+ } while (0)
+
+/* Decode the 64-byte block at 'buf' into sixteen little-endian words. */
+#define DECODE_BLOCK do { \
+ M0 = bee_dec32le_aligned(buf + 0); \
+ M1 = bee_dec32le_aligned(buf + 4); \
+ M2 = bee_dec32le_aligned(buf + 8); \
+ M3 = bee_dec32le_aligned(buf + 12); \
+ M4 = bee_dec32le_aligned(buf + 16); \
+ M5 = bee_dec32le_aligned(buf + 20); \
+ M6 = bee_dec32le_aligned(buf + 24); \
+ M7 = bee_dec32le_aligned(buf + 28); \
+ M8 = bee_dec32le_aligned(buf + 32); \
+ M9 = bee_dec32le_aligned(buf + 36); \
+ MA = bee_dec32le_aligned(buf + 40); \
+ MB = bee_dec32le_aligned(buf + 44); \
+ MC = bee_dec32le_aligned(buf + 48); \
+ MD = bee_dec32le_aligned(buf + 52); \
+ ME = bee_dec32le_aligned(buf + 56); \
+ MF = bee_dec32le_aligned(buf + 60); \
+ } while (0)
+
+/* Add the message words into B (mod 2^32) before the permutation. */
+#define INPUT_BLOCK_ADD do { \
+ B0 = T32(B0 + M0); \
+ B1 = T32(B1 + M1); \
+ B2 = T32(B2 + M2); \
+ B3 = T32(B3 + M3); \
+ B4 = T32(B4 + M4); \
+ B5 = T32(B5 + M5); \
+ B6 = T32(B6 + M6); \
+ B7 = T32(B7 + M7); \
+ B8 = T32(B8 + M8); \
+ B9 = T32(B9 + M9); \
+ BA = T32(BA + MA); \
+ BB = T32(BB + MB); \
+ BC = T32(BC + MC); \
+ BD = T32(BD + MD); \
+ BE = T32(BE + ME); \
+ BF = T32(BF + MF); \
+ } while (0)
+
+/* Subtract the message words from C (mod 2^32) after the permutation. */
+#define INPUT_BLOCK_SUB do { \
+ C0 = T32(C0 - M0); \
+ C1 = T32(C1 - M1); \
+ C2 = T32(C2 - M2); \
+ C3 = T32(C3 - M3); \
+ C4 = T32(C4 - M4); \
+ C5 = T32(C5 - M5); \
+ C6 = T32(C6 - M6); \
+ C7 = T32(C7 - M7); \
+ C8 = T32(C8 - M8); \
+ C9 = T32(C9 - M9); \
+ CA = T32(CA - MA); \
+ CB = T32(CB - MB); \
+ CC = T32(CC - MC); \
+ CD = T32(CD - MD); \
+ CE = T32(CE - ME); \
+ CF = T32(CF - MF); \
+ } while (0)
+
+/* Mix the 64-bit block counter into the first two A words. */
+#define XOR_W do { \
+ A00 ^= Wlow; \
+ A01 ^= Whigh; \
+ } while (0)
+
+#define SWAP(v1, v2) do { \
+ bee_u32 tmp = (v1); \
+ (v1) = (v2); \
+ (v2) = tmp; \
+ } while (0)
+
+/* Exchange the B and C arrays wholesale (done after each block). */
+#define SWAP_BC do { \
+ SWAP(B0, C0); \
+ SWAP(B1, C1); \
+ SWAP(B2, C2); \
+ SWAP(B3, C3); \
+ SWAP(B4, C4); \
+ SWAP(B5, C5); \
+ SWAP(B6, C6); \
+ SWAP(B7, C7); \
+ SWAP(B8, C8); \
+ SWAP(B9, C9); \
+ SWAP(BA, CA); \
+ SWAP(BB, CB); \
+ SWAP(BC, CC); \
+ SWAP(BD, CD); \
+ SWAP(BE, CE); \
+ SWAP(BF, CF); \
+ } while (0)
+
+/*
+ * One element of the keyed permutation:
+ * xa0 = 3*(xa0 ^ 5*rotl32(xa1,15) ^ xc) ^ xb1 ^ (xb2 & ~xb3) ^ xm
+ * xb0 = ~(rotl32(xb0,1) ^ xa0)
+ * (all arithmetic mod 2^32, via T32).
+ */
+#define PERM_ELT(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm) do { \
+ xa0 = T32((xa0 \
+ ^ (((xa1 << 15) | (xa1 >> 17)) * 5U) \
+ ^ xc) * 3U) \
+ ^ xb1 ^ (xb2 & ~xb3) ^ xm; \
+ xb0 = T32(~(((xb0 << 1) | (xb0 >> 31)) ^ xa0)); \
+ } while (0)
+
+/*
+ * The permutation applies PERM_ELT 48 times (3 steps of 16); the three
+ * steps differ only in which A word each element starts from (the A
+ * index advances by 4 between steps, wrapping modulo 12).
+ */
+#define PERM_STEP_0 do { \
+ PERM_ELT(A00, A0B, B0, BD, B9, B6, C8, M0); \
+ PERM_ELT(A01, A00, B1, BE, BA, B7, C7, M1); \
+ PERM_ELT(A02, A01, B2, BF, BB, B8, C6, M2); \
+ PERM_ELT(A03, A02, B3, B0, BC, B9, C5, M3); \
+ PERM_ELT(A04, A03, B4, B1, BD, BA, C4, M4); \
+ PERM_ELT(A05, A04, B5, B2, BE, BB, C3, M5); \
+ PERM_ELT(A06, A05, B6, B3, BF, BC, C2, M6); \
+ PERM_ELT(A07, A06, B7, B4, B0, BD, C1, M7); \
+ PERM_ELT(A08, A07, B8, B5, B1, BE, C0, M8); \
+ PERM_ELT(A09, A08, B9, B6, B2, BF, CF, M9); \
+ PERM_ELT(A0A, A09, BA, B7, B3, B0, CE, MA); \
+ PERM_ELT(A0B, A0A, BB, B8, B4, B1, CD, MB); \
+ PERM_ELT(A00, A0B, BC, B9, B5, B2, CC, MC); \
+ PERM_ELT(A01, A00, BD, BA, B6, B3, CB, MD); \
+ PERM_ELT(A02, A01, BE, BB, B7, B4, CA, ME); \
+ PERM_ELT(A03, A02, BF, BC, B8, B5, C9, MF); \
+ } while (0)
+
+#define PERM_STEP_1 do { \
+ PERM_ELT(A04, A03, B0, BD, B9, B6, C8, M0); \
+ PERM_ELT(A05, A04, B1, BE, BA, B7, C7, M1); \
+ PERM_ELT(A06, A05, B2, BF, BB, B8, C6, M2); \
+ PERM_ELT(A07, A06, B3, B0, BC, B9, C5, M3); \
+ PERM_ELT(A08, A07, B4, B1, BD, BA, C4, M4); \
+ PERM_ELT(A09, A08, B5, B2, BE, BB, C3, M5); \
+ PERM_ELT(A0A, A09, B6, B3, BF, BC, C2, M6); \
+ PERM_ELT(A0B, A0A, B7, B4, B0, BD, C1, M7); \
+ PERM_ELT(A00, A0B, B8, B5, B1, BE, C0, M8); \
+ PERM_ELT(A01, A00, B9, B6, B2, BF, CF, M9); \
+ PERM_ELT(A02, A01, BA, B7, B3, B0, CE, MA); \
+ PERM_ELT(A03, A02, BB, B8, B4, B1, CD, MB); \
+ PERM_ELT(A04, A03, BC, B9, B5, B2, CC, MC); \
+ PERM_ELT(A05, A04, BD, BA, B6, B3, CB, MD); \
+ PERM_ELT(A06, A05, BE, BB, B7, B4, CA, ME); \
+ PERM_ELT(A07, A06, BF, BC, B8, B5, C9, MF); \
+ } while (0)
+
+#define PERM_STEP_2 do { \
+ PERM_ELT(A08, A07, B0, BD, B9, B6, C8, M0); \
+ PERM_ELT(A09, A08, B1, BE, BA, B7, C7, M1); \
+ PERM_ELT(A0A, A09, B2, BF, BB, B8, C6, M2); \
+ PERM_ELT(A0B, A0A, B3, B0, BC, B9, C5, M3); \
+ PERM_ELT(A00, A0B, B4, B1, BD, BA, C4, M4); \
+ PERM_ELT(A01, A00, B5, B2, BE, BB, C3, M5); \
+ PERM_ELT(A02, A01, B6, B3, BF, BC, C2, M6); \
+ PERM_ELT(A03, A02, B7, B4, B0, BD, C1, M7); \
+ PERM_ELT(A04, A03, B8, B5, B1, BE, C0, M8); \
+ PERM_ELT(A05, A04, B9, B6, B2, BF, CF, M9); \
+ PERM_ELT(A06, A05, BA, B7, B3, B0, CE, MA); \
+ PERM_ELT(A07, A06, BB, B8, B4, B1, CD, MB); \
+ PERM_ELT(A08, A07, BC, B9, B5, B2, CC, MC); \
+ PERM_ELT(A09, A08, BD, BA, B6, B3, CB, MD); \
+ PERM_ELT(A0A, A09, BE, BB, B7, B4, CA, ME); \
+ PERM_ELT(A0B, A0A, BF, BC, B8, B5, C9, MF); \
+ } while (0)
+
+/*
+ * Full permutation P: rotate every B word left by 17, run the three
+ * 16-element permutation steps, then fold 36 C words (three passes of
+ * 12, indices stepping backwards with wraparound) into the A words.
+ */
+#define APPLY_P do { \
+ B0 = T32(B0 << 17) | (B0 >> 15); \
+ B1 = T32(B1 << 17) | (B1 >> 15); \
+ B2 = T32(B2 << 17) | (B2 >> 15); \
+ B3 = T32(B3 << 17) | (B3 >> 15); \
+ B4 = T32(B4 << 17) | (B4 >> 15); \
+ B5 = T32(B5 << 17) | (B5 >> 15); \
+ B6 = T32(B6 << 17) | (B6 >> 15); \
+ B7 = T32(B7 << 17) | (B7 >> 15); \
+ B8 = T32(B8 << 17) | (B8 >> 15); \
+ B9 = T32(B9 << 17) | (B9 >> 15); \
+ BA = T32(BA << 17) | (BA >> 15); \
+ BB = T32(BB << 17) | (BB >> 15); \
+ BC = T32(BC << 17) | (BC >> 15); \
+ BD = T32(BD << 17) | (BD >> 15); \
+ BE = T32(BE << 17) | (BE >> 15); \
+ BF = T32(BF << 17) | (BF >> 15); \
+ PERM_STEP_0; \
+ PERM_STEP_1; \
+ PERM_STEP_2; \
+ A0B = T32(A0B + C6); \
+ A0A = T32(A0A + C5); \
+ A09 = T32(A09 + C4); \
+ A08 = T32(A08 + C3); \
+ A07 = T32(A07 + C2); \
+ A06 = T32(A06 + C1); \
+ A05 = T32(A05 + C0); \
+ A04 = T32(A04 + CF); \
+ A03 = T32(A03 + CE); \
+ A02 = T32(A02 + CD); \
+ A01 = T32(A01 + CC); \
+ A00 = T32(A00 + CB); \
+ A0B = T32(A0B + CA); \
+ A0A = T32(A0A + C9); \
+ A09 = T32(A09 + C8); \
+ A08 = T32(A08 + C7); \
+ A07 = T32(A07 + C6); \
+ A06 = T32(A06 + C5); \
+ A05 = T32(A05 + C4); \
+ A04 = T32(A04 + C3); \
+ A03 = T32(A03 + C2); \
+ A02 = T32(A02 + C1); \
+ A01 = T32(A01 + C0); \
+ A00 = T32(A00 + CF); \
+ A0B = T32(A0B + CE); \
+ A0A = T32(A0A + CD); \
+ A09 = T32(A09 + CC); \
+ A08 = T32(A08 + CB); \
+ A07 = T32(A07 + CA); \
+ A06 = T32(A06 + C9); \
+ A05 = T32(A05 + C8); \
+ A04 = T32(A04 + C7); \
+ A03 = T32(A03 + C6); \
+ A02 = T32(A02 + C5); \
+ A01 = T32(A01 + C4); \
+ A00 = T32(A00 + C3); \
+ } while (0)
+
+/* Advance the 64-bit block counter held in (Whigh, Wlow). */
+#define INCR_W do { \
+ if ((Wlow = T32(Wlow + 1)) == 0) \
+ Whigh = T32(Whigh + 1); \
+ } while (0)
+
+/*
+ * Precomputed initial A/B/C state for the 512-bit output size
+ * (selected by three_init's switch on 'size').
+ */
+static const bee_u32 A_init_512[] = {
+ C32(0x20728DFD), C32(0x46C0BD53), C32(0xE782B699), C32(0x55304632),
+ C32(0x71B4EF90), C32(0x0EA9E82C), C32(0xDBB930F1), C32(0xFAD06B8B),
+ C32(0xBE0CAE40), C32(0x8BD14410), C32(0x76D2ADAC), C32(0x28ACAB7F)
+};
+
+static const bee_u32 B_init_512[] = {
+ C32(0xC1099CB7), C32(0x07B385F3), C32(0xE7442C26), C32(0xCC8AD640),
+ C32(0xEB6F56C7), C32(0x1EA81AA9), C32(0x73B9D314), C32(0x1DE85D08),
+ C32(0x48910A5A), C32(0x893B22DB), C32(0xC5A0DF44), C32(0xBBC4324E),
+ C32(0x72D2F240), C32(0x75941D99), C32(0x6D8BDE82), C32(0xA1A7502B)
+};
+
+static const bee_u32 C_init_512[] = {
+ C32(0xD9BF68D1), C32(0x58BAD750), C32(0x56028CB2), C32(0x8134F359),
+ C32(0xB5D469D8), C32(0x941A8CC2), C32(0x418B2A6E), C32(0x04052780),
+ C32(0x7F07D787), C32(0x5194358F), C32(0x3C60D665), C32(0xBE97D79A),
+ C32(0x950C3434), C32(0xAED9A06D), C32(0x2537DC8D), C32(0x7CDB5969)
+};
+
+/* END -- automatically generated code. */
+
+/*
+ * Initialize a Facet #3 context from the precomputed IVs for the given
+ * output size. Only 512 is supported; any other size is silently
+ * ignored (the context is left untouched). The block counter starts at
+ * 1: the first message block is numbered one, not zero.
+ */
+static void three_init(void *cc, unsigned size)
+{
+ /*
+ * We have precomputed initial states for all the supported
+ * output bit lengths.
+ */
+ const bee_u32 *A_init, *B_init, *C_init;
+ facet_three_context *sc;
+
+ switch (size)
+ {
+ case 512:
+ A_init = A_init_512;
+ B_init = B_init_512;
+ C_init = C_init_512;
+ break;
+ default:
+ return;
+ }
+ sc = cc;
+ memcpy(sc->A, A_init, sizeof sc->A);
+ memcpy(sc->B, B_init, sizeof sc->B);
+ memcpy(sc->C, C_init, sizeof sc->C);
+ sc->Wlow = 1;
+ sc->Whigh = 0;
+ sc->ptr = 0;
+}
+
+/*
+ * Absorb 'len' bytes: buffer input in sc->buf and, for each complete
+ * 64-byte block, run one full Shabal round (add message into B, XOR
+ * counter, apply P, subtract message from C, swap B/C, bump counter).
+ */
+static void three_core(void *cc, const unsigned char *data, size_t len)
+{
+ facet_three_context *sc;
+ unsigned char *buf;
+ size_t ptr;
+ DECL_STATE
+
+ sc = cc;
+ buf = sc->buf;
+ ptr = sc->ptr;
+
+ /*
+ * We do not want to copy the state to local variables if the
+ * amount of data is less than what is needed to complete the
+ * current block. Note that it is anyway suboptimal to call
+ * this method many times for small chunks of data.
+ */
+ if (len < (sizeof sc->buf) - ptr) {
+ memcpy(buf + ptr, data, len);
+ ptr += len;
+ sc->ptr = ptr;
+ return;
+ }
+
+ READ_STATE(sc);
+ while (len > 0) {
+ size_t clen;
+
+ clen = (sizeof sc->buf) - ptr;
+ if (clen > len)
+ clen = len;
+ memcpy(buf + ptr, data, clen);
+ ptr += clen;
+ data += clen;
+ len -= clen;
+ if (ptr == sizeof sc->buf) {
+ DECODE_BLOCK;
+ INPUT_BLOCK_ADD;
+ XOR_W;
+ APPLY_P;
+ INPUT_BLOCK_SUB;
+ SWAP_BC;
+ INCR_W;
+ ptr = 0;
+ }
+ }
+ WRITE_STATE(sc);
+ sc->ptr = ptr;
+}
+
+/*
+ * Finalize: pad the last block (extra bits from ub, then a single '1'
+ * bit, then zeros), process it once, then run three additional
+ * finalization rounds (SWAP_BC / XOR_W / APPLY_P, with no message add
+ * and no counter increment), and emit the last size_words 32-bit words
+ * of B, little-endian. The context is re-initialized for reuse.
+ */
+static void three_close(void *cc, unsigned ub, unsigned n, void *dst, unsigned size_words)
+{
+ facet_three_context *sc;
+ unsigned char *buf;
+ size_t ptr;
+ int i;
+ unsigned z;
+ union {
+ unsigned char tmp_out[64];
+ bee_u32 dummy;
+ } u;
+ size_t out_len;
+ DECL_STATE
+
+ sc = cc;
+ buf = sc->buf;
+ ptr = sc->ptr;
+ /* Padding byte: n extra bits from ub, then a '1' bit (0x80 >> n). */
+ z = 0x80 >> n;
+ buf[ptr] = ((ub & -z) | z) & 0xFF;
+ memset(buf + ptr + 1, 0, (sizeof sc->buf) - (ptr + 1));
+ READ_STATE(sc);
+ DECODE_BLOCK;
+ INPUT_BLOCK_ADD;
+ XOR_W;
+ APPLY_P;
+ /* Three extra rounds on the final block; W is deliberately not
+ * incremented between them. */
+ for (i = 0; i < 3; i ++) {
+ SWAP_BC; \
+ XOR_W; \
+ APPLY_P; \
+ }
+
+ /*
+ * We just use our local variables; no need to go through
+ * the state structure. In order to share some code, we
+ * emit the relevant words into a temporary buffer, which
+ * we finally copy into the destination array.
+ */
+ switch (size_words) {
+ case 16:
+ bee_enc32le_aligned(u.tmp_out + 0, B0);
+ bee_enc32le_aligned(u.tmp_out + 4, B1);
+ bee_enc32le_aligned(u.tmp_out + 8, B2);
+ bee_enc32le_aligned(u.tmp_out + 12, B3);
+ /* fall through */
+ case 12:
+ bee_enc32le_aligned(u.tmp_out + 16, B4);
+ bee_enc32le_aligned(u.tmp_out + 20, B5);
+ bee_enc32le_aligned(u.tmp_out + 24, B6);
+ bee_enc32le_aligned(u.tmp_out + 28, B7);
+ /* fall through */
+ case 8:
+ bee_enc32le_aligned(u.tmp_out + 32, B8);
+ /* fall through */
+ case 7:
+ bee_enc32le_aligned(u.tmp_out + 36, B9);
+ /* fall through */
+ case 6:
+ bee_enc32le_aligned(u.tmp_out + 40, BA);
+ bee_enc32le_aligned(u.tmp_out + 44, BB);
+ bee_enc32le_aligned(u.tmp_out + 48, BC);
+ bee_enc32le_aligned(u.tmp_out + 52, BD);
+ bee_enc32le_aligned(u.tmp_out + 56, BE);
+ bee_enc32le_aligned(u.tmp_out + 60, BF);
+ break;
+ default:
+ return;
+ }
+ /* The digest is the trailing out_len bytes of the staging buffer. */
+ out_len = size_words << 2;
+ memcpy(dst, u.tmp_out + (sizeof u.tmp_out) - out_len, out_len);
+ three_init(sc, size_words << 5);
+}
+
+
+/* see facet_three.h -- start a new 512-bit Facet #3 computation. */
+void facet_three_init(void *cc)
+{
+ three_init(cc, 512);
+}
+
+/* see facet_three.h -- absorb len bytes of message data. */
+void facet_three(void *cc, const void *data, size_t len)
+{
+ three_core(cc, data, len);
+}
+
+/* see facet_three.h -- finalize and write the 64-byte (16-word) digest. */
+void facet_three_close(void *cc, void *dst)
+{
+ three_close(cc, 0, 0, dst, 16);
+}
+
+/* see facet_three.h -- finalize with n (0..7) trailing bits from ub. */
+void facet_three_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+ three_close(cc, ub, n, dst, 16);
+}
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/stratum/algos/honeycomb/facet_three.h b/stratum/algos/honeycomb/facet_three.h
new file mode 100644
index 000000000..d2f8fd965
--- /dev/null
+++ b/stratum/algos/honeycomb/facet_three.h
@@ -0,0 +1,80 @@
+
+#ifndef FACET_THREE_H
+#define FACET_THREE_H
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#include <stddef.h>
+#include "honeycomb_types.h"
+
+
+//#undef BEE_64 //
+
+
+/**
+ * This structure is a context for HoneyComb Facet #3 computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a HoneyComb Facet #3 computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running HoneyComb Facet #3 computation
+ * can be cloned by copying the context (e.g. with a simple memcpy()).
+ */
+typedef struct {
+ unsigned char buf[64]; /* first field, for alignment; 64-byte input block buffer */
+ size_t ptr; /* number of bytes currently buffered in buf */
+ bee_u32 A[12], B[16], C[16]; /* Shabal state */
+ bee_u32 Whigh, Wlow; /* 64-bit block counter (Wlow = low word) */
+
+}facet_three_context;
+
+
+
+/**
+ * Initialize a HoneyComb Facet #3 context. This process performs no memory allocation.
+ *
+ * @param cc the HoneyComb Facet #3 context (pointer to a facet_three_context )
+ */
+void facet_three_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that len is zero (in which case this function does nothing).
+ *
+ * @param cc the HoneyComb Facet #3 context
+ * @param data the input data
+ * @param len the input data length (in bytes)
+ */
+void facet_three(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current HoneyComb Facet #3 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accommodate the result (64 bytes). The context is automatically reinitialized.
+ *
+ * @param cc the HoneyComb Facet #3 context
+ * @param dst the destination buffer
+ */
+void facet_three_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accommodate the result (64 bytes). If bit number i
+ * in ub has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc the HoneyComb Facet #3 context
+ * @param ub the extra bits
+ * @param n the number of extra bits (0 to 7)
+ * @param dst the destination buffer
+ */
+void facet_three_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/stratum/algos/honeycomb/facet_two.c b/stratum/algos/honeycomb/facet_two.c
new file mode 100644
index 000000000..6c917ee7e
--- /dev/null
+++ b/stratum/algos/honeycomb/facet_two.c
@@ -0,0 +1,845 @@
+
+#include <stddef.h>
+#include <string.h>
+
+#include "facet_two.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+
+
+#if !defined BEE_JH_64 && BEE_64_TRUE
+ #define BEE_JH_64 1
+#endif
+
+#if !BEE_64
+ #undef BEE_JH_64
+#endif
+
+#ifdef _MSC_VER
+ #pragma warning (disable: 4146)
+#endif
+
+/*
+ * The internal bitslice representation may use either big-endian or
+ * little-endian (true bitslice operations do not care about the bit
+ * ordering, and the bit-swapping linear operations in HoneyComb Facet #2 happen to
+ * be invariant through endianness-swapping). The constants must be
+ * defined according to the chosen endianness; we use some
+ * byte-swapping macros for that.
+ */
+
+#if BEE_LITTLE_ENDIAN
+
+#define C32e(x) ((BEE_C32(x) >> 24) \
+ | ((BEE_C32(x) >> 8) & BEE_C32(0x0000FF00)) \
+ | ((BEE_C32(x) << 8) & BEE_C32(0x00FF0000)) \
+ | ((BEE_C32(x) << 24) & BEE_C32(0xFF000000)))
+#define dec32e_aligned bee_dec32le_aligned
+#define enc32e bee_enc32le
+
+#if BEE_64
+#define C64e(x) ((BEE_C64(x) >> 56) \
+ | ((BEE_C64(x) >> 40) & BEE_C64(0x000000000000FF00)) \
+ | ((BEE_C64(x) >> 24) & BEE_C64(0x0000000000FF0000)) \
+ | ((BEE_C64(x) >> 8) & BEE_C64(0x00000000FF000000)) \
+ | ((BEE_C64(x) << 8) & BEE_C64(0x000000FF00000000)) \
+ | ((BEE_C64(x) << 24) & BEE_C64(0x0000FF0000000000)) \
+ | ((BEE_C64(x) << 40) & BEE_C64(0x00FF000000000000)) \
+ | ((BEE_C64(x) << 56) & BEE_C64(0xFF00000000000000)))
+#define dec64e_aligned bee_dec64le_aligned
+#define enc64e bee_enc64le
+#endif
+
+#else
+
+#define C32e(x) BEE_C32(x)
+#define dec32e_aligned bee_dec32be_aligned
+#define enc32e bee_enc32be
+#if BEE_64
+#define C64e(x) BEE_C64(x)
+#define dec64e_aligned bee_dec64be_aligned
+#define enc64e bee_enc64be
+#endif
+
+#endif
+
+#define Sb(x0, x1, x2, x3, c) do { \
+ x3 = ~x3; \
+ x0 ^= (c) & ~x2; \
+ tmp = (c) ^ (x0 & x1); \
+ x0 ^= x2 & x3; \
+ x3 ^= ~x1 & x2; \
+ x1 ^= x0 & x2; \
+ x2 ^= x0 & ~x3; \
+ x0 ^= x1 | x3; \
+ x3 ^= x1 & x2; \
+ x1 ^= tmp & x0; \
+ x2 ^= tmp; \
+ } while (0)
+
+#define Lb(x0, x1, x2, x3, x4, x5, x6, x7) do { \
+ x4 ^= x1; \
+ x5 ^= x2; \
+ x6 ^= x3 ^ x0; \
+ x7 ^= x0; \
+ x0 ^= x5; \
+ x1 ^= x6; \
+ x2 ^= x7 ^ x4; \
+ x3 ^= x4; \
+ } while (0)
+
+#if BEE_JH_64
+
+static const bee_u64 C[] = {
+ C64e(0x72d5dea2df15f867), C64e(0x7b84150ab7231557),
+ C64e(0x81abd6904d5a87f6), C64e(0x4e9f4fc5c3d12b40),
+ C64e(0xea983ae05c45fa9c), C64e(0x03c5d29966b2999a),
+ C64e(0x660296b4f2bb538a), C64e(0xb556141a88dba231),
+ C64e(0x03a35a5c9a190edb), C64e(0x403fb20a87c14410),
+ C64e(0x1c051980849e951d), C64e(0x6f33ebad5ee7cddc),
+ C64e(0x10ba139202bf6b41), C64e(0xdc786515f7bb27d0),
+ C64e(0x0a2c813937aa7850), C64e(0x3f1abfd2410091d3),
+ C64e(0x422d5a0df6cc7e90), C64e(0xdd629f9c92c097ce),
+ C64e(0x185ca70bc72b44ac), C64e(0xd1df65d663c6fc23),
+ C64e(0x976e6c039ee0b81a), C64e(0x2105457e446ceca8),
+ C64e(0xeef103bb5d8e61fa), C64e(0xfd9697b294838197),
+ C64e(0x4a8e8537db03302f), C64e(0x2a678d2dfb9f6a95),
+ C64e(0x8afe7381f8b8696c), C64e(0x8ac77246c07f4214),
+ C64e(0xc5f4158fbdc75ec4), C64e(0x75446fa78f11bb80),
+ C64e(0x52de75b7aee488bc), C64e(0x82b8001e98a6a3f4),
+ C64e(0x8ef48f33a9a36315), C64e(0xaa5f5624d5b7f989),
+ C64e(0xb6f1ed207c5ae0fd), C64e(0x36cae95a06422c36),
+ C64e(0xce2935434efe983d), C64e(0x533af974739a4ba7),
+ C64e(0xd0f51f596f4e8186), C64e(0x0e9dad81afd85a9f),
+ C64e(0xa7050667ee34626a), C64e(0x8b0b28be6eb91727),
+ C64e(0x47740726c680103f), C64e(0xe0a07e6fc67e487b),
+ C64e(0x0d550aa54af8a4c0), C64e(0x91e3e79f978ef19e),
+ C64e(0x8676728150608dd4), C64e(0x7e9e5a41f3e5b062),
+ C64e(0xfc9f1fec4054207a), C64e(0xe3e41a00cef4c984),
+ C64e(0x4fd794f59dfa95d8), C64e(0x552e7e1124c354a5),
+ C64e(0x5bdf7228bdfe6e28), C64e(0x78f57fe20fa5c4b2),
+ C64e(0x05897cefee49d32e), C64e(0x447e9385eb28597f),
+ C64e(0x705f6937b324314a), C64e(0x5e8628f11dd6e465),
+ C64e(0xc71b770451b920e7), C64e(0x74fe43e823d4878a),
+ C64e(0x7d29e8a3927694f2), C64e(0xddcb7a099b30d9c1),
+ C64e(0x1d1b30fb5bdc1be0), C64e(0xda24494ff29c82bf),
+ C64e(0xa4e7ba31b470bfff), C64e(0x0d324405def8bc48),
+ C64e(0x3baefc3253bbd339), C64e(0x459fc3c1e0298ba0),
+ C64e(0xe5c905fdf7ae090f), C64e(0x947034124290f134),
+ C64e(0xa271b701e344ed95), C64e(0xe93b8e364f2f984a),
+ C64e(0x88401d63a06cf615), C64e(0x47c1444b8752afff),
+ C64e(0x7ebb4af1e20ac630), C64e(0x4670b6c5cc6e8ce6),
+ C64e(0xa4d5a456bd4fca00), C64e(0xda9d844bc83e18ae),
+ C64e(0x7357ce453064d1ad), C64e(0xe8a6ce68145c2567),
+ C64e(0xa3da8cf2cb0ee116), C64e(0x33e906589a94999a),
+ C64e(0x1f60b220c26f847b), C64e(0xd1ceac7fa0d18518),
+ C64e(0x32595ba18ddd19d3), C64e(0x509a1cc0aaa5b446),
+ C64e(0x9f3d6367e4046bba), C64e(0xf6ca19ab0b56ee7e),
+ C64e(0x1fb179eaa9282174), C64e(0xe9bdf7353b3651ee),
+ C64e(0x1d57ac5a7550d376), C64e(0x3a46c2fea37d7001),
+ C64e(0xf735c1af98a4d842), C64e(0x78edec209e6b6779),
+ C64e(0x41836315ea3adba8), C64e(0xfac33b4d32832c83),
+ C64e(0xa7403b1f1c2747f3), C64e(0x5940f034b72d769a),
+ C64e(0xe73e4e6cd2214ffd), C64e(0xb8fd8d39dc5759ef),
+ C64e(0x8d9b0c492b49ebda), C64e(0x5ba2d74968f3700d),
+ C64e(0x7d3baed07a8d5584), C64e(0xf5a5e9f0e4f88e65),
+ C64e(0xa0b8a2f436103b53), C64e(0x0ca8079e753eec5a),
+ C64e(0x9168949256e8884f), C64e(0x5bb05c55f8babc4c),
+ C64e(0xe3bb3b99f387947b), C64e(0x75daf4d6726b1c5d),
+ C64e(0x64aeac28dc34b36d), C64e(0x6c34a550b828db71),
+ C64e(0xf861e2f2108d512a), C64e(0xe3db643359dd75fc),
+ C64e(0x1cacbcf143ce3fa2), C64e(0x67bbd13c02e843b0),
+ C64e(0x330a5bca8829a175), C64e(0x7f34194db416535c),
+ C64e(0x923b94c30e794d1e), C64e(0x797475d7b6eeaf3f),
+ C64e(0xeaa8d4f7be1a3921), C64e(0x5cf47e094c232751),
+ C64e(0x26a32453ba323cd2), C64e(0x44a3174a6da6d5ad),
+ C64e(0xb51d3ea6aff2c908), C64e(0x83593d98916b3c56),
+ C64e(0x4cf87ca17286604d), C64e(0x46e23ecc086ec7f6),
+ C64e(0x2f9833b3b1bc765e), C64e(0x2bd666a5efc4e62a),
+ C64e(0x06f4b6e8bec1d436), C64e(0x74ee8215bcef2163),
+ C64e(0xfdc14e0df453c969), C64e(0xa77d5ac406585826),
+ C64e(0x7ec1141606e0fa16), C64e(0x7e90af3d28639d3f),
+ C64e(0xd2c9f2e3009bd20c), C64e(0x5faace30b7d40c30),
+ C64e(0x742a5116f2e03298), C64e(0x0deb30d8e3cef89a),
+ C64e(0x4bc59e7bb5f17992), C64e(0xff51e66e048668d3),
+ C64e(0x9b234d57e6966731), C64e(0xcce6a6f3170a7505),
+ C64e(0xb17681d913326cce), C64e(0x3c175284f805a262),
+ C64e(0xf42bcbb378471547), C64e(0xff46548223936a48),
+ C64e(0x38df58074e5e6565), C64e(0xf2fc7c89fc86508e),
+ C64e(0x31702e44d00bca86), C64e(0xf04009a23078474e),
+ C64e(0x65a0ee39d1f73883), C64e(0xf75ee937e42c3abd),
+ C64e(0x2197b2260113f86f), C64e(0xa344edd1ef9fdee7),
+ C64e(0x8ba0df15762592d9), C64e(0x3c85f7f612dc42be),
+ C64e(0xd8a7ec7cab27b07e), C64e(0x538d7ddaaa3ea8de),
+ C64e(0xaa25ce93bd0269d8), C64e(0x5af643fd1a7308f9),
+ C64e(0xc05fefda174a19a5), C64e(0x974d66334cfd216a),
+ C64e(0x35b49831db411570), C64e(0xea1e0fbbedcd549b),
+ C64e(0x9ad063a151974072), C64e(0xf6759dbf91476fe2)
+};
+
+#define Ceven_hi(r) (C[((r) << 2) + 0])
+#define Ceven_lo(r) (C[((r) << 2) + 1])
+#define Codd_hi(r) (C[((r) << 2) + 2])
+#define Codd_lo(r) (C[((r) << 2) + 3])
+
+#define S(x0, x1, x2, x3, cb, r) do { \
+ Sb(x0 ## h, x1 ## h, x2 ## h, x3 ## h, cb ## hi(r)); \
+ Sb(x0 ## l, x1 ## l, x2 ## l, x3 ## l, cb ## lo(r)); \
+ } while (0)
+
+#define L(x0, x1, x2, x3, x4, x5, x6, x7) do { \
+ Lb(x0 ## h, x1 ## h, x2 ## h, x3 ## h, \
+ x4 ## h, x5 ## h, x6 ## h, x7 ## h); \
+ Lb(x0 ## l, x1 ## l, x2 ## l, x3 ## l, \
+ x4 ## l, x5 ## l, x6 ## l, x7 ## l); \
+ } while (0)
+
+#define Wz(x, c, n) do { \
+ bee_u64 t = (x ## h & (c)) << (n); \
+ x ## h = ((x ## h >> (n)) & (c)) | t; \
+ t = (x ## l & (c)) << (n); \
+ x ## l = ((x ## l >> (n)) & (c)) | t; \
+ } while (0)
+
+#define W0(x) Wz(x, BEE_C64(0x5555555555555555), 1)
+#define W1(x) Wz(x, BEE_C64(0x3333333333333333), 2)
+#define W2(x) Wz(x, BEE_C64(0x0F0F0F0F0F0F0F0F), 4)
+#define W3(x) Wz(x, BEE_C64(0x00FF00FF00FF00FF), 8)
+#define W4(x) Wz(x, BEE_C64(0x0000FFFF0000FFFF), 16)
+#define W5(x) Wz(x, BEE_C64(0x00000000FFFFFFFF), 32)
+#define W6(x) do { \
+ bee_u64 t = x ## h; \
+ x ## h = x ## l; \
+ x ## l = t; \
+ } while (0)
+
+#define DECL_STATE \
+ bee_u64 h0h, h1h, h2h, h3h, h4h, h5h, h6h, h7h; \
+ bee_u64 h0l, h1l, h2l, h3l, h4l, h5l, h6l, h7l; \
+ bee_u64 tmp;
+
+#define READ_STATE(state) do { \
+ h0h = (state)->H.wide[ 0]; \
+ h0l = (state)->H.wide[ 1]; \
+ h1h = (state)->H.wide[ 2]; \
+ h1l = (state)->H.wide[ 3]; \
+ h2h = (state)->H.wide[ 4]; \
+ h2l = (state)->H.wide[ 5]; \
+ h3h = (state)->H.wide[ 6]; \
+ h3l = (state)->H.wide[ 7]; \
+ h4h = (state)->H.wide[ 8]; \
+ h4l = (state)->H.wide[ 9]; \
+ h5h = (state)->H.wide[10]; \
+ h5l = (state)->H.wide[11]; \
+ h6h = (state)->H.wide[12]; \
+ h6l = (state)->H.wide[13]; \
+ h7h = (state)->H.wide[14]; \
+ h7l = (state)->H.wide[15]; \
+ } while (0)
+
+#define WRITE_STATE(state) do { \
+ (state)->H.wide[ 0] = h0h; \
+ (state)->H.wide[ 1] = h0l; \
+ (state)->H.wide[ 2] = h1h; \
+ (state)->H.wide[ 3] = h1l; \
+ (state)->H.wide[ 4] = h2h; \
+ (state)->H.wide[ 5] = h2l; \
+ (state)->H.wide[ 6] = h3h; \
+ (state)->H.wide[ 7] = h3l; \
+ (state)->H.wide[ 8] = h4h; \
+ (state)->H.wide[ 9] = h4l; \
+ (state)->H.wide[10] = h5h; \
+ (state)->H.wide[11] = h5l; \
+ (state)->H.wide[12] = h6h; \
+ (state)->H.wide[13] = h6l; \
+ (state)->H.wide[14] = h7h; \
+ (state)->H.wide[15] = h7l; \
+ } while (0)
+
+#define INPUT_BUF1 \
+ bee_u64 m0h = dec64e_aligned(buf + 0); \
+ bee_u64 m0l = dec64e_aligned(buf + 8); \
+ bee_u64 m1h = dec64e_aligned(buf + 16); \
+ bee_u64 m1l = dec64e_aligned(buf + 24); \
+ bee_u64 m2h = dec64e_aligned(buf + 32); \
+ bee_u64 m2l = dec64e_aligned(buf + 40); \
+ bee_u64 m3h = dec64e_aligned(buf + 48); \
+ bee_u64 m3l = dec64e_aligned(buf + 56); \
+ h0h ^= m0h; \
+ h0l ^= m0l; \
+ h1h ^= m1h; \
+ h1l ^= m1l; \
+ h2h ^= m2h; \
+ h2l ^= m2l; \
+ h3h ^= m3h; \
+ h3l ^= m3l;
+
+#define INPUT_BUF2 \
+ h4h ^= m0h; \
+ h4l ^= m0l; \
+ h5h ^= m1h; \
+ h5l ^= m1l; \
+ h6h ^= m2h; \
+ h6l ^= m2l; \
+ h7h ^= m3h; \
+ h7l ^= m3l;
+
+
+static const bee_u64 IV512[] = {
+ C64e(0x6fd14b963e00aa17), C64e(0x636a2e057a15d543),
+ C64e(0x8a225e8d0c97ef0b), C64e(0xe9341259f2b3c361),
+ C64e(0x891da0c1536f801e), C64e(0x2aa9056bea2b6d80),
+ C64e(0x588eccdb2075baa6), C64e(0xa90f3a76baf83bf7),
+ C64e(0x0169e60541e34a69), C64e(0x46b58a8e2e6fe65a),
+ C64e(0x1047a7d0c1843c24), C64e(0x3b6e71b12d5ac199),
+ C64e(0xcf57f6ec9db1f856), C64e(0xa706887c5716b156),
+ C64e(0xe3c2fcdfe68517fb), C64e(0x545a4678cc8cdd4b)
+};
+
+#else
+
+static const bee_u32 C[] = {
+ C32e(0x72d5dea2), C32e(0xdf15f867), C32e(0x7b84150a),
+ C32e(0xb7231557), C32e(0x81abd690), C32e(0x4d5a87f6),
+ C32e(0x4e9f4fc5), C32e(0xc3d12b40), C32e(0xea983ae0),
+ C32e(0x5c45fa9c), C32e(0x03c5d299), C32e(0x66b2999a),
+ C32e(0x660296b4), C32e(0xf2bb538a), C32e(0xb556141a),
+ C32e(0x88dba231), C32e(0x03a35a5c), C32e(0x9a190edb),
+ C32e(0x403fb20a), C32e(0x87c14410), C32e(0x1c051980),
+ C32e(0x849e951d), C32e(0x6f33ebad), C32e(0x5ee7cddc),
+ C32e(0x10ba1392), C32e(0x02bf6b41), C32e(0xdc786515),
+ C32e(0xf7bb27d0), C32e(0x0a2c8139), C32e(0x37aa7850),
+ C32e(0x3f1abfd2), C32e(0x410091d3), C32e(0x422d5a0d),
+ C32e(0xf6cc7e90), C32e(0xdd629f9c), C32e(0x92c097ce),
+ C32e(0x185ca70b), C32e(0xc72b44ac), C32e(0xd1df65d6),
+ C32e(0x63c6fc23), C32e(0x976e6c03), C32e(0x9ee0b81a),
+ C32e(0x2105457e), C32e(0x446ceca8), C32e(0xeef103bb),
+ C32e(0x5d8e61fa), C32e(0xfd9697b2), C32e(0x94838197),
+ C32e(0x4a8e8537), C32e(0xdb03302f), C32e(0x2a678d2d),
+ C32e(0xfb9f6a95), C32e(0x8afe7381), C32e(0xf8b8696c),
+ C32e(0x8ac77246), C32e(0xc07f4214), C32e(0xc5f4158f),
+ C32e(0xbdc75ec4), C32e(0x75446fa7), C32e(0x8f11bb80),
+ C32e(0x52de75b7), C32e(0xaee488bc), C32e(0x82b8001e),
+ C32e(0x98a6a3f4), C32e(0x8ef48f33), C32e(0xa9a36315),
+ C32e(0xaa5f5624), C32e(0xd5b7f989), C32e(0xb6f1ed20),
+ C32e(0x7c5ae0fd), C32e(0x36cae95a), C32e(0x06422c36),
+ C32e(0xce293543), C32e(0x4efe983d), C32e(0x533af974),
+ C32e(0x739a4ba7), C32e(0xd0f51f59), C32e(0x6f4e8186),
+ C32e(0x0e9dad81), C32e(0xafd85a9f), C32e(0xa7050667),
+ C32e(0xee34626a), C32e(0x8b0b28be), C32e(0x6eb91727),
+ C32e(0x47740726), C32e(0xc680103f), C32e(0xe0a07e6f),
+ C32e(0xc67e487b), C32e(0x0d550aa5), C32e(0x4af8a4c0),
+ C32e(0x91e3e79f), C32e(0x978ef19e), C32e(0x86767281),
+ C32e(0x50608dd4), C32e(0x7e9e5a41), C32e(0xf3e5b062),
+ C32e(0xfc9f1fec), C32e(0x4054207a), C32e(0xe3e41a00),
+ C32e(0xcef4c984), C32e(0x4fd794f5), C32e(0x9dfa95d8),
+ C32e(0x552e7e11), C32e(0x24c354a5), C32e(0x5bdf7228),
+ C32e(0xbdfe6e28), C32e(0x78f57fe2), C32e(0x0fa5c4b2),
+ C32e(0x05897cef), C32e(0xee49d32e), C32e(0x447e9385),
+ C32e(0xeb28597f), C32e(0x705f6937), C32e(0xb324314a),
+ C32e(0x5e8628f1), C32e(0x1dd6e465), C32e(0xc71b7704),
+ C32e(0x51b920e7), C32e(0x74fe43e8), C32e(0x23d4878a),
+ C32e(0x7d29e8a3), C32e(0x927694f2), C32e(0xddcb7a09),
+ C32e(0x9b30d9c1), C32e(0x1d1b30fb), C32e(0x5bdc1be0),
+ C32e(0xda24494f), C32e(0xf29c82bf), C32e(0xa4e7ba31),
+ C32e(0xb470bfff), C32e(0x0d324405), C32e(0xdef8bc48),
+ C32e(0x3baefc32), C32e(0x53bbd339), C32e(0x459fc3c1),
+ C32e(0xe0298ba0), C32e(0xe5c905fd), C32e(0xf7ae090f),
+ C32e(0x94703412), C32e(0x4290f134), C32e(0xa271b701),
+ C32e(0xe344ed95), C32e(0xe93b8e36), C32e(0x4f2f984a),
+ C32e(0x88401d63), C32e(0xa06cf615), C32e(0x47c1444b),
+ C32e(0x8752afff), C32e(0x7ebb4af1), C32e(0xe20ac630),
+ C32e(0x4670b6c5), C32e(0xcc6e8ce6), C32e(0xa4d5a456),
+ C32e(0xbd4fca00), C32e(0xda9d844b), C32e(0xc83e18ae),
+ C32e(0x7357ce45), C32e(0x3064d1ad), C32e(0xe8a6ce68),
+ C32e(0x145c2567), C32e(0xa3da8cf2), C32e(0xcb0ee116),
+ C32e(0x33e90658), C32e(0x9a94999a), C32e(0x1f60b220),
+ C32e(0xc26f847b), C32e(0xd1ceac7f), C32e(0xa0d18518),
+ C32e(0x32595ba1), C32e(0x8ddd19d3), C32e(0x509a1cc0),
+ C32e(0xaaa5b446), C32e(0x9f3d6367), C32e(0xe4046bba),
+ C32e(0xf6ca19ab), C32e(0x0b56ee7e), C32e(0x1fb179ea),
+ C32e(0xa9282174), C32e(0xe9bdf735), C32e(0x3b3651ee),
+ C32e(0x1d57ac5a), C32e(0x7550d376), C32e(0x3a46c2fe),
+ C32e(0xa37d7001), C32e(0xf735c1af), C32e(0x98a4d842),
+ C32e(0x78edec20), C32e(0x9e6b6779), C32e(0x41836315),
+ C32e(0xea3adba8), C32e(0xfac33b4d), C32e(0x32832c83),
+ C32e(0xa7403b1f), C32e(0x1c2747f3), C32e(0x5940f034),
+ C32e(0xb72d769a), C32e(0xe73e4e6c), C32e(0xd2214ffd),
+ C32e(0xb8fd8d39), C32e(0xdc5759ef), C32e(0x8d9b0c49),
+ C32e(0x2b49ebda), C32e(0x5ba2d749), C32e(0x68f3700d),
+ C32e(0x7d3baed0), C32e(0x7a8d5584), C32e(0xf5a5e9f0),
+ C32e(0xe4f88e65), C32e(0xa0b8a2f4), C32e(0x36103b53),
+ C32e(0x0ca8079e), C32e(0x753eec5a), C32e(0x91689492),
+ C32e(0x56e8884f), C32e(0x5bb05c55), C32e(0xf8babc4c),
+ C32e(0xe3bb3b99), C32e(0xf387947b), C32e(0x75daf4d6),
+ C32e(0x726b1c5d), C32e(0x64aeac28), C32e(0xdc34b36d),
+ C32e(0x6c34a550), C32e(0xb828db71), C32e(0xf861e2f2),
+ C32e(0x108d512a), C32e(0xe3db6433), C32e(0x59dd75fc),
+ C32e(0x1cacbcf1), C32e(0x43ce3fa2), C32e(0x67bbd13c),
+ C32e(0x02e843b0), C32e(0x330a5bca), C32e(0x8829a175),
+ C32e(0x7f34194d), C32e(0xb416535c), C32e(0x923b94c3),
+ C32e(0x0e794d1e), C32e(0x797475d7), C32e(0xb6eeaf3f),
+ C32e(0xeaa8d4f7), C32e(0xbe1a3921), C32e(0x5cf47e09),
+ C32e(0x4c232751), C32e(0x26a32453), C32e(0xba323cd2),
+ C32e(0x44a3174a), C32e(0x6da6d5ad), C32e(0xb51d3ea6),
+ C32e(0xaff2c908), C32e(0x83593d98), C32e(0x916b3c56),
+ C32e(0x4cf87ca1), C32e(0x7286604d), C32e(0x46e23ecc),
+ C32e(0x086ec7f6), C32e(0x2f9833b3), C32e(0xb1bc765e),
+ C32e(0x2bd666a5), C32e(0xefc4e62a), C32e(0x06f4b6e8),
+ C32e(0xbec1d436), C32e(0x74ee8215), C32e(0xbcef2163),
+ C32e(0xfdc14e0d), C32e(0xf453c969), C32e(0xa77d5ac4),
+ C32e(0x06585826), C32e(0x7ec11416), C32e(0x06e0fa16),
+ C32e(0x7e90af3d), C32e(0x28639d3f), C32e(0xd2c9f2e3),
+ C32e(0x009bd20c), C32e(0x5faace30), C32e(0xb7d40c30),
+ C32e(0x742a5116), C32e(0xf2e03298), C32e(0x0deb30d8),
+ C32e(0xe3cef89a), C32e(0x4bc59e7b), C32e(0xb5f17992),
+ C32e(0xff51e66e), C32e(0x048668d3), C32e(0x9b234d57),
+ C32e(0xe6966731), C32e(0xcce6a6f3), C32e(0x170a7505),
+ C32e(0xb17681d9), C32e(0x13326cce), C32e(0x3c175284),
+ C32e(0xf805a262), C32e(0xf42bcbb3), C32e(0x78471547),
+ C32e(0xff465482), C32e(0x23936a48), C32e(0x38df5807),
+ C32e(0x4e5e6565), C32e(0xf2fc7c89), C32e(0xfc86508e),
+ C32e(0x31702e44), C32e(0xd00bca86), C32e(0xf04009a2),
+ C32e(0x3078474e), C32e(0x65a0ee39), C32e(0xd1f73883),
+ C32e(0xf75ee937), C32e(0xe42c3abd), C32e(0x2197b226),
+ C32e(0x0113f86f), C32e(0xa344edd1), C32e(0xef9fdee7),
+ C32e(0x8ba0df15), C32e(0x762592d9), C32e(0x3c85f7f6),
+ C32e(0x12dc42be), C32e(0xd8a7ec7c), C32e(0xab27b07e),
+ C32e(0x538d7dda), C32e(0xaa3ea8de), C32e(0xaa25ce93),
+ C32e(0xbd0269d8), C32e(0x5af643fd), C32e(0x1a7308f9),
+ C32e(0xc05fefda), C32e(0x174a19a5), C32e(0x974d6633),
+ C32e(0x4cfd216a), C32e(0x35b49831), C32e(0xdb411570),
+ C32e(0xea1e0fbb), C32e(0xedcd549b), C32e(0x9ad063a1),
+ C32e(0x51974072), C32e(0xf6759dbf), C32e(0x91476fe2)
+};
+
+#define Ceven_w3(r) (C[((r) << 3) + 0])
+#define Ceven_w2(r) (C[((r) << 3) + 1])
+#define Ceven_w1(r) (C[((r) << 3) + 2])
+#define Ceven_w0(r) (C[((r) << 3) + 3])
+#define Codd_w3(r) (C[((r) << 3) + 4])
+#define Codd_w2(r) (C[((r) << 3) + 5])
+#define Codd_w1(r) (C[((r) << 3) + 6])
+#define Codd_w0(r) (C[((r) << 3) + 7])
+
+#define S(x0, x1, x2, x3, cb, r) do { \
+ Sb(x0 ## 3, x1 ## 3, x2 ## 3, x3 ## 3, cb ## w3(r)); \
+ Sb(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, cb ## w2(r)); \
+ Sb(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, cb ## w1(r)); \
+ Sb(x0 ## 0, x1 ## 0, x2 ## 0, x3 ## 0, cb ## w0(r)); \
+ } while (0)
+
+#define L(x0, x1, x2, x3, x4, x5, x6, x7) do { \
+ Lb(x0 ## 3, x1 ## 3, x2 ## 3, x3 ## 3, \
+ x4 ## 3, x5 ## 3, x6 ## 3, x7 ## 3); \
+ Lb(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, \
+ x4 ## 2, x5 ## 2, x6 ## 2, x7 ## 2); \
+ Lb(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, \
+ x4 ## 1, x5 ## 1, x6 ## 1, x7 ## 1); \
+ Lb(x0 ## 0, x1 ## 0, x2 ## 0, x3 ## 0, \
+ x4 ## 0, x5 ## 0, x6 ## 0, x7 ## 0); \
+ } while (0)
+
+#define Wz(x, c, n) do { \
+ bee_u32 t = (x ## 3 & (c)) << (n); \
+ x ## 3 = ((x ## 3 >> (n)) & (c)) | t; \
+ t = (x ## 2 & (c)) << (n); \
+ x ## 2 = ((x ## 2 >> (n)) & (c)) | t; \
+ t = (x ## 1 & (c)) << (n); \
+ x ## 1 = ((x ## 1 >> (n)) & (c)) | t; \
+ t = (x ## 0 & (c)) << (n); \
+ x ## 0 = ((x ## 0 >> (n)) & (c)) | t; \
+ } while (0)
+
+#define W0(x) Wz(x, BEE_C32(0x55555555), 1)
+#define W1(x) Wz(x, BEE_C32(0x33333333), 2)
+#define W2(x) Wz(x, BEE_C32(0x0F0F0F0F), 4)
+#define W3(x) Wz(x, BEE_C32(0x00FF00FF), 8)
+#define W4(x) Wz(x, BEE_C32(0x0000FFFF), 16)
+#define W5(x) do { \
+ bee_u32 t = x ## 3; \
+ x ## 3 = x ## 2; \
+ x ## 2 = t; \
+ t = x ## 1; \
+ x ## 1 = x ## 0; \
+ x ## 0 = t; \
+ } while (0)
+#define W6(x) do { \
+ bee_u32 t = x ## 3; \
+ x ## 3 = x ## 1; \
+ x ## 1 = t; \
+ t = x ## 2; \
+ x ## 2 = x ## 0; \
+ x ## 0 = t; \
+ } while (0)
+
+#define DECL_STATE \
+ bee_u32 h03, h02, h01, h00, h13, h12, h11, h10; \
+ bee_u32 h23, h22, h21, h20, h33, h32, h31, h30; \
+ bee_u32 h43, h42, h41, h40, h53, h52, h51, h50; \
+ bee_u32 h63, h62, h61, h60, h73, h72, h71, h70; \
+ bee_u32 tmp;
+
+#define READ_STATE(state) do { \
+ h03 = (state)->H.narrow[ 0]; \
+ h02 = (state)->H.narrow[ 1]; \
+ h01 = (state)->H.narrow[ 2]; \
+ h00 = (state)->H.narrow[ 3]; \
+ h13 = (state)->H.narrow[ 4]; \
+ h12 = (state)->H.narrow[ 5]; \
+ h11 = (state)->H.narrow[ 6]; \
+ h10 = (state)->H.narrow[ 7]; \
+ h23 = (state)->H.narrow[ 8]; \
+ h22 = (state)->H.narrow[ 9]; \
+ h21 = (state)->H.narrow[10]; \
+ h20 = (state)->H.narrow[11]; \
+ h33 = (state)->H.narrow[12]; \
+ h32 = (state)->H.narrow[13]; \
+ h31 = (state)->H.narrow[14]; \
+ h30 = (state)->H.narrow[15]; \
+ h43 = (state)->H.narrow[16]; \
+ h42 = (state)->H.narrow[17]; \
+ h41 = (state)->H.narrow[18]; \
+ h40 = (state)->H.narrow[19]; \
+ h53 = (state)->H.narrow[20]; \
+ h52 = (state)->H.narrow[21]; \
+ h51 = (state)->H.narrow[22]; \
+ h50 = (state)->H.narrow[23]; \
+ h63 = (state)->H.narrow[24]; \
+ h62 = (state)->H.narrow[25]; \
+ h61 = (state)->H.narrow[26]; \
+ h60 = (state)->H.narrow[27]; \
+ h73 = (state)->H.narrow[28]; \
+ h72 = (state)->H.narrow[29]; \
+ h71 = (state)->H.narrow[30]; \
+ h70 = (state)->H.narrow[31]; \
+ } while (0)
+
+#define WRITE_STATE(state) do { \
+ (state)->H.narrow[ 0] = h03; \
+ (state)->H.narrow[ 1] = h02; \
+ (state)->H.narrow[ 2] = h01; \
+ (state)->H.narrow[ 3] = h00; \
+ (state)->H.narrow[ 4] = h13; \
+ (state)->H.narrow[ 5] = h12; \
+ (state)->H.narrow[ 6] = h11; \
+ (state)->H.narrow[ 7] = h10; \
+ (state)->H.narrow[ 8] = h23; \
+ (state)->H.narrow[ 9] = h22; \
+ (state)->H.narrow[10] = h21; \
+ (state)->H.narrow[11] = h20; \
+ (state)->H.narrow[12] = h33; \
+ (state)->H.narrow[13] = h32; \
+ (state)->H.narrow[14] = h31; \
+ (state)->H.narrow[15] = h30; \
+ (state)->H.narrow[16] = h43; \
+ (state)->H.narrow[17] = h42; \
+ (state)->H.narrow[18] = h41; \
+ (state)->H.narrow[19] = h40; \
+ (state)->H.narrow[20] = h53; \
+ (state)->H.narrow[21] = h52; \
+ (state)->H.narrow[22] = h51; \
+ (state)->H.narrow[23] = h50; \
+ (state)->H.narrow[24] = h63; \
+ (state)->H.narrow[25] = h62; \
+ (state)->H.narrow[26] = h61; \
+ (state)->H.narrow[27] = h60; \
+ (state)->H.narrow[28] = h73; \
+ (state)->H.narrow[29] = h72; \
+ (state)->H.narrow[30] = h71; \
+ (state)->H.narrow[31] = h70; \
+ } while (0)
+
+#define INPUT_BUF1 \
+ bee_u32 m03 = dec32e_aligned(buf + 0); \
+ bee_u32 m02 = dec32e_aligned(buf + 4); \
+ bee_u32 m01 = dec32e_aligned(buf + 8); \
+ bee_u32 m00 = dec32e_aligned(buf + 12); \
+ bee_u32 m13 = dec32e_aligned(buf + 16); \
+ bee_u32 m12 = dec32e_aligned(buf + 20); \
+ bee_u32 m11 = dec32e_aligned(buf + 24); \
+ bee_u32 m10 = dec32e_aligned(buf + 28); \
+ bee_u32 m23 = dec32e_aligned(buf + 32); \
+ bee_u32 m22 = dec32e_aligned(buf + 36); \
+ bee_u32 m21 = dec32e_aligned(buf + 40); \
+ bee_u32 m20 = dec32e_aligned(buf + 44); \
+ bee_u32 m33 = dec32e_aligned(buf + 48); \
+ bee_u32 m32 = dec32e_aligned(buf + 52); \
+ bee_u32 m31 = dec32e_aligned(buf + 56); \
+ bee_u32 m30 = dec32e_aligned(buf + 60); \
+ h03 ^= m03; \
+ h02 ^= m02; \
+ h01 ^= m01; \
+ h00 ^= m00; \
+ h13 ^= m13; \
+ h12 ^= m12; \
+ h11 ^= m11; \
+ h10 ^= m10; \
+ h23 ^= m23; \
+ h22 ^= m22; \
+ h21 ^= m21; \
+ h20 ^= m20; \
+ h33 ^= m33; \
+ h32 ^= m32; \
+ h31 ^= m31; \
+ h30 ^= m30;
+
+#define INPUT_BUF2 \
+ h43 ^= m03; \
+ h42 ^= m02; \
+ h41 ^= m01; \
+ h40 ^= m00; \
+ h53 ^= m13; \
+ h52 ^= m12; \
+ h51 ^= m11; \
+ h50 ^= m10; \
+ h63 ^= m23; \
+ h62 ^= m22; \
+ h61 ^= m21; \
+ h60 ^= m20; \
+ h73 ^= m33; \
+ h72 ^= m32; \
+ h71 ^= m31; \
+ h70 ^= m30;
+
+static const bee_u32 IV512[] = {
+ C32e(0x6fd14b96), C32e(0x3e00aa17), C32e(0x636a2e05), C32e(0x7a15d543),
+ C32e(0x8a225e8d), C32e(0x0c97ef0b), C32e(0xe9341259), C32e(0xf2b3c361),
+ C32e(0x891da0c1), C32e(0x536f801e), C32e(0x2aa9056b), C32e(0xea2b6d80),
+ C32e(0x588eccdb), C32e(0x2075baa6), C32e(0xa90f3a76), C32e(0xbaf83bf7),
+ C32e(0x0169e605), C32e(0x41e34a69), C32e(0x46b58a8e), C32e(0x2e6fe65a),
+ C32e(0x1047a7d0), C32e(0xc1843c24), C32e(0x3b6e71b1), C32e(0x2d5ac199),
+ C32e(0xcf57f6ec), C32e(0x9db1f856), C32e(0xa706887c), C32e(0x5716b156),
+ C32e(0xe3c2fcdf), C32e(0xe68517fb), C32e(0x545a4678), C32e(0xcc8cdd4b)
+};
+
+#endif
+
+#define SL(ro) SLu(r + ro, ro)
+
+#define SLu(r, ro) do { \
+ S(h0, h2, h4, h6, Ceven_, r); \
+ S(h1, h3, h5, h7, Codd_, r); \
+ L(h0, h2, h4, h6, h1, h3, h5, h7); \
+ W ## ro(h1); \
+ W ## ro(h3); \
+ W ## ro(h5); \
+ W ## ro(h7); \
+ } while (0)
+
+
+
+#if BEE_JH_64
+
+/*
+ * On a "true 64-bit" architecture, we can unroll at will.
+ */
+
+#define E8 do { \
+ SLu( 0, 0); \
+ SLu( 1, 1); \
+ SLu( 2, 2); \
+ SLu( 3, 3); \
+ SLu( 4, 4); \
+ SLu( 5, 5); \
+ SLu( 6, 6); \
+ SLu( 7, 0); \
+ SLu( 8, 1); \
+ SLu( 9, 2); \
+ SLu(10, 3); \
+ SLu(11, 4); \
+ SLu(12, 5); \
+ SLu(13, 6); \
+ SLu(14, 0); \
+ SLu(15, 1); \
+ SLu(16, 2); \
+ SLu(17, 3); \
+ SLu(18, 4); \
+ SLu(19, 5); \
+ SLu(20, 6); \
+ SLu(21, 0); \
+ SLu(22, 1); \
+ SLu(23, 2); \
+ SLu(24, 3); \
+ SLu(25, 4); \
+ SLu(26, 5); \
+ SLu(27, 6); \
+ SLu(28, 0); \
+ SLu(29, 1); \
+ SLu(30, 2); \
+ SLu(31, 3); \
+ SLu(32, 4); \
+ SLu(33, 5); \
+ SLu(34, 6); \
+ SLu(35, 0); \
+ SLu(36, 1); \
+ SLu(37, 2); \
+ SLu(38, 3); \
+ SLu(39, 4); \
+ SLu(40, 5); \
+ SLu(41, 6); \
+ } while (0)
+
+#else
+
+/*
+ * We are not aiming at a small footprint, but we are still using a
+ * 32-bit implementation. Full loop unrolling would smash the L1
+ * cache on some "big" architectures (32 kB L1 cache).
+ */
+
+#define E8 do { \
+ unsigned r; \
+ for (r = 0; r < 42; r += 7) { \
+ SL(0); \
+ SL(1); \
+ SL(2); \
+ SL(3); \
+ SL(4); \
+ SL(5); \
+ SL(6); \
+ } \
+ } while (0)
+
+#endif
+
+
+static void two_init(facet_two_context *sc, const void *iv) /* reset context: copy IV into state, clear buffer/count */
+{
+ sc->ptr = 0; /* no pending bytes in the 64-byte input buffer */
+#if BEE_JH_64
+ memcpy(sc->H.wide, iv, sizeof sc->H.wide); /* 64-bit lanes: 16 x bee_u64 state words */
+#else
+ memcpy(sc->H.narrow, iv, sizeof sc->H.narrow); /* 32-bit lanes: 32 x bee_u32 state words */
+#endif
+#if BEE_64
+ sc->block_count = 0; /* single 64-bit count of processed 64-byte blocks */
+#else
+ sc->block_count_high = 0; /* high word of the split 64-bit block counter */
+ sc->block_count_low = 0; /* low word; carries into block_count_high on wrap */
+#endif
+}
+
+static void two_core(facet_two_context *sc, const void *data, size_t len) /* absorb len bytes; compress each full 64-byte block */
+{
+ unsigned char *buf;
+ size_t ptr;
+ DECL_STATE /* local copies of the hash state (h0..h7 halves) plus tmp */
+
+ buf = sc->buf;
+ ptr = sc->ptr; /* current fill level of the partial block */
+ if (len < (sizeof sc->buf) - ptr) { /* fast path: input fits in buffer, no compression needed */
+ memcpy(buf + ptr, data, len);
+ ptr += len;
+ sc->ptr = ptr;
+ return;
+ }
+
+ READ_STATE(sc); /* load state into locals once for the whole loop */
+ while (len > 0) {
+ size_t clen;
+
+ clen = (sizeof sc->buf) - ptr; /* room left in the 64-byte block */
+ if (clen > len)
+ clen = len;
+ memcpy(buf + ptr, data, clen);
+ ptr += clen;
+ data = (const unsigned char *)data + clen;
+ len -= clen;
+ if (ptr == sizeof sc->buf) { /* block full: run one compression */
+ INPUT_BUF1; /* XOR message into first state half */
+ E8; /* 42-round bijective permutation */
+ INPUT_BUF2; /* XOR same message into second state half */
+#if BEE_64
+ sc->block_count ++;
+#else
+ if ((sc->block_count_low = BEE_T32(
+ sc->block_count_low + 1)) == 0)
+ sc->block_count_high ++; /* propagate carry on 32-bit wrap */
+#endif
+ ptr = 0;
+ }
+ }
+ WRITE_STATE(sc); /* flush locals back into the context */
+ sc->ptr = ptr;
+}
+
+static void two_close(facet_two_context *sc, unsigned ub, unsigned n, void *dst, size_t out_size_w32, const void *iv) /* pad, encode bit length, emit out_size_w32 32-bit words, re-init */
+{
+ unsigned z;
+ unsigned char buf[128]; /* padding area: up to two 64-byte blocks (47 or up to 111 zeros + 17 bytes) */
+ size_t numz, u;
+#if BEE_64
+ bee_u64 l0, l1; /* 128-bit message bit length, split in two 64-bit words */
+#else
+ bee_u32 l0, l1, l2, l3; /* same length, split in four 32-bit words */
+#endif
+
+ z = 0x80 >> n; /* the mandatory "1" bit placed after the n extra bits */
+ buf[0] = ((ub & -z) | z) & 0xFF; /* keep only the n high bits of ub, then set the marker bit */
+ if (sc->ptr == 0 && n == 0) { /* empty partial block: padding fits in one 64-byte block */
+ numz = 47;
+ } else { /* otherwise pad out to the end of the next block (two blocks total) */
+ numz = 111 - sc->ptr;
+ }
+ memset(buf + 1, 0, numz);
+#if BEE_64
+ l0 = BEE_T64(sc->block_count << 9) + (sc->ptr << 3) + n; /* low 64 bits: blocks*512 + buffered bytes*8 + extra bits */
+ l1 = BEE_T64(sc->block_count >> 55); /* high 64 bits of the bit count */
+ bee_enc64be(buf + numz + 1, l1); /* big-endian length, most significant first */
+ bee_enc64be(buf + numz + 9, l0);
+#else
+ l0 = BEE_T32(sc->block_count_low << 9) + (sc->ptr << 3) + n;
+ l1 = BEE_T32(sc->block_count_low >> 23)
+ + BEE_T32(sc->block_count_high << 9);
+ l2 = BEE_T32(sc->block_count_high >> 23);
+ l3 = 0;
+ bee_enc32be(buf + numz + 1, l3); /* big-endian 128-bit length, word by word */
+ bee_enc32be(buf + numz + 5, l2);
+ bee_enc32be(buf + numz + 9, l1);
+ bee_enc32be(buf + numz + 13, l0);
+#endif
+ two_core(sc, buf, numz + 17); /* absorb padding: marker byte + zeros + 16-byte length */
+#if BEE_JH_64
+ for (u = 0; u < 8; u ++)
+ enc64e(buf + (u << 3), sc->H.wide[u + 8]); /* serialize second half of the state (64 bytes) */
+#else
+ for (u = 0; u < 16; u ++)
+ enc32e(buf + (u << 2), sc->H.narrow[u + 16]); /* same, from the 32-bit representation */
+#endif
+ memcpy(dst, buf + ((16 - out_size_w32) << 2), out_size_w32 << 2); /* take the trailing out_size_w32 words as digest */
+ two_init(sc, iv); /* context is reusable immediately */
+}
+
+
+/* see facet_two.h */
+void facet_two_init(void *cc) /* public init: start a fresh computation with the 512-bit IV */
+{
+ two_init(cc, IV512);
+}
+
+/* see facet_two.h */
+void facet_two(void *cc, const void *data, size_t len) /* public update: absorb len bytes (len may be zero) */
+{
+ two_core(cc, data, len);
+}
+
+/* see facet_two.h */
+void facet_two_close(void *cc, void *dst) /* public finalize: 16 x 32-bit words = 64-byte digest into dst */
+{
+ two_close(cc, 0, 0, dst, 16, IV512);
+}
+
+/* see facet_two.h */
+void facet_two_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) /* finalize with n (0-7) extra bits from ub, big-endian bit order */
+{
+ two_close(cc, ub, n, dst, 16, IV512);
+}
+
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/stratum/algos/honeycomb/facet_two.h b/stratum/algos/honeycomb/facet_two.h
new file mode 100644
index 000000000..79f6d85cd
--- /dev/null
+++ b/stratum/algos/honeycomb/facet_two.h
@@ -0,0 +1,85 @@
+#ifndef FACET_TWO_H
+#define FACET_TWO_H
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#include <stddef.h>
+#include "honeycomb_types.h"
+
+
+//#undef BEE_64 //
+
+/**
+ * This structure is a context for HoneyComb Facet #2 computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a HoneyComb Facet #2 computation has been performed, the context can be reused for another computation.
+ *
+ * The contents of this structure are private. A running HoneyComb Facet #2 computation
+ * can be cloned by copying the context (e.g. with a simple memcpy() ).
+ */
+typedef struct {
+ unsigned char buf[64]; /* first field, for alignment */
+ size_t ptr;
+ union {
+#if BEE_64
+ bee_u64 wide[16];
+#endif
+ bee_u32 narrow[32];
+ } H;
+#if BEE_64
+ bee_u64 block_count;
+#else
+ bee_u32 block_count_high, block_count_low;
+#endif
+} facet_two_context;
+
+
+/**
+ * Initialize a HoneyComb Facet #2 context. This process performs no memory allocation.
+ *
+ * @param cc the HoneyComb Facet #2 context (pointer to a facet_two_context )
+ */
+void facet_two_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that len is zero (in which case this function does nothing).
+ *
+ * @param cc the HoneyComb Facet #2 context
+ * @param data the input data
+ * @param len the input data length (in bytes)
+ */
+void facet_two(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current HoneyComb Facet #2 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (64 bytes). The context is automatically reinitialized.
+ *
+ * @param cc the HoneyComb Facet #2 context
+ * @param dst the destination buffer
+ */
+void facet_two_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (64 bytes). If bit number i
+ * in ub has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc the HoneyComb Facet #2 context
+ * @param ub the extra bits
+ * @param n the number of extra bits (0 to 7)
+ * @param dst the destination buffer
+ */
+void facet_two_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/stratum/algos/honeycomb/facets_helper.c b/stratum/algos/honeycomb/facets_helper.c
new file mode 100644
index 000000000..2d0f06857
--- /dev/null
+++ b/stratum/algos/honeycomb/facets_helper.c
@@ -0,0 +1,350 @@
+
+#include "honeycomb_types.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#if AES_BIG_ENDIAN
+
+#define AESx(x) ( ((BEE_C32(x) >> 24) & BEE_C32(0x000000FF)) \
+ | ((BEE_C32(x) >> 8) & BEE_C32(0x0000FF00)) \
+ | ((BEE_C32(x) << 8) & BEE_C32(0x00FF0000)) \
+ | ((BEE_C32(x) << 24) & BEE_C32(0xFF000000)))
+
+#define AES0 AES0_BE
+#define AES1 AES1_BE
+#define AES2 AES2_BE
+#define AES3 AES3_BE
+
+#define AES_ROUND_BE(X0, X1, X2, X3, K0, K1, K2, K3, Y0, Y1, Y2, Y3) do { \
+ (Y0) = AES0[((X0) >> 24) & 0xFF] \
+ ^ AES1[((X1) >> 16) & 0xFF] \
+ ^ AES2[((X2) >> 8) & 0xFF] \
+ ^ AES3[(X3) & 0xFF] ^ (K0); \
+ (Y1) = AES0[((X1) >> 24) & 0xFF] \
+ ^ AES1[((X2) >> 16) & 0xFF] \
+ ^ AES2[((X3) >> 8) & 0xFF] \
+ ^ AES3[(X0) & 0xFF] ^ (K1); \
+ (Y2) = AES0[((X2) >> 24) & 0xFF] \
+ ^ AES1[((X3) >> 16) & 0xFF] \
+ ^ AES2[((X0) >> 8) & 0xFF] \
+ ^ AES3[(X1) & 0xFF] ^ (K2); \
+ (Y3) = AES0[((X3) >> 24) & 0xFF] \
+ ^ AES1[((X0) >> 16) & 0xFF] \
+ ^ AES2[((X1) >> 8) & 0xFF] \
+ ^ AES3[(X2) & 0xFF] ^ (K3); \
+ } while (0)
+
+#define AES_ROUND_NOKEY_BE(X0, X1, X2, X3, Y0, Y1, Y2, Y3) \
+ AES_ROUND_BE(X0, X1, X2, X3, 0, 0, 0, 0, Y0, Y1, Y2, Y3)
+
+#else
+
+#define AESx(x) BEE_C32(x)
+#define AES0 AES0_LE
+#define AES1 AES1_LE
+#define AES2 AES2_LE
+#define AES3 AES3_LE
+
+#define AES_ROUND_LE(X0, X1, X2, X3, K0, K1, K2, K3, Y0, Y1, Y2, Y3) do { \
+ (Y0) = AES0[(X0) & 0xFF] \
+ ^ AES1[((X1) >> 8) & 0xFF] \
+ ^ AES2[((X2) >> 16) & 0xFF] \
+ ^ AES3[((X3) >> 24) & 0xFF] ^ (K0); \
+ (Y1) = AES0[(X1) & 0xFF] \
+ ^ AES1[((X2) >> 8) & 0xFF] \
+ ^ AES2[((X3) >> 16) & 0xFF] \
+ ^ AES3[((X0) >> 24) & 0xFF] ^ (K1); \
+ (Y2) = AES0[(X2) & 0xFF] \
+ ^ AES1[((X3) >> 8) & 0xFF] \
+ ^ AES2[((X0) >> 16) & 0xFF] \
+ ^ AES3[((X1) >> 24) & 0xFF] ^ (K2); \
+ (Y3) = AES0[(X3) & 0xFF] \
+ ^ AES1[((X0) >> 8) & 0xFF] \
+ ^ AES2[((X1) >> 16) & 0xFF] \
+ ^ AES3[((X2) >> 24) & 0xFF] ^ (K3); \
+ } while (0)
+
+#define AES_ROUND_NOKEY_LE(X0, X1, X2, X3, Y0, Y1, Y2, Y3) \
+ AES_ROUND_LE(X0, X1, X2, X3, 0, 0, 0, 0, Y0, Y1, Y2, Y3)
+
+#endif
+
+/*
+ * The AES*[] tables allow us to perform a fast evaluation of an AES
+ * round; table AESi[] combines SubBytes for a byte at row i, and
+ * MixColumns for the column where that byte goes after ShiftRows.
+ */
+
+static const bee_u32 AES0[256] = {
+ AESx(0xA56363C6), AESx(0x847C7CF8), AESx(0x997777EE), AESx(0x8D7B7BF6),
+ AESx(0x0DF2F2FF), AESx(0xBD6B6BD6), AESx(0xB16F6FDE), AESx(0x54C5C591),
+ AESx(0x50303060), AESx(0x03010102), AESx(0xA96767CE), AESx(0x7D2B2B56),
+ AESx(0x19FEFEE7), AESx(0x62D7D7B5), AESx(0xE6ABAB4D), AESx(0x9A7676EC),
+ AESx(0x45CACA8F), AESx(0x9D82821F), AESx(0x40C9C989), AESx(0x877D7DFA),
+ AESx(0x15FAFAEF), AESx(0xEB5959B2), AESx(0xC947478E), AESx(0x0BF0F0FB),
+ AESx(0xECADAD41), AESx(0x67D4D4B3), AESx(0xFDA2A25F), AESx(0xEAAFAF45),
+ AESx(0xBF9C9C23), AESx(0xF7A4A453), AESx(0x967272E4), AESx(0x5BC0C09B),
+ AESx(0xC2B7B775), AESx(0x1CFDFDE1), AESx(0xAE93933D), AESx(0x6A26264C),
+ AESx(0x5A36366C), AESx(0x413F3F7E), AESx(0x02F7F7F5), AESx(0x4FCCCC83),
+ AESx(0x5C343468), AESx(0xF4A5A551), AESx(0x34E5E5D1), AESx(0x08F1F1F9),
+ AESx(0x937171E2), AESx(0x73D8D8AB), AESx(0x53313162), AESx(0x3F15152A),
+ AESx(0x0C040408), AESx(0x52C7C795), AESx(0x65232346), AESx(0x5EC3C39D),
+ AESx(0x28181830), AESx(0xA1969637), AESx(0x0F05050A), AESx(0xB59A9A2F),
+ AESx(0x0907070E), AESx(0x36121224), AESx(0x9B80801B), AESx(0x3DE2E2DF),
+ AESx(0x26EBEBCD), AESx(0x6927274E), AESx(0xCDB2B27F), AESx(0x9F7575EA),
+ AESx(0x1B090912), AESx(0x9E83831D), AESx(0x742C2C58), AESx(0x2E1A1A34),
+ AESx(0x2D1B1B36), AESx(0xB26E6EDC), AESx(0xEE5A5AB4), AESx(0xFBA0A05B),
+ AESx(0xF65252A4), AESx(0x4D3B3B76), AESx(0x61D6D6B7), AESx(0xCEB3B37D),
+ AESx(0x7B292952), AESx(0x3EE3E3DD), AESx(0x712F2F5E), AESx(0x97848413),
+ AESx(0xF55353A6), AESx(0x68D1D1B9), AESx(0x00000000), AESx(0x2CEDEDC1),
+ AESx(0x60202040), AESx(0x1FFCFCE3), AESx(0xC8B1B179), AESx(0xED5B5BB6),
+ AESx(0xBE6A6AD4), AESx(0x46CBCB8D), AESx(0xD9BEBE67), AESx(0x4B393972),
+ AESx(0xDE4A4A94), AESx(0xD44C4C98), AESx(0xE85858B0), AESx(0x4ACFCF85),
+ AESx(0x6BD0D0BB), AESx(0x2AEFEFC5), AESx(0xE5AAAA4F), AESx(0x16FBFBED),
+ AESx(0xC5434386), AESx(0xD74D4D9A), AESx(0x55333366), AESx(0x94858511),
+ AESx(0xCF45458A), AESx(0x10F9F9E9), AESx(0x06020204), AESx(0x817F7FFE),
+ AESx(0xF05050A0), AESx(0x443C3C78), AESx(0xBA9F9F25), AESx(0xE3A8A84B),
+ AESx(0xF35151A2), AESx(0xFEA3A35D), AESx(0xC0404080), AESx(0x8A8F8F05),
+ AESx(0xAD92923F), AESx(0xBC9D9D21), AESx(0x48383870), AESx(0x04F5F5F1),
+ AESx(0xDFBCBC63), AESx(0xC1B6B677), AESx(0x75DADAAF), AESx(0x63212142),
+ AESx(0x30101020), AESx(0x1AFFFFE5), AESx(0x0EF3F3FD), AESx(0x6DD2D2BF),
+ AESx(0x4CCDCD81), AESx(0x140C0C18), AESx(0x35131326), AESx(0x2FECECC3),
+ AESx(0xE15F5FBE), AESx(0xA2979735), AESx(0xCC444488), AESx(0x3917172E),
+ AESx(0x57C4C493), AESx(0xF2A7A755), AESx(0x827E7EFC), AESx(0x473D3D7A),
+ AESx(0xAC6464C8), AESx(0xE75D5DBA), AESx(0x2B191932), AESx(0x957373E6),
+ AESx(0xA06060C0), AESx(0x98818119), AESx(0xD14F4F9E), AESx(0x7FDCDCA3),
+ AESx(0x66222244), AESx(0x7E2A2A54), AESx(0xAB90903B), AESx(0x8388880B),
+ AESx(0xCA46468C), AESx(0x29EEEEC7), AESx(0xD3B8B86B), AESx(0x3C141428),
+ AESx(0x79DEDEA7), AESx(0xE25E5EBC), AESx(0x1D0B0B16), AESx(0x76DBDBAD),
+ AESx(0x3BE0E0DB), AESx(0x56323264), AESx(0x4E3A3A74), AESx(0x1E0A0A14),
+ AESx(0xDB494992), AESx(0x0A06060C), AESx(0x6C242448), AESx(0xE45C5CB8),
+ AESx(0x5DC2C29F), AESx(0x6ED3D3BD), AESx(0xEFACAC43), AESx(0xA66262C4),
+ AESx(0xA8919139), AESx(0xA4959531), AESx(0x37E4E4D3), AESx(0x8B7979F2),
+ AESx(0x32E7E7D5), AESx(0x43C8C88B), AESx(0x5937376E), AESx(0xB76D6DDA),
+ AESx(0x8C8D8D01), AESx(0x64D5D5B1), AESx(0xD24E4E9C), AESx(0xE0A9A949),
+ AESx(0xB46C6CD8), AESx(0xFA5656AC), AESx(0x07F4F4F3), AESx(0x25EAEACF),
+ AESx(0xAF6565CA), AESx(0x8E7A7AF4), AESx(0xE9AEAE47), AESx(0x18080810),
+ AESx(0xD5BABA6F), AESx(0x887878F0), AESx(0x6F25254A), AESx(0x722E2E5C),
+ AESx(0x241C1C38), AESx(0xF1A6A657), AESx(0xC7B4B473), AESx(0x51C6C697),
+ AESx(0x23E8E8CB), AESx(0x7CDDDDA1), AESx(0x9C7474E8), AESx(0x211F1F3E),
+ AESx(0xDD4B4B96), AESx(0xDCBDBD61), AESx(0x868B8B0D), AESx(0x858A8A0F),
+ AESx(0x907070E0), AESx(0x423E3E7C), AESx(0xC4B5B571), AESx(0xAA6666CC),
+ AESx(0xD8484890), AESx(0x05030306), AESx(0x01F6F6F7), AESx(0x120E0E1C),
+ AESx(0xA36161C2), AESx(0x5F35356A), AESx(0xF95757AE), AESx(0xD0B9B969),
+ AESx(0x91868617), AESx(0x58C1C199), AESx(0x271D1D3A), AESx(0xB99E9E27),
+ AESx(0x38E1E1D9), AESx(0x13F8F8EB), AESx(0xB398982B), AESx(0x33111122),
+ AESx(0xBB6969D2), AESx(0x70D9D9A9), AESx(0x898E8E07), AESx(0xA7949433),
+ AESx(0xB69B9B2D), AESx(0x221E1E3C), AESx(0x92878715), AESx(0x20E9E9C9),
+ AESx(0x49CECE87), AESx(0xFF5555AA), AESx(0x78282850), AESx(0x7ADFDFA5),
+ AESx(0x8F8C8C03), AESx(0xF8A1A159), AESx(0x80898909), AESx(0x170D0D1A),
+ AESx(0xDABFBF65), AESx(0x31E6E6D7), AESx(0xC6424284), AESx(0xB86868D0),
+ AESx(0xC3414182), AESx(0xB0999929), AESx(0x772D2D5A), AESx(0x110F0F1E),
+ AESx(0xCBB0B07B), AESx(0xFC5454A8), AESx(0xD6BBBB6D), AESx(0x3A16162C)
+};
+
+static const bee_u32 AES1[256] = {
+ AESx(0x6363C6A5), AESx(0x7C7CF884), AESx(0x7777EE99), AESx(0x7B7BF68D),
+ AESx(0xF2F2FF0D), AESx(0x6B6BD6BD), AESx(0x6F6FDEB1), AESx(0xC5C59154),
+ AESx(0x30306050), AESx(0x01010203), AESx(0x6767CEA9), AESx(0x2B2B567D),
+ AESx(0xFEFEE719), AESx(0xD7D7B562), AESx(0xABAB4DE6), AESx(0x7676EC9A),
+ AESx(0xCACA8F45), AESx(0x82821F9D), AESx(0xC9C98940), AESx(0x7D7DFA87),
+ AESx(0xFAFAEF15), AESx(0x5959B2EB), AESx(0x47478EC9), AESx(0xF0F0FB0B),
+ AESx(0xADAD41EC), AESx(0xD4D4B367), AESx(0xA2A25FFD), AESx(0xAFAF45EA),
+ AESx(0x9C9C23BF), AESx(0xA4A453F7), AESx(0x7272E496), AESx(0xC0C09B5B),
+ AESx(0xB7B775C2), AESx(0xFDFDE11C), AESx(0x93933DAE), AESx(0x26264C6A),
+ AESx(0x36366C5A), AESx(0x3F3F7E41), AESx(0xF7F7F502), AESx(0xCCCC834F),
+ AESx(0x3434685C), AESx(0xA5A551F4), AESx(0xE5E5D134), AESx(0xF1F1F908),
+ AESx(0x7171E293), AESx(0xD8D8AB73), AESx(0x31316253), AESx(0x15152A3F),
+ AESx(0x0404080C), AESx(0xC7C79552), AESx(0x23234665), AESx(0xC3C39D5E),
+ AESx(0x18183028), AESx(0x969637A1), AESx(0x05050A0F), AESx(0x9A9A2FB5),
+ AESx(0x07070E09), AESx(0x12122436), AESx(0x80801B9B), AESx(0xE2E2DF3D),
+ AESx(0xEBEBCD26), AESx(0x27274E69), AESx(0xB2B27FCD), AESx(0x7575EA9F),
+ AESx(0x0909121B), AESx(0x83831D9E), AESx(0x2C2C5874), AESx(0x1A1A342E),
+ AESx(0x1B1B362D), AESx(0x6E6EDCB2), AESx(0x5A5AB4EE), AESx(0xA0A05BFB),
+ AESx(0x5252A4F6), AESx(0x3B3B764D), AESx(0xD6D6B761), AESx(0xB3B37DCE),
+ AESx(0x2929527B), AESx(0xE3E3DD3E), AESx(0x2F2F5E71), AESx(0x84841397),
+ AESx(0x5353A6F5), AESx(0xD1D1B968), AESx(0x00000000), AESx(0xEDEDC12C),
+ AESx(0x20204060), AESx(0xFCFCE31F), AESx(0xB1B179C8), AESx(0x5B5BB6ED),
+ AESx(0x6A6AD4BE), AESx(0xCBCB8D46), AESx(0xBEBE67D9), AESx(0x3939724B),
+ AESx(0x4A4A94DE), AESx(0x4C4C98D4), AESx(0x5858B0E8), AESx(0xCFCF854A),
+ AESx(0xD0D0BB6B), AESx(0xEFEFC52A), AESx(0xAAAA4FE5), AESx(0xFBFBED16),
+ AESx(0x434386C5), AESx(0x4D4D9AD7), AESx(0x33336655), AESx(0x85851194),
+ AESx(0x45458ACF), AESx(0xF9F9E910), AESx(0x02020406), AESx(0x7F7FFE81),
+ AESx(0x5050A0F0), AESx(0x3C3C7844), AESx(0x9F9F25BA), AESx(0xA8A84BE3),
+ AESx(0x5151A2F3), AESx(0xA3A35DFE), AESx(0x404080C0), AESx(0x8F8F058A),
+ AESx(0x92923FAD), AESx(0x9D9D21BC), AESx(0x38387048), AESx(0xF5F5F104),
+ AESx(0xBCBC63DF), AESx(0xB6B677C1), AESx(0xDADAAF75), AESx(0x21214263),
+ AESx(0x10102030), AESx(0xFFFFE51A), AESx(0xF3F3FD0E), AESx(0xD2D2BF6D),
+ AESx(0xCDCD814C), AESx(0x0C0C1814), AESx(0x13132635), AESx(0xECECC32F),
+ AESx(0x5F5FBEE1), AESx(0x979735A2), AESx(0x444488CC), AESx(0x17172E39),
+ AESx(0xC4C49357), AESx(0xA7A755F2), AESx(0x7E7EFC82), AESx(0x3D3D7A47),
+ AESx(0x6464C8AC), AESx(0x5D5DBAE7), AESx(0x1919322B), AESx(0x7373E695),
+ AESx(0x6060C0A0), AESx(0x81811998), AESx(0x4F4F9ED1), AESx(0xDCDCA37F),
+ AESx(0x22224466), AESx(0x2A2A547E), AESx(0x90903BAB), AESx(0x88880B83),
+ AESx(0x46468CCA), AESx(0xEEEEC729), AESx(0xB8B86BD3), AESx(0x1414283C),
+ AESx(0xDEDEA779), AESx(0x5E5EBCE2), AESx(0x0B0B161D), AESx(0xDBDBAD76),
+ AESx(0xE0E0DB3B), AESx(0x32326456), AESx(0x3A3A744E), AESx(0x0A0A141E),
+ AESx(0x494992DB), AESx(0x06060C0A), AESx(0x2424486C), AESx(0x5C5CB8E4),
+ AESx(0xC2C29F5D), AESx(0xD3D3BD6E), AESx(0xACAC43EF), AESx(0x6262C4A6),
+ AESx(0x919139A8), AESx(0x959531A4), AESx(0xE4E4D337), AESx(0x7979F28B),
+ AESx(0xE7E7D532), AESx(0xC8C88B43), AESx(0x37376E59), AESx(0x6D6DDAB7),
+ AESx(0x8D8D018C), AESx(0xD5D5B164), AESx(0x4E4E9CD2), AESx(0xA9A949E0),
+ AESx(0x6C6CD8B4), AESx(0x5656ACFA), AESx(0xF4F4F307), AESx(0xEAEACF25),
+ AESx(0x6565CAAF), AESx(0x7A7AF48E), AESx(0xAEAE47E9), AESx(0x08081018),
+ AESx(0xBABA6FD5), AESx(0x7878F088), AESx(0x25254A6F), AESx(0x2E2E5C72),
+ AESx(0x1C1C3824), AESx(0xA6A657F1), AESx(0xB4B473C7), AESx(0xC6C69751),
+ AESx(0xE8E8CB23), AESx(0xDDDDA17C), AESx(0x7474E89C), AESx(0x1F1F3E21),
+ AESx(0x4B4B96DD), AESx(0xBDBD61DC), AESx(0x8B8B0D86), AESx(0x8A8A0F85),
+ AESx(0x7070E090), AESx(0x3E3E7C42), AESx(0xB5B571C4), AESx(0x6666CCAA),
+ AESx(0x484890D8), AESx(0x03030605), AESx(0xF6F6F701), AESx(0x0E0E1C12),
+ AESx(0x6161C2A3), AESx(0x35356A5F), AESx(0x5757AEF9), AESx(0xB9B969D0),
+ AESx(0x86861791), AESx(0xC1C19958), AESx(0x1D1D3A27), AESx(0x9E9E27B9),
+ AESx(0xE1E1D938), AESx(0xF8F8EB13), AESx(0x98982BB3), AESx(0x11112233),
+ AESx(0x6969D2BB), AESx(0xD9D9A970), AESx(0x8E8E0789), AESx(0x949433A7),
+ AESx(0x9B9B2DB6), AESx(0x1E1E3C22), AESx(0x87871592), AESx(0xE9E9C920),
+ AESx(0xCECE8749), AESx(0x5555AAFF), AESx(0x28285078), AESx(0xDFDFA57A),
+ AESx(0x8C8C038F), AESx(0xA1A159F8), AESx(0x89890980), AESx(0x0D0D1A17),
+ AESx(0xBFBF65DA), AESx(0xE6E6D731), AESx(0x424284C6), AESx(0x6868D0B8),
+ AESx(0x414182C3), AESx(0x999929B0), AESx(0x2D2D5A77), AESx(0x0F0F1E11),
+ AESx(0xB0B07BCB), AESx(0x5454A8FC), AESx(0xBBBB6DD6), AESx(0x16162C3A)
+};
+
+static const bee_u32 AES2[256] = {
+ AESx(0x63C6A563), AESx(0x7CF8847C), AESx(0x77EE9977), AESx(0x7BF68D7B),
+ AESx(0xF2FF0DF2), AESx(0x6BD6BD6B), AESx(0x6FDEB16F), AESx(0xC59154C5),
+ AESx(0x30605030), AESx(0x01020301), AESx(0x67CEA967), AESx(0x2B567D2B),
+ AESx(0xFEE719FE), AESx(0xD7B562D7), AESx(0xAB4DE6AB), AESx(0x76EC9A76),
+ AESx(0xCA8F45CA), AESx(0x821F9D82), AESx(0xC98940C9), AESx(0x7DFA877D),
+ AESx(0xFAEF15FA), AESx(0x59B2EB59), AESx(0x478EC947), AESx(0xF0FB0BF0),
+ AESx(0xAD41ECAD), AESx(0xD4B367D4), AESx(0xA25FFDA2), AESx(0xAF45EAAF),
+ AESx(0x9C23BF9C), AESx(0xA453F7A4), AESx(0x72E49672), AESx(0xC09B5BC0),
+ AESx(0xB775C2B7), AESx(0xFDE11CFD), AESx(0x933DAE93), AESx(0x264C6A26),
+ AESx(0x366C5A36), AESx(0x3F7E413F), AESx(0xF7F502F7), AESx(0xCC834FCC),
+ AESx(0x34685C34), AESx(0xA551F4A5), AESx(0xE5D134E5), AESx(0xF1F908F1),
+ AESx(0x71E29371), AESx(0xD8AB73D8), AESx(0x31625331), AESx(0x152A3F15),
+ AESx(0x04080C04), AESx(0xC79552C7), AESx(0x23466523), AESx(0xC39D5EC3),
+ AESx(0x18302818), AESx(0x9637A196), AESx(0x050A0F05), AESx(0x9A2FB59A),
+ AESx(0x070E0907), AESx(0x12243612), AESx(0x801B9B80), AESx(0xE2DF3DE2),
+ AESx(0xEBCD26EB), AESx(0x274E6927), AESx(0xB27FCDB2), AESx(0x75EA9F75),
+ AESx(0x09121B09), AESx(0x831D9E83), AESx(0x2C58742C), AESx(0x1A342E1A),
+ AESx(0x1B362D1B), AESx(0x6EDCB26E), AESx(0x5AB4EE5A), AESx(0xA05BFBA0),
+ AESx(0x52A4F652), AESx(0x3B764D3B), AESx(0xD6B761D6), AESx(0xB37DCEB3),
+ AESx(0x29527B29), AESx(0xE3DD3EE3), AESx(0x2F5E712F), AESx(0x84139784),
+ AESx(0x53A6F553), AESx(0xD1B968D1), AESx(0x00000000), AESx(0xEDC12CED),
+ AESx(0x20406020), AESx(0xFCE31FFC), AESx(0xB179C8B1), AESx(0x5BB6ED5B),
+ AESx(0x6AD4BE6A), AESx(0xCB8D46CB), AESx(0xBE67D9BE), AESx(0x39724B39),
+ AESx(0x4A94DE4A), AESx(0x4C98D44C), AESx(0x58B0E858), AESx(0xCF854ACF),
+ AESx(0xD0BB6BD0), AESx(0xEFC52AEF), AESx(0xAA4FE5AA), AESx(0xFBED16FB),
+ AESx(0x4386C543), AESx(0x4D9AD74D), AESx(0x33665533), AESx(0x85119485),
+ AESx(0x458ACF45), AESx(0xF9E910F9), AESx(0x02040602), AESx(0x7FFE817F),
+ AESx(0x50A0F050), AESx(0x3C78443C), AESx(0x9F25BA9F), AESx(0xA84BE3A8),
+ AESx(0x51A2F351), AESx(0xA35DFEA3), AESx(0x4080C040), AESx(0x8F058A8F),
+ AESx(0x923FAD92), AESx(0x9D21BC9D), AESx(0x38704838), AESx(0xF5F104F5),
+ AESx(0xBC63DFBC), AESx(0xB677C1B6), AESx(0xDAAF75DA), AESx(0x21426321),
+ AESx(0x10203010), AESx(0xFFE51AFF), AESx(0xF3FD0EF3), AESx(0xD2BF6DD2),
+ AESx(0xCD814CCD), AESx(0x0C18140C), AESx(0x13263513), AESx(0xECC32FEC),
+ AESx(0x5FBEE15F), AESx(0x9735A297), AESx(0x4488CC44), AESx(0x172E3917),
+ AESx(0xC49357C4), AESx(0xA755F2A7), AESx(0x7EFC827E), AESx(0x3D7A473D),
+ AESx(0x64C8AC64), AESx(0x5DBAE75D), AESx(0x19322B19), AESx(0x73E69573),
+ AESx(0x60C0A060), AESx(0x81199881), AESx(0x4F9ED14F), AESx(0xDCA37FDC),
+ AESx(0x22446622), AESx(0x2A547E2A), AESx(0x903BAB90), AESx(0x880B8388),
+ AESx(0x468CCA46), AESx(0xEEC729EE), AESx(0xB86BD3B8), AESx(0x14283C14),
+ AESx(0xDEA779DE), AESx(0x5EBCE25E), AESx(0x0B161D0B), AESx(0xDBAD76DB),
+ AESx(0xE0DB3BE0), AESx(0x32645632), AESx(0x3A744E3A), AESx(0x0A141E0A),
+ AESx(0x4992DB49), AESx(0x060C0A06), AESx(0x24486C24), AESx(0x5CB8E45C),
+ AESx(0xC29F5DC2), AESx(0xD3BD6ED3), AESx(0xAC43EFAC), AESx(0x62C4A662),
+ AESx(0x9139A891), AESx(0x9531A495), AESx(0xE4D337E4), AESx(0x79F28B79),
+ AESx(0xE7D532E7), AESx(0xC88B43C8), AESx(0x376E5937), AESx(0x6DDAB76D),
+ AESx(0x8D018C8D), AESx(0xD5B164D5), AESx(0x4E9CD24E), AESx(0xA949E0A9),
+ AESx(0x6CD8B46C), AESx(0x56ACFA56), AESx(0xF4F307F4), AESx(0xEACF25EA),
+ AESx(0x65CAAF65), AESx(0x7AF48E7A), AESx(0xAE47E9AE), AESx(0x08101808),
+ AESx(0xBA6FD5BA), AESx(0x78F08878), AESx(0x254A6F25), AESx(0x2E5C722E),
+ AESx(0x1C38241C), AESx(0xA657F1A6), AESx(0xB473C7B4), AESx(0xC69751C6),
+ AESx(0xE8CB23E8), AESx(0xDDA17CDD), AESx(0x74E89C74), AESx(0x1F3E211F),
+ AESx(0x4B96DD4B), AESx(0xBD61DCBD), AESx(0x8B0D868B), AESx(0x8A0F858A),
+ AESx(0x70E09070), AESx(0x3E7C423E), AESx(0xB571C4B5), AESx(0x66CCAA66),
+ AESx(0x4890D848), AESx(0x03060503), AESx(0xF6F701F6), AESx(0x0E1C120E),
+ AESx(0x61C2A361), AESx(0x356A5F35), AESx(0x57AEF957), AESx(0xB969D0B9),
+ AESx(0x86179186), AESx(0xC19958C1), AESx(0x1D3A271D), AESx(0x9E27B99E),
+ AESx(0xE1D938E1), AESx(0xF8EB13F8), AESx(0x982BB398), AESx(0x11223311),
+ AESx(0x69D2BB69), AESx(0xD9A970D9), AESx(0x8E07898E), AESx(0x9433A794),
+ AESx(0x9B2DB69B), AESx(0x1E3C221E), AESx(0x87159287), AESx(0xE9C920E9),
+ AESx(0xCE8749CE), AESx(0x55AAFF55), AESx(0x28507828), AESx(0xDFA57ADF),
+ AESx(0x8C038F8C), AESx(0xA159F8A1), AESx(0x89098089), AESx(0x0D1A170D),
+ AESx(0xBF65DABF), AESx(0xE6D731E6), AESx(0x4284C642), AESx(0x68D0B868),
+ AESx(0x4182C341), AESx(0x9929B099), AESx(0x2D5A772D), AESx(0x0F1E110F),
+ AESx(0xB07BCBB0), AESx(0x54A8FC54), AESx(0xBB6DD6BB), AESx(0x162C3A16)
+};
+
+static const bee_u32 AES3[256] = {
+ AESx(0xC6A56363), AESx(0xF8847C7C), AESx(0xEE997777), AESx(0xF68D7B7B),
+ AESx(0xFF0DF2F2), AESx(0xD6BD6B6B), AESx(0xDEB16F6F), AESx(0x9154C5C5),
+ AESx(0x60503030), AESx(0x02030101), AESx(0xCEA96767), AESx(0x567D2B2B),
+ AESx(0xE719FEFE), AESx(0xB562D7D7), AESx(0x4DE6ABAB), AESx(0xEC9A7676),
+ AESx(0x8F45CACA), AESx(0x1F9D8282), AESx(0x8940C9C9), AESx(0xFA877D7D),
+ AESx(0xEF15FAFA), AESx(0xB2EB5959), AESx(0x8EC94747), AESx(0xFB0BF0F0),
+ AESx(0x41ECADAD), AESx(0xB367D4D4), AESx(0x5FFDA2A2), AESx(0x45EAAFAF),
+ AESx(0x23BF9C9C), AESx(0x53F7A4A4), AESx(0xE4967272), AESx(0x9B5BC0C0),
+ AESx(0x75C2B7B7), AESx(0xE11CFDFD), AESx(0x3DAE9393), AESx(0x4C6A2626),
+ AESx(0x6C5A3636), AESx(0x7E413F3F), AESx(0xF502F7F7), AESx(0x834FCCCC),
+ AESx(0x685C3434), AESx(0x51F4A5A5), AESx(0xD134E5E5), AESx(0xF908F1F1),
+ AESx(0xE2937171), AESx(0xAB73D8D8), AESx(0x62533131), AESx(0x2A3F1515),
+ AESx(0x080C0404), AESx(0x9552C7C7), AESx(0x46652323), AESx(0x9D5EC3C3),
+ AESx(0x30281818), AESx(0x37A19696), AESx(0x0A0F0505), AESx(0x2FB59A9A),
+ AESx(0x0E090707), AESx(0x24361212), AESx(0x1B9B8080), AESx(0xDF3DE2E2),
+ AESx(0xCD26EBEB), AESx(0x4E692727), AESx(0x7FCDB2B2), AESx(0xEA9F7575),
+ AESx(0x121B0909), AESx(0x1D9E8383), AESx(0x58742C2C), AESx(0x342E1A1A),
+ AESx(0x362D1B1B), AESx(0xDCB26E6E), AESx(0xB4EE5A5A), AESx(0x5BFBA0A0),
+ AESx(0xA4F65252), AESx(0x764D3B3B), AESx(0xB761D6D6), AESx(0x7DCEB3B3),
+ AESx(0x527B2929), AESx(0xDD3EE3E3), AESx(0x5E712F2F), AESx(0x13978484),
+ AESx(0xA6F55353), AESx(0xB968D1D1), AESx(0x00000000), AESx(0xC12CEDED),
+ AESx(0x40602020), AESx(0xE31FFCFC), AESx(0x79C8B1B1), AESx(0xB6ED5B5B),
+ AESx(0xD4BE6A6A), AESx(0x8D46CBCB), AESx(0x67D9BEBE), AESx(0x724B3939),
+ AESx(0x94DE4A4A), AESx(0x98D44C4C), AESx(0xB0E85858), AESx(0x854ACFCF),
+ AESx(0xBB6BD0D0), AESx(0xC52AEFEF), AESx(0x4FE5AAAA), AESx(0xED16FBFB),
+ AESx(0x86C54343), AESx(0x9AD74D4D), AESx(0x66553333), AESx(0x11948585),
+ AESx(0x8ACF4545), AESx(0xE910F9F9), AESx(0x04060202), AESx(0xFE817F7F),
+ AESx(0xA0F05050), AESx(0x78443C3C), AESx(0x25BA9F9F), AESx(0x4BE3A8A8),
+ AESx(0xA2F35151), AESx(0x5DFEA3A3), AESx(0x80C04040), AESx(0x058A8F8F),
+ AESx(0x3FAD9292), AESx(0x21BC9D9D), AESx(0x70483838), AESx(0xF104F5F5),
+ AESx(0x63DFBCBC), AESx(0x77C1B6B6), AESx(0xAF75DADA), AESx(0x42632121),
+ AESx(0x20301010), AESx(0xE51AFFFF), AESx(0xFD0EF3F3), AESx(0xBF6DD2D2),
+ AESx(0x814CCDCD), AESx(0x18140C0C), AESx(0x26351313), AESx(0xC32FECEC),
+ AESx(0xBEE15F5F), AESx(0x35A29797), AESx(0x88CC4444), AESx(0x2E391717),
+ AESx(0x9357C4C4), AESx(0x55F2A7A7), AESx(0xFC827E7E), AESx(0x7A473D3D),
+ AESx(0xC8AC6464), AESx(0xBAE75D5D), AESx(0x322B1919), AESx(0xE6957373),
+ AESx(0xC0A06060), AESx(0x19988181), AESx(0x9ED14F4F), AESx(0xA37FDCDC),
+ AESx(0x44662222), AESx(0x547E2A2A), AESx(0x3BAB9090), AESx(0x0B838888),
+ AESx(0x8CCA4646), AESx(0xC729EEEE), AESx(0x6BD3B8B8), AESx(0x283C1414),
+ AESx(0xA779DEDE), AESx(0xBCE25E5E), AESx(0x161D0B0B), AESx(0xAD76DBDB),
+ AESx(0xDB3BE0E0), AESx(0x64563232), AESx(0x744E3A3A), AESx(0x141E0A0A),
+ AESx(0x92DB4949), AESx(0x0C0A0606), AESx(0x486C2424), AESx(0xB8E45C5C),
+ AESx(0x9F5DC2C2), AESx(0xBD6ED3D3), AESx(0x43EFACAC), AESx(0xC4A66262),
+ AESx(0x39A89191), AESx(0x31A49595), AESx(0xD337E4E4), AESx(0xF28B7979),
+ AESx(0xD532E7E7), AESx(0x8B43C8C8), AESx(0x6E593737), AESx(0xDAB76D6D),
+ AESx(0x018C8D8D), AESx(0xB164D5D5), AESx(0x9CD24E4E), AESx(0x49E0A9A9),
+ AESx(0xD8B46C6C), AESx(0xACFA5656), AESx(0xF307F4F4), AESx(0xCF25EAEA),
+ AESx(0xCAAF6565), AESx(0xF48E7A7A), AESx(0x47E9AEAE), AESx(0x10180808),
+ AESx(0x6FD5BABA), AESx(0xF0887878), AESx(0x4A6F2525), AESx(0x5C722E2E),
+ AESx(0x38241C1C), AESx(0x57F1A6A6), AESx(0x73C7B4B4), AESx(0x9751C6C6),
+ AESx(0xCB23E8E8), AESx(0xA17CDDDD), AESx(0xE89C7474), AESx(0x3E211F1F),
+ AESx(0x96DD4B4B), AESx(0x61DCBDBD), AESx(0x0D868B8B), AESx(0x0F858A8A),
+ AESx(0xE0907070), AESx(0x7C423E3E), AESx(0x71C4B5B5), AESx(0xCCAA6666),
+ AESx(0x90D84848), AESx(0x06050303), AESx(0xF701F6F6), AESx(0x1C120E0E),
+ AESx(0xC2A36161), AESx(0x6A5F3535), AESx(0xAEF95757), AESx(0x69D0B9B9),
+ AESx(0x17918686), AESx(0x9958C1C1), AESx(0x3A271D1D), AESx(0x27B99E9E),
+ AESx(0xD938E1E1), AESx(0xEB13F8F8), AESx(0x2BB39898), AESx(0x22331111),
+ AESx(0xD2BB6969), AESx(0xA970D9D9), AESx(0x07898E8E), AESx(0x33A79494),
+ AESx(0x2DB69B9B), AESx(0x3C221E1E), AESx(0x15928787), AESx(0xC920E9E9),
+ AESx(0x8749CECE), AESx(0xAAFF5555), AESx(0x50782828), AESx(0xA57ADFDF),
+ AESx(0x038F8C8C), AESx(0x59F8A1A1), AESx(0x09808989), AESx(0x1A170D0D),
+ AESx(0x65DABFBF), AESx(0xD731E6E6), AESx(0x84C64242), AESx(0xD0B86868),
+ AESx(0x82C34141), AESx(0x29B09999), AESx(0x5A772D2D), AESx(0x1E110F0F),
+ AESx(0x7BCBB0B0), AESx(0xA8FC5454), AESx(0x6DD6BBBB), AESx(0x2C3A1616)
+};
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/stratum/algos/honeycomb/honeycomb_types.h b/stratum/algos/honeycomb/honeycomb_types.h
new file mode 100644
index 000000000..3f47cae51
--- /dev/null
+++ b/stratum/algos/honeycomb/honeycomb_types.h
@@ -0,0 +1,1165 @@
+#ifndef HONEYCOMB_TYPESH
+#define HONEYCOMB_TYPESH
+
+#include <limits.h>
+
+//
+// All our I/O functions are defined over octet streams. We do not know
+// how to handle input data if bytes are not octets.
+//
+#if CHAR_BIT != 8
+ #error This code requires 8-bit bytes
+#endif
+
+//
+// We want to define the types "bee_u32" and "bee_u64" which hold
+// unsigned values of at least, respectively, 32 and 64 bits. These
+// tests should select appropriate types for most platforms. The
+// macro "BEE_64" is defined if the 64-bit is supported.
+//
+
+#undef BEE_64
+#undef BEE_64_TRUE
+
+#if defined __STDC__ && __STDC_VERSION__ >= 199901L
+
+ //
+ // On C99 implementations, we can use <stdint.h> to get an exact 64-bit
+ // type, if any, or otherwise use a wider type (which must exist, for
+ // C99 conformance).
+ //
+
+ #include <stdint.h>
+
+ #ifdef UINT32_MAX
+ typedef uint32_t bee_u32;
+ typedef int32_t bee_s32;
+ #else
+ typedef uint_fast32_t bee_u32;
+ typedef int_fast32_t bee_s32;
+ #endif
+
+ #if !BEE_NO_64
+ #ifdef UINT64_MAX
+ typedef uint64_t bee_u64;
+ typedef int64_t bee_s64;
+ #else
+ typedef uint_fast64_t bee_u64;
+ typedef int_fast64_t bee_s64;
+ #endif
+ #endif
+
+ #define BEE_C32(x) ((bee_u32)(x))
+
+ #if !BEE_NO_64
+ #define BEE_C64(x) ((bee_u64)(x))
+ #define BEE_64 1
+ #endif
+
+#else
+
+ //
+ // On non-C99 systems, we use "unsigned int" if it is wide enough,
+ // "unsigned long" otherwise. This supports all "reasonable" architectures.
+ // We have to be cautious: pre-C99 preprocessors handle constants
+ // differently in '#if' expressions. Hence the shifts to test UINT_MAX.
+ //
+
+ #if ((UINT_MAX >> 11) >> 11) >= 0x3FF
+
+ typedef unsigned int bee_u32;
+ typedef int bee_s32;
+
+ #define BEE_C32(x) ((bee_u32)(x ## U))
+
+ #else
+
+ typedef unsigned long bee_u32;
+ typedef long bee_s32;
+
+ #define BEE_C32(x) ((bee_u32)(x ## UL))
+
+ #endif
+
+ #if !BEE_NO_64
+
+ //
+ // We want a 64-bit type. We use "unsigned long" if it is wide enough (as
+ // is common on 64-bit architectures such as AMD64, Alpha or Sparcv9),
+ // "unsigned long long" otherwise, if available. We use ULLONG_MAX to
+ // test whether "unsigned long long" is available; we also know that
+ // gcc features this type, even if the libc header do not know it.
+ //
+
+ #if ((ULONG_MAX >> 31) >> 31) >= 3
+
+ typedef unsigned long bee_u64;
+ typedef long bee_s64;
+
+ #define BEE_C64(x) ((bee_u64)(x ## UL))
+
+ #define BEE_64 1
+
+ #elif ((ULLONG_MAX >> 31) >> 31) >= 3 || defined __GNUC__
+
+ typedef unsigned long long bee_u64;
+ typedef long long bee_s64;
+
+ #define BEE_C64(x) ((bee_u64)(x ## ULL))
+
+ #define BEE_64 1
+
+ #else
+
+ //
+ // No 64-bit type...
+ //
+
+ #endif
+
+ #endif
+
+#endif
+
+
+//
+// If the "unsigned long" type has length 64 bits or more, then this is
+// a "true" 64-bit architectures. This is also true with Visual C on
+// amd64, even though the "long" type is limited to 32 bits.
+//
+#if BEE_64 && (((ULONG_MAX >> 31) >> 31) >= 3 || defined _M_X64)
+ #define BEE_64_TRUE 1
+#endif
+
+//
+// Implementation note: some processors have specific opcodes to perform
+// a rotation. Recent versions of gcc recognize the expression above and
+// use the relevant opcodes, when appropriate.
+//
+
+#define BEE_T32(x) ((x) & BEE_C32(0xFFFFFFFF))
+#define BEE_ROTL32(x, n) BEE_T32(((x) << (n)) | ((x) >> (32 - (n))))
+#define BEE_ROTR32(x, n) BEE_ROTL32(x, (32 - (n)))
+
+#if BEE_64
+ #define BEE_T64(x) ((x) & BEE_C64(0xFFFFFFFFFFFFFFFF))
+ #define BEE_ROTL64(x, n) BEE_T64(((x) << (n)) | ((x) >> (64 - (n))))
+ #define BEE_ROTR64(x, n) BEE_ROTL64(x, (64 - (n)))
+#endif
+
+#ifndef DOXYGEN_IGNORE
+ //
+ // Define BEE_INLINE to be an "inline" qualifier, if available. We define
+ // some small macro-like functions which benefit greatly from being inlined.
+ //
+ #if (defined __STDC__ && __STDC_VERSION__ >= 199901L) || defined __GNUC__
+ #define BEE_INLINE inline
+ #elif defined _MSC_VER
+ #define BEE_INLINE __inline
+ #else
+ #define BEE_INLINE
+ #endif
+#endif
+
+//
+// We define some macros which qualify the architecture. These macros
+// may be explicit set externally (e.g. as compiler parameters). The
+// code below sets those macros if they are not already defined.
+//
+// Most macros are boolean, thus evaluate to either zero or non-zero.
+// The BEE_UPTR macro is special, in that it evaluates to a C type,
+// or is not defined.
+//
+// BEE_UPTR if defined: unsigned type to cast pointers into
+//
+// BEE_UNALIGNED non-zero if unaligned accesses are efficient
+// BEE_LITTLE_ENDIAN non-zero if architecture is known to be little-endian
+// BEE_BIG_ENDIAN non-zero if architecture is known to be big-endian
+// BEE_LITTLE_FAST non-zero if little-endian decoding is fast
+// BEE_BIG_FAST non-zero if big-endian decoding is fast
+//
+// If BEE_UPTR is defined, then encoding and decoding of 32-bit and 64-bit
+// values will try to be "smart". Either BEE_LITTLE_ENDIAN or BEE_BIG_ENDIAN
+// _must_ be non-zero in those situations. The 32-bit and 64-bit types
+// _must_ also have an exact width.
+//
+// BEE_SPARCV9_GCC_32 UltraSPARC-compatible with gcc, 32-bit mode
+// BEE_SPARCV9_GCC_64 UltraSPARC-compatible with gcc, 64-bit mode
+// BEE_SPARCV9_GCC UltraSPARC-compatible with gcc
+// BEE_I386_GCC x86-compatible (32-bit) with gcc
+// BEE_I386_MSVC x86-compatible (32-bit) with Microsoft Visual C
+// BEE_AMD64_GCC x86-compatible (64-bit) with gcc
+// BEE_AMD64_MSVC x86-compatible (64-bit) with Microsoft Visual C
+// BEE_PPC32_GCC PowerPC, 32-bit, with gcc
+// BEE_PPC64_GCC PowerPC, 64-bit, with gcc
+//
+// TODO: enhance automatic detection, for more architectures and compilers.
+// Endianness is the most important. BEE_UNALIGNED and BEE_UPTR help with
+// some very fast functions (e.g. MD4) when using unaligned input data.
+// The CPU-specific-with-GCC macros are useful only for inline assembly,
+// normally restrained to this header file.
+//
+
+//
+// 32-bit x86, aka "i386 compatible".
+//
+#if defined __i386__ || defined _M_IX86
+ #define BEE_DETECT_UNALIGNED 1
+ #define BEE_DETECT_LITTLE_ENDIAN 1
+ #define BEE_DETECT_UPTR bee_u32
+ #ifdef __GNUC__
+ #define BEE_DETECT_I386_GCC 1
+ #endif
+ #ifdef _MSC_VER
+ #define BEE_DETECT_I386_MSVC 1
+ #endif
+//
+// 64-bit x86, hereafter known as "amd64".
+//
+#elif defined __x86_64 || defined _M_X64
+ #define BEE_DETECT_UNALIGNED 1
+ #define BEE_DETECT_LITTLE_ENDIAN 1
+ #define BEE_DETECT_UPTR bee_u64
+ #ifdef __GNUC__
+ #define BEE_DETECT_AMD64_GCC 1
+ #endif
+ #ifdef _MSC_VER
+ #define BEE_DETECT_AMD64_MSVC 1
+ #endif
+//
+// 64-bit Sparc architecture (implies v9).
+//
+#elif ((defined __sparc__ || defined __sparc) && defined __arch64__) \
+ || defined __sparcv9
+ #define BEE_DETECT_BIG_ENDIAN 1
+ #define BEE_DETECT_UPTR bee_u64
+ #ifdef __GNUC__
+ #define BEE_DETECT_SPARCV9_GCC_64 1
+ #define BEE_DETECT_LITTLE_FAST 1
+ #endif
+//
+// 32-bit Sparc.
+//
+#elif (defined __sparc__ || defined __sparc) \
+ && !(defined __sparcv9 || defined __arch64__)
+ #define BEE_DETECT_BIG_ENDIAN 1
+ #define BEE_DETECT_UPTR bee_u32
+ #if defined __GNUC__ && defined __sparc_v9__
+ #define BEE_DETECT_SPARCV9_GCC_32 1
+ #define BEE_DETECT_LITTLE_FAST 1
+ #endif
+//
+// ARM, little-endian.
+///
+#elif defined __arm__ && __ARMEL__
+ #define BEE_DETECT_LITTLE_ENDIAN 1
+//
+// MIPS, little-endian.
+//
+#elif MIPSEL || _MIPSEL || __MIPSEL || __MIPSEL__
+ #define BEE_DETECT_LITTLE_ENDIAN 1
+//
+// MIPS, big-endian.
+//
+#elif MIPSEB || _MIPSEB || __MIPSEB || __MIPSEB__
+ #define BEE_DETECT_BIG_ENDIAN 1
+//
+// PowerPC.
+//
+#elif defined __powerpc__ || defined __POWERPC__ || defined __ppc__ \
+ || defined _ARCH_PPC
+
+ //
+ // Note: we do not declare cross-endian access to be "fast": even if
+ // using inline assembly, implementation should still assume that
+ // keeping the decoded word in a temporary is faster than decoding
+ // it again.
+ ///
+ #if defined __GNUC__
+ #if BEE_64_TRUE
+ #define BEE_DETECT_PPC64_GCC 1
+ #else
+ #define BEE_DETECT_PPC32_GCC 1
+ #endif
+ #endif
+
+ #if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN
+ #define BEE_DETECT_BIG_ENDIAN 1
+ #elif defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN
+ #define BEE_DETECT_LITTLE_ENDIAN 1
+ #endif
+//
+// Itanium, 64-bit.
+///
+#elif defined __ia64 || defined __ia64__ \
+ || defined __itanium__ || defined _M_IA64
+
+ #if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN
+ #define BEE_DETECT_BIG_ENDIAN 1
+ #else
+ #define BEE_DETECT_LITTLE_ENDIAN 1
+ #endif
+ #if defined __LP64__ || defined _LP64
+ #define BEE_DETECT_UPTR bee_u64
+ #else
+ #define BEE_DETECT_UPTR bee_u32
+ #endif
+#endif
+
+#if defined BEE_DETECT_SPARCV9_GCC_32 || defined BEE_DETECT_SPARCV9_GCC_64
+ #define BEE_DETECT_SPARCV9_GCC 1
+#endif
+
+#if defined BEE_DETECT_UNALIGNED && !defined BEE_UNALIGNED
+ #define BEE_UNALIGNED BEE_DETECT_UNALIGNED
+#endif
+#if defined BEE_DETECT_UPTR && !defined BEE_UPTR
+ #define BEE_UPTR BEE_DETECT_UPTR
+#endif
+#if defined BEE_DETECT_LITTLE_ENDIAN && !defined BEE_LITTLE_ENDIAN
+ #define BEE_LITTLE_ENDIAN BEE_DETECT_LITTLE_ENDIAN
+#endif
+#if defined BEE_DETECT_BIG_ENDIAN && !defined BEE_BIG_ENDIAN
+ #define BEE_BIG_ENDIAN BEE_DETECT_BIG_ENDIAN
+#endif
+#if defined BEE_DETECT_LITTLE_FAST && !defined BEE_LITTLE_FAST
+ #define BEE_LITTLE_FAST BEE_DETECT_LITTLE_FAST
+#endif
+#if defined BEE_DETECT_BIG_FAST && !defined BEE_BIG_FAST
+ #define BEE_BIG_FAST BEE_DETECT_BIG_FAST
+#endif
+#if defined BEE_DETECT_SPARCV9_GCC_32 && !defined BEE_SPARCV9_GCC_32
+ #define BEE_SPARCV9_GCC_32 BEE_DETECT_SPARCV9_GCC_32
+#endif
+#if defined BEE_DETECT_SPARCV9_GCC_64 && !defined BEE_SPARCV9_GCC_64
+ #define BEE_SPARCV9_GCC_64 BEE_DETECT_SPARCV9_GCC_64
+#endif
+#if defined BEE_DETECT_SPARCV9_GCC && !defined BEE_SPARCV9_GCC
+ #define BEE_SPARCV9_GCC BEE_DETECT_SPARCV9_GCC
+#endif
+#if defined BEE_DETECT_I386_GCC && !defined BEE_I386_GCC
+ #define BEE_I386_GCC BEE_DETECT_I386_GCC
+#endif
+#if defined BEE_DETECT_I386_MSVC && !defined BEE_I386_MSVC
+ #define BEE_I386_MSVC BEE_DETECT_I386_MSVC
+#endif
+#if defined BEE_DETECT_AMD64_GCC && !defined BEE_AMD64_GCC
+ #define BEE_AMD64_GCC BEE_DETECT_AMD64_GCC
+#endif
+#if defined BEE_DETECT_AMD64_MSVC && !defined BEE_AMD64_MSVC
+ #define BEE_AMD64_MSVC BEE_DETECT_AMD64_MSVC
+#endif
+#if defined BEE_DETECT_PPC32_GCC && !defined BEE_PPC32_GCC
+ #define BEE_PPC32_GCC BEE_DETECT_PPC32_GCC
+#endif
+#if defined BEE_DETECT_PPC64_GCC && !defined BEE_PPC64_GCC
+ #define BEE_PPC64_GCC BEE_DETECT_PPC64_GCC
+#endif
+
+#if BEE_LITTLE_ENDIAN && !defined BEE_LITTLE_FAST
+ #define BEE_LITTLE_FAST 1
+#endif
+#if BEE_BIG_ENDIAN && !defined BEE_BIG_FAST
+ #define BEE_BIG_FAST 1
+#endif
+
+#if defined BEE_UPTR && !(BEE_LITTLE_ENDIAN || BEE_BIG_ENDIAN)
+ #error BEE_UPTR defined, but endianness is not known.
+#endif
+
+
+#if BEE_I386_GCC && !BEE_NO_ASM
+ //
+ // On x86 32-bit, with gcc, we use the bswapl opcode to byte-swap 32-bit
+ // values.
+ //
+ static BEE_INLINE bee_u32 bee_bswap32(bee_u32 x)
+ {
+ __asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x));
+ return x;
+ }
+
+ #if BEE_64
+ static BEE_INLINE bee_u64 bee_bswap64(bee_u64 x)
+ {
+ return ((bee_u64)bee_bswap32((bee_u32)x) << 32)
+ | (bee_u64)bee_bswap32((bee_u32)(x >> 32));
+ }
+ #endif
+
+#elif BEE_AMD64_GCC && !BEE_NO_ASM
+ //
+	// On x86 64-bit, with gcc, we use the bswapl and bswapq opcodes to
+	// byte-swap 32-bit and 64-bit values, respectively.
+ //
+ static BEE_INLINE bee_u32 bee_bswap32(bee_u32 x)
+ {
+ __asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x));
+ return x;
+ }
+
+ #if BEE_64
+ static BEE_INLINE bee_u64 bee_bswap64(bee_u64 x)
+ {
+ __asm__ __volatile__ ("bswapq %0" : "=r" (x) : "0" (x));
+ return x;
+ }
+ #endif
+
+ //
+ // Disabled code. Apparently, Microsoft Visual C 2005 is smart enough
+ // to generate proper opcodes for endianness swapping with the pure C
+ // implementation below.
+ //
+ //
+ //#elif BEE_I386_MSVC && !BEE_NO_ASM
+ //
+ //static __inline bee_u32 __declspec(naked) __fastcall
+ //bee_bswap32(bee_u32 x)
+ //{
+ // __asm {
+ // bswap ecx
+ // mov eax,ecx
+ // ret
+ // }
+ //}
+ //
+ //#if BEE_64
+ //
+ //static BEE_INLINE bee_u64
+ //bee_bswap64(bee_u64 x)
+ //{
+ // return ((bee_u64)bee_bswap32((bee_u32)x) << 32)
+ // | (bee_u64)bee_bswap32((bee_u32)(x >> 32));
+ //}
+ //
+ //#endif
+ //
+ //
+ // [end of disabled code]
+ //
+#else
+ static BEE_INLINE bee_u32 bee_bswap32(bee_u32 x)
+ {
+ x = BEE_T32((x << 16) | (x >> 16));
+ x = ((x & BEE_C32(0xFF00FF00)) >> 8)
+ | ((x & BEE_C32(0x00FF00FF)) << 8);
+ return x;
+ }
+
+ #if BEE_64
+ //
+ // Byte-swap a 64-bit value.
+ //
+ // @param x the input value
+ // @return the byte-swapped value
+ ///
+ static BEE_INLINE bee_u64 bee_bswap64(bee_u64 x)
+ {
+ x = BEE_T64((x << 32) | (x >> 32));
+ x = ((x & BEE_C64(0xFFFF0000FFFF0000)) >> 16)
+ | ((x & BEE_C64(0x0000FFFF0000FFFF)) << 16);
+ x = ((x & BEE_C64(0xFF00FF00FF00FF00)) >> 8)
+ | ((x & BEE_C64(0x00FF00FF00FF00FF)) << 8);
+ return x;
+ }
+ #endif
+#endif
+
+#if BEE_SPARCV9_GCC && !BEE_NO_ASM
+ //
+ // On UltraSPARC systems, native ordering is big-endian, but it is
+ // possible to perform little-endian read accesses by specifying the
+ // address space 0x88 (ASI_PRIMARY_LITTLE). Basically, either we use
+ // the opcode "lda [%reg]0x88,%dst", where %reg is the register which
+ // contains the source address and %dst is the destination register,
+ // or we use "lda [%reg+imm]%asi,%dst", which uses the %asi register
+ // to get the address space name. The latter format is better since it
+ // combines an addition and the actual access in a single opcode; but
+ // it requires the setting (and subsequent resetting) of %asi, which is
+ // slow. Some operations (i.e. MD5 compression function) combine many
+ // successive little-endian read accesses, which may share the same
+ // %asi setting. The macros below contain the appropriate inline
+ // assembly.
+ //
+
+ #define BEE_SPARCV9_SET_ASI \
+ bee_u32 bee_sparcv9_asi; \
+ __asm__ __volatile__ ( \
+ "rd %%asi,%0\n\twr %%g0,0x88,%%asi" : "=r" (bee_sparcv9_asi));
+
+ #define BEE_SPARCV9_RESET_ASI \
+ __asm__ __volatile__ ("wr %%g0,%0,%%asi" : : "r" (bee_sparcv9_asi));
+
+ #define BEE_SPARCV9_DEC32LE(base, idx) ({ \
+ bee_u32 bee_sparcv9_tmp; \
+ __asm__ __volatile__ ("lda [%1+" #idx "*4]%%asi,%0" \
+ : "=r" (bee_sparcv9_tmp) : "r" (base)); \
+ bee_sparcv9_tmp; \
+ })
+#endif
+
+//-----------------------------------------------------------------------------------------
+//--.
+static BEE_INLINE void bee_enc16be(void *dst, unsigned val)
+{
+ ((unsigned char *)dst)[0] = (val >> 8);
+ ((unsigned char *)dst)[1] = val;
+}
+
+//-----------------------------------------------------------------------------------------
+//--.
+static BEE_INLINE unsigned bee_dec16be(const void *src)
+{
+ return ((unsigned)(((const unsigned char *)src)[0]) << 8)
+ | (unsigned)(((const unsigned char *)src)[1]);
+}
+
+//-----------------------------------------------------------------------------------------
+//--.
+static BEE_INLINE void bee_enc16le(void *dst, unsigned val)
+{
+ ((unsigned char *)dst)[0] = val;
+ ((unsigned char *)dst)[1] = val >> 8;
+}
+
+//-----------------------------------------------------------------------------------------
+//--.
+static BEE_INLINE unsigned bee_dec16le(const void *src)
+{
+ return (unsigned)(((const unsigned char *)src)[0])
+ | ((unsigned)(((const unsigned char *)src)[1]) << 8);
+}
+
+//-----------------------------------------------------------------------------------------
+//--.
+///
+// Encode a 32-bit value into the provided buffer (big endian convention).
+//
+// @param dst the destination buffer
+// @param val the 32-bit value to encode
+//
+static BEE_INLINE void bee_enc32be(void *dst, bee_u32 val)
+{
+ #if defined BEE_UPTR
+ #if BEE_UNALIGNED
+ #if BEE_LITTLE_ENDIAN
+ val = bee_bswap32(val);
+ #endif
+ *(bee_u32 *)dst = val;
+ #else
+ if (((BEE_UPTR)dst & 3) == 0) {
+ #if BEE_LITTLE_ENDIAN
+ val = bee_bswap32(val);
+ #endif
+ *(bee_u32 *)dst = val;
+ } else {
+ ((unsigned char *)dst)[0] = (val >> 24);
+ ((unsigned char *)dst)[1] = (val >> 16);
+ ((unsigned char *)dst)[2] = (val >> 8);
+ ((unsigned char *)dst)[3] = val;
+ }
+ #endif
+ #else
+ ((unsigned char *)dst)[0] = (val >> 24);
+ ((unsigned char *)dst)[1] = (val >> 16);
+ ((unsigned char *)dst)[2] = (val >> 8);
+ ((unsigned char *)dst)[3] = val;
+ #endif
+}
+
+//-----------------------------------------------------------------------------------------
+//--
+//
+// Encode a 32-bit value into the provided buffer (big endian convention).
+// The destination buffer must be properly aligned.
+//
+// @param dst the destination buffer (32-bit aligned)
+// @param val the value to encode
+//
+static BEE_INLINE void bee_enc32be_aligned(void *dst, bee_u32 val)
+{
+ #if BEE_LITTLE_ENDIAN
+ *(bee_u32 *)dst = bee_bswap32(val);
+ #elif BEE_BIG_ENDIAN
+ *(bee_u32 *)dst = val;
+ #else
+ ((unsigned char *)dst)[0] = (val >> 24);
+ ((unsigned char *)dst)[1] = (val >> 16);
+ ((unsigned char *)dst)[2] = (val >> 8);
+ ((unsigned char *)dst)[3] = val;
+ #endif
+}
+
+//-----------------------------------------------------------------------------------------
+//--.
+//
+// Decode a 32-bit value from the provided buffer (big endian convention).
+//
+// @param src the source buffer
+// @return the decoded value
+//
+static BEE_INLINE bee_u32 bee_dec32be( const void *src )
+{
+ #if defined BEE_UPTR
+ #if BEE_UNALIGNED
+ #if BEE_LITTLE_ENDIAN
+ return bee_bswap32(*(const bee_u32 *)src);
+ #else
+ return *(const bee_u32 *)src;
+ #endif
+ #else
+ if (((BEE_UPTR)src & 3) == 0) {
+ #if BEE_LITTLE_ENDIAN
+ return bee_bswap32(*(const bee_u32 *)src);
+ #else
+ return *(const bee_u32 *)src;
+ #endif
+ } else {
+ return ((bee_u32)(((const unsigned char *)src)[0]) << 24)
+ | ((bee_u32)(((const unsigned char *)src)[1]) << 16)
+ | ((bee_u32)(((const unsigned char *)src)[2]) << 8)
+ | (bee_u32)(((const unsigned char *)src)[3]);
+ }
+ #endif
+ #else
+ return ((bee_u32)(((const unsigned char *)src)[0]) << 24)
+ | ((bee_u32)(((const unsigned char *)src)[1]) << 16)
+ | ((bee_u32)(((const unsigned char *)src)[2]) << 8)
+ | (bee_u32)(((const unsigned char *)src)[3]);
+ #endif
+}
+
+//-----------------------------------------------------------------------------------------
+//--.
+//
+// Decode a 32-bit value from the provided buffer (big endian convention).
+// The source buffer must be properly aligned.
+//
+// @param src the source buffer (32-bit aligned)
+// @return the decoded value
+//
+static BEE_INLINE bee_u32 bee_dec32be_aligned(const void *src)
+{
+ #if BEE_LITTLE_ENDIAN
+ return bee_bswap32(*(const bee_u32 *)src);
+ #elif BEE_BIG_ENDIAN
+ return *(const bee_u32 *)src;
+ #else
+ return ((bee_u32)(((const unsigned char *)src)[0]) << 24)
+ | ((bee_u32)(((const unsigned char *)src)[1]) << 16)
+ | ((bee_u32)(((const unsigned char *)src)[2]) << 8)
+ | (bee_u32)(((const unsigned char *)src)[3]);
+ #endif
+}
+
+//-----------------------------------------------------------------------------------------
+//--.
+//
+// Encode a 32-bit value into the provided buffer (little endian convention).
+//
+// @param dst the destination buffer
+// @param val the 32-bit value to encode
+//
+static BEE_INLINE void bee_enc32le(void *dst, bee_u32 val)
+{
+ #if defined BEE_UPTR
+ #if BEE_UNALIGNED
+ #if BEE_BIG_ENDIAN
+ val = bee_bswap32(val);
+ #endif
+ *(bee_u32 *)dst = val;
+ #else
+ if (((BEE_UPTR)dst & 3) == 0) {
+ #if BEE_BIG_ENDIAN
+ val = bee_bswap32(val);
+ #endif
+ *(bee_u32 *)dst = val;
+ } else {
+ ((unsigned char *)dst)[0] = val;
+ ((unsigned char *)dst)[1] = (val >> 8);
+ ((unsigned char *)dst)[2] = (val >> 16);
+ ((unsigned char *)dst)[3] = (val >> 24);
+ }
+ #endif
+ #else
+ ((unsigned char *)dst)[0] = val;
+ ((unsigned char *)dst)[1] = (val >> 8);
+ ((unsigned char *)dst)[2] = (val >> 16);
+ ((unsigned char *)dst)[3] = (val >> 24);
+ #endif
+}
+
+//-----------------------------------------------------------------------------------------
+//--.
+//
+// Encode a 32-bit value into the provided buffer (little endian convention).
+// The destination buffer must be properly aligned.
+//
+// @param dst the destination buffer (32-bit aligned)
+// @param val the value to encode
+//
+static BEE_INLINE void bee_enc32le_aligned(void *dst, bee_u32 val)
+{
+#if BEE_LITTLE_ENDIAN
+ *(bee_u32 *)dst = val;
+#elif BEE_BIG_ENDIAN
+ *(bee_u32 *)dst = bee_bswap32(val);
+#else
+ ((unsigned char *)dst)[0] = val;
+ ((unsigned char *)dst)[1] = (val >> 8);
+ ((unsigned char *)dst)[2] = (val >> 16);
+ ((unsigned char *)dst)[3] = (val >> 24);
+#endif
+}
+
+//-----------------------------------------------------------------------------------------
+//--.
+//
+// Decode a 32-bit value from the provided buffer (little endian convention).
+//
+// @param src the source buffer
+// @return the decoded value
+//
+static BEE_INLINE bee_u32 bee_dec32le(const void *src)
+{
+#if defined BEE_UPTR
+#if BEE_UNALIGNED
+#if BEE_BIG_ENDIAN
+ return bee_bswap32(*(const bee_u32 *)src);
+#else
+ return *(const bee_u32 *)src;
+#endif
+#else
+ if (((BEE_UPTR)src & 3) == 0) {
+#if BEE_BIG_ENDIAN
+#if BEE_SPARCV9_GCC && !BEE_NO_ASM
+ bee_u32 tmp;
+
+ //
+ // "__volatile__" is needed here because without it,
+ // gcc-3.4.3 miscompiles the code and performs the
+ // access before the test on the address, thus triggering
+ // a bus error...
+ //
+ __asm__ __volatile__ (
+ "lda [%1]0x88,%0" : "=r" (tmp) : "r" (src));
+ return tmp;
+//
+// On PowerPC, this turns out not to be worth the effort: the inline
+// assembly makes GCC optimizer uncomfortable, which tends to nullify
+// the decoding gains.
+//
+// For most hash functions, using this inline assembly trick changes
+// hashing speed by less than 5% and often _reduces_ it. The biggest
+// gains are for MD4 (+11%) and CubeHash (+30%). For all others, it is
+// less than 10%. The speed gain on CubeHash is probably due to the
+// chronic shortage of registers that CubeHash endures; for the other
+// functions, the generic code appears to be efficient enough already.
+//
+//#elif (BEE_PPC32_GCC || BEE_PPC64_GCC) && !BEE_NO_ASM
+// bee_u32 tmp;
+//
+// __asm__ __volatile__ (
+// "lwbrx %0,0,%1" : "=r" (tmp) : "r" (src));
+// return tmp;
+//
+#else
+ return bee_bswap32(*(const bee_u32 *)src);
+#endif
+#else
+ return *(const bee_u32 *)src;
+#endif
+ } else {
+ return (bee_u32)(((const unsigned char *)src)[0])
+ | ((bee_u32)(((const unsigned char *)src)[1]) << 8)
+ | ((bee_u32)(((const unsigned char *)src)[2]) << 16)
+ | ((bee_u32)(((const unsigned char *)src)[3]) << 24);
+ }
+#endif
+#else
+ return (bee_u32)(((const unsigned char *)src)[0])
+ | ((bee_u32)(((const unsigned char *)src)[1]) << 8)
+ | ((bee_u32)(((const unsigned char *)src)[2]) << 16)
+ | ((bee_u32)(((const unsigned char *)src)[3]) << 24);
+#endif
+}
+
+//-----------------------------------------------------------------------------------------
+//--.
+//
+// Decode a 32-bit value from the provided buffer (little endian convention).
+// The source buffer must be properly aligned.
+//
+// @param src the source buffer (32-bit aligned)
+// @return the decoded value
+//
+static BEE_INLINE bee_u32 bee_dec32le_aligned(const void *src)
+{
+#if BEE_LITTLE_ENDIAN
+ return *(const bee_u32 *)src;
+#elif BEE_BIG_ENDIAN
+#if BEE_SPARCV9_GCC && !BEE_NO_ASM
+ bee_u32 tmp;
+
+ __asm__ __volatile__ ("lda [%1]0x88,%0" : "=r" (tmp) : "r" (src));
+ return tmp;
+//
+// Not worth it generally.
+//
+//#elif (BEE_PPC32_GCC || BEE_PPC64_GCC) && !BEE_NO_ASM
+// bee_u32 tmp;
+//
+// __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (tmp) : "r" (src));
+// return tmp;
+///
+#else
+ return bee_bswap32(*(const bee_u32 *)src);
+#endif
+#else
+ return (bee_u32)(((const unsigned char *)src)[0])
+ | ((bee_u32)(((const unsigned char *)src)[1]) << 8)
+ | ((bee_u32)(((const unsigned char *)src)[2]) << 16)
+ | ((bee_u32)(((const unsigned char *)src)[3]) << 24);
+#endif
+}
+
+#if BEE_64
+ //-----------------------------------------------------------------------------------------
+ //--.
+ //
+ // Encode a 64-bit value into the provided buffer (big endian convention).
+ //
+ // @param dst the destination buffer
+ // @param val the 64-bit value to encode
+ //
+ static BEE_INLINE void
+ bee_enc64be(void *dst, bee_u64 val)
+ {
+ #if defined BEE_UPTR
+ #if BEE_UNALIGNED
+ #if BEE_LITTLE_ENDIAN
+ val = bee_bswap64(val);
+ #endif
+ *(bee_u64 *)dst = val;
+ #else
+ if (((BEE_UPTR)dst & 7) == 0) {
+ #if BEE_LITTLE_ENDIAN
+ val = bee_bswap64(val);
+ #endif
+ *(bee_u64 *)dst = val;
+ } else {
+ ((unsigned char *)dst)[0] = (val >> 56);
+ ((unsigned char *)dst)[1] = (val >> 48);
+ ((unsigned char *)dst)[2] = (val >> 40);
+ ((unsigned char *)dst)[3] = (val >> 32);
+ ((unsigned char *)dst)[4] = (val >> 24);
+ ((unsigned char *)dst)[5] = (val >> 16);
+ ((unsigned char *)dst)[6] = (val >> 8);
+ ((unsigned char *)dst)[7] = val;
+ }
+ #endif
+ #else
+ ((unsigned char *)dst)[0] = (val >> 56);
+ ((unsigned char *)dst)[1] = (val >> 48);
+ ((unsigned char *)dst)[2] = (val >> 40);
+ ((unsigned char *)dst)[3] = (val >> 32);
+ ((unsigned char *)dst)[4] = (val >> 24);
+ ((unsigned char *)dst)[5] = (val >> 16);
+ ((unsigned char *)dst)[6] = (val >> 8);
+ ((unsigned char *)dst)[7] = val;
+ #endif
+ }
+
+ //-----------------------------------------------------------------------------------------
+ //--.
+ //
+ // Encode a 64-bit value into the provided buffer (big endian convention).
+ // The destination buffer must be properly aligned.
+ //
+ // @param dst the destination buffer (64-bit aligned)
+ // @param val the value to encode
+ //
+ static BEE_INLINE void bee_enc64be_aligned(void *dst, bee_u64 val)
+ {
+ #if BEE_LITTLE_ENDIAN
+ *(bee_u64 *)dst = bee_bswap64(val);
+ #elif BEE_BIG_ENDIAN
+ *(bee_u64 *)dst = val;
+ #else
+ ((unsigned char *)dst)[0] = (val >> 56);
+ ((unsigned char *)dst)[1] = (val >> 48);
+ ((unsigned char *)dst)[2] = (val >> 40);
+ ((unsigned char *)dst)[3] = (val >> 32);
+ ((unsigned char *)dst)[4] = (val >> 24);
+ ((unsigned char *)dst)[5] = (val >> 16);
+ ((unsigned char *)dst)[6] = (val >> 8);
+ ((unsigned char *)dst)[7] = val;
+ #endif
+ }
+
+ //-----------------------------------------------------------------------------------------
+ //--.
+ //
+ // Decode a 64-bit value from the provided buffer (big endian convention).
+ //
+ // @param src the source buffer
+ // @return the decoded value
+ //
+ static BEE_INLINE bee_u64 bee_dec64be(const void *src)
+ {
+ #if defined BEE_UPTR
+ #if BEE_UNALIGNED
+ #if BEE_LITTLE_ENDIAN
+ return bee_bswap64(*(const bee_u64 *)src);
+ #else
+ return *(const bee_u64 *)src;
+ #endif
+ #else
+ if (((BEE_UPTR)src & 7) == 0) {
+ #if BEE_LITTLE_ENDIAN
+ return bee_bswap64(*(const bee_u64 *)src);
+ #else
+ return *(const bee_u64 *)src;
+ #endif
+ } else {
+ return ((bee_u64)(((const unsigned char *)src)[0]) << 56)
+ | ((bee_u64)(((const unsigned char *)src)[1]) << 48)
+ | ((bee_u64)(((const unsigned char *)src)[2]) << 40)
+ | ((bee_u64)(((const unsigned char *)src)[3]) << 32)
+ | ((bee_u64)(((const unsigned char *)src)[4]) << 24)
+ | ((bee_u64)(((const unsigned char *)src)[5]) << 16)
+ | ((bee_u64)(((const unsigned char *)src)[6]) << 8)
+ | (bee_u64)(((const unsigned char *)src)[7]);
+ }
+ #endif
+ #else
+ return ((bee_u64)(((const unsigned char *)src)[0]) << 56)
+ | ((bee_u64)(((const unsigned char *)src)[1]) << 48)
+ | ((bee_u64)(((const unsigned char *)src)[2]) << 40)
+ | ((bee_u64)(((const unsigned char *)src)[3]) << 32)
+ | ((bee_u64)(((const unsigned char *)src)[4]) << 24)
+ | ((bee_u64)(((const unsigned char *)src)[5]) << 16)
+ | ((bee_u64)(((const unsigned char *)src)[6]) << 8)
+ | (bee_u64)(((const unsigned char *)src)[7]);
+ #endif
+ }
+
+ //-----------------------------------------------------------------------------------------
+ //--.
+ //
+ // Decode a 64-bit value from the provided buffer (big endian convention).
+ // The source buffer must be properly aligned.
+ //
+ // @param src the source buffer (64-bit aligned)
+ // @return the decoded value
+ //
+ static BEE_INLINE bee_u64 bee_dec64be_aligned(const void *src)
+ {
+ #if BEE_LITTLE_ENDIAN
+ return bee_bswap64(*(const bee_u64 *)src);
+ #elif BEE_BIG_ENDIAN
+ return *(const bee_u64 *)src;
+ #else
+ return ((bee_u64)(((const unsigned char *)src)[0]) << 56)
+ | ((bee_u64)(((const unsigned char *)src)[1]) << 48)
+ | ((bee_u64)(((const unsigned char *)src)[2]) << 40)
+ | ((bee_u64)(((const unsigned char *)src)[3]) << 32)
+ | ((bee_u64)(((const unsigned char *)src)[4]) << 24)
+ | ((bee_u64)(((const unsigned char *)src)[5]) << 16)
+ | ((bee_u64)(((const unsigned char *)src)[6]) << 8)
+ | (bee_u64)(((const unsigned char *)src)[7]);
+ #endif
+ }
+
+ //-----------------------------------------------------------------------------------------
+ //--.
+ //
+ // Encode a 64-bit value into the provided buffer (little endian convention).
+ //
+ // @param dst the destination buffer
+ // @param val the 64-bit value to encode
+ //
+ static BEE_INLINE void bee_enc64le(void *dst, bee_u64 val)
+ {
+ #if defined BEE_UPTR
+ #if BEE_UNALIGNED
+ #if BEE_BIG_ENDIAN
+ val = bee_bswap64(val);
+ #endif
+ *(bee_u64 *)dst = val;
+ #else
+ if (((BEE_UPTR)dst & 7) == 0) {
+ #if BEE_BIG_ENDIAN
+ val = bee_bswap64(val);
+ #endif
+ *(bee_u64 *)dst = val;
+ } else {
+ ((unsigned char *)dst)[0] = val;
+ ((unsigned char *)dst)[1] = (val >> 8);
+ ((unsigned char *)dst)[2] = (val >> 16);
+ ((unsigned char *)dst)[3] = (val >> 24);
+ ((unsigned char *)dst)[4] = (val >> 32);
+ ((unsigned char *)dst)[5] = (val >> 40);
+ ((unsigned char *)dst)[6] = (val >> 48);
+ ((unsigned char *)dst)[7] = (val >> 56);
+ }
+ #endif
+ #else
+ ((unsigned char *)dst)[0] = val;
+ ((unsigned char *)dst)[1] = (val >> 8);
+ ((unsigned char *)dst)[2] = (val >> 16);
+ ((unsigned char *)dst)[3] = (val >> 24);
+ ((unsigned char *)dst)[4] = (val >> 32);
+ ((unsigned char *)dst)[5] = (val >> 40);
+ ((unsigned char *)dst)[6] = (val >> 48);
+ ((unsigned char *)dst)[7] = (val >> 56);
+ #endif
+ }
+
+ //-----------------------------------------------------------------------------------------
+ //--.
+ //
+ // Encode a 64-bit value into the provided buffer (little endian convention).
+ // The destination buffer must be properly aligned.
+ //
+ // @param dst the destination buffer (64-bit aligned)
+ // @param val the value to encode
+ //
+ static BEE_INLINE void bee_enc64le_aligned(void *dst, bee_u64 val)
+ {
+ #if BEE_LITTLE_ENDIAN
+ *(bee_u64 *)dst = val;
+ #elif BEE_BIG_ENDIAN
+ *(bee_u64 *)dst = bee_bswap64(val);
+ #else
+ ((unsigned char *)dst)[0] = val;
+ ((unsigned char *)dst)[1] = (val >> 8);
+ ((unsigned char *)dst)[2] = (val >> 16);
+ ((unsigned char *)dst)[3] = (val >> 24);
+ ((unsigned char *)dst)[4] = (val >> 32);
+ ((unsigned char *)dst)[5] = (val >> 40);
+ ((unsigned char *)dst)[6] = (val >> 48);
+ ((unsigned char *)dst)[7] = (val >> 56);
+ #endif
+ }
+
+ //-----------------------------------------------------------------------------------------
+ //--.
+ //
+ // Decode a 64-bit value from the provided buffer (little endian convention).
+ //
+ // @param src the source buffer
+ // @return the decoded value
+ //
+ static BEE_INLINE bee_u64 bee_dec64le(const void *src)
+ {
+ #if defined BEE_UPTR
+ #if BEE_UNALIGNED
+ #if BEE_BIG_ENDIAN
+ return bee_bswap64(*(const bee_u64 *)src);
+ #else
+ return *(const bee_u64 *)src;
+ #endif
+ #else
+ if (((BEE_UPTR)src & 7) == 0) {
+ #if BEE_BIG_ENDIAN
+ #if BEE_SPARCV9_GCC_64 && !BEE_NO_ASM
+ bee_u64 tmp;
+
+ __asm__ __volatile__ (
+ "ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src));
+ return tmp;
+ //
+ // Not worth it generally.
+ //
+ //#elif BEE_PPC32_GCC && !BEE_NO_ASM
+ // return (bee_u64)bee_dec32le_aligned(src)
+ // | ((bee_u64)bee_dec32le_aligned(
+ // (const char *)src + 4) << 32);
+ //#elif BEE_PPC64_GCC && !BEE_NO_ASM
+ // bee_u64 tmp;
+ //
+ // __asm__ __volatile__ (
+ // "ldbrx %0,0,%1" : "=r" (tmp) : "r" (src));
+ // return tmp;
+ //
+ #else
+ return bee_bswap64(*(const bee_u64 *)src);
+ #endif
+ #else
+ return *(const bee_u64 *)src;
+ #endif
+ } else {
+ return (bee_u64)(((const unsigned char *)src)[0])
+ | ((bee_u64)(((const unsigned char *)src)[1]) << 8)
+ | ((bee_u64)(((const unsigned char *)src)[2]) << 16)
+ | ((bee_u64)(((const unsigned char *)src)[3]) << 24)
+ | ((bee_u64)(((const unsigned char *)src)[4]) << 32)
+ | ((bee_u64)(((const unsigned char *)src)[5]) << 40)
+ | ((bee_u64)(((const unsigned char *)src)[6]) << 48)
+ | ((bee_u64)(((const unsigned char *)src)[7]) << 56);
+ }
+ #endif
+ #else
+ return (bee_u64)(((const unsigned char *)src)[0])
+ | ((bee_u64)(((const unsigned char *)src)[1]) << 8)
+ | ((bee_u64)(((const unsigned char *)src)[2]) << 16)
+ | ((bee_u64)(((const unsigned char *)src)[3]) << 24)
+ | ((bee_u64)(((const unsigned char *)src)[4]) << 32)
+ | ((bee_u64)(((const unsigned char *)src)[5]) << 40)
+ | ((bee_u64)(((const unsigned char *)src)[6]) << 48)
+ | ((bee_u64)(((const unsigned char *)src)[7]) << 56);
+ #endif
+ }
+
+ //-----------------------------------------------------------------------------------------
+ //--.
+ //
+ // Decode a 64-bit value from the provided buffer (little endian convention).
+ // The source buffer must be properly aligned.
+ //
+ // @param src the source buffer (64-bit aligned)
+ // @return the decoded value
+ //
+ static BEE_INLINE bee_u64 bee_dec64le_aligned(const void *src)
+ {
+ #if BEE_LITTLE_ENDIAN
+ return *(const bee_u64 *)src;
+ #elif BEE_BIG_ENDIAN
+ #if BEE_SPARCV9_GCC_64 && !BEE_NO_ASM
+ bee_u64 tmp;
+
+ __asm__ __volatile__ ("ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src));
+ return tmp;
+ //
+ // Not worth it generally.
+ //
+ //#elif BEE_PPC32_GCC && !BEE_NO_ASM
+ // return (bee_u64)bee_dec32le_aligned(src)
+ // | ((bee_u64)bee_dec32le_aligned((const char *)src + 4) << 32);
+ //#elif BEE_PPC64_GCC && !BEE_NO_ASM
+ // bee_u64 tmp;
+ //
+ // __asm__ __volatile__ ("ldbrx %0,0,%1" : "=r" (tmp) : "r" (src));
+ // return tmp;
+ ///
+ #else
+ return bee_bswap64(*(const bee_u64 *)src);
+ #endif
+ #else
+ return (bee_u64)(((const unsigned char *)src)[0])
+ | ((bee_u64)(((const unsigned char *)src)[1]) << 8)
+ | ((bee_u64)(((const unsigned char *)src)[2]) << 16)
+ | ((bee_u64)(((const unsigned char *)src)[3]) << 24)
+ | ((bee_u64)(((const unsigned char *)src)[4]) << 32)
+ | ((bee_u64)(((const unsigned char *)src)[5]) << 40)
+ | ((bee_u64)(((const unsigned char *)src)[6]) << 48)
+ | ((bee_u64)(((const unsigned char *)src)[7]) << 56);
+ #endif
+ }
+
+#endif
+
+
+#endif
diff --git a/stratum/algos/hsr14.c b/stratum/algos/hsr14.c
index b31decdb4..6cdd85b58 100644
--- a/stratum/algos/hsr14.c
+++ b/stratum/algos/hsr14.c
@@ -16,8 +16,7 @@
#include "../sha3/sph_echo.h"
#include "../sha3/sph_hamsi.h"
#include "../sha3/sph_fugue.h"
-
-#include "sm3.h"
+#include "../sha3/sph_sm3.h"
#include "common.h"
@@ -87,7 +86,7 @@ void hsr_hash(const char* input, char* output, uint32_t len)
sm3_init(&ctx_sm3);
sm3_update(&ctx_sm3, hash, 64);
memset(hash, 0, sizeof hash);
- sm3_close(&ctx_sm3, hash);
+ sph_sm3_close(&ctx_sm3, hash);
sph_hamsi512_init(&ctx_hamsi1);
sph_hamsi512(&ctx_hamsi1, hash, 64);
diff --git a/stratum/algos/lane.c b/stratum/algos/lane.c
new file mode 100644
index 000000000..7a6ea9d1b
--- /dev/null
+++ b/stratum/algos/lane.c
@@ -0,0 +1,2151 @@
+/*
+ * Copyright (c) 2008 Sebastiaan Indesteege
+ *
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Optimised ANSI-C implementation of LANE
+ */
+
+#include "lane.h"
+
+#define T8(x) ((x) & 0xff) /* truncate to the low 8 bits */
+#define B0(x) (T8((x) )) /* byte 0 (least significant) */
+#define B1(x) (T8((x) >> 8)) /* byte 1 */
+#define B2(x) (T8((x) >> 16)) /* byte 2 */
+#define B3(x) (T8((x) >> 24)) /* byte 3 (most significant) */
+#define MSB32(x) ((u32)((((u64)(x))>>32) & 0xffffffff)) /* upper 32 bits of a 64-bit value */
+#define LSB32(x) ((u32)((((u64)(x)) ) & 0xffffffff)) /* lower 32 bits of a 64-bit value */
+#ifdef LANE_BIG_ENDIAN
+#define U8TO32_BIG(c) (((u32*)(c))[0]) /* big-endian host: direct 32-bit load (assumes aligned c) */
+#define U32TO8_BIG(c, v) ((u32*)(c))[0]=v /* big-endian host: direct 32-bit store (assumes aligned c) */
+#else /* little-endian (or unknown) host: byte-wise big-endian access */
+#define U8TO32_BIG(c) (((u32)T8(*((u8*)(c))) << 24) | \
+ ((u32)T8(*(((u8*)(c)) + 1)) << 16) | \
+ ((u32)T8(*(((u8*)(c)) + 2)) << 8) | \
+ ((u32)T8(*(((u8*)(c)) + 3))))
+#define U32TO8_BIG(c, v) do { \
+ u32 tmp_portable_h_x = (v); \
+ u8 *tmp_portable_h_d = (c); \
+ tmp_portable_h_d[0] = T8(tmp_portable_h_x >> 24); \
+ tmp_portable_h_d[1] = T8(tmp_portable_h_x >> 16); \
+ tmp_portable_h_d[2] = T8(tmp_portable_h_x >> 8); \
+ tmp_portable_h_d[3] = T8(tmp_portable_h_x); \
+ } while (0)
+#endif /* LANE_BIG_ENDIAN */
+
+static const u32 iv224[8] = { /* initial chaining value (256-bit state) for the 224-bit digest variant */
+ 0xc8245a86U, 0x8d733102U, 0x314ddcb9U, 0xf60a7ef4U,
+ 0x57b8c917U, 0xeefeaec2U, 0xff4fc3beU, 0x87c4728eU
+};
+
+static const u32 iv256[8] = { /* initial chaining value (256-bit state) for the 256-bit digest variant */
+ 0xbe292e17U, 0xbb541ff2U, 0xfe54b6f7U, 0x30b1c96aU,
+ 0x7b259268U, 0x8539bdf3U, 0x97c4bdd6U, 0x49763fb8U
+};
+
+static const u32 iv384[16] = { /* initial chaining value (512-bit state) for the 384-bit digest variant */
+ 0x148922ceU, 0x548c3001U, 0x76978bc8U, 0x266e008cU,
+ 0x3dc60765U, 0xd85b09d9U, 0x4cb1c8d8U, 0xe2cab952U,
+ 0xdb72be8eU, 0x685f0783U, 0xfa436c3dU, 0x4b9acb90U,
+ 0x5088dd47U, 0x932f55a9U, 0xa0c415c6U, 0xdb6dd795U
+};
+
+static const u32 iv512[16] = { /* initial chaining value (512-bit state) for the 512-bit digest variant */
+ 0x9b603481U, 0x1d5a931bU, 0x69c4e6e0U, 0x975e2681U,
+ 0xb863ba53U, 0x8d1be11bU, 0x77340080U, 0xd42c48a5U,
+ 0x3a3a1d61U, 0x1cf3a1c4U, 0xf0a30347U, 0x7e56a44aU,
+ 0x9530ee60U, 0xdadb05b6U, 0x3ae3ac7cU, 0xd732ac6aU
+};
+
+static const u32 T0[256] = { /* AES (Rijndael) Te0 table: combined S-box + MixColumns, one u32 per input byte */
+ 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
+ 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
+ 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
+ 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
+ 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
+ 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
+ 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
+ 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
+ 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
+ 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
+ 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
+ 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
+ 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
+ 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
+ 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
+ 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
+ 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
+ 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
+ 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
+ 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
+ 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
+ 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
+ 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
+ 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
+ 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
+ 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
+ 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
+ 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
+ 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
+ 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
+ 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
+ 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
+ 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
+ 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
+ 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
+ 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
+ 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
+ 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
+ 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
+ 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
+ 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
+ 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
+ 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
+ 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
+ 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
+ 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
+ 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
+ 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
+ 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
+ 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
+ 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
+ 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
+ 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
+ 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
+ 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
+ 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
+ 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
+ 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
+ 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
+ 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
+ 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
+ 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
+ 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
+ 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
+};
+static const u32 T1[256] = { /* Te1: same data as T0 with each entry rotated right by 8 bits (T1[x] == ror8(T0[x])) */
+ 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
+ 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
+ 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
+ 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
+ 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
+ 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
+ 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
+ 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
+ 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
+ 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
+ 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
+ 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
+ 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
+ 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
+ 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
+ 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
+ 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
+ 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
+ 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
+ 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
+ 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
+ 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
+ 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
+ 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
+ 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
+ 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
+ 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
+ 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
+ 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
+ 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
+ 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
+ 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
+ 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
+ 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
+ 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
+ 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
+ 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
+ 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
+ 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
+ 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
+ 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
+ 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
+ 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
+ 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
+ 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
+ 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
+ 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
+ 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
+ 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
+ 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
+ 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
+ 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
+ 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
+ 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
+ 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
+ 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
+ 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
+ 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
+ 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
+ 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
+ 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
+ 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
+ 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
+ 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
+};
+static const u32 T2[256] = { /* Te2: T0 with each entry rotated right by 16 bits (T2[x] == ror16(T0[x])) */
+ 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
+ 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
+ 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
+ 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
+ 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
+ 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
+ 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
+ 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
+ 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
+ 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
+ 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
+ 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
+ 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
+ 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
+ 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
+ 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
+ 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
+ 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
+ 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
+ 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
+ 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
+ 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
+ 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
+ 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
+ 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
+ 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
+ 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
+ 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
+ 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
+ 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
+ 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
+ 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
+ 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
+ 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
+ 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
+ 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
+ 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
+ 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
+ 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
+ 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
+ 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
+ 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
+ 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
+ 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
+ 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
+ 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
+ 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
+ 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
+ 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
+ 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
+ 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
+ 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
+ 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
+ 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
+ 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
+ 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
+ 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
+ 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
+ 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
+ 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
+ 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
+ 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
+ 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
+ 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
+};
+static const u32 T3[256] = { /* Te3: T0 with each entry rotated right by 24 bits (T3[x] == ror24(T0[x])) */
+ 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
+ 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
+ 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
+ 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
+ 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
+ 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
+ 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
+ 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
+ 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
+ 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
+ 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
+ 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
+ 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
+ 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
+ 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
+ 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
+ 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
+ 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
+ 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
+ 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
+ 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
+ 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
+ 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
+ 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
+ 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
+ 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
+ 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
+ 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
+ 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
+ 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
+ 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
+ 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
+ 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
+ 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
+ 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
+ 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
+ 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
+ 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
+ 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
+ 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
+ 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
+ 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
+ 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
+ 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
+ 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
+ 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
+ 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
+ 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
+ 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
+ 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
+ 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
+ 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
+ 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
+ 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
+ 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
+ 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
+ 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
+ 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
+ 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
+ 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
+ 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
+ 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
+ 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
+ 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
+};
+
+static const u32 C[768] = { /* LANE round constants; lane256_compress consumes 40 per lane via C[i], C[i+40], ... */
+ 0x07fc703d, 0xd3fe381f, 0xb9ff1c0e, 0x5cff8e07, 0xfe7fc702, 0x7f3fe381, 0xef9ff1c1, 0xa7cff8e1,
+ 0x83e7fc71, 0x91f3fe39, 0x98f9ff1d, 0x9c7cff8f, 0x9e3e7fc6, 0x4f1f3fe3, 0xf78f9ff0, 0x7bc7cff8,
+ 0x3de3e7fc, 0x1ef1f3fe, 0x0f78f9ff, 0xd7bc7cfe, 0x6bde3e7f, 0xe5ef1f3e, 0x72f78f9f, 0xe97bc7ce,
+ 0x74bde3e7, 0xea5ef1f2, 0x752f78f9, 0xea97bc7d, 0xa54bde3f, 0x82a5ef1e, 0x4152f78f, 0xf0a97bc6,
+ 0x7854bde3, 0xec2a5ef0, 0x76152f78, 0x3b0a97bc, 0x1d854bde, 0x0ec2a5ef, 0xd76152f6, 0x6bb0a97b,
+ 0xe5d854bc, 0x72ec2a5e, 0x3976152f, 0xccbb0a96, 0x665d854b, 0xe32ec2a4, 0x71976152, 0x38cbb0a9,
+ 0xcc65d855, 0xb632ec2b, 0x8b197614, 0x458cbb0a, 0x22c65d85, 0xc1632ec3, 0xb0b19760, 0x5858cbb0,
+ 0x2c2c65d8, 0x161632ec, 0x0b0b1976, 0x05858cbb, 0xd2c2c65c, 0x6961632e, 0x34b0b197, 0xca5858ca,
+ 0x652c2c65, 0xe2961633, 0xa14b0b18, 0x50a5858c, 0x2852c2c6, 0x14296163, 0xda14b0b0, 0x6d0a5858,
+ 0x36852c2c, 0x1b429616, 0x0da14b0b, 0xd6d0a584, 0x6b6852c2, 0x35b42961, 0xcada14b1, 0xb56d0a59,
+ 0x8ab6852d, 0x955b4297, 0x9aada14a, 0x4d56d0a5, 0xf6ab6853, 0xab55b428, 0x55aada14, 0x2ad56d0a,
+ 0x156ab685, 0xdab55b43, 0xbd5aada0, 0x5ead56d0, 0x2f56ab68, 0x17ab55b4, 0x0bd5aada, 0x05ead56d,
+ 0xd2f56ab7, 0xb97ab55a, 0x5cbd5aad, 0xfe5ead57, 0xaf2f56aa, 0x5797ab55, 0xfbcbd5ab, 0xade5ead4,
+ 0x56f2f56a, 0x2b797ab5, 0xc5bcbd5b, 0xb2de5eac, 0x596f2f56, 0x2cb797ab, 0xc65bcbd4, 0x632de5ea,
+ 0x3196f2f5, 0xc8cb797b, 0xb465bcbc, 0x5a32de5e, 0x2d196f2f, 0xc68cb796, 0x63465bcb, 0xe1a32de4,
+ 0x70d196f2, 0x3868cb79, 0xcc3465bd, 0xb61a32df, 0x8b0d196e, 0x45868cb7, 0xf2c3465a, 0x7961a32d,
+ 0xecb0d197, 0xa65868ca, 0x532c3465, 0xf9961a33, 0xaccb0d18, 0x5665868c, 0x2b32c346, 0x159961a3,
+ 0xdaccb0d0, 0x6d665868, 0x36b32c34, 0x1b59961a, 0x0daccb0d, 0xd6d66587, 0xbb6b32c2, 0x5db59961,
+ 0xfedaccb1, 0xaf6d6659, 0x87b6b32d, 0x93db5997, 0x99edacca, 0x4cf6d665, 0xf67b6b33, 0xab3db598,
+ 0x559edacc, 0x2acf6d66, 0x1567b6b3, 0xdab3db58, 0x6d59edac, 0x36acf6d6, 0x1b567b6b, 0xddab3db4,
+ 0x6ed59eda, 0x376acf6d, 0xcbb567b7, 0xb5dab3da, 0x5aed59ed, 0xfd76acf7, 0xaebb567a, 0x575dab3d,
+ 0xfbaed59f, 0xadd76ace, 0x56ebb567, 0xfb75dab2, 0x7dbaed59, 0xeedd76ad, 0xa76ebb57, 0x83b75daa,
+ 0x41dbaed5, 0xf0edd76b, 0xa876ebb4, 0x543b75da, 0x2a1dbaed, 0xc50edd77, 0xb2876eba, 0x5943b75d,
+ 0xfca1dbaf, 0xae50edd6, 0x572876eb, 0xfb943b74, 0x7dca1dba, 0x3ee50edd, 0xcf72876f, 0xb7b943b6,
+ 0x5bdca1db, 0xfdee50ec, 0x7ef72876, 0x3f7b943b, 0xcfbdca1c, 0x67dee50e, 0x33ef7287, 0xc9f7b942,
+ 0x64fbdca1, 0xe27dee51, 0xa13ef729, 0x809f7b95, 0x904fbdcb, 0x9827dee4, 0x4c13ef72, 0x2609f7b9,
+ 0xc304fbdd, 0xb1827def, 0x88c13ef6, 0x44609f7b, 0xf2304fbc, 0x791827de, 0x3c8c13ef, 0xce4609f6,
+ 0x672304fb, 0xe391827c, 0x71c8c13e, 0x38e4609f, 0xcc72304e, 0x66391827, 0xe31c8c12, 0x718e4609,
+ 0xe8c72305, 0xa4639183, 0x8231c8c0, 0x4118e460, 0x208c7230, 0x10463918, 0x08231c8c, 0x04118e46,
+ 0x0208c723, 0xd1046390, 0x688231c8, 0x344118e4, 0x1a208c72, 0x0d104639, 0xd688231d, 0xbb44118f,
+ 0x8da208c6, 0x46d10463, 0xf3688230, 0x79b44118, 0x3cda208c, 0x1e6d1046, 0x0f368823, 0xd79b4410,
+ 0x6bcda208, 0x35e6d104, 0x1af36882, 0x0d79b441, 0xd6bcda21, 0xbb5e6d11, 0x8daf3689, 0x96d79b45,
+ 0x9b6bcda3, 0x9db5e6d0, 0x4edaf368, 0x276d79b4, 0x13b6bcda, 0x09db5e6d, 0xd4edaf37, 0xba76d79a,
+ 0x5d3b6bcd, 0xfe9db5e7, 0xaf4edaf2, 0x57a76d79, 0xfbd3b6bd, 0xade9db5f, 0x86f4edae, 0x437a76d7,
+ 0xf1bd3b6a, 0x78de9db5, 0xec6f4edb, 0xa637a76c, 0x531bd3b6, 0x298de9db, 0xc4c6f4ec, 0x62637a76,
+ 0x3131bd3b, 0xc898de9c, 0x644c6f4e, 0x322637a7, 0xc9131bd2, 0x64898de9, 0xe244c6f5, 0xa122637b,
+ 0x809131bc, 0x404898de, 0x20244c6f, 0xc0122636, 0x6009131b, 0xe004898c, 0x700244c6, 0x38012263,
+ 0xcc009130, 0x66004898, 0x3300244c, 0x19801226, 0x0cc00913, 0xd6600488, 0x6b300244, 0x35980122,
+ 0x1acc0091, 0xdd660049, 0xbeb30025, 0x8f598013, 0x97acc008, 0x4bd66004, 0x25eb3002, 0x12f59801,
+ 0xd97acc01, 0xbcbd6601, 0x8e5eb301, 0x972f5981, 0x9b97acc1, 0x9dcbd661, 0x9ee5eb31, 0x9f72f599,
+ 0x9fb97acd, 0x9fdcbd67, 0x9fee5eb2, 0x4ff72f59, 0xf7fb97ad, 0xabfdcbd7, 0x85fee5ea, 0x42ff72f5,
+ 0xf17fb97b, 0xa8bfdcbc, 0x545fee5e, 0x2a2ff72f, 0xc517fb96, 0x628bfdcb, 0xe145fee4, 0x70a2ff72,
+ 0x38517fb9, 0xcc28bfdd, 0xb6145fef, 0x8b0a2ff6, 0x458517fb, 0xf2c28bfc, 0x796145fe, 0x3cb0a2ff,
+ 0xce58517e, 0x672c28bf, 0xe396145e, 0x71cb0a2f, 0xe8e58516, 0x7472c28b, 0xea396144, 0x751cb0a2,
+ 0x3a8e5851, 0xcd472c29, 0xb6a39615, 0x8b51cb0b, 0x95a8e584, 0x4ad472c2, 0x256a3961, 0xc2b51cb1,
+ 0xb15a8e59, 0x88ad472d, 0x9456a397, 0x9a2b51ca, 0x4d15a8e5, 0xf68ad473, 0xab456a38, 0x55a2b51c,
+ 0x2ad15a8e, 0x1568ad47, 0xdab456a2, 0x6d5a2b51, 0xe6ad15a9, 0xa3568ad5, 0x81ab456b, 0x90d5a2b4,
+ 0x486ad15a, 0x243568ad, 0xc21ab457, 0xb10d5a2a, 0x5886ad15, 0xfc43568b, 0xae21ab44, 0x5710d5a2,
+ 0x2b886ad1, 0xc5c43569, 0xb2e21ab5, 0x89710d5b, 0x94b886ac, 0x4a5c4356, 0x252e21ab, 0xc29710d4,
+ 0x614b886a, 0x30a5c435, 0xc852e21b, 0xb429710c, 0x5a14b886, 0x2d0a5c43, 0xc6852e20, 0x63429710,
+ 0x31a14b88, 0x18d0a5c4, 0x0c6852e2, 0x06342971, 0xd31a14b9, 0xb98d0a5d, 0x8cc6852f, 0x96634296,
+ 0x4b31a14b, 0xf598d0a4, 0x7acc6852, 0x3d663429, 0xceb31a15, 0xb7598d0b, 0x8bacc684, 0x45d66342,
+ 0x22eb31a1, 0xc17598d1, 0xb0bacc69, 0x885d6635, 0x942eb31b, 0x9a17598c, 0x4d0bacc6, 0x2685d663,
+ 0xc342eb30, 0x61a17598, 0x30d0bacc, 0x18685d66, 0x0c342eb3, 0xd61a1758, 0x6b0d0bac, 0x358685d6,
+ 0x1ac342eb, 0xdd61a174, 0x6eb0d0ba, 0x3758685d, 0xcbac342f, 0xb5d61a16, 0x5aeb0d0b, 0xfd758684,
+ 0x7ebac342, 0x3f5d61a1, 0xcfaeb0d1, 0xb7d75869, 0x8bebac35, 0x95f5d61b, 0x9afaeb0c, 0x4d7d7586,
+ 0x26bebac3, 0xc35f5d60, 0x61afaeb0, 0x30d7d758, 0x186bebac, 0x0c35f5d6, 0x061afaeb, 0xd30d7d74,
+ 0x6986beba, 0x34c35f5d, 0xca61afaf, 0xb530d7d6, 0x5a986beb, 0xfd4c35f4, 0x7ea61afa, 0x3f530d7d,
+ 0xcfa986bf, 0xb7d4c35e, 0x5bea61af, 0xfdf530d6, 0x7efa986b, 0xef7d4c34, 0x77bea61a, 0x3bdf530d,
+ 0xcdefa987, 0xb6f7d4c2, 0x5b7bea61, 0xfdbdf531, 0xaedefa99, 0x876f7d4d, 0x93b7bea7, 0x99dbdf52,
+ 0x4cedefa9, 0xf676f7d5, 0xab3b7beb, 0x859dbdf4, 0x42cedefa, 0x21676f7d, 0xc0b3b7bf, 0xb059dbde,
+ 0x582cedef, 0xfc1676f6, 0x7e0b3b7b, 0xef059dbc, 0x7782cede, 0x3bc1676f, 0xcde0b3b6, 0x66f059db,
+ 0xe3782cec, 0x71bc1676, 0x38de0b3b, 0xcc6f059c, 0x663782ce, 0x331bc167, 0xc98de0b2, 0x64c6f059,
+ 0xe263782d, 0xa131bc17, 0x8098de0a, 0x404c6f05, 0xf0263783, 0xa8131bc0, 0x54098de0, 0x2a04c6f0,
+ 0x15026378, 0x0a8131bc, 0x054098de, 0x02a04c6f, 0xd1502636, 0x68a8131b, 0xe454098c, 0x722a04c6,
+ 0x39150263, 0xcc8a8130, 0x66454098, 0x3322a04c, 0x19915026, 0x0cc8a813, 0xd6645408, 0x6b322a04,
+ 0x35991502, 0x1acc8a81, 0xdd664541, 0xbeb322a1, 0x8f599151, 0x97acc8a9, 0x9bd66455, 0x9deb322b,
+ 0x9ef59914, 0x4f7acc8a, 0x27bd6645, 0xc3deb323, 0xb1ef5990, 0x58f7acc8, 0x2c7bd664, 0x163deb32,
+ 0x0b1ef599, 0xd58f7acd, 0xbac7bd67, 0x8d63deb2, 0x46b1ef59, 0xf358f7ad, 0xa9ac7bd7, 0x84d63dea,
+ 0x426b1ef5, 0xf1358f7b, 0xa89ac7bc, 0x544d63de, 0x2a26b1ef, 0xc51358f6, 0x6289ac7b, 0xe144d63c,
+ 0x70a26b1e, 0x3851358f, 0xcc289ac6, 0x66144d63, 0xe30a26b0, 0x71851358, 0x38c289ac, 0x1c6144d6,
+ 0x0e30a26b, 0xd7185134, 0x6b8c289a, 0x35c6144d, 0xcae30a27, 0xb5718512, 0x5ab8c289, 0xfd5c6145,
+ 0xaeae30a3, 0x87571850, 0x43ab8c28, 0x21d5c614, 0x10eae30a, 0x08757185, 0xd43ab8c3, 0xba1d5c60,
+ 0x5d0eae30, 0x2e875718, 0x1743ab8c, 0x0ba1d5c6, 0x05d0eae3, 0xd2e87570, 0x69743ab8, 0x34ba1d5c,
+ 0x1a5d0eae, 0x0d2e8757, 0xd69743aa, 0x6b4ba1d5, 0xe5a5d0eb, 0xa2d2e874, 0x5169743a, 0x28b4ba1d,
+ 0xc45a5d0f, 0xb22d2e86, 0x59169743, 0xfc8b4ba0, 0x7e45a5d0, 0x3f22d2e8, 0x1f916974, 0x0fc8b4ba,
+ 0x07e45a5d, 0xd3f22d2f, 0xb9f91696, 0x5cfc8b4b, 0xfe7e45a4, 0x7f3f22d2, 0x3f9f9169, 0xcfcfc8b5,
+ 0xb7e7e45b, 0x8bf3f22c, 0x45f9f916, 0x22fcfc8b, 0xc17e7e44, 0x60bf3f22, 0x305f9f91, 0xc82fcfc9,
+ 0xb417e7e5, 0x8a0bf3f3, 0x9505f9f8, 0x4a82fcfc, 0x25417e7e, 0x12a0bf3f, 0xd9505f9e, 0x6ca82fcf,
+ 0xe65417e6, 0x732a0bf3, 0xe99505f8, 0x74ca82fc, 0x3a65417e, 0x1d32a0bf, 0xde99505e, 0x6f4ca82f,
+ 0xe7a65416, 0x73d32a0b, 0xe9e99504, 0x74f4ca82, 0x3a7a6541, 0xcd3d32a1, 0xb69e9951, 0x8b4f4ca9,
+ 0x95a7a655, 0x9ad3d32b, 0x9d69e994, 0x4eb4f4ca, 0x275a7a65, 0xc3ad3d33, 0xb1d69e98, 0x58eb4f4c,
+ 0x2c75a7a6, 0x163ad3d3, 0xdb1d69e8, 0x6d8eb4f4, 0x36c75a7a, 0x1b63ad3d, 0xddb1d69f, 0xbed8eb4e,
+ 0x5f6c75a7, 0xffb63ad2, 0x7fdb1d69, 0xefed8eb5, 0xa7f6c75b, 0x83fb63ac, 0x41fdb1d6, 0x20fed8eb,
+ 0xc07f6c74, 0x603fb63a, 0x301fdb1d, 0xc80fed8f, 0xb407f6c6, 0x5a03fb63, 0xfd01fdb0, 0x7e80fed8,
+ 0x3f407f6c, 0x1fa03fb6, 0x0fd01fdb, 0xd7e80fec, 0x6bf407f6, 0x35fa03fb, 0xcafd01fc, 0x657e80fe,
+ 0x32bf407f, 0xc95fa03e, 0x64afd01f, 0xe257e80e, 0x712bf407, 0xe895fa02, 0x744afd01, 0xea257e81,
+ 0xa512bf41, 0x82895fa1, 0x9144afd1, 0x98a257e9, 0x9c512bf5, 0x9e2895fb, 0x9f144afc, 0x4f8a257e,
+ 0x27c512bf, 0xc3e2895e, 0x61f144af, 0xe0f8a256, 0x707c512b, 0xe83e2894, 0x741f144a, 0x3a0f8a25,
+ 0xcd07c513, 0xb683e288, 0x5b41f144, 0x2da0f8a2, 0x16d07c51, 0xdb683e29, 0xbdb41f15, 0x8eda0f8b,
+ 0x976d07c4, 0x4bb683e2, 0x25db41f1, 0xc2eda0f9, 0xb176d07d, 0x88bb683f, 0x945db41e, 0x4a2eda0f,
+ 0xf5176d06, 0x7a8bb683, 0xed45db40, 0x76a2eda0, 0x3b5176d0, 0x1da8bb68, 0x0ed45db4, 0x076a2eda,
+ 0x03b5176d, 0xd1da8bb7, 0xb8ed45da, 0x5c76a2ed, 0xfe3b5177, 0xaf1da8ba, 0x578ed45d, 0xfbc76a2f,
+ 0xade3b516, 0x56f1da8b, 0xfb78ed44, 0x7dbc76a2, 0x3ede3b51, 0xcf6f1da9, 0xb7b78ed5, 0x8bdbc76b,
+ 0x95ede3b4, 0x4af6f1da, 0x257b78ed, 0xc2bdbc77, 0xb15ede3a, 0x58af6f1d, 0xfc57b78f, 0xae2bdbc6,
+ 0x5715ede3, 0xfb8af6f0, 0x7dc57b78, 0x3ee2bdbc, 0x1f715ede, 0x0fb8af6f, 0xd7dc57b6, 0x6bee2bdb,
+};
+
+void lane256_compress(const u8 m[64], u32 h[8], const u32 ctrh, const u32 ctrl)
+{
+ u32 t0, t1, t2, t3, t4, t5, t6, t7; /* temp */
+ u32 s00, s01, s02, s03, s04, s05, s06, s07; /* lane 0 */
+ u32 s10, s11, s12, s13, s14, s15, s16, s17; /* lane 1 */
+ u32 s20, s21, s22, s23, s24, s25, s26, s27; /* lane 2 */
+ u32 s30, s31, s32, s33, s34, s35, s36, s37; /* lane 3 */
+ u32 s40, s41, s42, s43, s44, s45, s46, s47; /* lane 4 */
+ u32 s50, s51, s52, s53, s54, s55, s56, s57; /* lane 5 */
+ u32 s60, s61, s62, s63, s64, s65, s66, s67; /* lane 6 */
+ u32 s70, s71, s72, s73, s74, s75, s76, s77; /* lane 7 */
+
+ /* Message expansion */
+ s30 = h[0];
+ s31 = h[1];
+ s32 = h[2];
+ s33 = h[3];
+ s34 = h[4];
+ s35 = h[5];
+ s36 = h[6];
+ s37 = h[7];
+ s40 = U8TO32_BIG(m + 0);
+ s41 = U8TO32_BIG(m + 4);
+ s42 = U8TO32_BIG(m + 8);
+ s43 = U8TO32_BIG(m + 12);
+ s44 = U8TO32_BIG(m + 16);
+ s45 = U8TO32_BIG(m + 20);
+ s46 = U8TO32_BIG(m + 24);
+ s47 = U8TO32_BIG(m + 28);
+ s50 = U8TO32_BIG(m + 32);
+ s51 = U8TO32_BIG(m + 36);
+ s52 = U8TO32_BIG(m + 40);
+ s53 = U8TO32_BIG(m + 44);
+ s54 = U8TO32_BIG(m + 48);
+ s55 = U8TO32_BIG(m + 52);
+ s56 = U8TO32_BIG(m + 56);
+ s57 = U8TO32_BIG(m + 60);
+ s00 = s30 ^ s40 ^ s44 ^ s50 ^ s54;
+ s01 = s31 ^ s41 ^ s45 ^ s51 ^ s55;
+ s02 = s32 ^ s42 ^ s46 ^ s52 ^ s56;
+ s03 = s33 ^ s43 ^ s47 ^ s53 ^ s57;
+ s04 = s34 ^ s40 ^ s50;
+ s05 = s35 ^ s41 ^ s51;
+ s06 = s36 ^ s42 ^ s52;
+ s07 = s37 ^ s43 ^ s53;
+ s10 = s00 ^ s34 ^ s44;
+ s11 = s01 ^ s35 ^ s45;
+ s12 = s02 ^ s36 ^ s46;
+ s13 = s03 ^ s37 ^ s47;
+ s14 = s30 ^ s44 ^ s50;
+ s15 = s31 ^ s45 ^ s51;
+ s16 = s32 ^ s46 ^ s52;
+ s17 = s33 ^ s47 ^ s53;
+ s20 = s00 ^ s34 ^ s54;
+ s21 = s01 ^ s35 ^ s55;
+ s22 = s02 ^ s36 ^ s56;
+ s23 = s03 ^ s37 ^ s57;
+ s24 = s30 ^ s40 ^ s54;
+ s25 = s31 ^ s41 ^ s55;
+ s26 = s32 ^ s42 ^ s56;
+ s27 = s33 ^ s43 ^ s57;
+
+ /* Lane 0 */
+ t0 = T0[B3(s00)] ^ T1[B2(s01)] ^ T2[B1(s02)] ^ T3[B0(s03)] ^ C[ 0];
+ t1 = T0[B3(s01)] ^ T1[B2(s02)] ^ T2[B1(s03)] ^ T3[B0(s00)] ^ C[ 1];
+ t4 = T0[B3(s02)] ^ T1[B2(s03)] ^ T2[B1(s00)] ^ T3[B0(s01)] ^ C[ 2];
+ t5 = T0[B3(s03)] ^ T1[B2(s00)] ^ T2[B1(s01)] ^ T3[B0(s02)] ^ C[ 3] ^ ctrh;
+ t2 = T0[B3(s04)] ^ T1[B2(s05)] ^ T2[B1(s06)] ^ T3[B0(s07)] ^ C[ 4];
+ t3 = T0[B3(s05)] ^ T1[B2(s06)] ^ T2[B1(s07)] ^ T3[B0(s04)] ^ C[ 5];
+ t6 = T0[B3(s06)] ^ T1[B2(s07)] ^ T2[B1(s04)] ^ T3[B0(s05)] ^ C[ 6];
+ t7 = T0[B3(s07)] ^ T1[B2(s04)] ^ T2[B1(s05)] ^ T3[B0(s06)] ^ C[ 7];
+
+ s00 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 8];
+ s01 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 9];
+ s04 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[10];
+ s05 = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[11] ^ ctrl;
+ s02 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[12];
+ s03 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[13];
+ s06 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[14];
+ s07 = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[15];
+
+ t0 = T0[B3(s00)] ^ T1[B2(s01)] ^ T2[B1(s02)] ^ T3[B0(s03)] ^ C[16];
+ t1 = T0[B3(s01)] ^ T1[B2(s02)] ^ T2[B1(s03)] ^ T3[B0(s00)] ^ C[17];
+ t4 = T0[B3(s02)] ^ T1[B2(s03)] ^ T2[B1(s00)] ^ T3[B0(s01)] ^ C[18];
+ t5 = T0[B3(s03)] ^ T1[B2(s00)] ^ T2[B1(s01)] ^ T3[B0(s02)] ^ C[19] ^ ctrh;
+ t2 = T0[B3(s04)] ^ T1[B2(s05)] ^ T2[B1(s06)] ^ T3[B0(s07)] ^ C[20];
+ t3 = T0[B3(s05)] ^ T1[B2(s06)] ^ T2[B1(s07)] ^ T3[B0(s04)] ^ C[21];
+ t6 = T0[B3(s06)] ^ T1[B2(s07)] ^ T2[B1(s04)] ^ T3[B0(s05)] ^ C[22];
+ t7 = T0[B3(s07)] ^ T1[B2(s04)] ^ T2[B1(s05)] ^ T3[B0(s06)] ^ C[23];
+
+ s00 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[24];
+ s01 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[25];
+ s04 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[26];
+ s05 = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[27] ^ ctrl;
+ s02 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[28];
+ s03 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[29];
+ s06 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[30];
+ s07 = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[31];
+
+ t0 = T0[B3(s00)] ^ T1[B2(s01)] ^ T2[B1(s02)] ^ T3[B0(s03)] ^ C[32];
+ t1 = T0[B3(s01)] ^ T1[B2(s02)] ^ T2[B1(s03)] ^ T3[B0(s00)] ^ C[33];
+ t4 = T0[B3(s02)] ^ T1[B2(s03)] ^ T2[B1(s00)] ^ T3[B0(s01)] ^ C[34];
+ t5 = T0[B3(s03)] ^ T1[B2(s00)] ^ T2[B1(s01)] ^ T3[B0(s02)] ^ C[35] ^ ctrh;
+ t2 = T0[B3(s04)] ^ T1[B2(s05)] ^ T2[B1(s06)] ^ T3[B0(s07)] ^ C[36];
+ t3 = T0[B3(s05)] ^ T1[B2(s06)] ^ T2[B1(s07)] ^ T3[B0(s04)] ^ C[37];
+ t6 = T0[B3(s06)] ^ T1[B2(s07)] ^ T2[B1(s04)] ^ T3[B0(s05)] ^ C[38];
+ t7 = T0[B3(s07)] ^ T1[B2(s04)] ^ T2[B1(s05)] ^ T3[B0(s06)] ^ C[39];
+
+ s60 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )];
+ s61 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )];
+ s64 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )];
+ s65 = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )];
+ s62 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )];
+ s63 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )];
+ s66 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )];
+ s67 = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )];
+
+ /* Lane 1 */
+ t0 = T0[B3(s10)] ^ T1[B2(s11)] ^ T2[B1(s12)] ^ T3[B0(s13)] ^ C[ 0+40];
+ t1 = T0[B3(s11)] ^ T1[B2(s12)] ^ T2[B1(s13)] ^ T3[B0(s10)] ^ C[ 1+40];
+ t4 = T0[B3(s12)] ^ T1[B2(s13)] ^ T2[B1(s10)] ^ T3[B0(s11)] ^ C[ 2+40];
+ t5 = T0[B3(s13)] ^ T1[B2(s10)] ^ T2[B1(s11)] ^ T3[B0(s12)] ^ C[ 3+40] ^ ctrl;
+ t2 = T0[B3(s14)] ^ T1[B2(s15)] ^ T2[B1(s16)] ^ T3[B0(s17)] ^ C[ 4+40];
+ t3 = T0[B3(s15)] ^ T1[B2(s16)] ^ T2[B1(s17)] ^ T3[B0(s14)] ^ C[ 5+40];
+ t6 = T0[B3(s16)] ^ T1[B2(s17)] ^ T2[B1(s14)] ^ T3[B0(s15)] ^ C[ 6+40];
+ t7 = T0[B3(s17)] ^ T1[B2(s14)] ^ T2[B1(s15)] ^ T3[B0(s16)] ^ C[ 7+40];
+
+ s10 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 8+40];
+ s11 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 9+40];
+ s14 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[10+40];
+ s15 = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[11+40] ^ ctrh;
+ s12 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[12+40];
+ s13 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[13+40];
+ s16 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[14+40];
+ s17 = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[15+40];
+
+ t0 = T0[B3(s10)] ^ T1[B2(s11)] ^ T2[B1(s12)] ^ T3[B0(s13)] ^ C[16+40];
+ t1 = T0[B3(s11)] ^ T1[B2(s12)] ^ T2[B1(s13)] ^ T3[B0(s10)] ^ C[17+40];
+ t4 = T0[B3(s12)] ^ T1[B2(s13)] ^ T2[B1(s10)] ^ T3[B0(s11)] ^ C[18+40];
+ t5 = T0[B3(s13)] ^ T1[B2(s10)] ^ T2[B1(s11)] ^ T3[B0(s12)] ^ C[19+40] ^ ctrl;
+ t2 = T0[B3(s14)] ^ T1[B2(s15)] ^ T2[B1(s16)] ^ T3[B0(s17)] ^ C[20+40];
+ t3 = T0[B3(s15)] ^ T1[B2(s16)] ^ T2[B1(s17)] ^ T3[B0(s14)] ^ C[21+40];
+ t6 = T0[B3(s16)] ^ T1[B2(s17)] ^ T2[B1(s14)] ^ T3[B0(s15)] ^ C[22+40];
+ t7 = T0[B3(s17)] ^ T1[B2(s14)] ^ T2[B1(s15)] ^ T3[B0(s16)] ^ C[23+40];
+
+ s10 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[24+40];
+ s11 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[25+40];
+ s14 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[26+40];
+ s15 = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[27+40] ^ ctrh;
+ s12 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[28+40];
+ s13 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[29+40];
+ s16 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[30+40];
+ s17 = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[31+40];
+
+ t0 = T0[B3(s10)] ^ T1[B2(s11)] ^ T2[B1(s12)] ^ T3[B0(s13)] ^ C[32+40];
+ t1 = T0[B3(s11)] ^ T1[B2(s12)] ^ T2[B1(s13)] ^ T3[B0(s10)] ^ C[33+40];
+ t4 = T0[B3(s12)] ^ T1[B2(s13)] ^ T2[B1(s10)] ^ T3[B0(s11)] ^ C[34+40];
+ t5 = T0[B3(s13)] ^ T1[B2(s10)] ^ T2[B1(s11)] ^ T3[B0(s12)] ^ C[35+40] ^ ctrl;
+ t2 = T0[B3(s14)] ^ T1[B2(s15)] ^ T2[B1(s16)] ^ T3[B0(s17)] ^ C[36+40];
+ t3 = T0[B3(s15)] ^ T1[B2(s16)] ^ T2[B1(s17)] ^ T3[B0(s14)] ^ C[37+40];
+ t6 = T0[B3(s16)] ^ T1[B2(s17)] ^ T2[B1(s14)] ^ T3[B0(s15)] ^ C[38+40];
+ t7 = T0[B3(s17)] ^ T1[B2(s14)] ^ T2[B1(s15)] ^ T3[B0(s16)] ^ C[39+40];
+
+ s60 ^= T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )];
+ s61 ^= T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )];
+ s64 ^= T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )];
+ s65 ^= T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )];
+ s62 ^= T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )];
+ s63 ^= T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )];
+ s66 ^= T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )];
+ s67 ^= T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )];
+
+ /* Lane 2 */
+ t0 = T0[B3(s20)] ^ T1[B2(s21)] ^ T2[B1(s22)] ^ T3[B0(s23)] ^ C[ 0+80];
+ t1 = T0[B3(s21)] ^ T1[B2(s22)] ^ T2[B1(s23)] ^ T3[B0(s20)] ^ C[ 1+80];
+ t4 = T0[B3(s22)] ^ T1[B2(s23)] ^ T2[B1(s20)] ^ T3[B0(s21)] ^ C[ 2+80];
+ t5 = T0[B3(s23)] ^ T1[B2(s20)] ^ T2[B1(s21)] ^ T3[B0(s22)] ^ C[ 3+80] ^ ctrh;
+ t2 = T0[B3(s24)] ^ T1[B2(s25)] ^ T2[B1(s26)] ^ T3[B0(s27)] ^ C[ 4+80];
+ t3 = T0[B3(s25)] ^ T1[B2(s26)] ^ T2[B1(s27)] ^ T3[B0(s24)] ^ C[ 5+80];
+ t6 = T0[B3(s26)] ^ T1[B2(s27)] ^ T2[B1(s24)] ^ T3[B0(s25)] ^ C[ 6+80];
+ t7 = T0[B3(s27)] ^ T1[B2(s24)] ^ T2[B1(s25)] ^ T3[B0(s26)] ^ C[ 7+80];
+
+ s20 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 8+80];
+ s21 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 9+80];
+ s24 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[10+80];
+ s25 = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[11+80] ^ ctrl;
+ s22 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[12+80];
+ s23 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[13+80];
+ s26 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[14+80];
+ s27 = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[15+80];
+
+ t0 = T0[B3(s20)] ^ T1[B2(s21)] ^ T2[B1(s22)] ^ T3[B0(s23)] ^ C[16+80];
+ t1 = T0[B3(s21)] ^ T1[B2(s22)] ^ T2[B1(s23)] ^ T3[B0(s20)] ^ C[17+80];
+ t4 = T0[B3(s22)] ^ T1[B2(s23)] ^ T2[B1(s20)] ^ T3[B0(s21)] ^ C[18+80];
+ t5 = T0[B3(s23)] ^ T1[B2(s20)] ^ T2[B1(s21)] ^ T3[B0(s22)] ^ C[19+80] ^ ctrh;
+ t2 = T0[B3(s24)] ^ T1[B2(s25)] ^ T2[B1(s26)] ^ T3[B0(s27)] ^ C[20+80];
+ t3 = T0[B3(s25)] ^ T1[B2(s26)] ^ T2[B1(s27)] ^ T3[B0(s24)] ^ C[21+80];
+ t6 = T0[B3(s26)] ^ T1[B2(s27)] ^ T2[B1(s24)] ^ T3[B0(s25)] ^ C[22+80];
+ t7 = T0[B3(s27)] ^ T1[B2(s24)] ^ T2[B1(s25)] ^ T3[B0(s26)] ^ C[23+80];
+
+ s20 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[24+80];
+ s21 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[25+80];
+ s24 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[26+80];
+ s25 = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[27+80] ^ ctrl;
+ s22 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[28+80];
+ s23 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[29+80];
+ s26 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[30+80];
+ s27 = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[31+80];
+
+ t0 = T0[B3(s20)] ^ T1[B2(s21)] ^ T2[B1(s22)] ^ T3[B0(s23)] ^ C[32+80];
+ t1 = T0[B3(s21)] ^ T1[B2(s22)] ^ T2[B1(s23)] ^ T3[B0(s20)] ^ C[33+80];
+ t4 = T0[B3(s22)] ^ T1[B2(s23)] ^ T2[B1(s20)] ^ T3[B0(s21)] ^ C[34+80];
+ t5 = T0[B3(s23)] ^ T1[B2(s20)] ^ T2[B1(s21)] ^ T3[B0(s22)] ^ C[35+80] ^ ctrh;
+ t2 = T0[B3(s24)] ^ T1[B2(s25)] ^ T2[B1(s26)] ^ T3[B0(s27)] ^ C[36+80];
+ t3 = T0[B3(s25)] ^ T1[B2(s26)] ^ T2[B1(s27)] ^ T3[B0(s24)] ^ C[37+80];
+ t6 = T0[B3(s26)] ^ T1[B2(s27)] ^ T2[B1(s24)] ^ T3[B0(s25)] ^ C[38+80];
+ t7 = T0[B3(s27)] ^ T1[B2(s24)] ^ T2[B1(s25)] ^ T3[B0(s26)] ^ C[39+80];
+
+ s60 ^= T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )];
+ s61 ^= T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )];
+ s64 ^= T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )];
+ s65 ^= T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )];
+ s62 ^= T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )];
+ s63 ^= T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )];
+ s66 ^= T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )];
+ s67 ^= T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )];
+
+ /* Lane 3 */
+ t0 = T0[B3(s30)] ^ T1[B2(s31)] ^ T2[B1(s32)] ^ T3[B0(s33)] ^ C[ 0+120];
+ t1 = T0[B3(s31)] ^ T1[B2(s32)] ^ T2[B1(s33)] ^ T3[B0(s30)] ^ C[ 1+120];
+ t4 = T0[B3(s32)] ^ T1[B2(s33)] ^ T2[B1(s30)] ^ T3[B0(s31)] ^ C[ 2+120];
+ t5 = T0[B3(s33)] ^ T1[B2(s30)] ^ T2[B1(s31)] ^ T3[B0(s32)] ^ C[ 3+120] ^ ctrl;
+ t2 = T0[B3(s34)] ^ T1[B2(s35)] ^ T2[B1(s36)] ^ T3[B0(s37)] ^ C[ 4+120];
+ t3 = T0[B3(s35)] ^ T1[B2(s36)] ^ T2[B1(s37)] ^ T3[B0(s34)] ^ C[ 5+120];
+ t6 = T0[B3(s36)] ^ T1[B2(s37)] ^ T2[B1(s34)] ^ T3[B0(s35)] ^ C[ 6+120];
+ t7 = T0[B3(s37)] ^ T1[B2(s34)] ^ T2[B1(s35)] ^ T3[B0(s36)] ^ C[ 7+120];
+
+ s30 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 8+120];
+ s31 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 9+120];
+ s34 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[10+120];
+ s35 = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[11+120] ^ ctrh;
+ s32 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[12+120];
+ s33 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[13+120];
+ s36 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[14+120];
+ s37 = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[15+120];
+
+ t0 = T0[B3(s30)] ^ T1[B2(s31)] ^ T2[B1(s32)] ^ T3[B0(s33)] ^ C[16+120];
+ t1 = T0[B3(s31)] ^ T1[B2(s32)] ^ T2[B1(s33)] ^ T3[B0(s30)] ^ C[17+120];
+ t4 = T0[B3(s32)] ^ T1[B2(s33)] ^ T2[B1(s30)] ^ T3[B0(s31)] ^ C[18+120];
+ t5 = T0[B3(s33)] ^ T1[B2(s30)] ^ T2[B1(s31)] ^ T3[B0(s32)] ^ C[19+120] ^ ctrl;
+ t2 = T0[B3(s34)] ^ T1[B2(s35)] ^ T2[B1(s36)] ^ T3[B0(s37)] ^ C[20+120];
+ t3 = T0[B3(s35)] ^ T1[B2(s36)] ^ T2[B1(s37)] ^ T3[B0(s34)] ^ C[21+120];
+ t6 = T0[B3(s36)] ^ T1[B2(s37)] ^ T2[B1(s34)] ^ T3[B0(s35)] ^ C[22+120];
+ t7 = T0[B3(s37)] ^ T1[B2(s34)] ^ T2[B1(s35)] ^ T3[B0(s36)] ^ C[23+120];
+
+ s30 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[24+120];
+ s31 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[25+120];
+ s34 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[26+120];
+ s35 = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[27+120] ^ ctrh;
+ s32 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[28+120];
+ s33 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[29+120];
+ s36 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[30+120];
+ s37 = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[31+120];
+
+ t0 = T0[B3(s30)] ^ T1[B2(s31)] ^ T2[B1(s32)] ^ T3[B0(s33)] ^ C[32+120];
+ t1 = T0[B3(s31)] ^ T1[B2(s32)] ^ T2[B1(s33)] ^ T3[B0(s30)] ^ C[33+120];
+ t4 = T0[B3(s32)] ^ T1[B2(s33)] ^ T2[B1(s30)] ^ T3[B0(s31)] ^ C[34+120];
+ t5 = T0[B3(s33)] ^ T1[B2(s30)] ^ T2[B1(s31)] ^ T3[B0(s32)] ^ C[35+120] ^ ctrl;
+ t2 = T0[B3(s34)] ^ T1[B2(s35)] ^ T2[B1(s36)] ^ T3[B0(s37)] ^ C[36+120];
+ t3 = T0[B3(s35)] ^ T1[B2(s36)] ^ T2[B1(s37)] ^ T3[B0(s34)] ^ C[37+120];
+ t6 = T0[B3(s36)] ^ T1[B2(s37)] ^ T2[B1(s34)] ^ T3[B0(s35)] ^ C[38+120];
+ t7 = T0[B3(s37)] ^ T1[B2(s34)] ^ T2[B1(s35)] ^ T3[B0(s36)] ^ C[39+120];
+
+ s70 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )];
+ s71 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )];
+ s74 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )];
+ s75 = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )];
+ s72 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )];
+ s73 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )];
+ s76 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )];
+ s77 = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )];
+
+ /* Lane 4 */
+ t0 = T0[B3(s40)] ^ T1[B2(s41)] ^ T2[B1(s42)] ^ T3[B0(s43)] ^ C[ 0+160];
+ t1 = T0[B3(s41)] ^ T1[B2(s42)] ^ T2[B1(s43)] ^ T3[B0(s40)] ^ C[ 1+160];
+ t4 = T0[B3(s42)] ^ T1[B2(s43)] ^ T2[B1(s40)] ^ T3[B0(s41)] ^ C[ 2+160];
+ t5 = T0[B3(s43)] ^ T1[B2(s40)] ^ T2[B1(s41)] ^ T3[B0(s42)] ^ C[ 3+160] ^ ctrh;
+ t2 = T0[B3(s44)] ^ T1[B2(s45)] ^ T2[B1(s46)] ^ T3[B0(s47)] ^ C[ 4+160];
+ t3 = T0[B3(s45)] ^ T1[B2(s46)] ^ T2[B1(s47)] ^ T3[B0(s44)] ^ C[ 5+160];
+ t6 = T0[B3(s46)] ^ T1[B2(s47)] ^ T2[B1(s44)] ^ T3[B0(s45)] ^ C[ 6+160];
+ t7 = T0[B3(s47)] ^ T1[B2(s44)] ^ T2[B1(s45)] ^ T3[B0(s46)] ^ C[ 7+160];
+
+ s40 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 8+160];
+ s41 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 9+160];
+ s44 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[10+160];
+ s45 = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[11+160] ^ ctrl;
+ s42 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[12+160];
+ s43 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[13+160];
+ s46 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[14+160];
+ s47 = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[15+160];
+
+ t0 = T0[B3(s40)] ^ T1[B2(s41)] ^ T2[B1(s42)] ^ T3[B0(s43)] ^ C[16+160];
+ t1 = T0[B3(s41)] ^ T1[B2(s42)] ^ T2[B1(s43)] ^ T3[B0(s40)] ^ C[17+160];
+ t4 = T0[B3(s42)] ^ T1[B2(s43)] ^ T2[B1(s40)] ^ T3[B0(s41)] ^ C[18+160];
+ t5 = T0[B3(s43)] ^ T1[B2(s40)] ^ T2[B1(s41)] ^ T3[B0(s42)] ^ C[19+160] ^ ctrh;
+ t2 = T0[B3(s44)] ^ T1[B2(s45)] ^ T2[B1(s46)] ^ T3[B0(s47)] ^ C[20+160];
+ t3 = T0[B3(s45)] ^ T1[B2(s46)] ^ T2[B1(s47)] ^ T3[B0(s44)] ^ C[21+160];
+ t6 = T0[B3(s46)] ^ T1[B2(s47)] ^ T2[B1(s44)] ^ T3[B0(s45)] ^ C[22+160];
+ t7 = T0[B3(s47)] ^ T1[B2(s44)] ^ T2[B1(s45)] ^ T3[B0(s46)] ^ C[23+160];
+
+ s40 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[24+160];
+ s41 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[25+160];
+ s44 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[26+160];
+ s45 = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[27+160] ^ ctrl;
+ s42 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[28+160];
+ s43 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[29+160];
+ s46 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[30+160];
+ s47 = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[31+160];
+
+ t0 = T0[B3(s40)] ^ T1[B2(s41)] ^ T2[B1(s42)] ^ T3[B0(s43)] ^ C[32+160];
+ t1 = T0[B3(s41)] ^ T1[B2(s42)] ^ T2[B1(s43)] ^ T3[B0(s40)] ^ C[33+160];
+ t4 = T0[B3(s42)] ^ T1[B2(s43)] ^ T2[B1(s40)] ^ T3[B0(s41)] ^ C[34+160];
+ t5 = T0[B3(s43)] ^ T1[B2(s40)] ^ T2[B1(s41)] ^ T3[B0(s42)] ^ C[35+160] ^ ctrh;
+ t2 = T0[B3(s44)] ^ T1[B2(s45)] ^ T2[B1(s46)] ^ T3[B0(s47)] ^ C[36+160];
+ t3 = T0[B3(s45)] ^ T1[B2(s46)] ^ T2[B1(s47)] ^ T3[B0(s44)] ^ C[37+160];
+ t6 = T0[B3(s46)] ^ T1[B2(s47)] ^ T2[B1(s44)] ^ T3[B0(s45)] ^ C[38+160];
+ t7 = T0[B3(s47)] ^ T1[B2(s44)] ^ T2[B1(s45)] ^ T3[B0(s46)] ^ C[39+160];
+
+ s70 ^= T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )];
+ s71 ^= T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )];
+ s74 ^= T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )];
+ s75 ^= T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )];
+ s72 ^= T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )];
+ s73 ^= T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )];
+ s76 ^= T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )];
+ s77 ^= T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )];
+
+ /* Lane 5 */
+ t0 = T0[B3(s50)] ^ T1[B2(s51)] ^ T2[B1(s52)] ^ T3[B0(s53)] ^ C[ 0+200];
+ t1 = T0[B3(s51)] ^ T1[B2(s52)] ^ T2[B1(s53)] ^ T3[B0(s50)] ^ C[ 1+200];
+ t4 = T0[B3(s52)] ^ T1[B2(s53)] ^ T2[B1(s50)] ^ T3[B0(s51)] ^ C[ 2+200];
+ t5 = T0[B3(s53)] ^ T1[B2(s50)] ^ T2[B1(s51)] ^ T3[B0(s52)] ^ C[ 3+200] ^ ctrl;
+ t2 = T0[B3(s54)] ^ T1[B2(s55)] ^ T2[B1(s56)] ^ T3[B0(s57)] ^ C[ 4+200];
+ t3 = T0[B3(s55)] ^ T1[B2(s56)] ^ T2[B1(s57)] ^ T3[B0(s54)] ^ C[ 5+200];
+ t6 = T0[B3(s56)] ^ T1[B2(s57)] ^ T2[B1(s54)] ^ T3[B0(s55)] ^ C[ 6+200];
+ t7 = T0[B3(s57)] ^ T1[B2(s54)] ^ T2[B1(s55)] ^ T3[B0(s56)] ^ C[ 7+200];
+
+ s50 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 8+200];
+ s51 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 9+200];
+ s54 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[10+200];
+ s55 = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[11+200] ^ ctrh;
+ s52 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[12+200];
+ s53 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[13+200];
+ s56 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[14+200];
+ s57 = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[15+200];
+
+ t0 = T0[B3(s50)] ^ T1[B2(s51)] ^ T2[B1(s52)] ^ T3[B0(s53)] ^ C[16+200];
+ t1 = T0[B3(s51)] ^ T1[B2(s52)] ^ T2[B1(s53)] ^ T3[B0(s50)] ^ C[17+200];
+ t4 = T0[B3(s52)] ^ T1[B2(s53)] ^ T2[B1(s50)] ^ T3[B0(s51)] ^ C[18+200];
+ t5 = T0[B3(s53)] ^ T1[B2(s50)] ^ T2[B1(s51)] ^ T3[B0(s52)] ^ C[19+200] ^ ctrl;
+ t2 = T0[B3(s54)] ^ T1[B2(s55)] ^ T2[B1(s56)] ^ T3[B0(s57)] ^ C[20+200];
+ t3 = T0[B3(s55)] ^ T1[B2(s56)] ^ T2[B1(s57)] ^ T3[B0(s54)] ^ C[21+200];
+ t6 = T0[B3(s56)] ^ T1[B2(s57)] ^ T2[B1(s54)] ^ T3[B0(s55)] ^ C[22+200];
+ t7 = T0[B3(s57)] ^ T1[B2(s54)] ^ T2[B1(s55)] ^ T3[B0(s56)] ^ C[23+200];
+
+ s50 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[24+200];
+ s51 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[25+200];
+ s54 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[26+200];
+ s55 = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[27+200] ^ ctrh;
+ s52 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[28+200];
+ s53 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[29+200];
+ s56 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[30+200];
+ s57 = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[31+200];
+
+ t0 = T0[B3(s50)] ^ T1[B2(s51)] ^ T2[B1(s52)] ^ T3[B0(s53)] ^ C[32+200];
+ t1 = T0[B3(s51)] ^ T1[B2(s52)] ^ T2[B1(s53)] ^ T3[B0(s50)] ^ C[33+200];
+ t4 = T0[B3(s52)] ^ T1[B2(s53)] ^ T2[B1(s50)] ^ T3[B0(s51)] ^ C[34+200];
+ t5 = T0[B3(s53)] ^ T1[B2(s50)] ^ T2[B1(s51)] ^ T3[B0(s52)] ^ C[35+200] ^ ctrl;
+ t2 = T0[B3(s54)] ^ T1[B2(s55)] ^ T2[B1(s56)] ^ T3[B0(s57)] ^ C[36+200];
+ t3 = T0[B3(s55)] ^ T1[B2(s56)] ^ T2[B1(s57)] ^ T3[B0(s54)] ^ C[37+200];
+ t6 = T0[B3(s56)] ^ T1[B2(s57)] ^ T2[B1(s54)] ^ T3[B0(s55)] ^ C[38+200];
+ t7 = T0[B3(s57)] ^ T1[B2(s54)] ^ T2[B1(s55)] ^ T3[B0(s56)] ^ C[39+200];
+
+ s70 ^= T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )];
+ s71 ^= T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )];
+ s74 ^= T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )];
+ s75 ^= T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )];
+ s72 ^= T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )];
+ s73 ^= T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )];
+ s76 ^= T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )];
+ s77 ^= T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )];
+
+ /* Lane 6 */
+ t0 = T0[B3(s60)] ^ T1[B2(s61)] ^ T2[B1(s62)] ^ T3[B0(s63)] ^ C[ 0+240];
+ t1 = T0[B3(s61)] ^ T1[B2(s62)] ^ T2[B1(s63)] ^ T3[B0(s60)] ^ C[ 1+240];
+ t4 = T0[B3(s62)] ^ T1[B2(s63)] ^ T2[B1(s60)] ^ T3[B0(s61)] ^ C[ 2+240];
+ t5 = T0[B3(s63)] ^ T1[B2(s60)] ^ T2[B1(s61)] ^ T3[B0(s62)] ^ C[ 3+240] ^ ctrh;
+ t2 = T0[B3(s64)] ^ T1[B2(s65)] ^ T2[B1(s66)] ^ T3[B0(s67)] ^ C[ 4+240];
+ t3 = T0[B3(s65)] ^ T1[B2(s66)] ^ T2[B1(s67)] ^ T3[B0(s64)] ^ C[ 5+240];
+ t6 = T0[B3(s66)] ^ T1[B2(s67)] ^ T2[B1(s64)] ^ T3[B0(s65)] ^ C[ 6+240];
+ t7 = T0[B3(s67)] ^ T1[B2(s64)] ^ T2[B1(s65)] ^ T3[B0(s66)] ^ C[ 7+240];
+
+ s60 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 8+240];
+ s61 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 9+240];
+ s64 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[10+240];
+ s65 = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[11+240] ^ ctrl;
+ s62 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[12+240];
+ s63 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[13+240];
+ s66 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[14+240];
+ s67 = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[15+240];
+
+ h[0] = T0[B3(s60)] ^ T1[B2(s61)] ^ T2[B1(s62)] ^ T3[B0(s63)];
+ h[1] = T0[B3(s61)] ^ T1[B2(s62)] ^ T2[B1(s63)] ^ T3[B0(s60)];
+ h[4] = T0[B3(s62)] ^ T1[B2(s63)] ^ T2[B1(s60)] ^ T3[B0(s61)];
+ h[5] = T0[B3(s63)] ^ T1[B2(s60)] ^ T2[B1(s61)] ^ T3[B0(s62)];
+ h[2] = T0[B3(s64)] ^ T1[B2(s65)] ^ T2[B1(s66)] ^ T3[B0(s67)];
+ h[3] = T0[B3(s65)] ^ T1[B2(s66)] ^ T2[B1(s67)] ^ T3[B0(s64)];
+ h[6] = T0[B3(s66)] ^ T1[B2(s67)] ^ T2[B1(s64)] ^ T3[B0(s65)];
+ h[7] = T0[B3(s67)] ^ T1[B2(s64)] ^ T2[B1(s65)] ^ T3[B0(s66)];
+
+ /* Lane 7 */
+ t0 = T0[B3(s70)] ^ T1[B2(s71)] ^ T2[B1(s72)] ^ T3[B0(s73)] ^ C[ 0+256];
+ t1 = T0[B3(s71)] ^ T1[B2(s72)] ^ T2[B1(s73)] ^ T3[B0(s70)] ^ C[ 1+256];
+ t4 = T0[B3(s72)] ^ T1[B2(s73)] ^ T2[B1(s70)] ^ T3[B0(s71)] ^ C[ 2+256];
+ t5 = T0[B3(s73)] ^ T1[B2(s70)] ^ T2[B1(s71)] ^ T3[B0(s72)] ^ C[ 3+256] ^ ctrh;
+ t2 = T0[B3(s74)] ^ T1[B2(s75)] ^ T2[B1(s76)] ^ T3[B0(s77)] ^ C[ 4+256];
+ t3 = T0[B3(s75)] ^ T1[B2(s76)] ^ T2[B1(s77)] ^ T3[B0(s74)] ^ C[ 5+256];
+ t6 = T0[B3(s76)] ^ T1[B2(s77)] ^ T2[B1(s74)] ^ T3[B0(s75)] ^ C[ 6+256];
+ t7 = T0[B3(s77)] ^ T1[B2(s74)] ^ T2[B1(s75)] ^ T3[B0(s76)] ^ C[ 7+256];
+
+ s70 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 8+256];
+ s71 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 9+256];
+ s74 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[10+256];
+ s75 = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[11+256] ^ ctrl;
+ s72 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[12+256];
+ s73 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[13+256];
+ s76 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[14+256];
+ s77 = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[15+256];
+
+ h[0] ^= T0[B3(s70)] ^ T1[B2(s71)] ^ T2[B1(s72)] ^ T3[B0(s73)];
+ h[1] ^= T0[B3(s71)] ^ T1[B2(s72)] ^ T2[B1(s73)] ^ T3[B0(s70)];
+ h[4] ^= T0[B3(s72)] ^ T1[B2(s73)] ^ T2[B1(s70)] ^ T3[B0(s71)];
+ h[5] ^= T0[B3(s73)] ^ T1[B2(s70)] ^ T2[B1(s71)] ^ T3[B0(s72)];
+ h[2] ^= T0[B3(s74)] ^ T1[B2(s75)] ^ T2[B1(s76)] ^ T3[B0(s77)];
+ h[3] ^= T0[B3(s75)] ^ T1[B2(s76)] ^ T2[B1(s77)] ^ T3[B0(s74)];
+ h[6] ^= T0[B3(s76)] ^ T1[B2(s77)] ^ T2[B1(s74)] ^ T3[B0(s75)];
+ h[7] ^= T0[B3(s77)] ^ T1[B2(s74)] ^ T2[B1(s75)] ^ T3[B0(s76)];
+}
+
+void lane512_compress(const u8 m[128], u32 h[16], const u32 ctrh, const u32 ctrl)
+{
+ u32 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, ta, tb, tc, td, te, tf; /* temp */
+ u32 s00, s01, s02, s03, s04, s05, s06, s07, s08, s09, s0a, s0b, s0c, s0d, s0e, s0f; /* lane 0 */
+ u32 s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s1a, s1b, s1c, s1d, s1e, s1f; /* lane 1 */
+ u32 s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s2a, s2b, s2c, s2d, s2e, s2f; /* lane 2 */
+ u32 s30, s31, s32, s33, s34, s35, s36, s37, s38, s39, s3a, s3b, s3c, s3d, s3e, s3f; /* lane 3 */
+ u32 s40, s41, s42, s43, s44, s45, s46, s47, s48, s49, s4a, s4b, s4c, s4d, s4e, s4f; /* lane 4 */
+ u32 s50, s51, s52, s53, s54, s55, s56, s57, s58, s59, s5a, s5b, s5c, s5d, s5e, s5f; /* lane 5 */
+ u32 s60, s61, s62, s63, s64, s65, s66, s67, s68, s69, s6a, s6b, s6c, s6d, s6e, s6f; /* lane 6 */
+ u32 s70, s71, s72, s73, s74, s75, s76, s77, s78, s79, s7a, s7b, s7c, s7d, s7e, s7f; /* lane 7 */
+
+ /* Message expansion */
+ s30 = h[0];
+ s31 = h[1];
+ s32 = h[2];
+ s33 = h[3];
+ s34 = h[4];
+ s35 = h[5];
+ s36 = h[6];
+ s37 = h[7];
+ s38 = h[8];
+ s39 = h[9];
+ s3a = h[10];
+ s3b = h[11];
+ s3c = h[12];
+ s3d = h[13];
+ s3e = h[14];
+ s3f = h[15];
+ s40 = U8TO32_BIG(m + 0);
+ s41 = U8TO32_BIG(m + 4);
+ s42 = U8TO32_BIG(m + 8);
+ s43 = U8TO32_BIG(m + 12);
+ s44 = U8TO32_BIG(m + 16);
+ s45 = U8TO32_BIG(m + 20);
+ s46 = U8TO32_BIG(m + 24);
+ s47 = U8TO32_BIG(m + 28);
+ s48 = U8TO32_BIG(m + 32);
+ s49 = U8TO32_BIG(m + 36);
+ s4a = U8TO32_BIG(m + 40);
+ s4b = U8TO32_BIG(m + 44);
+ s4c = U8TO32_BIG(m + 48);
+ s4d = U8TO32_BIG(m + 52);
+ s4e = U8TO32_BIG(m + 56);
+ s4f = U8TO32_BIG(m + 60);
+ s50 = U8TO32_BIG(m + 64);
+ s51 = U8TO32_BIG(m + 68);
+ s52 = U8TO32_BIG(m + 72);
+ s53 = U8TO32_BIG(m + 76);
+ s54 = U8TO32_BIG(m + 80);
+ s55 = U8TO32_BIG(m + 84);
+ s56 = U8TO32_BIG(m + 88);
+ s57 = U8TO32_BIG(m + 92);
+ s58 = U8TO32_BIG(m + 96);
+ s59 = U8TO32_BIG(m + 100);
+ s5a = U8TO32_BIG(m + 104);
+ s5b = U8TO32_BIG(m + 108);
+ s5c = U8TO32_BIG(m + 112);
+ s5d = U8TO32_BIG(m + 116);
+ s5e = U8TO32_BIG(m + 120);
+ s5f = U8TO32_BIG(m + 124);
+ s00 = s30 ^ s40 ^ s48 ^ s50 ^ s58;
+ s01 = s31 ^ s41 ^ s49 ^ s51 ^ s59;
+ s02 = s32 ^ s42 ^ s4a ^ s52 ^ s5a;
+ s03 = s33 ^ s43 ^ s4b ^ s53 ^ s5b;
+ s04 = s34 ^ s44 ^ s4c ^ s54 ^ s5c;
+ s05 = s35 ^ s45 ^ s4d ^ s55 ^ s5d;
+ s06 = s36 ^ s46 ^ s4e ^ s56 ^ s5e;
+ s07 = s37 ^ s47 ^ s4f ^ s57 ^ s5f;
+ s08 = s38 ^ s40 ^ s50;
+ s09 = s39 ^ s41 ^ s51;
+ s0a = s3a ^ s42 ^ s52;
+ s0b = s3b ^ s43 ^ s53;
+ s0c = s3c ^ s44 ^ s54;
+ s0d = s3d ^ s45 ^ s55;
+ s0e = s3e ^ s46 ^ s56;
+ s0f = s3f ^ s47 ^ s57;
+ s10 = s00 ^ s38 ^ s48;
+ s11 = s01 ^ s39 ^ s49;
+ s12 = s02 ^ s3a ^ s4a;
+ s13 = s03 ^ s3b ^ s4b;
+ s14 = s04 ^ s3c ^ s4c;
+ s15 = s05 ^ s3d ^ s4d;
+ s16 = s06 ^ s3e ^ s4e;
+ s17 = s07 ^ s3f ^ s4f;
+ s18 = s30 ^ s48 ^ s50;
+ s19 = s31 ^ s49 ^ s51;
+ s1a = s32 ^ s4a ^ s52;
+ s1b = s33 ^ s4b ^ s53;
+ s1c = s34 ^ s4c ^ s54;
+ s1d = s35 ^ s4d ^ s55;
+ s1e = s36 ^ s4e ^ s56;
+ s1f = s37 ^ s4f ^ s57;
+ s20 = s00 ^ s38 ^ s58;
+ s21 = s01 ^ s39 ^ s59;
+ s22 = s02 ^ s3a ^ s5a;
+ s23 = s03 ^ s3b ^ s5b;
+ s24 = s04 ^ s3c ^ s5c;
+ s25 = s05 ^ s3d ^ s5d;
+ s26 = s06 ^ s3e ^ s5e;
+ s27 = s07 ^ s3f ^ s5f;
+ s28 = s30 ^ s40 ^ s58;
+ s29 = s31 ^ s41 ^ s59;
+ s2a = s32 ^ s42 ^ s5a;
+ s2b = s33 ^ s43 ^ s5b;
+ s2c = s34 ^ s44 ^ s5c;
+ s2d = s35 ^ s45 ^ s5d;
+ s2e = s36 ^ s46 ^ s5e;
+ s2f = s37 ^ s47 ^ s5f;
+
+ /* Lane 0 */
+ t0 = T0[B3(s00)] ^ T1[B2(s01)] ^ T2[B1(s02)] ^ T3[B0(s03)] ^ C[ 0];
+ t4 = T0[B3(s01)] ^ T1[B2(s02)] ^ T2[B1(s03)] ^ T3[B0(s00)] ^ C[ 1];
+ t8 = T0[B3(s02)] ^ T1[B2(s03)] ^ T2[B1(s00)] ^ T3[B0(s01)] ^ C[ 2];
+ tc = T0[B3(s03)] ^ T1[B2(s00)] ^ T2[B1(s01)] ^ T3[B0(s02)] ^ C[ 3] ^ ctrh;
+ t1 = T0[B3(s04)] ^ T1[B2(s05)] ^ T2[B1(s06)] ^ T3[B0(s07)] ^ C[ 4];
+ t5 = T0[B3(s05)] ^ T1[B2(s06)] ^ T2[B1(s07)] ^ T3[B0(s04)] ^ C[ 5];
+ t9 = T0[B3(s06)] ^ T1[B2(s07)] ^ T2[B1(s04)] ^ T3[B0(s05)] ^ C[ 6];
+ td = T0[B3(s07)] ^ T1[B2(s04)] ^ T2[B1(s05)] ^ T3[B0(s06)] ^ C[ 7];
+ t2 = T0[B3(s08)] ^ T1[B2(s09)] ^ T2[B1(s0a)] ^ T3[B0(s0b)] ^ C[ 8];
+ t6 = T0[B3(s09)] ^ T1[B2(s0a)] ^ T2[B1(s0b)] ^ T3[B0(s08)] ^ C[ 9];
+ ta = T0[B3(s0a)] ^ T1[B2(s0b)] ^ T2[B1(s08)] ^ T3[B0(s09)] ^ C[ 10];
+ te = T0[B3(s0b)] ^ T1[B2(s08)] ^ T2[B1(s09)] ^ T3[B0(s0a)] ^ C[ 11];
+ t3 = T0[B3(s0c)] ^ T1[B2(s0d)] ^ T2[B1(s0e)] ^ T3[B0(s0f)] ^ C[ 12];
+ t7 = T0[B3(s0d)] ^ T1[B2(s0e)] ^ T2[B1(s0f)] ^ T3[B0(s0c)] ^ C[ 13];
+ tb = T0[B3(s0e)] ^ T1[B2(s0f)] ^ T2[B1(s0c)] ^ T3[B0(s0d)] ^ C[ 14];
+ tf = T0[B3(s0f)] ^ T1[B2(s0c)] ^ T2[B1(s0d)] ^ T3[B0(s0e)] ^ C[ 15];
+
+ s00 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 16];
+ s04 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 17];
+ s08 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[ 18];
+ s0c = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[ 19] ^ ctrl;
+ s01 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[ 20];
+ s05 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[ 21];
+ s09 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[ 22];
+ s0d = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[ 23];
+ s02 = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )] ^ C[ 24];
+ s06 = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )] ^ C[ 25];
+ s0a = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )] ^ C[ 26];
+ s0e = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )] ^ C[ 27];
+ s03 = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )] ^ C[ 28];
+ s07 = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )] ^ C[ 29];
+ s0b = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )] ^ C[ 30];
+ s0f = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )] ^ C[ 31];
+
+ t0 = T0[B3(s00)] ^ T1[B2(s01)] ^ T2[B1(s02)] ^ T3[B0(s03)] ^ C[ 32];
+ t4 = T0[B3(s01)] ^ T1[B2(s02)] ^ T2[B1(s03)] ^ T3[B0(s00)] ^ C[ 33];
+ t8 = T0[B3(s02)] ^ T1[B2(s03)] ^ T2[B1(s00)] ^ T3[B0(s01)] ^ C[ 34];
+ tc = T0[B3(s03)] ^ T1[B2(s00)] ^ T2[B1(s01)] ^ T3[B0(s02)] ^ C[ 35] ^ ctrh;
+ t1 = T0[B3(s04)] ^ T1[B2(s05)] ^ T2[B1(s06)] ^ T3[B0(s07)] ^ C[ 36];
+ t5 = T0[B3(s05)] ^ T1[B2(s06)] ^ T2[B1(s07)] ^ T3[B0(s04)] ^ C[ 37];
+ t9 = T0[B3(s06)] ^ T1[B2(s07)] ^ T2[B1(s04)] ^ T3[B0(s05)] ^ C[ 38];
+ td = T0[B3(s07)] ^ T1[B2(s04)] ^ T2[B1(s05)] ^ T3[B0(s06)] ^ C[ 39];
+ t2 = T0[B3(s08)] ^ T1[B2(s09)] ^ T2[B1(s0a)] ^ T3[B0(s0b)] ^ C[ 40];
+ t6 = T0[B3(s09)] ^ T1[B2(s0a)] ^ T2[B1(s0b)] ^ T3[B0(s08)] ^ C[ 41];
+ ta = T0[B3(s0a)] ^ T1[B2(s0b)] ^ T2[B1(s08)] ^ T3[B0(s09)] ^ C[ 42];
+ te = T0[B3(s0b)] ^ T1[B2(s08)] ^ T2[B1(s09)] ^ T3[B0(s0a)] ^ C[ 43];
+ t3 = T0[B3(s0c)] ^ T1[B2(s0d)] ^ T2[B1(s0e)] ^ T3[B0(s0f)] ^ C[ 44];
+ t7 = T0[B3(s0d)] ^ T1[B2(s0e)] ^ T2[B1(s0f)] ^ T3[B0(s0c)] ^ C[ 45];
+ tb = T0[B3(s0e)] ^ T1[B2(s0f)] ^ T2[B1(s0c)] ^ T3[B0(s0d)] ^ C[ 46];
+ tf = T0[B3(s0f)] ^ T1[B2(s0c)] ^ T2[B1(s0d)] ^ T3[B0(s0e)] ^ C[ 47];
+
+ s00 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 48];
+ s04 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 49];
+ s08 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[ 50];
+ s0c = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[ 51] ^ ctrl;
+ s01 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[ 52];
+ s05 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[ 53];
+ s09 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[ 54];
+ s0d = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[ 55];
+ s02 = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )] ^ C[ 56];
+ s06 = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )] ^ C[ 57];
+ s0a = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )] ^ C[ 58];
+ s0e = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )] ^ C[ 59];
+ s03 = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )] ^ C[ 60];
+ s07 = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )] ^ C[ 61];
+ s0b = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )] ^ C[ 62];
+ s0f = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )] ^ C[ 63];
+
+ t0 = T0[B3(s00)] ^ T1[B2(s01)] ^ T2[B1(s02)] ^ T3[B0(s03)] ^ C[ 64];
+ t4 = T0[B3(s01)] ^ T1[B2(s02)] ^ T2[B1(s03)] ^ T3[B0(s00)] ^ C[ 65];
+ t8 = T0[B3(s02)] ^ T1[B2(s03)] ^ T2[B1(s00)] ^ T3[B0(s01)] ^ C[ 66];
+ tc = T0[B3(s03)] ^ T1[B2(s00)] ^ T2[B1(s01)] ^ T3[B0(s02)] ^ C[ 67] ^ ctrh;
+ t1 = T0[B3(s04)] ^ T1[B2(s05)] ^ T2[B1(s06)] ^ T3[B0(s07)] ^ C[ 68];
+ t5 = T0[B3(s05)] ^ T1[B2(s06)] ^ T2[B1(s07)] ^ T3[B0(s04)] ^ C[ 69];
+ t9 = T0[B3(s06)] ^ T1[B2(s07)] ^ T2[B1(s04)] ^ T3[B0(s05)] ^ C[ 70];
+ td = T0[B3(s07)] ^ T1[B2(s04)] ^ T2[B1(s05)] ^ T3[B0(s06)] ^ C[ 71];
+ t2 = T0[B3(s08)] ^ T1[B2(s09)] ^ T2[B1(s0a)] ^ T3[B0(s0b)] ^ C[ 72];
+ t6 = T0[B3(s09)] ^ T1[B2(s0a)] ^ T2[B1(s0b)] ^ T3[B0(s08)] ^ C[ 73];
+ ta = T0[B3(s0a)] ^ T1[B2(s0b)] ^ T2[B1(s08)] ^ T3[B0(s09)] ^ C[ 74];
+ te = T0[B3(s0b)] ^ T1[B2(s08)] ^ T2[B1(s09)] ^ T3[B0(s0a)] ^ C[ 75];
+ t3 = T0[B3(s0c)] ^ T1[B2(s0d)] ^ T2[B1(s0e)] ^ T3[B0(s0f)] ^ C[ 76];
+ t7 = T0[B3(s0d)] ^ T1[B2(s0e)] ^ T2[B1(s0f)] ^ T3[B0(s0c)] ^ C[ 77];
+ tb = T0[B3(s0e)] ^ T1[B2(s0f)] ^ T2[B1(s0c)] ^ T3[B0(s0d)] ^ C[ 78];
+ tf = T0[B3(s0f)] ^ T1[B2(s0c)] ^ T2[B1(s0d)] ^ T3[B0(s0e)] ^ C[ 79];
+
+ s00 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 80];
+ s04 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 81];
+ s08 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[ 82];
+ s0c = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[ 83] ^ ctrl;
+ s01 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[ 84];
+ s05 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[ 85];
+ s09 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[ 86];
+ s0d = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[ 87];
+ s02 = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )] ^ C[ 88];
+ s06 = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )] ^ C[ 89];
+ s0a = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )] ^ C[ 90];
+ s0e = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )] ^ C[ 91];
+ s03 = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )] ^ C[ 92];
+ s07 = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )] ^ C[ 93];
+ s0b = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )] ^ C[ 94];
+ s0f = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )] ^ C[ 95];
+
+ t0 = T0[B3(s00)] ^ T1[B2(s01)] ^ T2[B1(s02)] ^ T3[B0(s03)] ^ C[ 96];
+ t4 = T0[B3(s01)] ^ T1[B2(s02)] ^ T2[B1(s03)] ^ T3[B0(s00)] ^ C[ 97];
+ t8 = T0[B3(s02)] ^ T1[B2(s03)] ^ T2[B1(s00)] ^ T3[B0(s01)] ^ C[ 98];
+ tc = T0[B3(s03)] ^ T1[B2(s00)] ^ T2[B1(s01)] ^ T3[B0(s02)] ^ C[ 99] ^ ctrh;
+ t1 = T0[B3(s04)] ^ T1[B2(s05)] ^ T2[B1(s06)] ^ T3[B0(s07)] ^ C[100];
+ t5 = T0[B3(s05)] ^ T1[B2(s06)] ^ T2[B1(s07)] ^ T3[B0(s04)] ^ C[101];
+ t9 = T0[B3(s06)] ^ T1[B2(s07)] ^ T2[B1(s04)] ^ T3[B0(s05)] ^ C[102];
+ td = T0[B3(s07)] ^ T1[B2(s04)] ^ T2[B1(s05)] ^ T3[B0(s06)] ^ C[103];
+ t2 = T0[B3(s08)] ^ T1[B2(s09)] ^ T2[B1(s0a)] ^ T3[B0(s0b)] ^ C[104];
+ t6 = T0[B3(s09)] ^ T1[B2(s0a)] ^ T2[B1(s0b)] ^ T3[B0(s08)] ^ C[105];
+ ta = T0[B3(s0a)] ^ T1[B2(s0b)] ^ T2[B1(s08)] ^ T3[B0(s09)] ^ C[106];
+ te = T0[B3(s0b)] ^ T1[B2(s08)] ^ T2[B1(s09)] ^ T3[B0(s0a)] ^ C[107];
+ t3 = T0[B3(s0c)] ^ T1[B2(s0d)] ^ T2[B1(s0e)] ^ T3[B0(s0f)] ^ C[108];
+ t7 = T0[B3(s0d)] ^ T1[B2(s0e)] ^ T2[B1(s0f)] ^ T3[B0(s0c)] ^ C[109];
+ tb = T0[B3(s0e)] ^ T1[B2(s0f)] ^ T2[B1(s0c)] ^ T3[B0(s0d)] ^ C[110];
+ tf = T0[B3(s0f)] ^ T1[B2(s0c)] ^ T2[B1(s0d)] ^ T3[B0(s0e)] ^ C[111];
+
+ s60 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )];
+ s64 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )];
+ s68 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )];
+ s6c = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )];
+ s61 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )];
+ s65 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )];
+ s69 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )];
+ s6d = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )];
+ s62 = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )];
+ s66 = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )];
+ s6a = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )];
+ s6e = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )];
+ s63 = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )];
+ s67 = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )];
+ s6b = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )];
+ s6f = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )];
+
+ /* Lane 1 */
+ t0 = T0[B3(s10)] ^ T1[B2(s11)] ^ T2[B1(s12)] ^ T3[B0(s13)] ^ C[ 0+112];
+ t4 = T0[B3(s11)] ^ T1[B2(s12)] ^ T2[B1(s13)] ^ T3[B0(s10)] ^ C[ 1+112];
+ t8 = T0[B3(s12)] ^ T1[B2(s13)] ^ T2[B1(s10)] ^ T3[B0(s11)] ^ C[ 2+112];
+ tc = T0[B3(s13)] ^ T1[B2(s10)] ^ T2[B1(s11)] ^ T3[B0(s12)] ^ C[ 3+112] ^ ctrl;
+ t1 = T0[B3(s14)] ^ T1[B2(s15)] ^ T2[B1(s16)] ^ T3[B0(s17)] ^ C[ 4+112];
+ t5 = T0[B3(s15)] ^ T1[B2(s16)] ^ T2[B1(s17)] ^ T3[B0(s14)] ^ C[ 5+112];
+ t9 = T0[B3(s16)] ^ T1[B2(s17)] ^ T2[B1(s14)] ^ T3[B0(s15)] ^ C[ 6+112];
+ td = T0[B3(s17)] ^ T1[B2(s14)] ^ T2[B1(s15)] ^ T3[B0(s16)] ^ C[ 7+112];
+ t2 = T0[B3(s18)] ^ T1[B2(s19)] ^ T2[B1(s1a)] ^ T3[B0(s1b)] ^ C[ 8+112];
+ t6 = T0[B3(s19)] ^ T1[B2(s1a)] ^ T2[B1(s1b)] ^ T3[B0(s18)] ^ C[ 9+112];
+ ta = T0[B3(s1a)] ^ T1[B2(s1b)] ^ T2[B1(s18)] ^ T3[B0(s19)] ^ C[ 10+112];
+ te = T0[B3(s1b)] ^ T1[B2(s18)] ^ T2[B1(s19)] ^ T3[B0(s1a)] ^ C[ 11+112];
+ t3 = T0[B3(s1c)] ^ T1[B2(s1d)] ^ T2[B1(s1e)] ^ T3[B0(s1f)] ^ C[ 12+112];
+ t7 = T0[B3(s1d)] ^ T1[B2(s1e)] ^ T2[B1(s1f)] ^ T3[B0(s1c)] ^ C[ 13+112];
+ tb = T0[B3(s1e)] ^ T1[B2(s1f)] ^ T2[B1(s1c)] ^ T3[B0(s1d)] ^ C[ 14+112];
+ tf = T0[B3(s1f)] ^ T1[B2(s1c)] ^ T2[B1(s1d)] ^ T3[B0(s1e)] ^ C[ 15+112];
+
+ s10 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 16+112];
+ s14 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 17+112];
+ s18 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[ 18+112];
+ s1c = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[ 19+112] ^ ctrh;
+ s11 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[ 20+112];
+ s15 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[ 21+112];
+ s19 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[ 22+112];
+ s1d = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[ 23+112];
+ s12 = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )] ^ C[ 24+112];
+ s16 = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )] ^ C[ 25+112];
+ s1a = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )] ^ C[ 26+112];
+ s1e = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )] ^ C[ 27+112];
+ s13 = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )] ^ C[ 28+112];
+ s17 = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )] ^ C[ 29+112];
+ s1b = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )] ^ C[ 30+112];
+ s1f = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )] ^ C[ 31+112];
+
+ t0 = T0[B3(s10)] ^ T1[B2(s11)] ^ T2[B1(s12)] ^ T3[B0(s13)] ^ C[ 32+112];
+ t4 = T0[B3(s11)] ^ T1[B2(s12)] ^ T2[B1(s13)] ^ T3[B0(s10)] ^ C[ 33+112];
+ t8 = T0[B3(s12)] ^ T1[B2(s13)] ^ T2[B1(s10)] ^ T3[B0(s11)] ^ C[ 34+112];
+ tc = T0[B3(s13)] ^ T1[B2(s10)] ^ T2[B1(s11)] ^ T3[B0(s12)] ^ C[ 35+112] ^ ctrl;
+ t1 = T0[B3(s14)] ^ T1[B2(s15)] ^ T2[B1(s16)] ^ T3[B0(s17)] ^ C[ 36+112];
+ t5 = T0[B3(s15)] ^ T1[B2(s16)] ^ T2[B1(s17)] ^ T3[B0(s14)] ^ C[ 37+112];
+ t9 = T0[B3(s16)] ^ T1[B2(s17)] ^ T2[B1(s14)] ^ T3[B0(s15)] ^ C[ 38+112];
+ td = T0[B3(s17)] ^ T1[B2(s14)] ^ T2[B1(s15)] ^ T3[B0(s16)] ^ C[ 39+112];
+ t2 = T0[B3(s18)] ^ T1[B2(s19)] ^ T2[B1(s1a)] ^ T3[B0(s1b)] ^ C[ 40+112];
+ t6 = T0[B3(s19)] ^ T1[B2(s1a)] ^ T2[B1(s1b)] ^ T3[B0(s18)] ^ C[ 41+112];
+ ta = T0[B3(s1a)] ^ T1[B2(s1b)] ^ T2[B1(s18)] ^ T3[B0(s19)] ^ C[ 42+112];
+ te = T0[B3(s1b)] ^ T1[B2(s18)] ^ T2[B1(s19)] ^ T3[B0(s1a)] ^ C[ 43+112];
+ t3 = T0[B3(s1c)] ^ T1[B2(s1d)] ^ T2[B1(s1e)] ^ T3[B0(s1f)] ^ C[ 44+112];
+ t7 = T0[B3(s1d)] ^ T1[B2(s1e)] ^ T2[B1(s1f)] ^ T3[B0(s1c)] ^ C[ 45+112];
+ tb = T0[B3(s1e)] ^ T1[B2(s1f)] ^ T2[B1(s1c)] ^ T3[B0(s1d)] ^ C[ 46+112];
+ tf = T0[B3(s1f)] ^ T1[B2(s1c)] ^ T2[B1(s1d)] ^ T3[B0(s1e)] ^ C[ 47+112];
+
+ s10 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 48+112];
+ s14 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 49+112];
+ s18 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[ 50+112];
+ s1c = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[ 51+112] ^ ctrh;
+ s11 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[ 52+112];
+ s15 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[ 53+112];
+ s19 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[ 54+112];
+ s1d = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[ 55+112];
+ s12 = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )] ^ C[ 56+112];
+ s16 = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )] ^ C[ 57+112];
+ s1a = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )] ^ C[ 58+112];
+ s1e = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )] ^ C[ 59+112];
+ s13 = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )] ^ C[ 60+112];
+ s17 = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )] ^ C[ 61+112];
+ s1b = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )] ^ C[ 62+112];
+ s1f = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )] ^ C[ 63+112];
+
+ t0 = T0[B3(s10)] ^ T1[B2(s11)] ^ T2[B1(s12)] ^ T3[B0(s13)] ^ C[ 64+112];
+ t4 = T0[B3(s11)] ^ T1[B2(s12)] ^ T2[B1(s13)] ^ T3[B0(s10)] ^ C[ 65+112];
+ t8 = T0[B3(s12)] ^ T1[B2(s13)] ^ T2[B1(s10)] ^ T3[B0(s11)] ^ C[ 66+112];
+ tc = T0[B3(s13)] ^ T1[B2(s10)] ^ T2[B1(s11)] ^ T3[B0(s12)] ^ C[ 67+112] ^ ctrl;
+ t1 = T0[B3(s14)] ^ T1[B2(s15)] ^ T2[B1(s16)] ^ T3[B0(s17)] ^ C[ 68+112];
+ t5 = T0[B3(s15)] ^ T1[B2(s16)] ^ T2[B1(s17)] ^ T3[B0(s14)] ^ C[ 69+112];
+ t9 = T0[B3(s16)] ^ T1[B2(s17)] ^ T2[B1(s14)] ^ T3[B0(s15)] ^ C[ 70+112];
+ td = T0[B3(s17)] ^ T1[B2(s14)] ^ T2[B1(s15)] ^ T3[B0(s16)] ^ C[ 71+112];
+ t2 = T0[B3(s18)] ^ T1[B2(s19)] ^ T2[B1(s1a)] ^ T3[B0(s1b)] ^ C[ 72+112];
+ t6 = T0[B3(s19)] ^ T1[B2(s1a)] ^ T2[B1(s1b)] ^ T3[B0(s18)] ^ C[ 73+112];
+ ta = T0[B3(s1a)] ^ T1[B2(s1b)] ^ T2[B1(s18)] ^ T3[B0(s19)] ^ C[ 74+112];
+ te = T0[B3(s1b)] ^ T1[B2(s18)] ^ T2[B1(s19)] ^ T3[B0(s1a)] ^ C[ 75+112];
+ t3 = T0[B3(s1c)] ^ T1[B2(s1d)] ^ T2[B1(s1e)] ^ T3[B0(s1f)] ^ C[ 76+112];
+ t7 = T0[B3(s1d)] ^ T1[B2(s1e)] ^ T2[B1(s1f)] ^ T3[B0(s1c)] ^ C[ 77+112];
+ tb = T0[B3(s1e)] ^ T1[B2(s1f)] ^ T2[B1(s1c)] ^ T3[B0(s1d)] ^ C[ 78+112];
+ tf = T0[B3(s1f)] ^ T1[B2(s1c)] ^ T2[B1(s1d)] ^ T3[B0(s1e)] ^ C[ 79+112];
+
+ s10 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 80+112];
+ s14 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 81+112];
+ s18 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[ 82+112];
+ s1c = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[ 83+112] ^ ctrh;
+ s11 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[ 84+112];
+ s15 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[ 85+112];
+ s19 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[ 86+112];
+ s1d = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[ 87+112];
+ s12 = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )] ^ C[ 88+112];
+ s16 = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )] ^ C[ 89+112];
+ s1a = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )] ^ C[ 90+112];
+ s1e = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )] ^ C[ 91+112];
+ s13 = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )] ^ C[ 92+112];
+ s17 = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )] ^ C[ 93+112];
+ s1b = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )] ^ C[ 94+112];
+ s1f = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )] ^ C[ 95+112];
+
+ t0 = T0[B3(s10)] ^ T1[B2(s11)] ^ T2[B1(s12)] ^ T3[B0(s13)] ^ C[ 96+112];
+ t4 = T0[B3(s11)] ^ T1[B2(s12)] ^ T2[B1(s13)] ^ T3[B0(s10)] ^ C[ 97+112];
+ t8 = T0[B3(s12)] ^ T1[B2(s13)] ^ T2[B1(s10)] ^ T3[B0(s11)] ^ C[ 98+112];
+ tc = T0[B3(s13)] ^ T1[B2(s10)] ^ T2[B1(s11)] ^ T3[B0(s12)] ^ C[ 99+112] ^ ctrl;
+ t1 = T0[B3(s14)] ^ T1[B2(s15)] ^ T2[B1(s16)] ^ T3[B0(s17)] ^ C[100+112];
+ t5 = T0[B3(s15)] ^ T1[B2(s16)] ^ T2[B1(s17)] ^ T3[B0(s14)] ^ C[101+112];
+ t9 = T0[B3(s16)] ^ T1[B2(s17)] ^ T2[B1(s14)] ^ T3[B0(s15)] ^ C[102+112];
+ td = T0[B3(s17)] ^ T1[B2(s14)] ^ T2[B1(s15)] ^ T3[B0(s16)] ^ C[103+112];
+ t2 = T0[B3(s18)] ^ T1[B2(s19)] ^ T2[B1(s1a)] ^ T3[B0(s1b)] ^ C[104+112];
+ t6 = T0[B3(s19)] ^ T1[B2(s1a)] ^ T2[B1(s1b)] ^ T3[B0(s18)] ^ C[105+112];
+ ta = T0[B3(s1a)] ^ T1[B2(s1b)] ^ T2[B1(s18)] ^ T3[B0(s19)] ^ C[106+112];
+ te = T0[B3(s1b)] ^ T1[B2(s18)] ^ T2[B1(s19)] ^ T3[B0(s1a)] ^ C[107+112];
+ t3 = T0[B3(s1c)] ^ T1[B2(s1d)] ^ T2[B1(s1e)] ^ T3[B0(s1f)] ^ C[108+112];
+ t7 = T0[B3(s1d)] ^ T1[B2(s1e)] ^ T2[B1(s1f)] ^ T3[B0(s1c)] ^ C[109+112];
+ tb = T0[B3(s1e)] ^ T1[B2(s1f)] ^ T2[B1(s1c)] ^ T3[B0(s1d)] ^ C[110+112];
+ tf = T0[B3(s1f)] ^ T1[B2(s1c)] ^ T2[B1(s1d)] ^ T3[B0(s1e)] ^ C[111+112];
+
+ s60 ^= T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )];
+ s64 ^= T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )];
+ s68 ^= T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )];
+ s6c ^= T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )];
+ s61 ^= T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )];
+ s65 ^= T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )];
+ s69 ^= T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )];
+ s6d ^= T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )];
+ s62 ^= T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )];
+ s66 ^= T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )];
+ s6a ^= T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )];
+ s6e ^= T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )];
+ s63 ^= T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )];
+ s67 ^= T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )];
+ s6b ^= T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )];
+ s6f ^= T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )];
+
+ /* Lane 2 */
+ t0 = T0[B3(s20)] ^ T1[B2(s21)] ^ T2[B1(s22)] ^ T3[B0(s23)] ^ C[ 0+224];
+ t4 = T0[B3(s21)] ^ T1[B2(s22)] ^ T2[B1(s23)] ^ T3[B0(s20)] ^ C[ 1+224];
+ t8 = T0[B3(s22)] ^ T1[B2(s23)] ^ T2[B1(s20)] ^ T3[B0(s21)] ^ C[ 2+224];
+ tc = T0[B3(s23)] ^ T1[B2(s20)] ^ T2[B1(s21)] ^ T3[B0(s22)] ^ C[ 3+224] ^ ctrh;
+ t1 = T0[B3(s24)] ^ T1[B2(s25)] ^ T2[B1(s26)] ^ T3[B0(s27)] ^ C[ 4+224];
+ t5 = T0[B3(s25)] ^ T1[B2(s26)] ^ T2[B1(s27)] ^ T3[B0(s24)] ^ C[ 5+224];
+ t9 = T0[B3(s26)] ^ T1[B2(s27)] ^ T2[B1(s24)] ^ T3[B0(s25)] ^ C[ 6+224];
+ td = T0[B3(s27)] ^ T1[B2(s24)] ^ T2[B1(s25)] ^ T3[B0(s26)] ^ C[ 7+224];
+ t2 = T0[B3(s28)] ^ T1[B2(s29)] ^ T2[B1(s2a)] ^ T3[B0(s2b)] ^ C[ 8+224];
+ t6 = T0[B3(s29)] ^ T1[B2(s2a)] ^ T2[B1(s2b)] ^ T3[B0(s28)] ^ C[ 9+224];
+ ta = T0[B3(s2a)] ^ T1[B2(s2b)] ^ T2[B1(s28)] ^ T3[B0(s29)] ^ C[ 10+224];
+ te = T0[B3(s2b)] ^ T1[B2(s28)] ^ T2[B1(s29)] ^ T3[B0(s2a)] ^ C[ 11+224];
+ t3 = T0[B3(s2c)] ^ T1[B2(s2d)] ^ T2[B1(s2e)] ^ T3[B0(s2f)] ^ C[ 12+224];
+ t7 = T0[B3(s2d)] ^ T1[B2(s2e)] ^ T2[B1(s2f)] ^ T3[B0(s2c)] ^ C[ 13+224];
+ tb = T0[B3(s2e)] ^ T1[B2(s2f)] ^ T2[B1(s2c)] ^ T3[B0(s2d)] ^ C[ 14+224];
+ tf = T0[B3(s2f)] ^ T1[B2(s2c)] ^ T2[B1(s2d)] ^ T3[B0(s2e)] ^ C[ 15+224];
+
+ s20 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 16+224];
+ s24 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 17+224];
+ s28 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[ 18+224];
+ s2c = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[ 19+224] ^ ctrl;
+ s21 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[ 20+224];
+ s25 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[ 21+224];
+ s29 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[ 22+224];
+ s2d = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[ 23+224];
+ s22 = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )] ^ C[ 24+224];
+ s26 = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )] ^ C[ 25+224];
+ s2a = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )] ^ C[ 26+224];
+ s2e = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )] ^ C[ 27+224];
+ s23 = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )] ^ C[ 28+224];
+ s27 = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )] ^ C[ 29+224];
+ s2b = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )] ^ C[ 30+224];
+ s2f = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )] ^ C[ 31+224];
+
+ t0 = T0[B3(s20)] ^ T1[B2(s21)] ^ T2[B1(s22)] ^ T3[B0(s23)] ^ C[ 32+224];
+ t4 = T0[B3(s21)] ^ T1[B2(s22)] ^ T2[B1(s23)] ^ T3[B0(s20)] ^ C[ 33+224];
+ t8 = T0[B3(s22)] ^ T1[B2(s23)] ^ T2[B1(s20)] ^ T3[B0(s21)] ^ C[ 34+224];
+ tc = T0[B3(s23)] ^ T1[B2(s20)] ^ T2[B1(s21)] ^ T3[B0(s22)] ^ C[ 35+224] ^ ctrh;
+ t1 = T0[B3(s24)] ^ T1[B2(s25)] ^ T2[B1(s26)] ^ T3[B0(s27)] ^ C[ 36+224];
+ t5 = T0[B3(s25)] ^ T1[B2(s26)] ^ T2[B1(s27)] ^ T3[B0(s24)] ^ C[ 37+224];
+ t9 = T0[B3(s26)] ^ T1[B2(s27)] ^ T2[B1(s24)] ^ T3[B0(s25)] ^ C[ 38+224];
+ td = T0[B3(s27)] ^ T1[B2(s24)] ^ T2[B1(s25)] ^ T3[B0(s26)] ^ C[ 39+224];
+ t2 = T0[B3(s28)] ^ T1[B2(s29)] ^ T2[B1(s2a)] ^ T3[B0(s2b)] ^ C[ 40+224];
+ t6 = T0[B3(s29)] ^ T1[B2(s2a)] ^ T2[B1(s2b)] ^ T3[B0(s28)] ^ C[ 41+224];
+ ta = T0[B3(s2a)] ^ T1[B2(s2b)] ^ T2[B1(s28)] ^ T3[B0(s29)] ^ C[ 42+224];
+ te = T0[B3(s2b)] ^ T1[B2(s28)] ^ T2[B1(s29)] ^ T3[B0(s2a)] ^ C[ 43+224];
+ t3 = T0[B3(s2c)] ^ T1[B2(s2d)] ^ T2[B1(s2e)] ^ T3[B0(s2f)] ^ C[ 44+224];
+ t7 = T0[B3(s2d)] ^ T1[B2(s2e)] ^ T2[B1(s2f)] ^ T3[B0(s2c)] ^ C[ 45+224];
+ tb = T0[B3(s2e)] ^ T1[B2(s2f)] ^ T2[B1(s2c)] ^ T3[B0(s2d)] ^ C[ 46+224];
+ tf = T0[B3(s2f)] ^ T1[B2(s2c)] ^ T2[B1(s2d)] ^ T3[B0(s2e)] ^ C[ 47+224];
+
+ s20 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 48+224];
+ s24 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 49+224];
+ s28 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[ 50+224];
+ s2c = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[ 51+224] ^ ctrl;
+ s21 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[ 52+224];
+ s25 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[ 53+224];
+ s29 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[ 54+224];
+ s2d = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[ 55+224];
+ s22 = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )] ^ C[ 56+224];
+ s26 = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )] ^ C[ 57+224];
+ s2a = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )] ^ C[ 58+224];
+ s2e = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )] ^ C[ 59+224];
+ s23 = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )] ^ C[ 60+224];
+ s27 = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )] ^ C[ 61+224];
+ s2b = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )] ^ C[ 62+224];
+ s2f = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )] ^ C[ 63+224];
+
+ t0 = T0[B3(s20)] ^ T1[B2(s21)] ^ T2[B1(s22)] ^ T3[B0(s23)] ^ C[ 64+224];
+ t4 = T0[B3(s21)] ^ T1[B2(s22)] ^ T2[B1(s23)] ^ T3[B0(s20)] ^ C[ 65+224];
+ t8 = T0[B3(s22)] ^ T1[B2(s23)] ^ T2[B1(s20)] ^ T3[B0(s21)] ^ C[ 66+224];
+ tc = T0[B3(s23)] ^ T1[B2(s20)] ^ T2[B1(s21)] ^ T3[B0(s22)] ^ C[ 67+224] ^ ctrh;
+ t1 = T0[B3(s24)] ^ T1[B2(s25)] ^ T2[B1(s26)] ^ T3[B0(s27)] ^ C[ 68+224];
+ t5 = T0[B3(s25)] ^ T1[B2(s26)] ^ T2[B1(s27)] ^ T3[B0(s24)] ^ C[ 69+224];
+ t9 = T0[B3(s26)] ^ T1[B2(s27)] ^ T2[B1(s24)] ^ T3[B0(s25)] ^ C[ 70+224];
+ td = T0[B3(s27)] ^ T1[B2(s24)] ^ T2[B1(s25)] ^ T3[B0(s26)] ^ C[ 71+224];
+ t2 = T0[B3(s28)] ^ T1[B2(s29)] ^ T2[B1(s2a)] ^ T3[B0(s2b)] ^ C[ 72+224];
+ t6 = T0[B3(s29)] ^ T1[B2(s2a)] ^ T2[B1(s2b)] ^ T3[B0(s28)] ^ C[ 73+224];
+ ta = T0[B3(s2a)] ^ T1[B2(s2b)] ^ T2[B1(s28)] ^ T3[B0(s29)] ^ C[ 74+224];
+ te = T0[B3(s2b)] ^ T1[B2(s28)] ^ T2[B1(s29)] ^ T3[B0(s2a)] ^ C[ 75+224];
+ t3 = T0[B3(s2c)] ^ T1[B2(s2d)] ^ T2[B1(s2e)] ^ T3[B0(s2f)] ^ C[ 76+224];
+ t7 = T0[B3(s2d)] ^ T1[B2(s2e)] ^ T2[B1(s2f)] ^ T3[B0(s2c)] ^ C[ 77+224];
+ tb = T0[B3(s2e)] ^ T1[B2(s2f)] ^ T2[B1(s2c)] ^ T3[B0(s2d)] ^ C[ 78+224];
+ tf = T0[B3(s2f)] ^ T1[B2(s2c)] ^ T2[B1(s2d)] ^ T3[B0(s2e)] ^ C[ 79+224];
+
+ s20 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 80+224];
+ s24 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 81+224];
+ s28 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[ 82+224];
+ s2c = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[ 83+224] ^ ctrl;
+ s21 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[ 84+224];
+ s25 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[ 85+224];
+ s29 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[ 86+224];
+ s2d = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[ 87+224];
+ s22 = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )] ^ C[ 88+224];
+ s26 = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )] ^ C[ 89+224];
+ s2a = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )] ^ C[ 90+224];
+ s2e = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )] ^ C[ 91+224];
+ s23 = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )] ^ C[ 92+224];
+ s27 = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )] ^ C[ 93+224];
+ s2b = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )] ^ C[ 94+224];
+ s2f = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )] ^ C[ 95+224];
+
+ t0 = T0[B3(s20)] ^ T1[B2(s21)] ^ T2[B1(s22)] ^ T3[B0(s23)] ^ C[ 96+224];
+ t4 = T0[B3(s21)] ^ T1[B2(s22)] ^ T2[B1(s23)] ^ T3[B0(s20)] ^ C[ 97+224];
+ t8 = T0[B3(s22)] ^ T1[B2(s23)] ^ T2[B1(s20)] ^ T3[B0(s21)] ^ C[ 98+224];
+ tc = T0[B3(s23)] ^ T1[B2(s20)] ^ T2[B1(s21)] ^ T3[B0(s22)] ^ C[ 99+224] ^ ctrh;
+ t1 = T0[B3(s24)] ^ T1[B2(s25)] ^ T2[B1(s26)] ^ T3[B0(s27)] ^ C[100+224];
+ t5 = T0[B3(s25)] ^ T1[B2(s26)] ^ T2[B1(s27)] ^ T3[B0(s24)] ^ C[101+224];
+ t9 = T0[B3(s26)] ^ T1[B2(s27)] ^ T2[B1(s24)] ^ T3[B0(s25)] ^ C[102+224];
+ td = T0[B3(s27)] ^ T1[B2(s24)] ^ T2[B1(s25)] ^ T3[B0(s26)] ^ C[103+224];
+ t2 = T0[B3(s28)] ^ T1[B2(s29)] ^ T2[B1(s2a)] ^ T3[B0(s2b)] ^ C[104+224];
+ t6 = T0[B3(s29)] ^ T1[B2(s2a)] ^ T2[B1(s2b)] ^ T3[B0(s28)] ^ C[105+224];
+ ta = T0[B3(s2a)] ^ T1[B2(s2b)] ^ T2[B1(s28)] ^ T3[B0(s29)] ^ C[106+224];
+ te = T0[B3(s2b)] ^ T1[B2(s28)] ^ T2[B1(s29)] ^ T3[B0(s2a)] ^ C[107+224];
+ t3 = T0[B3(s2c)] ^ T1[B2(s2d)] ^ T2[B1(s2e)] ^ T3[B0(s2f)] ^ C[108+224];
+ t7 = T0[B3(s2d)] ^ T1[B2(s2e)] ^ T2[B1(s2f)] ^ T3[B0(s2c)] ^ C[109+224];
+ tb = T0[B3(s2e)] ^ T1[B2(s2f)] ^ T2[B1(s2c)] ^ T3[B0(s2d)] ^ C[110+224];
+ tf = T0[B3(s2f)] ^ T1[B2(s2c)] ^ T2[B1(s2d)] ^ T3[B0(s2e)] ^ C[111+224];
+
+ s60 ^= T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )];
+ s64 ^= T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )];
+ s68 ^= T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )];
+ s6c ^= T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )];
+ s61 ^= T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )];
+ s65 ^= T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )];
+ s69 ^= T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )];
+ s6d ^= T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )];
+ s62 ^= T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )];
+ s66 ^= T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )];
+ s6a ^= T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )];
+ s6e ^= T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )];
+ s63 ^= T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )];
+ s67 ^= T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )];
+ s6b ^= T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )];
+ s6f ^= T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )];
+
+ /* Lane 3 */
+ t0 = T0[B3(s30)] ^ T1[B2(s31)] ^ T2[B1(s32)] ^ T3[B0(s33)] ^ C[ 0+336];
+ t4 = T0[B3(s31)] ^ T1[B2(s32)] ^ T2[B1(s33)] ^ T3[B0(s30)] ^ C[ 1+336];
+ t8 = T0[B3(s32)] ^ T1[B2(s33)] ^ T2[B1(s30)] ^ T3[B0(s31)] ^ C[ 2+336];
+ tc = T0[B3(s33)] ^ T1[B2(s30)] ^ T2[B1(s31)] ^ T3[B0(s32)] ^ C[ 3+336] ^ ctrl;
+ t1 = T0[B3(s34)] ^ T1[B2(s35)] ^ T2[B1(s36)] ^ T3[B0(s37)] ^ C[ 4+336];
+ t5 = T0[B3(s35)] ^ T1[B2(s36)] ^ T2[B1(s37)] ^ T3[B0(s34)] ^ C[ 5+336];
+ t9 = T0[B3(s36)] ^ T1[B2(s37)] ^ T2[B1(s34)] ^ T3[B0(s35)] ^ C[ 6+336];
+ td = T0[B3(s37)] ^ T1[B2(s34)] ^ T2[B1(s35)] ^ T3[B0(s36)] ^ C[ 7+336];
+ t2 = T0[B3(s38)] ^ T1[B2(s39)] ^ T2[B1(s3a)] ^ T3[B0(s3b)] ^ C[ 8+336];
+ t6 = T0[B3(s39)] ^ T1[B2(s3a)] ^ T2[B1(s3b)] ^ T3[B0(s38)] ^ C[ 9+336];
+ ta = T0[B3(s3a)] ^ T1[B2(s3b)] ^ T2[B1(s38)] ^ T3[B0(s39)] ^ C[ 10+336];
+ te = T0[B3(s3b)] ^ T1[B2(s38)] ^ T2[B1(s39)] ^ T3[B0(s3a)] ^ C[ 11+336];
+ t3 = T0[B3(s3c)] ^ T1[B2(s3d)] ^ T2[B1(s3e)] ^ T3[B0(s3f)] ^ C[ 12+336];
+ t7 = T0[B3(s3d)] ^ T1[B2(s3e)] ^ T2[B1(s3f)] ^ T3[B0(s3c)] ^ C[ 13+336];
+ tb = T0[B3(s3e)] ^ T1[B2(s3f)] ^ T2[B1(s3c)] ^ T3[B0(s3d)] ^ C[ 14+336];
+ tf = T0[B3(s3f)] ^ T1[B2(s3c)] ^ T2[B1(s3d)] ^ T3[B0(s3e)] ^ C[ 15+336];
+
+ s30 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 16+336];
+ s34 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 17+336];
+ s38 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[ 18+336];
+ s3c = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[ 19+336] ^ ctrh;
+ s31 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[ 20+336];
+ s35 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[ 21+336];
+ s39 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[ 22+336];
+ s3d = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[ 23+336];
+ s32 = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )] ^ C[ 24+336];
+ s36 = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )] ^ C[ 25+336];
+ s3a = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )] ^ C[ 26+336];
+ s3e = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )] ^ C[ 27+336];
+ s33 = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )] ^ C[ 28+336];
+ s37 = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )] ^ C[ 29+336];
+ s3b = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )] ^ C[ 30+336];
+ s3f = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )] ^ C[ 31+336];
+
+ t0 = T0[B3(s30)] ^ T1[B2(s31)] ^ T2[B1(s32)] ^ T3[B0(s33)] ^ C[ 32+336];
+ t4 = T0[B3(s31)] ^ T1[B2(s32)] ^ T2[B1(s33)] ^ T3[B0(s30)] ^ C[ 33+336];
+ t8 = T0[B3(s32)] ^ T1[B2(s33)] ^ T2[B1(s30)] ^ T3[B0(s31)] ^ C[ 34+336];
+ tc = T0[B3(s33)] ^ T1[B2(s30)] ^ T2[B1(s31)] ^ T3[B0(s32)] ^ C[ 35+336] ^ ctrl;
+ t1 = T0[B3(s34)] ^ T1[B2(s35)] ^ T2[B1(s36)] ^ T3[B0(s37)] ^ C[ 36+336];
+ t5 = T0[B3(s35)] ^ T1[B2(s36)] ^ T2[B1(s37)] ^ T3[B0(s34)] ^ C[ 37+336];
+ t9 = T0[B3(s36)] ^ T1[B2(s37)] ^ T2[B1(s34)] ^ T3[B0(s35)] ^ C[ 38+336];
+ td = T0[B3(s37)] ^ T1[B2(s34)] ^ T2[B1(s35)] ^ T3[B0(s36)] ^ C[ 39+336];
+ t2 = T0[B3(s38)] ^ T1[B2(s39)] ^ T2[B1(s3a)] ^ T3[B0(s3b)] ^ C[ 40+336];
+ t6 = T0[B3(s39)] ^ T1[B2(s3a)] ^ T2[B1(s3b)] ^ T3[B0(s38)] ^ C[ 41+336];
+ ta = T0[B3(s3a)] ^ T1[B2(s3b)] ^ T2[B1(s38)] ^ T3[B0(s39)] ^ C[ 42+336];
+ te = T0[B3(s3b)] ^ T1[B2(s38)] ^ T2[B1(s39)] ^ T3[B0(s3a)] ^ C[ 43+336];
+ t3 = T0[B3(s3c)] ^ T1[B2(s3d)] ^ T2[B1(s3e)] ^ T3[B0(s3f)] ^ C[ 44+336];
+ t7 = T0[B3(s3d)] ^ T1[B2(s3e)] ^ T2[B1(s3f)] ^ T3[B0(s3c)] ^ C[ 45+336];
+ tb = T0[B3(s3e)] ^ T1[B2(s3f)] ^ T2[B1(s3c)] ^ T3[B0(s3d)] ^ C[ 46+336];
+ tf = T0[B3(s3f)] ^ T1[B2(s3c)] ^ T2[B1(s3d)] ^ T3[B0(s3e)] ^ C[ 47+336];
+
+ s30 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 48+336];
+ s34 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 49+336];
+ s38 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[ 50+336];
+ s3c = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[ 51+336] ^ ctrh;
+ s31 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[ 52+336];
+ s35 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[ 53+336];
+ s39 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[ 54+336];
+ s3d = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[ 55+336];
+ s32 = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )] ^ C[ 56+336];
+ s36 = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )] ^ C[ 57+336];
+ s3a = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )] ^ C[ 58+336];
+ s3e = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )] ^ C[ 59+336];
+ s33 = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )] ^ C[ 60+336];
+ s37 = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )] ^ C[ 61+336];
+ s3b = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )] ^ C[ 62+336];
+ s3f = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )] ^ C[ 63+336];
+
+ t0 = T0[B3(s30)] ^ T1[B2(s31)] ^ T2[B1(s32)] ^ T3[B0(s33)] ^ C[ 64+336];
+ t4 = T0[B3(s31)] ^ T1[B2(s32)] ^ T2[B1(s33)] ^ T3[B0(s30)] ^ C[ 65+336];
+ t8 = T0[B3(s32)] ^ T1[B2(s33)] ^ T2[B1(s30)] ^ T3[B0(s31)] ^ C[ 66+336];
+ tc = T0[B3(s33)] ^ T1[B2(s30)] ^ T2[B1(s31)] ^ T3[B0(s32)] ^ C[ 67+336] ^ ctrl;
+ t1 = T0[B3(s34)] ^ T1[B2(s35)] ^ T2[B1(s36)] ^ T3[B0(s37)] ^ C[ 68+336];
+ t5 = T0[B3(s35)] ^ T1[B2(s36)] ^ T2[B1(s37)] ^ T3[B0(s34)] ^ C[ 69+336];
+ t9 = T0[B3(s36)] ^ T1[B2(s37)] ^ T2[B1(s34)] ^ T3[B0(s35)] ^ C[ 70+336];
+ td = T0[B3(s37)] ^ T1[B2(s34)] ^ T2[B1(s35)] ^ T3[B0(s36)] ^ C[ 71+336];
+ t2 = T0[B3(s38)] ^ T1[B2(s39)] ^ T2[B1(s3a)] ^ T3[B0(s3b)] ^ C[ 72+336];
+ t6 = T0[B3(s39)] ^ T1[B2(s3a)] ^ T2[B1(s3b)] ^ T3[B0(s38)] ^ C[ 73+336];
+ ta = T0[B3(s3a)] ^ T1[B2(s3b)] ^ T2[B1(s38)] ^ T3[B0(s39)] ^ C[ 74+336];
+ te = T0[B3(s3b)] ^ T1[B2(s38)] ^ T2[B1(s39)] ^ T3[B0(s3a)] ^ C[ 75+336];
+ t3 = T0[B3(s3c)] ^ T1[B2(s3d)] ^ T2[B1(s3e)] ^ T3[B0(s3f)] ^ C[ 76+336];
+ t7 = T0[B3(s3d)] ^ T1[B2(s3e)] ^ T2[B1(s3f)] ^ T3[B0(s3c)] ^ C[ 77+336];
+ tb = T0[B3(s3e)] ^ T1[B2(s3f)] ^ T2[B1(s3c)] ^ T3[B0(s3d)] ^ C[ 78+336];
+ tf = T0[B3(s3f)] ^ T1[B2(s3c)] ^ T2[B1(s3d)] ^ T3[B0(s3e)] ^ C[ 79+336];
+
+ s30 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 80+336];
+ s34 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 81+336];
+ s38 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[ 82+336];
+ s3c = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[ 83+336] ^ ctrh;
+ s31 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[ 84+336];
+ s35 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[ 85+336];
+ s39 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[ 86+336];
+ s3d = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[ 87+336];
+ s32 = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )] ^ C[ 88+336];
+ s36 = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )] ^ C[ 89+336];
+ s3a = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )] ^ C[ 90+336];
+ s3e = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )] ^ C[ 91+336];
+ s33 = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )] ^ C[ 92+336];
+ s37 = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )] ^ C[ 93+336];
+ s3b = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )] ^ C[ 94+336];
+ s3f = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )] ^ C[ 95+336];
+
+ t0 = T0[B3(s30)] ^ T1[B2(s31)] ^ T2[B1(s32)] ^ T3[B0(s33)] ^ C[ 96+336];
+ t4 = T0[B3(s31)] ^ T1[B2(s32)] ^ T2[B1(s33)] ^ T3[B0(s30)] ^ C[ 97+336];
+ t8 = T0[B3(s32)] ^ T1[B2(s33)] ^ T2[B1(s30)] ^ T3[B0(s31)] ^ C[ 98+336];
+ tc = T0[B3(s33)] ^ T1[B2(s30)] ^ T2[B1(s31)] ^ T3[B0(s32)] ^ C[ 99+336] ^ ctrl;
+ t1 = T0[B3(s34)] ^ T1[B2(s35)] ^ T2[B1(s36)] ^ T3[B0(s37)] ^ C[100+336];
+ t5 = T0[B3(s35)] ^ T1[B2(s36)] ^ T2[B1(s37)] ^ T3[B0(s34)] ^ C[101+336];
+ t9 = T0[B3(s36)] ^ T1[B2(s37)] ^ T2[B1(s34)] ^ T3[B0(s35)] ^ C[102+336];
+ td = T0[B3(s37)] ^ T1[B2(s34)] ^ T2[B1(s35)] ^ T3[B0(s36)] ^ C[103+336];
+ t2 = T0[B3(s38)] ^ T1[B2(s39)] ^ T2[B1(s3a)] ^ T3[B0(s3b)] ^ C[104+336];
+ t6 = T0[B3(s39)] ^ T1[B2(s3a)] ^ T2[B1(s3b)] ^ T3[B0(s38)] ^ C[105+336];
+ ta = T0[B3(s3a)] ^ T1[B2(s3b)] ^ T2[B1(s38)] ^ T3[B0(s39)] ^ C[106+336];
+ te = T0[B3(s3b)] ^ T1[B2(s38)] ^ T2[B1(s39)] ^ T3[B0(s3a)] ^ C[107+336];
+ t3 = T0[B3(s3c)] ^ T1[B2(s3d)] ^ T2[B1(s3e)] ^ T3[B0(s3f)] ^ C[108+336];
+ t7 = T0[B3(s3d)] ^ T1[B2(s3e)] ^ T2[B1(s3f)] ^ T3[B0(s3c)] ^ C[109+336];
+ tb = T0[B3(s3e)] ^ T1[B2(s3f)] ^ T2[B1(s3c)] ^ T3[B0(s3d)] ^ C[110+336];
+ tf = T0[B3(s3f)] ^ T1[B2(s3c)] ^ T2[B1(s3d)] ^ T3[B0(s3e)] ^ C[111+336];
+
+ s70 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )];
+ s74 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )];
+ s78 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )];
+ s7c = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )];
+ s71 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )];
+ s75 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )];
+ s79 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )];
+ s7d = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )];
+ s72 = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )];
+ s76 = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )];
+ s7a = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )];
+ s7e = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )];
+ s73 = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )];
+ s77 = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )];
+ s7b = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )];
+ s7f = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )];
+
+ /* Lane 4 */
+ t0 = T0[B3(s40)] ^ T1[B2(s41)] ^ T2[B1(s42)] ^ T3[B0(s43)] ^ C[ 0+448];
+ t4 = T0[B3(s41)] ^ T1[B2(s42)] ^ T2[B1(s43)] ^ T3[B0(s40)] ^ C[ 1+448];
+ t8 = T0[B3(s42)] ^ T1[B2(s43)] ^ T2[B1(s40)] ^ T3[B0(s41)] ^ C[ 2+448];
+ tc = T0[B3(s43)] ^ T1[B2(s40)] ^ T2[B1(s41)] ^ T3[B0(s42)] ^ C[ 3+448] ^ ctrh;
+ t1 = T0[B3(s44)] ^ T1[B2(s45)] ^ T2[B1(s46)] ^ T3[B0(s47)] ^ C[ 4+448];
+ t5 = T0[B3(s45)] ^ T1[B2(s46)] ^ T2[B1(s47)] ^ T3[B0(s44)] ^ C[ 5+448];
+ t9 = T0[B3(s46)] ^ T1[B2(s47)] ^ T2[B1(s44)] ^ T3[B0(s45)] ^ C[ 6+448];
+ td = T0[B3(s47)] ^ T1[B2(s44)] ^ T2[B1(s45)] ^ T3[B0(s46)] ^ C[ 7+448];
+ t2 = T0[B3(s48)] ^ T1[B2(s49)] ^ T2[B1(s4a)] ^ T3[B0(s4b)] ^ C[ 8+448];
+ t6 = T0[B3(s49)] ^ T1[B2(s4a)] ^ T2[B1(s4b)] ^ T3[B0(s48)] ^ C[ 9+448];
+ ta = T0[B3(s4a)] ^ T1[B2(s4b)] ^ T2[B1(s48)] ^ T3[B0(s49)] ^ C[ 10+448];
+ te = T0[B3(s4b)] ^ T1[B2(s48)] ^ T2[B1(s49)] ^ T3[B0(s4a)] ^ C[ 11+448];
+ t3 = T0[B3(s4c)] ^ T1[B2(s4d)] ^ T2[B1(s4e)] ^ T3[B0(s4f)] ^ C[ 12+448];
+ t7 = T0[B3(s4d)] ^ T1[B2(s4e)] ^ T2[B1(s4f)] ^ T3[B0(s4c)] ^ C[ 13+448];
+ tb = T0[B3(s4e)] ^ T1[B2(s4f)] ^ T2[B1(s4c)] ^ T3[B0(s4d)] ^ C[ 14+448];
+ tf = T0[B3(s4f)] ^ T1[B2(s4c)] ^ T2[B1(s4d)] ^ T3[B0(s4e)] ^ C[ 15+448];
+
+ s40 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 16+448];
+ s44 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 17+448];
+ s48 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[ 18+448];
+ s4c = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[ 19+448] ^ ctrl;
+ s41 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[ 20+448];
+ s45 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[ 21+448];
+ s49 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[ 22+448];
+ s4d = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[ 23+448];
+ s42 = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )] ^ C[ 24+448];
+ s46 = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )] ^ C[ 25+448];
+ s4a = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )] ^ C[ 26+448];
+ s4e = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )] ^ C[ 27+448];
+ s43 = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )] ^ C[ 28+448];
+ s47 = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )] ^ C[ 29+448];
+ s4b = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )] ^ C[ 30+448];
+ s4f = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )] ^ C[ 31+448];
+
+ t0 = T0[B3(s40)] ^ T1[B2(s41)] ^ T2[B1(s42)] ^ T3[B0(s43)] ^ C[ 32+448];
+ t4 = T0[B3(s41)] ^ T1[B2(s42)] ^ T2[B1(s43)] ^ T3[B0(s40)] ^ C[ 33+448];
+ t8 = T0[B3(s42)] ^ T1[B2(s43)] ^ T2[B1(s40)] ^ T3[B0(s41)] ^ C[ 34+448];
+ tc = T0[B3(s43)] ^ T1[B2(s40)] ^ T2[B1(s41)] ^ T3[B0(s42)] ^ C[ 35+448] ^ ctrh;
+ t1 = T0[B3(s44)] ^ T1[B2(s45)] ^ T2[B1(s46)] ^ T3[B0(s47)] ^ C[ 36+448];
+ t5 = T0[B3(s45)] ^ T1[B2(s46)] ^ T2[B1(s47)] ^ T3[B0(s44)] ^ C[ 37+448];
+ t9 = T0[B3(s46)] ^ T1[B2(s47)] ^ T2[B1(s44)] ^ T3[B0(s45)] ^ C[ 38+448];
+ td = T0[B3(s47)] ^ T1[B2(s44)] ^ T2[B1(s45)] ^ T3[B0(s46)] ^ C[ 39+448];
+ t2 = T0[B3(s48)] ^ T1[B2(s49)] ^ T2[B1(s4a)] ^ T3[B0(s4b)] ^ C[ 40+448];
+ t6 = T0[B3(s49)] ^ T1[B2(s4a)] ^ T2[B1(s4b)] ^ T3[B0(s48)] ^ C[ 41+448];
+ ta = T0[B3(s4a)] ^ T1[B2(s4b)] ^ T2[B1(s48)] ^ T3[B0(s49)] ^ C[ 42+448];
+ te = T0[B3(s4b)] ^ T1[B2(s48)] ^ T2[B1(s49)] ^ T3[B0(s4a)] ^ C[ 43+448];
+ t3 = T0[B3(s4c)] ^ T1[B2(s4d)] ^ T2[B1(s4e)] ^ T3[B0(s4f)] ^ C[ 44+448];
+ t7 = T0[B3(s4d)] ^ T1[B2(s4e)] ^ T2[B1(s4f)] ^ T3[B0(s4c)] ^ C[ 45+448];
+ tb = T0[B3(s4e)] ^ T1[B2(s4f)] ^ T2[B1(s4c)] ^ T3[B0(s4d)] ^ C[ 46+448];
+ tf = T0[B3(s4f)] ^ T1[B2(s4c)] ^ T2[B1(s4d)] ^ T3[B0(s4e)] ^ C[ 47+448];
+
+ s40 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 48+448];
+ s44 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 49+448];
+ s48 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[ 50+448];
+ s4c = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[ 51+448] ^ ctrl;
+ s41 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[ 52+448];
+ s45 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[ 53+448];
+ s49 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[ 54+448];
+ s4d = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[ 55+448];
+ s42 = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )] ^ C[ 56+448];
+ s46 = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )] ^ C[ 57+448];
+ s4a = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )] ^ C[ 58+448];
+ s4e = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )] ^ C[ 59+448];
+ s43 = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )] ^ C[ 60+448];
+ s47 = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )] ^ C[ 61+448];
+ s4b = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )] ^ C[ 62+448];
+ s4f = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )] ^ C[ 63+448];
+
+ t0 = T0[B3(s40)] ^ T1[B2(s41)] ^ T2[B1(s42)] ^ T3[B0(s43)] ^ C[ 64+448];
+ t4 = T0[B3(s41)] ^ T1[B2(s42)] ^ T2[B1(s43)] ^ T3[B0(s40)] ^ C[ 65+448];
+ t8 = T0[B3(s42)] ^ T1[B2(s43)] ^ T2[B1(s40)] ^ T3[B0(s41)] ^ C[ 66+448];
+ tc = T0[B3(s43)] ^ T1[B2(s40)] ^ T2[B1(s41)] ^ T3[B0(s42)] ^ C[ 67+448] ^ ctrh;
+ t1 = T0[B3(s44)] ^ T1[B2(s45)] ^ T2[B1(s46)] ^ T3[B0(s47)] ^ C[ 68+448];
+ t5 = T0[B3(s45)] ^ T1[B2(s46)] ^ T2[B1(s47)] ^ T3[B0(s44)] ^ C[ 69+448];
+ t9 = T0[B3(s46)] ^ T1[B2(s47)] ^ T2[B1(s44)] ^ T3[B0(s45)] ^ C[ 70+448];
+ td = T0[B3(s47)] ^ T1[B2(s44)] ^ T2[B1(s45)] ^ T3[B0(s46)] ^ C[ 71+448];
+ t2 = T0[B3(s48)] ^ T1[B2(s49)] ^ T2[B1(s4a)] ^ T3[B0(s4b)] ^ C[ 72+448];
+ t6 = T0[B3(s49)] ^ T1[B2(s4a)] ^ T2[B1(s4b)] ^ T3[B0(s48)] ^ C[ 73+448];
+ ta = T0[B3(s4a)] ^ T1[B2(s4b)] ^ T2[B1(s48)] ^ T3[B0(s49)] ^ C[ 74+448];
+ te = T0[B3(s4b)] ^ T1[B2(s48)] ^ T2[B1(s49)] ^ T3[B0(s4a)] ^ C[ 75+448];
+ t3 = T0[B3(s4c)] ^ T1[B2(s4d)] ^ T2[B1(s4e)] ^ T3[B0(s4f)] ^ C[ 76+448];
+ t7 = T0[B3(s4d)] ^ T1[B2(s4e)] ^ T2[B1(s4f)] ^ T3[B0(s4c)] ^ C[ 77+448];
+ tb = T0[B3(s4e)] ^ T1[B2(s4f)] ^ T2[B1(s4c)] ^ T3[B0(s4d)] ^ C[ 78+448];
+ tf = T0[B3(s4f)] ^ T1[B2(s4c)] ^ T2[B1(s4d)] ^ T3[B0(s4e)] ^ C[ 79+448];
+
+ s40 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 80+448];
+ s44 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 81+448];
+ s48 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[ 82+448];
+ s4c = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[ 83+448] ^ ctrl;
+ s41 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[ 84+448];
+ s45 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[ 85+448];
+ s49 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[ 86+448];
+ s4d = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[ 87+448];
+ s42 = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )] ^ C[ 88+448];
+ s46 = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )] ^ C[ 89+448];
+ s4a = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )] ^ C[ 90+448];
+ s4e = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )] ^ C[ 91+448];
+ s43 = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )] ^ C[ 92+448];
+ s47 = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )] ^ C[ 93+448];
+ s4b = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )] ^ C[ 94+448];
+ s4f = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )] ^ C[ 95+448];
+
+ t0 = T0[B3(s40)] ^ T1[B2(s41)] ^ T2[B1(s42)] ^ T3[B0(s43)] ^ C[ 96+448];
+ t4 = T0[B3(s41)] ^ T1[B2(s42)] ^ T2[B1(s43)] ^ T3[B0(s40)] ^ C[ 97+448];
+ t8 = T0[B3(s42)] ^ T1[B2(s43)] ^ T2[B1(s40)] ^ T3[B0(s41)] ^ C[ 98+448];
+ tc = T0[B3(s43)] ^ T1[B2(s40)] ^ T2[B1(s41)] ^ T3[B0(s42)] ^ C[ 99+448] ^ ctrh;
+ t1 = T0[B3(s44)] ^ T1[B2(s45)] ^ T2[B1(s46)] ^ T3[B0(s47)] ^ C[100+448];
+ t5 = T0[B3(s45)] ^ T1[B2(s46)] ^ T2[B1(s47)] ^ T3[B0(s44)] ^ C[101+448];
+ t9 = T0[B3(s46)] ^ T1[B2(s47)] ^ T2[B1(s44)] ^ T3[B0(s45)] ^ C[102+448];
+ td = T0[B3(s47)] ^ T1[B2(s44)] ^ T2[B1(s45)] ^ T3[B0(s46)] ^ C[103+448];
+ t2 = T0[B3(s48)] ^ T1[B2(s49)] ^ T2[B1(s4a)] ^ T3[B0(s4b)] ^ C[104+448];
+ t6 = T0[B3(s49)] ^ T1[B2(s4a)] ^ T2[B1(s4b)] ^ T3[B0(s48)] ^ C[105+448];
+ ta = T0[B3(s4a)] ^ T1[B2(s4b)] ^ T2[B1(s48)] ^ T3[B0(s49)] ^ C[106+448];
+ te = T0[B3(s4b)] ^ T1[B2(s48)] ^ T2[B1(s49)] ^ T3[B0(s4a)] ^ C[107+448];
+ t3 = T0[B3(s4c)] ^ T1[B2(s4d)] ^ T2[B1(s4e)] ^ T3[B0(s4f)] ^ C[108+448];
+ t7 = T0[B3(s4d)] ^ T1[B2(s4e)] ^ T2[B1(s4f)] ^ T3[B0(s4c)] ^ C[109+448];
+ tb = T0[B3(s4e)] ^ T1[B2(s4f)] ^ T2[B1(s4c)] ^ T3[B0(s4d)] ^ C[110+448];
+ tf = T0[B3(s4f)] ^ T1[B2(s4c)] ^ T2[B1(s4d)] ^ T3[B0(s4e)] ^ C[111+448];
+
+ s70 ^= T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )];
+ s74 ^= T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )];
+ s78 ^= T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )];
+ s7c ^= T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )];
+ s71 ^= T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )];
+ s75 ^= T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )];
+ s79 ^= T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )];
+ s7d ^= T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )];
+ s72 ^= T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )];
+ s76 ^= T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )];
+ s7a ^= T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )];
+ s7e ^= T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )];
+ s73 ^= T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )];
+ s77 ^= T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )];
+ s7b ^= T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )];
+ s7f ^= T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )];
+
+ /* Lane 5 */
+ t0 = T0[B3(s50)] ^ T1[B2(s51)] ^ T2[B1(s52)] ^ T3[B0(s53)] ^ C[ 0+560];
+ t4 = T0[B3(s51)] ^ T1[B2(s52)] ^ T2[B1(s53)] ^ T3[B0(s50)] ^ C[ 1+560];
+ t8 = T0[B3(s52)] ^ T1[B2(s53)] ^ T2[B1(s50)] ^ T3[B0(s51)] ^ C[ 2+560];
+ tc = T0[B3(s53)] ^ T1[B2(s50)] ^ T2[B1(s51)] ^ T3[B0(s52)] ^ C[ 3+560] ^ ctrl;
+ t1 = T0[B3(s54)] ^ T1[B2(s55)] ^ T2[B1(s56)] ^ T3[B0(s57)] ^ C[ 4+560];
+ t5 = T0[B3(s55)] ^ T1[B2(s56)] ^ T2[B1(s57)] ^ T3[B0(s54)] ^ C[ 5+560];
+ t9 = T0[B3(s56)] ^ T1[B2(s57)] ^ T2[B1(s54)] ^ T3[B0(s55)] ^ C[ 6+560];
+ td = T0[B3(s57)] ^ T1[B2(s54)] ^ T2[B1(s55)] ^ T3[B0(s56)] ^ C[ 7+560];
+ t2 = T0[B3(s58)] ^ T1[B2(s59)] ^ T2[B1(s5a)] ^ T3[B0(s5b)] ^ C[ 8+560];
+ t6 = T0[B3(s59)] ^ T1[B2(s5a)] ^ T2[B1(s5b)] ^ T3[B0(s58)] ^ C[ 9+560];
+ ta = T0[B3(s5a)] ^ T1[B2(s5b)] ^ T2[B1(s58)] ^ T3[B0(s59)] ^ C[ 10+560];
+ te = T0[B3(s5b)] ^ T1[B2(s58)] ^ T2[B1(s59)] ^ T3[B0(s5a)] ^ C[ 11+560];
+ t3 = T0[B3(s5c)] ^ T1[B2(s5d)] ^ T2[B1(s5e)] ^ T3[B0(s5f)] ^ C[ 12+560];
+ t7 = T0[B3(s5d)] ^ T1[B2(s5e)] ^ T2[B1(s5f)] ^ T3[B0(s5c)] ^ C[ 13+560];
+ tb = T0[B3(s5e)] ^ T1[B2(s5f)] ^ T2[B1(s5c)] ^ T3[B0(s5d)] ^ C[ 14+560];
+ tf = T0[B3(s5f)] ^ T1[B2(s5c)] ^ T2[B1(s5d)] ^ T3[B0(s5e)] ^ C[ 15+560];
+
+ s50 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 16+560];
+ s54 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 17+560];
+ s58 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[ 18+560];
+ s5c = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[ 19+560] ^ ctrh;
+ s51 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[ 20+560];
+ s55 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[ 21+560];
+ s59 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[ 22+560];
+ s5d = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[ 23+560];
+ s52 = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )] ^ C[ 24+560];
+ s56 = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )] ^ C[ 25+560];
+ s5a = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )] ^ C[ 26+560];
+ s5e = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )] ^ C[ 27+560];
+ s53 = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )] ^ C[ 28+560];
+ s57 = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )] ^ C[ 29+560];
+ s5b = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )] ^ C[ 30+560];
+ s5f = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )] ^ C[ 31+560];
+
+ t0 = T0[B3(s50)] ^ T1[B2(s51)] ^ T2[B1(s52)] ^ T3[B0(s53)] ^ C[ 32+560];
+ t4 = T0[B3(s51)] ^ T1[B2(s52)] ^ T2[B1(s53)] ^ T3[B0(s50)] ^ C[ 33+560];
+ t8 = T0[B3(s52)] ^ T1[B2(s53)] ^ T2[B1(s50)] ^ T3[B0(s51)] ^ C[ 34+560];
+ tc = T0[B3(s53)] ^ T1[B2(s50)] ^ T2[B1(s51)] ^ T3[B0(s52)] ^ C[ 35+560] ^ ctrl;
+ t1 = T0[B3(s54)] ^ T1[B2(s55)] ^ T2[B1(s56)] ^ T3[B0(s57)] ^ C[ 36+560];
+ t5 = T0[B3(s55)] ^ T1[B2(s56)] ^ T2[B1(s57)] ^ T3[B0(s54)] ^ C[ 37+560];
+ t9 = T0[B3(s56)] ^ T1[B2(s57)] ^ T2[B1(s54)] ^ T3[B0(s55)] ^ C[ 38+560];
+ td = T0[B3(s57)] ^ T1[B2(s54)] ^ T2[B1(s55)] ^ T3[B0(s56)] ^ C[ 39+560];
+ t2 = T0[B3(s58)] ^ T1[B2(s59)] ^ T2[B1(s5a)] ^ T3[B0(s5b)] ^ C[ 40+560];
+ t6 = T0[B3(s59)] ^ T1[B2(s5a)] ^ T2[B1(s5b)] ^ T3[B0(s58)] ^ C[ 41+560];
+ ta = T0[B3(s5a)] ^ T1[B2(s5b)] ^ T2[B1(s58)] ^ T3[B0(s59)] ^ C[ 42+560];
+ te = T0[B3(s5b)] ^ T1[B2(s58)] ^ T2[B1(s59)] ^ T3[B0(s5a)] ^ C[ 43+560];
+ t3 = T0[B3(s5c)] ^ T1[B2(s5d)] ^ T2[B1(s5e)] ^ T3[B0(s5f)] ^ C[ 44+560];
+ t7 = T0[B3(s5d)] ^ T1[B2(s5e)] ^ T2[B1(s5f)] ^ T3[B0(s5c)] ^ C[ 45+560];
+ tb = T0[B3(s5e)] ^ T1[B2(s5f)] ^ T2[B1(s5c)] ^ T3[B0(s5d)] ^ C[ 46+560];
+ tf = T0[B3(s5f)] ^ T1[B2(s5c)] ^ T2[B1(s5d)] ^ T3[B0(s5e)] ^ C[ 47+560];
+
+ s50 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 48+560];
+ s54 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 49+560];
+ s58 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[ 50+560];
+ s5c = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[ 51+560] ^ ctrh;
+ s51 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[ 52+560];
+ s55 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[ 53+560];
+ s59 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[ 54+560];
+ s5d = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[ 55+560];
+ s52 = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )] ^ C[ 56+560];
+ s56 = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )] ^ C[ 57+560];
+ s5a = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )] ^ C[ 58+560];
+ s5e = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )] ^ C[ 59+560];
+ s53 = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )] ^ C[ 60+560];
+ s57 = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )] ^ C[ 61+560];
+ s5b = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )] ^ C[ 62+560];
+ s5f = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )] ^ C[ 63+560];
+
+ t0 = T0[B3(s50)] ^ T1[B2(s51)] ^ T2[B1(s52)] ^ T3[B0(s53)] ^ C[ 64+560];
+ t4 = T0[B3(s51)] ^ T1[B2(s52)] ^ T2[B1(s53)] ^ T3[B0(s50)] ^ C[ 65+560];
+ t8 = T0[B3(s52)] ^ T1[B2(s53)] ^ T2[B1(s50)] ^ T3[B0(s51)] ^ C[ 66+560];
+ tc = T0[B3(s53)] ^ T1[B2(s50)] ^ T2[B1(s51)] ^ T3[B0(s52)] ^ C[ 67+560] ^ ctrl;
+ t1 = T0[B3(s54)] ^ T1[B2(s55)] ^ T2[B1(s56)] ^ T3[B0(s57)] ^ C[ 68+560];
+ t5 = T0[B3(s55)] ^ T1[B2(s56)] ^ T2[B1(s57)] ^ T3[B0(s54)] ^ C[ 69+560];
+ t9 = T0[B3(s56)] ^ T1[B2(s57)] ^ T2[B1(s54)] ^ T3[B0(s55)] ^ C[ 70+560];
+ td = T0[B3(s57)] ^ T1[B2(s54)] ^ T2[B1(s55)] ^ T3[B0(s56)] ^ C[ 71+560];
+ t2 = T0[B3(s58)] ^ T1[B2(s59)] ^ T2[B1(s5a)] ^ T3[B0(s5b)] ^ C[ 72+560];
+ t6 = T0[B3(s59)] ^ T1[B2(s5a)] ^ T2[B1(s5b)] ^ T3[B0(s58)] ^ C[ 73+560];
+ ta = T0[B3(s5a)] ^ T1[B2(s5b)] ^ T2[B1(s58)] ^ T3[B0(s59)] ^ C[ 74+560];
+ te = T0[B3(s5b)] ^ T1[B2(s58)] ^ T2[B1(s59)] ^ T3[B0(s5a)] ^ C[ 75+560];
+ t3 = T0[B3(s5c)] ^ T1[B2(s5d)] ^ T2[B1(s5e)] ^ T3[B0(s5f)] ^ C[ 76+560];
+ t7 = T0[B3(s5d)] ^ T1[B2(s5e)] ^ T2[B1(s5f)] ^ T3[B0(s5c)] ^ C[ 77+560];
+ tb = T0[B3(s5e)] ^ T1[B2(s5f)] ^ T2[B1(s5c)] ^ T3[B0(s5d)] ^ C[ 78+560];
+ tf = T0[B3(s5f)] ^ T1[B2(s5c)] ^ T2[B1(s5d)] ^ T3[B0(s5e)] ^ C[ 79+560];
+
+ s50 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 80+560];
+ s54 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 81+560];
+ s58 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[ 82+560];
+ s5c = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[ 83+560] ^ ctrh;
+ s51 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[ 84+560];
+ s55 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[ 85+560];
+ s59 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[ 86+560];
+ s5d = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[ 87+560];
+ s52 = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )] ^ C[ 88+560];
+ s56 = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )] ^ C[ 89+560];
+ s5a = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )] ^ C[ 90+560];
+ s5e = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )] ^ C[ 91+560];
+ s53 = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )] ^ C[ 92+560];
+ s57 = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )] ^ C[ 93+560];
+ s5b = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )] ^ C[ 94+560];
+ s5f = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )] ^ C[ 95+560];
+
+ t0 = T0[B3(s50)] ^ T1[B2(s51)] ^ T2[B1(s52)] ^ T3[B0(s53)] ^ C[ 96+560];
+ t4 = T0[B3(s51)] ^ T1[B2(s52)] ^ T2[B1(s53)] ^ T3[B0(s50)] ^ C[ 97+560];
+ t8 = T0[B3(s52)] ^ T1[B2(s53)] ^ T2[B1(s50)] ^ T3[B0(s51)] ^ C[ 98+560];
+ tc = T0[B3(s53)] ^ T1[B2(s50)] ^ T2[B1(s51)] ^ T3[B0(s52)] ^ C[ 99+560] ^ ctrl;
+ t1 = T0[B3(s54)] ^ T1[B2(s55)] ^ T2[B1(s56)] ^ T3[B0(s57)] ^ C[100+560];
+ t5 = T0[B3(s55)] ^ T1[B2(s56)] ^ T2[B1(s57)] ^ T3[B0(s54)] ^ C[101+560];
+ t9 = T0[B3(s56)] ^ T1[B2(s57)] ^ T2[B1(s54)] ^ T3[B0(s55)] ^ C[102+560];
+ td = T0[B3(s57)] ^ T1[B2(s54)] ^ T2[B1(s55)] ^ T3[B0(s56)] ^ C[103+560];
+ t2 = T0[B3(s58)] ^ T1[B2(s59)] ^ T2[B1(s5a)] ^ T3[B0(s5b)] ^ C[104+560];
+ t6 = T0[B3(s59)] ^ T1[B2(s5a)] ^ T2[B1(s5b)] ^ T3[B0(s58)] ^ C[105+560];
+ ta = T0[B3(s5a)] ^ T1[B2(s5b)] ^ T2[B1(s58)] ^ T3[B0(s59)] ^ C[106+560];
+ te = T0[B3(s5b)] ^ T1[B2(s58)] ^ T2[B1(s59)] ^ T3[B0(s5a)] ^ C[107+560];
+ t3 = T0[B3(s5c)] ^ T1[B2(s5d)] ^ T2[B1(s5e)] ^ T3[B0(s5f)] ^ C[108+560];
+ t7 = T0[B3(s5d)] ^ T1[B2(s5e)] ^ T2[B1(s5f)] ^ T3[B0(s5c)] ^ C[109+560];
+ tb = T0[B3(s5e)] ^ T1[B2(s5f)] ^ T2[B1(s5c)] ^ T3[B0(s5d)] ^ C[110+560];
+ tf = T0[B3(s5f)] ^ T1[B2(s5c)] ^ T2[B1(s5d)] ^ T3[B0(s5e)] ^ C[111+560];
+
+ s70 ^= T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )];
+ s74 ^= T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )];
+ s78 ^= T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )];
+ s7c ^= T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )];
+ s71 ^= T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )];
+ s75 ^= T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )];
+ s79 ^= T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )];
+ s7d ^= T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )];
+ s72 ^= T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )];
+ s76 ^= T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )];
+ s7a ^= T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )];
+ s7e ^= T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )];
+ s73 ^= T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )];
+ s77 ^= T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )];
+ s7b ^= T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )];
+ s7f ^= T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )];
+
+ /* Lane 6 */
+ t0 = T0[B3(s60)] ^ T1[B2(s61)] ^ T2[B1(s62)] ^ T3[B0(s63)] ^ C[ 0+672];
+ t4 = T0[B3(s61)] ^ T1[B2(s62)] ^ T2[B1(s63)] ^ T3[B0(s60)] ^ C[ 1+672];
+ t8 = T0[B3(s62)] ^ T1[B2(s63)] ^ T2[B1(s60)] ^ T3[B0(s61)] ^ C[ 2+672];
+ tc = T0[B3(s63)] ^ T1[B2(s60)] ^ T2[B1(s61)] ^ T3[B0(s62)] ^ C[ 3+672] ^ ctrh;
+ t1 = T0[B3(s64)] ^ T1[B2(s65)] ^ T2[B1(s66)] ^ T3[B0(s67)] ^ C[ 4+672];
+ t5 = T0[B3(s65)] ^ T1[B2(s66)] ^ T2[B1(s67)] ^ T3[B0(s64)] ^ C[ 5+672];
+ t9 = T0[B3(s66)] ^ T1[B2(s67)] ^ T2[B1(s64)] ^ T3[B0(s65)] ^ C[ 6+672];
+ td = T0[B3(s67)] ^ T1[B2(s64)] ^ T2[B1(s65)] ^ T3[B0(s66)] ^ C[ 7+672];
+ t2 = T0[B3(s68)] ^ T1[B2(s69)] ^ T2[B1(s6a)] ^ T3[B0(s6b)] ^ C[ 8+672];
+ t6 = T0[B3(s69)] ^ T1[B2(s6a)] ^ T2[B1(s6b)] ^ T3[B0(s68)] ^ C[ 9+672];
+ ta = T0[B3(s6a)] ^ T1[B2(s6b)] ^ T2[B1(s68)] ^ T3[B0(s69)] ^ C[ 10+672];
+ te = T0[B3(s6b)] ^ T1[B2(s68)] ^ T2[B1(s69)] ^ T3[B0(s6a)] ^ C[ 11+672];
+ t3 = T0[B3(s6c)] ^ T1[B2(s6d)] ^ T2[B1(s6e)] ^ T3[B0(s6f)] ^ C[ 12+672];
+ t7 = T0[B3(s6d)] ^ T1[B2(s6e)] ^ T2[B1(s6f)] ^ T3[B0(s6c)] ^ C[ 13+672];
+ tb = T0[B3(s6e)] ^ T1[B2(s6f)] ^ T2[B1(s6c)] ^ T3[B0(s6d)] ^ C[ 14+672];
+ tf = T0[B3(s6f)] ^ T1[B2(s6c)] ^ T2[B1(s6d)] ^ T3[B0(s6e)] ^ C[ 15+672];
+
+ s60 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 16+672];
+ s64 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 17+672];
+ s68 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[ 18+672];
+ s6c = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[ 19+672] ^ ctrl;
+ s61 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[ 20+672];
+ s65 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[ 21+672];
+ s69 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[ 22+672];
+ s6d = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[ 23+672];
+ s62 = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )] ^ C[ 24+672];
+ s66 = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )] ^ C[ 25+672];
+ s6a = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )] ^ C[ 26+672];
+ s6e = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )] ^ C[ 27+672];
+ s63 = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )] ^ C[ 28+672];
+ s67 = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )] ^ C[ 29+672];
+ s6b = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )] ^ C[ 30+672];
+ s6f = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )] ^ C[ 31+672];
+
+ t0 = T0[B3(s60)] ^ T1[B2(s61)] ^ T2[B1(s62)] ^ T3[B0(s63)] ^ C[ 32+672];
+ t4 = T0[B3(s61)] ^ T1[B2(s62)] ^ T2[B1(s63)] ^ T3[B0(s60)] ^ C[ 33+672];
+ t8 = T0[B3(s62)] ^ T1[B2(s63)] ^ T2[B1(s60)] ^ T3[B0(s61)] ^ C[ 34+672];
+ tc = T0[B3(s63)] ^ T1[B2(s60)] ^ T2[B1(s61)] ^ T3[B0(s62)] ^ C[ 35+672] ^ ctrh;
+ t1 = T0[B3(s64)] ^ T1[B2(s65)] ^ T2[B1(s66)] ^ T3[B0(s67)] ^ C[ 36+672];
+ t5 = T0[B3(s65)] ^ T1[B2(s66)] ^ T2[B1(s67)] ^ T3[B0(s64)] ^ C[ 37+672];
+ t9 = T0[B3(s66)] ^ T1[B2(s67)] ^ T2[B1(s64)] ^ T3[B0(s65)] ^ C[ 38+672];
+ td = T0[B3(s67)] ^ T1[B2(s64)] ^ T2[B1(s65)] ^ T3[B0(s66)] ^ C[ 39+672];
+ t2 = T0[B3(s68)] ^ T1[B2(s69)] ^ T2[B1(s6a)] ^ T3[B0(s6b)] ^ C[ 40+672];
+ t6 = T0[B3(s69)] ^ T1[B2(s6a)] ^ T2[B1(s6b)] ^ T3[B0(s68)] ^ C[ 41+672];
+ ta = T0[B3(s6a)] ^ T1[B2(s6b)] ^ T2[B1(s68)] ^ T3[B0(s69)] ^ C[ 42+672];
+ te = T0[B3(s6b)] ^ T1[B2(s68)] ^ T2[B1(s69)] ^ T3[B0(s6a)] ^ C[ 43+672];
+ t3 = T0[B3(s6c)] ^ T1[B2(s6d)] ^ T2[B1(s6e)] ^ T3[B0(s6f)] ^ C[ 44+672];
+ t7 = T0[B3(s6d)] ^ T1[B2(s6e)] ^ T2[B1(s6f)] ^ T3[B0(s6c)] ^ C[ 45+672];
+ tb = T0[B3(s6e)] ^ T1[B2(s6f)] ^ T2[B1(s6c)] ^ T3[B0(s6d)] ^ C[ 46+672];
+ tf = T0[B3(s6f)] ^ T1[B2(s6c)] ^ T2[B1(s6d)] ^ T3[B0(s6e)] ^ C[ 47+672];
+
+ h[ 0] = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )];
+ h[ 4] = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )];
+ h[ 8] = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )];
+ h[12] = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )];
+ h[ 1] = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )];
+ h[ 5] = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )];
+ h[ 9] = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )];
+ h[13] = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )];
+ h[ 2] = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )];
+ h[ 6] = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )];
+ h[10] = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )];
+ h[14] = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )];
+ h[ 3] = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )];
+ h[ 7] = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )];
+ h[11] = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )];
+ h[15] = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )];
+
+ /* Lane 7 */
+ t0 = T0[B3(s70)] ^ T1[B2(s71)] ^ T2[B1(s72)] ^ T3[B0(s73)] ^ C[ 0+720];
+ t4 = T0[B3(s71)] ^ T1[B2(s72)] ^ T2[B1(s73)] ^ T3[B0(s70)] ^ C[ 1+720];
+ t8 = T0[B3(s72)] ^ T1[B2(s73)] ^ T2[B1(s70)] ^ T3[B0(s71)] ^ C[ 2+720];
+ tc = T0[B3(s73)] ^ T1[B2(s70)] ^ T2[B1(s71)] ^ T3[B0(s72)] ^ C[ 3+720] ^ ctrl;
+ t1 = T0[B3(s74)] ^ T1[B2(s75)] ^ T2[B1(s76)] ^ T3[B0(s77)] ^ C[ 4+720];
+ t5 = T0[B3(s75)] ^ T1[B2(s76)] ^ T2[B1(s77)] ^ T3[B0(s74)] ^ C[ 5+720];
+ t9 = T0[B3(s76)] ^ T1[B2(s77)] ^ T2[B1(s74)] ^ T3[B0(s75)] ^ C[ 6+720];
+ td = T0[B3(s77)] ^ T1[B2(s74)] ^ T2[B1(s75)] ^ T3[B0(s76)] ^ C[ 7+720];
+ t2 = T0[B3(s78)] ^ T1[B2(s79)] ^ T2[B1(s7a)] ^ T3[B0(s7b)] ^ C[ 8+720];
+ t6 = T0[B3(s79)] ^ T1[B2(s7a)] ^ T2[B1(s7b)] ^ T3[B0(s78)] ^ C[ 9+720];
+ ta = T0[B3(s7a)] ^ T1[B2(s7b)] ^ T2[B1(s78)] ^ T3[B0(s79)] ^ C[ 10+720];
+ te = T0[B3(s7b)] ^ T1[B2(s78)] ^ T2[B1(s79)] ^ T3[B0(s7a)] ^ C[ 11+720];
+ t3 = T0[B3(s7c)] ^ T1[B2(s7d)] ^ T2[B1(s7e)] ^ T3[B0(s7f)] ^ C[ 12+720];
+ t7 = T0[B3(s7d)] ^ T1[B2(s7e)] ^ T2[B1(s7f)] ^ T3[B0(s7c)] ^ C[ 13+720];
+ tb = T0[B3(s7e)] ^ T1[B2(s7f)] ^ T2[B1(s7c)] ^ T3[B0(s7d)] ^ C[ 14+720];
+ tf = T0[B3(s7f)] ^ T1[B2(s7c)] ^ T2[B1(s7d)] ^ T3[B0(s7e)] ^ C[ 15+720];
+
+ s70 = T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )] ^ C[ 16+720];
+ s74 = T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )] ^ C[ 17+720];
+ s78 = T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )] ^ C[ 18+720];
+ s7c = T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )] ^ C[ 19+720] ^ ctrh;
+ s71 = T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )] ^ C[ 20+720];
+ s75 = T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )] ^ C[ 21+720];
+ s79 = T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )] ^ C[ 22+720];
+ s7d = T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )] ^ C[ 23+720];
+ s72 = T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )] ^ C[ 24+720];
+ s76 = T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )] ^ C[ 25+720];
+ s7a = T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )] ^ C[ 26+720];
+ s7e = T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )] ^ C[ 27+720];
+ s73 = T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )] ^ C[ 28+720];
+ s77 = T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )] ^ C[ 29+720];
+ s7b = T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )] ^ C[ 30+720];
+ s7f = T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )] ^ C[ 31+720];
+
+ t0 = T0[B3(s70)] ^ T1[B2(s71)] ^ T2[B1(s72)] ^ T3[B0(s73)] ^ C[ 32+720];
+ t4 = T0[B3(s71)] ^ T1[B2(s72)] ^ T2[B1(s73)] ^ T3[B0(s70)] ^ C[ 33+720];
+ t8 = T0[B3(s72)] ^ T1[B2(s73)] ^ T2[B1(s70)] ^ T3[B0(s71)] ^ C[ 34+720];
+ tc = T0[B3(s73)] ^ T1[B2(s70)] ^ T2[B1(s71)] ^ T3[B0(s72)] ^ C[ 35+720] ^ ctrl;
+ t1 = T0[B3(s74)] ^ T1[B2(s75)] ^ T2[B1(s76)] ^ T3[B0(s77)] ^ C[ 36+720];
+ t5 = T0[B3(s75)] ^ T1[B2(s76)] ^ T2[B1(s77)] ^ T3[B0(s74)] ^ C[ 37+720];
+ t9 = T0[B3(s76)] ^ T1[B2(s77)] ^ T2[B1(s74)] ^ T3[B0(s75)] ^ C[ 38+720];
+ td = T0[B3(s77)] ^ T1[B2(s74)] ^ T2[B1(s75)] ^ T3[B0(s76)] ^ C[ 39+720];
+ t2 = T0[B3(s78)] ^ T1[B2(s79)] ^ T2[B1(s7a)] ^ T3[B0(s7b)] ^ C[ 40+720];
+ t6 = T0[B3(s79)] ^ T1[B2(s7a)] ^ T2[B1(s7b)] ^ T3[B0(s78)] ^ C[ 41+720];
+ ta = T0[B3(s7a)] ^ T1[B2(s7b)] ^ T2[B1(s78)] ^ T3[B0(s79)] ^ C[ 42+720];
+ te = T0[B3(s7b)] ^ T1[B2(s78)] ^ T2[B1(s79)] ^ T3[B0(s7a)] ^ C[ 43+720];
+ t3 = T0[B3(s7c)] ^ T1[B2(s7d)] ^ T2[B1(s7e)] ^ T3[B0(s7f)] ^ C[ 44+720];
+ t7 = T0[B3(s7d)] ^ T1[B2(s7e)] ^ T2[B1(s7f)] ^ T3[B0(s7c)] ^ C[ 45+720];
+ tb = T0[B3(s7e)] ^ T1[B2(s7f)] ^ T2[B1(s7c)] ^ T3[B0(s7d)] ^ C[ 46+720];
+ tf = T0[B3(s7f)] ^ T1[B2(s7c)] ^ T2[B1(s7d)] ^ T3[B0(s7e)] ^ C[ 47+720];
+
+ h[ 0] ^= T0[B3(t0 )] ^ T1[B2(t1 )] ^ T2[B1(t2 )] ^ T3[B0(t3 )];
+ h[ 4] ^= T0[B3(t1 )] ^ T1[B2(t2 )] ^ T2[B1(t3 )] ^ T3[B0(t0 )];
+ h[ 8] ^= T0[B3(t2 )] ^ T1[B2(t3 )] ^ T2[B1(t0 )] ^ T3[B0(t1 )];
+ h[12] ^= T0[B3(t3 )] ^ T1[B2(t0 )] ^ T2[B1(t1 )] ^ T3[B0(t2 )];
+ h[ 1] ^= T0[B3(t4 )] ^ T1[B2(t5 )] ^ T2[B1(t6 )] ^ T3[B0(t7 )];
+ h[ 5] ^= T0[B3(t5 )] ^ T1[B2(t6 )] ^ T2[B1(t7 )] ^ T3[B0(t4 )];
+ h[ 9] ^= T0[B3(t6 )] ^ T1[B2(t7 )] ^ T2[B1(t4 )] ^ T3[B0(t5 )];
+ h[13] ^= T0[B3(t7 )] ^ T1[B2(t4 )] ^ T2[B1(t5 )] ^ T3[B0(t6 )];
+ h[ 2] ^= T0[B3(t8 )] ^ T1[B2(t9 )] ^ T2[B1(ta )] ^ T3[B0(tb )];
+ h[ 6] ^= T0[B3(t9 )] ^ T1[B2(ta )] ^ T2[B1(tb )] ^ T3[B0(t8 )];
+ h[10] ^= T0[B3(ta )] ^ T1[B2(tb )] ^ T2[B1(t8 )] ^ T3[B0(t9 )];
+ h[14] ^= T0[B3(tb )] ^ T1[B2(t8 )] ^ T2[B1(t9 )] ^ T3[B0(ta )];
+ h[ 3] ^= T0[B3(tc )] ^ T1[B2(td )] ^ T2[B1(te )] ^ T3[B0(tf )];
+ h[ 7] ^= T0[B3(td )] ^ T1[B2(te )] ^ T2[B1(tf )] ^ T3[B0(tc )];
+ h[11] ^= T0[B3(te )] ^ T1[B2(tf )] ^ T2[B1(tc )] ^ T3[B0(td )];
+ h[15] ^= T0[B3(tf )] ^ T1[B2(tc )] ^ T2[B1(td )] ^ T3[B0(te )];
+}
+
+/*
+ * Initialise a LANE hash state for a hashbitlen-bit digest.
+ * Only 224, 256, 384 and 512 are accepted; any other value is rejected
+ * with BAD_HASHBITLEN. On success the message-bit counter is reset and
+ * state->h is loaded with the IV matching the requested digest size.
+ */
+HashReturn laneInit (hashState *state, int hashbitlen)
+{
+ if (hashbitlen != 224 && hashbitlen != 256 && hashbitlen != 384 && hashbitlen != 512)
+ return BAD_HASHBITLEN;
+
+ state->hashbitlen = hashbitlen;
+ state->ctr = 0;
+
+ /* Select the initial chaining value: 224/256 use an 8-word state,
+  * 384/512 use the full 16-word state. */
+ switch (state->hashbitlen) {
+ case 224:
+ memcpy(state->h, iv224, 8*sizeof(u32));
+ break;
+ case 256: default:
+ memcpy(state->h, iv256, 8*sizeof(u32));
+ break;
+ case 384:
+ memcpy(state->h, iv384, 16*sizeof(u32));
+ break;
+ case 512:
+ memcpy(state->h, iv512, 16*sizeof(u32));
+ break;
+ }
+
+ return SUCCESS;
+}
+
+/*
+ * Absorb message data into the LANE state.
+ * databitlen is in BITS; state->ctr counts processed bits. Only the very
+ * last call may pass a bit length that is not a multiple of 8 — a
+ * fractional byte on any earlier call returns BAD_DATABITLEN.
+ * 224/256 use 64-byte blocks (lane256_compress); 384/512 use 128-byte
+ * blocks (lane512_compress). Full blocks are compressed as they become
+ * available; any remainder is buffered for the next Update/Final call.
+ */
+HashReturn laneUpdate (hashState *state, const BitSequence *data, DataLength databitlen)
+{
+ u64 buffill;
+ u64 bytes;
+
+ switch (state->hashbitlen) {
+ case 224: case 256: default:
+ /* buffill = bytes already queued in the 64-byte block buffer */
+ buffill = (state->ctr >> 3) & 0x3f;
+ bytes = databitlen >> 3;
+
+ if (state->ctr & 0x7)
+ return BAD_DATABITLEN; /* Only the last call to Update() may contain a fractional byte */
+
+ /* Check if we have some stuff left in the buffer. If so, fill it, and process it */
+ if (buffill) {
+ const u64 n = buffill + bytes > 64 ? 64-buffill : bytes; /* number of bytes to copy */
+ memcpy(state->buffer + buffill, data, n);
+ state->ctr += n << 3;
+ if (buffill + n == 64) /* full buffer now */
+ lane256_compress(state->buffer, state->h, MSB32(state->ctr), LSB32(state->ctr));
+ data += n;
+ bytes -= n;
+ }
+
+ /* Now process as many full blocks as we can directly from the input message */
+ while (bytes >= 64) {
+ state->ctr += 64 << 3;
+ lane256_compress(data, state->h, MSB32(state->ctr), LSB32(state->ctr));
+ data += 64;
+ bytes -= 64;
+ }
+ break;
+
+ case 384: case 512:
+ /* buffill = bytes already queued in the 128-byte block buffer */
+ buffill = (state->ctr >> 3) & 0x7f;
+ bytes = databitlen >> 3;
+
+ if (state->ctr & 0x7)
+ return BAD_DATABITLEN; /* Only the last call to Update() may contain a fractional byte */
+
+ /* Check if we have some stuff left in the buffer. If so, fill it, and process it */
+ if (buffill) {
+ const u64 n = buffill + bytes > 128 ? 128-buffill : bytes; /* number of bytes to copy */
+ memcpy(state->buffer + buffill, data, n);
+ state->ctr += n << 3;
+ if (buffill + n == 128) /* full buffer now */
+ lane512_compress(state->buffer, state->h, MSB32(state->ctr), LSB32(state->ctr));
+ data += n;
+ bytes -= n;
+ }
+
+ /* Now process as many full blocks as we can directly from the input message */
+ while (bytes >= 128) {
+ state->ctr += 128 << 3;
+ lane512_compress(data, state->h, MSB32(state->ctr), LSB32(state->ctr));
+ data += 128;
+ bytes -= 128;
+ }
+ break;
+ }
+
+ /* And finally, save the last, incomplete message block */
+ /* NOTE(review): this memcpy writes at offset 0. If the buffer already
+  * held data (buffill > 0, buffer not filled to a full block) AND this
+  * call carried a fractional final byte, that partial byte lands at
+  * buffer[0] instead of after the queued bytes — confirm callers never
+  * combine a non-empty buffer with a fractional-byte final Update(). */
+ if (bytes || (databitlen & 0x7)) {
+ memcpy(state->buffer, data, databitlen & 0x7 ? bytes+1 : bytes); /* also copy partial byte */
+ state->ctr += (bytes << 3) + (databitlen & 0x7);
+ }
+
+ return SUCCESS;
+}
+
+/*
+ * Finalise the hash: zero-pad and compress any buffered message data,
+ * run the output transformation (flag byte 0x00 followed by the total
+ * message length in bits, big-endian), and write the digest to hashval
+ * in big-endian word order.
+ *
+ * Fix: the digest is truncated to the declared size — 28 bytes for
+ * LANE-224 and 48 bytes for LANE-384 — instead of always dumping the
+ * full 32/64-byte internal state, which overran exactly-sized caller
+ * buffers for the 224/384 variants. 256 and 512 output is unchanged.
+ */
+HashReturn laneFinal (hashState *state, BitSequence *hashval)
+{
+
+ switch (state->hashbitlen) {
+ case 224: case 256: default:
+ /* do zero padding and compress last block, if there is some data in the buffer */
+ if (state->ctr & 0x1ff) {
+ const u64 n = (((state->ctr & 0x1ff) - 1) >> 3) + 1; /* number of bytes in buffer that are (partially) filled */
+ if (n < 64)
+ memset(state->buffer + n, 0, 64-n);
+ state->buffer[(state->ctr >> 3)&0x3f] &= ~(0xff >> (state->ctr & 0x7)); /* zero-pad partial byte */
+ lane256_compress(state->buffer, state->h, MSB32(state->ctr), LSB32(state->ctr));
+ }
+
+ /* output transformation */
+ memset(state->buffer, 0, 64);
+ state->buffer[0] = 0x00; /* flag byte 0x00: output transformation without seed */
+ state->buffer[1] = T8(state->ctr >> 56); /* message length in big-endian */
+ state->buffer[2] = T8(state->ctr >> 48);
+ state->buffer[3] = T8(state->ctr >> 40);
+ state->buffer[4] = T8(state->ctr >> 32);
+ state->buffer[5] = T8(state->ctr >> 24);
+ state->buffer[6] = T8(state->ctr >> 16);
+ state->buffer[7] = T8(state->ctr >> 8);
+ state->buffer[8] = T8(state->ctr >> 0);
+ lane256_compress(state->buffer, state->h, 0, 0);
+
+ /* write back result; LANE-224 is the state truncated to 28 bytes */
+ U32TO8_BIG(hashval, state->h[0]);
+ U32TO8_BIG(hashval+4, state->h[1]);
+ U32TO8_BIG(hashval+8, state->h[2]);
+ U32TO8_BIG(hashval+12, state->h[3]);
+ U32TO8_BIG(hashval+16, state->h[4]);
+ U32TO8_BIG(hashval+20, state->h[5]);
+ U32TO8_BIG(hashval+24, state->h[6]);
+ if (state->hashbitlen != 224) /* last word only for the full 256-bit digest */
+ U32TO8_BIG(hashval+28, state->h[7]);
+
+ break;
+
+ case 384: case 512:
+ /* do zero padding and compress last block, if there is some data in the buffer */
+ if (state->ctr & 0x3ff) {
+ const u64 n = (((state->ctr & 0x3ff) - 1) >> 3) + 1; /* number of bytes in buffer that are (partially) filled */
+ if (n < 128)
+ memset(state->buffer + n, 0, 128-n);
+ state->buffer[(state->ctr >> 3)&0x7f] &= ~(0xff >> (state->ctr & 0x7)); /* zero-pad partial byte */
+ lane512_compress(state->buffer, state->h, MSB32(state->ctr), LSB32(state->ctr));
+ }
+
+ /* output transformation */
+ memset(state->buffer, 0, 128);
+ state->buffer[0] = 0x00; /* flag byte 0x00: output transformation without seed */
+ state->buffer[1] = T8(state->ctr >> 56); /* message length in big-endian */
+ state->buffer[2] = T8(state->ctr >> 48);
+ state->buffer[3] = T8(state->ctr >> 40);
+ state->buffer[4] = T8(state->ctr >> 32);
+ state->buffer[5] = T8(state->ctr >> 24);
+ state->buffer[6] = T8(state->ctr >> 16);
+ state->buffer[7] = T8(state->ctr >> 8);
+ state->buffer[8] = T8(state->ctr >> 0);
+ lane512_compress(state->buffer, state->h, 0, 0);
+
+ /* write back result; LANE-384 is the state truncated to 48 bytes */
+ U32TO8_BIG(hashval, state->h[0]);
+ U32TO8_BIG(hashval+4, state->h[1]);
+ U32TO8_BIG(hashval+8, state->h[2]);
+ U32TO8_BIG(hashval+12, state->h[3]);
+ U32TO8_BIG(hashval+16, state->h[4]);
+ U32TO8_BIG(hashval+20, state->h[5]);
+ U32TO8_BIG(hashval+24, state->h[6]);
+ U32TO8_BIG(hashval+28, state->h[7]);
+ U32TO8_BIG(hashval+32, state->h[8]);
+ U32TO8_BIG(hashval+36, state->h[9]);
+ U32TO8_BIG(hashval+40, state->h[10]);
+ U32TO8_BIG(hashval+44, state->h[11]);
+ if (state->hashbitlen == 512) { /* last four words only for the full 512-bit digest */
+ U32TO8_BIG(hashval+48, state->h[12]);
+ U32TO8_BIG(hashval+52, state->h[13]);
+ U32TO8_BIG(hashval+56, state->h[14]);
+ U32TO8_BIG(hashval+60, state->h[15]);
+ }
+
+ break;
+ }
+
+ return SUCCESS;
+}
+
+/*
+ * One-shot convenience wrapper: Init + Update + Final over a single
+ * message. Stops at the first failing stage and returns its error code;
+ * on success the digest is written to hashval and SUCCESS is returned.
+ */
+HashReturn laneHash (int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval)
+{
+	hashState st;
+	HashReturn rc;
+
+	rc = laneInit(&st, hashbitlen);
+	if (rc == SUCCESS)
+		rc = laneUpdate(&st, data, databitlen);
+	if (rc == SUCCESS)
+		rc = laneFinal(&st, hashval);
+	return rc;
+}
\ No newline at end of file
diff --git a/stratum/algos/lane.h b/stratum/algos/lane.h
new file mode 100644
index 000000000..0fd2e6b0e
--- /dev/null
+++ b/stratum/algos/lane.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2008 Sebastiaan Indesteege
+ *
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Optimised ANSI-C implementation of LANE
+ */
+
+#ifndef LANE_H
+#define LANE_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* NOTE(review): the include target was lost when this patch was captured
+ * (angle-bracket text stripped); <string.h> assumed — confirm against the
+ * upstream reference implementation. */
+#include <string.h>
+
+/* NIST SHA-3 candidate API types. */
+typedef unsigned char BitSequence;
+typedef unsigned long long DataLength;
+
+/* Return codes for the lane* entry points. */
+typedef enum { SUCCESS = 0, FAIL = 1, BAD_HASHBITLEN = 2, BAD_DATABITLEN = 3 } HashReturn;
+
+typedef unsigned char u8;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+/* Incremental hashing context. */
+typedef struct {
+  int hashbitlen;   /* digest size in bits: 224, 256, 384 or 512 */
+  u64 ctr;          /* number of message bits processed so far */
+  u32 h[16];        /* chaining value (8 words used for 224/256) */
+  u8 buffer[128];   /* partial message block (64 bytes used for 224/256) */
+} hashState;
+
+HashReturn laneInit (hashState *state, int hashbitlen);
+HashReturn laneUpdate (hashState *state, const BitSequence *data, DataLength databitlen);
+HashReturn laneFinal (hashState *state, BitSequence *hashval);
+HashReturn laneHash (int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* LANE_H */
\ No newline at end of file
diff --git a/stratum/algos/lbk3.c b/stratum/algos/lbk3.c
new file mode 100644
index 000000000..3d9ccf71b
--- /dev/null
+++ b/stratum/algos/lbk3.c
@@ -0,0 +1,27 @@
+#include