diff --git a/crypto/blake2b/blake2b-compress-avx2.h b/crypto/blake2b/blake2b-compress-avx2.h index bc7e42d25..79afdc068 100644 --- a/crypto/blake2b/blake2b-compress-avx2.h +++ b/crypto/blake2b/blake2b-compress-avx2.h @@ -2,30 +2,30 @@ #ifndef blake2b_compress_avx2_H #define blake2b_compress_avx2_H -#define LOADU128(p) _mm_loadu_si128((__m128i *) (p)) -#define STOREU128(p, r) _mm_storeu_si128((__m128i *) (p), r) +#define LOADU128(p) _mm_loadu_si128((__m128i *)(p)) +#define STOREU128(p, r) _mm_storeu_si128((__m128i *)(p), r) -#define LOAD(p) _mm256_load_si256((__m256i *) (p)) -#define STORE(p, r) _mm256_store_si256((__m256i *) (p), r) +#define LOAD(p) _mm256_load_si256((__m256i *)(p)) +#define STORE(p, r) _mm256_store_si256((__m256i *)(p), r) -#define LOADU(p) _mm256_loadu_si256((__m256i *) (p)) -#define STOREU(p, r) _mm256_storeu_si256((__m256i *) (p), r) +#define LOADU(p) _mm256_loadu_si256((__m256i *)(p)) +#define STOREU(p, r) _mm256_storeu_si256((__m256i *)(p), r) static inline uint64_t LOADU64(const void *p) { - uint64_t v; - memcpy(&v, p, sizeof v); - return v; + uint64_t v; + memcpy(&v, p, sizeof v); + return v; } -#define ROTATE16 \ - _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, \ - 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9) +#define ROTATE16 \ + _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, \ + 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9) -#define ROTATE24 \ - _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, \ - 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10) +#define ROTATE24 \ + _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, \ + 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10) #define ADD(a, b) _mm256_add_epi64(a, b) #define SUB(a, b) _mm256_sub_epi64(a, b) @@ -40,98 +40,104 @@ LOADU64(const void *p) #define ROT63(x) _mm256_or_si256(_mm256_srli_epi64((x), 63), ADD((x), (x))) #define BLAKE2B_G1_V1(a, b, c, d, m) \ - do { \ - a = ADD(a, m); \ - a = ADD(a, b); \ - d = XOR(d, a); \ - d = ROT32(d); \ - c = ADD(c, d); \ - b = XOR(b, c); \ - b = ROT24(b); \ - } while (0) + do \ + { \ + a = ADD(a, m); \ + a = ADD(a, b); \ + d = XOR(d, a); \ + d = ROT32(d); \ + c = ADD(c, d); \ + b = XOR(b, c); \ + b = ROT24(b); \ + } while(0) #define BLAKE2B_G2_V1(a, b, c, d, m) \ - do { \ - a = ADD(a, m); \ - a = ADD(a, b); \ - d = XOR(d, a); \ - d = ROT16(d); \ - c = ADD(c, d); \ - b = XOR(b, c); \ - b = ROT63(b); \ - } while (0) - -#define BLAKE2B_DIAG_V1(a, b, c, d) \ - do { \ - d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(2, 1, 0, 3)); \ - c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1, 0, 3, 2)); \ - b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(0, 3, 2, 1)); \ - } while (0) - -#define BLAKE2B_UNDIAG_V1(a, b, c, d) \ - do { \ - d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(0, 3, 2, 1)); \ - c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1, 0, 3, 2)); \ - b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(2, 1, 0, 3)); \ - } while (0) + do \ + { \ + a = ADD(a, m); \ + a = ADD(a, b); \ + d = XOR(d, a); \ + d = ROT16(d); \ + c = ADD(c, d); \ + b = XOR(b, c); \ + b = ROT63(b); \ + } while(0) + +#define BLAKE2B_DIAG_V1(a, b, c, d) \ + do \ + { \ + d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(2, 1, 0, 3)); \ + c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1, 0, 3, 2)); \ + b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(0, 3, 2, 1)); \ + } while(0) + +#define BLAKE2B_UNDIAG_V1(a, b, c, d) \ + do \ + { \ + d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(0, 3, 2, 1)); \ + c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1, 0, 3, 2)); \ + b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(2, 1, 0, 3)); \ + } while(0) #include "blake2b-load-avx2.h" #define BLAKE2B_ROUND_V1(a, b, c, d, r, m) \ - do { \ - __m256i b0; \ - BLAKE2B_LOAD_MSG_##r##_1(b0); \ - BLAKE2B_G1_V1(a, b, c, d, b0); \ - BLAKE2B_LOAD_MSG_##r##_2(b0); \ - BLAKE2B_G2_V1(a, b, c, d, b0); \ - BLAKE2B_DIAG_V1(a, b, c, d); \ - BLAKE2B_LOAD_MSG_##r##_3(b0); \ - BLAKE2B_G1_V1(a, b, c, d, b0); \ - BLAKE2B_LOAD_MSG_##r##_4(b0); \ - BLAKE2B_G2_V1(a, b, c, d, b0); \ - BLAKE2B_UNDIAG_V1(a, b, c, d); \ - } while (0) - -#define BLAKE2B_ROUNDS_V1(a, b, c, d, m) \ - do { \ - BLAKE2B_ROUND_V1(a, b, c, d, 0, (m)); \ - BLAKE2B_ROUND_V1(a, b, c, d, 1, (m)); \ - BLAKE2B_ROUND_V1(a, b, c, d, 2, (m)); \ - BLAKE2B_ROUND_V1(a, b, c, d, 3, (m)); \ - BLAKE2B_ROUND_V1(a, b, c, d, 4, (m)); \ - BLAKE2B_ROUND_V1(a, b, c, d, 5, (m)); \ - BLAKE2B_ROUND_V1(a, b, c, d, 6, (m)); \ - BLAKE2B_ROUND_V1(a, b, c, d, 7, (m)); \ - BLAKE2B_ROUND_V1(a, b, c, d, 8, (m)); \ - BLAKE2B_ROUND_V1(a, b, c, d, 9, (m)); \ - BLAKE2B_ROUND_V1(a, b, c, d, 10, (m)); \ - BLAKE2B_ROUND_V1(a, b, c, d, 11, (m)); \ - } while (0) - -#define DECLARE_MESSAGE_WORDS(m) \ - const __m256i m0 = _mm256_broadcastsi128_si256(LOADU128((m) + 0)); \ - const __m256i m1 = _mm256_broadcastsi128_si256(LOADU128((m) + 16)); \ - const __m256i m2 = _mm256_broadcastsi128_si256(LOADU128((m) + 32)); \ - const __m256i m3 = _mm256_broadcastsi128_si256(LOADU128((m) + 48)); \ - const __m256i m4 = _mm256_broadcastsi128_si256(LOADU128((m) + 64)); \ - const __m256i m5 = _mm256_broadcastsi128_si256(LOADU128((m) + 80)); \ - const __m256i m6 = _mm256_broadcastsi128_si256(LOADU128((m) + 96)); \ - const __m256i m7 = _mm256_broadcastsi128_si256(LOADU128((m) + 112)); \ - __m256i t0, t1; - -#define BLAKE2B_COMPRESS_V1(a, b, m, t0, t1, f0, f1) \ - do { \ - DECLARE_MESSAGE_WORDS(m) \ - const __m256i iv0 = a; \ - const __m256i iv1 = b; \ - __m256i c = LOAD(&blake2b_IV[0]); \ - __m256i d = \ - XOR(LOAD(&blake2b_IV[4]), _mm256_set_epi64x(f1, f0, t1, t0)); \ - BLAKE2B_ROUNDS_V1(a, b, c, d, m); \ - a = XOR(a, c); \ - b = XOR(b, d); \ - a = XOR(a, iv0); \ - b = XOR(b, iv1); \ - } while (0) + do \ + { \ + __m256i b0; \ + BLAKE2B_LOAD_MSG_##r##_1(b0); \ + BLAKE2B_G1_V1(a, b, c, d, b0); \ + BLAKE2B_LOAD_MSG_##r##_2(b0); \ + BLAKE2B_G2_V1(a, b, c, d, b0); \ + BLAKE2B_DIAG_V1(a, b, c, d); \ + BLAKE2B_LOAD_MSG_##r##_3(b0); \ + BLAKE2B_G1_V1(a, b, c, d, b0); \ + BLAKE2B_LOAD_MSG_##r##_4(b0); \ + BLAKE2B_G2_V1(a, b, c, d, b0); \ + BLAKE2B_UNDIAG_V1(a, b, c, d); \ + } while(0) + +#define BLAKE2B_ROUNDS_V1(a, b, c, d, m) \ + do \ + { \ + BLAKE2B_ROUND_V1(a, b, c, d, 0, (m)); \ + BLAKE2B_ROUND_V1(a, b, c, d, 1, (m)); \ + BLAKE2B_ROUND_V1(a, b, c, d, 2, (m)); \ + BLAKE2B_ROUND_V1(a, b, c, d, 3, (m)); \ + BLAKE2B_ROUND_V1(a, b, c, d, 4, (m)); \ + BLAKE2B_ROUND_V1(a, b, c, d, 5, (m)); \ + BLAKE2B_ROUND_V1(a, b, c, d, 6, (m)); \ + BLAKE2B_ROUND_V1(a, b, c, d, 7, (m)); \ + BLAKE2B_ROUND_V1(a, b, c, d, 8, (m)); \ + BLAKE2B_ROUND_V1(a, b, c, d, 9, (m)); \ + BLAKE2B_ROUND_V1(a, b, c, d, 10, (m)); \ + BLAKE2B_ROUND_V1(a, b, c, d, 11, (m)); \ + } while(0) + +#define DECLARE_MESSAGE_WORDS(m) \ + const __m256i m0 = _mm256_broadcastsi128_si256(LOADU128((m) + 0)); \ + const __m256i m1 = _mm256_broadcastsi128_si256(LOADU128((m) + 16)); \ + const __m256i m2 = _mm256_broadcastsi128_si256(LOADU128((m) + 32)); \ + const __m256i m3 = _mm256_broadcastsi128_si256(LOADU128((m) + 48)); \ + const __m256i m4 = _mm256_broadcastsi128_si256(LOADU128((m) + 64)); \ + const __m256i m5 = _mm256_broadcastsi128_si256(LOADU128((m) + 80)); \ + const __m256i m6 = _mm256_broadcastsi128_si256(LOADU128((m) + 96)); \ + const __m256i m7 = _mm256_broadcastsi128_si256(LOADU128((m) + 112)); \ + __m256i t0, t1; + +#define BLAKE2B_COMPRESS_V1(a, b, m, t0, t1, f0, f1) \ + do \ + { \ + DECLARE_MESSAGE_WORDS(m) \ + const __m256i iv0 = a; \ + const __m256i iv1 = b; \ + __m256i c = LOAD(&blake2b_IV[0]); \ + __m256i d = XOR(LOAD(&blake2b_IV[4]), _mm256_set_epi64x(f1, f0, t1, t0)); \ + BLAKE2B_ROUNDS_V1(a, b, c, d, m); \ + a = XOR(a, c); \ + b = XOR(b, d); \ + a = XOR(a, iv0); \ + b = XOR(b, iv1); \ + } while(0) #endif diff --git a/crypto/blake2b/blake2b-compress-sse41.h b/crypto/blake2b/blake2b-compress-sse41.h index ac78e5bb1..de56d48f6 100644 --- a/crypto/blake2b/blake2b-compress-sse41.h +++ b/crypto/blake2b/blake2b-compress-sse41.h @@ -2,102 +2,99 @@ #ifndef blake2b_compress_sse41_H #define blake2b_compress_sse41_H -#define LOADU(p) _mm_loadu_si128((const __m128i *) (const void *) (p)) -#define STOREU(p, r) _mm_storeu_si128((__m128i *) (void *) (p), r) +#define LOADU(p) _mm_loadu_si128((const __m128i *)(const void *)(p)) +#define STOREU(p, r) _mm_storeu_si128((__m128i *)(void *)(p), r) -#define _mm_roti_epi64(x, c) \ - (-(c) == 32) \ - ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \ - : (-(c) == 24) \ - ? _mm_shuffle_epi8((x), r24) \ - : (-(c) == 16) \ - ? _mm_shuffle_epi8((x), r16) \ - : (-(c) == 63) \ - ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \ - _mm_add_epi64((x), (x))) \ - : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \ - _mm_slli_epi64((x), 64 - (-(c)))) +#define _mm_roti_epi64(x, c) \ + (-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \ + : (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \ + : (-(c) == 16) \ + ? _mm_shuffle_epi8((x), r16) \ + : (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \ + _mm_add_epi64((x), (x))) \ + : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \ + _mm_slli_epi64((x), 64 - (-(c)))) #define G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1) \ - row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ - row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ \ - row4l = _mm_xor_si128(row4l, row1l); \ - row4h = _mm_xor_si128(row4h, row1h); \ + row4l = _mm_xor_si128(row4l, row1l); \ + row4h = _mm_xor_si128(row4h, row1h); \ \ - row4l = _mm_roti_epi64(row4l, -32); \ - row4h = _mm_roti_epi64(row4h, -32); \ + row4l = _mm_roti_epi64(row4l, -32); \ + row4h = _mm_roti_epi64(row4h, -32); \ \ - row3l = _mm_add_epi64(row3l, row4l); \ - row3h = _mm_add_epi64(row3h, row4h); \ + row3l = _mm_add_epi64(row3l, row4l); \ + row3h = _mm_add_epi64(row3h, row4h); \ \ - row2l = _mm_xor_si128(row2l, row3l); \ - row2h = _mm_xor_si128(row2h, row3h); \ + row2l = _mm_xor_si128(row2l, row3l); \ + row2h = _mm_xor_si128(row2h, row3h); \ \ - row2l = _mm_roti_epi64(row2l, -24); \ - row2h = _mm_roti_epi64(row2h, -24); + row2l = _mm_roti_epi64(row2l, -24); \ + row2h = _mm_roti_epi64(row2h, -24); #define G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1) \ - row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ - row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ \ - row4l = _mm_xor_si128(row4l, row1l); \ - row4h = _mm_xor_si128(row4h, row1h); \ + row4l = _mm_xor_si128(row4l, row1l); \ + row4h = _mm_xor_si128(row4h, row1h); \ \ - row4l = _mm_roti_epi64(row4l, -16); \ - row4h = _mm_roti_epi64(row4h, -16); \ + row4l = _mm_roti_epi64(row4l, -16); \ + row4h = _mm_roti_epi64(row4h, -16); \ \ - row3l = _mm_add_epi64(row3l, row4l); \ - row3h = _mm_add_epi64(row3h, row4h); \ + row3l = _mm_add_epi64(row3l, row4l); \ + row3h = _mm_add_epi64(row3h, row4h); \ \ - row2l = _mm_xor_si128(row2l, row3l); \ - row2h = _mm_xor_si128(row2h, row3h); \ + row2l = _mm_xor_si128(row2l, row3l); \ + row2h = _mm_xor_si128(row2h, row3h); \ \ - row2l = _mm_roti_epi64(row2l, -63); \ - row2h = _mm_roti_epi64(row2h, -63); + row2l = _mm_roti_epi64(row2l, -63); \ + row2h = _mm_roti_epi64(row2h, -63); #define DIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h) \ - t0 = _mm_alignr_epi8(row2h, row2l, 8); \ - t1 = _mm_alignr_epi8(row2l, row2h, 8); \ - row2l = t0; \ - row2h = t1; \ + t0 = _mm_alignr_epi8(row2h, row2l, 8); \ + t1 = _mm_alignr_epi8(row2l, row2h, 8); \ + row2l = t0; \ + row2h = t1; \ \ - t0 = row3l; \ - row3l = row3h; \ - row3h = t0; \ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ \ - t0 = _mm_alignr_epi8(row4h, row4l, 8); \ - t1 = _mm_alignr_epi8(row4l, row4h, 8); \ - row4l = t1; \ - row4h = t0; + t0 = _mm_alignr_epi8(row4h, row4l, 8); \ + t1 = _mm_alignr_epi8(row4l, row4h, 8); \ + row4l = t1; \ + row4h = t0; #define UNDIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h) \ - t0 = _mm_alignr_epi8(row2l, row2h, 8); \ - t1 = _mm_alignr_epi8(row2h, row2l, 8); \ - row2l = t0; \ - row2h = t1; \ + t0 = _mm_alignr_epi8(row2l, row2h, 8); \ + t1 = _mm_alignr_epi8(row2h, row2l, 8); \ + row2l = t0; \ + row2h = t1; \ \ - t0 = row3l; \ - row3l = row3h; \ - row3h = t0; \ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ \ - t0 = _mm_alignr_epi8(row4l, row4h, 8); \ - t1 = _mm_alignr_epi8(row4h, row4l, 8); \ - row4l = t1; \ - row4h = t0; + t0 = _mm_alignr_epi8(row4l, row4h, 8); \ + t1 = _mm_alignr_epi8(row4h, row4l, 8); \ + row4l = t1; \ + row4h = t0; #include "blake2b-load-sse41.h" -#define ROUND(r) \ - LOAD_MSG_##r##_1(b0, b1); \ - G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ - LOAD_MSG_##r##_2(b0, b1); \ - G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ - DIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); \ - LOAD_MSG_##r##_3(b0, b1); \ - G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ - LOAD_MSG_##r##_4(b0, b1); \ - G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ - UNDIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); +#define ROUND(r) \ + LOAD_MSG_##r##_1(b0, b1); \ + G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ + LOAD_MSG_##r##_2(b0, b1); \ + G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ + DIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); \ + LOAD_MSG_##r##_3(b0, b1); \ + G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ + LOAD_MSG_##r##_4(b0, b1); \ + G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ + UNDIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); #endif diff --git a/crypto/blake2b/blake2b-compress-ssse3.h b/crypto/blake2b/blake2b-compress-ssse3.h index 9a7164fe2..ec57a02b1 100644 --- a/crypto/blake2b/blake2b-compress-ssse3.h +++ b/crypto/blake2b/blake2b-compress-ssse3.h @@ -2,102 +2,99 @@ #ifndef blake2b_compress_ssse3_H #define blake2b_compress_ssse3_H -#define LOADU(p) _mm_loadu_si128((const __m128i *) (const void *) (p)) -#define STOREU(p, r) _mm_storeu_si128((__m128i *) (void *) (p), r) +#define LOADU(p) _mm_loadu_si128((const __m128i *)(const void *)(p)) +#define STOREU(p, r) _mm_storeu_si128((__m128i *)(void *)(p), r) -#define _mm_roti_epi64(x, c) \ - (-(c) == 32) \ - ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \ - : (-(c) == 24) \ - ? _mm_shuffle_epi8((x), r24) \ - : (-(c) == 16) \ - ? _mm_shuffle_epi8((x), r16) \ - : (-(c) == 63) \ - ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \ - _mm_add_epi64((x), (x))) \ - : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \ - _mm_slli_epi64((x), 64 - (-(c)))) +#define _mm_roti_epi64(x, c) \ + (-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \ + : (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \ + : (-(c) == 16) \ + ? _mm_shuffle_epi8((x), r16) \ + : (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \ + _mm_add_epi64((x), (x))) \ + : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \ + _mm_slli_epi64((x), 64 - (-(c)))) #define G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1) \ - row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ - row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ \ - row4l = _mm_xor_si128(row4l, row1l); \ - row4h = _mm_xor_si128(row4h, row1h); \ + row4l = _mm_xor_si128(row4l, row1l); \ + row4h = _mm_xor_si128(row4h, row1h); \ \ - row4l = _mm_roti_epi64(row4l, -32); \ - row4h = _mm_roti_epi64(row4h, -32); \ + row4l = _mm_roti_epi64(row4l, -32); \ + row4h = _mm_roti_epi64(row4h, -32); \ \ - row3l = _mm_add_epi64(row3l, row4l); \ - row3h = _mm_add_epi64(row3h, row4h); \ + row3l = _mm_add_epi64(row3l, row4l); \ + row3h = _mm_add_epi64(row3h, row4h); \ \ - row2l = _mm_xor_si128(row2l, row3l); \ - row2h = _mm_xor_si128(row2h, row3h); \ + row2l = _mm_xor_si128(row2l, row3l); \ + row2h = _mm_xor_si128(row2h, row3h); \ \ - row2l = _mm_roti_epi64(row2l, -24); \ - row2h = _mm_roti_epi64(row2h, -24); + row2l = _mm_roti_epi64(row2l, -24); \ + row2h = _mm_roti_epi64(row2h, -24); #define G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1) \ - row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ - row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ \ - row4l = _mm_xor_si128(row4l, row1l); \ - row4h = _mm_xor_si128(row4h, row1h); \ + row4l = _mm_xor_si128(row4l, row1l); \ + row4h = _mm_xor_si128(row4h, row1h); \ \ - row4l = _mm_roti_epi64(row4l, -16); \ - row4h = _mm_roti_epi64(row4h, -16); \ + row4l = _mm_roti_epi64(row4l, -16); \ + row4h = _mm_roti_epi64(row4h, -16); \ \ - row3l = _mm_add_epi64(row3l, row4l); \ - row3h = _mm_add_epi64(row3h, row4h); \ + row3l = _mm_add_epi64(row3l, row4l); \ + row3h = _mm_add_epi64(row3h, row4h); \ \ - row2l = _mm_xor_si128(row2l, row3l); \ - row2h = _mm_xor_si128(row2h, row3h); \ + row2l = _mm_xor_si128(row2l, row3l); \ + row2h = _mm_xor_si128(row2h, row3h); \ \ - row2l = _mm_roti_epi64(row2l, -63); \ - row2h = _mm_roti_epi64(row2h, -63); + row2l = _mm_roti_epi64(row2l, -63); \ + row2h = _mm_roti_epi64(row2h, -63); #define DIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h) \ - t0 = _mm_alignr_epi8(row2h, row2l, 8); \ - t1 = _mm_alignr_epi8(row2l, row2h, 8); \ - row2l = t0; \ - row2h = t1; \ + t0 = _mm_alignr_epi8(row2h, row2l, 8); \ + t1 = _mm_alignr_epi8(row2l, row2h, 8); \ + row2l = t0; \ + row2h = t1; \ \ - t0 = row3l; \ - row3l = row3h; \ - row3h = t0; \ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ \ - t0 = _mm_alignr_epi8(row4h, row4l, 8); \ - t1 = _mm_alignr_epi8(row4l, row4h, 8); \ - row4l = t1; \ - row4h = t0; + t0 = _mm_alignr_epi8(row4h, row4l, 8); \ + t1 = _mm_alignr_epi8(row4l, row4h, 8); \ + row4l = t1; \ + row4h = t0; #define UNDIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h) \ - t0 = _mm_alignr_epi8(row2l, row2h, 8); \ - t1 = _mm_alignr_epi8(row2h, row2l, 8); \ - row2l = t0; \ - row2h = t1; \ + t0 = _mm_alignr_epi8(row2l, row2h, 8); \ + t1 = _mm_alignr_epi8(row2h, row2l, 8); \ + row2l = t0; \ + row2h = t1; \ \ - t0 = row3l; \ - row3l = row3h; \ - row3h = t0; \ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ \ - t0 = _mm_alignr_epi8(row4l, row4h, 8); \ - t1 = _mm_alignr_epi8(row4h, row4l, 8); \ - row4l = t1; \ - row4h = t0; + t0 = _mm_alignr_epi8(row4l, row4h, 8); \ + t1 = _mm_alignr_epi8(row4h, row4l, 8); \ + row4l = t1; \ + row4h = t0; #include "blake2b-load-sse2.h" -#define ROUND(r) \ - LOAD_MSG_##r##_1(b0, b1); \ - G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ - LOAD_MSG_##r##_2(b0, b1); \ - G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ - DIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); \ - LOAD_MSG_##r##_3(b0, b1); \ - G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ - LOAD_MSG_##r##_4(b0, b1); \ - G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ - UNDIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); +#define ROUND(r) \ + LOAD_MSG_##r##_1(b0, b1); \ + G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ + LOAD_MSG_##r##_2(b0, b1); \ + G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ + DIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); \ + LOAD_MSG_##r##_3(b0, b1); \ + G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ + LOAD_MSG_##r##_4(b0, b1); \ + G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ + UNDIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); #endif diff --git a/crypto/blake2b/blake2b-load-avx2.h b/crypto/blake2b/blake2b-load-avx2.h index 8c15f177c..9d06890d4 100644 --- a/crypto/blake2b/blake2b-load-avx2.h +++ b/crypto/blake2b/blake2b-load-avx2.h @@ -1,340 +1,388 @@ #ifndef blake2b_load_avx2_H #define blake2b_load_avx2_H -#define BLAKE2B_LOAD_MSG_0_1(b0) \ - do { \ - t0 = _mm256_unpacklo_epi64(m0, m1); \ - t1 = _mm256_unpacklo_epi64(m2, m3); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_0_2(b0) \ - do { \ - t0 = _mm256_unpackhi_epi64(m0, m1); \ - t1 = _mm256_unpackhi_epi64(m2, m3); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_0_3(b0) \ - do { \ - t0 = _mm256_unpacklo_epi64(m4, m5); \ - t1 = _mm256_unpacklo_epi64(m6, m7); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_0_4(b0) \ - do { \ - t0 = _mm256_unpackhi_epi64(m4, m5); \ - t1 = _mm256_unpackhi_epi64(m6, m7); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_1_1(b0) \ - do { \ - t0 = _mm256_unpacklo_epi64(m7, m2); \ - t1 = _mm256_unpackhi_epi64(m4, m6); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_1_2(b0) \ - do { \ - t0 = _mm256_unpacklo_epi64(m5, m4); \ - t1 = _mm256_alignr_epi8(m3, m7, 8); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_1_3(b0) \ - do { \ - t0 = _mm256_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \ - t1 = _mm256_unpackhi_epi64(m5, m2); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_1_4(b0) \ - do { \ - t0 = _mm256_unpacklo_epi64(m6, m1); \ - t1 = _mm256_unpackhi_epi64(m3, m1); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_2_1(b0) \ - do { \ - t0 = _mm256_alignr_epi8(m6, m5, 8); \ - t1 = _mm256_unpackhi_epi64(m2, m7); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_2_2(b0) \ - do { \ - t0 = _mm256_unpacklo_epi64(m4, m0); \ - t1 = _mm256_blend_epi32(m6, m1, 0x33); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_2_3(b0) \ - do { \ - t0 = _mm256_blend_epi32(m1, m5, 0x33); \ - t1 = _mm256_unpackhi_epi64(m3, m4); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_2_4(b0) \ - do { \ - t0 = _mm256_unpacklo_epi64(m7, m3); \ - t1 = _mm256_alignr_epi8(m2, m0, 8); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_3_1(b0) \ - do { \ - t0 = _mm256_unpackhi_epi64(m3, m1); \ - t1 = _mm256_unpackhi_epi64(m6, m5); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_3_2(b0) \ - do { \ - t0 = _mm256_unpackhi_epi64(m4, m0); \ - t1 = _mm256_unpacklo_epi64(m6, m7); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_3_3(b0) \ - do { \ - t0 = _mm256_blend_epi32(m2, m1, 0x33); \ - t1 = _mm256_blend_epi32(m7, m2, 0x33); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_3_4(b0) \ - do { \ - t0 = _mm256_unpacklo_epi64(m3, m5); \ - t1 = _mm256_unpacklo_epi64(m0, m4); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_4_1(b0) \ - do { \ - t0 = _mm256_unpackhi_epi64(m4, m2); \ - t1 = _mm256_unpacklo_epi64(m1, m5); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_4_2(b0) \ - do { \ - t0 = _mm256_blend_epi32(m3, m0, 0x33); \ - t1 = _mm256_blend_epi32(m7, m2, 0x33); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_4_3(b0) \ - do { \ - t0 = _mm256_blend_epi32(m5, m7, 0x33); \ - t1 = _mm256_blend_epi32(m1, m3, 0x33); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_4_4(b0) \ - do { \ - t0 = _mm256_alignr_epi8(m6, m0, 8); \ - t1 = _mm256_blend_epi32(m6, m4, 0x33); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_5_1(b0) \ - do { \ - t0 = _mm256_unpacklo_epi64(m1, m3); \ - t1 = _mm256_unpacklo_epi64(m0, m4); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_5_2(b0) \ - do { \ - t0 = _mm256_unpacklo_epi64(m6, m5); \ - t1 = _mm256_unpackhi_epi64(m5, m1); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_5_3(b0) \ - do { \ - t0 = _mm256_blend_epi32(m3, m2, 0x33); \ - t1 = _mm256_unpackhi_epi64(m7, m0); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_5_4(b0) \ - do { \ - t0 = _mm256_unpackhi_epi64(m6, m2); \ - t1 = _mm256_blend_epi32(m4, m7, 0x33); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_6_1(b0) \ - do { \ - t0 = _mm256_blend_epi32(m0, m6, 0x33); \ - t1 = _mm256_unpacklo_epi64(m7, m2); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_6_2(b0) \ - do { \ - t0 = _mm256_unpackhi_epi64(m2, m7); \ - t1 = _mm256_alignr_epi8(m5, m6, 8); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_6_3(b0) \ - do { \ - t0 = _mm256_unpacklo_epi64(m0, m3); \ - t1 = _mm256_shuffle_epi32(m4, _MM_SHUFFLE(1, 0, 3, 2)); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_6_4(b0) \ - do { \ - t0 = _mm256_unpackhi_epi64(m3, m1); \ - t1 = _mm256_blend_epi32(m5, m1, 0x33); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_7_1(b0) \ - do { \ - t0 = _mm256_unpackhi_epi64(m6, m3); \ - t1 = _mm256_blend_epi32(m1, m6, 0x33); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_7_2(b0) \ - do { \ - t0 = _mm256_alignr_epi8(m7, m5, 8); \ - t1 = _mm256_unpackhi_epi64(m0, m4); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_7_3(b0) \ - do { \ - t0 = _mm256_unpackhi_epi64(m2, m7); \ - t1 = _mm256_unpacklo_epi64(m4, m1); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_7_4(b0) \ - do { \ - t0 = _mm256_unpacklo_epi64(m0, m2); \ - t1 = _mm256_unpacklo_epi64(m3, m5); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_8_1(b0) \ - do { \ - t0 = _mm256_unpacklo_epi64(m3, m7); \ - t1 = _mm256_alignr_epi8(m0, m5, 8); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_8_2(b0) \ - do { \ - t0 = _mm256_unpackhi_epi64(m7, m4); \ - t1 = _mm256_alignr_epi8(m4, m1, 8); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_8_3(b0) \ - do { \ - t0 = m6; \ - t1 = _mm256_alignr_epi8(m5, m0, 8); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_8_4(b0) \ - do { \ - t0 = _mm256_blend_epi32(m3, m1, 0x33); \ - t1 = m2; \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_9_1(b0) \ - do { \ - t0 = _mm256_unpacklo_epi64(m5, m4); \ - t1 = _mm256_unpackhi_epi64(m3, m0); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_9_2(b0) \ - do { \ - t0 = _mm256_unpacklo_epi64(m1, m2); \ - t1 = _mm256_blend_epi32(m2, m3, 0x33); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_9_3(b0) \ - do { \ - t0 = _mm256_unpackhi_epi64(m7, m4); \ - t1 = _mm256_unpackhi_epi64(m1, m6); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_9_4(b0) \ - do { \ - t0 = _mm256_alignr_epi8(m7, m5, 8); \ - t1 = _mm256_unpacklo_epi64(m6, m0); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_10_1(b0) \ - do { \ - t0 = _mm256_unpacklo_epi64(m0, m1); \ - t1 = _mm256_unpacklo_epi64(m2, m3); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_10_2(b0) \ - do { \ - t0 = _mm256_unpackhi_epi64(m0, m1); \ - t1 = _mm256_unpackhi_epi64(m2, m3); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_10_3(b0) \ - do { \ - t0 = _mm256_unpacklo_epi64(m4, m5); \ - t1 = _mm256_unpacklo_epi64(m6, m7); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_10_4(b0) \ - do { \ - t0 = _mm256_unpackhi_epi64(m4, m5); \ - t1 = _mm256_unpackhi_epi64(m6, m7); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_11_1(b0) \ - do { \ - t0 = _mm256_unpacklo_epi64(m7, m2); \ - t1 = _mm256_unpackhi_epi64(m4, m6); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_11_2(b0) \ - do { \ - t0 = _mm256_unpacklo_epi64(m5, m4); \ - t1 = _mm256_alignr_epi8(m3, m7, 8); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_11_3(b0) \ - do { \ - t0 = _mm256_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \ - t1 = _mm256_unpackhi_epi64(m5, m2); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) - -#define BLAKE2B_LOAD_MSG_11_4(b0) \ - do { \ - t0 = _mm256_unpacklo_epi64(m6, m1); \ - t1 = _mm256_unpackhi_epi64(m3, m1); \ - b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ - } while (0) +#define BLAKE2B_LOAD_MSG_0_1(b0) \ + do \ + { \ + t0 = _mm256_unpacklo_epi64(m0, m1); \ + t1 = _mm256_unpacklo_epi64(m2, m3); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_0_2(b0) \ + do \ + { \ + t0 = _mm256_unpackhi_epi64(m0, m1); \ + t1 = _mm256_unpackhi_epi64(m2, m3); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_0_3(b0) \ + do \ + { \ + t0 = _mm256_unpacklo_epi64(m4, m5); \ + t1 = _mm256_unpacklo_epi64(m6, m7); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_0_4(b0) \ + do \ + { \ + t0 = _mm256_unpackhi_epi64(m4, m5); \ + t1 = _mm256_unpackhi_epi64(m6, m7); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_1_1(b0) \ + do \ + { \ + t0 = _mm256_unpacklo_epi64(m7, m2); \ + t1 = _mm256_unpackhi_epi64(m4, m6); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_1_2(b0) \ + do \ + { \ + t0 = _mm256_unpacklo_epi64(m5, m4); \ + t1 = _mm256_alignr_epi8(m3, m7, 8); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_1_3(b0) \ + do \ + { \ + t0 = _mm256_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \ + t1 = _mm256_unpackhi_epi64(m5, m2); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_1_4(b0) \ + do \ + { \ + t0 = _mm256_unpacklo_epi64(m6, m1); \ + t1 = _mm256_unpackhi_epi64(m3, m1); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_2_1(b0) \ + do \ + { \ + t0 = _mm256_alignr_epi8(m6, m5, 8); \ + t1 = _mm256_unpackhi_epi64(m2, m7); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_2_2(b0) \ + do \ + { \ + t0 = _mm256_unpacklo_epi64(m4, m0); \ + t1 = _mm256_blend_epi32(m6, m1, 0x33); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_2_3(b0) \ + do \ + { \ + t0 = _mm256_blend_epi32(m1, m5, 0x33); \ + t1 = _mm256_unpackhi_epi64(m3, m4); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_2_4(b0) \ + do \ + { \ + t0 = _mm256_unpacklo_epi64(m7, m3); \ + t1 = _mm256_alignr_epi8(m2, m0, 8); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_3_1(b0) \ + do \ + { \ + t0 = _mm256_unpackhi_epi64(m3, m1); \ + t1 = _mm256_unpackhi_epi64(m6, m5); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_3_2(b0) \ + do \ + { \ + t0 = _mm256_unpackhi_epi64(m4, m0); \ + t1 = _mm256_unpacklo_epi64(m6, m7); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_3_3(b0) \ + do \ + { \ + t0 = _mm256_blend_epi32(m2, m1, 0x33); \ + t1 = _mm256_blend_epi32(m7, m2, 0x33); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_3_4(b0) \ + do \ + { \ + t0 = _mm256_unpacklo_epi64(m3, m5); \ + t1 = _mm256_unpacklo_epi64(m0, m4); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_4_1(b0) \ + do \ + { \ + t0 = _mm256_unpackhi_epi64(m4, m2); \ + t1 = _mm256_unpacklo_epi64(m1, m5); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_4_2(b0) \ + do \ + { \ + t0 = _mm256_blend_epi32(m3, m0, 0x33); \ + t1 = _mm256_blend_epi32(m7, m2, 0x33); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_4_3(b0) \ + do \ + { \ + t0 = _mm256_blend_epi32(m5, m7, 0x33); \ + t1 = _mm256_blend_epi32(m1, m3, 0x33); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_4_4(b0) \ + do \ + { \ + t0 = _mm256_alignr_epi8(m6, m0, 8); \ + t1 = _mm256_blend_epi32(m6, m4, 0x33); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_5_1(b0) \ + do \ + { \ + t0 = _mm256_unpacklo_epi64(m1, m3); \ + t1 = _mm256_unpacklo_epi64(m0, m4); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_5_2(b0) \ + do \ + { \ + t0 = _mm256_unpacklo_epi64(m6, m5); \ + t1 = _mm256_unpackhi_epi64(m5, m1); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_5_3(b0) \ + do \ + { \ + t0 = _mm256_blend_epi32(m3, m2, 0x33); \ + t1 = _mm256_unpackhi_epi64(m7, m0); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_5_4(b0) \ + do \ + { \ + t0 = _mm256_unpackhi_epi64(m6, m2); \ + t1 = _mm256_blend_epi32(m4, m7, 0x33); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_6_1(b0) \ + do \ + { \ + t0 = _mm256_blend_epi32(m0, m6, 0x33); \ + t1 = _mm256_unpacklo_epi64(m7, m2); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_6_2(b0) \ + do \ + { \ + t0 = _mm256_unpackhi_epi64(m2, m7); \ + t1 = _mm256_alignr_epi8(m5, m6, 8); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_6_3(b0) \ + do \ + { \ + t0 = _mm256_unpacklo_epi64(m0, m3); \ + t1 = _mm256_shuffle_epi32(m4, _MM_SHUFFLE(1, 0, 3, 2)); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_6_4(b0) \ + do \ + { \ + t0 = _mm256_unpackhi_epi64(m3, m1); \ + t1 = _mm256_blend_epi32(m5, m1, 0x33); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_7_1(b0) \ + do \ + { \ + t0 = _mm256_unpackhi_epi64(m6, m3); \ + t1 = _mm256_blend_epi32(m1, m6, 0x33); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_7_2(b0) \ + do \ + { \ + t0 = _mm256_alignr_epi8(m7, m5, 8); \ + t1 = _mm256_unpackhi_epi64(m0, m4); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_7_3(b0) \ + do \ + { \ + t0 = _mm256_unpackhi_epi64(m2, m7); \ + t1 = _mm256_unpacklo_epi64(m4, m1); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_7_4(b0) \ + do \ + { \ + t0 = _mm256_unpacklo_epi64(m0, m2); \ + t1 = _mm256_unpacklo_epi64(m3, m5); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_8_1(b0) \ + do \ + { \ + t0 = _mm256_unpacklo_epi64(m3, m7); \ + t1 = _mm256_alignr_epi8(m0, m5, 8); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_8_2(b0) \ + do \ + { \ + t0 = _mm256_unpackhi_epi64(m7, m4); \ + t1 = _mm256_alignr_epi8(m4, m1, 8); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_8_3(b0) \ + do \ + { \ + t0 = m6; \ + t1 = _mm256_alignr_epi8(m5, m0, 8); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_8_4(b0) \ + do \ + { \ + t0 = _mm256_blend_epi32(m3, m1, 0x33); \ + t1 = m2; \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_9_1(b0) \ + do \ + { \ + t0 = _mm256_unpacklo_epi64(m5, m4); \ + t1 = _mm256_unpackhi_epi64(m3, m0); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_9_2(b0) \ + do \ + { \ + t0 = _mm256_unpacklo_epi64(m1, m2); \ + t1 = _mm256_blend_epi32(m2, m3, 0x33); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_9_3(b0) \ + do \ + { \ + t0 = _mm256_unpackhi_epi64(m7, m4); \ + t1 = _mm256_unpackhi_epi64(m1, m6); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_9_4(b0) \ + do \ + { \ + t0 = _mm256_alignr_epi8(m7, m5, 8); \ + t1 = _mm256_unpacklo_epi64(m6, m0); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_10_1(b0) \ + do \ + { \ + t0 = _mm256_unpacklo_epi64(m0, m1); \ + t1 = _mm256_unpacklo_epi64(m2, m3); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_10_2(b0) \ + do \ + { \ + t0 = _mm256_unpackhi_epi64(m0, m1); \ + t1 = _mm256_unpackhi_epi64(m2, m3); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_10_3(b0) \ + do \ + { \ + t0 = _mm256_unpacklo_epi64(m4, m5); \ + t1 = _mm256_unpacklo_epi64(m6, m7); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_10_4(b0) \ + do \ + { \ + t0 = _mm256_unpackhi_epi64(m4, m5); \ + t1 = _mm256_unpackhi_epi64(m6, m7); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_11_1(b0) \ + do \ + { \ + t0 = _mm256_unpacklo_epi64(m7, m2); \ + t1 = _mm256_unpackhi_epi64(m4, m6); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_11_2(b0) \ + do \ + { \ + t0 = _mm256_unpacklo_epi64(m5, m4); \ + t1 = _mm256_alignr_epi8(m3, m7, 8); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_11_3(b0) \ + do \ + { \ + t0 = _mm256_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \ + t1 = _mm256_unpackhi_epi64(m5, m2); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) + +#define BLAKE2B_LOAD_MSG_11_4(b0) \ + do \ + { \ + t0 = _mm256_unpacklo_epi64(m6, m1); \ + t1 = _mm256_unpackhi_epi64(m3, m1); \ + b0 = _mm256_blend_epi32(t0, t1, 0xF0); \ + } while(0) #endif diff --git a/crypto/blake2b/blake2b-load-sse2.h b/crypto/blake2b/blake2b-load-sse2.h index 8e67421ac..5f7703289 100644 --- a/crypto/blake2b/blake2b-load-sse2.h +++ b/crypto/blake2b/blake2b-load-sse2.h @@ -16,149 +16,149 @@ #ifndef blake2b_load_sse2_H #define blake2b_load_sse2_H -#define LOAD_MSG_0_1(b0, b1) \ - b0 = _mm_set_epi64x(m2, m0); \ - b1 = _mm_set_epi64x(m6, m4) -#define LOAD_MSG_0_2(b0, b1) \ - b0 = _mm_set_epi64x(m3, m1); \ - b1 = _mm_set_epi64x(m7, m5) -#define LOAD_MSG_0_3(b0, b1) \ - b0 = _mm_set_epi64x(m10, m8); \ - b1 = _mm_set_epi64x(m14, m12) -#define LOAD_MSG_0_4(b0, b1) \ - b0 = _mm_set_epi64x(m11, m9); \ - b1 = _mm_set_epi64x(m15, m13) -#define LOAD_MSG_1_1(b0, b1) \ - b0 = _mm_set_epi64x(m4, m14); \ - b1 = _mm_set_epi64x(m13, m9) -#define LOAD_MSG_1_2(b0, b1) \ - b0 = _mm_set_epi64x(m8, m10); \ - b1 = _mm_set_epi64x(m6, m15) -#define LOAD_MSG_1_3(b0, b1) \ - b0 = _mm_set_epi64x(m0, m1); \ - b1 = _mm_set_epi64x(m5, m11) -#define LOAD_MSG_1_4(b0, b1) \ - b0 = _mm_set_epi64x(m2, m12); \ - b1 = _mm_set_epi64x(m3, m7) -#define LOAD_MSG_2_1(b0, b1) \ - b0 = _mm_set_epi64x(m12, m11); \ - b1 = _mm_set_epi64x(m15, m5) -#define LOAD_MSG_2_2(b0, b1) \ - b0 = _mm_set_epi64x(m0, m8); \ - b1 = _mm_set_epi64x(m13, m2) -#define LOAD_MSG_2_3(b0, b1) \ - b0 = _mm_set_epi64x(m3, m10); \ - b1 = _mm_set_epi64x(m9, m7) -#define LOAD_MSG_2_4(b0, b1) \ - b0 = _mm_set_epi64x(m6, m14); \ - b1 = _mm_set_epi64x(m4, m1) -#define LOAD_MSG_3_1(b0, b1) \ - b0 = _mm_set_epi64x(m3, m7); \ - b1 = _mm_set_epi64x(m11, m13) -#define LOAD_MSG_3_2(b0, b1) \ - b0 = _mm_set_epi64x(m1, m9); \ - b1 = _mm_set_epi64x(m14, m12) -#define LOAD_MSG_3_3(b0, b1) \ - b0 = _mm_set_epi64x(m5, m2); \ - b1 = _mm_set_epi64x(m15, m4) -#define LOAD_MSG_3_4(b0, b1) \ - b0 = _mm_set_epi64x(m10, m6); \ - b1 = _mm_set_epi64x(m8, m0) -#define LOAD_MSG_4_1(b0, b1) \ - b0 = _mm_set_epi64x(m5, m9); \ - b1 = _mm_set_epi64x(m10, m2) -#define LOAD_MSG_4_2(b0, b1) \ - b0 = _mm_set_epi64x(m7, m0); \ - b1 = _mm_set_epi64x(m15, m4) -#define LOAD_MSG_4_3(b0, b1) \ - b0 = _mm_set_epi64x(m11, m14); \ - b1 = _mm_set_epi64x(m3, m6) -#define LOAD_MSG_4_4(b0, b1) \ - b0 = _mm_set_epi64x(m12, m1); \ - b1 = _mm_set_epi64x(m13, m8) -#define LOAD_MSG_5_1(b0, b1) \ - b0 = _mm_set_epi64x(m6, m2); \ - b1 = _mm_set_epi64x(m8, m0) -#define LOAD_MSG_5_2(b0, b1) \ - b0 = _mm_set_epi64x(m10, m12); \ - b1 = _mm_set_epi64x(m3, m11) -#define LOAD_MSG_5_3(b0, b1) \ - b0 = _mm_set_epi64x(m7, m4); \ - b1 = _mm_set_epi64x(m1, m15) -#define LOAD_MSG_5_4(b0, b1) \ - b0 = _mm_set_epi64x(m5, m13); \ - b1 = _mm_set_epi64x(m9, m14) -#define LOAD_MSG_6_1(b0, b1) \ - b0 = _mm_set_epi64x(m1, m12); \ - b1 = _mm_set_epi64x(m4, m14) -#define LOAD_MSG_6_2(b0, b1) \ - b0 = _mm_set_epi64x(m15, m5); \ - b1 = _mm_set_epi64x(m10, m13) -#define LOAD_MSG_6_3(b0, b1) \ - b0 = _mm_set_epi64x(m6, m0); \ - b1 = _mm_set_epi64x(m8, m9) -#define LOAD_MSG_6_4(b0, b1) \ - b0 = _mm_set_epi64x(m3, m7); \ - b1 = _mm_set_epi64x(m11, m2) -#define LOAD_MSG_7_1(b0, b1) \ - b0 = _mm_set_epi64x(m7, m13); \ - b1 = _mm_set_epi64x(m3, m12) -#define LOAD_MSG_7_2(b0, b1) \ - b0 = _mm_set_epi64x(m14, m11); \ - b1 = _mm_set_epi64x(m9, m1) -#define LOAD_MSG_7_3(b0, b1) \ - b0 = _mm_set_epi64x(m15, m5); \ - b1 = _mm_set_epi64x(m2, m8) -#define LOAD_MSG_7_4(b0, b1) \ - b0 = _mm_set_epi64x(m4, m0); \ - b1 = _mm_set_epi64x(m10, m6) -#define LOAD_MSG_8_1(b0, b1) \ - b0 = _mm_set_epi64x(m14, m6); \ - b1 = _mm_set_epi64x(m0, m11) -#define LOAD_MSG_8_2(b0, b1) \ - b0 = _mm_set_epi64x(m9, m15); \ - b1 = _mm_set_epi64x(m8, m3) -#define LOAD_MSG_8_3(b0, b1) \ - b0 = _mm_set_epi64x(m13, m12); \ - b1 = _mm_set_epi64x(m10, m1) -#define LOAD_MSG_8_4(b0, b1) \ - b0 = _mm_set_epi64x(m7, m2); \ - b1 = _mm_set_epi64x(m5, m4) -#define LOAD_MSG_9_1(b0, b1) \ - b0 = _mm_set_epi64x(m8, m10); \ - b1 = _mm_set_epi64x(m1, m7) -#define LOAD_MSG_9_2(b0, b1) \ - b0 = _mm_set_epi64x(m4, m2); \ - b1 = _mm_set_epi64x(m5, m6) -#define LOAD_MSG_9_3(b0, b1) \ - b0 = _mm_set_epi64x(m9, m15); \ - b1 = _mm_set_epi64x(m13, m3) -#define LOAD_MSG_9_4(b0, b1) \ - b0 = _mm_set_epi64x(m14, m11); \ - b1 = _mm_set_epi64x(m0, m12) -#define LOAD_MSG_10_1(b0, b1) \ - b0 = _mm_set_epi64x(m2, m0); \ - b1 = _mm_set_epi64x(m6, m4) -#define LOAD_MSG_10_2(b0, b1) \ - b0 = _mm_set_epi64x(m3, m1); \ - b1 = _mm_set_epi64x(m7, m5) -#define LOAD_MSG_10_3(b0, b1) \ - b0 = _mm_set_epi64x(m10, m8); \ - b1 = _mm_set_epi64x(m14, m12) -#define LOAD_MSG_10_4(b0, b1) \ - b0 = _mm_set_epi64x(m11, m9); \ - b1 = _mm_set_epi64x(m15, m13) -#define LOAD_MSG_11_1(b0, b1) \ - b0 = _mm_set_epi64x(m4, m14); \ - b1 = _mm_set_epi64x(m13, m9) -#define LOAD_MSG_11_2(b0, b1) \ - b0 = _mm_set_epi64x(m8, m10); \ - b1 = _mm_set_epi64x(m6, m15) -#define LOAD_MSG_11_3(b0, b1) \ - b0 = _mm_set_epi64x(m0, m1); \ - b1 = _mm_set_epi64x(m5, m11) -#define LOAD_MSG_11_4(b0, b1) \ - b0 = _mm_set_epi64x(m2, m12); \ - b1 = _mm_set_epi64x(m3, m7) +#define LOAD_MSG_0_1(b0, b1) \ + b0 = _mm_set_epi64x(m2, m0); \ + b1 = _mm_set_epi64x(m6, m4) +#define LOAD_MSG_0_2(b0, b1) \ + b0 = _mm_set_epi64x(m3, m1); \ + b1 = _mm_set_epi64x(m7, m5) +#define LOAD_MSG_0_3(b0, b1) \ + b0 = _mm_set_epi64x(m10, m8); \ + b1 = _mm_set_epi64x(m14, m12) +#define LOAD_MSG_0_4(b0, b1) \ + b0 = _mm_set_epi64x(m11, m9); \ + b1 = _mm_set_epi64x(m15, m13) +#define LOAD_MSG_1_1(b0, b1) \ + b0 = _mm_set_epi64x(m4, m14); \ + b1 = _mm_set_epi64x(m13, m9) +#define LOAD_MSG_1_2(b0, b1) \ + b0 = _mm_set_epi64x(m8, m10); \ + b1 = _mm_set_epi64x(m6, m15) +#define LOAD_MSG_1_3(b0, b1) \ + b0 = _mm_set_epi64x(m0, m1); \ + b1 = _mm_set_epi64x(m5, m11) +#define LOAD_MSG_1_4(b0, b1) \ + b0 = _mm_set_epi64x(m2, m12); \ + b1 = _mm_set_epi64x(m3, m7) +#define LOAD_MSG_2_1(b0, b1) \ + b0 = _mm_set_epi64x(m12, m11); \ + b1 = _mm_set_epi64x(m15, m5) +#define LOAD_MSG_2_2(b0, b1) \ + b0 = _mm_set_epi64x(m0, m8); \ + b1 = _mm_set_epi64x(m13, m2) +#define LOAD_MSG_2_3(b0, b1) \ + b0 = _mm_set_epi64x(m3, m10); \ + b1 = _mm_set_epi64x(m9, m7) +#define LOAD_MSG_2_4(b0, b1) \ + b0 = _mm_set_epi64x(m6, m14); \ + b1 = _mm_set_epi64x(m4, m1) +#define LOAD_MSG_3_1(b0, b1) \ + b0 = _mm_set_epi64x(m3, m7); \ + b1 = _mm_set_epi64x(m11, m13) +#define LOAD_MSG_3_2(b0, b1) \ + b0 = _mm_set_epi64x(m1, m9); \ + b1 = _mm_set_epi64x(m14, m12) +#define LOAD_MSG_3_3(b0, b1) \ + b0 = _mm_set_epi64x(m5, m2); \ + b1 = _mm_set_epi64x(m15, m4) +#define LOAD_MSG_3_4(b0, b1) \ + b0 = _mm_set_epi64x(m10, m6); \ + b1 = _mm_set_epi64x(m8, m0) +#define LOAD_MSG_4_1(b0, b1) \ + b0 = _mm_set_epi64x(m5, m9); \ + b1 = _mm_set_epi64x(m10, m2) +#define LOAD_MSG_4_2(b0, b1) \ + b0 = _mm_set_epi64x(m7, m0); \ + b1 = _mm_set_epi64x(m15, m4) +#define LOAD_MSG_4_3(b0, b1) \ + b0 = _mm_set_epi64x(m11, m14); \ + b1 = _mm_set_epi64x(m3, m6) +#define LOAD_MSG_4_4(b0, b1) \ + b0 = _mm_set_epi64x(m12, m1); \ + b1 = _mm_set_epi64x(m13, m8) +#define LOAD_MSG_5_1(b0, b1) \ + b0 = _mm_set_epi64x(m6, m2); \ + b1 = _mm_set_epi64x(m8, m0) +#define LOAD_MSG_5_2(b0, b1) \ + b0 = _mm_set_epi64x(m10, m12); \ + b1 = _mm_set_epi64x(m3, m11) +#define LOAD_MSG_5_3(b0, b1) \ + b0 = _mm_set_epi64x(m7, m4); \ + b1 = _mm_set_epi64x(m1, m15) +#define LOAD_MSG_5_4(b0, b1) \ + b0 = _mm_set_epi64x(m5, m13); \ + b1 = _mm_set_epi64x(m9, m14) +#define LOAD_MSG_6_1(b0, b1) \ + b0 = _mm_set_epi64x(m1, m12); \ + b1 = _mm_set_epi64x(m4, m14) +#define LOAD_MSG_6_2(b0, b1) \ + b0 = _mm_set_epi64x(m15, m5); \ + b1 = _mm_set_epi64x(m10, m13) +#define LOAD_MSG_6_3(b0, b1) \ + b0 = _mm_set_epi64x(m6, m0); \ + b1 = _mm_set_epi64x(m8, m9) +#define LOAD_MSG_6_4(b0, b1) \ + b0 = _mm_set_epi64x(m3, m7); \ + b1 = _mm_set_epi64x(m11, m2) +#define LOAD_MSG_7_1(b0, b1) \ + b0 = _mm_set_epi64x(m7, m13); \ + b1 = _mm_set_epi64x(m3, m12) +#define LOAD_MSG_7_2(b0, b1) \ + b0 = _mm_set_epi64x(m14, m11); \ + b1 = _mm_set_epi64x(m9, m1) +#define LOAD_MSG_7_3(b0, b1) \ + b0 = _mm_set_epi64x(m15, m5); \ + b1 = _mm_set_epi64x(m2, m8) +#define LOAD_MSG_7_4(b0, b1) \ + b0 = _mm_set_epi64x(m4, m0); \ + b1 = _mm_set_epi64x(m10, m6) +#define LOAD_MSG_8_1(b0, b1) \ + b0 = _mm_set_epi64x(m14, m6); \ + b1 = _mm_set_epi64x(m0, m11) +#define LOAD_MSG_8_2(b0, b1) \ + b0 = _mm_set_epi64x(m9, m15); \ + b1 = _mm_set_epi64x(m8, m3) +#define LOAD_MSG_8_3(b0, b1) \ + b0 = _mm_set_epi64x(m13, m12); \ + b1 = _mm_set_epi64x(m10, m1) +#define LOAD_MSG_8_4(b0, b1) \ + b0 = _mm_set_epi64x(m7, m2); \ + b1 = _mm_set_epi64x(m5, m4) +#define LOAD_MSG_9_1(b0, b1) \ + b0 = _mm_set_epi64x(m8, m10); \ + b1 = _mm_set_epi64x(m1, m7) +#define LOAD_MSG_9_2(b0, b1) \ + b0 = _mm_set_epi64x(m4, m2); \ + b1 = _mm_set_epi64x(m5, m6) +#define LOAD_MSG_9_3(b0, b1) \ + b0 = _mm_set_epi64x(m9, m15); \ + b1 = _mm_set_epi64x(m13, m3) +#define LOAD_MSG_9_4(b0, b1) \ + b0 = _mm_set_epi64x(m14, m11); \ + b1 = _mm_set_epi64x(m0, m12) +#define LOAD_MSG_10_1(b0, b1) \ + b0 = _mm_set_epi64x(m2, m0); \ + b1 = _mm_set_epi64x(m6, m4) +#define LOAD_MSG_10_2(b0, b1) \ + b0 = _mm_set_epi64x(m3, m1); \ + b1 = _mm_set_epi64x(m7, m5) +#define LOAD_MSG_10_3(b0, b1) \ + b0 = _mm_set_epi64x(m10, m8); \ + b1 = _mm_set_epi64x(m14, m12) +#define LOAD_MSG_10_4(b0, b1) \ + b0 = _mm_set_epi64x(m11, m9); \ + b1 = _mm_set_epi64x(m15, m13) +#define LOAD_MSG_11_1(b0, b1) \ + b0 = _mm_set_epi64x(m4, m14); \ + b1 = _mm_set_epi64x(m13, m9) +#define LOAD_MSG_11_2(b0, b1) \ + b0 = _mm_set_epi64x(m8, m10); \ + b1 = _mm_set_epi64x(m6, m15) +#define LOAD_MSG_11_3(b0, b1) \ + b0 = _mm_set_epi64x(m0, m1); \ + b1 = _mm_set_epi64x(m5, m11) +#define LOAD_MSG_11_4(b0, b1) \ + b0 = _mm_set_epi64x(m2, m12); \ + b1 = _mm_set_epi64x(m3, m7) #endif diff --git a/crypto/blake2b/blake2b-load-sse41.h b/crypto/blake2b/blake2b-load-sse41.h index 31745fc13..6efc7869b 100644 --- a/crypto/blake2b/blake2b-load-sse41.h +++ b/crypto/blake2b/blake2b-load-sse41.h @@ -16,292 +16,340 @@ #ifndef blake2b_load_sse41_H #define blake2b_load_sse41_H -#define LOAD_MSG_0_1(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m0, m1); \ - b1 = _mm_unpacklo_epi64(m2, m3); \ - } while (0) - -#define LOAD_MSG_0_2(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m0, m1); \ - b1 = _mm_unpackhi_epi64(m2, m3); \ - } while (0) - -#define LOAD_MSG_0_3(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m4, m5); \ - b1 = _mm_unpacklo_epi64(m6, m7); \ - } while (0) - -#define LOAD_MSG_0_4(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m4, m5); \ - b1 = _mm_unpackhi_epi64(m6, m7); \ - } while (0) - -#define LOAD_MSG_1_1(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m7, m2); \ - b1 = _mm_unpackhi_epi64(m4, m6); \ - } while (0) - -#define LOAD_MSG_1_2(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m5, m4); \ - b1 = _mm_alignr_epi8(m3, m7, 8); \ - } while (0) - -#define LOAD_MSG_1_3(b0, b1) \ - do { \ - b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \ - b1 = _mm_unpackhi_epi64(m5, m2); \ - } while (0) - -#define LOAD_MSG_1_4(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m6, m1); \ - b1 = _mm_unpackhi_epi64(m3, m1); \ - } while (0) - -#define LOAD_MSG_2_1(b0, b1) \ - do { \ - b0 = _mm_alignr_epi8(m6, m5, 8); \ - b1 = _mm_unpackhi_epi64(m2, m7); \ - } while (0) - -#define LOAD_MSG_2_2(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m4, m0); \ - b1 = _mm_blend_epi16(m1, m6, 0xF0); \ - } while (0) - -#define LOAD_MSG_2_3(b0, b1) \ - do { \ - b0 = _mm_blend_epi16(m5, m1, 0xF0); \ - b1 = _mm_unpackhi_epi64(m3, m4); \ - } while (0) - -#define LOAD_MSG_2_4(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m7, m3); \ - b1 = _mm_alignr_epi8(m2, m0, 8); \ - } while (0) - -#define LOAD_MSG_3_1(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m3, m1); \ - b1 = _mm_unpackhi_epi64(m6, m5); \ - } while (0) - -#define LOAD_MSG_3_2(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m4, m0); \ - b1 = _mm_unpacklo_epi64(m6, m7); \ - } while (0) - -#define LOAD_MSG_3_3(b0, b1) \ - do { \ - b0 = _mm_blend_epi16(m1, m2, 0xF0); \ - b1 = _mm_blend_epi16(m2, m7, 0xF0); \ - } while (0) - -#define LOAD_MSG_3_4(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m3, m5); \ - b1 = _mm_unpacklo_epi64(m0, m4); \ - } while (0) - -#define LOAD_MSG_4_1(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m4, m2); \ - b1 = _mm_unpacklo_epi64(m1, m5); \ - } while (0) - -#define LOAD_MSG_4_2(b0, b1) \ - do { \ - b0 = _mm_blend_epi16(m0, m3, 0xF0); \ - b1 = _mm_blend_epi16(m2, m7, 0xF0); \ - } while (0) - -#define LOAD_MSG_4_3(b0, b1) \ - do { \ - b0 = _mm_blend_epi16(m7, m5, 0xF0); \ - b1 = _mm_blend_epi16(m3, m1, 0xF0); \ - } while (0) - -#define LOAD_MSG_4_4(b0, b1) \ - do { \ - b0 = _mm_alignr_epi8(m6, m0, 8); \ - b1 = _mm_blend_epi16(m4, m6, 0xF0); \ - } while (0) - -#define LOAD_MSG_5_1(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m1, m3); \ - b1 = _mm_unpacklo_epi64(m0, m4); \ - } while (0) - -#define LOAD_MSG_5_2(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m6, m5); \ - b1 = _mm_unpackhi_epi64(m5, m1); \ - } while (0) - -#define LOAD_MSG_5_3(b0, b1) \ - do { \ - b0 = _mm_blend_epi16(m2, m3, 0xF0); \ - b1 = _mm_unpackhi_epi64(m7, m0); \ - } while (0) - -#define LOAD_MSG_5_4(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m6, m2); \ - b1 = _mm_blend_epi16(m7, m4, 0xF0); \ - } while (0) - -#define LOAD_MSG_6_1(b0, b1) \ - do { \ - b0 = _mm_blend_epi16(m6, m0, 0xF0); \ - b1 = _mm_unpacklo_epi64(m7, m2); \ - } while (0) - -#define LOAD_MSG_6_2(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m2, m7); \ - b1 = _mm_alignr_epi8(m5, m6, 8); \ - } while (0) - -#define LOAD_MSG_6_3(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m0, m3); \ - b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1, 0, 3, 2)); \ - } while (0) - -#define LOAD_MSG_6_4(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m3, m1); \ - b1 = _mm_blend_epi16(m1, m5, 0xF0); \ - } while (0) - -#define LOAD_MSG_7_1(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m6, m3); \ - b1 = _mm_blend_epi16(m6, m1, 0xF0); \ - } while (0) - -#define LOAD_MSG_7_2(b0, b1) \ - do { \ - b0 = _mm_alignr_epi8(m7, m5, 8); \ - b1 = _mm_unpackhi_epi64(m0, m4); \ - } while (0) - -#define LOAD_MSG_7_3(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m2, m7); \ - b1 = _mm_unpacklo_epi64(m4, m1); \ - } while (0) - -#define LOAD_MSG_7_4(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m0, m2); \ - b1 = _mm_unpacklo_epi64(m3, m5); \ - } while (0) - -#define LOAD_MSG_8_1(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m3, m7); \ - b1 = _mm_alignr_epi8(m0, m5, 8); \ - } while (0) - -#define LOAD_MSG_8_2(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m7, m4); \ - b1 = _mm_alignr_epi8(m4, m1, 8); \ - } while (0) - -#define LOAD_MSG_8_3(b0, b1) \ - do { \ - b0 = m6; \ - b1 = _mm_alignr_epi8(m5, m0, 8); \ - } while (0) - -#define LOAD_MSG_8_4(b0, b1) \ - do { \ - b0 = _mm_blend_epi16(m1, m3, 0xF0); \ - b1 = m2; \ - } while (0) - -#define LOAD_MSG_9_1(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m5, m4); \ - b1 = _mm_unpackhi_epi64(m3, m0); \ - } while (0) - -#define LOAD_MSG_9_2(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m1, m2); \ - b1 = _mm_blend_epi16(m3, m2, 0xF0); \ - } while (0) - -#define LOAD_MSG_9_3(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m7, m4); \ - b1 = _mm_unpackhi_epi64(m1, m6); \ - } while (0) - -#define LOAD_MSG_9_4(b0, b1) \ - do { \ - b0 = _mm_alignr_epi8(m7, m5, 8); \ - b1 = _mm_unpacklo_epi64(m6, m0); \ - } while (0) - -#define LOAD_MSG_10_1(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m0, m1); \ - b1 = _mm_unpacklo_epi64(m2, m3); \ - } while (0) - -#define LOAD_MSG_10_2(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m0, m1); \ - b1 = _mm_unpackhi_epi64(m2, m3); \ - } while (0) - -#define LOAD_MSG_10_3(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m4, m5); \ - b1 = _mm_unpacklo_epi64(m6, m7); \ - } while (0) - -#define LOAD_MSG_10_4(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m4, m5); \ - b1 = _mm_unpackhi_epi64(m6, m7); \ - } while (0) - -#define LOAD_MSG_11_1(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m7, m2); \ - b1 = _mm_unpackhi_epi64(m4, m6); \ - } while (0) - -#define LOAD_MSG_11_2(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m5, m4); \ - b1 = _mm_alignr_epi8(m3, m7, 8); \ - } while (0) - -#define LOAD_MSG_11_3(b0, b1) \ - do { \ - b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \ - b1 = _mm_unpackhi_epi64(m5, m2); \ - } while (0) - -#define LOAD_MSG_11_4(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m6, m1); \ - b1 = _mm_unpackhi_epi64(m3, m1); \ - } while (0) +#define LOAD_MSG_0_1(b0, b1) \ + do \ + { \ + b0 = _mm_unpacklo_epi64(m0, m1); \ + b1 = _mm_unpacklo_epi64(m2, m3); \ + } while(0) + +#define LOAD_MSG_0_2(b0, b1) \ + do \ + { \ + b0 = _mm_unpackhi_epi64(m0, m1); \ + b1 = _mm_unpackhi_epi64(m2, m3); \ + } while(0) + +#define LOAD_MSG_0_3(b0, b1) \ + do \ + { \ + b0 = _mm_unpacklo_epi64(m4, m5); \ + b1 = _mm_unpacklo_epi64(m6, m7); \ + } while(0) + +#define LOAD_MSG_0_4(b0, b1) \ + do \ + { \ + b0 = _mm_unpackhi_epi64(m4, m5); \ + b1 = _mm_unpackhi_epi64(m6, m7); \ + } while(0) + +#define LOAD_MSG_1_1(b0, b1) \ + do \ + { \ + b0 = _mm_unpacklo_epi64(m7, m2); \ + b1 = _mm_unpackhi_epi64(m4, m6); \ + } while(0) + +#define LOAD_MSG_1_2(b0, b1) \ + do \ + { \ + b0 = _mm_unpacklo_epi64(m5, m4); \ + b1 = _mm_alignr_epi8(m3, m7, 8); \ + } while(0) + +#define LOAD_MSG_1_3(b0, b1) \ + do \ + { \ + b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \ + b1 = _mm_unpackhi_epi64(m5, m2); \ + } while(0) + +#define LOAD_MSG_1_4(b0, b1) \ + do \ + { \ + b0 = _mm_unpacklo_epi64(m6, m1); \ + b1 = _mm_unpackhi_epi64(m3, m1); \ + } while(0) + +#define LOAD_MSG_2_1(b0, b1) \ + do \ + { \ + b0 = _mm_alignr_epi8(m6, m5, 8); \ + b1 = _mm_unpackhi_epi64(m2, m7); \ + } while(0) + +#define LOAD_MSG_2_2(b0, b1) \ + do \ + { \ + b0 = _mm_unpacklo_epi64(m4, m0); \ + b1 = _mm_blend_epi16(m1, m6, 0xF0); \ + } while(0) + +#define LOAD_MSG_2_3(b0, b1) \ + do \ + { \ + b0 = _mm_blend_epi16(m5, m1, 0xF0); \ + b1 = _mm_unpackhi_epi64(m3, m4); \ + } while(0) + +#define LOAD_MSG_2_4(b0, b1) \ + do \ + { \ + b0 = _mm_unpacklo_epi64(m7, m3); \ + b1 = _mm_alignr_epi8(m2, m0, 8); \ + } while(0) + +#define LOAD_MSG_3_1(b0, b1) \ + do \ + { \ + b0 = _mm_unpackhi_epi64(m3, m1); \ + b1 = _mm_unpackhi_epi64(m6, m5); \ + } while(0) + +#define LOAD_MSG_3_2(b0, b1) \ + do \ + { \ + b0 = _mm_unpackhi_epi64(m4, m0); \ + b1 = _mm_unpacklo_epi64(m6, m7); \ + } while(0) + +#define LOAD_MSG_3_3(b0, b1) \ + do \ + { \ + b0 = _mm_blend_epi16(m1, m2, 0xF0); \ + b1 = _mm_blend_epi16(m2, m7, 0xF0); \ + } while(0) + +#define LOAD_MSG_3_4(b0, b1) \ + do \ + { \ + b0 = _mm_unpacklo_epi64(m3, m5); \ + b1 = _mm_unpacklo_epi64(m0, m4); \ + } while(0) + +#define LOAD_MSG_4_1(b0, b1) \ + do \ + { \ + b0 = _mm_unpackhi_epi64(m4, m2); \ + b1 = _mm_unpacklo_epi64(m1, m5); \ + } while(0) + +#define LOAD_MSG_4_2(b0, b1) \ + do \ + { \ + b0 = _mm_blend_epi16(m0, m3, 0xF0); \ + b1 = _mm_blend_epi16(m2, m7, 0xF0); \ + } while(0) + +#define LOAD_MSG_4_3(b0, b1) \ + do \ + { \ + b0 = _mm_blend_epi16(m7, m5, 0xF0); \ + b1 = _mm_blend_epi16(m3, m1, 0xF0); \ + } while(0) + +#define LOAD_MSG_4_4(b0, b1) \ + do \ + { \ + b0 = _mm_alignr_epi8(m6, m0, 8); \ + b1 = _mm_blend_epi16(m4, m6, 0xF0); \ + } while(0) + +#define LOAD_MSG_5_1(b0, b1) \ + do \ + { \ + b0 = _mm_unpacklo_epi64(m1, m3); \ + b1 = _mm_unpacklo_epi64(m0, m4); \ + } while(0) + +#define LOAD_MSG_5_2(b0, b1) \ + do \ + { \ + b0 = _mm_unpacklo_epi64(m6, m5); \ + b1 = _mm_unpackhi_epi64(m5, m1); \ + } while(0) + +#define LOAD_MSG_5_3(b0, b1) \ + do \ + { \ + b0 = _mm_blend_epi16(m2, m3, 0xF0); \ + b1 = _mm_unpackhi_epi64(m7, m0); \ + } while(0) + +#define LOAD_MSG_5_4(b0, b1) \ + do \ + { \ + b0 = _mm_unpackhi_epi64(m6, m2); \ + b1 = _mm_blend_epi16(m7, m4, 0xF0); \ + } while(0) + +#define LOAD_MSG_6_1(b0, b1) \ + do \ + { \ + b0 = _mm_blend_epi16(m6, m0, 0xF0); \ + b1 = _mm_unpacklo_epi64(m7, m2); \ + } while(0) + +#define LOAD_MSG_6_2(b0, b1) \ + do \ + { \ + b0 = _mm_unpackhi_epi64(m2, m7); \ + b1 = _mm_alignr_epi8(m5, m6, 8); \ + } while(0) + +#define LOAD_MSG_6_3(b0, b1) \ + do \ + { \ + b0 = _mm_unpacklo_epi64(m0, m3); \ + b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1, 0, 3, 2)); \ + } while(0) + +#define LOAD_MSG_6_4(b0, b1) \ + do \ + { \ + b0 = _mm_unpackhi_epi64(m3, m1); \ + b1 = _mm_blend_epi16(m1, m5, 0xF0); \ + } while(0) + +#define LOAD_MSG_7_1(b0, b1) \ + do \ + { \ + b0 = _mm_unpackhi_epi64(m6, m3); \ + b1 = _mm_blend_epi16(m6, m1, 0xF0); \ + } while(0) + +#define LOAD_MSG_7_2(b0, b1) \ + do \ + { \ + b0 = _mm_alignr_epi8(m7, m5, 8); \ + b1 = _mm_unpackhi_epi64(m0, m4); \ + } while(0) + +#define LOAD_MSG_7_3(b0, b1) \ + do \ + { \ + b0 = _mm_unpackhi_epi64(m2, m7); \ + b1 = _mm_unpacklo_epi64(m4, m1); \ + } while(0) + +#define LOAD_MSG_7_4(b0, b1) \ + do \ + { \ + b0 = _mm_unpacklo_epi64(m0, m2); \ + b1 = _mm_unpacklo_epi64(m3, m5); \ + } while(0) + +#define LOAD_MSG_8_1(b0, b1) \ + do \ + { \ + b0 = _mm_unpacklo_epi64(m3, m7); \ + b1 = _mm_alignr_epi8(m0, m5, 8); \ + } while(0) + +#define LOAD_MSG_8_2(b0, b1) \ + do \ + { \ + b0 = _mm_unpackhi_epi64(m7, m4); \ + b1 = _mm_alignr_epi8(m4, m1, 8); \ + } while(0) + +#define LOAD_MSG_8_3(b0, b1) \ + do \ + { \ + b0 = m6; \ + b1 = _mm_alignr_epi8(m5, m0, 8); \ + } while(0) + +#define LOAD_MSG_8_4(b0, b1) \ + do \ + { \ + b0 = _mm_blend_epi16(m1, m3, 0xF0); \ + b1 = m2; \ + } while(0) + +#define LOAD_MSG_9_1(b0, b1) \ + do \ + { \ + b0 = _mm_unpacklo_epi64(m5, m4); \ + b1 = _mm_unpackhi_epi64(m3, m0); \ + } while(0) + +#define LOAD_MSG_9_2(b0, b1) \ + do \ + { \ + b0 = _mm_unpacklo_epi64(m1, m2); \ + b1 = _mm_blend_epi16(m3, m2, 0xF0); \ + } while(0) + +#define LOAD_MSG_9_3(b0, b1) \ + do \ + { \ + b0 = _mm_unpackhi_epi64(m7, m4); \ + b1 = _mm_unpackhi_epi64(m1, m6); \ + } while(0) + +#define LOAD_MSG_9_4(b0, b1) \ + do \ + { \ + b0 = _mm_alignr_epi8(m7, m5, 8); \ + b1 = _mm_unpacklo_epi64(m6, m0); \ + } while(0) + +#define LOAD_MSG_10_1(b0, b1) \ + do \ + { \ + b0 = _mm_unpacklo_epi64(m0, m1); \ + b1 = _mm_unpacklo_epi64(m2, m3); \ + } while(0) + +#define LOAD_MSG_10_2(b0, b1) \ + do \ + { \ + b0 = _mm_unpackhi_epi64(m0, m1); \ + b1 = _mm_unpackhi_epi64(m2, m3); \ + } while(0) + +#define LOAD_MSG_10_3(b0, b1) \ + do \ + { \ + b0 = _mm_unpacklo_epi64(m4, m5); \ + b1 = _mm_unpacklo_epi64(m6, m7); \ + } while(0) + +#define LOAD_MSG_10_4(b0, b1) \ + do \ + { \ + b0 = _mm_unpackhi_epi64(m4, m5); \ + b1 = _mm_unpackhi_epi64(m6, m7); \ + } while(0) + +#define LOAD_MSG_11_1(b0, b1) \ + do \ + { \ + b0 = _mm_unpacklo_epi64(m7, m2); \ + b1 = _mm_unpackhi_epi64(m4, m6); \ + } while(0) + +#define LOAD_MSG_11_2(b0, b1) \ + do \ + { \ + b0 = _mm_unpacklo_epi64(m5, m4); \ + b1 = _mm_alignr_epi8(m3, m7, 8); \ + } while(0) + +#define LOAD_MSG_11_3(b0, b1) \ + do \ + { \ + b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \ + b1 = _mm_unpackhi_epi64(m5, m2); \ + } while(0) + +#define LOAD_MSG_11_4(b0, b1) \ + do \ + { \ + b0 = _mm_unpacklo_epi64(m6, m1); \ + b1 = _mm_unpackhi_epi64(m3, m1); \ + } while(0) #endif diff --git a/crypto/chacha20/dolbeau/chacha20_dolbeau-avx2.c b/crypto/chacha20/dolbeau/chacha20_dolbeau-avx2.c index 9960112d6..e47875d83 100644 --- a/crypto/chacha20/dolbeau/chacha20_dolbeau-avx2.c +++ b/crypto/chacha20/dolbeau/chacha20_dolbeau-avx2.c @@ -24,15 +24,17 @@ #ifndef __amd64__ #ifdef __clang__ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) #else -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __target__("sse2"))) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __target__("sse2"))) #endif static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi64_si128(long long __a) { - return (__m128i){ __a, 0 }; + return (__m128i){__a, 0}; } #endif diff --git a/crypto/chacha20/dolbeau/u0.h b/crypto/chacha20/dolbeau/u0.h index 17c3ff8e0..bb4577f73 100644 --- a/crypto/chacha20/dolbeau/u0.h +++ b/crypto/chacha20/dolbeau/u0.h @@ -1,86 +1,89 @@ -if (bytes > 0) { - __m128i x_0, x_1, x_2, x_3; - __m128i t_1; - const __m128i rot16 = - _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2); - const __m128i rot8 = - _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3); - uint8_t partialblock[64]; - - unsigned int i; - - x_0 = _mm_loadu_si128((__m128i*) (x + 0)); - x_1 = _mm_loadu_si128((__m128i*) (x + 4)); - x_2 = _mm_loadu_si128((__m128i*) (x + 8)); - x_3 = _mm_loadu_si128((__m128i*) (x + 12)); - - for (i = 0; i < ROUNDS; i += 2) { - x_0 = _mm_add_epi32(x_0, x_1); - x_3 = _mm_xor_si128(x_3, x_0); - x_3 = _mm_shuffle_epi8(x_3, rot16); - - x_2 = _mm_add_epi32(x_2, x_3); - x_1 = _mm_xor_si128(x_1, x_2); - - t_1 = x_1; - x_1 = _mm_slli_epi32(x_1, 12); - t_1 = _mm_srli_epi32(t_1, 20); - x_1 = _mm_xor_si128(x_1, t_1); - - x_0 = _mm_add_epi32(x_0, x_1); - x_3 = _mm_xor_si128(x_3, x_0); - x_0 = _mm_shuffle_epi32(x_0, 0x93); - x_3 = _mm_shuffle_epi8(x_3, rot8); - - x_2 = _mm_add_epi32(x_2, x_3); - x_3 = _mm_shuffle_epi32(x_3, 0x4e); - x_1 = _mm_xor_si128(x_1, x_2); - x_2 = _mm_shuffle_epi32(x_2, 0x39); - - t_1 = x_1; - x_1 = _mm_slli_epi32(x_1, 7); - t_1 = _mm_srli_epi32(t_1, 25); - x_1 = _mm_xor_si128(x_1, t_1); - - x_0 = _mm_add_epi32(x_0, x_1); - x_3 = _mm_xor_si128(x_3, x_0); - x_3 = _mm_shuffle_epi8(x_3, rot16); - - x_2 = _mm_add_epi32(x_2, x_3); - x_1 = _mm_xor_si128(x_1, x_2); - - t_1 = x_1; - x_1 = _mm_slli_epi32(x_1, 12); - t_1 = _mm_srli_epi32(t_1, 20); - x_1 = _mm_xor_si128(x_1, t_1); - - x_0 = _mm_add_epi32(x_0, x_1); - x_3 = _mm_xor_si128(x_3, x_0); - x_0 = _mm_shuffle_epi32(x_0, 0x39); - x_3 = _mm_shuffle_epi8(x_3, rot8); - - x_2 = _mm_add_epi32(x_2, x_3); - x_3 = _mm_shuffle_epi32(x_3, 0x4e); - x_1 = _mm_xor_si128(x_1, x_2); - x_2 = _mm_shuffle_epi32(x_2, 0x93); - - t_1 = x_1; - x_1 = _mm_slli_epi32(x_1, 7); - t_1 = _mm_srli_epi32(t_1, 25); - x_1 = _mm_xor_si128(x_1, t_1); - } - x_0 = _mm_add_epi32(x_0, _mm_loadu_si128((__m128i*) (x + 0))); - x_1 = _mm_add_epi32(x_1, _mm_loadu_si128((__m128i*) (x + 4))); - x_2 = _mm_add_epi32(x_2, _mm_loadu_si128((__m128i*) (x + 8))); - x_3 = _mm_add_epi32(x_3, _mm_loadu_si128((__m128i*) (x + 12))); - _mm_storeu_si128((__m128i*) (partialblock + 0), x_0); - _mm_storeu_si128((__m128i*) (partialblock + 16), x_1); - _mm_storeu_si128((__m128i*) (partialblock + 32), x_2); - _mm_storeu_si128((__m128i*) (partialblock + 48), x_3); - - for (i = 0; i < bytes; i++) { - c[i] = m[i] ^ partialblock[i]; - } - - sodium_memzero(partialblock, sizeof partialblock); +if(bytes > 0) +{ + __m128i x_0, x_1, x_2, x_3; + __m128i t_1; + const __m128i rot16 = + _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2); + const __m128i rot8 = + _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3); + uint8_t partialblock[64]; + + unsigned int i; + + x_0 = _mm_loadu_si128((__m128i*)(x + 0)); + x_1 = _mm_loadu_si128((__m128i*)(x + 4)); + x_2 = _mm_loadu_si128((__m128i*)(x + 8)); + x_3 = _mm_loadu_si128((__m128i*)(x + 12)); + + for(i = 0; i < ROUNDS; i += 2) + { + x_0 = _mm_add_epi32(x_0, x_1); + x_3 = _mm_xor_si128(x_3, x_0); + x_3 = _mm_shuffle_epi8(x_3, rot16); + + x_2 = _mm_add_epi32(x_2, x_3); + x_1 = _mm_xor_si128(x_1, x_2); + + t_1 = x_1; + x_1 = _mm_slli_epi32(x_1, 12); + t_1 = _mm_srli_epi32(t_1, 20); + x_1 = _mm_xor_si128(x_1, t_1); + + x_0 = _mm_add_epi32(x_0, x_1); + x_3 = _mm_xor_si128(x_3, x_0); + x_0 = _mm_shuffle_epi32(x_0, 0x93); + x_3 = _mm_shuffle_epi8(x_3, rot8); + + x_2 = _mm_add_epi32(x_2, x_3); + x_3 = _mm_shuffle_epi32(x_3, 0x4e); + x_1 = _mm_xor_si128(x_1, x_2); + x_2 = _mm_shuffle_epi32(x_2, 0x39); + + t_1 = x_1; + x_1 = _mm_slli_epi32(x_1, 7); + t_1 = _mm_srli_epi32(t_1, 25); + x_1 = _mm_xor_si128(x_1, t_1); + + x_0 = _mm_add_epi32(x_0, x_1); + x_3 = _mm_xor_si128(x_3, x_0); + x_3 = _mm_shuffle_epi8(x_3, rot16); + + x_2 = _mm_add_epi32(x_2, x_3); + x_1 = _mm_xor_si128(x_1, x_2); + + t_1 = x_1; + x_1 = _mm_slli_epi32(x_1, 12); + t_1 = _mm_srli_epi32(t_1, 20); + x_1 = _mm_xor_si128(x_1, t_1); + + x_0 = _mm_add_epi32(x_0, x_1); + x_3 = _mm_xor_si128(x_3, x_0); + x_0 = _mm_shuffle_epi32(x_0, 0x39); + x_3 = _mm_shuffle_epi8(x_3, rot8); + + x_2 = _mm_add_epi32(x_2, x_3); + x_3 = _mm_shuffle_epi32(x_3, 0x4e); + x_1 = _mm_xor_si128(x_1, x_2); + x_2 = _mm_shuffle_epi32(x_2, 0x93); + + t_1 = x_1; + x_1 = _mm_slli_epi32(x_1, 7); + t_1 = _mm_srli_epi32(t_1, 25); + x_1 = _mm_xor_si128(x_1, t_1); + } + x_0 = _mm_add_epi32(x_0, _mm_loadu_si128((__m128i*)(x + 0))); + x_1 = _mm_add_epi32(x_1, _mm_loadu_si128((__m128i*)(x + 4))); + x_2 = _mm_add_epi32(x_2, _mm_loadu_si128((__m128i*)(x + 8))); + x_3 = _mm_add_epi32(x_3, _mm_loadu_si128((__m128i*)(x + 12))); + _mm_storeu_si128((__m128i*)(partialblock + 0), x_0); + _mm_storeu_si128((__m128i*)(partialblock + 16), x_1); + _mm_storeu_si128((__m128i*)(partialblock + 32), x_2); + _mm_storeu_si128((__m128i*)(partialblock + 48), x_3); + + for(i = 0; i < bytes; i++) + { + c[i] = m[i] ^ partialblock[i]; + } + + sodium_memzero(partialblock, sizeof partialblock); } diff --git a/crypto/chacha20/dolbeau/u1.h b/crypto/chacha20/dolbeau/u1.h index 867b44bcf..c7a35bd0a 100644 --- a/crypto/chacha20/dolbeau/u1.h +++ b/crypto/chacha20/dolbeau/u1.h @@ -1,98 +1,101 @@ -while (bytes >= 64) { - __m128i x_0, x_1, x_2, x_3; - __m128i t_1; - const __m128i rot16 = - _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2); - const __m128i rot8 = - _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3); - - uint32_t in12; - uint32_t in13; - int i; - - x_0 = _mm_loadu_si128((__m128i*) (x + 0)); - x_1 = _mm_loadu_si128((__m128i*) (x + 4)); - x_2 = _mm_loadu_si128((__m128i*) (x + 8)); - x_3 = _mm_loadu_si128((__m128i*) (x + 12)); - - for (i = 0; i < ROUNDS; i += 2) { - x_0 = _mm_add_epi32(x_0, x_1); - x_3 = _mm_xor_si128(x_3, x_0); - x_3 = _mm_shuffle_epi8(x_3, rot16); - - x_2 = _mm_add_epi32(x_2, x_3); - x_1 = _mm_xor_si128(x_1, x_2); - - t_1 = x_1; - x_1 = _mm_slli_epi32(x_1, 12); - t_1 = _mm_srli_epi32(t_1, 20); - x_1 = _mm_xor_si128(x_1, t_1); - - x_0 = _mm_add_epi32(x_0, x_1); - x_3 = _mm_xor_si128(x_3, x_0); - x_0 = _mm_shuffle_epi32(x_0, 0x93); - x_3 = _mm_shuffle_epi8(x_3, rot8); - - x_2 = _mm_add_epi32(x_2, x_3); - x_3 = _mm_shuffle_epi32(x_3, 0x4e); - x_1 = _mm_xor_si128(x_1, x_2); - x_2 = _mm_shuffle_epi32(x_2, 0x39); - - t_1 = x_1; - x_1 = _mm_slli_epi32(x_1, 7); - t_1 = _mm_srli_epi32(t_1, 25); - x_1 = _mm_xor_si128(x_1, t_1); - - x_0 = _mm_add_epi32(x_0, x_1); - x_3 = _mm_xor_si128(x_3, x_0); - x_3 = _mm_shuffle_epi8(x_3, rot16); - - x_2 = _mm_add_epi32(x_2, x_3); - x_1 = _mm_xor_si128(x_1, x_2); - - t_1 = x_1; - x_1 = _mm_slli_epi32(x_1, 12); - t_1 = _mm_srli_epi32(t_1, 20); - x_1 = _mm_xor_si128(x_1, t_1); - - x_0 = _mm_add_epi32(x_0, x_1); - x_3 = _mm_xor_si128(x_3, x_0); - x_0 = _mm_shuffle_epi32(x_0, 0x39); - x_3 = _mm_shuffle_epi8(x_3, rot8); - - x_2 = _mm_add_epi32(x_2, x_3); - x_3 = _mm_shuffle_epi32(x_3, 0x4e); - x_1 = _mm_xor_si128(x_1, x_2); - x_2 = _mm_shuffle_epi32(x_2, 0x93); - - t_1 = x_1; - x_1 = _mm_slli_epi32(x_1, 7); - t_1 = _mm_srli_epi32(t_1, 25); - x_1 = _mm_xor_si128(x_1, t_1); - } - x_0 = _mm_add_epi32(x_0, _mm_loadu_si128((__m128i*) (x + 0))); - x_1 = _mm_add_epi32(x_1, _mm_loadu_si128((__m128i*) (x + 4))); - x_2 = _mm_add_epi32(x_2, _mm_loadu_si128((__m128i*) (x + 8))); - x_3 = _mm_add_epi32(x_3, _mm_loadu_si128((__m128i*) (x + 12))); - x_0 = _mm_xor_si128(x_0, _mm_loadu_si128((__m128i*) (m + 0))); - x_1 = _mm_xor_si128(x_1, _mm_loadu_si128((__m128i*) (m + 16))); - x_2 = _mm_xor_si128(x_2, _mm_loadu_si128((__m128i*) (m + 32))); - x_3 = _mm_xor_si128(x_3, _mm_loadu_si128((__m128i*) (m + 48))); - _mm_storeu_si128((__m128i*) (c + 0), x_0); - _mm_storeu_si128((__m128i*) (c + 16), x_1); - _mm_storeu_si128((__m128i*) (c + 32), x_2); - _mm_storeu_si128((__m128i*) (c + 48), x_3); - - in12 = x[12]; - in13 = x[13]; - in12++; - if (in12 == 0) { - in13++; - } - x[12] = in12; - x[13] = in13; - - bytes -= 64; - c += 64; - m += 64; +while(bytes >= 64) +{ + __m128i x_0, x_1, x_2, x_3; + __m128i t_1; + const __m128i rot16 = + _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2); + const __m128i rot8 = + _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3); + + uint32_t in12; + uint32_t in13; + int i; + + x_0 = _mm_loadu_si128((__m128i*)(x + 0)); + x_1 = _mm_loadu_si128((__m128i*)(x + 4)); + x_2 = _mm_loadu_si128((__m128i*)(x + 8)); + x_3 = _mm_loadu_si128((__m128i*)(x + 12)); + + for(i = 0; i < ROUNDS; i += 2) + { + x_0 = _mm_add_epi32(x_0, x_1); + x_3 = _mm_xor_si128(x_3, x_0); + x_3 = _mm_shuffle_epi8(x_3, rot16); + + x_2 = _mm_add_epi32(x_2, x_3); + x_1 = _mm_xor_si128(x_1, x_2); + + t_1 = x_1; + x_1 = _mm_slli_epi32(x_1, 12); + t_1 = _mm_srli_epi32(t_1, 20); + x_1 = _mm_xor_si128(x_1, t_1); + + x_0 = _mm_add_epi32(x_0, x_1); + x_3 = _mm_xor_si128(x_3, x_0); + x_0 = _mm_shuffle_epi32(x_0, 0x93); + x_3 = _mm_shuffle_epi8(x_3, rot8); + + x_2 = _mm_add_epi32(x_2, x_3); + x_3 = _mm_shuffle_epi32(x_3, 0x4e); + x_1 = _mm_xor_si128(x_1, x_2); + x_2 = _mm_shuffle_epi32(x_2, 0x39); + + t_1 = x_1; + x_1 = _mm_slli_epi32(x_1, 7); + t_1 = _mm_srli_epi32(t_1, 25); + x_1 = _mm_xor_si128(x_1, t_1); + + x_0 = _mm_add_epi32(x_0, x_1); + x_3 = _mm_xor_si128(x_3, x_0); + x_3 = _mm_shuffle_epi8(x_3, rot16); + + x_2 = _mm_add_epi32(x_2, x_3); + x_1 = _mm_xor_si128(x_1, x_2); + + t_1 = x_1; + x_1 = _mm_slli_epi32(x_1, 12); + t_1 = _mm_srli_epi32(t_1, 20); + x_1 = _mm_xor_si128(x_1, t_1); + + x_0 = _mm_add_epi32(x_0, x_1); + x_3 = _mm_xor_si128(x_3, x_0); + x_0 = _mm_shuffle_epi32(x_0, 0x39); + x_3 = _mm_shuffle_epi8(x_3, rot8); + + x_2 = _mm_add_epi32(x_2, x_3); + x_3 = _mm_shuffle_epi32(x_3, 0x4e); + x_1 = _mm_xor_si128(x_1, x_2); + x_2 = _mm_shuffle_epi32(x_2, 0x93); + + t_1 = x_1; + x_1 = _mm_slli_epi32(x_1, 7); + t_1 = _mm_srli_epi32(t_1, 25); + x_1 = _mm_xor_si128(x_1, t_1); + } + x_0 = _mm_add_epi32(x_0, _mm_loadu_si128((__m128i*)(x + 0))); + x_1 = _mm_add_epi32(x_1, _mm_loadu_si128((__m128i*)(x + 4))); + x_2 = _mm_add_epi32(x_2, _mm_loadu_si128((__m128i*)(x + 8))); + x_3 = _mm_add_epi32(x_3, _mm_loadu_si128((__m128i*)(x + 12))); + x_0 = _mm_xor_si128(x_0, _mm_loadu_si128((__m128i*)(m + 0))); + x_1 = _mm_xor_si128(x_1, _mm_loadu_si128((__m128i*)(m + 16))); + x_2 = _mm_xor_si128(x_2, _mm_loadu_si128((__m128i*)(m + 32))); + x_3 = _mm_xor_si128(x_3, _mm_loadu_si128((__m128i*)(m + 48))); + _mm_storeu_si128((__m128i*)(c + 0), x_0); + _mm_storeu_si128((__m128i*)(c + 16), x_1); + _mm_storeu_si128((__m128i*)(c + 32), x_2); + _mm_storeu_si128((__m128i*)(c + 48), x_3); + + in12 = x[12]; + in13 = x[13]; + in12++; + if(in12 == 0) + { + in13++; + } + x[12] = in12; + x[13] = in13; + + bytes -= 64; + c += 64; + m += 64; } diff --git a/crypto/chacha20/dolbeau/u4.h b/crypto/chacha20/dolbeau/u4.h index 3ff834260..aff2b45e2 100644 --- a/crypto/chacha20/dolbeau/u4.h +++ b/crypto/chacha20/dolbeau/u4.h @@ -1,174 +1,177 @@ #define VEC4_ROT(A, IMM) \ - _mm_or_si128(_mm_slli_epi32(A, IMM), _mm_srli_epi32(A, (32 - IMM))) + _mm_or_si128(_mm_slli_epi32(A, IMM), _mm_srli_epi32(A, (32 - IMM))) /* same, but replace 2 of the shift/shift/or "rotation" by byte shuffles (8 & * 16) (better) */ #define VEC4_QUARTERROUND_SHUFFLE(A, B, C, D) \ - x_##A = _mm_add_epi32(x_##A, x_##B); \ - t_##A = _mm_xor_si128(x_##D, x_##A); \ - x_##D = _mm_shuffle_epi8(t_##A, rot16); \ - x_##C = _mm_add_epi32(x_##C, x_##D); \ - t_##C = _mm_xor_si128(x_##B, x_##C); \ - x_##B = VEC4_ROT(t_##C, 12); \ - x_##A = _mm_add_epi32(x_##A, x_##B); \ - t_##A = _mm_xor_si128(x_##D, x_##A); \ - x_##D = _mm_shuffle_epi8(t_##A, rot8); \ - x_##C = _mm_add_epi32(x_##C, x_##D); \ - t_##C = _mm_xor_si128(x_##B, x_##C); \ - x_##B = VEC4_ROT(t_##C, 7) + x_##A = _mm_add_epi32(x_##A, x_##B); \ + t_##A = _mm_xor_si128(x_##D, x_##A); \ + x_##D = _mm_shuffle_epi8(t_##A, rot16); \ + x_##C = _mm_add_epi32(x_##C, x_##D); \ + t_##C = _mm_xor_si128(x_##B, x_##C); \ + x_##B = VEC4_ROT(t_##C, 12); \ + x_##A = _mm_add_epi32(x_##A, x_##B); \ + t_##A = _mm_xor_si128(x_##D, x_##A); \ + x_##D = _mm_shuffle_epi8(t_##A, rot8); \ + x_##C = _mm_add_epi32(x_##C, x_##D); \ + t_##C = _mm_xor_si128(x_##B, x_##C); \ + x_##B = VEC4_ROT(t_##C, 7) #define VEC4_QUARTERROUND(A, B, C, D) VEC4_QUARTERROUND_SHUFFLE(A, B, C, D) -if (bytes >= 256) { - /* constant for shuffling bytes (replacing multiple-of-8 rotates) */ - __m128i rot16 = - _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2); - __m128i rot8 = - _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3); - - __m128i x_0 = _mm_set1_epi32(x[0]); - __m128i x_1 = _mm_set1_epi32(x[1]); - __m128i x_2 = _mm_set1_epi32(x[2]); - __m128i x_3 = _mm_set1_epi32(x[3]); - __m128i x_4 = _mm_set1_epi32(x[4]); - __m128i x_5 = _mm_set1_epi32(x[5]); - __m128i x_6 = _mm_set1_epi32(x[6]); - __m128i x_7 = _mm_set1_epi32(x[7]); - __m128i x_8 = _mm_set1_epi32(x[8]); - __m128i x_9 = _mm_set1_epi32(x[9]); - __m128i x_10 = _mm_set1_epi32(x[10]); - __m128i x_11 = _mm_set1_epi32(x[11]); - __m128i x_12; - __m128i x_13; - __m128i x_14 = _mm_set1_epi32(x[14]); - __m128i x_15 = _mm_set1_epi32(x[15]); - __m128i orig0 = x_0; - __m128i orig1 = x_1; - __m128i orig2 = x_2; - __m128i orig3 = x_3; - __m128i orig4 = x_4; - __m128i orig5 = x_5; - __m128i orig6 = x_6; - __m128i orig7 = x_7; - __m128i orig8 = x_8; - __m128i orig9 = x_9; - __m128i orig10 = x_10; - __m128i orig11 = x_11; - __m128i orig12; - __m128i orig13; - __m128i orig14 = x_14; - __m128i orig15 = x_15; - __m128i t_0, t_1, t_2, t_3, t_4, t_5, t_6, t_7, t_8, t_9, t_10, t_11, t_12, - t_13, t_14, t_15; - - uint32_t in12, in13; - int i; - - while (bytes >= 256) { - const __m128i addv12 = _mm_set_epi64x(1, 0); - const __m128i addv13 = _mm_set_epi64x(3, 2); - __m128i t12, t13; - uint64_t in1213; - - x_0 = orig0; - x_1 = orig1; - x_2 = orig2; - x_3 = orig3; - x_4 = orig4; - x_5 = orig5; - x_6 = orig6; - x_7 = orig7; - x_8 = orig8; - x_9 = orig9; - x_10 = orig10; - x_11 = orig11; - x_14 = orig14; - x_15 = orig15; - - in12 = x[12]; - in13 = x[13]; - in1213 = ((uint64_t) in12) | (((uint64_t) in13) << 32); - t12 = _mm_set1_epi64x(in1213); - t13 = _mm_set1_epi64x(in1213); - - x_12 = _mm_add_epi64(addv12, t12); - x_13 = _mm_add_epi64(addv13, t13); - - t12 = _mm_unpacklo_epi32(x_12, x_13); - t13 = _mm_unpackhi_epi32(x_12, x_13); - - x_12 = _mm_unpacklo_epi32(t12, t13); - x_13 = _mm_unpackhi_epi32(t12, t13); - - orig12 = x_12; - orig13 = x_13; - - in1213 += 4; - - x[12] = in1213 & 0xFFFFFFFF; - x[13] = (in1213 >> 32) & 0xFFFFFFFF; - - for (i = 0; i < ROUNDS; i += 2) { - VEC4_QUARTERROUND(0, 4, 8, 12); - VEC4_QUARTERROUND(1, 5, 9, 13); - VEC4_QUARTERROUND(2, 6, 10, 14); - VEC4_QUARTERROUND(3, 7, 11, 15); - VEC4_QUARTERROUND(0, 5, 10, 15); - VEC4_QUARTERROUND(1, 6, 11, 12); - VEC4_QUARTERROUND(2, 7, 8, 13); - VEC4_QUARTERROUND(3, 4, 9, 14); - } - -#define ONEQUAD_TRANSPOSE(A, B, C, D) \ - { \ - __m128i t0, t1, t2, t3; \ - \ - x_##A = _mm_add_epi32(x_##A, orig##A); \ - x_##B = _mm_add_epi32(x_##B, orig##B); \ - x_##C = _mm_add_epi32(x_##C, orig##C); \ - x_##D = _mm_add_epi32(x_##D, orig##D); \ - t_##A = _mm_unpacklo_epi32(x_##A, x_##B); \ - t_##B = _mm_unpacklo_epi32(x_##C, x_##D); \ - t_##C = _mm_unpackhi_epi32(x_##A, x_##B); \ - t_##D = _mm_unpackhi_epi32(x_##C, x_##D); \ - x_##A = _mm_unpacklo_epi64(t_##A, t_##B); \ - x_##B = _mm_unpackhi_epi64(t_##A, t_##B); \ - x_##C = _mm_unpacklo_epi64(t_##C, t_##D); \ - x_##D = _mm_unpackhi_epi64(t_##C, t_##D); \ - \ - t0 = _mm_xor_si128(x_##A, _mm_loadu_si128((__m128i*) (m + 0))); \ - _mm_storeu_si128((__m128i*) (c + 0), t0); \ - t1 = _mm_xor_si128(x_##B, _mm_loadu_si128((__m128i*) (m + 64))); \ - _mm_storeu_si128((__m128i*) (c + 64), t1); \ - t2 = _mm_xor_si128(x_##C, _mm_loadu_si128((__m128i*) (m + 128))); \ - _mm_storeu_si128((__m128i*) (c + 128), t2); \ - t3 = _mm_xor_si128(x_##D, _mm_loadu_si128((__m128i*) (m + 192))); \ - _mm_storeu_si128((__m128i*) (c + 192), t3); \ +if(bytes >= 256) +{ + /* constant for shuffling bytes (replacing multiple-of-8 rotates) */ + __m128i rot16 = + _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2); + __m128i rot8 = + _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3); + + __m128i x_0 = _mm_set1_epi32(x[0]); + __m128i x_1 = _mm_set1_epi32(x[1]); + __m128i x_2 = _mm_set1_epi32(x[2]); + __m128i x_3 = _mm_set1_epi32(x[3]); + __m128i x_4 = _mm_set1_epi32(x[4]); + __m128i x_5 = _mm_set1_epi32(x[5]); + __m128i x_6 = _mm_set1_epi32(x[6]); + __m128i x_7 = _mm_set1_epi32(x[7]); + __m128i x_8 = _mm_set1_epi32(x[8]); + __m128i x_9 = _mm_set1_epi32(x[9]); + __m128i x_10 = _mm_set1_epi32(x[10]); + __m128i x_11 = _mm_set1_epi32(x[11]); + __m128i x_12; + __m128i x_13; + __m128i x_14 = _mm_set1_epi32(x[14]); + __m128i x_15 = _mm_set1_epi32(x[15]); + __m128i orig0 = x_0; + __m128i orig1 = x_1; + __m128i orig2 = x_2; + __m128i orig3 = x_3; + __m128i orig4 = x_4; + __m128i orig5 = x_5; + __m128i orig6 = x_6; + __m128i orig7 = x_7; + __m128i orig8 = x_8; + __m128i orig9 = x_9; + __m128i orig10 = x_10; + __m128i orig11 = x_11; + __m128i orig12; + __m128i orig13; + __m128i orig14 = x_14; + __m128i orig15 = x_15; + __m128i t_0, t_1, t_2, t_3, t_4, t_5, t_6, t_7, t_8, t_9, t_10, t_11, t_12, + t_13, t_14, t_15; + + uint32_t in12, in13; + int i; + + while(bytes >= 256) + { + const __m128i addv12 = _mm_set_epi64x(1, 0); + const __m128i addv13 = _mm_set_epi64x(3, 2); + __m128i t12, t13; + uint64_t in1213; + + x_0 = orig0; + x_1 = orig1; + x_2 = orig2; + x_3 = orig3; + x_4 = orig4; + x_5 = orig5; + x_6 = orig6; + x_7 = orig7; + x_8 = orig8; + x_9 = orig9; + x_10 = orig10; + x_11 = orig11; + x_14 = orig14; + x_15 = orig15; + + in12 = x[12]; + in13 = x[13]; + in1213 = ((uint64_t)in12) | (((uint64_t)in13) << 32); + t12 = _mm_set1_epi64x(in1213); + t13 = _mm_set1_epi64x(in1213); + + x_12 = _mm_add_epi64(addv12, t12); + x_13 = _mm_add_epi64(addv13, t13); + + t12 = _mm_unpacklo_epi32(x_12, x_13); + t13 = _mm_unpackhi_epi32(x_12, x_13); + + x_12 = _mm_unpacklo_epi32(t12, t13); + x_13 = _mm_unpackhi_epi32(t12, t13); + + orig12 = x_12; + orig13 = x_13; + + in1213 += 4; + + x[12] = in1213 & 0xFFFFFFFF; + x[13] = (in1213 >> 32) & 0xFFFFFFFF; + + for(i = 0; i < ROUNDS; i += 2) + { + VEC4_QUARTERROUND(0, 4, 8, 12); + VEC4_QUARTERROUND(1, 5, 9, 13); + VEC4_QUARTERROUND(2, 6, 10, 14); + VEC4_QUARTERROUND(3, 7, 11, 15); + VEC4_QUARTERROUND(0, 5, 10, 15); + VEC4_QUARTERROUND(1, 6, 11, 12); + VEC4_QUARTERROUND(2, 7, 8, 13); + VEC4_QUARTERROUND(3, 4, 9, 14); } +#define ONEQUAD_TRANSPOSE(A, B, C, D) \ + { \ + __m128i t0, t1, t2, t3; \ + \ + x_##A = _mm_add_epi32(x_##A, orig##A); \ + x_##B = _mm_add_epi32(x_##B, orig##B); \ + x_##C = _mm_add_epi32(x_##C, orig##C); \ + x_##D = _mm_add_epi32(x_##D, orig##D); \ + t_##A = _mm_unpacklo_epi32(x_##A, x_##B); \ + t_##B = _mm_unpacklo_epi32(x_##C, x_##D); \ + t_##C = _mm_unpackhi_epi32(x_##A, x_##B); \ + t_##D = _mm_unpackhi_epi32(x_##C, x_##D); \ + x_##A = _mm_unpacklo_epi64(t_##A, t_##B); \ + x_##B = _mm_unpackhi_epi64(t_##A, t_##B); \ + x_##C = _mm_unpacklo_epi64(t_##C, t_##D); \ + x_##D = _mm_unpackhi_epi64(t_##C, t_##D); \ + \ + t0 = _mm_xor_si128(x_##A, _mm_loadu_si128((__m128i*)(m + 0))); \ + _mm_storeu_si128((__m128i*)(c + 0), t0); \ + t1 = _mm_xor_si128(x_##B, _mm_loadu_si128((__m128i*)(m + 64))); \ + _mm_storeu_si128((__m128i*)(c + 64), t1); \ + t2 = _mm_xor_si128(x_##C, _mm_loadu_si128((__m128i*)(m + 128))); \ + _mm_storeu_si128((__m128i*)(c + 128), t2); \ + t3 = _mm_xor_si128(x_##D, _mm_loadu_si128((__m128i*)(m + 192))); \ + _mm_storeu_si128((__m128i*)(c + 192), t3); \ + } + #define ONEQUAD(A, B, C, D) ONEQUAD_TRANSPOSE(A, B, C, D) - ONEQUAD(0, 1, 2, 3); - m += 16; - c += 16; - ONEQUAD(4, 5, 6, 7); - m += 16; - c += 16; - ONEQUAD(8, 9, 10, 11); - m += 16; - c += 16; - ONEQUAD(12, 13, 14, 15); - m -= 48; - c -= 48; + ONEQUAD(0, 1, 2, 3); + m += 16; + c += 16; + ONEQUAD(4, 5, 6, 7); + m += 16; + c += 16; + ONEQUAD(8, 9, 10, 11); + m += 16; + c += 16; + ONEQUAD(12, 13, 14, 15); + m -= 48; + c -= 48; #undef ONEQUAD #undef ONEQUAD_TRANSPOSE - bytes -= 256; - c += 256; - m += 256; - } + bytes -= 256; + c += 256; + m += 256; + } } #undef VEC4_ROT #undef VEC4_QUARTERROUND diff --git a/crypto/chacha20/dolbeau/u8.h b/crypto/chacha20/dolbeau/u8.h index 22bf9fcfa..5600aeb44 100644 --- a/crypto/chacha20/dolbeau/u8.h +++ b/crypto/chacha20/dolbeau/u8.h @@ -1,346 +1,344 @@ #define VEC8_ROT(A, IMM) \ - _mm256_or_si256(_mm256_slli_epi32(A, IMM), _mm256_srli_epi32(A, (32 - IMM))) + _mm256_or_si256(_mm256_slli_epi32(A, IMM), _mm256_srli_epi32(A, (32 - IMM))) /* implements a vector quarter round by-the-book (naive!) */ #define VEC8_QUARTERROUND_NAIVE(A, B, C, D) \ - x_##A = _mm256_add_epi32(x_##A, x_##B); \ - t_##A = _mm256_xor_si256(x_##D, x_##A); \ - x_##D = VEC8_ROT(t_##A, 16); \ - x_##C = _mm256_add_epi32(x_##C, x_##D); \ - t_##C = _mm256_xor_si256(x_##B, x_##C); \ - x_##B = VEC8_ROT(t_##C, 12); \ - x_##A = _mm256_add_epi32(x_##A, x_##B); \ - t_##A = _mm256_xor_si256(x_##D, x_##A); \ - x_##D = VEC8_ROT(t_##A, 8); \ - x_##C = _mm256_add_epi32(x_##C, x_##D); \ - t_##C = _mm256_xor_si256(x_##B, x_##C); \ - x_##B = VEC8_ROT(t_##C, 7) + x_##A = _mm256_add_epi32(x_##A, x_##B); \ + t_##A = _mm256_xor_si256(x_##D, x_##A); \ + x_##D = VEC8_ROT(t_##A, 16); \ + x_##C = _mm256_add_epi32(x_##C, x_##D); \ + t_##C = _mm256_xor_si256(x_##B, x_##C); \ + x_##B = VEC8_ROT(t_##C, 12); \ + x_##A = _mm256_add_epi32(x_##A, x_##B); \ + t_##A = _mm256_xor_si256(x_##D, x_##A); \ + x_##D = VEC8_ROT(t_##A, 8); \ + x_##C = _mm256_add_epi32(x_##C, x_##D); \ + t_##C = _mm256_xor_si256(x_##B, x_##C); \ + x_##B = VEC8_ROT(t_##C, 7) /* same, but replace 2 of the shift/shift/or "rotation" by byte shuffles (8 & * 16) (better) */ -#define VEC8_QUARTERROUND_SHUFFLE(A, B, C, D) \ - x_##A = _mm256_add_epi32(x_##A, x_##B); \ - t_##A = _mm256_xor_si256(x_##D, x_##A); \ - x_##D = _mm256_shuffle_epi8(t_##A, rot16); \ - x_##C = _mm256_add_epi32(x_##C, x_##D); \ - t_##C = _mm256_xor_si256(x_##B, x_##C); \ - x_##B = VEC8_ROT(t_##C, 12); \ - x_##A = _mm256_add_epi32(x_##A, x_##B); \ - t_##A = _mm256_xor_si256(x_##D, x_##A); \ - x_##D = _mm256_shuffle_epi8(t_##A, rot8); \ - x_##C = _mm256_add_epi32(x_##C, x_##D); \ - t_##C = _mm256_xor_si256(x_##B, x_##C); \ - x_##B = VEC8_ROT(t_##C, 7) +#define VEC8_QUARTERROUND_SHUFFLE(A, B, C, D) \ + x_##A = _mm256_add_epi32(x_##A, x_##B); \ + t_##A = _mm256_xor_si256(x_##D, x_##A); \ + x_##D = _mm256_shuffle_epi8(t_##A, rot16); \ + x_##C = _mm256_add_epi32(x_##C, x_##D); \ + t_##C = _mm256_xor_si256(x_##B, x_##C); \ + x_##B = VEC8_ROT(t_##C, 12); \ + x_##A = _mm256_add_epi32(x_##A, x_##B); \ + t_##A = _mm256_xor_si256(x_##D, x_##A); \ + x_##D = _mm256_shuffle_epi8(t_##A, rot8); \ + x_##C = _mm256_add_epi32(x_##C, x_##D); \ + t_##C = _mm256_xor_si256(x_##B, x_##C); \ + x_##B = VEC8_ROT(t_##C, 7) /* same, but replace 2 of the shift/shift/or "rotation" by byte & word shuffles * (8 & 16) (not as good as previous) */ -#define VEC8_QUARTERROUND_SHUFFLE2(A, B, C, D) \ - x_##A = _mm256_add_epi32(x_##A, x_##B); \ - t_##A = _mm256_xor_si256(x_##D, x_##A); \ - x_##D = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(t_##A, 0xb1), 0xb1); \ - x_##C = _mm256_add_epi32(x_##C, x_##D); \ - t_##C = _mm256_xor_si256(x_##B, x_##C); \ - x_##B = VEC8_ROT(t_##C, 12); \ - x_##A = _mm256_add_epi32(x_##A, x_##B); \ - t_##A = _mm256_xor_si256(x_##D, x_##A); \ - x_##D = _mm256_shuffle_epi8(t_##A, rot8); \ - x_##C = _mm256_add_epi32(x_##C, x_##D); \ - t_##C = _mm256_xor_si256(x_##B, x_##C); \ - x_##B = VEC8_ROT(t_##C, 7) +#define VEC8_QUARTERROUND_SHUFFLE2(A, B, C, D) \ + x_##A = _mm256_add_epi32(x_##A, x_##B); \ + t_##A = _mm256_xor_si256(x_##D, x_##A); \ + x_##D = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(t_##A, 0xb1), 0xb1); \ + x_##C = _mm256_add_epi32(x_##C, x_##D); \ + t_##C = _mm256_xor_si256(x_##B, x_##C); \ + x_##B = VEC8_ROT(t_##C, 12); \ + x_##A = _mm256_add_epi32(x_##A, x_##B); \ + t_##A = _mm256_xor_si256(x_##D, x_##A); \ + x_##D = _mm256_shuffle_epi8(t_##A, rot8); \ + x_##C = _mm256_add_epi32(x_##C, x_##D); \ + t_##C = _mm256_xor_si256(x_##B, x_##C); \ + x_##B = VEC8_ROT(t_##C, 7) #define VEC8_QUARTERROUND(A, B, C, D) VEC8_QUARTERROUND_SHUFFLE(A, B, C, D) -#define VEC8_LINE1(A, B, C, D) \ - x_##A = _mm256_add_epi32(x_##A, x_##B); \ - x_##D = _mm256_shuffle_epi8(_mm256_xor_si256(x_##D, x_##A), rot16) -#define VEC8_LINE2(A, B, C, D) \ - x_##C = _mm256_add_epi32(x_##C, x_##D); \ - x_##B = VEC8_ROT(_mm256_xor_si256(x_##B, x_##C), 12) -#define VEC8_LINE3(A, B, C, D) \ - x_##A = _mm256_add_epi32(x_##A, x_##B); \ - x_##D = _mm256_shuffle_epi8(_mm256_xor_si256(x_##D, x_##A), rot8) -#define VEC8_LINE4(A, B, C, D) \ - x_##C = _mm256_add_epi32(x_##C, x_##D); \ - x_##B = VEC8_ROT(_mm256_xor_si256(x_##B, x_##C), 7) +#define VEC8_LINE1(A, B, C, D) \ + x_##A = _mm256_add_epi32(x_##A, x_##B); \ + x_##D = _mm256_shuffle_epi8(_mm256_xor_si256(x_##D, x_##A), rot16) +#define VEC8_LINE2(A, B, C, D) \ + x_##C = _mm256_add_epi32(x_##C, x_##D); \ + x_##B = VEC8_ROT(_mm256_xor_si256(x_##B, x_##C), 12) +#define VEC8_LINE3(A, B, C, D) \ + x_##A = _mm256_add_epi32(x_##A, x_##B); \ + x_##D = _mm256_shuffle_epi8(_mm256_xor_si256(x_##D, x_##A), rot8) +#define VEC8_LINE4(A, B, C, D) \ + x_##C = _mm256_add_epi32(x_##C, x_##D); \ + x_##B = VEC8_ROT(_mm256_xor_si256(x_##B, x_##C), 7) #define VEC8_ROUND_SEQ(A1, B1, C1, D1, A2, B2, C2, D2, A3, B3, C3, D3, A4, B4, \ C4, D4) \ - VEC8_LINE1(A1, B1, C1, D1); \ - VEC8_LINE1(A2, B2, C2, D2); \ - VEC8_LINE1(A3, B3, C3, D3); \ - VEC8_LINE1(A4, B4, C4, D4); \ - VEC8_LINE2(A1, B1, C1, D1); \ - VEC8_LINE2(A2, B2, C2, D2); \ - VEC8_LINE2(A3, B3, C3, D3); \ - VEC8_LINE2(A4, B4, C4, D4); \ - VEC8_LINE3(A1, B1, C1, D1); \ - VEC8_LINE3(A2, B2, C2, D2); \ - VEC8_LINE3(A3, B3, C3, D3); \ - VEC8_LINE3(A4, B4, C4, D4); \ - VEC8_LINE4(A1, B1, C1, D1); \ - VEC8_LINE4(A2, B2, C2, D2); \ - VEC8_LINE4(A3, B3, C3, D3); \ - VEC8_LINE4(A4, B4, C4, D4) + VEC8_LINE1(A1, B1, C1, D1); \ + VEC8_LINE1(A2, B2, C2, D2); \ + VEC8_LINE1(A3, B3, C3, D3); \ + VEC8_LINE1(A4, B4, C4, D4); \ + VEC8_LINE2(A1, B1, C1, D1); \ + VEC8_LINE2(A2, B2, C2, D2); \ + VEC8_LINE2(A3, B3, C3, D3); \ + VEC8_LINE2(A4, B4, C4, D4); \ + VEC8_LINE3(A1, B1, C1, D1); \ + VEC8_LINE3(A2, B2, C2, D2); \ + VEC8_LINE3(A3, B3, C3, D3); \ + VEC8_LINE3(A4, B4, C4, D4); \ + VEC8_LINE4(A1, B1, C1, D1); \ + VEC8_LINE4(A2, B2, C2, D2); \ + VEC8_LINE4(A3, B3, C3, D3); \ + VEC8_LINE4(A4, B4, C4, D4) #define VEC8_ROUND_HALF(A1, B1, C1, D1, A2, B2, C2, D2, A3, B3, C3, D3, A4, \ B4, C4, D4) \ - VEC8_LINE1(A1, B1, C1, D1); \ - VEC8_LINE1(A2, B2, C2, D2); \ - VEC8_LINE2(A1, B1, C1, D1); \ - VEC8_LINE2(A2, B2, C2, D2); \ - VEC8_LINE3(A1, B1, C1, D1); \ - VEC8_LINE3(A2, B2, C2, D2); \ - VEC8_LINE4(A1, B1, C1, D1); \ - VEC8_LINE4(A2, B2, C2, D2); \ - VEC8_LINE1(A3, B3, C3, D3); \ - VEC8_LINE1(A4, B4, C4, D4); \ - VEC8_LINE2(A3, B3, C3, D3); \ - VEC8_LINE2(A4, B4, C4, D4); \ - VEC8_LINE3(A3, B3, C3, D3); \ - VEC8_LINE3(A4, B4, C4, D4); \ - VEC8_LINE4(A3, B3, C3, D3); \ - VEC8_LINE4(A4, B4, C4, D4) + VEC8_LINE1(A1, B1, C1, D1); \ + VEC8_LINE1(A2, B2, C2, D2); \ + VEC8_LINE2(A1, B1, C1, D1); \ + VEC8_LINE2(A2, B2, C2, D2); \ + VEC8_LINE3(A1, B1, C1, D1); \ + VEC8_LINE3(A2, B2, C2, D2); \ + VEC8_LINE4(A1, B1, C1, D1); \ + VEC8_LINE4(A2, B2, C2, D2); \ + VEC8_LINE1(A3, B3, C3, D3); \ + VEC8_LINE1(A4, B4, C4, D4); \ + VEC8_LINE2(A3, B3, C3, D3); \ + VEC8_LINE2(A4, B4, C4, D4); \ + VEC8_LINE3(A3, B3, C3, D3); \ + VEC8_LINE3(A4, B4, C4, D4); \ + VEC8_LINE4(A3, B3, C3, D3); \ + VEC8_LINE4(A4, B4, C4, D4) #define VEC8_ROUND_HALFANDHALF(A1, B1, C1, D1, A2, B2, C2, D2, A3, B3, C3, D3, \ A4, B4, C4, D4) \ - VEC8_LINE1(A1, B1, C1, D1); \ - VEC8_LINE1(A2, B2, C2, D2); \ - VEC8_LINE2(A1, B1, C1, D1); \ - VEC8_LINE2(A2, B2, C2, D2); \ - VEC8_LINE1(A3, B3, C3, D3); \ - VEC8_LINE1(A4, B4, C4, D4); \ - VEC8_LINE2(A3, B3, C3, D3); \ - VEC8_LINE2(A4, B4, C4, D4); \ - VEC8_LINE3(A1, B1, C1, D1); \ - VEC8_LINE3(A2, B2, C2, D2); \ - VEC8_LINE4(A1, B1, C1, D1); \ - VEC8_LINE4(A2, B2, C2, D2); \ - VEC8_LINE3(A3, B3, C3, D3); \ - VEC8_LINE3(A4, B4, C4, D4); \ - VEC8_LINE4(A3, B3, C3, D3); \ - VEC8_LINE4(A4, B4, C4, D4) + VEC8_LINE1(A1, B1, C1, D1); \ + VEC8_LINE1(A2, B2, C2, D2); \ + VEC8_LINE2(A1, B1, C1, D1); \ + VEC8_LINE2(A2, B2, C2, D2); \ + VEC8_LINE1(A3, B3, C3, D3); \ + VEC8_LINE1(A4, B4, C4, D4); \ + VEC8_LINE2(A3, B3, C3, D3); \ + VEC8_LINE2(A4, B4, C4, D4); \ + VEC8_LINE3(A1, B1, C1, D1); \ + VEC8_LINE3(A2, B2, C2, D2); \ + VEC8_LINE4(A1, B1, C1, D1); \ + VEC8_LINE4(A2, B2, C2, D2); \ + VEC8_LINE3(A3, B3, C3, D3); \ + VEC8_LINE3(A4, B4, C4, D4); \ + VEC8_LINE4(A3, B3, C3, D3); \ + VEC8_LINE4(A4, B4, C4, D4) #define VEC8_ROUND(A1, B1, C1, D1, A2, B2, C2, D2, A3, B3, C3, D3, A4, B4, C4, \ D4) \ - VEC8_ROUND_SEQ(A1, B1, C1, D1, A2, B2, C2, D2, A3, B3, C3, D3, A4, B4, C4, \ - D4) - -if (bytes >= 512) { - /* constant for shuffling bytes (replacing multiple-of-8 rotates) */ - __m256i rot16 = - _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2, - 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2); - __m256i rot8 = - _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3, - 14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3); - uint32_t in12, in13; - - /* the naive way seems as fast (if not a bit faster) than the vector way */ - __m256i x_0 = _mm256_set1_epi32(x[0]); - __m256i x_1 = _mm256_set1_epi32(x[1]); - __m256i x_2 = _mm256_set1_epi32(x[2]); - __m256i x_3 = _mm256_set1_epi32(x[3]); - __m256i x_4 = _mm256_set1_epi32(x[4]); - __m256i x_5 = _mm256_set1_epi32(x[5]); - __m256i x_6 = _mm256_set1_epi32(x[6]); - __m256i x_7 = _mm256_set1_epi32(x[7]); - __m256i x_8 = _mm256_set1_epi32(x[8]); - __m256i x_9 = _mm256_set1_epi32(x[9]); - __m256i x_10 = _mm256_set1_epi32(x[10]); - __m256i x_11 = _mm256_set1_epi32(x[11]); - __m256i x_12; - __m256i x_13; - __m256i x_14 = _mm256_set1_epi32(x[14]); - __m256i x_15 = _mm256_set1_epi32(x[15]); - - __m256i orig0 = x_0; - __m256i orig1 = x_1; - __m256i orig2 = x_2; - __m256i orig3 = x_3; - __m256i orig4 = x_4; - __m256i orig5 = x_5; - __m256i orig6 = x_6; - __m256i orig7 = x_7; - __m256i orig8 = x_8; - __m256i orig9 = x_9; - __m256i orig10 = x_10; - __m256i orig11 = x_11; - __m256i orig12; - __m256i orig13; - __m256i orig14 = x_14; - __m256i orig15 = x_15; - __m256i t_0, t_1, t_2, t_3, t_4, t_5, t_6, t_7, t_8, t_9, t_10, t_11, t_12, - t_13, t_14, t_15; - - while (bytes >= 512) { - const __m256i addv12 = _mm256_set_epi64x(3, 2, 1, 0); - const __m256i addv13 = _mm256_set_epi64x(7, 6, 5, 4); - const __m256i permute = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0); - __m256i t12, t13; - - uint64_t in1213; - int i; - - x_0 = orig0; - x_1 = orig1; - x_2 = orig2; - x_3 = orig3; - x_4 = orig4; - x_5 = orig5; - x_6 = orig6; - x_7 = orig7; - x_8 = orig8; - x_9 = orig9; - x_10 = orig10; - x_11 = orig11; - x_14 = orig14; - x_15 = orig15; - - in12 = x[12]; - in13 = x[13]; - in1213 = ((uint64_t) in12) | (((uint64_t) in13) << 32); - x_12 = x_13 = _mm256_broadcastq_epi64(_mm_cvtsi64_si128(in1213)); - - t12 = _mm256_add_epi64(addv12, x_12); - t13 = _mm256_add_epi64(addv13, x_13); - - x_12 = _mm256_unpacklo_epi32(t12, t13); - x_13 = _mm256_unpackhi_epi32(t12, t13); - - t12 = _mm256_unpacklo_epi32(x_12, x_13); - t13 = _mm256_unpackhi_epi32(x_12, x_13); - - /* required because unpack* are intra-lane */ - x_12 = _mm256_permutevar8x32_epi32(t12, permute); - x_13 = _mm256_permutevar8x32_epi32(t13, permute); - - orig12 = x_12; - orig13 = x_13; - - in1213 += 8; - - x[12] = in1213 & 0xFFFFFFFF; - x[13] = (in1213 >> 32) & 0xFFFFFFFF; - - for (i = 0; i < ROUNDS; i += 2) { - VEC8_ROUND(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15); - VEC8_ROUND(0, 5, 10, 15, 1, 6, 11, 12, 2, 7, 8, 13, 3, 4, 9, 14); - } - -#define ONEQUAD_TRANSPOSE(A, B, C, D) \ - { \ - __m128i t0, t1, t2, t3; \ - x_##A = _mm256_add_epi32(x_##A, orig##A); \ - x_##B = _mm256_add_epi32(x_##B, orig##B); \ - x_##C = _mm256_add_epi32(x_##C, orig##C); \ - x_##D = _mm256_add_epi32(x_##D, orig##D); \ - t_##A = _mm256_unpacklo_epi32(x_##A, x_##B); \ - t_##B = _mm256_unpacklo_epi32(x_##C, x_##D); \ - t_##C = _mm256_unpackhi_epi32(x_##A, x_##B); \ - t_##D = _mm256_unpackhi_epi32(x_##C, x_##D); \ - x_##A = _mm256_unpacklo_epi64(t_##A, t_##B); \ - x_##B = _mm256_unpackhi_epi64(t_##A, t_##B); \ - x_##C = _mm256_unpacklo_epi64(t_##C, t_##D); \ - x_##D = _mm256_unpackhi_epi64(t_##C, t_##D); \ - t0 = _mm_xor_si128(_mm256_extracti128_si256(x_##A, 0), \ - _mm_loadu_si128((__m128i*) (m + 0))); \ - _mm_storeu_si128((__m128i*) (c + 0), t0); \ - t1 = _mm_xor_si128(_mm256_extracti128_si256(x_##B, 0), \ - _mm_loadu_si128((__m128i*) (m + 64))); \ - _mm_storeu_si128((__m128i*) (c + 64), t1); \ - t2 = _mm_xor_si128(_mm256_extracti128_si256(x_##C, 0), \ - _mm_loadu_si128((__m128i*) (m + 128))); \ - _mm_storeu_si128((__m128i*) (c + 128), t2); \ - t3 = _mm_xor_si128(_mm256_extracti128_si256(x_##D, 0), \ - _mm_loadu_si128((__m128i*) (m + 192))); \ - _mm_storeu_si128((__m128i*) (c + 192), t3); \ - t0 = _mm_xor_si128(_mm256_extracti128_si256(x_##A, 1), \ - _mm_loadu_si128((__m128i*) (m + 256))); \ - _mm_storeu_si128((__m128i*) (c + 256), t0); \ - t1 = _mm_xor_si128(_mm256_extracti128_si256(x_##B, 1), \ - _mm_loadu_si128((__m128i*) (m + 320))); \ - _mm_storeu_si128((__m128i*) (c + 320), t1); \ - t2 = _mm_xor_si128(_mm256_extracti128_si256(x_##C, 1), \ - _mm_loadu_si128((__m128i*) (m + 384))); \ - _mm_storeu_si128((__m128i*) (c + 384), t2); \ - t3 = _mm_xor_si128(_mm256_extracti128_si256(x_##D, 1), \ - _mm_loadu_si128((__m128i*) (m + 448))); \ - _mm_storeu_si128((__m128i*) (c + 448), t3); \ + VEC8_ROUND_SEQ(A1, B1, C1, D1, A2, B2, C2, D2, A3, B3, C3, D3, A4, B4, C4, D4) + +if(bytes >= 512) +{ + /* constant for shuffling bytes (replacing multiple-of-8 rotates) */ + __m256i rot16 = + _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2, 13, + 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2); + __m256i rot8 = + _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3, 14, + 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3); + uint32_t in12, in13; + + /* the naive way seems as fast (if not a bit faster) than the vector way */ + __m256i x_0 = _mm256_set1_epi32(x[0]); + __m256i x_1 = _mm256_set1_epi32(x[1]); + __m256i x_2 = _mm256_set1_epi32(x[2]); + __m256i x_3 = _mm256_set1_epi32(x[3]); + __m256i x_4 = _mm256_set1_epi32(x[4]); + __m256i x_5 = _mm256_set1_epi32(x[5]); + __m256i x_6 = _mm256_set1_epi32(x[6]); + __m256i x_7 = _mm256_set1_epi32(x[7]); + __m256i x_8 = _mm256_set1_epi32(x[8]); + __m256i x_9 = _mm256_set1_epi32(x[9]); + __m256i x_10 = _mm256_set1_epi32(x[10]); + __m256i x_11 = _mm256_set1_epi32(x[11]); + __m256i x_12; + __m256i x_13; + __m256i x_14 = _mm256_set1_epi32(x[14]); + __m256i x_15 = _mm256_set1_epi32(x[15]); + + __m256i orig0 = x_0; + __m256i orig1 = x_1; + __m256i orig2 = x_2; + __m256i orig3 = x_3; + __m256i orig4 = x_4; + __m256i orig5 = x_5; + __m256i orig6 = x_6; + __m256i orig7 = x_7; + __m256i orig8 = x_8; + __m256i orig9 = x_9; + __m256i orig10 = x_10; + __m256i orig11 = x_11; + __m256i orig12; + __m256i orig13; + __m256i orig14 = x_14; + __m256i orig15 = x_15; + __m256i t_0, t_1, t_2, t_3, t_4, t_5, t_6, t_7, t_8, t_9, t_10, t_11, t_12, + t_13, t_14, t_15; + + while(bytes >= 512) + { + const __m256i addv12 = _mm256_set_epi64x(3, 2, 1, 0); + const __m256i addv13 = _mm256_set_epi64x(7, 6, 5, 4); + const __m256i permute = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0); + __m256i t12, t13; + + uint64_t in1213; + int i; + + x_0 = orig0; + x_1 = orig1; + x_2 = orig2; + x_3 = orig3; + x_4 = orig4; + x_5 = orig5; + x_6 = orig6; + x_7 = orig7; + x_8 = orig8; + x_9 = orig9; + x_10 = orig10; + x_11 = orig11; + x_14 = orig14; + x_15 = orig15; + + in12 = x[12]; + in13 = x[13]; + in1213 = ((uint64_t)in12) | (((uint64_t)in13) << 32); + x_12 = x_13 = _mm256_broadcastq_epi64(_mm_cvtsi64_si128(in1213)); + + t12 = _mm256_add_epi64(addv12, x_12); + t13 = _mm256_add_epi64(addv13, x_13); + + x_12 = _mm256_unpacklo_epi32(t12, t13); + x_13 = _mm256_unpackhi_epi32(t12, t13); + + t12 = _mm256_unpacklo_epi32(x_12, x_13); + t13 = _mm256_unpackhi_epi32(x_12, x_13); + + /* required because unpack* are intra-lane */ + x_12 = _mm256_permutevar8x32_epi32(t12, permute); + x_13 = _mm256_permutevar8x32_epi32(t13, permute); + + orig12 = x_12; + orig13 = x_13; + + in1213 += 8; + + x[12] = in1213 & 0xFFFFFFFF; + x[13] = (in1213 >> 32) & 0xFFFFFFFF; + + for(i = 0; i < ROUNDS; i += 2) + { + VEC8_ROUND(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15); + VEC8_ROUND(0, 5, 10, 15, 1, 6, 11, 12, 2, 7, 8, 13, 3, 4, 9, 14); } +#define ONEQUAD_TRANSPOSE(A, B, C, D) \ + { \ + __m128i t0, t1, t2, t3; \ + x_##A = _mm256_add_epi32(x_##A, orig##A); \ + x_##B = _mm256_add_epi32(x_##B, orig##B); \ + x_##C = _mm256_add_epi32(x_##C, orig##C); \ + x_##D = _mm256_add_epi32(x_##D, orig##D); \ + t_##A = _mm256_unpacklo_epi32(x_##A, x_##B); \ + t_##B = _mm256_unpacklo_epi32(x_##C, x_##D); \ + t_##C = _mm256_unpackhi_epi32(x_##A, x_##B); \ + t_##D = _mm256_unpackhi_epi32(x_##C, x_##D); \ + x_##A = _mm256_unpacklo_epi64(t_##A, t_##B); \ + x_##B = _mm256_unpackhi_epi64(t_##A, t_##B); \ + x_##C = _mm256_unpacklo_epi64(t_##C, t_##D); \ + x_##D = _mm256_unpackhi_epi64(t_##C, t_##D); \ + t0 = _mm_xor_si128(_mm256_extracti128_si256(x_##A, 0), \ + _mm_loadu_si128((__m128i*)(m + 0))); \ + _mm_storeu_si128((__m128i*)(c + 0), t0); \ + t1 = _mm_xor_si128(_mm256_extracti128_si256(x_##B, 0), \ + _mm_loadu_si128((__m128i*)(m + 64))); \ + _mm_storeu_si128((__m128i*)(c + 64), t1); \ + t2 = _mm_xor_si128(_mm256_extracti128_si256(x_##C, 0), \ + _mm_loadu_si128((__m128i*)(m + 128))); \ + _mm_storeu_si128((__m128i*)(c + 128), t2); \ + t3 = _mm_xor_si128(_mm256_extracti128_si256(x_##D, 0), \ + _mm_loadu_si128((__m128i*)(m + 192))); \ + _mm_storeu_si128((__m128i*)(c + 192), t3); \ + t0 = _mm_xor_si128(_mm256_extracti128_si256(x_##A, 1), \ + _mm_loadu_si128((__m128i*)(m + 256))); \ + _mm_storeu_si128((__m128i*)(c + 256), t0); \ + t1 = _mm_xor_si128(_mm256_extracti128_si256(x_##B, 1), \ + _mm_loadu_si128((__m128i*)(m + 320))); \ + _mm_storeu_si128((__m128i*)(c + 320), t1); \ + t2 = _mm_xor_si128(_mm256_extracti128_si256(x_##C, 1), \ + _mm_loadu_si128((__m128i*)(m + 384))); \ + _mm_storeu_si128((__m128i*)(c + 384), t2); \ + t3 = _mm_xor_si128(_mm256_extracti128_si256(x_##D, 1), \ + _mm_loadu_si128((__m128i*)(m + 448))); \ + _mm_storeu_si128((__m128i*)(c + 448), t3); \ + } + #define ONEQUAD(A, B, C, D) ONEQUAD_TRANSPOSE(A, B, C, D) -#define ONEQUAD_UNPCK(A, B, C, D) \ - { \ - x_##A = _mm256_add_epi32(x_##A, orig##A); \ - x_##B = _mm256_add_epi32(x_##B, orig##B); \ - x_##C = _mm256_add_epi32(x_##C, orig##C); \ - x_##D = _mm256_add_epi32(x_##D, orig##D); \ - t_##A = _mm256_unpacklo_epi32(x_##A, x_##B); \ - t_##B = _mm256_unpacklo_epi32(x_##C, x_##D); \ - t_##C = _mm256_unpackhi_epi32(x_##A, x_##B); \ - t_##D = _mm256_unpackhi_epi32(x_##C, x_##D); \ - x_##A = _mm256_unpacklo_epi64(t_##A, t_##B); \ - x_##B = _mm256_unpackhi_epi64(t_##A, t_##B); \ - x_##C = _mm256_unpacklo_epi64(t_##C, t_##D); \ - x_##D = _mm256_unpackhi_epi64(t_##C, t_##D); \ - } +#define ONEQUAD_UNPCK(A, B, C, D) \ + { \ + x_##A = _mm256_add_epi32(x_##A, orig##A); \ + x_##B = _mm256_add_epi32(x_##B, orig##B); \ + x_##C = _mm256_add_epi32(x_##C, orig##C); \ + x_##D = _mm256_add_epi32(x_##D, orig##D); \ + t_##A = _mm256_unpacklo_epi32(x_##A, x_##B); \ + t_##B = _mm256_unpacklo_epi32(x_##C, x_##D); \ + t_##C = _mm256_unpackhi_epi32(x_##A, x_##B); \ + t_##D = _mm256_unpackhi_epi32(x_##C, x_##D); \ + x_##A = _mm256_unpacklo_epi64(t_##A, t_##B); \ + x_##B = _mm256_unpackhi_epi64(t_##A, t_##B); \ + x_##C = _mm256_unpacklo_epi64(t_##C, t_##D); \ + x_##D = _mm256_unpackhi_epi64(t_##C, t_##D); \ + } #define ONEOCTO(A, B, C, D, A2, B2, C2, D2) \ - { \ - ONEQUAD_UNPCK(A, B, C, D); \ - ONEQUAD_UNPCK(A2, B2, C2, D2); \ - t_##A = _mm256_permute2x128_si256(x_##A, x_##A2, 0x20); \ - t_##A2 = _mm256_permute2x128_si256(x_##A, x_##A2, 0x31); \ - t_##B = _mm256_permute2x128_si256(x_##B, x_##B2, 0x20); \ - t_##B2 = _mm256_permute2x128_si256(x_##B, x_##B2, 0x31); \ - t_##C = _mm256_permute2x128_si256(x_##C, x_##C2, 0x20); \ - t_##C2 = _mm256_permute2x128_si256(x_##C, x_##C2, 0x31); \ - t_##D = _mm256_permute2x128_si256(x_##D, x_##D2, 0x20); \ - t_##D2 = _mm256_permute2x128_si256(x_##D, x_##D2, 0x31); \ - t_##A = \ - _mm256_xor_si256(t_##A, _mm256_loadu_si256((__m256i*) (m + 0))); \ - t_##B = \ - _mm256_xor_si256(t_##B, _mm256_loadu_si256((__m256i*) (m + 64))); \ - t_##C = \ - _mm256_xor_si256(t_##C, _mm256_loadu_si256((__m256i*) (m + 128))); \ - t_##D = \ - _mm256_xor_si256(t_##D, _mm256_loadu_si256((__m256i*) (m + 192))); \ - t_##A2 = _mm256_xor_si256(t_##A2, \ - _mm256_loadu_si256((__m256i*) (m + 256))); \ - t_##B2 = _mm256_xor_si256(t_##B2, \ - _mm256_loadu_si256((__m256i*) (m + 320))); \ - t_##C2 = _mm256_xor_si256(t_##C2, \ - _mm256_loadu_si256((__m256i*) (m + 384))); \ - t_##D2 = _mm256_xor_si256(t_##D2, \ - _mm256_loadu_si256((__m256i*) (m + 448))); \ - _mm256_storeu_si256((__m256i*) (c + 0), t_##A); \ - _mm256_storeu_si256((__m256i*) (c + 64), t_##B); \ - _mm256_storeu_si256((__m256i*) (c + 128), t_##C); \ - _mm256_storeu_si256((__m256i*) (c + 192), t_##D); \ - _mm256_storeu_si256((__m256i*) (c + 256), t_##A2); \ - _mm256_storeu_si256((__m256i*) (c + 320), t_##B2); \ - _mm256_storeu_si256((__m256i*) (c + 384), t_##C2); \ - _mm256_storeu_si256((__m256i*) (c + 448), t_##D2); \ - } - - ONEOCTO(0, 1, 2, 3, 4, 5, 6, 7); - m += 32; - c += 32; - ONEOCTO(8, 9, 10, 11, 12, 13, 14, 15); - m -= 32; - c -= 32; + { \ + ONEQUAD_UNPCK(A, B, C, D); \ + ONEQUAD_UNPCK(A2, B2, C2, D2); \ + t_##A = _mm256_permute2x128_si256(x_##A, x_##A2, 0x20); \ + t_##A2 = _mm256_permute2x128_si256(x_##A, x_##A2, 0x31); \ + t_##B = _mm256_permute2x128_si256(x_##B, x_##B2, 0x20); \ + t_##B2 = _mm256_permute2x128_si256(x_##B, x_##B2, 0x31); \ + t_##C = _mm256_permute2x128_si256(x_##C, x_##C2, 0x20); \ + t_##C2 = _mm256_permute2x128_si256(x_##C, x_##C2, 0x31); \ + t_##D = _mm256_permute2x128_si256(x_##D, x_##D2, 0x20); \ + t_##D2 = _mm256_permute2x128_si256(x_##D, x_##D2, 0x31); \ + t_##A = _mm256_xor_si256(t_##A, _mm256_loadu_si256((__m256i*)(m + 0))); \ + t_##B = _mm256_xor_si256(t_##B, _mm256_loadu_si256((__m256i*)(m + 64))); \ + t_##C = _mm256_xor_si256(t_##C, _mm256_loadu_si256((__m256i*)(m + 128))); \ + t_##D = _mm256_xor_si256(t_##D, _mm256_loadu_si256((__m256i*)(m + 192))); \ + t_##A2 = \ + _mm256_xor_si256(t_##A2, _mm256_loadu_si256((__m256i*)(m + 256))); \ + t_##B2 = \ + _mm256_xor_si256(t_##B2, _mm256_loadu_si256((__m256i*)(m + 320))); \ + t_##C2 = \ + _mm256_xor_si256(t_##C2, _mm256_loadu_si256((__m256i*)(m + 384))); \ + t_##D2 = \ + _mm256_xor_si256(t_##D2, _mm256_loadu_si256((__m256i*)(m + 448))); \ + _mm256_storeu_si256((__m256i*)(c + 0), t_##A); \ + _mm256_storeu_si256((__m256i*)(c + 64), t_##B); \ + _mm256_storeu_si256((__m256i*)(c + 128), t_##C); \ + _mm256_storeu_si256((__m256i*)(c + 192), t_##D); \ + _mm256_storeu_si256((__m256i*)(c + 256), t_##A2); \ + _mm256_storeu_si256((__m256i*)(c + 320), t_##B2); \ + _mm256_storeu_si256((__m256i*)(c + 384), t_##C2); \ + _mm256_storeu_si256((__m256i*)(c + 448), t_##D2); \ + } + + ONEOCTO(0, 1, 2, 3, 4, 5, 6, 7); + m += 32; + c += 32; + ONEOCTO(8, 9, 10, 11, 12, 13, 14, 15); + m -= 32; + c -= 32; #undef ONEQUAD #undef ONEQUAD_TRANSPOSE #undef ONEQUAD_UNPCK #undef ONEOCTO - bytes -= 512; - c += 512; - m += 512; - } + bytes -= 512; + c += 512; + m += 512; + } } #undef VEC8_ROT #undef VEC8_QUARTERROUND diff --git a/crypto/chacha20/stream_chacha20.h b/crypto/chacha20/stream_chacha20.h index d6b71c5e0..978ce7c97 100644 --- a/crypto/chacha20/stream_chacha20.h +++ b/crypto/chacha20/stream_chacha20.h @@ -4,19 +4,18 @@ #include -typedef struct crypto_stream_chacha20_implementation { - int (*stream)(unsigned char *c, unsigned long long clen, - const unsigned char *n, const unsigned char *k); - int (*stream_ietf)(unsigned char *c, unsigned long long clen, - const unsigned char *n, const unsigned char *k); - int (*stream_xor_ic)(unsigned char *c, const unsigned char *m, - unsigned long long mlen, - const unsigned char *n, uint64_t ic, - const unsigned char *k); - int (*stream_ietf_xor_ic)(unsigned char *c, const unsigned char *m, - unsigned long long mlen, - const unsigned char *n, uint32_t ic, - const unsigned char *k); +typedef struct crypto_stream_chacha20_implementation +{ + int (*stream)(unsigned char *c, unsigned long long clen, + const unsigned char *n, const unsigned char *k); + int (*stream_ietf)(unsigned char *c, unsigned long long clen, + const unsigned char *n, const unsigned char *k); + int (*stream_xor_ic)(unsigned char *c, const unsigned char *m, + unsigned long long mlen, const unsigned char *n, + uint64_t ic, const unsigned char *k); + int (*stream_ietf_xor_ic)(unsigned char *c, const unsigned char *m, + unsigned long long mlen, const unsigned char *n, + uint32_t ic, const unsigned char *k); } crypto_stream_chacha20_implementation; #endif diff --git a/crypto/csrng/randombytes.c b/crypto/csrng/randombytes.c index 692d7b85f..1531857ea 100644 --- a/crypto/csrng/randombytes.c +++ b/crypto/csrng/randombytes.c @@ -74,8 +74,8 @@ randombytes_stir(void) try { var window_ = 'object' == = typeof window ? window : self; - var crypto_ = typeof window_.crypto != = 'undefined' ? window_.crypto - : window_.msCrypto; + var crypto_ = typeof window_.crypto != = + 'undefined' ? window_.crypto : window_.msCrypto; var randomValuesStandard = function() { var buf = new Uint32Array(1); diff --git a/crypto/csrng/randombytes_salsa20_random.c b/crypto/csrng/randombytes_salsa20_random.c index 365d32003..19d2582b4 100644 --- a/crypto/csrng/randombytes_salsa20_random.c +++ b/crypto/csrng/randombytes_salsa20_random.c @@ -69,7 +69,7 @@ typedef NTSTATUS(FAR PASCAL *CNGAPI_DRBG)(BCRYPT_ALG_HANDLE, UCHAR *, ULONG, #endif #ifndef TLS -#ifdef _WIN32 +#ifdef _WIN32 #ifdef _MSC_VER #define TLS __declspec(thread) #else diff --git a/crypto/curve25519/ref10/fe_25_5/base.h b/crypto/curve25519/ref10/fe_25_5/base.h index e18530bbb..b1840ac57 100644 --- a/crypto/curve25519/ref10/fe_25_5/base.h +++ b/crypto/curve25519/ref10/fe_25_5/base.h @@ -1,1344 +1,1573 @@ -{ /* 0/31 */ - { - { 25967493, -14356035, 29566456, 3660896, -12694345, 4014787, 27544626, -11754271, -6079156, 2047605 }, - { -12545711, 934262, -2722910, 3049990, -727428, 9406986, 12720692, 5043384, 19500929, -15469378 }, - { -8738181, 4489570, 9688441, -14785194, 10184609, -12363380, 29287919, 11864899, -24514362, -4438546 } - }, - { - { -12815894, -12976347, -21581243, 11784320, -25355658, -2750717, -11717903, -3814571, -358445, -10211303 }, - { -21703237, 6903825, 27185491, 6451973, -29577724, -9554005, -15616551, 11189268, -26829678, -5319081 }, - { 26966642, 11152617, 32442495, 15396054, 14353839, -12752335, -3128826, -9541118, -15472047, -4166697 } - }, - { - { 15636291, -9688557, 24204773, -7912398, 616977, -16685262, 27787600, -14772189, 28944400, -1550024 }, - { 16568933, 4717097, -11556148, -1102322, 15682896, -11807043, 16354577, -11775962, 7689662, 11199574 }, - { 30464156, -5976125, -11779434, -15670865, 23220365, 15915852, 7512774, 10017326, -17749093, -9920357 } - }, - { - { -17036878, 13921892, 10945806, -6033431, 27105052, -16084379, -28926210, 15006023, 3284568, -6276540 }, - { 23599295, -8306047, -11193664, -7687416, 13236774, 10506355, 7464579, 9656445, 13059162, 10374397 }, - { 7798556, 16710257, 3033922, 2874086, 28997861, 2835604, 32406664, -3839045, -641708, -101325 } - }, - { - { 10861363, 11473154, 27284546, 1981175, -30064349, 12577861, 32867885, 14515107, -15438304, 10819380 }, - { 4708026, 6336745, 20377586, 9066809, -11272109, 6594696, -25653668, 12483688, -12668491, 5581306 }, - { 19563160, 16186464, -29386857, 4097519, 10237984, -4348115, 28542350, 13850243, -23678021, -15815942 } - }, - { - { -15371964, -12862754, 32573250, 4720197, -26436522, 5875511, -19188627, -15224819, -9818940, -12085777 }, - { -8549212, 109983, 15149363, 2178705, 22900618, 4543417, 3044240, -15689887, 1762328, 14866737 }, - { -18199695, -15951423, -10473290, 1707278, -17185920, 3916101, -28236412, 3959421, 27914454, 4383652 } - }, - { - { 5153746, 9909285, 1723747, -2777874, 30523605, 5516873, 19480852, 5230134, -23952439, -15175766 }, - { -30269007, -3463509, 7665486, 10083793, 28475525, 1649722, 20654025, 16520125, 30598449, 7715701 }, - { 28881845, 14381568, 9657904, 3680757, -20181635, 7843316, -31400660, 1370708, 29794553, -1409300 } - }, - { - { 14499471, -2729599, -33191113, -4254652, 28494862, 14271267, 30290735, 10876454, -33154098, 2381726 }, - { -7195431, -2655363, -14730155, 462251, -27724326, 3941372, -6236617, 3696005, -32300832, 15351955 }, - { 27431194, 8222322, 16448760, -3907995, -18707002, 11938355, -32961401, -2970515, 29551813, 10109425 } - } -}, -{ /* 1/31 */ - { - { -13657040, -13155431, -31283750, 11777098, 21447386, 6519384, -2378284, -1627556, 10092783, -4764171 }, - { 27939166, 14210322, 4677035, 16277044, -22964462, -12398139, -32508754, 12005538, -17810127, 12803510 }, - { 17228999, -15661624, -1233527, 300140, -1224870, -11714777, 30364213, -9038194, 18016357, 4397660 } - }, - { - { -10958843, -7690207, 4776341, -14954238, 27850028, -15602212, -26619106, 14544525, -17477504, 982639 }, - { 29253598, 15796703, -2863982, -9908884, 10057023, 3163536, 7332899, -4120128, -21047696, 9934963 }, - { 5793303, 16271923, -24131614, -10116404, 29188560, 1206517, -14747930, 4559895, -30123922, -10897950 } - }, - { - { -27643952, -11493006, 16282657, -11036493, 28414021, -15012264, 24191034, 4541697, -13338309, 5500568 }, - { 12650548, -1497113, 9052871, 11355358, -17680037, -8400164, -17430592, 12264343, 10874051, 13524335 }, - { 25556948, -3045990, 714651, 2510400, 23394682, -10415330, 33119038, 5080568, -22528059, 5376628 } - }, - { - { -26088264, -4011052, -17013699, -3537628, -6726793, 1920897, -22321305, -9447443, 4535768, 1569007 }, - { -2255422, 14606630, -21692440, -8039818, 28430649, 8775819, -30494562, 3044290, 31848280, 12543772 }, - { -22028579, 2943893, -31857513, 6777306, 13784462, -4292203, -27377195, -2062731, 7718482, 14474653 } - }, - { - { 2385315, 2454213, -22631320, 46603, -4437935, -15680415, 656965, -7236665, 24316168, -5253567 }, - { 13741529, 10911568, -33233417, -8603737, -20177830, -1033297, 33040651, -13424532, -20729456, 8321686 }, - { 21060490, -2212744, 15712757, -4336099, 1639040, 10656336, 23845965, -11874838, -9984458, 608372 } - }, - { - { -13672732, -15087586, -10889693, -7557059, -6036909, 11305547, 1123968, -6780577, 27229399, 23887 }, - { -23244140, -294205, -11744728, 14712571, -29465699, -2029617, 12797024, -6440308, -1633405, 16678954 }, - { -29500620, 4770662, -16054387, 14001338, 7830047, 9564805, -1508144, -4795045, -17169265, 4904953 } - }, - { - { 24059557, 14617003, 19037157, -15039908, 19766093, -14906429, 5169211, 16191880, 2128236, -4326833 }, - { -16981152, 4124966, -8540610, -10653797, 30336522, -14105247, -29806336, 916033, -6882542, -2986532 }, - { -22630907, 12419372, -7134229, -7473371, -16478904, 16739175, 285431, 2763829, 15736322, 4143876 } - }, - { - { 2379352, 11839345, -4110402, -5988665, 11274298, 794957, 212801, -14594663, 23527084, -16458268 }, - { 33431127, -11130478, -17838966, -15626900, 8909499, 8376530, -32625340, 4087881, -15188911, -14416214 }, - { 1767683, 7197987, -13205226, -2022635, -13091350, 448826, 5799055, 4357868, -4774191, -16323038 } - } -}, -{ /* 2/31 */ - { - { 6721966, 13833823, -23523388, -1551314, 26354293, -11863321, 23365147, -3949732, 7390890, 2759800 }, - { 4409041, 2052381, 23373853, 10530217, 7676779, -12885954, 21302353, -4264057, 1244380, -12919645 }, - { -4421239, 7169619, 4982368, -2957590, 30256825, -2777540, 14086413, 9208236, 15886429, 16489664 } - }, - { - { 1996075, 10375649, 14346367, 13311202, -6874135, -16438411, -13693198, 398369, -30606455, -712933 }, - { -25307465, 9795880, -2777414, 14878809, -33531835, 14780363, 13348553, 12076947, -30836462, 5113182 }, - { -17770784, 11797796, 31950843, 13929123, -25888302, 12288344, -30341101, -7336386, 13847711, 5387222 } - }, - { - { -18582163, -3416217, 17824843, -2340966, 22744343, -10442611, 8763061, 3617786, -19600662, 10370991 }, - { 20246567, -14369378, 22358229, -543712, 18507283, -10413996, 14554437, -8746092, 32232924, 16763880 }, - { 9648505, 10094563, 26416693, 14745928, -30374318, -6472621, 11094161, 15689506, 3140038, -16510092 } - }, - { - { -16160072, 5472695, 31895588, 4744994, 8823515, 10365685, -27224800, 9448613, -28774454, 366295 }, - { 19153450, 11523972, -11096490, -6503142, -24647631, 5420647, 28344573, 8041113, 719605, 11671788 }, - { 8678025, 2694440, -6808014, 2517372, 4964326, 11152271, -15432916, -15266516, 27000813, -10195553 } - }, - { - { -15157904, 7134312, 8639287, -2814877, -7235688, 10421742, 564065, 5336097, 6750977, -14521026 }, - { 11836410, -3979488, 26297894, 16080799, 23455045, 15735944, 1695823, -8819122, 8169720, 16220347 }, - { -18115838, 8653647, 17578566, -6092619, -8025777, -16012763, -11144307, -2627664, -5990708, -14166033 } - }, - { - { -23308498, -10968312, 15213228, -10081214, -30853605, -11050004, 27884329, 2847284, 2655861, 1738395 }, - { -27537433, -14253021, -25336301, -8002780, -9370762, 8129821, 21651608, -3239336, -19087449, -11005278 }, - { 1533110, 3437855, 23735889, 459276, 29970501, 11335377, 26030092, 5821408, 10478196, 8544890 } - }, - { - { 32173121, -16129311, 24896207, 3921497, 22579056, -3410854, 19270449, 12217473, 17789017, -3395995 }, - { -30552961, -2228401, -15578829, -10147201, 13243889, 517024, 15479401, -3853233, 30460520, 1052596 }, - { -11614875, 13323618, 32618793, 8175907, -15230173, 12596687, 27491595, -4612359, 3179268, -9478891 } - }, - { - { 31947069, -14366651, -4640583, -15339921, -15125977, -6039709, -14756777, -16411740, 19072640, -9511060 }, - { 11685058, 11822410, 3158003, -13952594, 33402194, -4165066, 5977896, -5215017, 473099, 5040608 }, - { -20290863, 8198642, -27410132, 11602123, 1290375, -2799760, 28326862, 1721092, -19558642, -3131606 } - } -}, -{ /* 3/31 */ - { - { 7881532, 10687937, 7578723, 7738378, -18951012, -2553952, 21820786, 8076149, -27868496, 11538389 }, - { -19935666, 3899861, 18283497, -6801568, -15728660, -11249211, 8754525, 7446702, -5676054, 5797016 }, - { -11295600, -3793569, -15782110, -7964573, 12708869, -8456199, 2014099, -9050574, -2369172, -5877341 } - }, - { - { -22472376, -11568741, -27682020, 1146375, 18956691, 16640559, 1192730, -3714199, 15123619, 10811505 }, - { 14352098, -3419715, -18942044, 10822655, 32750596, 4699007, -70363, 15776356, -28886779, -11974553 }, - { -28241164, -8072475, -4978962, -5315317, 29416931, 1847569, -20654173, -16484855, 4714547, -9600655 } - }, - { - { 15200332, 8368572, 19679101, 15970074, -31872674, 1959451, 24611599, -4543832, -11745876, 12340220 }, - { 12876937, -10480056, 33134381, 6590940, -6307776, 14872440, 9613953, 8241152, 15370987, 9608631 }, - { -4143277, -12014408, 8446281, -391603, 4407738, 13629032, -7724868, 15866074, -28210621, -8814099 } - }, - { - { 26660628, -15677655, 8393734, 358047, -7401291, 992988, -23904233, 858697, 20571223, 8420556 }, - { 14620715, 13067227, -15447274, 8264467, 14106269, 15080814, 33531827, 12516406, -21574435, -12476749 }, - { 236881, 10476226, 57258, -14677024, 6472998, 2466984, 17258519, 7256740, 8791136, 15069930 } - }, - { - { 1276410, -9371918, 22949635, -16322807, -23493039, -5702186, 14711875, 4874229, -30663140, -2331391 }, - { 5855666, 4990204, -13711848, 7294284, -7804282, 1924647, -1423175, -7912378, -33069337, 9234253 }, - { 20590503, -9018988, 31529744, -7352666, -2706834, 10650548, 31559055, -11609587, 18979186, 13396066 } - }, - { - { 24474287, 4968103, 22267082, 4407354, 24063882, -8325180, -18816887, 13594782, 33514650, 7021958 }, - { -11566906, -6565505, -21365085, 15928892, -26158305, 4315421, -25948728, -3916677, -21480480, 12868082 }, - { -28635013, 13504661, 19988037, -2132761, 21078225, 6443208, -21446107, 2244500, -12455797, -8089383 } - }, - { - { -30595528, 13793479, -5852820, 319136, -25723172, -6263899, 33086546, 8957937, -15233648, 5540521 }, - { -11630176, -11503902, -8119500, -7643073, 2620056, 1022908, -23710744, -1568984, -16128528, -14962807 }, - { 23152971, 775386, 27395463, 14006635, -9701118, 4649512, 1689819, 892185, -11513277, -15205948 } - }, - { - { 9770129, 9586738, 26496094, 4324120, 1556511, -3550024, 27453819, 4763127, -19179614, 5867134 }, - { -32765025, 1927590, 31726409, -4753295, 23962434, -16019500, 27846559, 5931263, -29749703, -16108455 }, - { 27461885, -2977536, 22380810, 1815854, -23033753, -3031938, 7283490, -15148073, -19526700, 7734629 } - } -}, -{ /* 4/31 */ - { - { -8010264, -9590817, -11120403, 6196038, 29344158, -13430885, 7585295, -3176626, 18549497, 15302069 }, - { -32658337, -6171222, -7672793, -11051681, 6258878, 13504381, 10458790, -6418461, -8872242, 8424746 }, - { 24687205, 8613276, -30667046, -3233545, 1863892, -1830544, 19206234, 7134917, -11284482, -828919 } - }, - { - { 11334899, -9218022, 8025293, 12707519, 17523892, -10476071, 10243738, -14685461, -5066034, 16498837 }, - { 8911542, 6887158, -9584260, -6958590, 11145641, -9543680, 17303925, -14124238, 6536641, 10543906 }, - { -28946384, 15479763, -17466835, 568876, -1497683, 11223454, -2669190, -16625574, -27235709, 8876771 } - }, - { - { -25742899, -12566864, -15649966, -846607, -33026686, -796288, -33481822, 15824474, -604426, -9039817 }, - { 10330056, 70051, 7957388, -9002667, 9764902, 15609756, 27698697, -4890037, 1657394, 3084098 }, - { 10477963, -7470260, 12119566, -13250805, 29016247, -5365589, 31280319, 14396151, -30233575, 15272409 } - }, - { - { -12288309, 3169463, 28813183, 16658753, 25116432, -5630466, -25173957, -12636138, -25014757, 1950504 }, - { -26180358, 9489187, 11053416, -14746161, -31053720, 5825630, -8384306, -8767532, 15341279, 8373727 }, - { 28685821, 7759505, -14378516, -12002860, -31971820, 4079242, 298136, -10232602, -2878207, 15190420 } - }, - { - { -32932876, 13806336, -14337485, -15794431, -24004620, 10940928, 8669718, 2742393, -26033313, -6875003 }, - { -1580388, -11729417, -25979658, -11445023, -17411874, -10912854, 9291594, -16247779, -12154742, 6048605 }, - { -30305315, 14843444, 1539301, 11864366, 20201677, 1900163, 13934231, 5128323, 11213262, 9168384 } - }, - { - { -26280513, 11007847, 19408960, -940758, -18592965, -4328580, -5088060, -11105150, 20470157, -16398701 }, - { -23136053, 9282192, 14855179, -15390078, -7362815, -14408560, -22783952, 14461608, 14042978, 5230683 }, - { 29969567, -2741594, -16711867, -8552442, 9175486, -2468974, 21556951, 3506042, -5933891, -12449708 } - }, - { - { -3144746, 8744661, 19704003, 4581278, -20430686, 6830683, -21284170, 8971513, -28539189, 15326563 }, - { -19464629, 10110288, -17262528, -3503892, -23500387, 1355669, -15523050, 15300988, -20514118, 9168260 }, - { -5353335, 4488613, -23803248, 16314347, 7780487, -15638939, -28948358, 9601605, 33087103, -9011387 } - }, - { - { -19443170, -15512900, -20797467, -12445323, -29824447, 10229461, -27444329, -15000531, -5996870, 15664672 }, - { 23294591, -16632613, -22650781, -8470978, 27844204, 11461195, 13099750, -2460356, 18151676, 13417686 }, - { -24722913, -4176517, -31150679, 5988919, -26858785, 6685065, 1661597, -12551441, 15271676, -15452665 } - } -}, -{ /* 5/31 */ - { - { 11433042, -13228665, 8239631, -5279517, -1985436, -725718, -18698764, 2167544, -6921301, -13440182 }, - { -31436171, 15575146, 30436815, 12192228, -22463353, 9395379, -9917708, -8638997, 12215110, 12028277 }, - { 14098400, 6555944, 23007258, 5757252, -15427832, -12950502, 30123440, 4617780, -16900089, -655628 } - }, - { - { -4026201, -15240835, 11893168, 13718664, -14809462, 1847385, -15819999, 10154009, 23973261, -12684474 }, - { -26531820, -3695990, -1908898, 2534301, -31870557, -16550355, 18341390, -11419951, 32013174, -10103539 }, - { -25479301, 10876443, -11771086, -14625140, -12369567, 1838104, 21911214, 6354752, 4425632, -837822 } - }, - { - { -10433389, -14612966, 22229858, -3091047, -13191166, 776729, -17415375, -12020462, 4725005, 14044970 }, - { 19268650, -7304421, 1555349, 8692754, -21474059, -9910664, 6347390, -1411784, -19522291, -16109756 }, - { -24864089, 12986008, -10898878, -5558584, -11312371, -148526, 19541418, 8180106, 9282262, 10282508 } - }, - { - { -26205082, 4428547, -8661196, -13194263, 4098402, -14165257, 15522535, 8372215, 5542595, -10702683 }, - { -10562541, 14895633, 26814552, -16673850, -17480754, -2489360, -2781891, 6993761, -18093885, 10114655 }, - { -20107055, -929418, 31422704, 10427861, -7110749, 6150669, -29091755, -11529146, 25953725, -106158 } - }, - { - { -4234397, -8039292, -9119125, 3046000, 2101609, -12607294, 19390020, 6094296, -3315279, 12831125 }, - { -15998678, 7578152, 5310217, 14408357, -33548620, -224739, 31575954, 6326196, 7381791, -2421839 }, - { -20902779, 3296811, 24736065, -16328389, 18374254, 7318640, 6295303, 8082724, -15362489, 12339664 } - }, - { - { 27724736, 2291157, 6088201, -14184798, 1792727, 5857634, 13848414, 15768922, 25091167, 14856294 }, - { -18866652, 8331043, 24373479, 8541013, -701998, -9269457, 12927300, -12695493, -22182473, -9012899 }, - { -11423429, -5421590, 11632845, 3405020, 30536730, -11674039, -27260765, 13866390, 30146206, 9142070 } - }, - { - { 3924129, -15307516, -13817122, -10054960, 12291820, -668366, -27702774, 9326384, -8237858, 4171294 }, - { -15921940, 16037937, 6713787, 16606682, -21612135, 2790944, 26396185, 3731949, 345228, -5462949 }, - { -21327538, 13448259, 25284571, 1143661, 20614966, -8849387, 2031539, -12391231, -16253183, -13582083 } - }, - { - { 31016211, -16722429, 26371392, -14451233, -5027349, 14854137, 17477601, 3842657, 28012650, -16405420 }, - { -5075835, 9368966, -8562079, -4600902, -15249953, 6970560, -9189873, 16292057, -8867157, 3507940 }, - { 29439664, 3537914, 23333589, 6997794, -17555561, -11018068, -15209202, -15051267, -9164929, 6580396 } - } -}, -{ /* 6/31 */ - { - { -12185861, -7679788, 16438269, 10826160, -8696817, -6235611, 17860444, -9273846, -2095802, 9304567 }, - { 20714564, -4336911, 29088195, 7406487, 11426967, -5095705, 14792667, -14608617, 5289421, -477127 }, - { -16665533, -10650790, -6160345, -13305760, 9192020, -1802462, 17271490, 12349094, 26939669, -3752294 } - }, - { - { -12889898, 9373458, 31595848, 16374215, 21471720, 13221525, -27283495, -12348559, -3698806, 117887 }, - { 22263325, -6560050, 3984570, -11174646, -15114008, -566785, 28311253, 5358056, -23319780, 541964 }, - { 16259219, 3261970, 2309254, -15534474, -16885711, -4581916, 24134070, -16705829, -13337066, -13552195 } - }, - { - { 9378160, -13140186, -22845982, -12745264, 28198281, -7244098, -2399684, -717351, 690426, 14876244 }, - { 24977353, -314384, -8223969, -13465086, 28432343, -1176353, -13068804, -12297348, -22380984, 6618999 }, - { -1538174, 11685646, 12944378, 13682314, -24389511, -14413193, 8044829, -13817328, 32239829, -5652762 } - }, - { - { -18603066, 4762990, -926250, 8885304, -28412480, -3187315, 9781647, -10350059, 32779359, 5095274 }, - { -33008130, -5214506, -32264887, -3685216, 9460461, -9327423, -24601656, 14506724, 21639561, -2630236 }, - { -16400943, -13112215, 25239338, 15531969, 3987758, -4499318, -1289502, -6863535, 17874574, 558605 } - }, - { - { -13600129, 10240081, 9171883, 16131053, -20869254, 9599700, 33499487, 5080151, 2085892, 5119761 }, - { -22205145, -2519528, -16381601, 414691, -25019550, 2170430, 30634760, -8363614, -31999993, -5759884 }, - { -6845704, 15791202, 8550074, -1312654, 29928809, -12092256, 27534430, -7192145, -22351378, 12961482 } - }, - { - { -24492060, -9570771, 10368194, 11582341, -23397293, -2245287, 16533930, 8206996, -30194652, -5159638 }, - { -11121496, -3382234, 2307366, 6362031, -135455, 8868177, -16835630, 7031275, 7589640, 8945490 }, - { -32152748, 8917967, 6661220, -11677616, -1192060, -15793393, 7251489, -11182180, 24099109, -14456170 } - }, - { - { 5019558, -7907470, 4244127, -14714356, -26933272, 6453165, -19118182, -13289025, -6231896, -10280736 }, - { 10853594, 10721687, 26480089, 5861829, -22995819, 1972175, -1866647, -10557898, -3363451, -6441124 }, - { -17002408, 5906790, 221599, -6563147, 7828208, -13248918, 24362661, -2008168, -13866408, 7421392 } - }, - { - { 8139927, -6546497, 32257646, -5890546, 30375719, 1886181, -21175108, 15441252, 28826358, -4123029 }, - { 6267086, 9695052, 7709135, -16603597, -32869068, -1886135, 14795160, -7840124, 13746021, -1742048 }, - { 28584902, 7787108, -6732942, -15050729, 22846041, -7571236, -3181936, -363524, 4771362, -8419958 } - } -}, -{ /* 7/31 */ - { - { 24949256, 6376279, -27466481, -8174608, -18646154, -9930606, 33543569, -12141695, 3569627, 11342593 }, - { 26514989, 4740088, 27912651, 3697550, 19331575, -11472339, 6809886, 4608608, 7325975, -14801071 }, - { -11618399, -14554430, -24321212, 7655128, -1369274, 5214312, -27400540, 10258390, -17646694, -8186692 } - }, - { - { 11431204, 15823007, 26570245, 14329124, 18029990, 4796082, -31446179, 15580664, 9280358, -3973687 }, - { -160783, -10326257, -22855316, -4304997, -20861367, -13621002, -32810901, -11181622, -15545091, 4387441 }, - { -20799378, 12194512, 3937617, -5805892, -27154820, 9340370, -24513992, 8548137, 20617071, -7482001 } - }, - { - { -938825, -3930586, -8714311, 16124718, 24603125, -6225393, -13775352, -11875822, 24345683, 10325460 }, - { -19855277, -1568885, -22202708, 8714034, 14007766, 6928528, 16318175, -1010689, 4766743, 3552007 }, - { -21751364, -16730916, 1351763, -803421, -4009670, 3950935, 3217514, 14481909, 10988822, -3994762 } - }, - { - { 15564307, -14311570, 3101243, 5684148, 30446780, -8051356, 12677127, -6505343, -8295852, 13296005 }, - { -9442290, 6624296, -30298964, -11913677, -4670981, -2057379, 31521204, 9614054, -30000824, 12074674 }, - { 4771191, -135239, 14290749, -13089852, 27992298, 14998318, -1413936, -1556716, 29832613, -16391035 } - }, - { - { 7064884, -7541174, -19161962, -5067537, -18891269, -2912736, 25825242, 5293297, -27122660, 13101590 }, - { -2298563, 2439670, -7466610, 1719965, -27267541, -16328445, 32512469, -5317593, -30356070, -4190957 }, - { -30006540, 10162316, -33180176, 3981723, -16482138, -13070044, 14413974, 9515896, 19568978, 9628812 } - }, - { - { 33053803, 199357, 15894591, 1583059, 27380243, -4580435, -17838894, -6106839, -6291786, 3437740 }, - { -18978877, 3884493, 19469877, 12726490, 15913552, 13614290, -22961733, 70104, 7463304, 4176122 }, - { -27124001, 10659917, 11482427, -16070381, 12771467, -6635117, -32719404, -5322751, 24216882, 5944158 } - }, - { - { 8894125, 7450974, -2664149, -9765752, -28080517, -12389115, 19345746, 14680796, 11632993, 5847885 }, - { 26942781, -2315317, 9129564, -4906607, 26024105, 11769399, -11518837, 6367194, -9727230, 4782140 }, - { 19916461, -4828410, -22910704, -11414391, 25606324, -5972441, 33253853, 8220911, 6358847, -1873857 } - }, - { - { 801428, -2081702, 16569428, 11065167, 29875704, 96627, 7908388, -4480480, -13538503, 1387155 }, - { 19646058, 5720633, -11416706, 12814209, 11607948, 12749789, 14147075, 15156355, -21866831, 11835260 }, - { 19299512, 1155910, 28703737, 14890794, 2925026, 7269399, 26121523, 15467869, -26560550, 5052483 } - } -}, -{ /* 8/31 */ - { - { -3017432, 10058206, 1980837, 3964243, 22160966, 12322533, -6431123, -12618185, 12228557, -7003677 }, - { 32944382, 14922211, -22844894, 5188528, 21913450, -8719943, 4001465, 13238564, -6114803, 8653815 }, - { 22865569, -4652735, 27603668, -12545395, 14348958, 8234005, 24808405, 5719875, 28483275, 2841751 } - }, - { - { -16420968, -1113305, -327719, -12107856, 21886282, -15552774, -1887966, -315658, 19932058, -12739203 }, - { -11656086, 10087521, -8864888, -5536143, -19278573, -3055912, 3999228, 13239134, -4777469, -13910208 }, - { 1382174, -11694719, 17266790, 9194690, -13324356, 9720081, 20403944, 11284705, -14013818, 3093230 } - }, - { - { 16650921, -11037932, -1064178, 1570629, -8329746, 7352753, -302424, 16271225, -24049421, -6691850 }, - { -21911077, -5927941, -4611316, -5560156, -31744103, -10785293, 24123614, 15193618, -21652117, -16739389 }, - { -9935934, -4289447, -25279823, 4372842, 2087473, 10399484, 31870908, 14690798, 17361620, 11864968 } - }, - { - { -11307610, 6210372, 13206574, 5806320, -29017692, -13967200, -12331205, -7486601, -25578460, -16240689 }, - { 14668462, -12270235, 26039039, 15305210, 25515617, 4542480, 10453892, 6577524, 9145645, -6443880 }, - { 5974874, 3053895, -9433049, -10385191, -31865124, 3225009, -7972642, 3936128, -5652273, -3050304 } - }, - { - { 30625386, -4729400, -25555961, -12792866, -20484575, 7695099, 17097188, -16303496, -27999779, 1803632 }, - { -3553091, 9865099, -5228566, 4272701, -5673832, -16689700, 14911344, 12196514, -21405489, 7047412 }, - { 20093277, 9920966, -11138194, -5343857, 13161587, 12044805, -32856851, 4124601, -32343828, -10257566 } - }, - { - { -20788824, 14084654, -13531713, 7842147, 19119038, -13822605, 4752377, -8714640, -21679658, 2288038 }, - { -26819236, -3283715, 29965059, 3039786, -14473765, 2540457, 29457502, 14625692, -24819617, 12570232 }, - { -1063558, -11551823, 16920318, 12494842, 1278292, -5869109, -21159943, -3498680, -11974704, 4724943 } - }, - { - { 17960970, -11775534, -4140968, -9702530, -8876562, -1410617, -12907383, -8659932, -29576300, 1903856 }, - { 23134274, -14279132, -10681997, -1611936, 20684485, 15770816, -12989750, 3190296, 26955097, 14109738 }, - { 15308788, 5320727, -30113809, -14318877, 22902008, 7767164, 29425325, -11277562, 31960942, 11934971 } - }, - { - { -27395711, 8435796, 4109644, 12222639, -24627868, 14818669, 20638173, 4875028, 10491392, 1379718 }, - { -13159415, 9197841, 3875503, -8936108, -1383712, -5879801, 33518459, 16176658, 21432314, 12180697 }, - { -11787308, 11500838, 13787581, -13832590, -22430679, 10140205, 1465425, 12689540, -10301319, -13872883 } - } -}, -{ /* 9/31 */ - { - { 5414091, -15386041, -21007664, 9643570, 12834970, 1186149, -2622916, -1342231, 26128231, 6032912 }, - { -26337395, -13766162, 32496025, -13653919, 17847801, -12669156, 3604025, 8316894, -25875034, -10437358 }, - { 3296484, 6223048, 24680646, -12246460, -23052020, 5903205, -8862297, -4639164, 12376617, 3188849 } - }, - { - { 29190488, -14659046, 27549113, -1183516, 3520066, -10697301, 32049515, -7309113, -16109234, -9852307 }, - { -14744486, -9309156, 735818, -598978, -20407687, -5057904, 25246078, -15795669, 18640741, -960977 }, - { -6928835, -16430795, 10361374, 5642961, 4910474, 12345252, -31638386, -494430, 10530747, 1053335 } - }, - { - { -29265967, -14186805, -13538216, -12117373, -19457059, -10655384, -31462369, -2948985, 24018831, 15026644 }, - { -22592535, -3145277, -2289276, 5953843, -13440189, 9425631, 25310643, 13003497, -2314791, -15145616 }, - { -27419985, -603321, -8043984, -1669117, -26092265, 13987819, -27297622, 187899, -23166419, -2531735 } - }, - { - { -21744398, -13810475, 1844840, 5021428, -10434399, -15911473, 9716667, 16266922, -5070217, 726099 }, - { 29370922, -6053998, 7334071, -15342259, 9385287, 2247707, -13661962, -4839461, 30007388, -15823341 }, - { -936379, 16086691, 23751945, -543318, -1167538, -5189036, 9137109, 730663, 9835848, 4555336 } - }, - { - { -23376435, 1410446, -22253753, -12899614, 30867635, 15826977, 17693930, 544696, -11985298, 12422646 }, - { 31117226, -12215734, -13502838, 6561947, -9876867, -12757670, -5118685, -4096706, 29120153, 13924425 }, - { -17400879, -14233209, 19675799, -2734756, -11006962, -5858820, -9383939, -11317700, 7240931, -237388 } - }, - { - { -31361739, -11346780, -15007447, -5856218, -22453340, -12152771, 1222336, 4389483, 3293637, -15551743 }, - { -16684801, -14444245, 11038544, 11054958, -13801175, -3338533, -24319580, 7733547, 12796905, -6335822 }, - { -8759414, -10817836, -25418864, 10783769, -30615557, -9746811, -28253339, 3647836, 3222231, -11160462 } - }, - { - { 18606113, 1693100, -25448386, -15170272, 4112353, 10045021, 23603893, -2048234, -7550776, 2484985 }, - { 9255317, -3131197, -12156162, -1004256, 13098013, -9214866, 16377220, -2102812, -19802075, -3034702 }, - { -22729289, 7496160, -5742199, 11329249, 19991973, -3347502, -31718148, 9936966, -30097688, -10618797 } - }, - { - { 21878590, -5001297, 4338336, 13643897, -3036865, 13160960, 19708896, 5415497, -7360503, -4109293 }, - { 27736861, 10103576, 12500508, 8502413, -3413016, -9633558, 10436918, -1550276, -23659143, -8132100 }, - { 19492550, -12104365, -29681976, -852630, -3208171, 12403437, 30066266, 8367329, 13243957, 8709688 } - } -}, -{ /* 10/31 */ - { - { 12015105, 2801261, 28198131, 10151021, 24818120, -4743133, -11194191, -5645734, 5150968, 7274186 }, - { 2831366, -12492146, 1478975, 6122054, 23825128, -12733586, 31097299, 6083058, 31021603, -9793610 }, - { -2529932, -2229646, 445613, 10720828, -13849527, -11505937, -23507731, 16354465, 15067285, -14147707 } - }, - { - { 7840942, 14037873, -33364863, 15934016, -728213, -3642706, 21403988, 1057586, -19379462, -12403220 }, - { 915865, -16469274, 15608285, -8789130, -24357026, 6060030, -17371319, 8410997, -7220461, 16527025 }, - { 32922597, -556987, 20336074, -16184568, 10903705, -5384487, 16957574, 52992, 23834301, 6588044 } - }, - { - { 32752030, 11232950, 3381995, -8714866, 22652988, -10744103, 17159699, 16689107, -20314580, -1305992 }, - { -4689649, 9166776, -25710296, -10847306, 11576752, 12733943, 7924251, -2752281, 1976123, -7249027 }, - { 21251222, 16309901, -2983015, -6783122, 30810597, 12967303, 156041, -3371252, 12331345, -8237197 } - }, - { - { 8651614, -4477032, -16085636, -4996994, 13002507, 2950805, 29054427, -5106970, 10008136, -4667901 }, - { 31486080, 15114593, -14261250, 12951354, 14369431, -7387845, 16347321, -13662089, 8684155, -10532952 }, - { 19443825, 11385320, 24468943, -9659068, -23919258, 2187569, -26263207, -6086921, 31316348, 14219878 } - }, - { - { -28594490, 1193785, 32245219, 11392485, 31092169, 15722801, 27146014, 6992409, 29126555, 9207390 }, - { 32382935, 1110093, 18477781, 11028262, -27411763, -7548111, -4980517, 10843782, -7957600, -14435730 }, - { 2814918, 7836403, 27519878, -7868156, -20894015, -11553689, -21494559, 8550130, 28346258, 1994730 } - }, - { - { -19578299, 8085545, -14000519, -3948622, 2785838, -16231307, -19516951, 7174894, 22628102, 8115180 }, - { -30405132, 955511, -11133838, -15078069, -32447087, -13278079, -25651578, 3317160, -9943017, 930272 }, - { -15303681, -6833769, 28856490, 1357446, 23421993, 1057177, 24091212, -1388970, -22765376, -10650715 } - }, - { - { -22751231, -5303997, -12907607, -12768866, -15811511, -7797053, -14839018, -16554220, -1867018, 8398970 }, - { -31969310, 2106403, -4736360, 1362501, 12813763, 16200670, 22981545, -6291273, 18009408, -15772772 }, - { -17220923, -9545221, -27784654, 14166835, 29815394, 7444469, 29551787, -3727419, 19288549, 1325865 } - }, - { - { 15100157, -15835752, -23923978, -1005098, -26450192, 15509408, 12376730, -3479146, 33166107, -8042750 }, - { 20909231, 13023121, -9209752, 16251778, -5778415, -8094914, 12412151, 10018715, 2213263, -13878373 }, - { 32529814, -11074689, 30361439, -16689753, -9135940, 1513226, 22922121, 6382134, -5766928, 8371348 } - } -}, -{ /* 11/31 */ - { - { 9923462, 11271500, 12616794, 3544722, -29998368, -1721626, 12891687, -8193132, -26442943, 10486144 }, - { -22597207, -7012665, 8587003, -8257861, 4084309, -12970062, 361726, 2610596, -23921530, -11455195 }, - { 5408411, -1136691, -4969122, 10561668, 24145918, 14240566, 31319731, -4235541, 19985175, -3436086 } - }, - { - { -13994457, 16616821, 14549246, 3341099, 32155958, 13648976, -17577068, 8849297, 65030, 8370684 }, - { -8320926, -12049626, 31204563, 5839400, -20627288, -1057277, -19442942, 6922164, 12743482, -9800518 }, - { -2361371, 12678785, 28815050, 4759974, -23893047, 4884717, 23783145, 11038569, 18800704, 255233 } - }, - { - { -5269658, -1773886, 13957886, 7990715, 23132995, 728773, 13393847, 9066957, 19258688, -14753793 }, - { -2936654, -10827535, -10432089, 14516793, -3640786, 4372541, -31934921, 2209390, -1524053, 2055794 }, - { 580882, 16705327, 5468415, -2683018, -30926419, -14696000, -7203346, -8994389, -30021019, 7394435 } - }, - { - { 23838809, 1822728, -15738443, 15242727, 8318092, -3733104, -21672180, -3492205, -4821741, 14799921 }, - { 13345610, 9759151, 3371034, -16137791, 16353039, 8577942, 31129804, 13496856, -9056018, 7402518 }, - { 2286874, -4435931, -20042458, -2008336, -13696227, 5038122, 11006906, -15760352, 8205061, 1607563 } - }, - { - { 14414086, -8002132, 3331830, -3208217, 22249151, -5594188, 18364661, -2906958, 30019587, -9029278 }, - { -27688051, 1585953, -10775053, 931069, -29120221, -11002319, -14410829, 12029093, 9944378, 8024 }, - { 4368715, -3709630, 29874200, -15022983, -20230386, -11410704, -16114594, -999085, -8142388, 5640030 } - }, - { - { 10299610, 13746483, 11661824, 16234854, 7630238, 5998374, 9809887, -16694564, 15219798, -14327783 }, - { 27425505, -5719081, 3055006, 10660664, 23458024, 595578, -15398605, -1173195, -18342183, 9742717 }, - { 6744077, 2427284, 26042789, 2720740, -847906, 1118974, 32324614, 7406442, 12420155, 1994844 } - }, - { - { 14012521, -5024720, -18384453, -9578469, -26485342, -3936439, -13033478, -10909803, 24319929, -6446333 }, - { 16412690, -4507367, 10772641, 15929391, -17068788, -4658621, 10555945, -10484049, -30102368, -4739048 }, - { 22397382, -7767684, -9293161, -12792868, 17166287, -9755136, -27333065, 6199366, 21880021, -12250760 } - }, - { - { -4283307, 5368523, -31117018, 8163389, -30323063, 3209128, 16557151, 8890729, 8840445, 4957760 }, - { -15447727, 709327, -6919446, -10870178, -29777922, 6522332, -21720181, 12130072, -14796503, 5005757 }, - { -2114751, -14308128, 23019042, 15765735, -25269683, 6002752, 10183197, -13239326, -16395286, -2176112 } - } -}, -{ /* 12/31 */ - { - { -19025756, 1632005, 13466291, -7995100, -23640451, 16573537, -32013908, -3057104, 22208662, 2000468 }, - { 3065073, -1412761, -25598674, -361432, -17683065, -5703415, -8164212, 11248527, -3691214, -7414184 }, - { 10379208, -6045554, 8877319, 1473647, -29291284, -12507580, 16690915, 2553332, -3132688, 16400289 } - }, - { - { 15716668, 1254266, -18472690, 7446274, -8448918, 6344164, -22097271, -7285580, 26894937, 9132066 }, - { 24158887, 12938817, 11085297, -8177598, -28063478, -4457083, -30576463, 64452, -6817084, -2692882 }, - { 13488534, 7794716, 22236231, 5989356, 25426474, -12578208, 2350710, -3418511, -4688006, 2364226 } - }, - { - { 16335052, 9132434, 25640582, 6678888, 1725628, 8517937, -11807024, -11697457, 15445875, -7798101 }, - { 29004207, -7867081, 28661402, -640412, -12794003, -7943086, 31863255, -4135540, -278050, -15759279 }, - { -6122061, -14866665, -28614905, 14569919, -10857999, -3591829, 10343412, -6976290, -29828287, -10815811 } - }, - { - { 27081650, 3463984, 14099042, -4517604, 1616303, -6205604, 29542636, 15372179, 17293797, 960709 }, - { 20263915, 11434237, -5765435, 11236810, 13505955, -10857102, -16111345, 6493122, -19384511, 7639714 }, - { -2830798, -14839232, 25403038, -8215196, -8317012, -16173699, 18006287, -16043750, 29994677, -15808121 } - }, - { - { 9769828, 5202651, -24157398, -13631392, -28051003, -11561624, -24613141, -13860782, -31184575, 709464 }, - { 12286395, 13076066, -21775189, -1176622, -25003198, 4057652, -32018128, -8890874, 16102007, 13205847 }, - { 13733362, 5599946, 10557076, 3195751, -5557991, 8536970, -25540170, 8525972, 10151379, 10394400 } - }, - { - { 4024660, -16137551, 22436262, 12276534, -9099015, -2686099, 19698229, 11743039, -33302334, 8934414 }, - { -15879800, -4525240, -8580747, -2934061, 14634845, -698278, -9449077, 3137094, -11536886, 11721158 }, - { 17555939, -5013938, 8268606, 2331751, -22738815, 9761013, 9319229, 8835153, -9205489, -1280045 } - }, - { - { -461409, -7830014, 20614118, 16688288, -7514766, -4807119, 22300304, 505429, 6108462, -6183415 }, - { -5070281, 12367917, -30663534, 3234473, 32617080, -8422642, 29880583, -13483331, -26898490, -7867459 }, - { -31975283, 5726539, 26934134, 10237677, -3173717, -605053, 24199304, 3795095, 7592688, -14992079 } - }, - { - { 21594432, -14964228, 17466408, -4077222, 32537084, 2739898, 6407723, 12018833, -28256052, 4298412 }, - { -20650503, -11961496, -27236275, 570498, 3767144, -1717540, 13891942, -1569194, 13717174, 10805743 }, - { -14676630, -15644296, 15287174, 11927123, 24177847, -8175568, -796431, 14860609, -26938930, -5863836 } - } -}, -{ /* 13/31 */ - { - { 12962541, 5311799, -10060768, 11658280, 18855286, -7954201, 13286263, -12808704, -4381056, 9882022 }, - { 18512079, 11319350, -20123124, 15090309, 18818594, 5271736, -22727904, 3666879, -23967430, -3299429 }, - { -6789020, -3146043, 16192429, 13241070, 15898607, -14206114, -10084880, -6661110, -2403099, 5276065 } - }, - { - { 30169808, -5317648, 26306206, -11750859, 27814964, 7069267, 7152851, 3684982, 1449224, 13082861 }, - { 10342826, 3098505, 2119311, 193222, 25702612, 12233820, 23697382, 15056736, -21016438, -8202000 }, - { -33150110, 3261608, 22745853, 7948688, 19370557, -15177665, -26171976, 6482814, -10300080, -11060101 } - }, - { - { 32869458, -5408545, 25609743, 15678670, -10687769, -15471071, 26112421, 2521008, -22664288, 6904815 }, - { 29506923, 4457497, 3377935, -9796444, -30510046, 12935080, 1561737, 3841096, -29003639, -6657642 }, - { 10340844, -6630377, -18656632, -2278430, 12621151, -13339055, 30878497, -11824370, -25584551, 5181966 } - }, - { - { 25940115, -12658025, 17324188, -10307374, -8671468, 15029094, 24396252, -16450922, -2322852, -12388574 }, - { -21765684, 9916823, -1300409, 4079498, -1028346, 11909559, 1782390, 12641087, 20603771, -6561742 }, - { -18882287, -11673380, 24849422, 11501709, 13161720, -4768874, 1925523, 11914390, 4662781, 7820689 } - }, - { - { 12241050, -425982, 8132691, 9393934, 32846760, -1599620, 29749456, 12172924, 16136752, 15264020 }, - { -10349955, -14680563, -8211979, 2330220, -17662549, -14545780, 10658213, 6671822, 19012087, 3772772 }, - { 3753511, -3421066, 10617074, 2028709, 14841030, -6721664, 28718732, -15762884, 20527771, 12988982 } - }, - { - { -14822485, -5797269, -3707987, 12689773, -898983, -10914866, -24183046, -10564943, 3299665, -12424953 }, - { -16777703, -15253301, -9642417, 4978983, 3308785, 8755439, 6943197, 6461331, -25583147, 8991218 }, - { -17226263, 1816362, -1673288, -6086439, 31783888, -8175991, -32948145, 7417950, -30242287, 1507265 } - }, - { - { 29692663, 6829891, -10498800, 4334896, 20945975, -11906496, -28887608, 8209391, 14606362, -10647073 }, - { -3481570, 8707081, 32188102, 5672294, 22096700, 1711240, -33020695, 9761487, 4170404, -2085325 }, - { -11587470, 14855945, -4127778, -1531857, -26649089, 15084046, 22186522, 16002000, -14276837, -8400798 } - }, - { - { -4811456, 13761029, -31703877, -2483919, -3312471, 7869047, -7113572, -9620092, 13240845, 10965870 }, - { -7742563, -8256762, -14768334, -13656260, -23232383, 12387166, 4498947, 14147411, 29514390, 4302863 }, - { -13413405, -12407859, 20757302, -13801832, 14785143, 8976368, -5061276, -2144373, 17846988, -13971927 } - } -}, -{ /* 14/31 */ - { - { -2244452, -754728, -4597030, -1066309, -6247172, 1455299, -21647728, -9214789, -5222701, 12650267 }, - { -9906797, -16070310, 21134160, 12198166, -27064575, 708126, 387813, 13770293, -19134326, 10958663 }, - { 22470984, 12369526, 23446014, -5441109, -21520802, -9698723, -11772496, -11574455, -25083830, 4271862 } - }, - { - { -25169565, -10053642, -19909332, 15361595, -5984358, 2159192, 75375, -4278529, -32526221, 8469673 }, - { 15854970, 4148314, -8893890, 7259002, 11666551, 13824734, -30531198, 2697372, 24154791, -9460943 }, - { 15446137, -15806644, 29759747, 14019369, 30811221, -9610191, -31582008, 12840104, 24913809, 9815020 } - }, - { - { -4709286, -5614269, -31841498, -12288893, -14443537, 10799414, -9103676, 13438769, 18735128, 9466238 }, - { 11933045, 9281483, 5081055, -5183824, -2628162, -4905629, -7727821, -10896103, -22728655, 16199064 }, - { 14576810, 379472, -26786533, -8317236, -29426508, -10812974, -102766, 1876699, 30801119, 2164795 } - }, - { - { 15995086, 3199873, 13672555, 13712240, -19378835, -4647646, -13081610, -15496269, -13492807, 1268052 }, - { -10290614, -3659039, -3286592, 10948818, 23037027, 3794475, -3470338, -12600221, -17055369, 3565904 }, - { 29210088, -9419337, -5919792, -4952785, 10834811, -13327726, -16512102, -10820713, -27162222, -14030531 } - }, - { - { -13161890, 15508588, 16663704, -8156150, -28349942, 9019123, -29183421, -3769423, 2244111, -14001979 }, - { -5152875, -3800936, -9306475, -6071583, 16243069, 14684434, -25673088, -16180800, 13491506, 4641841 }, - { 10813417, 643330, -19188515, -728916, 30292062, -16600078, 27548447, -7721242, 14476989, -12767431 } - }, - { - { 10292079, 9984945, 6481436, 8279905, -7251514, 7032743, 27282937, -1644259, -27912810, 12651324 }, - { -31185513, -813383, 22271204, 11835308, 10201545, 15351028, 17099662, 3988035, 21721536, -3148940 }, - { 10202177, -6545839, -31373232, -9574638, -32150642, -8119683, -12906320, 3852694, 13216206, 14842320 } - }, - { - { -15815640, -10601066, -6538952, -7258995, -6984659, -6581778, -31500847, 13765824, -27434397, 9900184 }, - { 14465505, -13833331, -32133984, -14738873, -27443187, 12990492, 33046193, 15796406, -7051866, -8040114 }, - { 30924417, -8279620, 6359016, -12816335, 16508377, 9071735, -25488601, 15413635, 9524356, -7018878 } - }, - { - { 12274201, -13175547, 32627641, -1785326, 6736625, 13267305, 5237659, -5109483, 15663516, 4035784 }, - { -2951309, 8903985, 17349946, 601635, -16432815, -4612556, -13732739, -15889334, -22258478, 4659091 }, - { -16916263, -4952973, -30393711, -15158821, 20774812, 15897498, 5736189, 15026997, -2178256, -13455585 } - } -}, -{ /* 15/31 */ - { - { -8858980, -2219056, 28571666, -10155518, -474467, -10105698, -3801496, 278095, 23440562, -290208 }, - { 10226241, -5928702, 15139956, 120818, -14867693, 5218603, 32937275, 11551483, -16571960, -7442864 }, - { 17932739, -12437276, -24039557, 10749060, 11316803, 7535897, 22503767, 5561594, -3646624, 3898661 } - }, - { - { 7749907, -969567, -16339731, -16464, -25018111, 15122143, -1573531, 7152530, 21831162, 1245233 }, - { 26958459, -14658026, 4314586, 8346991, -5677764, 11960072, -32589295, -620035, -30402091, -16716212 }, - { -12165896, 9166947, 33491384, 13673479, 29787085, 13096535, 6280834, 14587357, -22338025, 13987525 } - }, - { - { -24349909, 7778775, 21116000, 15572597, -4833266, -5357778, -4300898, -5124639, -7469781, -2858068 }, - { 9681908, -6737123, -31951644, 13591838, -6883821, 386950, 31622781, 6439245, -14581012, 4091397 }, - { -8426427, 1470727, -28109679, -1596990, 3978627, -5123623, -19622683, 12092163, 29077877, -14741988 } - }, - { - { 5269168, -6859726, -13230211, -8020715, 25932563, 1763552, -5606110, -5505881, -20017847, 2357889 }, - { 32264008, -15407652, -5387735, -1160093, -2091322, -3946900, 23104804, -12869908, 5727338, 189038 }, - { 14609123, -8954470, -6000566, -16622781, -14577387, -7743898, -26745169, 10942115, -25888931, -14884697 } - }, - { - { 20513500, 5557931, -15604613, 7829531, 26413943, -2019404, -21378968, 7471781, 13913677, -5137875 }, - { -25574376, 11967826, 29233242, 12948236, -6754465, 4713227, -8940970, 14059180, 12878652, 8511905 }, - { -25656801, 3393631, -2955415, -7075526, -2250709, 9366908, -30223418, 6812974, 5568676, -3127656 } - }, - { - { 11630004, 12144454, 2116339, 13606037, 27378885, 15676917, -17408753, -13504373, -14395196, 8070818 }, - { 27117696, -10007378, -31282771, -5570088, 1127282, 12772488, -29845906, 10483306, -11552749, -1028714 }, - { 10637467, -5688064, 5674781, 1072708, -26343588, -6982302, -1683975, 9177853, -27493162, 15431203 } - }, - { - { 20525145, 10892566, -12742472, 12779443, -29493034, 16150075, -28240519, 14943142, -15056790, -7935931 }, - { -30024462, 5626926, -551567, -9981087, 753598, 11981191, 25244767, -3239766, -3356550, 9594024 }, - { -23752644, 2636870, -5163910, -10103818, 585134, 7877383, 11345683, -6492290, 13352335, -10977084 } - }, - { - { -1931799, -5407458, 3304649, -12884869, 17015806, -4877091, -29783850, -7752482, -13215537, -319204 }, - { 20239939, 6607058, 6203985, 3483793, -18386976, -779229, -20723742, 15077870, -22750759, 14523817 }, - { 27406042, -6041657, 27423596, -4497394, 4996214, 10002360, -28842031, -4545494, -30172742, -4805667 } - } -}, -{ /* 16/31 */ - { - { 11374242, 12660715, 17861383, -12540833, 10935568, 1099227, -13886076, -9091740, -27727044, 11358504 }, - { -12730809, 10311867, 1510375, 10778093, -2119455, -9145702, 32676003, 11149336, -26123651, 4985768 }, - { -19096303, 341147, -6197485, -239033, 15756973, -8796662, -983043, 13794114, -19414307, -15621255 } - }, - { - { 6490081, 11940286, 25495923, -7726360, 8668373, -8751316, 3367603, 6970005, -1691065, -9004790 }, - { 1656497, 13457317, 15370807, 6364910, 13605745, 8362338, -19174622, -5475723, -16796596, -5031438 }, - { -22273315, -13524424, -64685, -4334223, -18605636, -10921968, -20571065, -7007978, -99853, -10237333 } - }, - { - { 17747465, 10039260, 19368299, -4050591, -20630635, -16041286, 31992683, -15857976, -29260363, -5511971 }, - { 31932027, -4986141, -19612382, 16366580, 22023614, 88450, 11371999, -3744247, 4882242, -10626905 }, - { 29796507, 37186, 19818052, 10115756, -11829032, 3352736, 18551198, 3272828, -5190932, -4162409 } - }, - { - { 12501286, 4044383, -8612957, -13392385, -32430052, 5136599, -19230378, -3529697, 330070, -3659409 }, - { 6384877, 2899513, 17807477, 7663917, -2358888, 12363165, 25366522, -8573892, -271295, 12071499 }, - { -8365515, -4042521, 25133448, -4517355, -6211027, 2265927, -32769618, 1936675, -5159697, 3829363 } - }, - { - { 28425966, -5835433, -577090, -4697198, -14217555, 6870930, 7921550, -6567787, 26333140, 14267664 }, - { -11067219, 11871231, 27385719, -10559544, -4585914, -11189312, 10004786, -8709488, -21761224, 8930324 }, - { -21197785, -16396035, 25654216, -1725397, 12282012, 11008919, 1541940, 4757911, -26491501, -16408940 } - }, - { - { 13537262, -7759490, -20604840, 10961927, -5922820, -13218065, -13156584, 6217254, -15943699, 13814990 }, - { -17422573, 15157790, 18705543, 29619, 24409717, -260476, 27361681, 9257833, -1956526, -1776914 }, - { -25045300, -10191966, 15366585, 15166509, -13105086, 8423556, -29171540, 12361135, -18685978, 4578290 } - }, - { - { 24579768, 3711570, 1342322, -11180126, -27005135, 14124956, -22544529, 14074919, 21964432, 8235257 }, - { -6528613, -2411497, 9442966, -5925588, 12025640, -1487420, -2981514, -1669206, 13006806, 2355433 }, - { -16304899, -13605259, -6632427, -5142349, 16974359, -10911083, 27202044, 1719366, 1141648, -12796236 } - }, - { - { -12863944, -13219986, -8318266, -11018091, -6810145, -4843894, 13475066, -3133972, 32674895, 13715045 }, - { 11423335, -5468059, 32344216, 8962751, 24989809, 9241752, -13265253, 16086212, -28740881, -15642093 }, - { -1409668, 12530728, -6368726, 10847387, 19531186, -14132160, -11709148, 7791794, -27245943, 4383347 } - } -}, -{ /* 17/31 */ - { - { -28970898, 5271447, -1266009, -9736989, -12455236, 16732599, -4862407, -4906449, 27193557, 6245191 }, - { -15193956, 5362278, -1783893, 2695834, 4960227, 12840725, 23061898, 3260492, 22510453, 8577507 }, - { -12632451, 11257346, -32692994, 13548177, -721004, 10879011, 31168030, 13952092, -29571492, -3635906 } - }, - { - { 3877321, -9572739, 32416692, 5405324, -11004407, -13656635, 3759769, 11935320, 5611860, 8164018 }, - { -16275802, 14667797, 15906460, 12155291, -22111149, -9039718, 32003002, -8832289, 5773085, -8422109 }, - { -23788118, -8254300, 1950875, 8937633, 18686727, 16459170, -905725, 12376320, 31632953, 190926 } - }, - { - { -24593607, -16138885, -8423991, 13378746, 14162407, 6901328, -8288749, 4508564, -25341555, -3627528 }, - { 8884438, -5884009, 6023974, 10104341, -6881569, -4941533, 18722941, -14786005, -1672488, 827625 }, - { -32720583, -16289296, -32503547, 7101210, 13354605, 2659080, -1800575, -14108036, -24878478, 1541286 } - }, - { - { 2901347, -1117687, 3880376, -10059388, -17620940, -3612781, -21802117, -3567481, 20456845, -1885033 }, - { 27019610, 12299467, -13658288, -1603234, -12861660, -4861471, -19540150, -5016058, 29439641, 15138866 }, - { 21536104, -6626420, -32447818, -10690208, -22408077, 5175814, -5420040, -16361163, 7779328, 109896 } - }, - { - { 30279744, 14648750, -8044871, 6425558, 13639621, -743509, 28698390, 12180118, 23177719, -554075 }, - { 26572847, 3405927, -31701700, 12890905, -19265668, 5335866, -6493768, 2378492, 4439158, -13279347 }, - { -22716706, 3489070, -9225266, -332753, 18875722, -1140095, 14819434, -12731527, -17717757, -5461437 } - }, - { - { -5056483, 16566551, 15953661, 3767752, -10436499, 15627060, -820954, 2177225, 8550082, -15114165 }, - { -18473302, 16596775, -381660, 15663611, 22860960, 15585581, -27844109, -3582739, -23260460, -8428588 }, - { -32480551, 15707275, -8205912, -5652081, 29464558, 2713815, -22725137, 15860482, -21902570, 1494193 } - }, - { - { -19562091, -14087393, -25583872, -9299552, 13127842, 759709, 21923482, 16529112, 8742704, 12967017 }, - { -28464899, 1553205, 32536856, -10473729, -24691605, -406174, -8914625, -2933896, -29903758, 15553883 }, - { 21877909, 3230008, 9881174, 10539357, -4797115, 2841332, 11543572, 14513274, 19375923, -12647961 } - }, - { - { 8832269, -14495485, 13253511, 5137575, 5037871, 4078777, 24880818, -6222716, 2862653, 9455043 }, - { 29306751, 5123106, 20245049, -14149889, 9592566, 8447059, -2077124, -2990080, 15511449, 4789663 }, - { -20679756, 7004547, 8824831, -9434977, -4045704, -3750736, -5754762, 108893, 23513200, 16652362 } - } -}, -{ /* 18/31 */ - { - { -33256173, 4144782, -4476029, -6579123, 10770039, -7155542, -6650416, -12936300, -18319198, 10212860 }, - { 2756081, 8598110, 7383731, -6859892, 22312759, -1105012, 21179801, 2600940, -9988298, -12506466 }, - { -24645692, 13317462, -30449259, -15653928, 21365574, -10869657, 11344424, 864440, -2499677, -16710063 } - }, - { - { -26432803, 6148329, -17184412, -14474154, 18782929, -275997, -22561534, 211300, 2719757, 4940997 }, - { -1323882, 3911313, -6948744, 14759765, -30027150, 7851207, 21690126, 8518463, 26699843, 5276295 }, - { -13149873, -6429067, 9396249, 365013, 24703301, -10488939, 1321586, 149635, -15452774, 7159369 } - }, - { - { 9987780, -3404759, 17507962, 9505530, 9731535, -2165514, 22356009, 8312176, 22477218, -8403385 }, - { 18155857, -16504990, 19744716, 9006923, 15154154, -10538976, 24256460, -4864995, -22548173, 9334109 }, - { 2986088, -4911893, 10776628, -3473844, 10620590, -7083203, -21413845, 14253545, -22587149, 536906 } - }, - { - { 4377756, 8115836, 24567078, 15495314, 11625074, 13064599, 7390551, 10589625, 10838060, -15420424 }, - { -19342404, 867880, 9277171, -3218459, -14431572, -1986443, 19295826, -15796950, 6378260, 699185 }, - { 7895026, 4057113, -7081772, -13077756, -17886831, -323126, -716039, 15693155, -5045064, -13373962 } - }, - { - { -7737563, -5869402, -14566319, -7406919, 11385654, 13201616, 31730678, -10962840, -3918636, -9669325 }, - { 10188286, -15770834, -7336361, 13427543, 22223443, 14896287, 30743455, 7116568, -21786507, 5427593 }, - { 696102, 13206899, 27047647, -10632082, 15285305, -9853179, 10798490, -4578720, 19236243, 12477404 } - }, - { - { -11229439, 11243796, -17054270, -8040865, -788228, -8167967, -3897669, 11180504, -23169516, 7733644 }, - { 17800790, -14036179, -27000429, -11766671, 23887827, 3149671, 23466177, -10538171, 10322027, 15313801 }, - { 26246234, 11968874, 32263343, -5468728, 6830755, -13323031, -15794704, -101982, -24449242, 10890804 } - }, - { - { -31365647, 10271363, -12660625, -6267268, 16690207, -13062544, -14982212, 16484931, 25180797, -5334884 }, - { -586574, 10376444, -32586414, -11286356, 19801893, 10997610, 2276632, 9482883, 316878, 13820577 }, - { -9882808, -4510367, -2115506, 16457136, -11100081, 11674996, 30756178, -7515054, 30696930, -3712849 } - }, - { - { 32988917, -9603412, 12499366, 7910787, -10617257, -11931514, -7342816, -9985397, -32349517, 7392473 }, - { -8855661, 15927861, 9866406, -3649411, -2396914, -16655781, -30409476, -9134995, 25112947, -2926644 }, - { -2504044, -436966, 25621774, -5678772, 15085042, -5479877, -24884878, -13526194, 5537438, -13914319 } - } -}, -{ /* 19/31 */ - { - { -11225584, 2320285, -9584280, 10149187, -33444663, 5808648, -14876251, -1729667, 31234590, 6090599 }, - { -9633316, 116426, 26083934, 2897444, -6364437, -2688086, 609721, 15878753, -6970405, -9034768 }, - { -27757857, 247744, -15194774, -9002551, 23288161, -10011936, -23869595, 6503646, 20650474, 1804084 } - }, - { - { -27589786, 15456424, 8972517, 8469608, 15640622, 4439847, 3121995, -10329713, 27842616, -202328 }, - { -15306973, 2839644, 22530074, 10026331, 4602058, 5048462, 28248656, 5031932, -11375082, 12714369 }, - { 20807691, -7270825, 29286141, 11421711, -27876523, -13868230, -21227475, 1035546, -19733229, 12796920 } - }, - { - { 12076899, -14301286, -8785001, -11848922, -25012791, 16400684, -17591495, -12899438, 3480665, -15182815 }, - { -32361549, 5457597, 28548107, 7833186, 7303070, -11953545, -24363064, -15921875, -33374054, 2771025 }, - { -21389266, 421932, 26597266, 6860826, 22486084, -6737172, -17137485, -4210226, -24552282, 15673397 } - }, - { - { -20184622, 2338216, 19788685, -9620956, -4001265, -8740893, -20271184, 4733254, 3727144, -12934448 }, - { 6120119, 814863, -11794402, -622716, 6812205, -15747771, 2019594, 7975683, 31123697, -10958981 }, - { 30069250, -11435332, 30434654, 2958439, 18399564, -976289, 12296869, 9204260, -16432438, 9648165 } - }, - { - { 32705432, -1550977, 30705658, 7451065, -11805606, 9631813, 3305266, 5248604, -26008332, -11377501 }, - { 17219865, 2375039, -31570947, -5575615, -19459679, 9219903, 294711, 15298639, 2662509, -16297073 }, - { -1172927, -7558695, -4366770, -4287744, -21346413, -8434326, 32087529, -1222777, 32247248, -14389861 } - }, - { - { 14312628, 1221556, 17395390, -8700143, -4945741, -8684635, -28197744, -9637817, -16027623, -13378845 }, - { -1428825, -9678990, -9235681, 6549687, -7383069, -468664, 23046502, 9803137, 17597934, 2346211 }, - { 18510800, 15337574, 26171504, 981392, -22241552, 7827556, -23491134, -11323352, 3059833, -11782870 } - }, - { - { 10141598, 6082907, 17829293, -1947643, 9830092, 13613136, -25556636, -5544586, -33502212, 3592096 }, - { 33114168, -15889352, -26525686, -13343397, 33076705, 8716171, 1151462, 1521897, -982665, -6837803 }, - { -32939165, -4255815, 23947181, -324178, -33072974, -12305637, -16637686, 3891704, 26353178, 693168 } - }, - { - { 30374239, 1595580, -16884039, 13186931, 4600344, 406904, 9585294, -400668, 31375464, 14369965 }, - { -14370654, -7772529, 1510301, 6434173, -18784789, -6262728, 32732230, -13108839, 17901441, 16011505 }, - { 18171223, -11934626, -12500402, 15197122, -11038147, -15230035, -19172240, -16046376, 8764035, 12309598 } - } -}, -{ /* 20/31 */ - { - { 5975908, -5243188, -19459362, -9681747, -11541277, 14015782, -23665757, 1228319, 17544096, -10593782 }, - { 5811932, -1715293, 3442887, -2269310, -18367348, -8359541, -18044043, -15410127, -5565381, 12348900 }, - { -31399660, 11407555, 25755363, 6891399, -3256938, 14872274, -24849353, 8141295, -10632534, -585479 } - }, - { - { -12675304, 694026, -5076145, 13300344, 14015258, -14451394, -9698672, -11329050, 30944593, 1130208 }, - { 8247766, -6710942, -26562381, -7709309, -14401939, -14648910, 4652152, 2488540, 23550156, -271232 }, - { 17294316, -3788438, 7026748, 15626851, 22990044, 113481, 2267737, -5908146, -408818, -137719 } - }, - { - { 16091085, -16253926, 18599252, 7340678, 2137637, -1221657, -3364161, 14550936, 3260525, -7166271 }, - { -4910104, -13332887, 18550887, 10864893, -16459325, -7291596, -23028869, -13204905, -12748722, 2701326 }, - { -8574695, 16099415, 4629974, -16340524, -20786213, -6005432, -10018363, 9276971, 11329923, 1862132 } - }, - { - { 14763076, -15903608, -30918270, 3689867, 3511892, 10313526, -21951088, 12219231, -9037963, -940300 }, - { 8894987, -3446094, 6150753, 3013931, 301220, 15693451, -31981216, -2909717, -15438168, 11595570 }, - { 15214962, 3537601, -26238722, -14058872, 4418657, -15230761, 13947276, 10730794, -13489462, -4363670 } - }, - { - { -2538306, 7682793, 32759013, 263109, -29984731, -7955452, -22332124, -10188635, 977108, 699994 }, - { -12466472, 4195084, -9211532, 550904, -15565337, 12917920, 19118110, -439841, -30534533, -14337913 }, - { 31788461, -14507657, 4799989, 7372237, 8808585, -14747943, 9408237, -10051775, 12493932, -5409317 } - }, - { - { -25680606, 5260744, -19235809, -6284470, -3695942, 16566087, 27218280, 2607121, 29375955, 6024730 }, - { 842132, -2794693, -4763381, -8722815, 26332018, -12405641, 11831880, 6985184, -9940361, 2854096 }, - { -4847262, -7969331, 2516242, -5847713, 9695691, -7221186, 16512645, 960770, 12121869, 16648078 } - }, - { - { -15218652, 14667096, -13336229, 2013717, 30598287, -464137, -31504922, -7882064, 20237806, 2838411 }, - { -19288047, 4453152, 15298546, -16178388, 22115043, -15972604, 12544294, -13470457, 1068881, -12499905 }, - { -9558883, -16518835, 33238498, 13506958, 30505848, -1114596, -8486907, -2630053, 12521378, 4845654 } - }, - { - { -28198521, 10744108, -2958380, 10199664, 7759311, -13088600, 3409348, -873400, -6482306, -12885870 }, - { -23561822, 6230156, -20382013, 10655314, -24040585, -11621172, 10477734, -1240216, -3113227, 13974498 }, - { 12966261, 15550616, -32038948, -1615346, 21025980, -629444, 5642325, 7188737, 18895762, 12629579 } - } -}, -{ /* 21/31 */ - { - { 14741879, -14946887, 22177208, -11721237, 1279741, 8058600, 11758140, 789443, 32195181, 3895677 }, - { 10758205, 15755439, -4509950, 9243698, -4879422, 6879879, -2204575, -3566119, -8982069, 4429647 }, - { -2453894, 15725973, -20436342, -10410672, -5803908, -11040220, -7135870, -11642895, 18047436, -15281743 } - }, - { - { -25173001, -11307165, 29759956, 11776784, -22262383, -15820455, 10993114, -12850837, -17620701, -9408468 }, - { 21987233, 700364, -24505048, 14972008, -7774265, -5718395, 32155026, 2581431, -29958985, 8773375 }, - { -25568350, 454463, -13211935, 16126715, 25240068, 8594567, 20656846, 12017935, -7874389, -13920155 } - }, - { - { 6028182, 6263078, -31011806, -11301710, -818919, 2461772, -31841174, -5468042, -1721788, -2776725 }, - { -12278994, 16624277, 987579, -5922598, 32908203, 1248608, 7719845, -4166698, 28408820, 6816612 }, - { -10358094, -8237829, 19549651, -12169222, 22082623, 16147817, 20613181, 13982702, -10339570, 5067943 } - }, - { - { -30505967, -3821767, 12074681, 13582412, -19877972, 2443951, -19719286, 12746132, 5331210, -10105944 }, - { 30528811, 3601899, -1957090, 4619785, -27361822, -15436388, 24180793, -12570394, 27679908, -1648928 }, - { 9402404, -13957065, 32834043, 10838634, -26580150, -13237195, 26653274, -8685565, 22611444, -12715406 } - }, - { - { 22190590, 1118029, 22736441, 15130463, -30460692, -5991321, 19189625, -4648942, 4854859, 6622139 }, - { -8310738, -2953450, -8262579, -3388049, -10401731, -271929, 13424426, -3567227, 26404409, 13001963 }, - { -31241838, -15415700, -2994250, 8939346, 11562230, -12840670, -26064365, -11621720, -15405155, 11020693 } - }, - { - { 1866042, -7949489, -7898649, -10301010, 12483315, 13477547, 3175636, -12424163, 28761762, 1406734 }, - { -448555, -1777666, 13018551, 3194501, -9580420, -11161737, 24760585, -4347088, 25577411, -13378680 }, - { -24290378, 4759345, -690653, -1852816, 2066747, 10693769, -29595790, 9884936, -9368926, 4745410 } - }, - { - { -9141284, 6049714, -19531061, -4341411, -31260798, 9944276, -15462008, -11311852, 10931924, -11931931 }, - { -16561513, 14112680, -8012645, 4817318, -8040464, -11414606, -22853429, 10856641, -20470770, 13434654 }, - { 22759489, -10073434, -16766264, -1871422, 13637442, -10168091, 1765144, -12654326, 28445307, -5364710 } - }, - { - { 29875063, 12493613, 2795536, -3786330, 1710620, 15181182, -10195717, -8788675, 9074234, 1167180 }, - { -26205683, 11014233, -9842651, -2635485, -26908120, 7532294, -18716888, -9535498, 3843903, 9367684 }, - { -10969595, -6403711, 9591134, 9582310, 11349256, 108879, 16235123, 8601684, -139197, 4242895 } - } -}, -{ /* 22/31 */ - { - { 22092954, -13191123, -2042793, -11968512, 32186753, -11517388, -6574341, 2470660, -27417366, 16625501 }, - { -11057722, 3042016, 13770083, -9257922, 584236, -544855, -7770857, 2602725, -27351616, 14247413 }, - { 6314175, -10264892, -32772502, 15957557, -10157730, 168750, -8618807, 14290061, 27108877, -1180880 } - }, - { - { -8586597, -7170966, 13241782, 10960156, -32991015, -13794596, 33547976, -11058889, -27148451, 981874 }, - { 22833440, 9293594, -32649448, -13618667, -9136966, 14756819, -22928859, -13970780, -10479804, -16197962 }, - { -7768587, 3326786, -28111797, 10783824, 19178761, 14905060, 22680049, 13906969, -15933690, 3797899 } - }, - { - { 21721356, -4212746, -12206123, 9310182, -3882239, -13653110, 23740224, -2709232, 20491983, -8042152 }, - { 9209270, -15135055, -13256557, -6167798, -731016, 15289673, 25947805, 15286587, 30997318, -6703063 }, - { 7392032, 16618386, 23946583, -8039892, -13265164, -1533858, -14197445, -2321576, 17649998, -250080 } - }, - { - { -9301088, -14193827, 30609526, -3049543, -25175069, -1283752, -15241566, -9525724, -2233253, 7662146 }, - { -17558673, 1763594, -33114336, 15908610, -30040870, -12174295, 7335080, -8472199, -3174674, 3440183 }, - { -19889700, -5977008, -24111293, -9688870, 10799743, -16571957, 40450, -4431835, 4862400, 1133 } - }, - { - { -32856209, -7873957, -5422389, 14860950, -16319031, 7956142, 7258061, 311861, -30594991, -7379421 }, - { -3773428, -1565936, 28985340, 7499440, 24445838, 9325937, 29727763, 16527196, 18278453, 15405622 }, - { -4381906, 8508652, -19898366, -3674424, -5984453, 15149970, -13313598, 843523, -21875062, 13626197 } - }, - { - { 2281448, -13487055, -10915418, -2609910, 1879358, 16164207, -10783882, 3953792, 13340839, 15928663 }, - { 31727126, -7179855, -18437503, -8283652, 2875793, -16390330, -25269894, -7014826, -23452306, 5964753 }, - { 4100420, -5959452, -17179337, 6017714, -18705837, 12227141, -26684835, 11344144, 2538215, -7570755 } - }, - { - { -9433605, 6123113, 11159803, -2156608, 30016280, 14966241, -20474983, 1485421, -629256, -15958862 }, - { -26804558, 4260919, 11851389, 9658551, -32017107, 16367492, -20205425, -13191288, 11659922, -11115118 }, - { 26180396, 10015009, -30844224, -8581293, 5418197, 9480663, 2231568, -10170080, 33100372, -1306171 } - }, - { - { 15121113, -5201871, -10389905, 15427821, -27509937, -15992507, 21670947, 4486675, -5931810, -14466380 }, - { 16166486, -9483733, -11104130, 6023908, -31926798, -1364923, 2340060, -16254968, -10735770, -10039824 }, - { 28042865, -3557089, -12126526, 12259706, -3717498, -6945899, 6766453, -8689599, 18036436, 5803270 } - } -}, -{ /* 23/31 */ - { - { -817581, 6763912, 11803561, 1585585, 10958447, -2671165, 23855391, 4598332, -6159431, -14117438 }, - { -31031306, -14256194, 17332029, -2383520, 31312682, -5967183, 696309, 50292, -20095739, 11763584 }, - { -594563, -2514283, -32234153, 12643980, 12650761, 14811489, 665117, -12613632, -19773211, -10713562 } - }, - { - { 30464590, -11262872, -4127476, -12734478, 19835327, -7105613, -24396175, 2075773, -17020157, 992471 }, - { 18357185, -6994433, 7766382, 16342475, -29324918, 411174, 14578841, 8080033, -11574335, -10601610 }, - { 19598397, 10334610, 12555054, 2555664, 18821899, -10339780, 21873263, 16014234, 26224780, 16452269 } - }, - { - { -30223925, 5145196, 5944548, 16385966, 3976735, 2009897, -11377804, -7618186, -20533829, 3698650 }, - { 14187449, 3448569, -10636236, -10810935, -22663880, -3433596, 7268410, -10890444, 27394301, 12015369 }, - { 19695761, 16087646, 28032085, 12999827, 6817792, 11427614, 20244189, -1312777, -13259127, -3402461 } - }, - { - { 30860103, 12735208, -1888245, -4699734, -16974906, 2256940, -8166013, 12298312, -8550524, -10393462 }, - { -5719826, -11245325, -1910649, 15569035, 26642876, -7587760, -5789354, -15118654, -4976164, 12651793 }, - { -2848395, 9953421, 11531313, -5282879, 26895123, -12697089, -13118820, -16517902, 9768698, -2533218 } - }, - { - { -24719459, 1894651, -287698, -4704085, 15348719, -8156530, 32767513, 12765450, 4940095, 10678226 }, - { 18860224, 15980149, -18987240, -1562570, -26233012, -11071856, -7843882, 13944024, -24372348, 16582019 }, - { -15504260, 4970268, -29893044, 4175593, -20993212, -2199756, -11704054, 15444560, -11003761, 7989037 } - }, - { - { 31490452, 5568061, -2412803, 2182383, -32336847, 4531686, -32078269, 6200206, -19686113, -14800171 }, - { -17308668, -15879940, -31522777, -2831, -32887382, 16375549, 8680158, -16371713, 28550068, -6857132 }, - { -28126887, -5688091, 16837845, -1820458, -6850681, 12700016, -30039981, 4364038, 1155602, 5988841 } - }, - { - { 21890435, -13272907, -12624011, 12154349, -7831873, 15300496, 23148983, -4470481, 24618407, 8283181 }, - { -33136107, -10512751, 9975416, 6841041, -31559793, 16356536, 3070187, -7025928, 1466169, 10740210 }, - { -1509399, -15488185, -13503385, -10655916, 32799044, 909394, -13938903, -5779719, -32164649, -15327040 } - }, - { - { 3960823, -14267803, -28026090, -15918051, -19404858, 13146868, 15567327, 951507, -3260321, -573935 }, - { 24740841, 5052253, -30094131, 8961361, 25877428, 6165135, -24368180, 14397372, -7380369, -6144105 }, - { -28888365, 3510803, -28103278, -1158478, -11238128, -10631454, -15441463, -14453128, -1625486, -6494814 } - } -}, -{ /* 24/31 */ - { - { 793299, -9230478, 8836302, -6235707, -27360908, -2369593, 33152843, -4885251, -9906200, -621852 }, - { 5666233, 525582, 20782575, -8038419, -24538499, 14657740, 16099374, 1468826, -6171428, -15186581 }, - { -4859255, -3779343, -2917758, -6748019, 7778750, 11688288, -30404353, -9871238, -1558923, -9863646 } - }, - { - { 10896332, -7719704, 824275, 472601, -19460308, 3009587, 25248958, 14783338, -30581476, -15757844 }, - { 10566929, 12612572, -31944212, 11118703, -12633376, 12362879, 21752402, 8822496, 24003793, 14264025 }, - { 27713862, -7355973, -11008240, 9227530, 27050101, 2504721, 23886875, -13117525, 13958495, -5732453 } - }, - { - { -23481610, 4867226, -27247128, 3900521, 29838369, -8212291, -31889399, -10041781, 7340521, -15410068 }, - { 4646514, -8011124, -22766023, -11532654, 23184553, 8566613, 31366726, -1381061, -15066784, -10375192 }, - { -17270517, 12723032, -16993061, 14878794, 21619651, -6197576, 27584817, 3093888, -8843694, 3849921 } - }, - { - { -9064912, 2103172, 25561640, -15125738, -5239824, 9582958, 32477045, -9017955, 5002294, -15550259 }, - { -12057553, -11177906, 21115585, -13365155, 8808712, -12030708, 16489530, 13378448, -25845716, 12741426 }, - { -5946367, 10645103, -30911586, 15390284, -3286982, -7118677, 24306472, 15852464, 28834118, -7646072 } - }, - { - { -17335748, -9107057, -24531279, 9434953, -8472084, -583362, -13090771, 455841, 20461858, 5491305 }, - { 13669248, -16095482, -12481974, -10203039, -14569770, -11893198, -24995986, 11293807, -28588204, -9421832 }, - { 28497928, 6272777, -33022994, 14470570, 8906179, -1225630, 18504674, -14165166, 29867745, -8795943 } - }, - { - { -16207023, 13517196, -27799630, -13697798, 24009064, -6373891, -6367600, -13175392, 22853429, -4012011 }, - { 24191378, 16712145, -13931797, 15217831, 14542237, 1646131, 18603514, -11037887, 12876623, -2112447 }, - { 17902668, 4518229, -411702, -2829247, 26878217, 5258055, -12860753, 608397, 16031844, 3723494 } - }, - { - { -28632773, 12763728, -20446446, 7577504, 33001348, -13017745, 17558842, -7872890, 23896954, -4314245 }, - { -20005381, -12011952, 31520464, 605201, 2543521, 5991821, -2945064, 7229064, -9919646, -8826859 }, - { 28816045, 298879, -28165016, -15920938, 19000928, -1665890, -12680833, -2949325, -18051778, -2082915 } - }, - { - { 16000882, -344896, 3493092, -11447198, -29504595, -13159789, 12577740, 16041268, -19715240, 7847707 }, - { 10151868, 10572098, 27312476, 7922682, 14825339, 4723128, -32855931, -6519018, -10020567, 3852848 }, - { -11430470, 15697596, -21121557, -4420647, 5386314, 15063598, 16514493, -15932110, 29330899, -15076224 } - } -}, -{ /* 25/31 */ - { - { -25499735, -4378794, -15222908, -6901211, 16615731, 2051784, 3303702, 15490, -27548796, 12314391 }, - { 15683520, -6003043, 18109120, -9980648, 15337968, -5997823, -16717435, 15921866, 16103996, -3731215 }, - { -23169824, -10781249, 13588192, -1628807, -3798557, -1074929, -19273607, 5402699, -29815713, -9841101 } - }, - { - { 23190676, 2384583, -32714340, 3462154, -29903655, -1529132, -11266856, 8911517, -25205859, 2739713 }, - { 21374101, -3554250, -33524649, 9874411, 15377179, 11831242, -33529904, 6134907, 4931255, 11987849 }, - { -7732, -2978858, -16223486, 7277597, 105524, -322051, -31480539, 13861388, -30076310, 10117930 } - }, - { - { -29501170, -10744872, -26163768, 13051539, -25625564, 5089643, -6325503, 6704079, 12890019, 15728940 }, - { -21972360, -11771379, -951059, -4418840, 14704840, 2695116, 903376, -10428139, 12885167, 8311031 }, - { -17516482, 5352194, 10384213, -13811658, 7506451, 13453191, 26423267, 4384730, 1888765, -5435404 } - }, - { - { -25817338, -3107312, -13494599, -3182506, 30896459, -13921729, -32251644, -12707869, -19464434, -3340243 }, - { -23607977, -2665774, -526091, 4651136, 5765089, 4618330, 6092245, 14845197, 17151279, -9854116 }, - { -24830458, -12733720, -15165978, 10367250, -29530908, -265356, 22825805, -7087279, -16866484, 16176525 } - }, - { - { -23583256, 6564961, 20063689, 3798228, -4740178, 7359225, 2006182, -10363426, -28746253, -10197509 }, - { -10626600, -4486402, -13320562, -5125317, 3432136, -6393229, 23632037, -1940610, 32808310, 1099883 }, - { 15030977, 5768825, -27451236, -2887299, -6427378, -15361371, -15277896, -6809350, 2051441, -15225865 } - }, - { - { -3362323, -7239372, 7517890, 9824992, 23555850, 295369, 5148398, -14154188, -22686354, 16633660 }, - { 4577086, -16752288, 13249841, -15304328, 19958763, -14537274, 18559670, -10759549, 8402478, -9864273 }, - { -28406330, -1051581, -26790155, -907698, -17212414, -11030789, 9453451, -14980072, 17983010, 9967138 } - }, - { - { -25762494, 6524722, 26585488, 9969270, 24709298, 1220360, -1677990, 7806337, 17507396, 3651560 }, - { -10420457, -4118111, 14584639, 15971087, -15768321, 8861010, 26556809, -5574557, -18553322, -11357135 }, - { 2839101, 14284142, 4029895, 3472686, 14402957, 12689363, -26642121, 8459447, -5605463, -7621941 } - }, - { - { -4839289, -3535444, 9744961, 2871048, 25113978, 3187018, -25110813, -849066, 17258084, -7977739 }, - { 18164541, -10595176, -17154882, -1542417, 19237078, -9745295, 23357533, -15217008, 26908270, 12150756 }, - { -30264870, -7647865, 5112249, -7036672, -1499807, -6974257, 43168, -5537701, -32302074, 16215819 } - } -}, -{ /* 26/31 */ - { - { -6898905, 9824394, -12304779, -4401089, -31397141, -6276835, 32574489, 12532905, -7503072, -8675347 }, - { -27343522, -16515468, -27151524, -10722951, 946346, 16291093, 254968, 7168080, 21676107, -1943028 }, - { 21260961, -8424752, -16831886, -11920822, -23677961, 3968121, -3651949, -6215466, -3556191, -7913075 } - }, - { - { 16544754, 13250366, -16804428, 15546242, -4583003, 12757258, -2462308, -8680336, -18907032, -9662799 }, - { -2415239, -15577728, 18312303, 4964443, -15272530, -12653564, 26820651, 16690659, 25459437, -4564609 }, - { -25144690, 11425020, 28423002, -11020557, -6144921, -15826224, 9142795, -2391602, -6432418, -1644817 } - }, - { - { -23104652, 6253476, 16964147, -3768872, -25113972, -12296437, -27457225, -16344658, 6335692, 7249989 }, - { -30333227, 13979675, 7503222, -12368314, -11956721, -4621693, -30272269, 2682242, 25993170, -12478523 }, - { 4364628, 5930691, 32304656, -10044554, -8054781, 15091131, 22857016, -10598955, 31820368, 15075278 } - }, - { - { 31879134, -8918693, 17258761, 90626, -8041836, -4917709, 24162788, -9650886, -17970238, 12833045 }, - { 19073683, 14851414, -24403169, -11860168, 7625278, 11091125, -19619190, 2074449, -9413939, 14905377 }, - { 24483667, -11935567, -2518866, -11547418, -1553130, 15355506, -25282080, 9253129, 27628530, -7555480 } - }, - { - { 17597607, 8340603, 19355617, 552187, 26198470, -3176583, 4593324, -9157582, -14110875, 15297016 }, - { 510886, 14337390, -31785257, 16638632, 6328095, 2713355, -20217417, -11864220, 8683221, 2921426 }, - { 18606791, 11874196, 27155355, -5281482, -24031742, 6265446, -25178240, -1278924, 4674690, 13890525 } - }, - { - { 13609624, 13069022, -27372361, -13055908, 24360586, 9592974, 14977157, 9835105, 4389687, 288396 }, - { 9922506, -519394, 13613107, 5883594, -18758345, -434263, -12304062, 8317628, 23388070, 16052080 }, - { 12720016, 11937594, -31970060, -5028689, 26900120, 8561328, -20155687, -11632979, -14754271, -10812892 } - }, - { - { 15961858, 14150409, 26716931, -665832, -22794328, 13603569, 11829573, 7467844, -28822128, 929275 }, - { 11038231, -11582396, -27310482, -7316562, -10498527, -16307831, -23479533, -9371869, -21393143, 2465074 }, - { 20017163, -4323226, 27915242, 1529148, 12396362, 15675764, 13817261, -9658066, 2463391, -4622140 } - }, - { - { -16358878, -12663911, -12065183, 4996454, -1256422, 1073572, 9583558, 12851107, 4003896, 12673717 }, - { -1731589, -15155870, -3262930, 16143082, 19294135, 13385325, 14741514, -9103726, 7903886, 2348101 }, - { 24536016, -16515207, 12715592, -3862155, 1511293, 10047386, -3842346, -7129159, -28377538, 10048127 } - } -}, -{ /* 27/31 */ - { - { -12622226, -6204820, 30718825, 2591312, -10617028, 12192840, 18873298, -7297090, -32297756, 15221632 }, - { -26478122, -11103864, 11546244, -1852483, 9180880, 7656409, -21343950, 2095755, 29769758, 6593415 }, - { -31994208, -2907461, 4176912, 3264766, 12538965, -868111, 26312345, -6118678, 30958054, 8292160 } - }, - { - { 31429822, -13959116, 29173532, 15632448, 12174511, -2760094, 32808831, 3977186, 26143136, -3148876 }, - { 22648901, 1402143, -22799984, 13746059, 7936347, 365344, -8668633, -1674433, -3758243, -2304625 }, - { -15491917, 8012313, -2514730, -12702462, -23965846, -10254029, -1612713, -1535569, -16664475, 8194478 } - }, - { - { 27338066, -7507420, -7414224, 10140405, -19026427, -6589889, 27277191, 8855376, 28572286, 3005164 }, - { 26287124, 4821776, 25476601, -4145903, -3764513, -15788984, -18008582, 1182479, -26094821, -13079595 }, - { -7171154, 3178080, 23970071, 6201893, -17195577, -4489192, -21876275, -13982627, 32208683, -1198248 } - }, - { - { -16657702, 2817643, -10286362, 14811298, 6024667, 13349505, -27315504, -10497842, -27672585, -11539858 }, - { 15941029, -9405932, -21367050, 8062055, 31876073, -238629, -15278393, -1444429, 15397331, -4130193 }, - { 8934485, -13485467, -23286397, -13423241, -32446090, 14047986, 31170398, -1441021, -27505566, 15087184 } - }, - { - { -18357243, -2156491, 24524913, -16677868, 15520427, -6360776, -15502406, 11461896, 16788528, -5868942 }, - { -1947386, 16013773, 21750665, 3714552, -17401782, -16055433, -3770287, -10323320, 31322514, -11615635 }, - { 21426655, -5650218, -13648287, -5347537, -28812189, -4920970, -18275391, -14621414, 13040862, -12112948 } - }, - { - { 11293895, 12478086, -27136401, 15083750, -29307421, 14748872, 14555558, -13417103, 1613711, 4896935 }, - { -25894883, 15323294, -8489791, -8057900, 25967126, -13425460, 2825960, -4897045, -23971776, -11267415 }, - { -15924766, -5229880, -17443532, 6410664, 3622847, 10243618, 20615400, 12405433, -23753030, -8436416 } - }, - { - { -7091295, 12556208, -20191352, 9025187, -17072479, 4333801, 4378436, 2432030, 23097949, -566018 }, - { 4565804, -16025654, 20084412, -7842817, 1724999, 189254, 24767264, 10103221, -18512313, 2424778 }, - { 366633, -11976806, 8173090, -6890119, 30788634, 5745705, -7168678, 1344109, -3642553, 12412659 } - }, - { - { -24001791, 7690286, 14929416, -168257, -32210835, -13412986, 24162697, -15326504, -3141501, 11179385 }, - { 18289522, -14724954, 8056945, 16430056, -21729724, 7842514, -6001441, -1486897, -18684645, -11443503 }, - { 476239, 6601091, -6152790, -9723375, 17503545, -4863900, 27672959, 13403813, 11052904, 5219329 } - } -}, -{ /* 28/31 */ - { - { 20678546, -8375738, -32671898, 8849123, -5009758, 14574752, 31186971, -3973730, 9014762, -8579056 }, - { -13644050, -10350239, -15962508, 5075808, -1514661, -11534600, -33102500, 9160280, 8473550, -3256838 }, - { 24900749, 14435722, 17209120, -15292541, -22592275, 9878983, -7689309, -16335821, -24568481, 11788948 } - }, - { - { -3118155, -11395194, -13802089, 14797441, 9652448, -6845904, -20037437, 10410733, -24568470, -1458691 }, - { -15659161, 16736706, -22467150, 10215878, -9097177, 7563911, 11871841, -12505194, -18513325, 8464118 }, - { -23400612, 8348507, -14585951, -861714, -3950205, -6373419, 14325289, 8628612, 33313881, -8370517 } - }, - { - { -20186973, -4967935, 22367356, 5271547, -1097117, -4788838, -24805667, -10236854, -8940735, -5818269 }, - { -6948785, -1795212, -32625683, -16021179, 32635414, -7374245, 15989197, -12838188, 28358192, -4253904 }, - { -23561781, -2799059, -32351682, -1661963, -9147719, 10429267, -16637684, 4072016, -5351664, 5596589 } - }, - { - { -28236598, -3390048, 12312896, 6213178, 3117142, 16078565, 29266239, 2557221, 1768301, 15373193 }, - { -7243358, -3246960, -4593467, -7553353, -127927, -912245, -1090902, -4504991, -24660491, 3442910 }, - { -30210571, 5124043, 14181784, 8197961, 18964734, -11939093, 22597931, 7176455, -18585478, 13365930 } - }, - { - { -7877390, -1499958, 8324673, 4690079, 6261860, 890446, 24538107, -8570186, -9689599, -3031667 }, - { 25008904, -10771599, -4305031, -9638010, 16265036, 15721635, 683793, -11823784, 15723479, -15163481 }, - { -9660625, 12374379, -27006999, -7026148, -7724114, -12314514, 11879682, 5400171, 519526, -1235876 } - }, - { - { 22258397, -16332233, -7869817, 14613016, -22520255, -2950923, -20353881, 7315967, 16648397, 7605640 }, - { -8081308, -8464597, -8223311, 9719710, 19259459, -15348212, 23994942, -5281555, -9468848, 4763278 }, - { -21699244, 9220969, -15730624, 1084137, -25476107, -2852390, 31088447, -7764523, -11356529, 728112 } - }, - { - { 26047220, -11751471, -6900323, -16521798, 24092068, 9158119, -4273545, -12555558, -29365436, -5498272 }, - { 17510331, -322857, 5854289, 8403524, 17133918, -3112612, -28111007, 12327945, 10750447, 10014012 }, - { -10312768, 3936952, 9156313, -8897683, 16498692, -994647, -27481051, -666732, 3424691, 7540221 } - }, - { - { 30322361, -6964110, 11361005, -4143317, 7433304, 4989748, -7071422, -16317219, -9244265, 15258046 }, - { 13054562, -2779497, 19155474, 469045, -12482797, 4566042, 5631406, 2711395, 1062915, -5136345 }, - { -19240248, -11254599, -29509029, -7499965, -5835763, 13005411, -6066489, 12194497, 32960380, 1459310 } - } -}, -{ /* 29/31 */ - { - { 19852034, 7027924, 23669353, 10020366, 8586503, -6657907, 394197, -6101885, 18638003, -11174937 }, - { 31395534, 15098109, 26581030, 8030562, -16527914, -5007134, 9012486, -7584354, -6643087, -5442636 }, - { -9192165, -2347377, -1997099, 4529534, 25766844, 607986, -13222, 9677543, -32294889, -6456008 } - }, - { - { -2444496, -149937, 29348902, 8186665, 1873760, 12489863, -30934579, -7839692, -7852844, -8138429 }, - { -15236356, -15433509, 7766470, 746860, 26346930, -10221762, -27333451, 10754588, -9431476, 5203576 }, - { 31834314, 14135496, -770007, 5159118, 20917671, -16768096, -7467973, -7337524, 31809243, 7347066 } - }, - { - { -9606723, -11874240, 20414459, 13033986, 13716524, -11691881, 19797970, -12211255, 15192876, -2087490 }, - { -12663563, -2181719, 1168162, -3804809, 26747877, -14138091, 10609330, 12694420, 33473243, -13382104 }, - { 33184999, 11180355, 15832085, -11385430, -1633671, 225884, 15089336, -11023903, -6135662, 14480053 } - }, - { - { 31308717, -5619998, 31030840, -1897099, 15674547, -6582883, 5496208, 13685227, 27595050, 8737275 }, - { -20318852, -15150239, 10933843, -16178022, 8335352, -7546022, -31008351, -12610604, 26498114, 66511 }, - { 22644454, -8761729, -16671776, 4884562, -3105614, -13559366, 30540766, -4286747, -13327787, -7515095 } - }, - { - { -28017847, 9834845, 18617207, -2681312, -3401956, -13307506, 8205540, 13585437, -17127465, 15115439 }, - { 23711543, -672915, 31206561, -8362711, 6164647, -9709987, -33535882, -1426096, 8236921, 16492939 }, - { -23910559, -13515526, -26299483, -4503841, 25005590, -7687270, 19574902, 10071562, 6708380, -6222424 } - }, - { - { 2101391, -4930054, 19702731, 2367575, -15427167, 1047675, 5301017, 9328700, 29955601, -11678310 }, - { 3096359, 9271816, -21620864, -15521844, -14847996, -7592937, -25892142, -12635595, -9917575, 6216608 }, - { -32615849, 338663, -25195611, 2510422, -29213566, -13820213, 24822830, -6146567, -26767480, 7525079 } - }, - { - { -23066649, -13985623, 16133487, -7896178, -3389565, 778788, -910336, -2782495, -19386633, 11994101 }, - { 21691500, -13624626, -641331, -14367021, 3285881, -3483596, -25064666, 9718258, -7477437, 13381418 }, - { 18445390, -4202236, 14979846, 11622458, -1727110, -3582980, 23111648, -6375247, 28535282, 15779576 } - }, - { - { 30098053, 3089662, -9234387, 16662135, -21306940, 11308411, -14068454, 12021730, 9955285, -16303356 }, - { 9734894, -14576830, -7473633, -9138735, 2060392, 11313496, -18426029, 9924399, 20194861, 13380996 }, - { -26378102, -7965207, -22167821, 15789297, -18055342, -6168792, -1984914, 15707771, 26342023, 10146099 } - } -}, -{ /* 30/31 */ - { - { -26016874, -219943, 21339191, -41388, 19745256, -2878700, -29637280, 2227040, 21612326, -545728 }, - { -13077387, 1184228, 23562814, -5970442, -20351244, -6348714, 25764461, 12243797, -20856566, 11649658 }, - { -10031494, 11262626, 27384172, 2271902, 26947504, -15997771, 39944, 6114064, 33514190, 2333242 } - }, - { - { -21433588, -12421821, 8119782, 7219913, -21830522, -9016134, -6679750, -12670638, 24350578, -13450001 }, - { -4116307, -11271533, -23886186, 4843615, -30088339, 690623, -31536088, -10406836, 8317860, 12352766 }, - { 18200138, -14475911, -33087759, -2696619, -23702521, -9102511, -23552096, -2287550, 20712163, 6719373 } - }, - { - { 26656208, 6075253, -7858556, 1886072, -28344043, 4262326, 11117530, -3763210, 26224235, -3297458 }, - { -17168938, -14854097, -3395676, -16369877, -19954045, 14050420, 21728352, 9493610, 18620611, -16428628 }, - { -13323321, 13325349, 11432106, 5964811, 18609221, 6062965, -5269471, -9725556, -30701573, -16479657 } - }, - { - { -23860538, -11233159, 26961357, 1640861, -32413112, -16737940, 12248509, -5240639, 13735342, 1934062 }, - { 25089769, 6742589, 17081145, -13406266, 21909293, -16067981, -15136294, -3765346, -21277997, 5473616 }, - { 31883677, -7961101, 1083432, -11572403, 22828471, 13290673, -7125085, 12469656, 29111212, -5451014 } - }, - { - { 24244947, -15050407, -26262976, 2791540, -14997599, 16666678, 24367466, 6388839, -10295587, 452383 }, - { -25640782, -3417841, 5217916, 16224624, 19987036, -4082269, -24236251, -5915248, 15766062, 8407814 }, - { -20406999, 13990231, 15495425, 16395525, 5377168, 15166495, -8917023, -4388953, -8067909, 2276718 } - }, - { - { 30157918, 12924066, -17712050, 9245753, 19895028, 3368142, -23827587, 5096219, 22740376, -7303417 }, - { 2041139, -14256350, 7783687, 13876377, -25946985, -13352459, 24051124, 13742383, -15637599, 13295222 }, - { 33338237, -8505733, 12532113, 7977527, 9106186, -1715251, -17720195, -4612972, -4451357, -14669444 } - }, - { - { -20045281, 5454097, -14346548, 6447146, 28862071, 1883651, -2469266, -4141880, 7770569, 9620597 }, - { 23208068, 7979712, 33071466, 8149229, 1758231, -10834995, 30945528, -1694323, -33502340, -14767970 }, - { 1439958, -16270480, -1079989, -793782, 4625402, 10647766, -5043801, 1220118, 30494170, -11440799 } - }, - { - { -5037580, -13028295, -2970559, -3061767, 15640974, -6701666, -26739026, 926050, -1684339, -13333647 }, - { 13908495, -3549272, 30919928, -6273825, -21521863, 7989039, 9021034, 9078865, 3353509, 4033511 }, - { -29663431, -15113610, 32259991, -344482, 24295849, -12912123, 23161163, 8839127, 27485041, 7356032 } - } -}, +{/* 0/31 */ + {{25967493, -14356035, 29566456, 3660896, -12694345, 4014787, 27544626, + -11754271, -6079156, 2047605}, + {-12545711, 934262, -2722910, 3049990, -727428, 9406986, 12720692, 5043384, + 19500929, -15469378}, + {-8738181, 4489570, 9688441, -14785194, 10184609, -12363380, 29287919, + 11864899, -24514362, -4438546}}, + {{-12815894, -12976347, -21581243, 11784320, -25355658, -2750717, -11717903, + -3814571, -358445, -10211303}, + {-21703237, 6903825, 27185491, 6451973, -29577724, -9554005, -15616551, + 11189268, -26829678, -5319081}, + {26966642, 11152617, 32442495, 15396054, 14353839, -12752335, -3128826, + -9541118, -15472047, -4166697}}, + {{15636291, -9688557, 24204773, -7912398, 616977, -16685262, 27787600, + -14772189, 28944400, -1550024}, + {16568933, 4717097, -11556148, -1102322, 15682896, -11807043, 16354577, + -11775962, 7689662, 11199574}, + {30464156, -5976125, -11779434, -15670865, 23220365, 15915852, 7512774, + 10017326, -17749093, -9920357}}, + {{-17036878, 13921892, 10945806, -6033431, 27105052, -16084379, -28926210, + 15006023, 3284568, -6276540}, + {23599295, -8306047, -11193664, -7687416, 13236774, 10506355, 7464579, + 9656445, 13059162, 10374397}, + {7798556, 16710257, 3033922, 2874086, 28997861, 2835604, 32406664, -3839045, + -641708, -101325}}, + {{10861363, 11473154, 27284546, 1981175, -30064349, 12577861, 32867885, + 14515107, -15438304, 10819380}, + {4708026, 6336745, 20377586, 9066809, -11272109, 6594696, -25653668, 12483688, + -12668491, 5581306}, + {19563160, 16186464, -29386857, 4097519, 10237984, -4348115, 28542350, + 13850243, -23678021, -15815942}}, + {{-15371964, -12862754, 32573250, 4720197, -26436522, 5875511, -19188627, + -15224819, -9818940, -12085777}, + {-8549212, 109983, 15149363, 2178705, 22900618, 4543417, 3044240, -15689887, + 1762328, 14866737}, + {-18199695, -15951423, -10473290, 1707278, -17185920, 3916101, -28236412, + 3959421, 27914454, 4383652}}, + {{5153746, 9909285, 1723747, -2777874, 30523605, 5516873, 19480852, 5230134, + -23952439, -15175766}, + {-30269007, -3463509, 7665486, 10083793, 28475525, 1649722, 20654025, + 16520125, 30598449, 7715701}, + {28881845, 14381568, 9657904, 3680757, -20181635, 7843316, -31400660, 1370708, + 29794553, -1409300}}, + {{14499471, -2729599, -33191113, -4254652, 28494862, 14271267, 30290735, + 10876454, -33154098, 2381726}, + {-7195431, -2655363, -14730155, 462251, -27724326, 3941372, -6236617, 3696005, + -32300832, 15351955}, + {27431194, 8222322, 16448760, -3907995, -18707002, 11938355, -32961401, + -2970515, 29551813, 10109425}}}, + {/* 1/31 */ + {{-13657040, -13155431, -31283750, 11777098, 21447386, 6519384, -2378284, + -1627556, 10092783, -4764171}, + {27939166, 14210322, 4677035, 16277044, -22964462, -12398139, -32508754, + 12005538, -17810127, 12803510}, + {17228999, -15661624, -1233527, 300140, -1224870, -11714777, 30364213, + -9038194, 18016357, 4397660}}, + {{-10958843, -7690207, 4776341, -14954238, 27850028, -15602212, -26619106, + 14544525, -17477504, 982639}, + {29253598, 15796703, -2863982, -9908884, 10057023, 3163536, 7332899, + -4120128, -21047696, 9934963}, + {5793303, 16271923, -24131614, -10116404, 29188560, 1206517, -14747930, + 4559895, -30123922, -10897950}}, + {{-27643952, -11493006, 16282657, -11036493, 28414021, -15012264, 24191034, + 4541697, -13338309, 5500568}, + {12650548, -1497113, 9052871, 11355358, -17680037, -8400164, -17430592, + 12264343, 10874051, 13524335}, + {25556948, -3045990, 714651, 2510400, 23394682, -10415330, 33119038, + 5080568, -22528059, 5376628}}, + {{-26088264, -4011052, -17013699, -3537628, -6726793, 1920897, -22321305, + -9447443, 4535768, 1569007}, + {-2255422, 14606630, -21692440, -8039818, 28430649, 8775819, -30494562, + 3044290, 31848280, 12543772}, + {-22028579, 2943893, -31857513, 6777306, 13784462, -4292203, -27377195, + -2062731, 7718482, 14474653}}, + {{2385315, 2454213, -22631320, 46603, -4437935, -15680415, 656965, + -7236665, 24316168, -5253567}, + {13741529, 10911568, -33233417, -8603737, -20177830, -1033297, 33040651, + -13424532, -20729456, 8321686}, + {21060490, -2212744, 15712757, -4336099, 1639040, 10656336, 23845965, + -11874838, -9984458, 608372}}, + {{-13672732, -15087586, -10889693, -7557059, -6036909, 11305547, 1123968, + -6780577, 27229399, 23887}, + {-23244140, -294205, -11744728, 14712571, -29465699, -2029617, 12797024, + -6440308, -1633405, 16678954}, + {-29500620, 4770662, -16054387, 14001338, 7830047, 9564805, -1508144, + -4795045, -17169265, 4904953}}, + {{24059557, 14617003, 19037157, -15039908, 19766093, -14906429, 5169211, + 16191880, 2128236, -4326833}, + {-16981152, 4124966, -8540610, -10653797, 30336522, -14105247, -29806336, + 916033, -6882542, -2986532}, + {-22630907, 12419372, -7134229, -7473371, -16478904, 16739175, 285431, + 2763829, 15736322, 4143876}}, + {{2379352, 11839345, -4110402, -5988665, 11274298, 794957, 212801, + -14594663, 23527084, -16458268}, + {33431127, -11130478, -17838966, -15626900, 8909499, 8376530, -32625340, + 4087881, -15188911, -14416214}, + {1767683, 7197987, -13205226, -2022635, -13091350, 448826, 5799055, + 4357868, -4774191, -16323038}}}, + {/* 2/31 */ + {{6721966, 13833823, -23523388, -1551314, 26354293, -11863321, 23365147, + -3949732, 7390890, 2759800}, + {4409041, 2052381, 23373853, 10530217, 7676779, -12885954, 21302353, + -4264057, 1244380, -12919645}, + {-4421239, 7169619, 4982368, -2957590, 30256825, -2777540, 14086413, + 9208236, 15886429, 16489664}}, + {{1996075, 10375649, 14346367, 13311202, -6874135, -16438411, -13693198, + 398369, -30606455, -712933}, + {-25307465, 9795880, -2777414, 14878809, -33531835, 14780363, 13348553, + 12076947, -30836462, 5113182}, + {-17770784, 11797796, 31950843, 13929123, -25888302, 12288344, -30341101, + -7336386, 13847711, 5387222}}, + {{-18582163, -3416217, 17824843, -2340966, 22744343, -10442611, 8763061, + 3617786, -19600662, 10370991}, + {20246567, -14369378, 22358229, -543712, 18507283, -10413996, 14554437, + -8746092, 32232924, 16763880}, + {9648505, 10094563, 26416693, 14745928, -30374318, -6472621, 11094161, + 15689506, 3140038, -16510092}}, + {{-16160072, 5472695, 31895588, 4744994, 8823515, 10365685, -27224800, + 9448613, -28774454, 366295}, + {19153450, 11523972, -11096490, -6503142, -24647631, 5420647, 28344573, + 8041113, 719605, 11671788}, + {8678025, 2694440, -6808014, 2517372, 4964326, 11152271, -15432916, + -15266516, 27000813, -10195553}}, + {{-15157904, 7134312, 8639287, -2814877, -7235688, 10421742, 564065, + 5336097, 6750977, -14521026}, + {11836410, -3979488, 26297894, 16080799, 23455045, 15735944, 1695823, + -8819122, 8169720, 16220347}, + {-18115838, 8653647, 17578566, -6092619, -8025777, -16012763, -11144307, + -2627664, -5990708, -14166033}}, + {{-23308498, -10968312, 15213228, -10081214, -30853605, -11050004, + 27884329, 2847284, 2655861, 1738395}, + {-27537433, -14253021, -25336301, -8002780, -9370762, 8129821, 21651608, + -3239336, -19087449, -11005278}, + {1533110, 3437855, 23735889, 459276, 29970501, 11335377, 26030092, + 5821408, 10478196, 8544890}}, + {{32173121, -16129311, 24896207, 3921497, 22579056, -3410854, 19270449, + 12217473, 17789017, -3395995}, + {-30552961, -2228401, -15578829, -10147201, 13243889, 517024, 15479401, + -3853233, 30460520, 1052596}, + {-11614875, 13323618, 32618793, 8175907, -15230173, 12596687, 27491595, + -4612359, 3179268, -9478891}}, + {{31947069, -14366651, -4640583, -15339921, -15125977, -6039709, -14756777, + -16411740, 19072640, -9511060}, + {11685058, 11822410, 3158003, -13952594, 33402194, -4165066, 5977896, + -5215017, 473099, 5040608}, + {-20290863, 8198642, -27410132, 11602123, 1290375, -2799760, 28326862, + 1721092, -19558642, -3131606}}}, + {/* 3/31 */ + {{7881532, 10687937, 7578723, 7738378, -18951012, -2553952, 21820786, + 8076149, -27868496, 11538389}, + {-19935666, 3899861, 18283497, -6801568, -15728660, -11249211, 8754525, + 7446702, -5676054, 5797016}, + {-11295600, -3793569, -15782110, -7964573, 12708869, -8456199, 2014099, + -9050574, -2369172, -5877341}}, + {{-22472376, -11568741, -27682020, 1146375, 18956691, 16640559, 1192730, + -3714199, 15123619, 10811505}, + {14352098, -3419715, -18942044, 10822655, 32750596, 4699007, -70363, + 15776356, -28886779, -11974553}, + {-28241164, -8072475, -4978962, -5315317, 29416931, 1847569, -20654173, + -16484855, 4714547, -9600655}}, + {{15200332, 8368572, 19679101, 15970074, -31872674, 1959451, 24611599, + -4543832, -11745876, 12340220}, + {12876937, -10480056, 33134381, 6590940, -6307776, 14872440, 9613953, + 8241152, 15370987, 9608631}, + {-4143277, -12014408, 8446281, -391603, 4407738, 13629032, -7724868, + 15866074, -28210621, -8814099}}, + {{26660628, -15677655, 8393734, 358047, -7401291, 992988, -23904233, + 858697, 20571223, 8420556}, + {14620715, 13067227, -15447274, 8264467, 14106269, 15080814, 33531827, + 12516406, -21574435, -12476749}, + {236881, 10476226, 57258, -14677024, 6472998, 2466984, 17258519, 7256740, + 8791136, 15069930}}, + {{1276410, -9371918, 22949635, -16322807, -23493039, -5702186, 14711875, + 4874229, -30663140, -2331391}, + {5855666, 4990204, -13711848, 7294284, -7804282, 1924647, -1423175, + -7912378, -33069337, 9234253}, + {20590503, -9018988, 31529744, -7352666, -2706834, 10650548, 31559055, + -11609587, 18979186, 13396066}}, + {{24474287, 4968103, 22267082, 4407354, 24063882, -8325180, -18816887, + 13594782, 33514650, 7021958}, + {-11566906, -6565505, -21365085, 15928892, -26158305, 4315421, -25948728, + -3916677, -21480480, 12868082}, + {-28635013, 13504661, 19988037, -2132761, 21078225, 6443208, -21446107, + 2244500, -12455797, -8089383}}, + {{-30595528, 13793479, -5852820, 319136, -25723172, -6263899, 33086546, + 8957937, -15233648, 5540521}, + {-11630176, -11503902, -8119500, -7643073, 2620056, 1022908, -23710744, + -1568984, -16128528, -14962807}, + {23152971, 775386, 27395463, 14006635, -9701118, 4649512, 1689819, 892185, + -11513277, -15205948}}, + {{9770129, 9586738, 26496094, 4324120, 1556511, -3550024, 27453819, + 4763127, -19179614, 5867134}, + {-32765025, 1927590, 31726409, -4753295, 23962434, -16019500, 27846559, + 5931263, -29749703, -16108455}, + {27461885, -2977536, 22380810, 1815854, -23033753, -3031938, 7283490, + -15148073, -19526700, 7734629}}}, + {/* 4/31 */ + {{-8010264, -9590817, -11120403, 6196038, 29344158, -13430885, 7585295, + -3176626, 18549497, 15302069}, + {-32658337, -6171222, -7672793, -11051681, 6258878, 13504381, 10458790, + -6418461, -8872242, 8424746}, + {24687205, 8613276, -30667046, -3233545, 1863892, -1830544, 19206234, + 7134917, -11284482, -828919}}, + {{11334899, -9218022, 8025293, 12707519, 17523892, -10476071, 10243738, + -14685461, -5066034, 16498837}, + {8911542, 6887158, -9584260, -6958590, 11145641, -9543680, 17303925, + -14124238, 6536641, 10543906}, + {-28946384, 15479763, -17466835, 568876, -1497683, 11223454, -2669190, + -16625574, -27235709, 8876771}}, + {{-25742899, -12566864, -15649966, -846607, -33026686, -796288, -33481822, + 15824474, -604426, -9039817}, + {10330056, 70051, 7957388, -9002667, 9764902, 15609756, 27698697, + -4890037, 1657394, 3084098}, + {10477963, -7470260, 12119566, -13250805, 29016247, -5365589, 31280319, + 14396151, -30233575, 15272409}}, + {{-12288309, 3169463, 28813183, 16658753, 25116432, -5630466, -25173957, + -12636138, -25014757, 1950504}, + {-26180358, 9489187, 11053416, -14746161, -31053720, 5825630, -8384306, + -8767532, 15341279, 8373727}, + {28685821, 7759505, -14378516, -12002860, -31971820, 4079242, 298136, + -10232602, -2878207, 15190420}}, + {{-32932876, 13806336, -14337485, -15794431, -24004620, 10940928, 8669718, + 2742393, -26033313, -6875003}, + {-1580388, -11729417, -25979658, -11445023, -17411874, -10912854, 9291594, + -16247779, -12154742, 6048605}, + {-30305315, 14843444, 1539301, 11864366, 20201677, 1900163, 13934231, + 5128323, 11213262, 9168384}}, + {{-26280513, 11007847, 19408960, -940758, -18592965, -4328580, -5088060, + -11105150, 20470157, -16398701}, + {-23136053, 9282192, 14855179, -15390078, -7362815, -14408560, -22783952, + 14461608, 14042978, 5230683}, + {29969567, -2741594, -16711867, -8552442, 9175486, -2468974, 21556951, + 3506042, -5933891, -12449708}}, + {{-3144746, 8744661, 19704003, 4581278, -20430686, 6830683, -21284170, + 8971513, -28539189, 15326563}, + {-19464629, 10110288, -17262528, -3503892, -23500387, 1355669, -15523050, + 15300988, -20514118, 9168260}, + {-5353335, 4488613, -23803248, 16314347, 7780487, -15638939, -28948358, + 9601605, 33087103, -9011387}}, + {{-19443170, -15512900, -20797467, -12445323, -29824447, 10229461, + -27444329, -15000531, -5996870, 15664672}, + {23294591, -16632613, -22650781, -8470978, 27844204, 11461195, 13099750, + -2460356, 18151676, 13417686}, + {-24722913, -4176517, -31150679, 5988919, -26858785, 6685065, 1661597, + -12551441, 15271676, -15452665}}}, + {/* 5/31 */ + {{11433042, -13228665, 8239631, -5279517, -1985436, -725718, -18698764, + 2167544, -6921301, -13440182}, + {-31436171, 15575146, 30436815, 12192228, -22463353, 9395379, -9917708, + -8638997, 12215110, 12028277}, + {14098400, 6555944, 23007258, 5757252, -15427832, -12950502, 30123440, + 4617780, -16900089, -655628}}, + {{-4026201, -15240835, 11893168, 13718664, -14809462, 1847385, -15819999, + 10154009, 23973261, -12684474}, + {-26531820, -3695990, -1908898, 2534301, -31870557, -16550355, 18341390, + -11419951, 32013174, -10103539}, + {-25479301, 10876443, -11771086, -14625140, -12369567, 1838104, 21911214, + 6354752, 4425632, -837822}}, + {{-10433389, -14612966, 22229858, -3091047, -13191166, 776729, -17415375, + -12020462, 4725005, 14044970}, + {19268650, -7304421, 1555349, 8692754, -21474059, -9910664, 6347390, + -1411784, -19522291, -16109756}, + {-24864089, 12986008, -10898878, -5558584, -11312371, -148526, 19541418, + 8180106, 9282262, 10282508}}, + {{-26205082, 4428547, -8661196, -13194263, 4098402, -14165257, 15522535, + 8372215, 5542595, -10702683}, + {-10562541, 14895633, 26814552, -16673850, -17480754, -2489360, -2781891, + 6993761, -18093885, 10114655}, + {-20107055, -929418, 31422704, 10427861, -7110749, 6150669, -29091755, + -11529146, 25953725, -106158}}, + {{-4234397, -8039292, -9119125, 3046000, 2101609, -12607294, 19390020, + 6094296, -3315279, 12831125}, + {-15998678, 7578152, 5310217, 14408357, -33548620, -224739, 31575954, + 6326196, 7381791, -2421839}, + {-20902779, 3296811, 24736065, -16328389, 18374254, 7318640, 6295303, + 8082724, -15362489, 12339664}}, + {{27724736, 2291157, 6088201, -14184798, 1792727, 5857634, 13848414, + 15768922, 25091167, 14856294}, + {-18866652, 8331043, 24373479, 8541013, -701998, -9269457, 12927300, + -12695493, -22182473, -9012899}, + {-11423429, -5421590, 11632845, 3405020, 30536730, -11674039, -27260765, + 13866390, 30146206, 9142070}}, + {{3924129, -15307516, -13817122, -10054960, 12291820, -668366, -27702774, + 9326384, -8237858, 4171294}, + {-15921940, 16037937, 6713787, 16606682, -21612135, 2790944, 26396185, + 3731949, 345228, -5462949}, + {-21327538, 13448259, 25284571, 1143661, 20614966, -8849387, 2031539, + -12391231, -16253183, -13582083}}, + {{31016211, -16722429, 26371392, -14451233, -5027349, 14854137, 17477601, + 3842657, 28012650, -16405420}, + {-5075835, 9368966, -8562079, -4600902, -15249953, 6970560, -9189873, + 16292057, -8867157, 3507940}, + {29439664, 3537914, 23333589, 6997794, -17555561, -11018068, -15209202, + -15051267, -9164929, 6580396}}}, + {/* 6/31 */ + {{-12185861, -7679788, 16438269, 10826160, -8696817, -6235611, 17860444, + -9273846, -2095802, 9304567}, + {20714564, -4336911, 29088195, 7406487, 11426967, -5095705, 14792667, + -14608617, 5289421, -477127}, + {-16665533, -10650790, -6160345, -13305760, 9192020, -1802462, 17271490, + 12349094, 26939669, -3752294}}, + {{-12889898, 9373458, 31595848, 16374215, 21471720, 13221525, -27283495, + -12348559, -3698806, 117887}, + {22263325, -6560050, 3984570, -11174646, -15114008, -566785, 28311253, + 5358056, -23319780, 541964}, + {16259219, 3261970, 2309254, -15534474, -16885711, -4581916, 24134070, + -16705829, -13337066, -13552195}}, + {{9378160, -13140186, -22845982, -12745264, 28198281, -7244098, -2399684, + -717351, 690426, 14876244}, + {24977353, -314384, -8223969, -13465086, 28432343, -1176353, -13068804, + -12297348, -22380984, 6618999}, + {-1538174, 11685646, 12944378, 13682314, -24389511, -14413193, 8044829, + -13817328, 32239829, -5652762}}, + {{-18603066, 4762990, -926250, 8885304, -28412480, -3187315, 9781647, + -10350059, 32779359, 5095274}, + {-33008130, -5214506, -32264887, -3685216, 9460461, -9327423, -24601656, + 14506724, 21639561, -2630236}, + {-16400943, -13112215, 25239338, 15531969, 3987758, -4499318, -1289502, + -6863535, 17874574, 558605}}, + {{-13600129, 10240081, 9171883, 16131053, -20869254, 9599700, 33499487, + 5080151, 2085892, 5119761}, + {-22205145, -2519528, -16381601, 414691, -25019550, 2170430, 30634760, + -8363614, -31999993, -5759884}, + {-6845704, 15791202, 8550074, -1312654, 29928809, -12092256, 27534430, + -7192145, -22351378, 12961482}}, + {{-24492060, -9570771, 10368194, 11582341, -23397293, -2245287, 16533930, + 8206996, -30194652, -5159638}, + {-11121496, -3382234, 2307366, 6362031, -135455, 8868177, -16835630, + 7031275, 7589640, 8945490}, + {-32152748, 8917967, 6661220, -11677616, -1192060, -15793393, 7251489, + -11182180, 24099109, -14456170}}, + {{5019558, -7907470, 4244127, -14714356, -26933272, 6453165, -19118182, + -13289025, -6231896, -10280736}, + {10853594, 10721687, 26480089, 5861829, -22995819, 1972175, -1866647, + -10557898, -3363451, -6441124}, + {-17002408, 5906790, 221599, -6563147, 7828208, -13248918, 24362661, + -2008168, -13866408, 7421392}}, + {{8139927, -6546497, 32257646, -5890546, 30375719, 1886181, -21175108, + 15441252, 28826358, -4123029}, + {6267086, 9695052, 7709135, -16603597, -32869068, -1886135, 14795160, + -7840124, 13746021, -1742048}, + {28584902, 7787108, -6732942, -15050729, 22846041, -7571236, -3181936, + -363524, 4771362, -8419958}}}, + {/* 7/31 */ + {{24949256, 6376279, -27466481, -8174608, -18646154, -9930606, 33543569, + -12141695, 3569627, 11342593}, + {26514989, 4740088, 27912651, 3697550, 19331575, -11472339, 6809886, + 4608608, 7325975, -14801071}, + {-11618399, -14554430, -24321212, 7655128, -1369274, 5214312, -27400540, + 10258390, -17646694, -8186692}}, + {{11431204, 15823007, 26570245, 14329124, 18029990, 4796082, -31446179, + 15580664, 9280358, -3973687}, + {-160783, -10326257, -22855316, -4304997, -20861367, -13621002, -32810901, + -11181622, -15545091, 4387441}, + {-20799378, 12194512, 3937617, -5805892, -27154820, 9340370, -24513992, + 8548137, 20617071, -7482001}}, + {{-938825, -3930586, -8714311, 16124718, 24603125, -6225393, -13775352, + -11875822, 24345683, 10325460}, + {-19855277, -1568885, -22202708, 8714034, 14007766, 6928528, 16318175, + -1010689, 4766743, 3552007}, + {-21751364, -16730916, 1351763, -803421, -4009670, 3950935, 3217514, + 14481909, 10988822, -3994762}}, + {{15564307, -14311570, 3101243, 5684148, 30446780, -8051356, 12677127, + -6505343, -8295852, 13296005}, + {-9442290, 6624296, -30298964, -11913677, -4670981, -2057379, 31521204, + 9614054, -30000824, 12074674}, + {4771191, -135239, 14290749, -13089852, 27992298, 14998318, -1413936, + -1556716, 29832613, -16391035}}, + {{7064884, -7541174, -19161962, -5067537, -18891269, -2912736, 25825242, + 5293297, -27122660, 13101590}, + {-2298563, 2439670, -7466610, 1719965, -27267541, -16328445, 32512469, + -5317593, -30356070, -4190957}, + {-30006540, 10162316, -33180176, 3981723, -16482138, -13070044, 14413974, + 9515896, 19568978, 9628812}}, + {{33053803, 199357, 15894591, 1583059, 27380243, -4580435, -17838894, + -6106839, -6291786, 3437740}, + {-18978877, 3884493, 19469877, 12726490, 15913552, 13614290, -22961733, + 70104, 7463304, 4176122}, + {-27124001, 10659917, 11482427, -16070381, 12771467, -6635117, -32719404, + -5322751, 24216882, 5944158}}, + {{8894125, 7450974, -2664149, -9765752, -28080517, -12389115, 19345746, + 14680796, 11632993, 5847885}, + {26942781, -2315317, 9129564, -4906607, 26024105, 11769399, -11518837, + 6367194, -9727230, 4782140}, + {19916461, -4828410, -22910704, -11414391, 25606324, -5972441, 33253853, + 8220911, 6358847, -1873857}}, + {{801428, -2081702, 16569428, 11065167, 29875704, 96627, 7908388, -4480480, + -13538503, 1387155}, + {19646058, 5720633, -11416706, 12814209, 11607948, 12749789, 14147075, + 15156355, -21866831, 11835260}, + {19299512, 1155910, 28703737, 14890794, 2925026, 7269399, 26121523, + 15467869, -26560550, 5052483}}}, + {/* 8/31 */ + {{-3017432, 10058206, 1980837, 3964243, 22160966, 12322533, -6431123, + -12618185, 12228557, -7003677}, + {32944382, 14922211, -22844894, 5188528, 21913450, -8719943, 4001465, + 13238564, -6114803, 8653815}, + {22865569, -4652735, 27603668, -12545395, 14348958, 8234005, 24808405, + 5719875, 28483275, 2841751}}, + {{-16420968, -1113305, -327719, -12107856, 21886282, -15552774, -1887966, + -315658, 19932058, -12739203}, + {-11656086, 10087521, -8864888, -5536143, -19278573, -3055912, 3999228, + 13239134, -4777469, -13910208}, + {1382174, -11694719, 17266790, 9194690, -13324356, 9720081, 20403944, + 11284705, -14013818, 3093230}}, + {{16650921, -11037932, -1064178, 1570629, -8329746, 7352753, -302424, + 16271225, -24049421, -6691850}, + {-21911077, -5927941, -4611316, -5560156, -31744103, -10785293, 24123614, + 15193618, -21652117, -16739389}, + {-9935934, -4289447, -25279823, 4372842, 2087473, 10399484, 31870908, + 14690798, 17361620, 11864968}}, + {{-11307610, 6210372, 13206574, 5806320, -29017692, -13967200, -12331205, + -7486601, -25578460, -16240689}, + {14668462, -12270235, 26039039, 15305210, 25515617, 4542480, 10453892, + 6577524, 9145645, -6443880}, + {5974874, 3053895, -9433049, -10385191, -31865124, 3225009, -7972642, + 3936128, -5652273, -3050304}}, + {{30625386, -4729400, -25555961, -12792866, -20484575, 7695099, 17097188, + -16303496, -27999779, 1803632}, + {-3553091, 9865099, -5228566, 4272701, -5673832, -16689700, 14911344, + 12196514, -21405489, 7047412}, + {20093277, 9920966, -11138194, -5343857, 13161587, 12044805, -32856851, + 4124601, -32343828, -10257566}}, + {{-20788824, 14084654, -13531713, 7842147, 19119038, -13822605, 4752377, + -8714640, -21679658, 2288038}, + {-26819236, -3283715, 29965059, 3039786, -14473765, 2540457, 29457502, + 14625692, -24819617, 12570232}, + {-1063558, -11551823, 16920318, 12494842, 1278292, -5869109, -21159943, + -3498680, -11974704, 4724943}}, + {{17960970, -11775534, -4140968, -9702530, -8876562, -1410617, -12907383, + -8659932, -29576300, 1903856}, + {23134274, -14279132, -10681997, -1611936, 20684485, 15770816, -12989750, + 3190296, 26955097, 14109738}, + {15308788, 5320727, -30113809, -14318877, 22902008, 7767164, 29425325, + -11277562, 31960942, 11934971}}, + {{-27395711, 8435796, 4109644, 12222639, -24627868, 14818669, 20638173, + 4875028, 10491392, 1379718}, + {-13159415, 9197841, 3875503, -8936108, -1383712, -5879801, 33518459, + 16176658, 21432314, 12180697}, + {-11787308, 11500838, 13787581, -13832590, -22430679, 10140205, 1465425, + 12689540, -10301319, -13872883}}}, + {/* 9/31 */ + {{5414091, -15386041, -21007664, 9643570, 12834970, 1186149, -2622916, + -1342231, 26128231, 6032912}, + {-26337395, -13766162, 32496025, -13653919, 17847801, -12669156, 3604025, + 8316894, -25875034, -10437358}, + {3296484, 6223048, 24680646, -12246460, -23052020, 5903205, -8862297, + -4639164, 12376617, 3188849}}, + {{29190488, -14659046, 27549113, -1183516, 3520066, -10697301, 32049515, + -7309113, -16109234, -9852307}, + {-14744486, -9309156, 735818, -598978, -20407687, -5057904, 25246078, + -15795669, 18640741, -960977}, + {-6928835, -16430795, 10361374, 5642961, 4910474, 12345252, -31638386, + -494430, 10530747, 1053335}}, + {{-29265967, -14186805, -13538216, -12117373, -19457059, -10655384, + -31462369, -2948985, 24018831, 15026644}, + {-22592535, -3145277, -2289276, 5953843, -13440189, 9425631, 25310643, + 13003497, -2314791, -15145616}, + {-27419985, -603321, -8043984, -1669117, -26092265, 13987819, -27297622, + 187899, -23166419, -2531735}}, + {{-21744398, -13810475, 1844840, 5021428, -10434399, -15911473, 9716667, + 16266922, -5070217, 726099}, + {29370922, -6053998, 7334071, -15342259, 9385287, 2247707, -13661962, + -4839461, 30007388, -15823341}, + {-936379, 16086691, 23751945, -543318, -1167538, -5189036, 9137109, + 730663, 9835848, 4555336}}, + {{-23376435, 1410446, -22253753, -12899614, 30867635, 15826977, 17693930, + 544696, -11985298, 12422646}, + {31117226, -12215734, -13502838, 6561947, -9876867, -12757670, -5118685, + -4096706, 29120153, 13924425}, + {-17400879, -14233209, 19675799, -2734756, -11006962, -5858820, -9383939, + -11317700, 7240931, -237388}}, + {{-31361739, -11346780, -15007447, -5856218, -22453340, -12152771, 1222336, + 4389483, 3293637, -15551743}, + {-16684801, -14444245, 11038544, 11054958, -13801175, -3338533, -24319580, + 7733547, 12796905, -6335822}, + {-8759414, -10817836, -25418864, 10783769, -30615557, -9746811, -28253339, + 3647836, 3222231, -11160462}}, + {{18606113, 1693100, -25448386, -15170272, 4112353, 10045021, 23603893, + -2048234, -7550776, 2484985}, + {9255317, -3131197, -12156162, -1004256, 13098013, -9214866, 16377220, + -2102812, -19802075, -3034702}, + {-22729289, 7496160, -5742199, 11329249, 19991973, -3347502, -31718148, + 9936966, -30097688, -10618797}}, + {{21878590, -5001297, 4338336, 13643897, -3036865, 13160960, 19708896, + 5415497, -7360503, -4109293}, + {27736861, 10103576, 12500508, 8502413, -3413016, -9633558, 10436918, + -1550276, -23659143, -8132100}, + {19492550, -12104365, -29681976, -852630, -3208171, 12403437, 30066266, + 8367329, 13243957, 8709688}}}, + {/* 10/31 */ + {{12015105, 2801261, 28198131, 10151021, 24818120, -4743133, -11194191, + -5645734, 5150968, 7274186}, + {2831366, -12492146, 1478975, 6122054, 23825128, -12733586, 31097299, + 6083058, 31021603, -9793610}, + {-2529932, -2229646, 445613, 10720828, -13849527, -11505937, -23507731, + 16354465, 15067285, -14147707}}, + {{7840942, 14037873, -33364863, 15934016, -728213, -3642706, 21403988, + 1057586, -19379462, -12403220}, + {915865, -16469274, 15608285, -8789130, -24357026, 6060030, -17371319, + 8410997, -7220461, 16527025}, + {32922597, -556987, 20336074, -16184568, 10903705, -5384487, 16957574, + 52992, 23834301, 6588044}}, + {{32752030, 11232950, 3381995, -8714866, 22652988, -10744103, 17159699, + 16689107, -20314580, -1305992}, + {-4689649, 9166776, -25710296, -10847306, 11576752, 12733943, 7924251, + -2752281, 1976123, -7249027}, + {21251222, 16309901, -2983015, -6783122, 30810597, 12967303, 156041, + -3371252, 12331345, -8237197}}, + {{8651614, -4477032, -16085636, -4996994, 13002507, 2950805, 29054427, + -5106970, 10008136, -4667901}, + {31486080, 15114593, -14261250, 12951354, 14369431, -7387845, 16347321, + -13662089, 8684155, -10532952}, + {19443825, 11385320, 24468943, -9659068, -23919258, 2187569, -26263207, + -6086921, 31316348, 14219878}}, + {{-28594490, 1193785, 32245219, 11392485, 31092169, 15722801, 27146014, + 6992409, 29126555, 9207390}, + {32382935, 1110093, 18477781, 11028262, -27411763, -7548111, -4980517, + 10843782, -7957600, -14435730}, + {2814918, 7836403, 27519878, -7868156, -20894015, -11553689, -21494559, + 8550130, 28346258, 1994730}}, + {{-19578299, 8085545, -14000519, -3948622, 2785838, -16231307, -19516951, + 7174894, 22628102, 8115180}, + {-30405132, 955511, -11133838, -15078069, -32447087, -13278079, -25651578, + 3317160, -9943017, 930272}, + {-15303681, -6833769, 28856490, 1357446, 23421993, 1057177, 24091212, + -1388970, -22765376, -10650715}}, + {{-22751231, -5303997, -12907607, -12768866, -15811511, -7797053, + -14839018, -16554220, -1867018, 8398970}, + {-31969310, 2106403, -4736360, 1362501, 12813763, 16200670, 22981545, + -6291273, 18009408, -15772772}, + {-17220923, -9545221, -27784654, 14166835, 29815394, 7444469, 29551787, + -3727419, 19288549, 1325865}}, + {{15100157, -15835752, -23923978, -1005098, -26450192, 15509408, 12376730, + -3479146, 33166107, -8042750}, + {20909231, 13023121, -9209752, 16251778, -5778415, -8094914, 12412151, + 10018715, 2213263, -13878373}, + {32529814, -11074689, 30361439, -16689753, -9135940, 1513226, 22922121, + 6382134, -5766928, 8371348}}}, + {/* 11/31 */ + {{9923462, 11271500, 12616794, 3544722, -29998368, -1721626, 12891687, + -8193132, -26442943, 10486144}, + {-22597207, -7012665, 8587003, -8257861, 4084309, -12970062, 361726, + 2610596, -23921530, -11455195}, + {5408411, -1136691, -4969122, 10561668, 24145918, 14240566, 31319731, + -4235541, 19985175, -3436086}}, + {{-13994457, 16616821, 14549246, 3341099, 32155958, 13648976, -17577068, + 8849297, 65030, 8370684}, + {-8320926, -12049626, 31204563, 5839400, -20627288, -1057277, -19442942, + 6922164, 12743482, -9800518}, + {-2361371, 12678785, 28815050, 4759974, -23893047, 4884717, 23783145, + 11038569, 18800704, 255233}}, + {{-5269658, -1773886, 13957886, 7990715, 23132995, 728773, 13393847, + 9066957, 19258688, -14753793}, + {-2936654, -10827535, -10432089, 14516793, -3640786, 4372541, -31934921, + 2209390, -1524053, 2055794}, + {580882, 16705327, 5468415, -2683018, -30926419, -14696000, -7203346, + -8994389, -30021019, 7394435}}, + {{23838809, 1822728, -15738443, 15242727, 8318092, -3733104, -21672180, + -3492205, -4821741, 14799921}, + {13345610, 9759151, 3371034, -16137791, 16353039, 8577942, 31129804, + 13496856, -9056018, 7402518}, + {2286874, -4435931, -20042458, -2008336, -13696227, 5038122, 11006906, + -15760352, 8205061, 1607563}}, + {{14414086, -8002132, 3331830, -3208217, 22249151, -5594188, 18364661, + -2906958, 30019587, -9029278}, + {-27688051, 1585953, -10775053, 931069, -29120221, -11002319, -14410829, + 12029093, 9944378, 8024}, + {4368715, -3709630, 29874200, -15022983, -20230386, -11410704, -16114594, + -999085, -8142388, 5640030}}, + {{10299610, 13746483, 11661824, 16234854, 7630238, 5998374, 9809887, + -16694564, 15219798, -14327783}, + {27425505, -5719081, 3055006, 10660664, 23458024, 595578, -15398605, + -1173195, -18342183, 9742717}, + {6744077, 2427284, 26042789, 2720740, -847906, 1118974, 32324614, 7406442, + 12420155, 1994844}}, + {{14012521, -5024720, -18384453, -9578469, -26485342, -3936439, -13033478, + -10909803, 24319929, -6446333}, + {16412690, -4507367, 10772641, 15929391, -17068788, -4658621, 10555945, + -10484049, -30102368, -4739048}, + {22397382, -7767684, -9293161, -12792868, 17166287, -9755136, -27333065, + 6199366, 21880021, -12250760}}, + {{-4283307, 5368523, -31117018, 8163389, -30323063, 3209128, 16557151, + 8890729, 8840445, 4957760}, + {-15447727, 709327, -6919446, -10870178, -29777922, 6522332, -21720181, + 12130072, -14796503, 5005757}, + {-2114751, -14308128, 23019042, 15765735, -25269683, 6002752, 10183197, + -13239326, -16395286, -2176112}}}, + {/* 12/31 */ + {{-19025756, 1632005, 13466291, -7995100, -23640451, 16573537, -32013908, + -3057104, 22208662, 2000468}, + {3065073, -1412761, -25598674, -361432, -17683065, -5703415, -8164212, + 11248527, -3691214, -7414184}, + {10379208, -6045554, 8877319, 1473647, -29291284, -12507580, 16690915, + 2553332, -3132688, 16400289}}, + {{15716668, 1254266, -18472690, 7446274, -8448918, 6344164, -22097271, + -7285580, 26894937, 9132066}, + {24158887, 12938817, 11085297, -8177598, -28063478, -4457083, -30576463, + 64452, -6817084, -2692882}, + {13488534, 7794716, 22236231, 5989356, 25426474, -12578208, 2350710, + -3418511, -4688006, 2364226}}, + {{16335052, 9132434, 25640582, 6678888, 1725628, 8517937, -11807024, + -11697457, 15445875, -7798101}, + {29004207, -7867081, 28661402, -640412, -12794003, -7943086, 31863255, + -4135540, -278050, -15759279}, + {-6122061, -14866665, -28614905, 14569919, -10857999, -3591829, 10343412, + -6976290, -29828287, -10815811}}, + {{27081650, 3463984, 14099042, -4517604, 1616303, -6205604, 29542636, + 15372179, 17293797, 960709}, + {20263915, 11434237, -5765435, 11236810, 13505955, -10857102, -16111345, + 6493122, -19384511, 7639714}, + {-2830798, -14839232, 25403038, -8215196, -8317012, -16173699, 18006287, + -16043750, 29994677, -15808121}}, + {{9769828, 5202651, -24157398, -13631392, -28051003, -11561624, -24613141, + -13860782, -31184575, 709464}, + {12286395, 13076066, -21775189, -1176622, -25003198, 4057652, -32018128, + -8890874, 16102007, 13205847}, + {13733362, 5599946, 10557076, 3195751, -5557991, 8536970, -25540170, + 8525972, 10151379, 10394400}}, + {{4024660, -16137551, 22436262, 12276534, -9099015, -2686099, 19698229, + 11743039, -33302334, 8934414}, + {-15879800, -4525240, -8580747, -2934061, 14634845, -698278, -9449077, + 3137094, -11536886, 11721158}, + {17555939, -5013938, 8268606, 2331751, -22738815, 9761013, 9319229, + 8835153, -9205489, -1280045}}, + {{-461409, -7830014, 20614118, 16688288, -7514766, -4807119, 22300304, + 505429, 6108462, -6183415}, + {-5070281, 12367917, -30663534, 3234473, 32617080, -8422642, 29880583, + -13483331, -26898490, -7867459}, + {-31975283, 5726539, 26934134, 10237677, -3173717, -605053, 24199304, + 3795095, 7592688, -14992079}}, + {{21594432, -14964228, 17466408, -4077222, 32537084, 2739898, 6407723, + 12018833, -28256052, 4298412}, + {-20650503, -11961496, -27236275, 570498, 3767144, -1717540, 13891942, + -1569194, 13717174, 10805743}, + {-14676630, -15644296, 15287174, 11927123, 24177847, -8175568, -796431, + 14860609, -26938930, -5863836}}}, + {/* 13/31 */ + {{12962541, 5311799, -10060768, 11658280, 18855286, -7954201, 13286263, + -12808704, -4381056, 9882022}, + {18512079, 11319350, -20123124, 15090309, 18818594, 5271736, -22727904, + 3666879, -23967430, -3299429}, + {-6789020, -3146043, 16192429, 13241070, 15898607, -14206114, -10084880, + -6661110, -2403099, 5276065}}, + {{30169808, -5317648, 26306206, -11750859, 27814964, 7069267, 7152851, + 3684982, 1449224, 13082861}, + {10342826, 3098505, 2119311, 193222, 25702612, 12233820, 23697382, + 15056736, -21016438, -8202000}, + {-33150110, 3261608, 22745853, 7948688, 19370557, -15177665, -26171976, + 6482814, -10300080, -11060101}}, + {{32869458, -5408545, 25609743, 15678670, -10687769, -15471071, 26112421, + 2521008, -22664288, 6904815}, + {29506923, 4457497, 3377935, -9796444, -30510046, 12935080, 1561737, + 3841096, -29003639, -6657642}, + {10340844, -6630377, -18656632, -2278430, 12621151, -13339055, 30878497, + -11824370, -25584551, 5181966}}, + {{25940115, -12658025, 17324188, -10307374, -8671468, 15029094, 24396252, + -16450922, -2322852, -12388574}, + {-21765684, 9916823, -1300409, 4079498, -1028346, 11909559, 1782390, + 12641087, 20603771, -6561742}, + {-18882287, -11673380, 24849422, 11501709, 13161720, -4768874, 1925523, + 11914390, 4662781, 7820689}}, + {{12241050, -425982, 8132691, 9393934, 32846760, -1599620, 29749456, + 12172924, 16136752, 15264020}, + {-10349955, -14680563, -8211979, 2330220, -17662549, -14545780, 10658213, + 6671822, 19012087, 3772772}, + {3753511, -3421066, 10617074, 2028709, 14841030, -6721664, 28718732, + -15762884, 20527771, 12988982}}, + {{-14822485, -5797269, -3707987, 12689773, -898983, -10914866, -24183046, + -10564943, 3299665, -12424953}, + {-16777703, -15253301, -9642417, 4978983, 3308785, 8755439, 6943197, + 6461331, -25583147, 8991218}, + {-17226263, 1816362, -1673288, -6086439, 31783888, -8175991, -32948145, + 7417950, -30242287, 1507265}}, + {{29692663, 6829891, -10498800, 4334896, 20945975, -11906496, -28887608, + 8209391, 14606362, -10647073}, + {-3481570, 8707081, 32188102, 5672294, 22096700, 1711240, -33020695, + 9761487, 4170404, -2085325}, + {-11587470, 14855945, -4127778, -1531857, -26649089, 15084046, 22186522, + 16002000, -14276837, -8400798}}, + {{-4811456, 13761029, -31703877, -2483919, -3312471, 7869047, -7113572, + -9620092, 13240845, 10965870}, + {-7742563, -8256762, -14768334, -13656260, -23232383, 12387166, 4498947, + 14147411, 29514390, 4302863}, + {-13413405, -12407859, 20757302, -13801832, 14785143, 8976368, -5061276, + -2144373, 17846988, -13971927}}}, + {/* 14/31 */ + {{-2244452, -754728, -4597030, -1066309, -6247172, 1455299, -21647728, + -9214789, -5222701, 12650267}, + {-9906797, -16070310, 21134160, 12198166, -27064575, 708126, 387813, + 13770293, -19134326, 10958663}, + {22470984, 12369526, 23446014, -5441109, -21520802, -9698723, -11772496, + -11574455, -25083830, 4271862}}, + {{-25169565, -10053642, -19909332, 15361595, -5984358, 2159192, 75375, + -4278529, -32526221, 8469673}, + {15854970, 4148314, -8893890, 7259002, 11666551, 13824734, -30531198, + 2697372, 24154791, -9460943}, + {15446137, -15806644, 29759747, 14019369, 30811221, -9610191, -31582008, + 12840104, 24913809, 9815020}}, + {{-4709286, -5614269, -31841498, -12288893, -14443537, 10799414, -9103676, + 13438769, 18735128, 9466238}, + {11933045, 9281483, 5081055, -5183824, -2628162, -4905629, -7727821, + -10896103, -22728655, 16199064}, + {14576810, 379472, -26786533, -8317236, -29426508, -10812974, -102766, + 1876699, 30801119, 2164795}}, + {{15995086, 3199873, 13672555, 13712240, -19378835, -4647646, -13081610, + -15496269, -13492807, 1268052}, + {-10290614, -3659039, -3286592, 10948818, 23037027, 3794475, -3470338, + -12600221, -17055369, 3565904}, + {29210088, -9419337, -5919792, -4952785, 10834811, -13327726, -16512102, + -10820713, -27162222, -14030531}}, + {{-13161890, 15508588, 16663704, -8156150, -28349942, 9019123, -29183421, + -3769423, 2244111, -14001979}, + {-5152875, -3800936, -9306475, -6071583, 16243069, 14684434, -25673088, + -16180800, 13491506, 4641841}, + {10813417, 643330, -19188515, -728916, 30292062, -16600078, 27548447, + -7721242, 14476989, -12767431}}, + {{10292079, 9984945, 6481436, 8279905, -7251514, 7032743, 27282937, + -1644259, -27912810, 12651324}, + {-31185513, -813383, 22271204, 11835308, 10201545, 15351028, 17099662, + 3988035, 21721536, -3148940}, + {10202177, -6545839, -31373232, -9574638, -32150642, -8119683, -12906320, + 3852694, 13216206, 14842320}}, + {{-15815640, -10601066, -6538952, -7258995, -6984659, -6581778, -31500847, + 13765824, -27434397, 9900184}, + {14465505, -13833331, -32133984, -14738873, -27443187, 12990492, 33046193, + 15796406, -7051866, -8040114}, + {30924417, -8279620, 6359016, -12816335, 16508377, 9071735, -25488601, + 15413635, 9524356, -7018878}}, + {{12274201, -13175547, 32627641, -1785326, 6736625, 13267305, 5237659, + -5109483, 15663516, 4035784}, + {-2951309, 8903985, 17349946, 601635, -16432815, -4612556, -13732739, + -15889334, -22258478, 4659091}, + {-16916263, -4952973, -30393711, -15158821, 20774812, 15897498, 5736189, + 15026997, -2178256, -13455585}}}, + {/* 15/31 */ + {{-8858980, -2219056, 28571666, -10155518, -474467, -10105698, -3801496, + 278095, 23440562, -290208}, + {10226241, -5928702, 15139956, 120818, -14867693, 5218603, 32937275, + 11551483, -16571960, -7442864}, + {17932739, -12437276, -24039557, 10749060, 11316803, 7535897, 22503767, + 5561594, -3646624, 3898661}}, + {{7749907, -969567, -16339731, -16464, -25018111, 15122143, -1573531, + 7152530, 21831162, 1245233}, + {26958459, -14658026, 4314586, 8346991, -5677764, 11960072, -32589295, + -620035, -30402091, -16716212}, + {-12165896, 9166947, 33491384, 13673479, 29787085, 13096535, 6280834, + 14587357, -22338025, 13987525}}, + {{-24349909, 7778775, 21116000, 15572597, -4833266, -5357778, -4300898, + -5124639, -7469781, -2858068}, + {9681908, -6737123, -31951644, 13591838, -6883821, 386950, 31622781, + 6439245, -14581012, 4091397}, + {-8426427, 1470727, -28109679, -1596990, 3978627, -5123623, -19622683, + 12092163, 29077877, -14741988}}, + {{5269168, -6859726, -13230211, -8020715, 25932563, 1763552, -5606110, + -5505881, -20017847, 2357889}, + {32264008, -15407652, -5387735, -1160093, -2091322, -3946900, 23104804, + -12869908, 5727338, 189038}, + {14609123, -8954470, -6000566, -16622781, -14577387, -7743898, -26745169, + 10942115, -25888931, -14884697}}, + {{20513500, 5557931, -15604613, 7829531, 26413943, -2019404, -21378968, + 7471781, 13913677, -5137875}, + {-25574376, 11967826, 29233242, 12948236, -6754465, 4713227, -8940970, + 14059180, 12878652, 8511905}, + {-25656801, 3393631, -2955415, -7075526, -2250709, 9366908, -30223418, + 6812974, 5568676, -3127656}}, + {{11630004, 12144454, 2116339, 13606037, 27378885, 15676917, -17408753, + -13504373, -14395196, 8070818}, + {27117696, -10007378, -31282771, -5570088, 1127282, 12772488, -29845906, + 10483306, -11552749, -1028714}, + {10637467, -5688064, 5674781, 1072708, -26343588, -6982302, -1683975, + 9177853, -27493162, 15431203}}, + {{20525145, 10892566, -12742472, 12779443, -29493034, 16150075, -28240519, + 14943142, -15056790, -7935931}, + {-30024462, 5626926, -551567, -9981087, 753598, 11981191, 25244767, + -3239766, -3356550, 9594024}, + {-23752644, 2636870, -5163910, -10103818, 585134, 7877383, 11345683, + -6492290, 13352335, -10977084}}, + {{-1931799, -5407458, 3304649, -12884869, 17015806, -4877091, -29783850, + -7752482, -13215537, -319204}, + {20239939, 6607058, 6203985, 3483793, -18386976, -779229, -20723742, + 15077870, -22750759, 14523817}, + {27406042, -6041657, 27423596, -4497394, 4996214, 10002360, -28842031, + -4545494, -30172742, -4805667}}}, + {/* 16/31 */ + {{11374242, 12660715, 17861383, -12540833, 10935568, 1099227, -13886076, + -9091740, -27727044, 11358504}, + {-12730809, 10311867, 1510375, 10778093, -2119455, -9145702, 32676003, + 11149336, -26123651, 4985768}, + {-19096303, 341147, -6197485, -239033, 15756973, -8796662, -983043, + 13794114, -19414307, -15621255}}, + {{6490081, 11940286, 25495923, -7726360, 8668373, -8751316, 3367603, + 6970005, -1691065, -9004790}, + {1656497, 13457317, 15370807, 6364910, 13605745, 8362338, -19174622, + -5475723, -16796596, -5031438}, + {-22273315, -13524424, -64685, -4334223, -18605636, -10921968, -20571065, + -7007978, -99853, -10237333}}, + {{17747465, 10039260, 19368299, -4050591, -20630635, -16041286, 31992683, + -15857976, -29260363, -5511971}, + {31932027, -4986141, -19612382, 16366580, 22023614, 88450, 11371999, + -3744247, 4882242, -10626905}, + {29796507, 37186, 19818052, 10115756, -11829032, 3352736, 18551198, + 3272828, -5190932, -4162409}}, + {{12501286, 4044383, -8612957, -13392385, -32430052, 5136599, -19230378, + -3529697, 330070, -3659409}, + {6384877, 2899513, 17807477, 7663917, -2358888, 12363165, 25366522, + -8573892, -271295, 12071499}, + {-8365515, -4042521, 25133448, -4517355, -6211027, 2265927, -32769618, + 1936675, -5159697, 3829363}}, + {{28425966, -5835433, -577090, -4697198, -14217555, 6870930, 7921550, + -6567787, 26333140, 14267664}, + {-11067219, 11871231, 27385719, -10559544, -4585914, -11189312, 10004786, + -8709488, -21761224, 8930324}, + {-21197785, -16396035, 25654216, -1725397, 12282012, 11008919, 1541940, + 4757911, -26491501, -16408940}}, + {{13537262, -7759490, -20604840, 10961927, -5922820, -13218065, -13156584, + 6217254, -15943699, 13814990}, + {-17422573, 15157790, 18705543, 29619, 24409717, -260476, 27361681, + 9257833, -1956526, -1776914}, + {-25045300, -10191966, 15366585, 15166509, -13105086, 8423556, -29171540, + 12361135, -18685978, 4578290}}, + {{24579768, 3711570, 1342322, -11180126, -27005135, 14124956, -22544529, + 14074919, 21964432, 8235257}, + {-6528613, -2411497, 9442966, -5925588, 12025640, -1487420, -2981514, + -1669206, 13006806, 2355433}, + {-16304899, -13605259, -6632427, -5142349, 16974359, -10911083, 27202044, + 1719366, 1141648, -12796236}}, + {{-12863944, -13219986, -8318266, -11018091, -6810145, -4843894, 13475066, + -3133972, 32674895, 13715045}, + {11423335, -5468059, 32344216, 8962751, 24989809, 9241752, -13265253, + 16086212, -28740881, -15642093}, + {-1409668, 12530728, -6368726, 10847387, 19531186, -14132160, -11709148, + 7791794, -27245943, 4383347}}}, + {/* 17/31 */ + {{-28970898, 5271447, -1266009, -9736989, -12455236, 16732599, -4862407, + -4906449, 27193557, 6245191}, + {-15193956, 5362278, -1783893, 2695834, 4960227, 12840725, 23061898, + 3260492, 22510453, 8577507}, + {-12632451, 11257346, -32692994, 13548177, -721004, 10879011, 31168030, + 13952092, -29571492, -3635906}}, + {{3877321, -9572739, 32416692, 5405324, -11004407, -13656635, 3759769, + 11935320, 5611860, 8164018}, + {-16275802, 14667797, 15906460, 12155291, -22111149, -9039718, 32003002, + -8832289, 5773085, -8422109}, + {-23788118, -8254300, 1950875, 8937633, 18686727, 16459170, -905725, + 12376320, 31632953, 190926}}, + {{-24593607, -16138885, -8423991, 13378746, 14162407, 6901328, -8288749, + 4508564, -25341555, -3627528}, + {8884438, -5884009, 6023974, 10104341, -6881569, -4941533, 18722941, + -14786005, -1672488, 827625}, + {-32720583, -16289296, -32503547, 7101210, 13354605, 2659080, -1800575, + -14108036, -24878478, 1541286}}, + {{2901347, -1117687, 3880376, -10059388, -17620940, -3612781, -21802117, + -3567481, 20456845, -1885033}, + {27019610, 12299467, -13658288, -1603234, -12861660, -4861471, -19540150, + -5016058, 29439641, 15138866}, + {21536104, -6626420, -32447818, -10690208, -22408077, 5175814, -5420040, + -16361163, 7779328, 109896}}, + {{30279744, 14648750, -8044871, 6425558, 13639621, -743509, 28698390, + 12180118, 23177719, -554075}, + {26572847, 3405927, -31701700, 12890905, -19265668, 5335866, -6493768, + 2378492, 4439158, -13279347}, + {-22716706, 3489070, -9225266, -332753, 18875722, -1140095, 14819434, + -12731527, -17717757, -5461437}}, + {{-5056483, 16566551, 15953661, 3767752, -10436499, 15627060, -820954, + 2177225, 8550082, -15114165}, + {-18473302, 16596775, -381660, 15663611, 22860960, 15585581, -27844109, + -3582739, -23260460, -8428588}, + {-32480551, 15707275, -8205912, -5652081, 29464558, 2713815, -22725137, + 15860482, -21902570, 1494193}}, + {{-19562091, -14087393, -25583872, -9299552, 13127842, 759709, 21923482, + 16529112, 8742704, 12967017}, + {-28464899, 1553205, 32536856, -10473729, -24691605, -406174, -8914625, + -2933896, -29903758, 15553883}, + {21877909, 3230008, 9881174, 10539357, -4797115, 2841332, 11543572, + 14513274, 19375923, -12647961}}, + {{8832269, -14495485, 13253511, 5137575, 5037871, 4078777, 24880818, + -6222716, 2862653, 9455043}, + {29306751, 5123106, 20245049, -14149889, 9592566, 8447059, -2077124, + -2990080, 15511449, 4789663}, + {-20679756, 7004547, 8824831, -9434977, -4045704, -3750736, -5754762, + 108893, 23513200, 16652362}}}, + {/* 18/31 */ + {{-33256173, 4144782, -4476029, -6579123, 10770039, -7155542, -6650416, + -12936300, -18319198, 10212860}, + {2756081, 8598110, 7383731, -6859892, 22312759, -1105012, 21179801, + 2600940, -9988298, -12506466}, + {-24645692, 13317462, -30449259, -15653928, 21365574, -10869657, 11344424, + 864440, -2499677, -16710063}}, + {{-26432803, 6148329, -17184412, -14474154, 18782929, -275997, -22561534, + 211300, 2719757, 4940997}, + {-1323882, 3911313, -6948744, 14759765, -30027150, 7851207, 21690126, + 8518463, 26699843, 5276295}, + {-13149873, -6429067, 9396249, 365013, 24703301, -10488939, 1321586, + 149635, -15452774, 7159369}}, + {{9987780, -3404759, 17507962, 9505530, 9731535, -2165514, 22356009, + 8312176, 22477218, -8403385}, + {18155857, -16504990, 19744716, 9006923, 15154154, -10538976, 24256460, + -4864995, -22548173, 9334109}, + {2986088, -4911893, 10776628, -3473844, 10620590, -7083203, -21413845, + 14253545, -22587149, 536906}}, + {{4377756, 8115836, 24567078, 15495314, 11625074, 13064599, 7390551, + 10589625, 10838060, -15420424}, + {-19342404, 867880, 9277171, -3218459, -14431572, -1986443, 19295826, + -15796950, 6378260, 699185}, + {7895026, 4057113, -7081772, -13077756, -17886831, -323126, -716039, + 15693155, -5045064, -13373962}}, + {{-7737563, -5869402, -14566319, -7406919, 11385654, 13201616, 31730678, + -10962840, -3918636, -9669325}, + {10188286, -15770834, -7336361, 13427543, 22223443, 14896287, 30743455, + 7116568, -21786507, 5427593}, + {696102, 13206899, 27047647, -10632082, 15285305, -9853179, 10798490, + -4578720, 19236243, 12477404}}, + {{-11229439, 11243796, -17054270, -8040865, -788228, -8167967, -3897669, + 11180504, -23169516, 7733644}, + {17800790, -14036179, -27000429, -11766671, 23887827, 3149671, 23466177, + -10538171, 10322027, 15313801}, + {26246234, 11968874, 32263343, -5468728, 6830755, -13323031, -15794704, + -101982, -24449242, 10890804}}, + {{-31365647, 10271363, -12660625, -6267268, 16690207, -13062544, -14982212, + 16484931, 25180797, -5334884}, + {-586574, 10376444, -32586414, -11286356, 19801893, 10997610, 2276632, + 9482883, 316878, 13820577}, + {-9882808, -4510367, -2115506, 16457136, -11100081, 11674996, 30756178, + -7515054, 30696930, -3712849}}, + {{32988917, -9603412, 12499366, 7910787, -10617257, -11931514, -7342816, + -9985397, -32349517, 7392473}, + {-8855661, 15927861, 9866406, -3649411, -2396914, -16655781, -30409476, + -9134995, 25112947, -2926644}, + {-2504044, -436966, 25621774, -5678772, 15085042, -5479877, -24884878, + -13526194, 5537438, -13914319}}}, + {/* 19/31 */ + {{-11225584, 2320285, -9584280, 10149187, -33444663, 5808648, -14876251, + -1729667, 31234590, 6090599}, + {-9633316, 116426, 26083934, 2897444, -6364437, -2688086, 609721, + 15878753, -6970405, -9034768}, + {-27757857, 247744, -15194774, -9002551, 23288161, -10011936, -23869595, + 6503646, 20650474, 1804084}}, + {{-27589786, 15456424, 8972517, 8469608, 15640622, 4439847, 3121995, + -10329713, 27842616, -202328}, + {-15306973, 2839644, 22530074, 10026331, 4602058, 5048462, 28248656, + 5031932, -11375082, 12714369}, + {20807691, -7270825, 29286141, 11421711, -27876523, -13868230, -21227475, + 1035546, -19733229, 12796920}}, + {{12076899, -14301286, -8785001, -11848922, -25012791, 16400684, -17591495, + -12899438, 3480665, -15182815}, + {-32361549, 5457597, 28548107, 7833186, 7303070, -11953545, -24363064, + -15921875, -33374054, 2771025}, + {-21389266, 421932, 26597266, 6860826, 22486084, -6737172, -17137485, + -4210226, -24552282, 15673397}}, + {{-20184622, 2338216, 19788685, -9620956, -4001265, -8740893, -20271184, + 4733254, 3727144, -12934448}, + {6120119, 814863, -11794402, -622716, 6812205, -15747771, 2019594, + 7975683, 31123697, -10958981}, + {30069250, -11435332, 30434654, 2958439, 18399564, -976289, 12296869, + 9204260, -16432438, 9648165}}, + {{32705432, -1550977, 30705658, 7451065, -11805606, 9631813, 3305266, + 5248604, -26008332, -11377501}, + {17219865, 2375039, -31570947, -5575615, -19459679, 9219903, 294711, + 15298639, 2662509, -16297073}, + {-1172927, -7558695, -4366770, -4287744, -21346413, -8434326, 32087529, + -1222777, 32247248, -14389861}}, + {{14312628, 1221556, 17395390, -8700143, -4945741, -8684635, -28197744, + -9637817, -16027623, -13378845}, + {-1428825, -9678990, -9235681, 6549687, -7383069, -468664, 23046502, + 9803137, 17597934, 2346211}, + {18510800, 15337574, 26171504, 981392, -22241552, 7827556, -23491134, + -11323352, 3059833, -11782870}}, + {{10141598, 6082907, 17829293, -1947643, 9830092, 13613136, -25556636, + -5544586, -33502212, 3592096}, + {33114168, -15889352, -26525686, -13343397, 33076705, 8716171, 1151462, + 1521897, -982665, -6837803}, + {-32939165, -4255815, 23947181, -324178, -33072974, -12305637, -16637686, + 3891704, 26353178, 693168}}, + {{30374239, 1595580, -16884039, 13186931, 4600344, 406904, 9585294, + -400668, 31375464, 14369965}, + {-14370654, -7772529, 1510301, 6434173, -18784789, -6262728, 32732230, + -13108839, 17901441, 16011505}, + {18171223, -11934626, -12500402, 15197122, -11038147, -15230035, + -19172240, -16046376, 8764035, 12309598}}}, + {/* 20/31 */ + {{5975908, -5243188, -19459362, -9681747, -11541277, 14015782, -23665757, + 1228319, 17544096, -10593782}, + {5811932, -1715293, 3442887, -2269310, -18367348, -8359541, -18044043, + -15410127, -5565381, 12348900}, + {-31399660, 11407555, 25755363, 6891399, -3256938, 14872274, -24849353, + 8141295, -10632534, -585479}}, + {{-12675304, 694026, -5076145, 13300344, 14015258, -14451394, -9698672, + -11329050, 30944593, 1130208}, + {8247766, -6710942, -26562381, -7709309, -14401939, -14648910, 4652152, + 2488540, 23550156, -271232}, + {17294316, -3788438, 7026748, 15626851, 22990044, 113481, 2267737, + -5908146, -408818, -137719}}, + {{16091085, -16253926, 18599252, 7340678, 2137637, -1221657, -3364161, + 14550936, 3260525, -7166271}, + {-4910104, -13332887, 18550887, 10864893, -16459325, -7291596, -23028869, + -13204905, -12748722, 2701326}, + {-8574695, 16099415, 4629974, -16340524, -20786213, -6005432, -10018363, + 9276971, 11329923, 1862132}}, + {{14763076, -15903608, -30918270, 3689867, 3511892, 10313526, -21951088, + 12219231, -9037963, -940300}, + {8894987, -3446094, 6150753, 3013931, 301220, 15693451, -31981216, + -2909717, -15438168, 11595570}, + {15214962, 3537601, -26238722, -14058872, 4418657, -15230761, 13947276, + 10730794, -13489462, -4363670}}, + {{-2538306, 7682793, 32759013, 263109, -29984731, -7955452, -22332124, + -10188635, 977108, 699994}, + {-12466472, 4195084, -9211532, 550904, -15565337, 12917920, 19118110, + -439841, -30534533, -14337913}, + {31788461, -14507657, 4799989, 7372237, 8808585, -14747943, 9408237, + -10051775, 12493932, -5409317}}, + {{-25680606, 5260744, -19235809, -6284470, -3695942, 16566087, 27218280, + 2607121, 29375955, 6024730}, + {842132, -2794693, -4763381, -8722815, 26332018, -12405641, 11831880, + 6985184, -9940361, 2854096}, + {-4847262, -7969331, 2516242, -5847713, 9695691, -7221186, 16512645, + 960770, 12121869, 16648078}}, + {{-15218652, 14667096, -13336229, 2013717, 30598287, -464137, -31504922, + -7882064, 20237806, 2838411}, + {-19288047, 4453152, 15298546, -16178388, 22115043, -15972604, 12544294, + -13470457, 1068881, -12499905}, + {-9558883, -16518835, 33238498, 13506958, 30505848, -1114596, -8486907, + -2630053, 12521378, 4845654}}, + {{-28198521, 10744108, -2958380, 10199664, 7759311, -13088600, 3409348, + -873400, -6482306, -12885870}, + {-23561822, 6230156, -20382013, 10655314, -24040585, -11621172, 10477734, + -1240216, -3113227, 13974498}, + {12966261, 15550616, -32038948, -1615346, 21025980, -629444, 5642325, + 7188737, 18895762, 12629579}}}, + {/* 21/31 */ + {{14741879, -14946887, 22177208, -11721237, 1279741, 8058600, 11758140, + 789443, 32195181, 3895677}, + {10758205, 15755439, -4509950, 9243698, -4879422, 6879879, -2204575, + -3566119, -8982069, 4429647}, + {-2453894, 15725973, -20436342, -10410672, -5803908, -11040220, -7135870, + -11642895, 18047436, -15281743}}, + {{-25173001, -11307165, 29759956, 11776784, -22262383, -15820455, 10993114, + -12850837, -17620701, -9408468}, + {21987233, 700364, -24505048, 14972008, -7774265, -5718395, 32155026, + 2581431, -29958985, 8773375}, + {-25568350, 454463, -13211935, 16126715, 25240068, 8594567, 20656846, + 12017935, -7874389, -13920155}}, + {{6028182, 6263078, -31011806, -11301710, -818919, 2461772, -31841174, + -5468042, -1721788, -2776725}, + {-12278994, 16624277, 987579, -5922598, 32908203, 1248608, 7719845, + -4166698, 28408820, 6816612}, + {-10358094, -8237829, 19549651, -12169222, 22082623, 16147817, 20613181, + 13982702, -10339570, 5067943}}, + {{-30505967, -3821767, 12074681, 13582412, -19877972, 2443951, -19719286, + 12746132, 5331210, -10105944}, + {30528811, 3601899, -1957090, 4619785, -27361822, -15436388, 24180793, + -12570394, 27679908, -1648928}, + {9402404, -13957065, 32834043, 10838634, -26580150, -13237195, 26653274, + -8685565, 22611444, -12715406}}, + {{22190590, 1118029, 22736441, 15130463, -30460692, -5991321, 19189625, + -4648942, 4854859, 6622139}, + {-8310738, -2953450, -8262579, -3388049, -10401731, -271929, 13424426, + -3567227, 26404409, 13001963}, + {-31241838, -15415700, -2994250, 8939346, 11562230, -12840670, -26064365, + -11621720, -15405155, 11020693}}, + {{1866042, -7949489, -7898649, -10301010, 12483315, 13477547, 3175636, + -12424163, 28761762, 1406734}, + {-448555, -1777666, 13018551, 3194501, -9580420, -11161737, 24760585, + -4347088, 25577411, -13378680}, + {-24290378, 4759345, -690653, -1852816, 2066747, 10693769, -29595790, + 9884936, -9368926, 4745410}}, + {{-9141284, 6049714, -19531061, -4341411, -31260798, 9944276, -15462008, + -11311852, 10931924, -11931931}, + {-16561513, 14112680, -8012645, 4817318, -8040464, -11414606, -22853429, + 10856641, -20470770, 13434654}, + {22759489, -10073434, -16766264, -1871422, 13637442, -10168091, 1765144, + -12654326, 28445307, -5364710}}, + {{29875063, 12493613, 2795536, -3786330, 1710620, 15181182, -10195717, + -8788675, 9074234, 1167180}, + {-26205683, 11014233, -9842651, -2635485, -26908120, 7532294, -18716888, + -9535498, 3843903, 9367684}, + {-10969595, -6403711, 9591134, 9582310, 11349256, 108879, 16235123, + 8601684, -139197, 4242895}}}, + {/* 22/31 */ + {{22092954, -13191123, -2042793, -11968512, 32186753, -11517388, -6574341, + 2470660, -27417366, 16625501}, + {-11057722, 3042016, 13770083, -9257922, 584236, -544855, -7770857, + 2602725, -27351616, 14247413}, + {6314175, -10264892, -32772502, 15957557, -10157730, 168750, -8618807, + 14290061, 27108877, -1180880}}, + {{-8586597, -7170966, 13241782, 10960156, -32991015, -13794596, 33547976, + -11058889, -27148451, 981874}, + {22833440, 9293594, -32649448, -13618667, -9136966, 14756819, -22928859, + -13970780, -10479804, -16197962}, + {-7768587, 3326786, -28111797, 10783824, 19178761, 14905060, 22680049, + 13906969, -15933690, 3797899}}, + {{21721356, -4212746, -12206123, 9310182, -3882239, -13653110, 23740224, + -2709232, 20491983, -8042152}, + {9209270, -15135055, -13256557, -6167798, -731016, 15289673, 25947805, + 15286587, 30997318, -6703063}, + {7392032, 16618386, 23946583, -8039892, -13265164, -1533858, -14197445, + -2321576, 17649998, -250080}}, + {{-9301088, -14193827, 30609526, -3049543, -25175069, -1283752, -15241566, + -9525724, -2233253, 7662146}, + {-17558673, 1763594, -33114336, 15908610, -30040870, -12174295, 7335080, + -8472199, -3174674, 3440183}, + {-19889700, -5977008, -24111293, -9688870, 10799743, -16571957, 40450, + -4431835, 4862400, 1133}}, + {{-32856209, -7873957, -5422389, 14860950, -16319031, 7956142, 7258061, + 311861, -30594991, -7379421}, + {-3773428, -1565936, 28985340, 7499440, 24445838, 9325937, 29727763, + 16527196, 18278453, 15405622}, + {-4381906, 8508652, -19898366, -3674424, -5984453, 15149970, -13313598, + 843523, -21875062, 13626197}}, + {{2281448, -13487055, -10915418, -2609910, 1879358, 16164207, -10783882, + 3953792, 13340839, 15928663}, + {31727126, -7179855, -18437503, -8283652, 2875793, -16390330, -25269894, + -7014826, -23452306, 5964753}, + {4100420, -5959452, -17179337, 6017714, -18705837, 12227141, -26684835, + 11344144, 2538215, -7570755}}, + {{-9433605, 6123113, 11159803, -2156608, 30016280, 14966241, -20474983, + 1485421, -629256, -15958862}, + {-26804558, 4260919, 11851389, 9658551, -32017107, 16367492, -20205425, + -13191288, 11659922, -11115118}, + {26180396, 10015009, -30844224, -8581293, 5418197, 9480663, 2231568, + -10170080, 33100372, -1306171}}, + {{15121113, -5201871, -10389905, 15427821, -27509937, -15992507, 21670947, + 4486675, -5931810, -14466380}, + {16166486, -9483733, -11104130, 6023908, -31926798, -1364923, 2340060, + -16254968, -10735770, -10039824}, + {28042865, -3557089, -12126526, 12259706, -3717498, -6945899, 6766453, + -8689599, 18036436, 5803270}}}, + {/* 23/31 */ + {{-817581, 6763912, 11803561, 1585585, 10958447, -2671165, 23855391, + 4598332, -6159431, -14117438}, + {-31031306, -14256194, 17332029, -2383520, 31312682, -5967183, 696309, + 50292, -20095739, 11763584}, + {-594563, -2514283, -32234153, 12643980, 12650761, 14811489, 665117, + -12613632, -19773211, -10713562}}, + {{30464590, -11262872, -4127476, -12734478, 19835327, -7105613, -24396175, + 2075773, -17020157, 992471}, + {18357185, -6994433, 7766382, 16342475, -29324918, 411174, 14578841, + 8080033, -11574335, -10601610}, + {19598397, 10334610, 12555054, 2555664, 18821899, -10339780, 21873263, + 16014234, 26224780, 16452269}}, + {{-30223925, 5145196, 5944548, 16385966, 3976735, 2009897, -11377804, + -7618186, -20533829, 3698650}, + {14187449, 3448569, -10636236, -10810935, -22663880, -3433596, 7268410, + -10890444, 27394301, 12015369}, + {19695761, 16087646, 28032085, 12999827, 6817792, 11427614, 20244189, + -1312777, -13259127, -3402461}}, + {{30860103, 12735208, -1888245, -4699734, -16974906, 2256940, -8166013, + 12298312, -8550524, -10393462}, + {-5719826, -11245325, -1910649, 15569035, 26642876, -7587760, -5789354, + -15118654, -4976164, 12651793}, + {-2848395, 9953421, 11531313, -5282879, 26895123, -12697089, -13118820, + -16517902, 9768698, -2533218}}, + {{-24719459, 1894651, -287698, -4704085, 15348719, -8156530, 32767513, + 12765450, 4940095, 10678226}, + {18860224, 15980149, -18987240, -1562570, -26233012, -11071856, -7843882, + 13944024, -24372348, 16582019}, + {-15504260, 4970268, -29893044, 4175593, -20993212, -2199756, -11704054, + 15444560, -11003761, 7989037}}, + {{31490452, 5568061, -2412803, 2182383, -32336847, 4531686, -32078269, + 6200206, -19686113, -14800171}, + {-17308668, -15879940, -31522777, -2831, -32887382, 16375549, 8680158, + -16371713, 28550068, -6857132}, + {-28126887, -5688091, 16837845, -1820458, -6850681, 12700016, -30039981, + 4364038, 1155602, 5988841}}, + {{21890435, -13272907, -12624011, 12154349, -7831873, 15300496, 23148983, + -4470481, 24618407, 8283181}, + {-33136107, -10512751, 9975416, 6841041, -31559793, 16356536, 3070187, + -7025928, 1466169, 10740210}, + {-1509399, -15488185, -13503385, -10655916, 32799044, 909394, -13938903, + -5779719, -32164649, -15327040}}, + {{3960823, -14267803, -28026090, -15918051, -19404858, 13146868, 15567327, + 951507, -3260321, -573935}, + {24740841, 5052253, -30094131, 8961361, 25877428, 6165135, -24368180, + 14397372, -7380369, -6144105}, + {-28888365, 3510803, -28103278, -1158478, -11238128, -10631454, -15441463, + -14453128, -1625486, -6494814}}}, + {/* 24/31 */ + {{793299, -9230478, 8836302, -6235707, -27360908, -2369593, 33152843, + -4885251, -9906200, -621852}, + {5666233, 525582, 20782575, -8038419, -24538499, 14657740, 16099374, + 1468826, -6171428, -15186581}, + {-4859255, -3779343, -2917758, -6748019, 7778750, 11688288, -30404353, + -9871238, -1558923, -9863646}}, + {{10896332, -7719704, 824275, 472601, -19460308, 3009587, 25248958, + 14783338, -30581476, -15757844}, + {10566929, 12612572, -31944212, 11118703, -12633376, 12362879, 21752402, + 8822496, 24003793, 14264025}, + {27713862, -7355973, -11008240, 9227530, 27050101, 2504721, 23886875, + -13117525, 13958495, -5732453}}, + {{-23481610, 4867226, -27247128, 3900521, 29838369, -8212291, -31889399, + -10041781, 7340521, -15410068}, + {4646514, -8011124, -22766023, -11532654, 23184553, 8566613, 31366726, + -1381061, -15066784, -10375192}, + {-17270517, 12723032, -16993061, 14878794, 21619651, -6197576, 27584817, + 3093888, -8843694, 3849921}}, + {{-9064912, 2103172, 25561640, -15125738, -5239824, 9582958, 32477045, + -9017955, 5002294, -15550259}, + {-12057553, -11177906, 21115585, -13365155, 8808712, -12030708, 16489530, + 13378448, -25845716, 12741426}, + {-5946367, 10645103, -30911586, 15390284, -3286982, -7118677, 24306472, + 15852464, 28834118, -7646072}}, + {{-17335748, -9107057, -24531279, 9434953, -8472084, -583362, -13090771, + 455841, 20461858, 5491305}, + {13669248, -16095482, -12481974, -10203039, -14569770, -11893198, + -24995986, 11293807, -28588204, -9421832}, + {28497928, 6272777, -33022994, 14470570, 8906179, -1225630, 18504674, + -14165166, 29867745, -8795943}}, + {{-16207023, 13517196, -27799630, -13697798, 24009064, -6373891, -6367600, + -13175392, 22853429, -4012011}, + {24191378, 16712145, -13931797, 15217831, 14542237, 1646131, 18603514, + -11037887, 12876623, -2112447}, + {17902668, 4518229, -411702, -2829247, 26878217, 5258055, -12860753, + 608397, 16031844, 3723494}}, + {{-28632773, 12763728, -20446446, 7577504, 33001348, -13017745, 17558842, + -7872890, 23896954, -4314245}, + {-20005381, -12011952, 31520464, 605201, 2543521, 5991821, -2945064, + 7229064, -9919646, -8826859}, + {28816045, 298879, -28165016, -15920938, 19000928, -1665890, -12680833, + -2949325, -18051778, -2082915}}, + {{16000882, -344896, 3493092, -11447198, -29504595, -13159789, 12577740, + 16041268, -19715240, 7847707}, + {10151868, 10572098, 27312476, 7922682, 14825339, 4723128, -32855931, + -6519018, -10020567, 3852848}, + {-11430470, 15697596, -21121557, -4420647, 5386314, 15063598, 16514493, + -15932110, 29330899, -15076224}}}, + {/* 25/31 */ + {{-25499735, -4378794, -15222908, -6901211, 16615731, 2051784, 3303702, + 15490, -27548796, 12314391}, + {15683520, -6003043, 18109120, -9980648, 15337968, -5997823, -16717435, + 15921866, 16103996, -3731215}, + {-23169824, -10781249, 13588192, -1628807, -3798557, -1074929, -19273607, + 5402699, -29815713, -9841101}}, + {{23190676, 2384583, -32714340, 3462154, -29903655, -1529132, -11266856, + 8911517, -25205859, 2739713}, + {21374101, -3554250, -33524649, 9874411, 15377179, 11831242, -33529904, + 6134907, 4931255, 11987849}, + {-7732, -2978858, -16223486, 7277597, 105524, -322051, -31480539, + 13861388, -30076310, 10117930}}, + {{-29501170, -10744872, -26163768, 13051539, -25625564, 5089643, -6325503, + 6704079, 12890019, 15728940}, + {-21972360, -11771379, -951059, -4418840, 14704840, 2695116, 903376, + -10428139, 12885167, 8311031}, + {-17516482, 5352194, 10384213, -13811658, 7506451, 13453191, 26423267, + 4384730, 1888765, -5435404}}, + {{-25817338, -3107312, -13494599, -3182506, 30896459, -13921729, -32251644, + -12707869, -19464434, -3340243}, + {-23607977, -2665774, -526091, 4651136, 5765089, 4618330, 6092245, + 14845197, 17151279, -9854116}, + {-24830458, -12733720, -15165978, 10367250, -29530908, -265356, 22825805, + -7087279, -16866484, 16176525}}, + {{-23583256, 6564961, 20063689, 3798228, -4740178, 7359225, 2006182, + -10363426, -28746253, -10197509}, + {-10626600, -4486402, -13320562, -5125317, 3432136, -6393229, 23632037, + -1940610, 32808310, 1099883}, + {15030977, 5768825, -27451236, -2887299, -6427378, -15361371, -15277896, + -6809350, 2051441, -15225865}}, + {{-3362323, -7239372, 7517890, 9824992, 23555850, 295369, 5148398, + -14154188, -22686354, 16633660}, + {4577086, -16752288, 13249841, -15304328, 19958763, -14537274, 18559670, + -10759549, 8402478, -9864273}, + {-28406330, -1051581, -26790155, -907698, -17212414, -11030789, 9453451, + -14980072, 17983010, 9967138}}, + {{-25762494, 6524722, 26585488, 9969270, 24709298, 1220360, -1677990, + 7806337, 17507396, 3651560}, + {-10420457, -4118111, 14584639, 15971087, -15768321, 8861010, 26556809, + -5574557, -18553322, -11357135}, + {2839101, 14284142, 4029895, 3472686, 14402957, 12689363, -26642121, + 8459447, -5605463, -7621941}}, + {{-4839289, -3535444, 9744961, 2871048, 25113978, 3187018, -25110813, + -849066, 17258084, -7977739}, + {18164541, -10595176, -17154882, -1542417, 19237078, -9745295, 23357533, + -15217008, 26908270, 12150756}, + {-30264870, -7647865, 5112249, -7036672, -1499807, -6974257, 43168, + -5537701, -32302074, 16215819}}}, + {/* 26/31 */ + {{-6898905, 9824394, -12304779, -4401089, -31397141, -6276835, 32574489, + 12532905, -7503072, -8675347}, + {-27343522, -16515468, -27151524, -10722951, 946346, 16291093, 254968, + 7168080, 21676107, -1943028}, + {21260961, -8424752, -16831886, -11920822, -23677961, 3968121, -3651949, + -6215466, -3556191, -7913075}}, + {{16544754, 13250366, -16804428, 15546242, -4583003, 12757258, -2462308, + -8680336, -18907032, -9662799}, + {-2415239, -15577728, 18312303, 4964443, -15272530, -12653564, 26820651, + 16690659, 25459437, -4564609}, + {-25144690, 11425020, 28423002, -11020557, -6144921, -15826224, 9142795, + -2391602, -6432418, -1644817}}, + {{-23104652, 6253476, 16964147, -3768872, -25113972, -12296437, -27457225, + -16344658, 6335692, 7249989}, + {-30333227, 13979675, 7503222, -12368314, -11956721, -4621693, -30272269, + 2682242, 25993170, -12478523}, + {4364628, 5930691, 32304656, -10044554, -8054781, 15091131, 22857016, + -10598955, 31820368, 15075278}}, + {{31879134, -8918693, 17258761, 90626, -8041836, -4917709, 24162788, + -9650886, -17970238, 12833045}, + {19073683, 14851414, -24403169, -11860168, 7625278, 11091125, -19619190, + 2074449, -9413939, 14905377}, + {24483667, -11935567, -2518866, -11547418, -1553130, 15355506, -25282080, + 9253129, 27628530, -7555480}}, + {{17597607, 8340603, 19355617, 552187, 26198470, -3176583, 4593324, + -9157582, -14110875, 15297016}, + {510886, 14337390, -31785257, 16638632, 6328095, 2713355, -20217417, + -11864220, 8683221, 2921426}, + {18606791, 11874196, 27155355, -5281482, -24031742, 6265446, -25178240, + -1278924, 4674690, 13890525}}, + {{13609624, 13069022, -27372361, -13055908, 24360586, 9592974, 14977157, + 9835105, 4389687, 288396}, + {9922506, -519394, 13613107, 5883594, -18758345, -434263, -12304062, + 8317628, 23388070, 16052080}, + {12720016, 11937594, -31970060, -5028689, 26900120, 8561328, -20155687, + -11632979, -14754271, -10812892}}, + {{15961858, 14150409, 26716931, -665832, -22794328, 13603569, 11829573, + 7467844, -28822128, 929275}, + {11038231, -11582396, -27310482, -7316562, -10498527, -16307831, + -23479533, -9371869, -21393143, 2465074}, + {20017163, -4323226, 27915242, 1529148, 12396362, 15675764, 13817261, + -9658066, 2463391, -4622140}}, + {{-16358878, -12663911, -12065183, 4996454, -1256422, 1073572, 9583558, + 12851107, 4003896, 12673717}, + {-1731589, -15155870, -3262930, 16143082, 19294135, 13385325, 14741514, + -9103726, 7903886, 2348101}, + {24536016, -16515207, 12715592, -3862155, 1511293, 10047386, -3842346, + -7129159, -28377538, 10048127}}}, + {/* 27/31 */ + {{-12622226, -6204820, 30718825, 2591312, -10617028, 12192840, 18873298, + -7297090, -32297756, 15221632}, + {-26478122, -11103864, 11546244, -1852483, 9180880, 7656409, -21343950, + 2095755, 29769758, 6593415}, + {-31994208, -2907461, 4176912, 3264766, 12538965, -868111, 26312345, + -6118678, 30958054, 8292160}}, + {{31429822, -13959116, 29173532, 15632448, 12174511, -2760094, 32808831, + 3977186, 26143136, -3148876}, + {22648901, 1402143, -22799984, 13746059, 7936347, 365344, -8668633, + -1674433, -3758243, -2304625}, + {-15491917, 8012313, -2514730, -12702462, -23965846, -10254029, -1612713, + -1535569, -16664475, 8194478}}, + {{27338066, -7507420, -7414224, 10140405, -19026427, -6589889, 27277191, + 8855376, 28572286, 3005164}, + {26287124, 4821776, 25476601, -4145903, -3764513, -15788984, -18008582, + 1182479, -26094821, -13079595}, + {-7171154, 3178080, 23970071, 6201893, -17195577, -4489192, -21876275, + -13982627, 32208683, -1198248}}, + {{-16657702, 2817643, -10286362, 14811298, 6024667, 13349505, -27315504, + -10497842, -27672585, -11539858}, + {15941029, -9405932, -21367050, 8062055, 31876073, -238629, -15278393, + -1444429, 15397331, -4130193}, + {8934485, -13485467, -23286397, -13423241, -32446090, 14047986, 31170398, + -1441021, -27505566, 15087184}}, + {{-18357243, -2156491, 24524913, -16677868, 15520427, -6360776, -15502406, + 11461896, 16788528, -5868942}, + {-1947386, 16013773, 21750665, 3714552, -17401782, -16055433, -3770287, + -10323320, 31322514, -11615635}, + {21426655, -5650218, -13648287, -5347537, -28812189, -4920970, -18275391, + -14621414, 13040862, -12112948}}, + {{11293895, 12478086, -27136401, 15083750, -29307421, 14748872, 14555558, + -13417103, 1613711, 4896935}, + {-25894883, 15323294, -8489791, -8057900, 25967126, -13425460, 2825960, + -4897045, -23971776, -11267415}, + {-15924766, -5229880, -17443532, 6410664, 3622847, 10243618, 20615400, + 12405433, -23753030, -8436416}}, + {{-7091295, 12556208, -20191352, 9025187, -17072479, 4333801, 4378436, + 2432030, 23097949, -566018}, + {4565804, -16025654, 20084412, -7842817, 1724999, 189254, 24767264, + 10103221, -18512313, 2424778}, + {366633, -11976806, 8173090, -6890119, 30788634, 5745705, -7168678, + 1344109, -3642553, 12412659}}, + {{-24001791, 7690286, 14929416, -168257, -32210835, -13412986, 24162697, + -15326504, -3141501, 11179385}, + {18289522, -14724954, 8056945, 16430056, -21729724, 7842514, -6001441, + -1486897, -18684645, -11443503}, + {476239, 6601091, -6152790, -9723375, 17503545, -4863900, 27672959, + 13403813, 11052904, 5219329}}}, + {/* 28/31 */ + {{20678546, -8375738, -32671898, 8849123, -5009758, 14574752, 31186971, + -3973730, 9014762, -8579056}, + {-13644050, -10350239, -15962508, 5075808, -1514661, -11534600, -33102500, + 9160280, 8473550, -3256838}, + {24900749, 14435722, 17209120, -15292541, -22592275, 9878983, -7689309, + -16335821, -24568481, 11788948}}, + {{-3118155, -11395194, -13802089, 14797441, 9652448, -6845904, -20037437, + 10410733, -24568470, -1458691}, + {-15659161, 16736706, -22467150, 10215878, -9097177, 7563911, 11871841, + -12505194, -18513325, 8464118}, + {-23400612, 8348507, -14585951, -861714, -3950205, -6373419, 14325289, + 8628612, 33313881, -8370517}}, + {{-20186973, -4967935, 22367356, 5271547, -1097117, -4788838, -24805667, + -10236854, -8940735, -5818269}, + {-6948785, -1795212, -32625683, -16021179, 32635414, -7374245, 15989197, + -12838188, 28358192, -4253904}, + {-23561781, -2799059, -32351682, -1661963, -9147719, 10429267, -16637684, + 4072016, -5351664, 5596589}}, + {{-28236598, -3390048, 12312896, 6213178, 3117142, 16078565, 29266239, + 2557221, 1768301, 15373193}, + {-7243358, -3246960, -4593467, -7553353, -127927, -912245, -1090902, + -4504991, -24660491, 3442910}, + {-30210571, 5124043, 14181784, 8197961, 18964734, -11939093, 22597931, + 7176455, -18585478, 13365930}}, + {{-7877390, -1499958, 8324673, 4690079, 6261860, 890446, 24538107, + -8570186, -9689599, -3031667}, + {25008904, -10771599, -4305031, -9638010, 16265036, 15721635, 683793, + -11823784, 15723479, -15163481}, + {-9660625, 12374379, -27006999, -7026148, -7724114, -12314514, 11879682, + 5400171, 519526, -1235876}}, + {{22258397, -16332233, -7869817, 14613016, -22520255, -2950923, -20353881, + 7315967, 16648397, 7605640}, + {-8081308, -8464597, -8223311, 9719710, 19259459, -15348212, 23994942, + -5281555, -9468848, 4763278}, + {-21699244, 9220969, -15730624, 1084137, -25476107, -2852390, 31088447, + -7764523, -11356529, 728112}}, + {{26047220, -11751471, -6900323, -16521798, 24092068, 9158119, -4273545, + -12555558, -29365436, -5498272}, + {17510331, -322857, 5854289, 8403524, 17133918, -3112612, -28111007, + 12327945, 10750447, 10014012}, + {-10312768, 3936952, 9156313, -8897683, 16498692, -994647, -27481051, + -666732, 3424691, 7540221}}, + {{30322361, -6964110, 11361005, -4143317, 7433304, 4989748, -7071422, + -16317219, -9244265, 15258046}, + {13054562, -2779497, 19155474, 469045, -12482797, 4566042, 5631406, + 2711395, 1062915, -5136345}, + {-19240248, -11254599, -29509029, -7499965, -5835763, 13005411, -6066489, + 12194497, 32960380, 1459310}}}, + {/* 29/31 */ + {{19852034, 7027924, 23669353, 10020366, 8586503, -6657907, 394197, + -6101885, 18638003, -11174937}, + {31395534, 15098109, 26581030, 8030562, -16527914, -5007134, 9012486, + -7584354, -6643087, -5442636}, + {-9192165, -2347377, -1997099, 4529534, 25766844, 607986, -13222, 9677543, + -32294889, -6456008}}, + {{-2444496, -149937, 29348902, 8186665, 1873760, 12489863, -30934579, + -7839692, -7852844, -8138429}, + {-15236356, -15433509, 7766470, 746860, 26346930, -10221762, -27333451, + 10754588, -9431476, 5203576}, + {31834314, 14135496, -770007, 5159118, 20917671, -16768096, -7467973, + -7337524, 31809243, 7347066}}, + {{-9606723, -11874240, 20414459, 13033986, 13716524, -11691881, 19797970, + -12211255, 15192876, -2087490}, + {-12663563, -2181719, 1168162, -3804809, 26747877, -14138091, 10609330, + 12694420, 33473243, -13382104}, + {33184999, 11180355, 15832085, -11385430, -1633671, 225884, 15089336, + -11023903, -6135662, 14480053}}, + {{31308717, -5619998, 31030840, -1897099, 15674547, -6582883, 5496208, + 13685227, 27595050, 8737275}, + {-20318852, -15150239, 10933843, -16178022, 8335352, -7546022, -31008351, + -12610604, 26498114, 66511}, + {22644454, -8761729, -16671776, 4884562, -3105614, -13559366, 30540766, + -4286747, -13327787, -7515095}}, + {{-28017847, 9834845, 18617207, -2681312, -3401956, -13307506, 8205540, + 13585437, -17127465, 15115439}, + {23711543, -672915, 31206561, -8362711, 6164647, -9709987, -33535882, + -1426096, 8236921, 16492939}, + {-23910559, -13515526, -26299483, -4503841, 25005590, -7687270, 19574902, + 10071562, 6708380, -6222424}}, + {{2101391, -4930054, 19702731, 2367575, -15427167, 1047675, 5301017, + 9328700, 29955601, -11678310}, + {3096359, 9271816, -21620864, -15521844, -14847996, -7592937, -25892142, + -12635595, -9917575, 6216608}, + {-32615849, 338663, -25195611, 2510422, -29213566, -13820213, 24822830, + -6146567, -26767480, 7525079}}, + {{-23066649, -13985623, 16133487, -7896178, -3389565, 778788, -910336, + -2782495, -19386633, 11994101}, + {21691500, -13624626, -641331, -14367021, 3285881, -3483596, -25064666, + 9718258, -7477437, 13381418}, + {18445390, -4202236, 14979846, 11622458, -1727110, -3582980, 23111648, + -6375247, 28535282, 15779576}}, + {{30098053, 3089662, -9234387, 16662135, -21306940, 11308411, -14068454, + 12021730, 9955285, -16303356}, + {9734894, -14576830, -7473633, -9138735, 2060392, 11313496, -18426029, + 9924399, 20194861, 13380996}, + {-26378102, -7965207, -22167821, 15789297, -18055342, -6168792, -1984914, + 15707771, 26342023, 10146099}}}, + {/* 30/31 */ + {{-26016874, -219943, 21339191, -41388, 19745256, -2878700, -29637280, + 2227040, 21612326, -545728}, + {-13077387, 1184228, 23562814, -5970442, -20351244, -6348714, 25764461, + 12243797, -20856566, 11649658}, + {-10031494, 11262626, 27384172, 2271902, 26947504, -15997771, 39944, + 6114064, 33514190, 2333242}}, + {{-21433588, -12421821, 8119782, 7219913, -21830522, -9016134, -6679750, + -12670638, 24350578, -13450001}, + {-4116307, -11271533, -23886186, 4843615, -30088339, 690623, -31536088, + -10406836, 8317860, 12352766}, + {18200138, -14475911, -33087759, -2696619, -23702521, -9102511, -23552096, + -2287550, 20712163, 6719373}}, + {{26656208, 6075253, -7858556, 1886072, -28344043, 4262326, 11117530, + -3763210, 26224235, -3297458}, + {-17168938, -14854097, -3395676, -16369877, -19954045, 14050420, 21728352, + 9493610, 18620611, -16428628}, + {-13323321, 13325349, 11432106, 5964811, 18609221, 6062965, -5269471, + -9725556, -30701573, -16479657}}, + {{-23860538, -11233159, 26961357, 1640861, -32413112, -16737940, 12248509, + -5240639, 13735342, 1934062}, + {25089769, 6742589, 17081145, -13406266, 21909293, -16067981, -15136294, + -3765346, -21277997, 5473616}, + {31883677, -7961101, 1083432, -11572403, 22828471, 13290673, -7125085, + 12469656, 29111212, -5451014}}, + {{24244947, -15050407, -26262976, 2791540, -14997599, 16666678, 24367466, + 6388839, -10295587, 452383}, + {-25640782, -3417841, 5217916, 16224624, 19987036, -4082269, -24236251, + -5915248, 15766062, 8407814}, + {-20406999, 13990231, 15495425, 16395525, 5377168, 15166495, -8917023, + -4388953, -8067909, 2276718}}, + {{30157918, 12924066, -17712050, 9245753, 19895028, 3368142, -23827587, + 5096219, 22740376, -7303417}, + {2041139, -14256350, 7783687, 13876377, -25946985, -13352459, 24051124, + 13742383, -15637599, 13295222}, + {33338237, -8505733, 12532113, 7977527, 9106186, -1715251, -17720195, + -4612972, -4451357, -14669444}}, + {{-20045281, 5454097, -14346548, 6447146, 28862071, 1883651, -2469266, + -4141880, 7770569, 9620597}, + {23208068, 7979712, 33071466, 8149229, 1758231, -10834995, 30945528, + -1694323, -33502340, -14767970}, + {1439958, -16270480, -1079989, -793782, 4625402, 10647766, -5043801, + 1220118, 30494170, -11440799}}, + {{-5037580, -13028295, -2970559, -3061767, 15640974, -6701666, -26739026, + 926050, -1684339, -13333647}, + {13908495, -3549272, 30919928, -6273825, -21521863, 7989039, 9021034, + 9078865, 3353509, 4033511}, + {-29663431, -15113610, 32259991, -344482, 24295849, -12912123, 23161163, + 8839127, 27485041, 7356032}}}, { /* 31/31 */ - { - { 9661027, 705443, 11980065, -5370154, -1628543, 14661173, -6346142, 2625015, 28431036, -16771834 }, - { -23839233, -8311415, -25945511, 7480958, -17681669, -8354183, -22545972, 14150565, 15970762, 4099461 }, - { 29262576, 16756590, 26350592, -8793563, 8529671, -11208050, 13617293, -9937143, 11465739, 8317062 } - }, - { - { -25493081, -6962928, 32500200, -9419051, -23038724, -2302222, 14898637, 3848455, 20969334, -5157516 }, - { -20384450, -14347713, -18336405, 13884722, -33039454, 2842114, -21610826, -3649888, 11177095, 14989547 }, - { -24496721, -11716016, 16959896, 2278463, 12066309, 10137771, 13515641, 2581286, -28487508, 9930240 } - }, - { - { -17751622, -2097826, 16544300, -13009300, -15914807, -14949081, 18345767, -13403753, 16291481, -5314038 }, - { -33229194, 2553288, 32678213, 9875984, 8534129, 6889387, -9676774, 6957617, 4368891, 9788741 }, - { 16660756, 7281060, -10830758, 12911820, 20108584, -8101676, -21722536, -8613148, 16250552, -11111103 } - }, - { - { -19765507, 2390526, -16551031, 14161980, 1905286, 6414907, 4689584, 10604807, -30190403, 4782747 }, - { -1354539, 14736941, -7367442, -13292886, 7710542, -14155590, -9981571, 4383045, 22546403, 437323 }, - { 31665577, -12180464, -16186830, 1491339, -18368625, 3294682, 27343084, 2786261, -30633590, -14097016 } - }, - { - { -14467279, -683715, -33374107, 7448552, 19294360, 14334329, -19690631, 2355319, -19284671, -6114373 }, - { 15121312, -15796162, 6377020, -6031361, -10798111, -12957845, 18952177, 15496498, -29380133, 11754228 }, - { -2637277, -13483075, 8488727, -14303896, 12728761, -1622493, 7141596, 11724556, 22761615, -10134141 } - }, - { - { 16918416, 11729663, -18083579, 3022987, -31015732, -13339659, -28741185, -12227393, 32851222, 11717399 }, - { 11166634, 7338049, -6722523, 4531520, -29468672, -7302055, 31474879, 3483633, -1193175, -4030831 }, - { -185635, 9921305, 31456609, -13536438, -12013818, 13348923, 33142652, 6546660, -19985279, -3948376 } - }, - { - { -32460596, 11266712, -11197107, -7899103, 31703694, 3855903, -8537131, -12833048, -30772034, -15486313 }, - { -18006477, 12709068, 3991746, -6479188, -21491523, -10550425, -31135347, -16049879, 10928917, 3011958 }, - { -6957757, -15594337, 31696059, 334240, 29576716, 14796075, -30831056, -12805180, 18008031, 10258577 } - }, - { - { -22448644, 15655569, 7018479, -4410003, -30314266, -1201591, -1853465, 1367120, 25127874, 6671743 }, - { 29701166, -14373934, -10878120, 9279288, -17568, 13127210, 21382910, 11042292, 25838796, 4642684 }, - { -20430234, 14955537, -24126347, 8124619, -5369288, -5990470, 30468147, -13900640, 18423289, 4177476 } + {{9661027, 705443, 11980065, -5370154, -1628543, 14661173, -6346142, 2625015, + 28431036, -16771834}, + {-23839233, -8311415, -25945511, 7480958, -17681669, -8354183, -22545972, + 14150565, 15970762, 4099461}, + {29262576, 16756590, 26350592, -8793563, 8529671, -11208050, 13617293, + -9937143, 11465739, 8317062}}, + {{-25493081, -6962928, 32500200, -9419051, -23038724, -2302222, 14898637, + 3848455, 20969334, -5157516}, + {-20384450, -14347713, -18336405, 13884722, -33039454, 2842114, + -21610826, -3649888, 11177095, 14989547}, + {-24496721, -11716016, 16959896, 2278463, 12066309, 10137771, 13515641, + 2581286, -28487508, 9930240}}, + {{-17751622, -2097826, 16544300, -13009300, -15914807, -14949081, + 18345767, -13403753, 16291481, -5314038}, + {-33229194, 2553288, 32678213, 9875984, 8534129, 6889387, -9676774, + 6957617, 4368891, 9788741}, + {16660756, 7281060, -10830758, 12911820, 20108584, -8101676, -21722536, + -8613148, 16250552, -11111103}}, + {{-19765507, 2390526, -16551031, 14161980, 1905286, 6414907, 4689584, + 10604807, -30190403, 4782747}, + {-1354539, 14736941, -7367442, -13292886, 7710542, -14155590, -9981571, + 4383045, 22546403, 437323}, + {31665577, -12180464, -16186830, 1491339, -18368625, 3294682, 27343084, + 2786261, -30633590, -14097016}}, + {{-14467279, -683715, -33374107, 7448552, 19294360, 14334329, -19690631, + 2355319, -19284671, -6114373}, + {15121312, -15796162, 6377020, -6031361, -10798111, -12957845, 18952177, + 15496498, -29380133, 11754228}, + {-2637277, -13483075, 8488727, -14303896, 12728761, -1622493, 7141596, + 11724556, 22761615, -10134141}}, + {{16918416, 11729663, -18083579, 3022987, -31015732, -13339659, -28741185, + -12227393, 32851222, 11717399}, + {11166634, 7338049, -6722523, 4531520, -29468672, -7302055, 31474879, + 3483633, -1193175, -4030831}, + {-185635, 9921305, 31456609, -13536438, -12013818, 13348923, 33142652, + 6546660, -19985279, -3948376}}, + {{-32460596, 11266712, -11197107, -7899103, 31703694, 3855903, -8537131, + -12833048, -30772034, -15486313}, + {-18006477, 12709068, 3991746, -6479188, -21491523, -10550425, -31135347, + -16049879, 10928917, 3011958}, + {-6957757, -15594337, 31696059, 334240, 29576716, 14796075, -30831056, + -12805180, 18008031, 10258577}}, + { + {-22448644, 15655569, 7018479, -4410003, -30314266, + -1201591, -1853465, 1367120, 25127874, 6671743}, + {29701166, -14373934, -10878120, 9279288, -17568, + 13127210, 21382910, 11042292, 25838796, 4642684}, + { + -20430234, 14955537, -24126347, 8124619, -5369288, -5990470, 30468147, + -13900640, 18423289, 4177476 + } } } diff --git a/crypto/curve25519/ref10/fe_25_5/base2.h b/crypto/curve25519/ref10/fe_25_5/base2.h index 90a1457ea..321ea658e 100644 --- a/crypto/curve25519/ref10/fe_25_5/base2.h +++ b/crypto/curve25519/ref10/fe_25_5/base2.h @@ -1,40 +1,52 @@ -{ - { 25967493, -14356035, 29566456, 3660896, -12694345, 4014787, 27544626, -11754271, -6079156, 2047605 }, - { -12545711, 934262, -2722910, 3049990, -727428, 9406986, 12720692, 5043384, 19500929, -15469378 }, - { -8738181, 4489570, 9688441, -14785194, 10184609, -12363380, 29287919, 11864899, -24514362, -4438546 } -}, -{ - { 15636291, -9688557, 24204773, -7912398, 616977, -16685262, 27787600, -14772189, 28944400, -1550024 }, - { 16568933, 4717097, -11556148, -1102322, 15682896, -11807043, 16354577, -11775962, 7689662, 11199574 }, - { 30464156, -5976125, -11779434, -15670865, 23220365, 15915852, 7512774, 10017326, -17749093, -9920357 } -}, -{ - { 10861363, 11473154, 27284546, 1981175, -30064349, 12577861, 32867885, 14515107, -15438304, 10819380 }, - { 4708026, 6336745, 20377586, 9066809, -11272109, 6594696, -25653668, 12483688, -12668491, 5581306 }, - { 19563160, 16186464, -29386857, 4097519, 10237984, -4348115, 28542350, 13850243, -23678021, -15815942 } -}, -{ - { 5153746, 9909285, 1723747, -2777874, 30523605, 5516873, 19480852, 5230134, -23952439, -15175766 }, - { -30269007, -3463509, 7665486, 10083793, 28475525, 1649722, 20654025, 16520125, 30598449, 7715701 }, - { 28881845, 14381568, 9657904, 3680757, -20181635, 7843316, -31400660, 1370708, 29794553, -1409300 } -}, -{ - { -22518993, -6692182, 14201702, -8745502, -23510406, 8844726, 18474211, -1361450, -13062696, 13821877 }, - { -6455177, -7839871, 3374702, -4740862, -27098617, -10571707, 31655028, -7212327, 18853322, -14220951 }, - { 4566830, -12963868, -28974889, -12240689, -7602672, -2830569, -8514358, -10431137, 2207753, -3209784 } -}, -{ - { -25154831, -4185821, 29681144, 7868801, -6854661, -9423865, -12437364, -663000, -31111463, -16132436 }, - { 25576264, -2703214, 7349804, -11814844, 16472782, 9300885, 3844789, 15725684, 171356, 6466918 }, - { 23103977, 13316479, 9739013, -16149481, 817875, -15038942, 8965339, -14088058, -30714912, 16193877 } -}, -{ - { -33521811, 3180713, -2394130, 14003687, -16903474, -16270840, 17238398, 4729455, -18074513, 9256800 }, - { -25182317, -4174131, 32336398, 5036987, -21236817, 11360617, 22616405, 9761698, -19827198, 630305 }, - { -13720693, 2639453, -24237460, -7406481, 9494427, -5774029, -6554551, -15960994, -2449256, -14291300 } -}, -{ - { -3151181, -5046075, 9282714, 6866145, -31907062, -863023, -18940575, 15033784, 25105118, -7894876 }, - { -24326370, 15950226, -31801215, -14592823, -11662737, -5090925, 1573892, -2625887, 2198790, -15804619 }, - { -3099351, 10324967, -2241613, 7453183, -5446979, -2735503, -13812022, -16236442, -32461234, -12290683 } +{{25967493, -14356035, 29566456, 3660896, -12694345, 4014787, 27544626, + -11754271, -6079156, 2047605}, + {-12545711, 934262, -2722910, 3049990, -727428, 9406986, 12720692, 5043384, + 19500929, -15469378}, + {-8738181, 4489570, 9688441, -14785194, 10184609, -12363380, 29287919, + 11864899, -24514362, -4438546}}, + {{15636291, -9688557, 24204773, -7912398, 616977, -16685262, 27787600, + -14772189, 28944400, -1550024}, + {16568933, 4717097, -11556148, -1102322, 15682896, -11807043, 16354577, + -11775962, 7689662, 11199574}, + {30464156, -5976125, -11779434, -15670865, 23220365, 15915852, 7512774, + 10017326, -17749093, -9920357}}, + {{10861363, 11473154, 27284546, 1981175, -30064349, 12577861, 32867885, + 14515107, -15438304, 10819380}, + {4708026, 6336745, 20377586, 9066809, -11272109, 6594696, -25653668, + 12483688, -12668491, 5581306}, + {19563160, 16186464, -29386857, 4097519, 10237984, -4348115, 28542350, + 13850243, -23678021, -15815942}}, + {{5153746, 9909285, 1723747, -2777874, 30523605, 5516873, 19480852, 5230134, + -23952439, -15175766}, + {-30269007, -3463509, 7665486, 10083793, 28475525, 1649722, 20654025, + 16520125, 30598449, 7715701}, + {28881845, 14381568, 9657904, 3680757, -20181635, 7843316, -31400660, + 1370708, 29794553, -1409300}}, + {{-22518993, -6692182, 14201702, -8745502, -23510406, 8844726, 18474211, + -1361450, -13062696, 13821877}, + {-6455177, -7839871, 3374702, -4740862, -27098617, -10571707, 31655028, + -7212327, 18853322, -14220951}, + {4566830, -12963868, -28974889, -12240689, -7602672, -2830569, -8514358, + -10431137, 2207753, -3209784}}, + {{-25154831, -4185821, 29681144, 7868801, -6854661, -9423865, -12437364, + -663000, -31111463, -16132436}, + {25576264, -2703214, 7349804, -11814844, 16472782, 9300885, 3844789, + 15725684, 171356, 6466918}, + {23103977, 13316479, 9739013, -16149481, 817875, -15038942, 8965339, + -14088058, -30714912, 16193877}}, + {{-33521811, 3180713, -2394130, 14003687, -16903474, -16270840, 17238398, + 4729455, -18074513, 9256800}, + {-25182317, -4174131, 32336398, 5036987, -21236817, 11360617, 22616405, + 9761698, -19827198, 630305}, + {-13720693, 2639453, -24237460, -7406481, 9494427, -5774029, -6554551, + -15960994, -2449256, -14291300}}, +{ + {-3151181, -5046075, 9282714, 6866145, -31907062, + -863023, -18940575, 15033784, 25105118, -7894876}, + {-24326370, 15950226, -31801215, -14592823, -11662737, + -5090925, 1573892, -2625887, 2198790, -15804619}, + { + -3099351, 10324967, -2241613, 7453183, -5446979, -2735503, -13812022, + -16236442, -32461234, -12290683 + } } diff --git a/crypto/curve25519/ref10/fe_25_5/constants.h b/crypto/curve25519/ref10/fe_25_5/constants.h index 3dc9156a3..e24857dce 100644 --- a/crypto/curve25519/ref10/fe_25_5/constants.h +++ b/crypto/curve25519/ref10/fe_25_5/constants.h @@ -1,20 +1,18 @@ -/* 37095705934669439343138083508754565189542113879843219016388785533085940283555 */ -static const fe25519 d = { - -10913610, 13857413, -15372611, 6949391, 114729, -8787816, -6275908, -3247719, -18696448, -12055116 -}; +/* 37095705934669439343138083508754565189542113879843219016388785533085940283555 + */ +static const fe25519 d = {-10913610, 13857413, -15372611, 6949391, 114729, + -8787816, -6275908, -3247719, -18696448, -12055116}; /* 2 * d = * 16295367250680780974490674513165176452449235426866156013048779062215315747161 */ -static const fe25519 d2 = { - -21827239, -5839606, -30745221, 13898782, 229458, 15978800, -12551817, -6495438, 29715968, 9444199 }; +static const fe25519 d2 = {-21827239, -5839606, -30745221, 13898782, 229458, + 15978800, -12551817, -6495438, 29715968, 9444199}; /* sqrt(-1) */ -static const fe25519 sqrtm1 = { - -32595792, -7943725, 9377950, 3500415, 12389472, -272473, -25146209, -2005654, 326686, 11406482 -}; +static const fe25519 sqrtm1 = {-32595792, -7943725, 9377950, 3500415, + 12389472, -272473, -25146209, -2005654, + 326686, 11406482}; /* A = 486662 */ -static const fe25519 curve25519_A = { - 486662, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; +static const fe25519 curve25519_A = {486662, 0, 0, 0, 0, 0, 0, 0, 0, 0}; diff --git a/crypto/curve25519/ref10/fe_25_5/fe.h b/crypto/curve25519/ref10/fe_25_5/fe.h index f216669e4..25cc89d28 100644 --- a/crypto/curve25519/ref10/fe_25_5/fe.h +++ b/crypto/curve25519/ref10/fe_25_5/fe.h @@ -5,70 +5,70 @@ void fe25519_frombytes(fe25519 h, const unsigned char *s) { - int64_t h0 = load_4(s); - int64_t h1 = load_3(s + 4) << 6; - int64_t h2 = load_3(s + 7) << 5; - int64_t h3 = load_3(s + 10) << 3; - int64_t h4 = load_3(s + 13) << 2; - int64_t h5 = load_4(s + 16); - int64_t h6 = load_3(s + 20) << 7; - int64_t h7 = load_3(s + 23) << 5; - int64_t h8 = load_3(s + 26) << 4; - int64_t h9 = (load_3(s + 29) & 8388607) << 2; - - int64_t carry0; - int64_t carry1; - int64_t carry2; - int64_t carry3; - int64_t carry4; - int64_t carry5; - int64_t carry6; - int64_t carry7; - int64_t carry8; - int64_t carry9; - - carry9 = (h9 + (int64_t)(1L << 24)) >> 25; - h0 += carry9 * 19; - h9 -= carry9 * ((uint64_t) 1L << 25); - carry1 = (h1 + (int64_t)(1L << 24)) >> 25; - h2 += carry1; - h1 -= carry1 * ((uint64_t) 1L << 25); - carry3 = (h3 + (int64_t)(1L << 24)) >> 25; - h4 += carry3; - h3 -= carry3 * ((uint64_t) 1L << 25); - carry5 = (h5 + (int64_t)(1L << 24)) >> 25; - h6 += carry5; - h5 -= carry5 * ((uint64_t) 1L << 25); - carry7 = (h7 + (int64_t)(1L << 24)) >> 25; - h8 += carry7; - h7 -= carry7 * ((uint64_t) 1L << 25); - - carry0 = (h0 + (int64_t)(1L << 25)) >> 26; - h1 += carry0; - h0 -= carry0 * ((uint64_t) 1L << 26); - carry2 = (h2 + (int64_t)(1L << 25)) >> 26; - h3 += carry2; - h2 -= carry2 * ((uint64_t) 1L << 26); - carry4 = (h4 + (int64_t)(1L << 25)) >> 26; - h5 += carry4; - h4 -= carry4 * ((uint64_t) 1L << 26); - carry6 = (h6 + (int64_t)(1L << 25)) >> 26; - h7 += carry6; - h6 -= carry6 * ((uint64_t) 1L << 26); - carry8 = (h8 + (int64_t)(1L << 25)) >> 26; - h9 += carry8; - h8 -= carry8 * ((uint64_t) 1L << 26); - - h[0] = (int32_t) h0; - h[1] = (int32_t) h1; - h[2] = (int32_t) h2; - h[3] = (int32_t) h3; - h[4] = (int32_t) h4; - h[5] = (int32_t) h5; - h[6] = (int32_t) h6; - h[7] = (int32_t) h7; - h[8] = (int32_t) h8; - h[9] = (int32_t) h9; + int64_t h0 = load_4(s); + int64_t h1 = load_3(s + 4) << 6; + int64_t h2 = load_3(s + 7) << 5; + int64_t h3 = load_3(s + 10) << 3; + int64_t h4 = load_3(s + 13) << 2; + int64_t h5 = load_4(s + 16); + int64_t h6 = load_3(s + 20) << 7; + int64_t h7 = load_3(s + 23) << 5; + int64_t h8 = load_3(s + 26) << 4; + int64_t h9 = (load_3(s + 29) & 8388607) << 2; + + int64_t carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + + carry9 = (h9 + (int64_t)(1L << 24)) >> 25; + h0 += carry9 * 19; + h9 -= carry9 * ((uint64_t)1L << 25); + carry1 = (h1 + (int64_t)(1L << 24)) >> 25; + h2 += carry1; + h1 -= carry1 * ((uint64_t)1L << 25); + carry3 = (h3 + (int64_t)(1L << 24)) >> 25; + h4 += carry3; + h3 -= carry3 * ((uint64_t)1L << 25); + carry5 = (h5 + (int64_t)(1L << 24)) >> 25; + h6 += carry5; + h5 -= carry5 * ((uint64_t)1L << 25); + carry7 = (h7 + (int64_t)(1L << 24)) >> 25; + h8 += carry7; + h7 -= carry7 * ((uint64_t)1L << 25); + + carry0 = (h0 + (int64_t)(1L << 25)) >> 26; + h1 += carry0; + h0 -= carry0 * ((uint64_t)1L << 26); + carry2 = (h2 + (int64_t)(1L << 25)) >> 26; + h3 += carry2; + h2 -= carry2 * ((uint64_t)1L << 26); + carry4 = (h4 + (int64_t)(1L << 25)) >> 26; + h5 += carry4; + h4 -= carry4 * ((uint64_t)1L << 26); + carry6 = (h6 + (int64_t)(1L << 25)) >> 26; + h7 += carry6; + h6 -= carry6 * ((uint64_t)1L << 26); + carry8 = (h8 + (int64_t)(1L << 25)) >> 26; + h9 += carry8; + h8 -= carry8 * ((uint64_t)1L << 26); + + h[0] = (int32_t)h0; + h[1] = (int32_t)h1; + h[2] = (int32_t)h2; + h[3] = (int32_t)h3; + h[4] = (int32_t)h4; + h[5] = (int32_t)h5; + h[6] = (int32_t)h6; + h[7] = (int32_t)h7; + h[8] = (int32_t)h8; + h[9] = (int32_t)h9; } /* @@ -99,76 +99,77 @@ fe25519_frombytes(fe25519 h, const unsigned char *s) static void fe25519_reduce(fe25519 h, const fe25519 f) { - int32_t h0 = f[0]; - int32_t h1 = f[1]; - int32_t h2 = f[2]; - int32_t h3 = f[3]; - int32_t h4 = f[4]; - int32_t h5 = f[5]; - int32_t h6 = f[6]; - int32_t h7 = f[7]; - int32_t h8 = f[8]; - int32_t h9 = f[9]; - - int32_t q; - int32_t carry0, carry1, carry2, carry3, carry4, carry5, carry6, carry7, carry8, carry9; - - q = (19 * h9 + ((uint32_t) 1L << 24)) >> 25; - q = (h0 + q) >> 26; - q = (h1 + q) >> 25; - q = (h2 + q) >> 26; - q = (h3 + q) >> 25; - q = (h4 + q) >> 26; - q = (h5 + q) >> 25; - q = (h6 + q) >> 26; - q = (h7 + q) >> 25; - q = (h8 + q) >> 26; - q = (h9 + q) >> 25; - - /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */ - h0 += 19 * q; - /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */ - - carry0 = h0 >> 26; - h1 += carry0; - h0 -= carry0 * ((uint32_t) 1L << 26); - carry1 = h1 >> 25; - h2 += carry1; - h1 -= carry1 * ((uint32_t) 1L << 25); - carry2 = h2 >> 26; - h3 += carry2; - h2 -= carry2 * ((uint32_t) 1L << 26); - carry3 = h3 >> 25; - h4 += carry3; - h3 -= carry3 * ((uint32_t) 1L << 25); - carry4 = h4 >> 26; - h5 += carry4; - h4 -= carry4 * ((uint32_t) 1L << 26); - carry5 = h5 >> 25; - h6 += carry5; - h5 -= carry5 * ((uint32_t) 1L << 25); - carry6 = h6 >> 26; - h7 += carry6; - h6 -= carry6 * ((uint32_t) 1L << 26); - carry7 = h7 >> 25; - h8 += carry7; - h7 -= carry7 * ((uint32_t) 1L << 25); - carry8 = h8 >> 26; - h9 += carry8; - h8 -= carry8 * ((uint32_t) 1L << 26); - carry9 = h9 >> 25; - h9 -= carry9 * ((uint32_t) 1L << 25); - - h[0] = h0; - h[1] = h1; - h[2] = h2; - h[3] = h3; - h[4] = h4; - h[5] = h5; - h[6] = h6; - h[7] = h7; - h[8] = h8; - h[9] = h9; + int32_t h0 = f[0]; + int32_t h1 = f[1]; + int32_t h2 = f[2]; + int32_t h3 = f[3]; + int32_t h4 = f[4]; + int32_t h5 = f[5]; + int32_t h6 = f[6]; + int32_t h7 = f[7]; + int32_t h8 = f[8]; + int32_t h9 = f[9]; + + int32_t q; + int32_t carry0, carry1, carry2, carry3, carry4, carry5, carry6, carry7, + carry8, carry9; + + q = (19 * h9 + ((uint32_t)1L << 24)) >> 25; + q = (h0 + q) >> 26; + q = (h1 + q) >> 25; + q = (h2 + q) >> 26; + q = (h3 + q) >> 25; + q = (h4 + q) >> 26; + q = (h5 + q) >> 25; + q = (h6 + q) >> 26; + q = (h7 + q) >> 25; + q = (h8 + q) >> 26; + q = (h9 + q) >> 25; + + /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */ + h0 += 19 * q; + /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */ + + carry0 = h0 >> 26; + h1 += carry0; + h0 -= carry0 * ((uint32_t)1L << 26); + carry1 = h1 >> 25; + h2 += carry1; + h1 -= carry1 * ((uint32_t)1L << 25); + carry2 = h2 >> 26; + h3 += carry2; + h2 -= carry2 * ((uint32_t)1L << 26); + carry3 = h3 >> 25; + h4 += carry3; + h3 -= carry3 * ((uint32_t)1L << 25); + carry4 = h4 >> 26; + h5 += carry4; + h4 -= carry4 * ((uint32_t)1L << 26); + carry5 = h5 >> 25; + h6 += carry5; + h5 -= carry5 * ((uint32_t)1L << 25); + carry6 = h6 >> 26; + h7 += carry6; + h6 -= carry6 * ((uint32_t)1L << 26); + carry7 = h7 >> 25; + h8 += carry7; + h7 -= carry7 * ((uint32_t)1L << 25); + carry8 = h8 >> 26; + h9 += carry8; + h8 -= carry8 * ((uint32_t)1L << 26); + carry9 = h9 >> 25; + h9 -= carry9 * ((uint32_t)1L << 25); + + h[0] = h0; + h[1] = h1; + h[2] = h2; + h[3] = h3; + h[4] = h4; + h[5] = h5; + h[6] = h6; + h[7] = h7; + h[8] = h8; + h[9] = h9; } /* @@ -182,39 +183,39 @@ fe25519_reduce(fe25519 h, const fe25519 f) void fe25519_tobytes(unsigned char *s, const fe25519 h) { - fe25519 t; - - fe25519_reduce(t, h); - s[0] = t[0] >> 0; - s[1] = t[0] >> 8; - s[2] = t[0] >> 16; - s[3] = (t[0] >> 24) | (t[1] * ((uint32_t) 1 << 2)); - s[4] = t[1] >> 6; - s[5] = t[1] >> 14; - s[6] = (t[1] >> 22) | (t[2] * ((uint32_t) 1 << 3)); - s[7] = t[2] >> 5; - s[8] = t[2] >> 13; - s[9] = (t[2] >> 21) | (t[3] * ((uint32_t) 1 << 5)); - s[10] = t[3] >> 3; - s[11] = t[3] >> 11; - s[12] = (t[3] >> 19) | (t[4] * ((uint32_t) 1 << 6)); - s[13] = t[4] >> 2; - s[14] = t[4] >> 10; - s[15] = t[4] >> 18; - s[16] = t[5] >> 0; - s[17] = t[5] >> 8; - s[18] = t[5] >> 16; - s[19] = (t[5] >> 24) | (t[6] * ((uint32_t) 1 << 1)); - s[20] = t[6] >> 7; - s[21] = t[6] >> 15; - s[22] = (t[6] >> 23) | (t[7] * ((uint32_t) 1 << 3)); - s[23] = t[7] >> 5; - s[24] = t[7] >> 13; - s[25] = (t[7] >> 21) | (t[8] * ((uint32_t) 1 << 4)); - s[26] = t[8] >> 4; - s[27] = t[8] >> 12; - s[28] = (t[8] >> 20) | (t[9] * ((uint32_t) 1 << 6)); - s[29] = t[9] >> 2; - s[30] = t[9] >> 10; - s[31] = t[9] >> 18; + fe25519 t; + + fe25519_reduce(t, h); + s[0] = t[0] >> 0; + s[1] = t[0] >> 8; + s[2] = t[0] >> 16; + s[3] = (t[0] >> 24) | (t[1] * ((uint32_t)1 << 2)); + s[4] = t[1] >> 6; + s[5] = t[1] >> 14; + s[6] = (t[1] >> 22) | (t[2] * ((uint32_t)1 << 3)); + s[7] = t[2] >> 5; + s[8] = t[2] >> 13; + s[9] = (t[2] >> 21) | (t[3] * ((uint32_t)1 << 5)); + s[10] = t[3] >> 3; + s[11] = t[3] >> 11; + s[12] = (t[3] >> 19) | (t[4] * ((uint32_t)1 << 6)); + s[13] = t[4] >> 2; + s[14] = t[4] >> 10; + s[15] = t[4] >> 18; + s[16] = t[5] >> 0; + s[17] = t[5] >> 8; + s[18] = t[5] >> 16; + s[19] = (t[5] >> 24) | (t[6] * ((uint32_t)1 << 1)); + s[20] = t[6] >> 7; + s[21] = t[6] >> 15; + s[22] = (t[6] >> 23) | (t[7] * ((uint32_t)1 << 3)); + s[23] = t[7] >> 5; + s[24] = t[7] >> 13; + s[25] = (t[7] >> 21) | (t[8] * ((uint32_t)1 << 4)); + s[26] = t[8] >> 4; + s[27] = t[8] >> 12; + s[28] = (t[8] >> 20) | (t[9] * ((uint32_t)1 << 6)); + s[29] = t[9] >> 2; + s[30] = t[9] >> 10; + s[31] = t[9] >> 18; } diff --git a/crypto/curve25519/sandy2x/consts_namespace.h b/crypto/curve25519/sandy2x/consts_namespace.h index 9f81fa61c..e0beb8414 100644 --- a/crypto/curve25519/sandy2x/consts_namespace.h +++ b/crypto/curve25519/sandy2x/consts_namespace.h @@ -17,4 +17,3 @@ #define REDMASK51 crypto_scalarmult_curve25519_sandy2x_REDMASK51 #endif /* ifndef consts_namespace_H */ - diff --git a/crypto/curve25519/sandy2x/curve25519_sandy2x.c b/crypto/curve25519/sandy2x/curve25519_sandy2x.c index 98b7cf79e..2075f2e10 100644 --- a/crypto/curve25519/sandy2x/curve25519_sandy2x.c +++ b/crypto/curve25519/sandy2x/curve25519_sandy2x.c @@ -26,13 +26,14 @@ crypto_scalarmult_curve25519_sandy2x(unsigned char *q, const unsigned char *n, const unsigned char *p) { unsigned char *t = q; - fe var[3]; - fe51 x_51; - fe51 z_51; - unsigned int i; - - for (i = 0; i < 32; i++) { - t[i] = n[i]; + fe var[3]; + fe51 x_51; + fe51 z_51; + unsigned int i; + + for(i = 0; i < 32; i++) + { + t[i] = n[i]; } t[0] &= 248; t[31] &= 127; @@ -72,13 +73,14 @@ crypto_scalarmult_curve25519_sandy2x_base(unsigned char *q, const unsigned char *n) { unsigned char *t = q; - fe var[3]; - fe51 x_51; - fe51 z_51; - unsigned int i; - - for (i = 0;i < 32; i++) { - t[i] = n[i]; + fe var[3]; + fe51 x_51; + fe51 z_51; + unsigned int i; + + for(i = 0; i < 32; i++) + { + t[i] = n[i]; } t[0] &= 248; t[31] &= 127; @@ -106,9 +108,8 @@ crypto_scalarmult_curve25519_sandy2x_base(unsigned char *q, } struct crypto_scalarmult_curve25519_implementation -crypto_scalarmult_curve25519_sandy2x_implementation = { - SODIUM_C99(.mult = ) crypto_scalarmult_curve25519_sandy2x, - SODIUM_C99(.mult_base = ) crypto_scalarmult_curve25519_sandy2x_base -}; + crypto_scalarmult_curve25519_sandy2x_implementation = { + SODIUM_C99(.mult =) crypto_scalarmult_curve25519_sandy2x, + SODIUM_C99(.mult_base =) crypto_scalarmult_curve25519_sandy2x_base}; #endif diff --git a/crypto/curve25519/sandy2x/fe.h b/crypto/curve25519/sandy2x/fe.h index b1115f869..17371551c 100644 --- a/crypto/curve25519/sandy2x/fe.h +++ b/crypto/curve25519/sandy2x/fe.h @@ -21,6 +21,7 @@ Bounds on each t[i] vary depending on context. #define fe_frombytes crypto_scalarmult_curve25519_sandy2x_fe_frombytes -extern void fe_frombytes(fe, const unsigned char *); +extern void +fe_frombytes(fe, const unsigned char *); #endif diff --git a/crypto/curve25519/sandy2x/fe51.h b/crypto/curve25519/sandy2x/fe51.h index 8e3f199b2..e8ab7a16d 100644 --- a/crypto/curve25519/sandy2x/fe51.h +++ b/crypto/curve25519/sandy2x/fe51.h @@ -9,7 +9,8 @@ #define fe51_H #ifdef __cplusplus -extern "C" { +extern "C" +{ #endif #include @@ -17,16 +18,19 @@ extern "C" { #include "fe51_namespace.h" -typedef struct -{ + typedef struct + { uint64_t v[5]; -} -fe51; - -extern void fe51_pack(unsigned char *, const fe51 *); -extern void fe51_mul(fe51 *, const fe51 *, const fe51 *); -extern void fe51_nsquare(fe51 *, const fe51 *, int); -extern void fe51_invert(fe51 *, const fe51 *); + } fe51; + + extern void + fe51_pack(unsigned char *, const fe51 *); + extern void + fe51_mul(fe51 *, const fe51 *, const fe51 *); + extern void + fe51_nsquare(fe51 *, const fe51 *, int); + extern void + fe51_invert(fe51 *, const fe51 *); #ifdef __cplusplus } diff --git a/crypto/curve25519/sandy2x/fe51_invert.c b/crypto/curve25519/sandy2x/fe51_invert.c index ec9bb1a91..cedb8d688 100644 --- a/crypto/curve25519/sandy2x/fe51_invert.c +++ b/crypto/curve25519/sandy2x/fe51_invert.c @@ -12,47 +12,47 @@ void fe51_invert(fe51 *r, const fe51 *x) { - fe51 z2; - fe51 z9; - fe51 z11; - fe51 z2_5_0; - fe51 z2_10_0; - fe51 z2_20_0; - fe51 z2_50_0; - fe51 z2_100_0; - fe51 t; - - /* 2 */ fe51_square(&z2,x); - /* 4 */ fe51_square(&t,&z2); - /* 8 */ fe51_square(&t,&t); - /* 9 */ fe51_mul(&z9,&t,x); - /* 11 */ fe51_mul(&z11,&z9,&z2); - /* 22 */ fe51_square(&t,&z11); - /* 2^5 - 2^0 = 31 */ fe51_mul(&z2_5_0,&t,&z9); - - /* 2^10 - 2^5 */ fe51_nsquare(&t,&z2_5_0, 5); - /* 2^10 - 2^0 */ fe51_mul(&z2_10_0,&t,&z2_5_0); - - /* 2^20 - 2^10 */ fe51_nsquare(&t,&z2_10_0, 10); - /* 2^20 - 2^0 */ fe51_mul(&z2_20_0,&t,&z2_10_0); - - /* 2^40 - 2^20 */ fe51_nsquare(&t,&z2_20_0, 20); - /* 2^40 - 2^0 */ fe51_mul(&t,&t,&z2_20_0); - - /* 2^50 - 2^10 */ fe51_nsquare(&t,&t,10); - /* 2^50 - 2^0 */ fe51_mul(&z2_50_0,&t,&z2_10_0); - - /* 2^100 - 2^50 */ fe51_nsquare(&t,&z2_50_0, 50); - /* 2^100 - 2^0 */ fe51_mul(&z2_100_0,&t,&z2_50_0); - - /* 2^200 - 2^100 */ fe51_nsquare(&t,&z2_100_0, 100); - /* 2^200 - 2^0 */ fe51_mul(&t,&t,&z2_100_0); - - /* 2^250 - 2^50 */ fe51_nsquare(&t,&t, 50); - /* 2^250 - 2^0 */ fe51_mul(&t,&t,&z2_50_0); - - /* 2^255 - 2^5 */ fe51_nsquare(&t,&t,5); - /* 2^255 - 21 */ fe51_mul(r,&t,&z11); + fe51 z2; + fe51 z9; + fe51 z11; + fe51 z2_5_0; + fe51 z2_10_0; + fe51 z2_20_0; + fe51 z2_50_0; + fe51 z2_100_0; + fe51 t; + + /* 2 */ fe51_square(&z2, x); + /* 4 */ fe51_square(&t, &z2); + /* 8 */ fe51_square(&t, &t); + /* 9 */ fe51_mul(&z9, &t, x); + /* 11 */ fe51_mul(&z11, &z9, &z2); + /* 22 */ fe51_square(&t, &z11); + /* 2^5 - 2^0 = 31 */ fe51_mul(&z2_5_0, &t, &z9); + + /* 2^10 - 2^5 */ fe51_nsquare(&t, &z2_5_0, 5); + /* 2^10 - 2^0 */ fe51_mul(&z2_10_0, &t, &z2_5_0); + + /* 2^20 - 2^10 */ fe51_nsquare(&t, &z2_10_0, 10); + /* 2^20 - 2^0 */ fe51_mul(&z2_20_0, &t, &z2_10_0); + + /* 2^40 - 2^20 */ fe51_nsquare(&t, &z2_20_0, 20); + /* 2^40 - 2^0 */ fe51_mul(&t, &t, &z2_20_0); + + /* 2^50 - 2^10 */ fe51_nsquare(&t, &t, 10); + /* 2^50 - 2^0 */ fe51_mul(&z2_50_0, &t, &z2_10_0); + + /* 2^100 - 2^50 */ fe51_nsquare(&t, &z2_50_0, 50); + /* 2^100 - 2^0 */ fe51_mul(&z2_100_0, &t, &z2_50_0); + + /* 2^200 - 2^100 */ fe51_nsquare(&t, &z2_100_0, 100); + /* 2^200 - 2^0 */ fe51_mul(&t, &t, &z2_100_0); + + /* 2^250 - 2^50 */ fe51_nsquare(&t, &t, 50); + /* 2^250 - 2^0 */ fe51_mul(&t, &t, &z2_50_0); + + /* 2^255 - 2^5 */ fe51_nsquare(&t, &t, 5); + /* 2^255 - 21 */ fe51_mul(r, &t, &z11); } #endif diff --git a/crypto/curve25519/sandy2x/fe51_namespace.h b/crypto/curve25519/sandy2x/fe51_namespace.h index 057f242ca..87ae5c354 100644 --- a/crypto/curve25519/sandy2x/fe51_namespace.h +++ b/crypto/curve25519/sandy2x/fe51_namespace.h @@ -1,16 +1,15 @@ #ifndef fe51_namespace_H #define fe51_namespace_H -#define fe51 crypto_scalarmult_curve25519_sandy2x_fe51 -#define _fe51 _crypto_scalarmult_curve25519_sandy2x_fe51 -#define fe51_pack crypto_scalarmult_curve25519_sandy2x_fe51_pack -#define _fe51_pack _crypto_scalarmult_curve25519_sandy2x_fe51_pack -#define fe51_mul crypto_scalarmult_curve25519_sandy2x_fe51_mul -#define _fe51_mul _crypto_scalarmult_curve25519_sandy2x_fe51_mul -#define fe51_nsquare crypto_scalarmult_curve25519_sandy2x_fe51_nsquare -#define _fe51_nsquare _crypto_scalarmult_curve25519_sandy2x_fe51_nsquare +#define fe51 crypto_scalarmult_curve25519_sandy2x_fe51 +#define _fe51 _crypto_scalarmult_curve25519_sandy2x_fe51 +#define fe51_pack crypto_scalarmult_curve25519_sandy2x_fe51_pack +#define _fe51_pack _crypto_scalarmult_curve25519_sandy2x_fe51_pack +#define fe51_mul crypto_scalarmult_curve25519_sandy2x_fe51_mul +#define _fe51_mul _crypto_scalarmult_curve25519_sandy2x_fe51_mul +#define fe51_nsquare crypto_scalarmult_curve25519_sandy2x_fe51_nsquare +#define _fe51_nsquare _crypto_scalarmult_curve25519_sandy2x_fe51_nsquare -#define fe51_invert crypto_scalarmult_curve25519_sandy2x_fe51_invert +#define fe51_invert crypto_scalarmult_curve25519_sandy2x_fe51_invert #endif /* ifndef fe51_namespace_H */ - diff --git a/crypto/curve25519/sandy2x/fe_frombytes_sandy2x.c b/crypto/curve25519/sandy2x/fe_frombytes_sandy2x.c index 2fe081ee2..86973e7f1 100644 --- a/crypto/curve25519/sandy2x/fe_frombytes_sandy2x.c +++ b/crypto/curve25519/sandy2x/fe_frombytes_sandy2x.c @@ -10,9 +10,9 @@ static uint64_t load_3(const unsigned char *in) { uint64_t result; - result = (uint64_t) in[0]; - result |= ((uint64_t) in[1]) << 8; - result |= ((uint64_t) in[2]) << 16; + result = (uint64_t)in[0]; + result |= ((uint64_t)in[1]) << 8; + result |= ((uint64_t)in[2]) << 16; return result; } @@ -20,10 +20,10 @@ static uint64_t load_4(const unsigned char *in) { uint64_t result; - result = (uint64_t) in[0]; - result |= ((uint64_t) in[1]) << 8; - result |= ((uint64_t) in[2]) << 16; - result |= ((uint64_t) in[3]) << 24; + result = (uint64_t)in[0]; + result |= ((uint64_t)in[1]) << 8; + result |= ((uint64_t)in[2]) << 16; + result |= ((uint64_t)in[3]) << 24; return result; } @@ -51,17 +51,37 @@ fe_frombytes(fe h, const unsigned char *s) uint64_t carry8; uint64_t carry9; - carry9 = h9 >> 25; h0 += carry9 * 19; h9 &= 0x1FFFFFF; - carry1 = h1 >> 25; h2 += carry1; h1 &= 0x1FFFFFF; - carry3 = h3 >> 25; h4 += carry3; h3 &= 0x1FFFFFF; - carry5 = h5 >> 25; h6 += carry5; h5 &= 0x1FFFFFF; - carry7 = h7 >> 25; h8 += carry7; h7 &= 0x1FFFFFF; + carry9 = h9 >> 25; + h0 += carry9 * 19; + h9 &= 0x1FFFFFF; + carry1 = h1 >> 25; + h2 += carry1; + h1 &= 0x1FFFFFF; + carry3 = h3 >> 25; + h4 += carry3; + h3 &= 0x1FFFFFF; + carry5 = h5 >> 25; + h6 += carry5; + h5 &= 0x1FFFFFF; + carry7 = h7 >> 25; + h8 += carry7; + h7 &= 0x1FFFFFF; - carry0 = h0 >> 26; h1 += carry0; h0 &= 0x3FFFFFF; - carry2 = h2 >> 26; h3 += carry2; h2 &= 0x3FFFFFF; - carry4 = h4 >> 26; h5 += carry4; h4 &= 0x3FFFFFF; - carry6 = h6 >> 26; h7 += carry6; h6 &= 0x3FFFFFF; - carry8 = h8 >> 26; h9 += carry8; h8 &= 0x3FFFFFF; + carry0 = h0 >> 26; + h1 += carry0; + h0 &= 0x3FFFFFF; + carry2 = h2 >> 26; + h3 += carry2; + h2 &= 0x3FFFFFF; + carry4 = h4 >> 26; + h5 += carry4; + h4 &= 0x3FFFFFF; + carry6 = h6 >> 26; + h7 += carry6; + h6 &= 0x3FFFFFF; + carry8 = h8 >> 26; + h9 += carry8; + h8 &= 0x3FFFFFF; h[0] = h0; h[1] = h1; diff --git a/crypto/curve25519/sandy2x/ladder.h b/crypto/curve25519/sandy2x/ladder.h index ccf4ecaec..52fa9bd3d 100644 --- a/crypto/curve25519/sandy2x/ladder.h +++ b/crypto/curve25519/sandy2x/ladder.h @@ -2,17 +2,18 @@ #define ladder_H #ifdef __cplusplus -extern "C" { +extern "C" +{ #endif #include "fe.h" #include "ladder_namespace.h" -extern void ladder(fe *, const unsigned char *); + extern void + ladder(fe *, const unsigned char *); #ifdef __cplusplus } #endif #endif /* ifndef ladder_H */ - diff --git a/crypto/curve25519/sandy2x/ladder_base.h b/crypto/curve25519/sandy2x/ladder_base.h index a69be13f0..9f69fa648 100644 --- a/crypto/curve25519/sandy2x/ladder_base.h +++ b/crypto/curve25519/sandy2x/ladder_base.h @@ -2,17 +2,18 @@ #define ladder_base_H #ifdef __cplusplus -extern "C" { +extern "C" +{ #endif #include "fe.h" #include "ladder_base_namespace.h" -extern void ladder_base(fe *, const unsigned char *); + extern void + ladder_base(fe *, const unsigned char *); #ifdef __cplusplus } #endif #endif /* ifndef ladder_base_H */ - diff --git a/crypto/curve25519/sandy2x/ladder_base_namespace.h b/crypto/curve25519/sandy2x/ladder_base_namespace.h index 304546a18..12c3cf62d 100644 --- a/crypto/curve25519/sandy2x/ladder_base_namespace.h +++ b/crypto/curve25519/sandy2x/ladder_base_namespace.h @@ -1,8 +1,7 @@ #ifndef ladder_base_namespace_H #define ladder_base_namespace_H -#define ladder_base crypto_scalarmult_curve25519_sandy2x_ladder_base +#define ladder_base crypto_scalarmult_curve25519_sandy2x_ladder_base #define _ladder_base _crypto_scalarmult_curve25519_sandy2x_ladder_base #endif /* ifndef ladder_base_namespace_H */ - diff --git a/crypto/curve25519/sandy2x/ladder_namespace.h b/crypto/curve25519/sandy2x/ladder_namespace.h index 6637074be..ff066e278 100644 --- a/crypto/curve25519/sandy2x/ladder_namespace.h +++ b/crypto/curve25519/sandy2x/ladder_namespace.h @@ -1,8 +1,7 @@ #ifndef ladder_namespace_H #define ladder_namespace_H -#define ladder crypto_scalarmult_curve25519_sandy2x_ladder +#define ladder crypto_scalarmult_curve25519_sandy2x_ladder #define _ladder _crypto_scalarmult_curve25519_sandy2x_ladder #endif /* ifndef ladder_namespace_H */ - diff --git a/crypto/curve25519/scalarmult_curve25519.h b/crypto/curve25519/scalarmult_curve25519.h index 66edbf6a8..bb1ade998 100644 --- a/crypto/curve25519/scalarmult_curve25519.h +++ b/crypto/curve25519/scalarmult_curve25519.h @@ -2,10 +2,10 @@ #ifndef scalarmult_poly1305_H #define scalarmult_poly1305_H -typedef struct crypto_scalarmult_curve25519_implementation { - int (*mult)(unsigned char *q, const unsigned char *n, - const unsigned char *p); - int (*mult_base)(unsigned char *q, const unsigned char *n); +typedef struct crypto_scalarmult_curve25519_implementation +{ + int (*mult)(unsigned char *q, const unsigned char *n, const unsigned char *p); + int (*mult_base)(unsigned char *q, const unsigned char *n); } crypto_scalarmult_curve25519_implementation; #endif diff --git a/crypto/ed25519/ref10/sign_ed25519_ref10.h b/crypto/ed25519/ref10/sign_ed25519_ref10.h index 29f45a854..24e1802e8 100644 --- a/crypto/ed25519/ref10/sign_ed25519_ref10.h +++ b/crypto/ed25519/ref10/sign_ed25519_ref10.h @@ -1,18 +1,17 @@ #ifndef sign_ed25519_ref10_H #define sign_ed25519_ref10_H -void _crypto_sign_ed25519_ref10_hinit(crypto_hash_sha512_state *hs, - int prehashed); +void +_crypto_sign_ed25519_ref10_hinit(crypto_hash_sha512_state *hs, int prehashed); -int _crypto_sign_ed25519_detached(unsigned char *sig, - unsigned long long *siglen_p, - const unsigned char *m, - unsigned long long mlen, - const unsigned char *sk, int prehashed); +int +_crypto_sign_ed25519_detached(unsigned char *sig, unsigned long long *siglen_p, + const unsigned char *m, unsigned long long mlen, + const unsigned char *sk, int prehashed); -int _crypto_sign_ed25519_verify_detached(const unsigned char *sig, - const unsigned char *m, - unsigned long long mlen, - const unsigned char *pk, - int prehashed); +int +_crypto_sign_ed25519_verify_detached(const unsigned char *sig, + const unsigned char *m, + unsigned long long mlen, + const unsigned char *pk, int prehashed); #endif diff --git a/crypto/include/libntrup/ntru_api.h b/crypto/include/libntrup/ntru_api.h index 50c761b59..9753891d9 100644 --- a/crypto/include/libntrup/ntru_api.h +++ b/crypto/include/libntrup/ntru_api.h @@ -1,12 +1,22 @@ -int crypto_kem_enc_ref(unsigned char *cstr, unsigned char *k, const unsigned char *pk); - -int crypto_kem_dec_ref(unsigned char *k, const unsigned char *cstr, const unsigned char *sk); +int +crypto_kem_enc_ref(unsigned char *cstr, unsigned char *k, + const unsigned char *pk); -int crypto_kem_keypair_ref(unsigned char *pk, unsigned char * sk); +int +crypto_kem_dec_ref(unsigned char *k, const unsigned char *cstr, + const unsigned char *sk); -int crypto_kem_enc_avx2(unsigned char *cstr, unsigned char *k, const unsigned char *pk); - -int crypto_kem_dec_avx2(unsigned char *k, const unsigned char *cstr, const unsigned char *sk); +int +crypto_kem_keypair_ref(unsigned char *pk, unsigned char *sk); -int crypto_kem_keypair_avx2(unsigned char *pk, unsigned char * sk); +int +crypto_kem_enc_avx2(unsigned char *cstr, unsigned char *k, + const unsigned char *pk); + +int +crypto_kem_dec_avx2(unsigned char *k, const unsigned char *cstr, + const unsigned char *sk); + +int +crypto_kem_keypair_avx2(unsigned char *pk, unsigned char *sk); diff --git a/crypto/include/sodium/common.h b/crypto/include/sodium/common.h index 632fc8a73..d971612df 100644 --- a/crypto/include/sodium/common.h +++ b/crypto/include/sodium/common.h @@ -5,42 +5,42 @@ #include #include -#define COMPILER_ASSERT(X) (void) sizeof(char[(X) ? 1 : -1]) +#define COMPILER_ASSERT(X) (void)sizeof(char[(X) ? 1 : -1]) #ifdef HAVE_TI_MODE -# if defined(__SIZEOF_INT128__) +#if defined(__SIZEOF_INT128__) typedef unsigned __int128 uint128_t; -# else +#else typedef unsigned uint128_t __attribute__((mode(TI))); -# endif +#endif #endif #define ROTL32(X, B) rotl32((X), (B)) static inline uint32_t rotl32(const uint32_t x, const int b) { - return (x << b) | (x >> (32 - b)); + return (x << b) | (x >> (32 - b)); } #define ROTL64(X, B) rotl64((X), (B)) static inline uint64_t rotl64(const uint64_t x, const int b) { - return (x << b) | (x >> (64 - b)); + return (x << b) | (x >> (64 - b)); } #define ROTR32(X, B) rotr32((X), (B)) static inline uint32_t rotr32(const uint32_t x, const int b) { - return (x >> b) | (x << (32 - b)); + return (x >> b) | (x << (32 - b)); } #define ROTR64(X, B) rotr64((X), (B)) static inline uint64_t rotr64(const uint64_t x, const int b) { - return (x >> b) | (x << (64 - b)); + return (x >> b) | (x << (64 - b)); } #define LOAD64_LE(SRC) load64_le(SRC) @@ -48,19 +48,19 @@ static inline uint64_t load64_le(const uint8_t src[8]) { #ifdef NATIVE_LITTLE_ENDIAN - uint64_t w; - memcpy(&w, src, sizeof w); - return w; + uint64_t w; + memcpy(&w, src, sizeof w); + return w; #else - uint64_t w = (uint64_t) src[0]; - w |= (uint64_t) src[1] << 8; - w |= (uint64_t) src[2] << 16; - w |= (uint64_t) src[3] << 24; - w |= (uint64_t) src[4] << 32; - w |= (uint64_t) src[5] << 40; - w |= (uint64_t) src[6] << 48; - w |= (uint64_t) src[7] << 56; - return w; + uint64_t w = (uint64_t)src[0]; + w |= (uint64_t)src[1] << 8; + w |= (uint64_t)src[2] << 16; + w |= (uint64_t)src[3] << 24; + w |= (uint64_t)src[4] << 32; + w |= (uint64_t)src[5] << 40; + w |= (uint64_t)src[6] << 48; + w |= (uint64_t)src[7] << 56; + return w; #endif } @@ -69,16 +69,23 @@ static inline void store64_le(uint8_t dst[8], uint64_t w) { #ifdef NATIVE_LITTLE_ENDIAN - memcpy(dst, &w, sizeof w); + memcpy(dst, &w, sizeof w); #else - dst[0] = (uint8_t) w; w >>= 8; - dst[1] = (uint8_t) w; w >>= 8; - dst[2] = (uint8_t) w; w >>= 8; - dst[3] = (uint8_t) w; w >>= 8; - dst[4] = (uint8_t) w; w >>= 8; - dst[5] = (uint8_t) w; w >>= 8; - dst[6] = (uint8_t) w; w >>= 8; - dst[7] = (uint8_t) w; + dst[0] = (uint8_t)w; + w >>= 8; + dst[1] = (uint8_t)w; + w >>= 8; + dst[2] = (uint8_t)w; + w >>= 8; + dst[3] = (uint8_t)w; + w >>= 8; + dst[4] = (uint8_t)w; + w >>= 8; + dst[5] = (uint8_t)w; + w >>= 8; + dst[6] = (uint8_t)w; + w >>= 8; + dst[7] = (uint8_t)w; #endif } @@ -87,15 +94,15 @@ static inline uint32_t load32_le(const uint8_t src[4]) { #ifdef NATIVE_LITTLE_ENDIAN - uint32_t w; - memcpy(&w, src, sizeof w); - return w; + uint32_t w; + memcpy(&w, src, sizeof w); + return w; #else - uint32_t w = (uint32_t) src[0]; - w |= (uint32_t) src[1] << 8; - w |= (uint32_t) src[2] << 16; - w |= (uint32_t) src[3] << 24; - return w; + uint32_t w = (uint32_t)src[0]; + w |= (uint32_t)src[1] << 8; + w |= (uint32_t)src[2] << 16; + w |= (uint32_t)src[3] << 24; + return w; #endif } @@ -104,12 +111,15 @@ static inline void store32_le(uint8_t dst[4], uint32_t w) { #ifdef NATIVE_LITTLE_ENDIAN - memcpy(dst, &w, sizeof w); + memcpy(dst, &w, sizeof w); #else - dst[0] = (uint8_t) w; w >>= 8; - dst[1] = (uint8_t) w; w >>= 8; - dst[2] = (uint8_t) w; w >>= 8; - dst[3] = (uint8_t) w; + dst[0] = (uint8_t)w; + w >>= 8; + dst[1] = (uint8_t)w; + w >>= 8; + dst[2] = (uint8_t)w; + w >>= 8; + dst[3] = (uint8_t)w; #endif } @@ -120,19 +130,19 @@ static inline uint64_t load64_be(const uint8_t src[8]) { #ifdef NATIVE_BIG_ENDIAN - uint64_t w; - memcpy(&w, src, sizeof w); - return w; + uint64_t w; + memcpy(&w, src, sizeof w); + return w; #else - uint64_t w = (uint64_t) src[7]; - w |= (uint64_t) src[6] << 8; - w |= (uint64_t) src[5] << 16; - w |= (uint64_t) src[4] << 24; - w |= (uint64_t) src[3] << 32; - w |= (uint64_t) src[2] << 40; - w |= (uint64_t) src[1] << 48; - w |= (uint64_t) src[0] << 56; - return w; + uint64_t w = (uint64_t)src[7]; + w |= (uint64_t)src[6] << 8; + w |= (uint64_t)src[5] << 16; + w |= (uint64_t)src[4] << 24; + w |= (uint64_t)src[3] << 32; + w |= (uint64_t)src[2] << 40; + w |= (uint64_t)src[1] << 48; + w |= (uint64_t)src[0] << 56; + return w; #endif } @@ -141,16 +151,23 @@ static inline void store64_be(uint8_t dst[8], uint64_t w) { #ifdef NATIVE_BIG_ENDIAN - memcpy(dst, &w, sizeof w); + memcpy(dst, &w, sizeof w); #else - dst[7] = (uint8_t) w; w >>= 8; - dst[6] = (uint8_t) w; w >>= 8; - dst[5] = (uint8_t) w; w >>= 8; - dst[4] = (uint8_t) w; w >>= 8; - dst[3] = (uint8_t) w; w >>= 8; - dst[2] = (uint8_t) w; w >>= 8; - dst[1] = (uint8_t) w; w >>= 8; - dst[0] = (uint8_t) w; + dst[7] = (uint8_t)w; + w >>= 8; + dst[6] = (uint8_t)w; + w >>= 8; + dst[5] = (uint8_t)w; + w >>= 8; + dst[4] = (uint8_t)w; + w >>= 8; + dst[3] = (uint8_t)w; + w >>= 8; + dst[2] = (uint8_t)w; + w >>= 8; + dst[1] = (uint8_t)w; + w >>= 8; + dst[0] = (uint8_t)w; #endif } @@ -159,15 +176,15 @@ static inline uint32_t load32_be(const uint8_t src[4]) { #ifdef NATIVE_BIG_ENDIAN - uint32_t w; - memcpy(&w, src, sizeof w); - return w; + uint32_t w; + memcpy(&w, src, sizeof w); + return w; #else - uint32_t w = (uint32_t) src[3]; - w |= (uint32_t) src[2] << 8; - w |= (uint32_t) src[1] << 16; - w |= (uint32_t) src[0] << 24; - return w; + uint32_t w = (uint32_t)src[3]; + w |= (uint32_t)src[2] << 8; + w |= (uint32_t)src[1] << 16; + w |= (uint32_t)src[0] << 24; + return w; #endif } @@ -176,12 +193,15 @@ static inline void store32_be(uint8_t dst[4], uint32_t w) { #ifdef NATIVE_BIG_ENDIAN - memcpy(dst, &w, sizeof w); + memcpy(dst, &w, sizeof w); #else - dst[3] = (uint8_t) w; w >>= 8; - dst[2] = (uint8_t) w; w >>= 8; - dst[1] = (uint8_t) w; w >>= 8; - dst[0] = (uint8_t) w; + dst[3] = (uint8_t)w; + w >>= 8; + dst[2] = (uint8_t)w; + w >>= 8; + dst[1] = (uint8_t)w; + w >>= 8; + dst[0] = (uint8_t)w; #endif } @@ -189,58 +209,61 @@ store32_be(uint8_t dst[4], uint32_t w) static inline void xor_buf(unsigned char *out, const unsigned char *in, size_t n) { - size_t i; + size_t i; - for (i = 0; i < n; i++) { - out[i] ^= in[i]; - } + for(i = 0; i < n; i++) + { + out[i] ^= in[i]; + } } #if !defined(__clang__) && !defined(__GNUC__) -# ifdef __attribute__ -# undef __attribute__ -# endif -# define __attribute__(a) +#ifdef __attribute__ +#undef __attribute__ +#endif +#define __attribute__(a) #endif #ifndef CRYPTO_ALIGN -# if defined(__INTEL_COMPILER) || defined(_MSC_VER) -# define CRYPTO_ALIGN(x) __declspec(align(x)) -# else -# define CRYPTO_ALIGN(x) __attribute__ ((aligned(x))) -# endif -#endif - -#if defined(_MSC_VER) && \ - (defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86)) - -# include - -# define HAVE_INTRIN_H 1 -# define HAVE_MMINTRIN_H 1 -# define HAVE_EMMINTRIN_H 1 -# define HAVE_PMMINTRIN_H 1 -# define HAVE_TMMINTRIN_H 1 -# define HAVE_SMMINTRIN_H 1 -# define HAVE_AVXINTRIN_H 1 -# if _MSC_VER >= 1600 -# define HAVE_WMMINTRIN_H 1 -# endif -# if _MSC_VER >= 1700 && defined(_M_X64) -# define HAVE_AVX2INTRIN_H 1 -# endif +#if defined(__INTEL_COMPILER) || defined(_MSC_VER) +#define CRYPTO_ALIGN(x) __declspec(align(x)) +#else +#define CRYPTO_ALIGN(x) __attribute__((aligned(x))) +#endif +#endif + +#if defined(_MSC_VER) \ + && (defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86)) + +#include + +#define HAVE_INTRIN_H 1 +#define HAVE_MMINTRIN_H 1 +#define HAVE_EMMINTRIN_H 1 +#define HAVE_PMMINTRIN_H 1 +#define HAVE_TMMINTRIN_H 1 +#define HAVE_SMMINTRIN_H 1 +#define HAVE_AVXINTRIN_H 1 +#if _MSC_VER >= 1600 +#define HAVE_WMMINTRIN_H 1 +#endif +#if _MSC_VER >= 1700 && defined(_M_X64) +#define HAVE_AVX2INTRIN_H 1 +#endif #elif defined(HAVE_INTRIN_H) -# include +#include #endif #ifdef HAVE_LIBCTGRIND -extern void ct_poison (const void *, size_t); -extern void ct_unpoison(const void *, size_t); -# define POISON(X, L) ct_poison((X), (L)) -# define UNPOISON(X, L) ct_unpoison((X), (L)) +extern void +ct_poison(const void *, size_t); +extern void +ct_unpoison(const void *, size_t); +#define POISON(X, L) ct_poison((X), (L)) +#define UNPOISON(X, L) ct_unpoison((X), (L)) #else -# define POISON(X, L) (void) 0 -# define UNPOISON(X, L) (void) 0 +#define POISON(X, L) (void)0 +#define UNPOISON(X, L) (void)0 #endif #endif diff --git a/crypto/include/sodium/crypto_box.h b/crypto/include/sodium/crypto_box.h index 99ee19a8f..ff2b17b56 100644 --- a/crypto/include/sodium/crypto_box.h +++ b/crypto/include/sodium/crypto_box.h @@ -14,157 +14,189 @@ #include "export.h" #ifdef __cplusplus -# ifdef __GNUC__ -# pragma GCC diagnostic ignored "-Wlong-long" -# endif -extern "C" { +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wlong-long" +#endif +extern "C" +{ #endif #define crypto_box_SEEDBYTES crypto_box_curve25519xsalsa20poly1305_SEEDBYTES -SODIUM_EXPORT -size_t crypto_box_seedbytes(void); - -#define crypto_box_PUBLICKEYBYTES crypto_box_curve25519xsalsa20poly1305_PUBLICKEYBYTES -SODIUM_EXPORT -size_t crypto_box_publickeybytes(void); - -#define crypto_box_SECRETKEYBYTES crypto_box_curve25519xsalsa20poly1305_SECRETKEYBYTES -SODIUM_EXPORT -size_t crypto_box_secretkeybytes(void); + SODIUM_EXPORT + size_t + crypto_box_seedbytes(void); + +#define crypto_box_PUBLICKEYBYTES \ + crypto_box_curve25519xsalsa20poly1305_PUBLICKEYBYTES + SODIUM_EXPORT + size_t + crypto_box_publickeybytes(void); + +#define crypto_box_SECRETKEYBYTES \ + crypto_box_curve25519xsalsa20poly1305_SECRETKEYBYTES + SODIUM_EXPORT + size_t + crypto_box_secretkeybytes(void); #define crypto_box_NONCEBYTES crypto_box_curve25519xsalsa20poly1305_NONCEBYTES -SODIUM_EXPORT -size_t crypto_box_noncebytes(void); + SODIUM_EXPORT + size_t + crypto_box_noncebytes(void); #define crypto_box_MACBYTES crypto_box_curve25519xsalsa20poly1305_MACBYTES -SODIUM_EXPORT -size_t crypto_box_macbytes(void); + SODIUM_EXPORT + size_t + crypto_box_macbytes(void); -#define crypto_box_MESSAGEBYTES_MAX crypto_box_curve25519xsalsa20poly1305_MESSAGEBYTES_MAX -SODIUM_EXPORT -size_t crypto_box_messagebytes_max(void); +#define crypto_box_MESSAGEBYTES_MAX \ + crypto_box_curve25519xsalsa20poly1305_MESSAGEBYTES_MAX + SODIUM_EXPORT + size_t + crypto_box_messagebytes_max(void); #define crypto_box_PRIMITIVE "curve25519xsalsa20poly1305" -SODIUM_EXPORT -const char *crypto_box_primitive(void); - -SODIUM_EXPORT -int crypto_box_seed_keypair(unsigned char *pk, unsigned char *sk, - const unsigned char *seed); - -SODIUM_EXPORT -int crypto_box_keypair(unsigned char *pk, unsigned char *sk); - -SODIUM_EXPORT -int crypto_box_easy(unsigned char *c, const unsigned char *m, - unsigned long long mlen, const unsigned char *n, - const unsigned char *pk, const unsigned char *sk) - __attribute__ ((warn_unused_result)); - -SODIUM_EXPORT -int crypto_box_open_easy(unsigned char *m, const unsigned char *c, - unsigned long long clen, const unsigned char *n, - const unsigned char *pk, const unsigned char *sk) - __attribute__ ((warn_unused_result)); - -SODIUM_EXPORT -int crypto_box_detached(unsigned char *c, unsigned char *mac, - const unsigned char *m, unsigned long long mlen, - const unsigned char *n, const unsigned char *pk, - const unsigned char *sk) - __attribute__ ((warn_unused_result)); - -SODIUM_EXPORT -int crypto_box_open_detached(unsigned char *m, const unsigned char *c, - const unsigned char *mac, - unsigned long long clen, - const unsigned char *n, - const unsigned char *pk, - const unsigned char *sk) - __attribute__ ((warn_unused_result)); - -/* -- Precomputation interface -- */ - -#define crypto_box_BEFORENMBYTES crypto_box_curve25519xsalsa20poly1305_BEFORENMBYTES -SODIUM_EXPORT -size_t crypto_box_beforenmbytes(void); - -SODIUM_EXPORT -int crypto_box_beforenm(unsigned char *k, const unsigned char *pk, - const unsigned char *sk) - __attribute__ ((warn_unused_result)); - -SODIUM_EXPORT -int crypto_box_easy_afternm(unsigned char *c, const unsigned char *m, - unsigned long long mlen, const unsigned char *n, - const unsigned char *k); - -SODIUM_EXPORT -int crypto_box_open_easy_afternm(unsigned char *m, const unsigned char *c, - unsigned long long clen, const unsigned char *n, - const unsigned char *k) - __attribute__ ((warn_unused_result)); - -SODIUM_EXPORT -int crypto_box_detached_afternm(unsigned char *c, unsigned char *mac, - const unsigned char *m, unsigned long long mlen, - const unsigned char *n, const unsigned char *k); - -SODIUM_EXPORT -int crypto_box_open_detached_afternm(unsigned char *m, const unsigned char *c, - const unsigned char *mac, - unsigned long long clen, const unsigned char *n, - const unsigned char *k) - __attribute__ ((warn_unused_result)); - -/* -- Ephemeral SK interface -- */ + SODIUM_EXPORT + const char * + crypto_box_primitive(void); + + SODIUM_EXPORT + int + crypto_box_seed_keypair(unsigned char *pk, unsigned char *sk, + const unsigned char *seed); + + SODIUM_EXPORT + int + crypto_box_keypair(unsigned char *pk, unsigned char *sk); + + SODIUM_EXPORT + int + crypto_box_easy(unsigned char *c, const unsigned char *m, + unsigned long long mlen, const unsigned char *n, + const unsigned char *pk, const unsigned char *sk) + __attribute__((warn_unused_result)); + + SODIUM_EXPORT + int + crypto_box_open_easy(unsigned char *m, const unsigned char *c, + unsigned long long clen, const unsigned char *n, + const unsigned char *pk, const unsigned char *sk) + __attribute__((warn_unused_result)); + + SODIUM_EXPORT + int + crypto_box_detached(unsigned char *c, unsigned char *mac, + const unsigned char *m, unsigned long long mlen, + const unsigned char *n, const unsigned char *pk, + const unsigned char *sk) + __attribute__((warn_unused_result)); + + SODIUM_EXPORT + int + crypto_box_open_detached(unsigned char *m, const unsigned char *c, + const unsigned char *mac, unsigned long long clen, + const unsigned char *n, const unsigned char *pk, + const unsigned char *sk) + __attribute__((warn_unused_result)); + + /* -- Precomputation interface -- */ + +#define crypto_box_BEFORENMBYTES \ + crypto_box_curve25519xsalsa20poly1305_BEFORENMBYTES + SODIUM_EXPORT + size_t + crypto_box_beforenmbytes(void); + + SODIUM_EXPORT + int + crypto_box_beforenm(unsigned char *k, const unsigned char *pk, + const unsigned char *sk) + __attribute__((warn_unused_result)); + + SODIUM_EXPORT + int + crypto_box_easy_afternm(unsigned char *c, const unsigned char *m, + unsigned long long mlen, const unsigned char *n, + const unsigned char *k); + + SODIUM_EXPORT + int + crypto_box_open_easy_afternm(unsigned char *m, const unsigned char *c, + unsigned long long clen, const unsigned char *n, + const unsigned char *k) + __attribute__((warn_unused_result)); + + SODIUM_EXPORT + int + crypto_box_detached_afternm(unsigned char *c, unsigned char *mac, + const unsigned char *m, unsigned long long mlen, + const unsigned char *n, const unsigned char *k); + + SODIUM_EXPORT + int + crypto_box_open_detached_afternm(unsigned char *m, const unsigned char *c, + const unsigned char *mac, + unsigned long long clen, + const unsigned char *n, + const unsigned char *k) + __attribute__((warn_unused_result)); + + /* -- Ephemeral SK interface -- */ #define crypto_box_SEALBYTES (crypto_box_PUBLICKEYBYTES + crypto_box_MACBYTES) -SODIUM_EXPORT -size_t crypto_box_sealbytes(void); + SODIUM_EXPORT + size_t + crypto_box_sealbytes(void); -SODIUM_EXPORT -int crypto_box_seal(unsigned char *c, const unsigned char *m, - unsigned long long mlen, const unsigned char *pk); + SODIUM_EXPORT + int + crypto_box_seal(unsigned char *c, const unsigned char *m, + unsigned long long mlen, const unsigned char *pk); -SODIUM_EXPORT -int crypto_box_seal_open(unsigned char *m, const unsigned char *c, - unsigned long long clen, - const unsigned char *pk, const unsigned char *sk) - __attribute__ ((warn_unused_result)); + SODIUM_EXPORT + int + crypto_box_seal_open(unsigned char *m, const unsigned char *c, + unsigned long long clen, const unsigned char *pk, + const unsigned char *sk) + __attribute__((warn_unused_result)); -/* -- NaCl compatibility interface ; Requires padding -- */ + /* -- NaCl compatibility interface ; Requires padding -- */ #define crypto_box_ZEROBYTES crypto_box_curve25519xsalsa20poly1305_ZEROBYTES -SODIUM_EXPORT -size_t crypto_box_zerobytes(void); - -#define crypto_box_BOXZEROBYTES crypto_box_curve25519xsalsa20poly1305_BOXZEROBYTES -SODIUM_EXPORT -size_t crypto_box_boxzerobytes(void); - -SODIUM_EXPORT -int crypto_box(unsigned char *c, const unsigned char *m, - unsigned long long mlen, const unsigned char *n, - const unsigned char *pk, const unsigned char *sk) - __attribute__ ((warn_unused_result)); - -SODIUM_EXPORT -int crypto_box_open(unsigned char *m, const unsigned char *c, - unsigned long long clen, const unsigned char *n, - const unsigned char *pk, const unsigned char *sk) - __attribute__ ((warn_unused_result)); - -SODIUM_EXPORT -int crypto_box_afternm(unsigned char *c, const unsigned char *m, - unsigned long long mlen, const unsigned char *n, - const unsigned char *k); - -SODIUM_EXPORT -int crypto_box_open_afternm(unsigned char *m, const unsigned char *c, - unsigned long long clen, const unsigned char *n, - const unsigned char *k) - __attribute__ ((warn_unused_result)); + SODIUM_EXPORT + size_t + crypto_box_zerobytes(void); + +#define crypto_box_BOXZEROBYTES \ + crypto_box_curve25519xsalsa20poly1305_BOXZEROBYTES + SODIUM_EXPORT + size_t + crypto_box_boxzerobytes(void); + + SODIUM_EXPORT + int + crypto_box(unsigned char *c, const unsigned char *m, unsigned long long mlen, + const unsigned char *n, const unsigned char *pk, + const unsigned char *sk) __attribute__((warn_unused_result)); + + SODIUM_EXPORT + int + crypto_box_open(unsigned char *m, const unsigned char *c, + unsigned long long clen, const unsigned char *n, + const unsigned char *pk, const unsigned char *sk) + __attribute__((warn_unused_result)); + + SODIUM_EXPORT + int + crypto_box_afternm(unsigned char *c, const unsigned char *m, + unsigned long long mlen, const unsigned char *n, + const unsigned char *k); + + SODIUM_EXPORT + int + crypto_box_open_afternm(unsigned char *m, const unsigned char *c, + unsigned long long clen, const unsigned char *n, + const unsigned char *k) + __attribute__((warn_unused_result)); #ifdef __cplusplus } diff --git a/crypto/include/sodium/crypto_box_curve25519xsalsa20poly1305.h b/crypto/include/sodium/crypto_box_curve25519xsalsa20poly1305.h index c5b15f42e..6a71ddee8 100644 --- a/crypto/include/sodium/crypto_box_curve25519xsalsa20poly1305.h +++ b/crypto/include/sodium/crypto_box_curve25519xsalsa20poly1305.h @@ -6,101 +6,114 @@ #include "export.h" #ifdef __cplusplus -# ifdef __GNUC__ -# pragma GCC diagnostic ignored "-Wlong-long" -# endif -extern "C" { +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wlong-long" +#endif +extern "C" +{ #endif #define crypto_box_curve25519xsalsa20poly1305_SEEDBYTES 32U -SODIUM_EXPORT -size_t crypto_box_curve25519xsalsa20poly1305_seedbytes(void); + SODIUM_EXPORT + size_t + crypto_box_curve25519xsalsa20poly1305_seedbytes(void); #define crypto_box_curve25519xsalsa20poly1305_PUBLICKEYBYTES 32U -SODIUM_EXPORT -size_t crypto_box_curve25519xsalsa20poly1305_publickeybytes(void); + SODIUM_EXPORT + size_t + crypto_box_curve25519xsalsa20poly1305_publickeybytes(void); #define crypto_box_curve25519xsalsa20poly1305_SECRETKEYBYTES 32U -SODIUM_EXPORT -size_t crypto_box_curve25519xsalsa20poly1305_secretkeybytes(void); + SODIUM_EXPORT + size_t + crypto_box_curve25519xsalsa20poly1305_secretkeybytes(void); #define crypto_box_curve25519xsalsa20poly1305_BEFORENMBYTES 32U -SODIUM_EXPORT -size_t crypto_box_curve25519xsalsa20poly1305_beforenmbytes(void); + SODIUM_EXPORT + size_t + crypto_box_curve25519xsalsa20poly1305_beforenmbytes(void); #define crypto_box_curve25519xsalsa20poly1305_NONCEBYTES 24U -SODIUM_EXPORT -size_t crypto_box_curve25519xsalsa20poly1305_noncebytes(void); + SODIUM_EXPORT + size_t + crypto_box_curve25519xsalsa20poly1305_noncebytes(void); #define crypto_box_curve25519xsalsa20poly1305_MACBYTES 16U -SODIUM_EXPORT -size_t crypto_box_curve25519xsalsa20poly1305_macbytes(void); + SODIUM_EXPORT + size_t + crypto_box_curve25519xsalsa20poly1305_macbytes(void); -/* Only for the libsodium API - The NaCl compatibility API would require BOXZEROBYTES extra bytes */ +/* Only for the libsodium API - The NaCl compatibility API would require + * BOXZEROBYTES extra bytes */ #define crypto_box_curve25519xsalsa20poly1305_MESSAGEBYTES_MAX \ - (crypto_stream_xsalsa20_MESSAGEBYTES_MAX - crypto_box_curve25519xsalsa20poly1305_MACBYTES) -SODIUM_EXPORT -size_t crypto_box_curve25519xsalsa20poly1305_messagebytes_max(void); - -SODIUM_EXPORT -int crypto_box_curve25519xsalsa20poly1305_seed_keypair(unsigned char *pk, - unsigned char *sk, - const unsigned char *seed); - -SODIUM_EXPORT -int crypto_box_curve25519xsalsa20poly1305_keypair(unsigned char *pk, - unsigned char *sk); - -SODIUM_EXPORT -int crypto_box_curve25519xsalsa20poly1305_beforenm(unsigned char *k, - const unsigned char *pk, - const unsigned char *sk) - __attribute__ ((warn_unused_result)); - -/* -- NaCl compatibility interface ; Requires padding -- */ + (crypto_stream_xsalsa20_MESSAGEBYTES_MAX \ + - crypto_box_curve25519xsalsa20poly1305_MACBYTES) + SODIUM_EXPORT + size_t + crypto_box_curve25519xsalsa20poly1305_messagebytes_max(void); + + SODIUM_EXPORT + int + crypto_box_curve25519xsalsa20poly1305_seed_keypair(unsigned char *pk, + unsigned char *sk, + const unsigned char *seed); + + SODIUM_EXPORT + int + crypto_box_curve25519xsalsa20poly1305_keypair(unsigned char *pk, + unsigned char *sk); + + SODIUM_EXPORT + int + crypto_box_curve25519xsalsa20poly1305_beforenm(unsigned char *k, + const unsigned char *pk, + const unsigned char *sk) + __attribute__((warn_unused_result)); + + /* -- NaCl compatibility interface ; Requires padding -- */ #define crypto_box_curve25519xsalsa20poly1305_BOXZEROBYTES 16U -SODIUM_EXPORT -size_t crypto_box_curve25519xsalsa20poly1305_boxzerobytes(void); + SODIUM_EXPORT + size_t + crypto_box_curve25519xsalsa20poly1305_boxzerobytes(void); #define crypto_box_curve25519xsalsa20poly1305_ZEROBYTES \ - (crypto_box_curve25519xsalsa20poly1305_BOXZEROBYTES + \ - crypto_box_curve25519xsalsa20poly1305_MACBYTES) -SODIUM_EXPORT -size_t crypto_box_curve25519xsalsa20poly1305_zerobytes(void); - -SODIUM_EXPORT -int crypto_box_curve25519xsalsa20poly1305(unsigned char *c, - const unsigned char *m, - unsigned long long mlen, - const unsigned char *n, - const unsigned char *pk, - const unsigned char *sk) - __attribute__ ((warn_unused_result)); - -SODIUM_EXPORT -int crypto_box_curve25519xsalsa20poly1305_open(unsigned char *m, - const unsigned char *c, - unsigned long long clen, - const unsigned char *n, - const unsigned char *pk, - const unsigned char *sk) - __attribute__ ((warn_unused_result)); - -SODIUM_EXPORT -int crypto_box_curve25519xsalsa20poly1305_afternm(unsigned char *c, - const unsigned char *m, - unsigned long long mlen, - const unsigned char *n, - const unsigned char *k); - -SODIUM_EXPORT -int crypto_box_curve25519xsalsa20poly1305_open_afternm(unsigned char *m, - const unsigned char *c, - unsigned long long clen, - const unsigned char *n, - const unsigned char *k) - __attribute__ ((warn_unused_result)); + (crypto_box_curve25519xsalsa20poly1305_BOXZEROBYTES \ + + crypto_box_curve25519xsalsa20poly1305_MACBYTES) + SODIUM_EXPORT + size_t + crypto_box_curve25519xsalsa20poly1305_zerobytes(void); + + SODIUM_EXPORT + int + crypto_box_curve25519xsalsa20poly1305( + unsigned char *c, const unsigned char *m, unsigned long long mlen, + const unsigned char *n, const unsigned char *pk, const unsigned char *sk) + __attribute__((warn_unused_result)); + + SODIUM_EXPORT + int + crypto_box_curve25519xsalsa20poly1305_open( + unsigned char *m, const unsigned char *c, unsigned long long clen, + const unsigned char *n, const unsigned char *pk, const unsigned char *sk) + __attribute__((warn_unused_result)); + + SODIUM_EXPORT + int + crypto_box_curve25519xsalsa20poly1305_afternm(unsigned char *c, + const unsigned char *m, + unsigned long long mlen, + const unsigned char *n, + const unsigned char *k); + + SODIUM_EXPORT + int + crypto_box_curve25519xsalsa20poly1305_open_afternm(unsigned char *m, + const unsigned char *c, + unsigned long long clen, + const unsigned char *n, + const unsigned char *k) + __attribute__((warn_unused_result)); #ifdef __cplusplus } diff --git a/crypto/include/sodium/crypto_core_hchacha20.h b/crypto/include/sodium/crypto_core_hchacha20.h index 05e5670c1..f7b37eb98 100644 --- a/crypto/include/sodium/crypto_core_hchacha20.h +++ b/crypto/include/sodium/crypto_core_hchacha20.h @@ -5,28 +5,34 @@ #include "export.h" #ifdef __cplusplus -extern "C" { +extern "C" +{ #endif #define crypto_core_hchacha20_OUTPUTBYTES 32U -SODIUM_EXPORT -size_t crypto_core_hchacha20_outputbytes(void); + SODIUM_EXPORT + size_t + crypto_core_hchacha20_outputbytes(void); #define crypto_core_hchacha20_INPUTBYTES 16U -SODIUM_EXPORT -size_t crypto_core_hchacha20_inputbytes(void); + SODIUM_EXPORT + size_t + crypto_core_hchacha20_inputbytes(void); #define crypto_core_hchacha20_KEYBYTES 32U -SODIUM_EXPORT -size_t crypto_core_hchacha20_keybytes(void); + SODIUM_EXPORT + size_t + crypto_core_hchacha20_keybytes(void); #define crypto_core_hchacha20_CONSTBYTES 16U -SODIUM_EXPORT -size_t crypto_core_hchacha20_constbytes(void); - -SODIUM_EXPORT -int crypto_core_hchacha20(unsigned char *out, const unsigned char *in, - const unsigned char *k, const unsigned char *c); + SODIUM_EXPORT + size_t + crypto_core_hchacha20_constbytes(void); + + SODIUM_EXPORT + int + crypto_core_hchacha20(unsigned char *out, const unsigned char *in, + const unsigned char *k, const unsigned char *c); #ifdef __cplusplus } diff --git a/crypto/include/sodium/crypto_core_salsa20.h b/crypto/include/sodium/crypto_core_salsa20.h index 160cc56d2..3d2084c29 100644 --- a/crypto/include/sodium/crypto_core_salsa20.h +++ b/crypto/include/sodium/crypto_core_salsa20.h @@ -5,28 +5,34 @@ #include "export.h" #ifdef __cplusplus -extern "C" { +extern "C" +{ #endif #define crypto_core_salsa20_OUTPUTBYTES 64U -SODIUM_EXPORT -size_t crypto_core_salsa20_outputbytes(void); + SODIUM_EXPORT + size_t + crypto_core_salsa20_outputbytes(void); #define crypto_core_salsa20_INPUTBYTES 16U -SODIUM_EXPORT -size_t crypto_core_salsa20_inputbytes(void); + SODIUM_EXPORT + size_t + crypto_core_salsa20_inputbytes(void); #define crypto_core_salsa20_KEYBYTES 32U -SODIUM_EXPORT -size_t crypto_core_salsa20_keybytes(void); + SODIUM_EXPORT + size_t + crypto_core_salsa20_keybytes(void); #define crypto_core_salsa20_CONSTBYTES 16U -SODIUM_EXPORT -size_t crypto_core_salsa20_constbytes(void); - -SODIUM_EXPORT -int crypto_core_salsa20(unsigned char *out, const unsigned char *in, - const unsigned char *k, const unsigned char *c); + SODIUM_EXPORT + size_t + crypto_core_salsa20_constbytes(void); + + SODIUM_EXPORT + int + crypto_core_salsa20(unsigned char *out, const unsigned char *in, + const unsigned char *k, const unsigned char *c); #ifdef __cplusplus } diff --git a/crypto/include/sodium/crypto_generichash.h b/crypto/include/sodium/crypto_generichash.h index 2398fb9db..098c84f1b 100644 --- a/crypto/include/sodium/crypto_generichash.h +++ b/crypto/include/sodium/crypto_generichash.h @@ -7,66 +7,79 @@ #include "export.h" #ifdef __cplusplus -# ifdef __GNUC__ -# pragma GCC diagnostic ignored "-Wlong-long" -# endif -extern "C" { +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wlong-long" +#endif +extern "C" +{ #endif #define crypto_generichash_BYTES_MIN crypto_generichash_blake2b_BYTES_MIN -SODIUM_EXPORT -size_t crypto_generichash_bytes_min(void); + SODIUM_EXPORT + size_t + crypto_generichash_bytes_min(void); #define crypto_generichash_BYTES_MAX crypto_generichash_blake2b_BYTES_MAX -SODIUM_EXPORT -size_t crypto_generichash_bytes_max(void); + SODIUM_EXPORT + size_t + crypto_generichash_bytes_max(void); #define crypto_generichash_BYTES crypto_generichash_blake2b_BYTES -SODIUM_EXPORT -size_t crypto_generichash_bytes(void); + SODIUM_EXPORT + size_t + crypto_generichash_bytes(void); #define crypto_generichash_KEYBYTES_MIN crypto_generichash_blake2b_KEYBYTES_MIN -SODIUM_EXPORT -size_t crypto_generichash_keybytes_min(void); + SODIUM_EXPORT + size_t + crypto_generichash_keybytes_min(void); #define crypto_generichash_KEYBYTES_MAX crypto_generichash_blake2b_KEYBYTES_MAX -SODIUM_EXPORT -size_t crypto_generichash_keybytes_max(void); + SODIUM_EXPORT + size_t + crypto_generichash_keybytes_max(void); #define crypto_generichash_KEYBYTES crypto_generichash_blake2b_KEYBYTES -SODIUM_EXPORT -size_t crypto_generichash_keybytes(void); + SODIUM_EXPORT + size_t + crypto_generichash_keybytes(void); #define crypto_generichash_PRIMITIVE "blake2b" -SODIUM_EXPORT -const char *crypto_generichash_primitive(void); - -typedef crypto_generichash_blake2b_state crypto_generichash_state; - -SODIUM_EXPORT -size_t crypto_generichash_statebytes(void); - -SODIUM_EXPORT -int crypto_generichash(unsigned char *out, size_t outlen, - const unsigned char *in, unsigned long long inlen, - const unsigned char *key, size_t keylen); - -SODIUM_EXPORT -int crypto_generichash_init(crypto_generichash_state *state, - const unsigned char *key, - const size_t keylen, const size_t outlen); - -SODIUM_EXPORT -int crypto_generichash_update(crypto_generichash_state *state, - const unsigned char *in, - unsigned long long inlen); - -SODIUM_EXPORT -int crypto_generichash_final(crypto_generichash_state *state, - unsigned char *out, const size_t outlen); - -SODIUM_EXPORT -void crypto_generichash_keygen(unsigned char k[crypto_generichash_KEYBYTES]); + SODIUM_EXPORT + const char * + crypto_generichash_primitive(void); + + typedef crypto_generichash_blake2b_state crypto_generichash_state; + + SODIUM_EXPORT + size_t + crypto_generichash_statebytes(void); + + SODIUM_EXPORT + int + crypto_generichash(unsigned char *out, size_t outlen, const unsigned char *in, + unsigned long long inlen, const unsigned char *key, + size_t keylen); + + SODIUM_EXPORT + int + crypto_generichash_init(crypto_generichash_state *state, + const unsigned char *key, const size_t keylen, + const size_t outlen); + + SODIUM_EXPORT + int + crypto_generichash_update(crypto_generichash_state *state, + const unsigned char *in, unsigned long long inlen); + + SODIUM_EXPORT + int + crypto_generichash_final(crypto_generichash_state *state, unsigned char *out, + const size_t outlen); + + SODIUM_EXPORT + void + crypto_generichash_keygen(unsigned char k[crypto_generichash_KEYBYTES]); #ifdef __cplusplus } diff --git a/crypto/include/sodium/crypto_generichash_blake2b.h b/crypto/include/sodium/crypto_generichash_blake2b.h index 9326a04ad..07761316c 100644 --- a/crypto/include/sodium/crypto_generichash_blake2b.h +++ b/crypto/include/sodium/crypto_generichash_blake2b.h @@ -8,107 +8,120 @@ #include "export.h" #ifdef __cplusplus -# ifdef __GNUC__ -# pragma GCC diagnostic ignored "-Wlong-long" -# endif -extern "C" { +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wlong-long" +#endif +extern "C" +{ #endif #if defined(__IBMC__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) -# pragma pack(1) +#pragma pack(1) #else -# pragma pack(push, 1) +#pragma pack(push, 1) #endif -typedef struct CRYPTO_ALIGN(64) crypto_generichash_blake2b_state { + typedef struct CRYPTO_ALIGN(64) crypto_generichash_blake2b_state + { uint64_t h[8]; uint64_t t[2]; uint64_t f[2]; - uint8_t buf[2 * 128]; - size_t buflen; - uint8_t last_node; -} crypto_generichash_blake2b_state; + uint8_t buf[2 * 128]; + size_t buflen; + uint8_t last_node; + } crypto_generichash_blake2b_state; #if defined(__IBMC__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) -# pragma pack() +#pragma pack() #else -# pragma pack(pop) +#pragma pack(pop) #endif -#define crypto_generichash_blake2b_BYTES_MIN 16U -SODIUM_EXPORT -size_t crypto_generichash_blake2b_bytes_min(void); - -#define crypto_generichash_blake2b_BYTES_MAX 64U -SODIUM_EXPORT -size_t crypto_generichash_blake2b_bytes_max(void); - -#define crypto_generichash_blake2b_BYTES 32U -SODIUM_EXPORT -size_t crypto_generichash_blake2b_bytes(void); - -#define crypto_generichash_blake2b_KEYBYTES_MIN 16U -SODIUM_EXPORT -size_t crypto_generichash_blake2b_keybytes_min(void); - -#define crypto_generichash_blake2b_KEYBYTES_MAX 64U -SODIUM_EXPORT -size_t crypto_generichash_blake2b_keybytes_max(void); - -#define crypto_generichash_blake2b_KEYBYTES 32U -SODIUM_EXPORT -size_t crypto_generichash_blake2b_keybytes(void); - -#define crypto_generichash_blake2b_SALTBYTES 16U -SODIUM_EXPORT -size_t crypto_generichash_blake2b_saltbytes(void); +#define crypto_generichash_blake2b_BYTES_MIN 16U + SODIUM_EXPORT + size_t + crypto_generichash_blake2b_bytes_min(void); + +#define crypto_generichash_blake2b_BYTES_MAX 64U + SODIUM_EXPORT + size_t + crypto_generichash_blake2b_bytes_max(void); + +#define crypto_generichash_blake2b_BYTES 32U + SODIUM_EXPORT + size_t + crypto_generichash_blake2b_bytes(void); + +#define crypto_generichash_blake2b_KEYBYTES_MIN 16U + SODIUM_EXPORT + size_t + crypto_generichash_blake2b_keybytes_min(void); + +#define crypto_generichash_blake2b_KEYBYTES_MAX 64U + SODIUM_EXPORT + size_t + crypto_generichash_blake2b_keybytes_max(void); + +#define crypto_generichash_blake2b_KEYBYTES 32U + SODIUM_EXPORT + size_t + crypto_generichash_blake2b_keybytes(void); + +#define crypto_generichash_blake2b_SALTBYTES 16U + SODIUM_EXPORT + size_t + crypto_generichash_blake2b_saltbytes(void); #define crypto_generichash_blake2b_PERSONALBYTES 16U -SODIUM_EXPORT -size_t crypto_generichash_blake2b_personalbytes(void); - -SODIUM_EXPORT -size_t crypto_generichash_blake2b_statebytes(void); - -SODIUM_EXPORT -int crypto_generichash_blake2b(unsigned char *out, size_t outlen, - const unsigned char *in, - unsigned long long inlen, - const unsigned char *key, size_t keylen); - -SODIUM_EXPORT -int crypto_generichash_blake2b_salt_personal(unsigned char *out, size_t outlen, - const unsigned char *in, - unsigned long long inlen, - const unsigned char *key, - size_t keylen, - const unsigned char *salt, - const unsigned char *personal); - -SODIUM_EXPORT -int crypto_generichash_blake2b_init(crypto_generichash_blake2b_state *state, - const unsigned char *key, - const size_t keylen, const size_t outlen); - -SODIUM_EXPORT -int crypto_generichash_blake2b_init_salt_personal(crypto_generichash_blake2b_state *state, - const unsigned char *key, - const size_t keylen, const size_t outlen, - const unsigned char *salt, - const unsigned char *personal); - -SODIUM_EXPORT -int crypto_generichash_blake2b_update(crypto_generichash_blake2b_state *state, - const unsigned char *in, - unsigned long long inlen); - -SODIUM_EXPORT -int crypto_generichash_blake2b_final(crypto_generichash_blake2b_state *state, - unsigned char *out, - const size_t outlen); - -SODIUM_EXPORT -void crypto_generichash_blake2b_keygen(unsigned char k[crypto_generichash_blake2b_KEYBYTES]); + SODIUM_EXPORT + size_t + crypto_generichash_blake2b_personalbytes(void); + + SODIUM_EXPORT + size_t + crypto_generichash_blake2b_statebytes(void); + + SODIUM_EXPORT + int + crypto_generichash_blake2b(unsigned char *out, size_t outlen, + const unsigned char *in, unsigned long long inlen, + const unsigned char *key, size_t keylen); + + SODIUM_EXPORT + int + crypto_generichash_blake2b_salt_personal( + unsigned char *out, size_t outlen, const unsigned char *in, + unsigned long long inlen, const unsigned char *key, size_t keylen, + const unsigned char *salt, const unsigned char *personal); + + SODIUM_EXPORT + int + crypto_generichash_blake2b_init(crypto_generichash_blake2b_state *state, + const unsigned char *key, const size_t keylen, + const size_t outlen); + + SODIUM_EXPORT + int + crypto_generichash_blake2b_init_salt_personal( + crypto_generichash_blake2b_state *state, const unsigned char *key, + const size_t keylen, const size_t outlen, const unsigned char *salt, + const unsigned char *personal); + + SODIUM_EXPORT + int + crypto_generichash_blake2b_update(crypto_generichash_blake2b_state *state, + const unsigned char *in, + unsigned long long inlen); + + SODIUM_EXPORT + int + crypto_generichash_blake2b_final(crypto_generichash_blake2b_state *state, + unsigned char *out, const size_t outlen); + + SODIUM_EXPORT + void + crypto_generichash_blake2b_keygen( + unsigned char k[crypto_generichash_blake2b_KEYBYTES]); #ifdef __cplusplus } diff --git a/crypto/include/sodium/crypto_scalarmult.h b/crypto/include/sodium/crypto_scalarmult.h index f7fa6f8f3..aa0f55a31 100644 --- a/crypto/include/sodium/crypto_scalarmult.h +++ b/crypto/include/sodium/crypto_scalarmult.h @@ -7,36 +7,41 @@ #include "export.h" #ifdef __cplusplus -extern "C" { +extern "C" +{ #endif #define crypto_scalarmult_BYTES crypto_scalarmult_curve25519_BYTES -SODIUM_EXPORT -size_t crypto_scalarmult_bytes(void); + SODIUM_EXPORT + size_t + crypto_scalarmult_bytes(void); #define crypto_scalarmult_SCALARBYTES crypto_scalarmult_curve25519_SCALARBYTES -SODIUM_EXPORT -size_t crypto_scalarmult_scalarbytes(void); + SODIUM_EXPORT + size_t + crypto_scalarmult_scalarbytes(void); #define crypto_scalarmult_PRIMITIVE "curve25519" -SODIUM_EXPORT -const char *crypto_scalarmult_primitive(void); - -SODIUM_EXPORT -int crypto_scalarmult_base(unsigned char *q, const unsigned char *n); - -/* - * NOTE: Do not use the result of this function directly. - * - * Hash the result with the public keys in order to compute a shared - * secret key: H(q || client_pk || server_pk) - * - * Or unless this is not an option, use the crypto_kx() API instead. - */ -SODIUM_EXPORT -int crypto_scalarmult(unsigned char *q, const unsigned char *n, - const unsigned char *p) - __attribute__ ((warn_unused_result)); + SODIUM_EXPORT + const char * + crypto_scalarmult_primitive(void); + + SODIUM_EXPORT + int + crypto_scalarmult_base(unsigned char *q, const unsigned char *n); + + /* + * NOTE: Do not use the result of this function directly. + * + * Hash the result with the public keys in order to compute a shared + * secret key: H(q || client_pk || server_pk) + * + * Or unless this is not an option, use the crypto_kx() API instead. + */ + SODIUM_EXPORT + int + crypto_scalarmult(unsigned char *q, const unsigned char *n, + const unsigned char *p) __attribute__((warn_unused_result)); #ifdef __cplusplus } diff --git a/crypto/include/sodium/crypto_scalarmult_curve25519.h b/crypto/include/sodium/crypto_scalarmult_curve25519.h index ae85eadc2..570527601 100644 --- a/crypto/include/sodium/crypto_scalarmult_curve25519.h +++ b/crypto/include/sodium/crypto_scalarmult_curve25519.h @@ -6,32 +6,37 @@ #include "export.h" #ifdef __cplusplus -extern "C" { +extern "C" +{ #endif #define crypto_scalarmult_curve25519_BYTES 32U -SODIUM_EXPORT -size_t crypto_scalarmult_curve25519_bytes(void); + SODIUM_EXPORT + size_t + crypto_scalarmult_curve25519_bytes(void); #define crypto_scalarmult_curve25519_SCALARBYTES 32U -SODIUM_EXPORT -size_t crypto_scalarmult_curve25519_scalarbytes(void); - -/* - * NOTE: Do not use the result of this function directly. - * - * Hash the result with the public keys in order to compute a shared - * secret key: H(q || client_pk || server_pk) - * - * Or unless this is not an option, use the crypto_kx() API instead. - */ -SODIUM_EXPORT -int crypto_scalarmult_curve25519(unsigned char *q, const unsigned char *n, - const unsigned char *p) - __attribute__ ((warn_unused_result)); - -SODIUM_EXPORT -int crypto_scalarmult_curve25519_base(unsigned char *q, const unsigned char *n); + SODIUM_EXPORT + size_t + crypto_scalarmult_curve25519_scalarbytes(void); + + /* + * NOTE: Do not use the result of this function directly. + * + * Hash the result with the public keys in order to compute a shared + * secret key: H(q || client_pk || server_pk) + * + * Or unless this is not an option, use the crypto_kx() API instead. + */ + SODIUM_EXPORT + int + crypto_scalarmult_curve25519(unsigned char *q, const unsigned char *n, + const unsigned char *p) + __attribute__((warn_unused_result)); + + SODIUM_EXPORT + int + crypto_scalarmult_curve25519_base(unsigned char *q, const unsigned char *n); #ifdef __cplusplus } diff --git a/crypto/include/sodium/crypto_scalarmult_ed25519.h b/crypto/include/sodium/crypto_scalarmult_ed25519.h index 3d5123511..3a0ad428e 100644 --- a/crypto/include/sodium/crypto_scalarmult_ed25519.h +++ b/crypto/include/sodium/crypto_scalarmult_ed25519.h @@ -7,32 +7,37 @@ #include "export.h" #ifdef __cplusplus -extern "C" { +extern "C" +{ #endif #define crypto_scalarmult_ed25519_BYTES 32U -SODIUM_EXPORT -size_t crypto_scalarmult_ed25519_bytes(void); + SODIUM_EXPORT + size_t + crypto_scalarmult_ed25519_bytes(void); #define crypto_scalarmult_ed25519_SCALARBYTES 32U -SODIUM_EXPORT -size_t crypto_scalarmult_ed25519_scalarbytes(void); - -/* - * NOTE: Do not use the result of this function directly. - * - * Hash the result with the public keys in order to compute a shared - * secret key: H(q || client_pk || server_pk) - * - * Or unless this is not an option, use the crypto_kx() API instead. - */ -SODIUM_EXPORT -int crypto_scalarmult_ed25519(unsigned char *q, const unsigned char *n, - const unsigned char *p) - __attribute__ ((warn_unused_result)); - -SODIUM_EXPORT -int crypto_scalarmult_ed25519_base(unsigned char *q, const unsigned char *n); + SODIUM_EXPORT + size_t + crypto_scalarmult_ed25519_scalarbytes(void); + + /* + * NOTE: Do not use the result of this function directly. + * + * Hash the result with the public keys in order to compute a shared + * secret key: H(q || client_pk || server_pk) + * + * Or unless this is not an option, use the crypto_kx() API instead. + */ + SODIUM_EXPORT + int + crypto_scalarmult_ed25519(unsigned char *q, const unsigned char *n, + const unsigned char *p) + __attribute__((warn_unused_result)); + + SODIUM_EXPORT + int + crypto_scalarmult_ed25519_base(unsigned char *q, const unsigned char *n); #ifdef __cplusplus } diff --git a/crypto/include/sodium/crypto_sign.h b/crypto/include/sodium/crypto_sign.h index 85aff0c9d..fbed5a1dc 100644 --- a/crypto/include/sodium/crypto_sign.h +++ b/crypto/include/sodium/crypto_sign.h @@ -14,87 +14,102 @@ #include "export.h" #ifdef __cplusplus -# ifdef __GNUC__ -# pragma GCC diagnostic ignored "-Wlong-long" -# endif -extern "C" { +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wlong-long" +#endif +extern "C" +{ #endif -typedef crypto_sign_ed25519ph_state crypto_sign_state; + typedef crypto_sign_ed25519ph_state crypto_sign_state; -SODIUM_EXPORT -size_t crypto_sign_statebytes(void); + SODIUM_EXPORT + size_t + crypto_sign_statebytes(void); #define crypto_sign_BYTES crypto_sign_ed25519_BYTES -SODIUM_EXPORT -size_t crypto_sign_bytes(void); + SODIUM_EXPORT + size_t + crypto_sign_bytes(void); #define crypto_sign_SEEDBYTES crypto_sign_ed25519_SEEDBYTES -SODIUM_EXPORT -size_t crypto_sign_seedbytes(void); + SODIUM_EXPORT + size_t + crypto_sign_seedbytes(void); #define crypto_sign_PUBLICKEYBYTES crypto_sign_ed25519_PUBLICKEYBYTES -SODIUM_EXPORT -size_t crypto_sign_publickeybytes(void); + SODIUM_EXPORT + size_t + crypto_sign_publickeybytes(void); #define crypto_sign_SECRETKEYBYTES crypto_sign_ed25519_SECRETKEYBYTES -SODIUM_EXPORT -size_t crypto_sign_secretkeybytes(void); + SODIUM_EXPORT + size_t + crypto_sign_secretkeybytes(void); #define crypto_sign_MESSAGEBYTES_MAX crypto_sign_ed25519_MESSAGEBYTES_MAX -SODIUM_EXPORT -size_t crypto_sign_messagebytes_max(void); + SODIUM_EXPORT + size_t + crypto_sign_messagebytes_max(void); #define crypto_sign_PRIMITIVE "ed25519" -SODIUM_EXPORT -const char *crypto_sign_primitive(void); - -SODIUM_EXPORT -int crypto_sign_seed_keypair(unsigned char *pk, unsigned char *sk, - const unsigned char *seed); - -SODIUM_EXPORT -int crypto_sign_keypair(unsigned char *pk, unsigned char *sk); - -SODIUM_EXPORT -int crypto_sign(unsigned char *sm, unsigned long long *smlen_p, - const unsigned char *m, unsigned long long mlen, - const unsigned char *sk); - -SODIUM_EXPORT -int crypto_sign_open(unsigned char *m, unsigned long long *mlen_p, - const unsigned char *sm, unsigned long long smlen, - const unsigned char *pk) - __attribute__ ((warn_unused_result)); - -SODIUM_EXPORT -int crypto_sign_detached(unsigned char *sig, unsigned long long *siglen_p, - const unsigned char *m, unsigned long long mlen, - const unsigned char *sk); - -SODIUM_EXPORT -int crypto_sign_verify_detached(const unsigned char *sig, - const unsigned char *m, - unsigned long long mlen, - const unsigned char *pk) - __attribute__ ((warn_unused_result)); - -SODIUM_EXPORT -int crypto_sign_init(crypto_sign_state *state); - -SODIUM_EXPORT -int crypto_sign_update(crypto_sign_state *state, - const unsigned char *m, unsigned long long mlen); - -SODIUM_EXPORT -int crypto_sign_final_create(crypto_sign_state *state, unsigned char *sig, - unsigned long long *siglen_p, - const unsigned char *sk); - -SODIUM_EXPORT -int crypto_sign_final_verify(crypto_sign_state *state, unsigned char *sig, - const unsigned char *pk) - __attribute__ ((warn_unused_result)); + SODIUM_EXPORT + const char * + crypto_sign_primitive(void); + + SODIUM_EXPORT + int + crypto_sign_seed_keypair(unsigned char *pk, unsigned char *sk, + const unsigned char *seed); + + SODIUM_EXPORT + int + crypto_sign_keypair(unsigned char *pk, unsigned char *sk); + + SODIUM_EXPORT + int + crypto_sign(unsigned char *sm, unsigned long long *smlen_p, + const unsigned char *m, unsigned long long mlen, + const unsigned char *sk); + + SODIUM_EXPORT + int + crypto_sign_open(unsigned char *m, unsigned long long *mlen_p, + const unsigned char *sm, unsigned long long smlen, + const unsigned char *pk) __attribute__((warn_unused_result)); + + SODIUM_EXPORT + int + crypto_sign_detached(unsigned char *sig, unsigned long long *siglen_p, + const unsigned char *m, unsigned long long mlen, + const unsigned char *sk); + + SODIUM_EXPORT + int + crypto_sign_verify_detached(const unsigned char *sig, const unsigned char *m, + unsigned long long mlen, const unsigned char *pk) + __attribute__((warn_unused_result)); + + SODIUM_EXPORT + int + crypto_sign_init(crypto_sign_state *state); + + SODIUM_EXPORT + int + crypto_sign_update(crypto_sign_state *state, const unsigned char *m, + unsigned long long mlen); + + SODIUM_EXPORT + int + crypto_sign_final_create(crypto_sign_state *state, unsigned char *sig, + unsigned long long *siglen_p, + const unsigned char *sk); + + SODIUM_EXPORT + int + crypto_sign_final_verify(crypto_sign_state *state, unsigned char *sig, + const unsigned char *pk) + __attribute__((warn_unused_result)); #ifdef __cplusplus } diff --git a/crypto/include/sodium/crypto_sign_ed25519.h b/crypto/include/sodium/crypto_sign_ed25519.h index 38d2b9dd6..115b8df22 100644 --- a/crypto/include/sodium/crypto_sign_ed25519.h +++ b/crypto/include/sodium/crypto_sign_ed25519.h @@ -6,106 +6,125 @@ #include "export.h" #ifdef __cplusplus -# ifdef __GNUC__ -# pragma GCC diagnostic ignored "-Wlong-long" -# endif -extern "C" { +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wlong-long" +#endif +extern "C" +{ #endif -typedef struct crypto_sign_ed25519ph_state { + typedef struct crypto_sign_ed25519ph_state + { crypto_hash_sha512_state hs; -} crypto_sign_ed25519ph_state; + } crypto_sign_ed25519ph_state; -SODIUM_EXPORT -size_t crypto_sign_ed25519ph_statebytes(void); + SODIUM_EXPORT + size_t + crypto_sign_ed25519ph_statebytes(void); #define crypto_sign_ed25519_BYTES 64U -SODIUM_EXPORT -size_t crypto_sign_ed25519_bytes(void); + SODIUM_EXPORT + size_t + crypto_sign_ed25519_bytes(void); #define crypto_sign_ed25519_SEEDBYTES 32U -SODIUM_EXPORT -size_t crypto_sign_ed25519_seedbytes(void); + SODIUM_EXPORT + size_t + crypto_sign_ed25519_seedbytes(void); #define crypto_sign_ed25519_PUBLICKEYBYTES 32U -SODIUM_EXPORT -size_t crypto_sign_ed25519_publickeybytes(void); + SODIUM_EXPORT + size_t + crypto_sign_ed25519_publickeybytes(void); #define crypto_sign_ed25519_SECRETKEYBYTES (32U + 32U) -SODIUM_EXPORT -size_t crypto_sign_ed25519_secretkeybytes(void); - -#define crypto_sign_ed25519_MESSAGEBYTES_MAX (SODIUM_SIZE_MAX - crypto_sign_ed25519_BYTES) -SODIUM_EXPORT -size_t crypto_sign_ed25519_messagebytes_max(void); - -SODIUM_EXPORT -int crypto_sign_ed25519(unsigned char *sm, unsigned long long *smlen_p, - const unsigned char *m, unsigned long long mlen, - const unsigned char *sk); - -SODIUM_EXPORT -int crypto_sign_ed25519_open(unsigned char *m, unsigned long long *mlen_p, - const unsigned char *sm, unsigned long long smlen, - const unsigned char *pk) - __attribute__ ((warn_unused_result)); - -SODIUM_EXPORT -int crypto_sign_ed25519_detached(unsigned char *sig, - unsigned long long *siglen_p, - const unsigned char *m, - unsigned long long mlen, - const unsigned char *sk); - -SODIUM_EXPORT -int crypto_sign_ed25519_verify_detached(const unsigned char *sig, - const unsigned char *m, - unsigned long long mlen, - const unsigned char *pk) - __attribute__ ((warn_unused_result)); - -SODIUM_EXPORT -int crypto_sign_ed25519_keypair(unsigned char *pk, unsigned char *sk); - -SODIUM_EXPORT -int crypto_sign_ed25519_seed_keypair(unsigned char *pk, unsigned char *sk, - const unsigned char *seed); - -SODIUM_EXPORT -int crypto_sign_ed25519_pk_to_curve25519(unsigned char *curve25519_pk, - const unsigned char *ed25519_pk) - __attribute__ ((warn_unused_result)); - -SODIUM_EXPORT -int crypto_sign_ed25519_sk_to_curve25519(unsigned char *curve25519_sk, - const unsigned char *ed25519_sk); - -SODIUM_EXPORT -int crypto_sign_ed25519_sk_to_seed(unsigned char *seed, - const unsigned char *sk); - -SODIUM_EXPORT -int crypto_sign_ed25519_sk_to_pk(unsigned char *pk, const unsigned char *sk); - -SODIUM_EXPORT -int crypto_sign_ed25519ph_init(crypto_sign_ed25519ph_state *state); - -SODIUM_EXPORT -int crypto_sign_ed25519ph_update(crypto_sign_ed25519ph_state *state, - const unsigned char *m, - unsigned long long mlen); - -SODIUM_EXPORT -int crypto_sign_ed25519ph_final_create(crypto_sign_ed25519ph_state *state, - unsigned char *sig, - unsigned long long *siglen_p, - const unsigned char *sk); - -SODIUM_EXPORT -int crypto_sign_ed25519ph_final_verify(crypto_sign_ed25519ph_state *state, - unsigned char *sig, - const unsigned char *pk) - __attribute__ ((warn_unused_result)); + SODIUM_EXPORT + size_t + crypto_sign_ed25519_secretkeybytes(void); + +#define crypto_sign_ed25519_MESSAGEBYTES_MAX \ + (SODIUM_SIZE_MAX - crypto_sign_ed25519_BYTES) + SODIUM_EXPORT + size_t + crypto_sign_ed25519_messagebytes_max(void); + + SODIUM_EXPORT + int + crypto_sign_ed25519(unsigned char *sm, unsigned long long *smlen_p, + const unsigned char *m, unsigned long long mlen, + const unsigned char *sk); + + SODIUM_EXPORT + int + crypto_sign_ed25519_open(unsigned char *m, unsigned long long *mlen_p, + const unsigned char *sm, unsigned long long smlen, + const unsigned char *pk) + __attribute__((warn_unused_result)); + + SODIUM_EXPORT + int + crypto_sign_ed25519_detached(unsigned char *sig, unsigned long long *siglen_p, + const unsigned char *m, unsigned long long mlen, + const unsigned char *sk); + + SODIUM_EXPORT + int + crypto_sign_ed25519_verify_detached(const unsigned char *sig, + const unsigned char *m, + unsigned long long mlen, + const unsigned char *pk) + __attribute__((warn_unused_result)); + + SODIUM_EXPORT + int + crypto_sign_ed25519_keypair(unsigned char *pk, unsigned char *sk); + + SODIUM_EXPORT + int + crypto_sign_ed25519_seed_keypair(unsigned char *pk, unsigned char *sk, + const unsigned char *seed); + + SODIUM_EXPORT + int + crypto_sign_ed25519_pk_to_curve25519(unsigned char *curve25519_pk, + const unsigned char *ed25519_pk) + __attribute__((warn_unused_result)); + + SODIUM_EXPORT + int + crypto_sign_ed25519_sk_to_curve25519(unsigned char *curve25519_sk, + const unsigned char *ed25519_sk); + + SODIUM_EXPORT + int + crypto_sign_ed25519_sk_to_seed(unsigned char *seed, const unsigned char *sk); + + SODIUM_EXPORT + int + crypto_sign_ed25519_sk_to_pk(unsigned char *pk, const unsigned char *sk); + + SODIUM_EXPORT + int + crypto_sign_ed25519ph_init(crypto_sign_ed25519ph_state *state); + + SODIUM_EXPORT + int + crypto_sign_ed25519ph_update(crypto_sign_ed25519ph_state *state, + const unsigned char *m, unsigned long long mlen); + + SODIUM_EXPORT + int + crypto_sign_ed25519ph_final_create(crypto_sign_ed25519ph_state *state, + unsigned char *sig, + unsigned long long *siglen_p, + const unsigned char *sk); + + SODIUM_EXPORT + int + crypto_sign_ed25519ph_final_verify(crypto_sign_ed25519ph_state *state, + unsigned char *sig, + const unsigned char *pk) + __attribute__((warn_unused_result)); #ifdef __cplusplus } diff --git a/crypto/include/sodium/crypto_sign_edwards25519sha512batch.h b/crypto/include/sodium/crypto_sign_edwards25519sha512batch.h index 4bb919247..1a95af3d4 100644 --- a/crypto/include/sodium/crypto_sign_edwards25519sha512batch.h +++ b/crypto/include/sodium/crypto_sign_edwards25519sha512batch.h @@ -16,37 +16,42 @@ #include "export.h" #ifdef __cplusplus -# ifdef __GNUC__ -# pragma GCC diagnostic ignored "-Wlong-long" -# endif -extern "C" { +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wlong-long" +#endif +extern "C" +{ #endif #define crypto_sign_edwards25519sha512batch_BYTES 64U #define crypto_sign_edwards25519sha512batch_PUBLICKEYBYTES 32U #define crypto_sign_edwards25519sha512batch_SECRETKEYBYTES (32U + 32U) -#define crypto_sign_edwards25519sha512batch_MESSAGEBYTES_MAX (SODIUM_SIZE_MAX - crypto_sign_edwards25519sha512batch_BYTES) - -SODIUM_EXPORT -int crypto_sign_edwards25519sha512batch(unsigned char *sm, - unsigned long long *smlen_p, - const unsigned char *m, - unsigned long long mlen, - const unsigned char *sk) - __attribute__ ((deprecated)); - -SODIUM_EXPORT -int crypto_sign_edwards25519sha512batch_open(unsigned char *m, - unsigned long long *mlen_p, - const unsigned char *sm, - unsigned long long smlen, - const unsigned char *pk) - __attribute__ ((deprecated)); - -SODIUM_EXPORT -int crypto_sign_edwards25519sha512batch_keypair(unsigned char *pk, - unsigned char *sk) - __attribute__ ((deprecated)); +#define crypto_sign_edwards25519sha512batch_MESSAGEBYTES_MAX \ + (SODIUM_SIZE_MAX - crypto_sign_edwards25519sha512batch_BYTES) + + SODIUM_EXPORT + int + crypto_sign_edwards25519sha512batch(unsigned char *sm, + unsigned long long *smlen_p, + const unsigned char *m, + unsigned long long mlen, + const unsigned char *sk) + __attribute__((deprecated)); + + SODIUM_EXPORT + int + crypto_sign_edwards25519sha512batch_open(unsigned char *m, + unsigned long long *mlen_p, + const unsigned char *sm, + unsigned long long smlen, + const unsigned char *pk) + __attribute__((deprecated)); + + SODIUM_EXPORT + int + crypto_sign_edwards25519sha512batch_keypair(unsigned char *pk, + unsigned char *sk) + __attribute__((deprecated)); #ifdef __cplusplus } diff --git a/crypto/include/sodium/crypto_stream_chacha20.h b/crypto/include/sodium/crypto_stream_chacha20.h index d3e2b234e..845b81b8c 100644 --- a/crypto/include/sodium/crypto_stream_chacha20.h +++ b/crypto/include/sodium/crypto_stream_chacha20.h @@ -14,82 +14,103 @@ #include "export.h" #ifdef __cplusplus -# ifdef __GNUC__ -# pragma GCC diagnostic ignored "-Wlong-long" -# endif -extern "C" { +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wlong-long" +#endif +extern "C" +{ #endif #define crypto_stream_chacha20_KEYBYTES 32U -SODIUM_EXPORT -size_t crypto_stream_chacha20_keybytes(void); + SODIUM_EXPORT + size_t + crypto_stream_chacha20_keybytes(void); #define crypto_stream_chacha20_NONCEBYTES 8U -SODIUM_EXPORT -size_t crypto_stream_chacha20_noncebytes(void); + SODIUM_EXPORT + size_t + crypto_stream_chacha20_noncebytes(void); #define crypto_stream_chacha20_MESSAGEBYTES_MAX SODIUM_SIZE_MAX -SODIUM_EXPORT -size_t crypto_stream_chacha20_messagebytes_max(void); + SODIUM_EXPORT + size_t + crypto_stream_chacha20_messagebytes_max(void); -/* ChaCha20 with a 64-bit nonce and a 64-bit counter, as originally designed */ + /* ChaCha20 with a 64-bit nonce and a 64-bit counter, as originally designed + */ -SODIUM_EXPORT -int crypto_stream_chacha20(unsigned char *c, unsigned long long clen, - const unsigned char *n, const unsigned char *k); + SODIUM_EXPORT + int + crypto_stream_chacha20(unsigned char *c, unsigned long long clen, + const unsigned char *n, const unsigned char *k); -SODIUM_EXPORT -int crypto_stream_chacha20_xor(unsigned char *c, const unsigned char *m, - unsigned long long mlen, const unsigned char *n, - const unsigned char *k); + SODIUM_EXPORT + int + crypto_stream_chacha20_xor(unsigned char *c, const unsigned char *m, + unsigned long long mlen, const unsigned char *n, + const unsigned char *k); -SODIUM_EXPORT -int crypto_stream_chacha20_xor_ic(unsigned char *c, const unsigned char *m, - unsigned long long mlen, - const unsigned char *n, uint64_t ic, - const unsigned char *k); + SODIUM_EXPORT + int + crypto_stream_chacha20_xor_ic(unsigned char *c, const unsigned char *m, + unsigned long long mlen, const unsigned char *n, + uint64_t ic, const unsigned char *k); -SODIUM_EXPORT -void crypto_stream_chacha20_keygen(unsigned char k[crypto_stream_chacha20_KEYBYTES]); + SODIUM_EXPORT + void + crypto_stream_chacha20_keygen( + unsigned char k[crypto_stream_chacha20_KEYBYTES]); -/* ChaCha20 with a 96-bit nonce and a 32-bit counter (IETF) */ + /* ChaCha20 with a 96-bit nonce and a 32-bit counter (IETF) */ #define crypto_stream_chacha20_ietf_KEYBYTES 32U -SODIUM_EXPORT -size_t crypto_stream_chacha20_ietf_keybytes(void); + SODIUM_EXPORT + size_t + crypto_stream_chacha20_ietf_keybytes(void); #define crypto_stream_chacha20_ietf_NONCEBYTES 12U -SODIUM_EXPORT -size_t crypto_stream_chacha20_ietf_noncebytes(void); + SODIUM_EXPORT + size_t + crypto_stream_chacha20_ietf_noncebytes(void); #define crypto_stream_chacha20_ietf_MESSAGEBYTES_MAX \ - SODIUM_MIN(SODIUM_SIZE_MAX, 64ULL * (1ULL << 32)) -SODIUM_EXPORT -size_t crypto_stream_chacha20_ietf_messagebytes_max(void); - -SODIUM_EXPORT -int crypto_stream_chacha20_ietf(unsigned char *c, unsigned long long clen, - const unsigned char *n, const unsigned char *k); - -SODIUM_EXPORT -int crypto_stream_chacha20_ietf_xor(unsigned char *c, const unsigned char *m, - unsigned long long mlen, const unsigned char *n, - const unsigned char *k); - -SODIUM_EXPORT -int crypto_stream_chacha20_ietf_xor_ic(unsigned char *c, const unsigned char *m, - unsigned long long mlen, - const unsigned char *n, uint32_t ic, - const unsigned char *k); - -SODIUM_EXPORT -void crypto_stream_chacha20_ietf_keygen(unsigned char k[crypto_stream_chacha20_ietf_KEYBYTES]); - -/* Aliases */ + SODIUM_MIN(SODIUM_SIZE_MAX, 64ULL * (1ULL << 32)) + SODIUM_EXPORT + size_t + crypto_stream_chacha20_ietf_messagebytes_max(void); + + SODIUM_EXPORT + int + crypto_stream_chacha20_ietf(unsigned char *c, unsigned long long clen, + const unsigned char *n, const unsigned char *k); + + SODIUM_EXPORT + int + crypto_stream_chacha20_ietf_xor(unsigned char *c, const unsigned char *m, + unsigned long long mlen, + const unsigned char *n, + const unsigned char *k); -#define crypto_stream_chacha20_IETF_KEYBYTES crypto_stream_chacha20_ietf_KEYBYTES -#define crypto_stream_chacha20_IETF_NONCEBYTES crypto_stream_chacha20_ietf_NONCEBYTES -#define crypto_stream_chacha20_IETF_MESSAGEBYTES_MAX crypto_stream_chacha20_ietf_MESSAGEBYTES_MAX + SODIUM_EXPORT + int + crypto_stream_chacha20_ietf_xor_ic(unsigned char *c, const unsigned char *m, + unsigned long long mlen, + const unsigned char *n, uint32_t ic, + const unsigned char *k); + + SODIUM_EXPORT + void + crypto_stream_chacha20_ietf_keygen( + unsigned char k[crypto_stream_chacha20_ietf_KEYBYTES]); + + /* Aliases */ + +#define crypto_stream_chacha20_IETF_KEYBYTES \ + crypto_stream_chacha20_ietf_KEYBYTES +#define crypto_stream_chacha20_IETF_NONCEBYTES \ + crypto_stream_chacha20_ietf_NONCEBYTES +#define crypto_stream_chacha20_IETF_MESSAGEBYTES_MAX \ + crypto_stream_chacha20_ietf_MESSAGEBYTES_MAX #ifdef __cplusplus } diff --git a/crypto/include/sodium/crypto_stream_salsa20.h b/crypto/include/sodium/crypto_stream_salsa20.h index 0c7688c73..d9f862530 100644 --- a/crypto/include/sodium/crypto_stream_salsa20.h +++ b/crypto/include/sodium/crypto_stream_salsa20.h @@ -14,41 +14,48 @@ #include "export.h" #ifdef __cplusplus -# ifdef __GNUC__ -# pragma GCC diagnostic ignored "-Wlong-long" -# endif -extern "C" { +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wlong-long" +#endif +extern "C" +{ #endif #define crypto_stream_salsa20_KEYBYTES 32U -SODIUM_EXPORT -size_t crypto_stream_salsa20_keybytes(void); + SODIUM_EXPORT + size_t + crypto_stream_salsa20_keybytes(void); #define crypto_stream_salsa20_NONCEBYTES 8U -SODIUM_EXPORT -size_t crypto_stream_salsa20_noncebytes(void); + SODIUM_EXPORT + size_t + crypto_stream_salsa20_noncebytes(void); #define crypto_stream_salsa20_MESSAGEBYTES_MAX SODIUM_SIZE_MAX -SODIUM_EXPORT -size_t crypto_stream_salsa20_messagebytes_max(void); - -SODIUM_EXPORT -int crypto_stream_salsa20(unsigned char *c, unsigned long long clen, - const unsigned char *n, const unsigned char *k); - -SODIUM_EXPORT -int crypto_stream_salsa20_xor(unsigned char *c, const unsigned char *m, - unsigned long long mlen, const unsigned char *n, - const unsigned char *k); - -SODIUM_EXPORT -int crypto_stream_salsa20_xor_ic(unsigned char *c, const unsigned char *m, - unsigned long long mlen, - const unsigned char *n, uint64_t ic, - const unsigned char *k); - -SODIUM_EXPORT -void crypto_stream_salsa20_keygen(unsigned char k[crypto_stream_salsa20_KEYBYTES]); + SODIUM_EXPORT + size_t + crypto_stream_salsa20_messagebytes_max(void); + + SODIUM_EXPORT + int + crypto_stream_salsa20(unsigned char *c, unsigned long long clen, + const unsigned char *n, const unsigned char *k); + + SODIUM_EXPORT + int + crypto_stream_salsa20_xor(unsigned char *c, const unsigned char *m, + unsigned long long mlen, const unsigned char *n, + const unsigned char *k); + + SODIUM_EXPORT + int + crypto_stream_salsa20_xor_ic(unsigned char *c, const unsigned char *m, + unsigned long long mlen, const unsigned char *n, + uint64_t ic, const unsigned char *k); + + SODIUM_EXPORT + void + crypto_stream_salsa20_keygen(unsigned char k[crypto_stream_salsa20_KEYBYTES]); #ifdef __cplusplus } diff --git a/crypto/include/sodium/crypto_stream_xsalsa20.h b/crypto/include/sodium/crypto_stream_xsalsa20.h index cb4c44a8b..d4be372da 100644 --- a/crypto/include/sodium/crypto_stream_xsalsa20.h +++ b/crypto/include/sodium/crypto_stream_xsalsa20.h @@ -14,41 +14,49 @@ #include "export.h" #ifdef __cplusplus -# ifdef __GNUC__ -# pragma GCC diagnostic ignored "-Wlong-long" -# endif -extern "C" { +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wlong-long" +#endif +extern "C" +{ #endif #define crypto_stream_xsalsa20_KEYBYTES 32U -SODIUM_EXPORT -size_t crypto_stream_xsalsa20_keybytes(void); + SODIUM_EXPORT + size_t + crypto_stream_xsalsa20_keybytes(void); #define crypto_stream_xsalsa20_NONCEBYTES 24U -SODIUM_EXPORT -size_t crypto_stream_xsalsa20_noncebytes(void); + SODIUM_EXPORT + size_t + crypto_stream_xsalsa20_noncebytes(void); #define crypto_stream_xsalsa20_MESSAGEBYTES_MAX SODIUM_SIZE_MAX -SODIUM_EXPORT -size_t crypto_stream_xsalsa20_messagebytes_max(void); - -SODIUM_EXPORT -int crypto_stream_xsalsa20(unsigned char *c, unsigned long long clen, - const unsigned char *n, const unsigned char *k); - -SODIUM_EXPORT -int crypto_stream_xsalsa20_xor(unsigned char *c, const unsigned char *m, - unsigned long long mlen, const unsigned char *n, - const unsigned char *k); - -SODIUM_EXPORT -int crypto_stream_xsalsa20_xor_ic(unsigned char *c, const unsigned char *m, - unsigned long long mlen, - const unsigned char *n, uint64_t ic, - const unsigned char *k); - -SODIUM_EXPORT -void crypto_stream_xsalsa20_keygen(unsigned char k[crypto_stream_xsalsa20_KEYBYTES]); + SODIUM_EXPORT + size_t + crypto_stream_xsalsa20_messagebytes_max(void); + + SODIUM_EXPORT + int + crypto_stream_xsalsa20(unsigned char *c, unsigned long long clen, + const unsigned char *n, const unsigned char *k); + + SODIUM_EXPORT + int + crypto_stream_xsalsa20_xor(unsigned char *c, const unsigned char *m, + unsigned long long mlen, const unsigned char *n, + const unsigned char *k); + + SODIUM_EXPORT + int + crypto_stream_xsalsa20_xor_ic(unsigned char *c, const unsigned char *m, + unsigned long long mlen, const unsigned char *n, + uint64_t ic, const unsigned char *k); + + SODIUM_EXPORT + void + crypto_stream_xsalsa20_keygen( + unsigned char k[crypto_stream_xsalsa20_KEYBYTES]); #ifdef __cplusplus } diff --git a/crypto/include/sodium/export.h b/crypto/include/sodium/export.h index a0074fc9c..7cbc16360 100644 --- a/crypto/include/sodium/export.h +++ b/crypto/include/sodium/export.h @@ -7,48 +7,48 @@ #include #if !defined(__clang__) && !defined(__GNUC__) -# ifdef __attribute__ -# undef __attribute__ -# endif -# define __attribute__(a) +#ifdef __attribute__ +#undef __attribute__ +#endif +#define __attribute__(a) #endif #ifdef SODIUM_STATIC -# define SODIUM_EXPORT -# define SODIUM_EXPORT_WEAK +#define SODIUM_EXPORT +#define SODIUM_EXPORT_WEAK +#else +#if defined(_MSC_VER) +#ifdef SODIUM_DLL_EXPORT +#define SODIUM_EXPORT __declspec(dllexport) +#else +#define SODIUM_EXPORT __declspec(dllimport) +#endif +#else +#if defined(__SUNPRO_C) +#ifndef __GNU_C__ +#define SODIUM_EXPORT __attribute__(visibility(__global)) +#else +#define SODIUM_EXPORT __attribute__ __global +#endif +#elif defined(_MSG_VER) +#define SODIUM_EXPORT extern __declspec(dllexport) #else -# if defined(_MSC_VER) -# ifdef SODIUM_DLL_EXPORT -# define SODIUM_EXPORT __declspec(dllexport) -# else -# define SODIUM_EXPORT __declspec(dllimport) -# endif -# else -# if defined(__SUNPRO_C) -# ifndef __GNU_C__ -# define SODIUM_EXPORT __attribute__ (visibility(__global)) -# else -# define SODIUM_EXPORT __attribute__ __global -# endif -# elif defined(_MSG_VER) -# define SODIUM_EXPORT extern __declspec(dllexport) -# else -# define SODIUM_EXPORT __attribute__ ((visibility ("default"))) -# endif -# endif -# if defined(__ELF__) && !defined(SODIUM_DISABLE_WEAK_FUNCTIONS) -# define SODIUM_EXPORT_WEAK SODIUM_EXPORT __attribute__((weak)) -# else -# define SODIUM_EXPORT_WEAK SODIUM_EXPORT -# endif +#define SODIUM_EXPORT __attribute__((visibility("default"))) +#endif +#endif +#if defined(__ELF__) && !defined(SODIUM_DISABLE_WEAK_FUNCTIONS) +#define SODIUM_EXPORT_WEAK SODIUM_EXPORT __attribute__((weak)) +#else +#define SODIUM_EXPORT_WEAK SODIUM_EXPORT +#endif #endif #ifndef CRYPTO_ALIGN -# if defined(__INTEL_COMPILER) || defined(_MSC_VER) -# define CRYPTO_ALIGN(x) __declspec(align(x)) -# else -# define CRYPTO_ALIGN(x) __attribute__ ((aligned(x))) -# endif +#if defined(__INTEL_COMPILER) || defined(_MSC_VER) +#define CRYPTO_ALIGN(x) __declspec(align(x)) +#else +#define CRYPTO_ALIGN(x) __attribute__((aligned(x))) +#endif #endif #define SODIUM_MIN(A, B) ((A) < (B) ? (A) : (B)) diff --git a/crypto/include/sodium/private/common.h b/crypto/include/sodium/private/common.h index 632fc8a73..d971612df 100644 --- a/crypto/include/sodium/private/common.h +++ b/crypto/include/sodium/private/common.h @@ -5,42 +5,42 @@ #include #include -#define COMPILER_ASSERT(X) (void) sizeof(char[(X) ? 1 : -1]) +#define COMPILER_ASSERT(X) (void)sizeof(char[(X) ? 1 : -1]) #ifdef HAVE_TI_MODE -# if defined(__SIZEOF_INT128__) +#if defined(__SIZEOF_INT128__) typedef unsigned __int128 uint128_t; -# else +#else typedef unsigned uint128_t __attribute__((mode(TI))); -# endif +#endif #endif #define ROTL32(X, B) rotl32((X), (B)) static inline uint32_t rotl32(const uint32_t x, const int b) { - return (x << b) | (x >> (32 - b)); + return (x << b) | (x >> (32 - b)); } #define ROTL64(X, B) rotl64((X), (B)) static inline uint64_t rotl64(const uint64_t x, const int b) { - return (x << b) | (x >> (64 - b)); + return (x << b) | (x >> (64 - b)); } #define ROTR32(X, B) rotr32((X), (B)) static inline uint32_t rotr32(const uint32_t x, const int b) { - return (x >> b) | (x << (32 - b)); + return (x >> b) | (x << (32 - b)); } #define ROTR64(X, B) rotr64((X), (B)) static inline uint64_t rotr64(const uint64_t x, const int b) { - return (x >> b) | (x << (64 - b)); + return (x >> b) | (x << (64 - b)); } #define LOAD64_LE(SRC) load64_le(SRC) @@ -48,19 +48,19 @@ static inline uint64_t load64_le(const uint8_t src[8]) { #ifdef NATIVE_LITTLE_ENDIAN - uint64_t w; - memcpy(&w, src, sizeof w); - return w; + uint64_t w; + memcpy(&w, src, sizeof w); + return w; #else - uint64_t w = (uint64_t) src[0]; - w |= (uint64_t) src[1] << 8; - w |= (uint64_t) src[2] << 16; - w |= (uint64_t) src[3] << 24; - w |= (uint64_t) src[4] << 32; - w |= (uint64_t) src[5] << 40; - w |= (uint64_t) src[6] << 48; - w |= (uint64_t) src[7] << 56; - return w; + uint64_t w = (uint64_t)src[0]; + w |= (uint64_t)src[1] << 8; + w |= (uint64_t)src[2] << 16; + w |= (uint64_t)src[3] << 24; + w |= (uint64_t)src[4] << 32; + w |= (uint64_t)src[5] << 40; + w |= (uint64_t)src[6] << 48; + w |= (uint64_t)src[7] << 56; + return w; #endif } @@ -69,16 +69,23 @@ static inline void store64_le(uint8_t dst[8], uint64_t w) { #ifdef NATIVE_LITTLE_ENDIAN - memcpy(dst, &w, sizeof w); + memcpy(dst, &w, sizeof w); #else - dst[0] = (uint8_t) w; w >>= 8; - dst[1] = (uint8_t) w; w >>= 8; - dst[2] = (uint8_t) w; w >>= 8; - dst[3] = (uint8_t) w; w >>= 8; - dst[4] = (uint8_t) w; w >>= 8; - dst[5] = (uint8_t) w; w >>= 8; - dst[6] = (uint8_t) w; w >>= 8; - dst[7] = (uint8_t) w; + dst[0] = (uint8_t)w; + w >>= 8; + dst[1] = (uint8_t)w; + w >>= 8; + dst[2] = (uint8_t)w; + w >>= 8; + dst[3] = (uint8_t)w; + w >>= 8; + dst[4] = (uint8_t)w; + w >>= 8; + dst[5] = (uint8_t)w; + w >>= 8; + dst[6] = (uint8_t)w; + w >>= 8; + dst[7] = (uint8_t)w; #endif } @@ -87,15 +94,15 @@ static inline uint32_t load32_le(const uint8_t src[4]) { #ifdef NATIVE_LITTLE_ENDIAN - uint32_t w; - memcpy(&w, src, sizeof w); - return w; + uint32_t w; + memcpy(&w, src, sizeof w); + return w; #else - uint32_t w = (uint32_t) src[0]; - w |= (uint32_t) src[1] << 8; - w |= (uint32_t) src[2] << 16; - w |= (uint32_t) src[3] << 24; - return w; + uint32_t w = (uint32_t)src[0]; + w |= (uint32_t)src[1] << 8; + w |= (uint32_t)src[2] << 16; + w |= (uint32_t)src[3] << 24; + return w; #endif } @@ -104,12 +111,15 @@ static inline void store32_le(uint8_t dst[4], uint32_t w) { #ifdef NATIVE_LITTLE_ENDIAN - memcpy(dst, &w, sizeof w); + memcpy(dst, &w, sizeof w); #else - dst[0] = (uint8_t) w; w >>= 8; - dst[1] = (uint8_t) w; w >>= 8; - dst[2] = (uint8_t) w; w >>= 8; - dst[3] = (uint8_t) w; + dst[0] = (uint8_t)w; + w >>= 8; + dst[1] = (uint8_t)w; + w >>= 8; + dst[2] = (uint8_t)w; + w >>= 8; + dst[3] = (uint8_t)w; #endif } @@ -120,19 +130,19 @@ static inline uint64_t load64_be(const uint8_t src[8]) { #ifdef NATIVE_BIG_ENDIAN - uint64_t w; - memcpy(&w, src, sizeof w); - return w; + uint64_t w; + memcpy(&w, src, sizeof w); + return w; #else - uint64_t w = (uint64_t) src[7]; - w |= (uint64_t) src[6] << 8; - w |= (uint64_t) src[5] << 16; - w |= (uint64_t) src[4] << 24; - w |= (uint64_t) src[3] << 32; - w |= (uint64_t) src[2] << 40; - w |= (uint64_t) src[1] << 48; - w |= (uint64_t) src[0] << 56; - return w; + uint64_t w = (uint64_t)src[7]; + w |= (uint64_t)src[6] << 8; + w |= (uint64_t)src[5] << 16; + w |= (uint64_t)src[4] << 24; + w |= (uint64_t)src[3] << 32; + w |= (uint64_t)src[2] << 40; + w |= (uint64_t)src[1] << 48; + w |= (uint64_t)src[0] << 56; + return w; #endif } @@ -141,16 +151,23 @@ static inline void store64_be(uint8_t dst[8], uint64_t w) { #ifdef NATIVE_BIG_ENDIAN - memcpy(dst, &w, sizeof w); + memcpy(dst, &w, sizeof w); #else - dst[7] = (uint8_t) w; w >>= 8; - dst[6] = (uint8_t) w; w >>= 8; - dst[5] = (uint8_t) w; w >>= 8; - dst[4] = (uint8_t) w; w >>= 8; - dst[3] = (uint8_t) w; w >>= 8; - dst[2] = (uint8_t) w; w >>= 8; - dst[1] = (uint8_t) w; w >>= 8; - dst[0] = (uint8_t) w; + dst[7] = (uint8_t)w; + w >>= 8; + dst[6] = (uint8_t)w; + w >>= 8; + dst[5] = (uint8_t)w; + w >>= 8; + dst[4] = (uint8_t)w; + w >>= 8; + dst[3] = (uint8_t)w; + w >>= 8; + dst[2] = (uint8_t)w; + w >>= 8; + dst[1] = (uint8_t)w; + w >>= 8; + dst[0] = (uint8_t)w; #endif } @@ -159,15 +176,15 @@ static inline uint32_t load32_be(const uint8_t src[4]) { #ifdef NATIVE_BIG_ENDIAN - uint32_t w; - memcpy(&w, src, sizeof w); - return w; + uint32_t w; + memcpy(&w, src, sizeof w); + return w; #else - uint32_t w = (uint32_t) src[3]; - w |= (uint32_t) src[2] << 8; - w |= (uint32_t) src[1] << 16; - w |= (uint32_t) src[0] << 24; - return w; + uint32_t w = (uint32_t)src[3]; + w |= (uint32_t)src[2] << 8; + w |= (uint32_t)src[1] << 16; + w |= (uint32_t)src[0] << 24; + return w; #endif } @@ -176,12 +193,15 @@ static inline void store32_be(uint8_t dst[4], uint32_t w) { #ifdef NATIVE_BIG_ENDIAN - memcpy(dst, &w, sizeof w); + memcpy(dst, &w, sizeof w); #else - dst[3] = (uint8_t) w; w >>= 8; - dst[2] = (uint8_t) w; w >>= 8; - dst[1] = (uint8_t) w; w >>= 8; - dst[0] = (uint8_t) w; + dst[3] = (uint8_t)w; + w >>= 8; + dst[2] = (uint8_t)w; + w >>= 8; + dst[1] = (uint8_t)w; + w >>= 8; + dst[0] = (uint8_t)w; #endif } @@ -189,58 +209,61 @@ store32_be(uint8_t dst[4], uint32_t w) static inline void xor_buf(unsigned char *out, const unsigned char *in, size_t n) { - size_t i; + size_t i; - for (i = 0; i < n; i++) { - out[i] ^= in[i]; - } + for(i = 0; i < n; i++) + { + out[i] ^= in[i]; + } } #if !defined(__clang__) && !defined(__GNUC__) -# ifdef __attribute__ -# undef __attribute__ -# endif -# define __attribute__(a) +#ifdef __attribute__ +#undef __attribute__ +#endif +#define __attribute__(a) #endif #ifndef CRYPTO_ALIGN -# if defined(__INTEL_COMPILER) || defined(_MSC_VER) -# define CRYPTO_ALIGN(x) __declspec(align(x)) -# else -# define CRYPTO_ALIGN(x) __attribute__ ((aligned(x))) -# endif -#endif - -#if defined(_MSC_VER) && \ - (defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86)) - -# include - -# define HAVE_INTRIN_H 1 -# define HAVE_MMINTRIN_H 1 -# define HAVE_EMMINTRIN_H 1 -# define HAVE_PMMINTRIN_H 1 -# define HAVE_TMMINTRIN_H 1 -# define HAVE_SMMINTRIN_H 1 -# define HAVE_AVXINTRIN_H 1 -# if _MSC_VER >= 1600 -# define HAVE_WMMINTRIN_H 1 -# endif -# if _MSC_VER >= 1700 && defined(_M_X64) -# define HAVE_AVX2INTRIN_H 1 -# endif +#if defined(__INTEL_COMPILER) || defined(_MSC_VER) +#define CRYPTO_ALIGN(x) __declspec(align(x)) +#else +#define CRYPTO_ALIGN(x) __attribute__((aligned(x))) +#endif +#endif + +#if defined(_MSC_VER) \ + && (defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86)) + +#include + +#define HAVE_INTRIN_H 1 +#define HAVE_MMINTRIN_H 1 +#define HAVE_EMMINTRIN_H 1 +#define HAVE_PMMINTRIN_H 1 +#define HAVE_TMMINTRIN_H 1 +#define HAVE_SMMINTRIN_H 1 +#define HAVE_AVXINTRIN_H 1 +#if _MSC_VER >= 1600 +#define HAVE_WMMINTRIN_H 1 +#endif +#if _MSC_VER >= 1700 && defined(_M_X64) +#define HAVE_AVX2INTRIN_H 1 +#endif #elif defined(HAVE_INTRIN_H) -# include +#include #endif #ifdef HAVE_LIBCTGRIND -extern void ct_poison (const void *, size_t); -extern void ct_unpoison(const void *, size_t); -# define POISON(X, L) ct_poison((X), (L)) -# define UNPOISON(X, L) ct_unpoison((X), (L)) +extern void +ct_poison(const void *, size_t); +extern void +ct_unpoison(const void *, size_t); +#define POISON(X, L) ct_poison((X), (L)) +#define UNPOISON(X, L) ct_unpoison((X), (L)) #else -# define POISON(X, L) (void) 0 -# define UNPOISON(X, L) (void) 0 +#define POISON(X, L) (void)0 +#define UNPOISON(X, L) (void)0 #endif #endif diff --git a/crypto/include/sodium/private/ed25519_ref10.h b/crypto/include/sodium/private/ed25519_ref10.h index 42fcd9813..e28922744 100644 --- a/crypto/include/sodium/private/ed25519_ref10.h +++ b/crypto/include/sodium/private/ed25519_ref10.h @@ -15,17 +15,19 @@ typedef uint64_t fe25519[5]; typedef int32_t fe25519[10]; #endif -void fe25519_invert(fe25519 out, const fe25519 z); -void fe25519_frombytes(fe25519 h, const unsigned char *s); -void fe25519_tobytes(unsigned char *s, const fe25519 h); +void +fe25519_invert(fe25519 out, const fe25519 z); +void +fe25519_frombytes(fe25519 h, const unsigned char *s); +void +fe25519_tobytes(unsigned char *s, const fe25519 h); #ifdef HAVE_TI_MODE -# include "ed25519_ref10_fe_51.h" +#include "ed25519_ref10_fe_51.h" #else -# include "ed25519_ref10_fe_25_5.h" +#include "ed25519_ref10_fe_25_5.h" #endif - /* ge means group element. @@ -40,86 +42,109 @@ void fe25519_tobytes(unsigned char *s, const fe25519 h); ge25519_precomp (Duif): (y+x,y-x,2dxy) */ -typedef struct { - fe25519 X; - fe25519 Y; - fe25519 Z; +typedef struct +{ + fe25519 X; + fe25519 Y; + fe25519 Z; } ge25519_p2; -typedef struct { - fe25519 X; - fe25519 Y; - fe25519 Z; - fe25519 T; +typedef struct +{ + fe25519 X; + fe25519 Y; + fe25519 Z; + fe25519 T; } ge25519_p3; -typedef struct { - fe25519 X; - fe25519 Y; - fe25519 Z; - fe25519 T; +typedef struct +{ + fe25519 X; + fe25519 Y; + fe25519 Z; + fe25519 T; } ge25519_p1p1; -typedef struct { - fe25519 yplusx; - fe25519 yminusx; - fe25519 xy2d; +typedef struct +{ + fe25519 yplusx; + fe25519 yminusx; + fe25519 xy2d; } ge25519_precomp; -typedef struct { - fe25519 YplusX; - fe25519 YminusX; - fe25519 Z; - fe25519 T2d; +typedef struct +{ + fe25519 YplusX; + fe25519 YminusX; + fe25519 Z; + fe25519 T2d; } ge25519_cached; -void ge25519_tobytes(unsigned char *s, const ge25519_p2 *h); +void +ge25519_tobytes(unsigned char *s, const ge25519_p2 *h); -void ge25519_p3_tobytes(unsigned char *s, const ge25519_p3 *h); +void +ge25519_p3_tobytes(unsigned char *s, const ge25519_p3 *h); -int ge25519_frombytes(ge25519_p3 *h, const unsigned char *s); +int +ge25519_frombytes(ge25519_p3 *h, const unsigned char *s); -int ge25519_frombytes_negate_vartime(ge25519_p3 *h, const unsigned char *s); +int +ge25519_frombytes_negate_vartime(ge25519_p3 *h, const unsigned char *s); -void ge25519_p3_to_cached(ge25519_cached *r, const ge25519_p3 *p); +void +ge25519_p3_to_cached(ge25519_cached *r, const ge25519_p3 *p); -void ge25519_p1p1_to_p2(ge25519_p2 *r, const ge25519_p1p1 *p); +void +ge25519_p1p1_to_p2(ge25519_p2 *r, const ge25519_p1p1 *p); -void ge25519_p1p1_to_p3(ge25519_p3 *r, const ge25519_p1p1 *p); +void +ge25519_p1p1_to_p3(ge25519_p3 *r, const ge25519_p1p1 *p); -void ge25519_add(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_cached *q); +void +ge25519_add(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_cached *q); -void ge25519_sub(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_cached *q); +void +ge25519_sub(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_cached *q); -void ge25519_scalarmult_base(ge25519_p3 *h, const unsigned char *a); +void +ge25519_scalarmult_base(ge25519_p3 *h, const unsigned char *a); -void ge25519_double_scalarmult_vartime(ge25519_p2 *r, const unsigned char *a, - const ge25519_p3 *A, - const unsigned char *b); +void +ge25519_double_scalarmult_vartime(ge25519_p2 *r, const unsigned char *a, + const ge25519_p3 *A, const unsigned char *b); -void ge25519_scalarmult(ge25519_p3 *h, const unsigned char *a, - const ge25519_p3 *p); +void +ge25519_scalarmult(ge25519_p3 *h, const unsigned char *a, const ge25519_p3 *p); -int ge25519_is_canonical(const unsigned char *s); +int +ge25519_is_canonical(const unsigned char *s); -int ge25519_is_on_curve(const ge25519_p3 *p); +int +ge25519_is_on_curve(const ge25519_p3 *p); -int ge25519_is_on_main_subgroup(const ge25519_p3 *p); +int +ge25519_is_on_main_subgroup(const ge25519_p3 *p); -int ge25519_has_small_order(const unsigned char s[32]); +int +ge25519_has_small_order(const unsigned char s[32]); -void ge25519_from_uniform(unsigned char s[32], const unsigned char r[32]); +void +ge25519_from_uniform(unsigned char s[32], const unsigned char r[32]); /* The set of scalars is \Z/l where l = 2^252 + 27742317777372353535851937790883648493. */ -void sc25519_reduce(unsigned char *s); +void +sc25519_reduce(unsigned char *s); -void sc25519_muladd(unsigned char *s, const unsigned char *a, - const unsigned char *b, const unsigned char *c); +void +sc25519_muladd(unsigned char *s, const unsigned char *a, const unsigned char *b, + const unsigned char *c); -int sc25519_is_canonical(const unsigned char *s); +int +sc25519_is_canonical(const unsigned char *s); #endif diff --git a/crypto/include/sodium/private/ed25519_ref10_fe_51.h b/crypto/include/sodium/private/ed25519_ref10_fe_51.h index 3a30f3014..804fcf924 100644 --- a/crypto/include/sodium/private/ed25519_ref10_fe_51.h +++ b/crypto/include/sodium/private/ed25519_ref10_fe_51.h @@ -10,7 +10,7 @@ static inline void fe25519_0(fe25519 h) { - memset(&h[0], 0, 5 * sizeof h[0]); + memset(&h[0], 0, 5 * sizeof h[0]); } /* @@ -20,8 +20,8 @@ fe25519_0(fe25519 h) static inline void fe25519_1(fe25519 h) { - h[0] = 1; - memset(&h[1], 0, 4 * sizeof h[0]); + h[0] = 1; + memset(&h[1], 0, 4 * sizeof h[0]); } /* @@ -32,17 +32,17 @@ fe25519_1(fe25519 h) static inline void fe25519_add(fe25519 h, const fe25519 f, const fe25519 g) { - uint64_t h0 = f[0] + g[0]; - uint64_t h1 = f[1] + g[1]; - uint64_t h2 = f[2] + g[2]; - uint64_t h3 = f[3] + g[3]; - uint64_t h4 = f[4] + g[4]; - - h[0] = h0; - h[1] = h1; - h[2] = h2; - h[3] = h3; - h[4] = h4; + uint64_t h0 = f[0] + g[0]; + uint64_t h1 = f[1] + g[1]; + uint64_t h2 = f[2] + g[2]; + uint64_t h3 = f[3] + g[3]; + uint64_t h4 = f[4] + g[4]; + + h[0] = h0; + h[1] = h1; + h[2] = h2; + h[3] = h3; + h[4] = h4; } /* @@ -52,37 +52,37 @@ fe25519_add(fe25519 h, const fe25519 f, const fe25519 g) static void fe25519_sub(fe25519 h, const fe25519 f, const fe25519 g) { - const uint64_t mask = 0x7ffffffffffffULL; - uint64_t h0, h1, h2, h3, h4; - - h0 = g[0]; - h1 = g[1]; - h2 = g[2]; - h3 = g[3]; - h4 = g[4]; - - h1 += h0 >> 51; - h0 &= mask; - h2 += h1 >> 51; - h1 &= mask; - h3 += h2 >> 51; - h2 &= mask; - h4 += h3 >> 51; - h3 &= mask; - h0 += 19ULL * (h4 >> 51); - h4 &= mask; - - h0 = (f[0] + 0xfffffffffffdaULL) - h0; - h1 = (f[1] + 0xffffffffffffeULL) - h1; - h2 = (f[2] + 0xffffffffffffeULL) - h2; - h3 = (f[3] + 0xffffffffffffeULL) - h3; - h4 = (f[4] + 0xffffffffffffeULL) - h4; - - h[0] = h0; - h[1] = h1; - h[2] = h2; - h[3] = h3; - h[4] = h4; + const uint64_t mask = 0x7ffffffffffffULL; + uint64_t h0, h1, h2, h3, h4; + + h0 = g[0]; + h1 = g[1]; + h2 = g[2]; + h3 = g[3]; + h4 = g[4]; + + h1 += h0 >> 51; + h0 &= mask; + h2 += h1 >> 51; + h1 &= mask; + h3 += h2 >> 51; + h2 &= mask; + h4 += h3 >> 51; + h3 &= mask; + h0 += 19ULL * (h4 >> 51); + h4 &= mask; + + h0 = (f[0] + 0xfffffffffffdaULL) - h0; + h1 = (f[1] + 0xffffffffffffeULL) - h1; + h2 = (f[2] + 0xffffffffffffeULL) - h2; + h3 = (f[3] + 0xffffffffffffeULL) - h3; + h4 = (f[4] + 0xffffffffffffeULL) - h4; + + h[0] = h0; + h[1] = h1; + h[2] = h2; + h[3] = h3; + h[4] = h4; } /* @@ -92,10 +92,10 @@ fe25519_sub(fe25519 h, const fe25519 f, const fe25519 g) static inline void fe25519_neg(fe25519 h, const fe25519 f) { - fe25519 zero; + fe25519 zero; - fe25519_0(zero); - fe25519_sub(h, zero, f); + fe25519_0(zero); + fe25519_sub(h, zero, f); } /* @@ -108,31 +108,31 @@ fe25519_neg(fe25519 h, const fe25519 f) static void fe25519_cmov(fe25519 f, const fe25519 g, unsigned int b) { - const uint64_t mask = (uint64_t) (-(int64_t) b); - - uint64_t f0 = f[0]; - uint64_t f1 = f[1]; - uint64_t f2 = f[2]; - uint64_t f3 = f[3]; - uint64_t f4 = f[4]; - - uint64_t x0 = f0 ^ g[0]; - uint64_t x1 = f1 ^ g[1]; - uint64_t x2 = f2 ^ g[2]; - uint64_t x3 = f3 ^ g[3]; - uint64_t x4 = f4 ^ g[4]; - - x0 &= mask; - x1 &= mask; - x2 &= mask; - x3 &= mask; - x4 &= mask; - - f[0] = f0 ^ x0; - f[1] = f1 ^ x1; - f[2] = f2 ^ x2; - f[3] = f3 ^ x3; - f[4] = f4 ^ x4; + const uint64_t mask = (uint64_t)(-(int64_t)b); + + uint64_t f0 = f[0]; + uint64_t f1 = f[1]; + uint64_t f2 = f[2]; + uint64_t f3 = f[3]; + uint64_t f4 = f[4]; + + uint64_t x0 = f0 ^ g[0]; + uint64_t x1 = f1 ^ g[1]; + uint64_t x2 = f2 ^ g[2]; + uint64_t x3 = f3 ^ g[3]; + uint64_t x4 = f4 ^ g[4]; + + x0 &= mask; + x1 &= mask; + x2 &= mask; + x3 &= mask; + x4 &= mask; + + f[0] = f0 ^ x0; + f[1] = f1 ^ x1; + f[2] = f2 ^ x2; + f[3] = f3 ^ x3; + f[4] = f4 ^ x4; } /* @@ -145,43 +145,43 @@ Preconditions: b in {0,1}. static void fe25519_cswap(fe25519 f, fe25519 g, unsigned int b) { - const uint64_t mask = (uint64_t) (-(int64_t) b); - - uint64_t f0 = f[0]; - uint64_t f1 = f[1]; - uint64_t f2 = f[2]; - uint64_t f3 = f[3]; - uint64_t f4 = f[4]; - - uint64_t g0 = g[0]; - uint64_t g1 = g[1]; - uint64_t g2 = g[2]; - uint64_t g3 = g[3]; - uint64_t g4 = g[4]; - - uint64_t x0 = f0 ^ g0; - uint64_t x1 = f1 ^ g1; - uint64_t x2 = f2 ^ g2; - uint64_t x3 = f3 ^ g3; - uint64_t x4 = f4 ^ g4; - - x0 &= mask; - x1 &= mask; - x2 &= mask; - x3 &= mask; - x4 &= mask; - - f[0] = f0 ^ x0; - f[1] = f1 ^ x1; - f[2] = f2 ^ x2; - f[3] = f3 ^ x3; - f[4] = f4 ^ x4; - - g[0] = g0 ^ x0; - g[1] = g1 ^ x1; - g[2] = g2 ^ x2; - g[3] = g3 ^ x3; - g[4] = g4 ^ x4; + const uint64_t mask = (uint64_t)(-(int64_t)b); + + uint64_t f0 = f[0]; + uint64_t f1 = f[1]; + uint64_t f2 = f[2]; + uint64_t f3 = f[3]; + uint64_t f4 = f[4]; + + uint64_t g0 = g[0]; + uint64_t g1 = g[1]; + uint64_t g2 = g[2]; + uint64_t g3 = g[3]; + uint64_t g4 = g[4]; + + uint64_t x0 = f0 ^ g0; + uint64_t x1 = f1 ^ g1; + uint64_t x2 = f2 ^ g2; + uint64_t x3 = f3 ^ g3; + uint64_t x4 = f4 ^ g4; + + x0 &= mask; + x1 &= mask; + x2 &= mask; + x3 &= mask; + x4 &= mask; + + f[0] = f0 ^ x0; + f[1] = f1 ^ x1; + f[2] = f2 ^ x2; + f[3] = f3 ^ x3; + f[4] = f4 ^ x4; + + g[0] = g0 ^ x0; + g[1] = g1 ^ x1; + g[2] = g2 ^ x2; + g[3] = g3 ^ x3; + g[4] = g4 ^ x4; } /* @@ -191,17 +191,17 @@ fe25519_cswap(fe25519 f, fe25519 g, unsigned int b) static inline void fe25519_copy(fe25519 h, const fe25519 f) { - uint64_t f0 = f[0]; - uint64_t f1 = f[1]; - uint64_t f2 = f[2]; - uint64_t f3 = f[3]; - uint64_t f4 = f[4]; - - h[0] = f0; - h[1] = f1; - h[2] = f2; - h[3] = f3; - h[4] = f4; + uint64_t f0 = f[0]; + uint64_t f1 = f[1]; + uint64_t f2 = f[2]; + uint64_t f3 = f[3]; + uint64_t f4 = f[4]; + + h[0] = f0; + h[1] = f1; + h[2] = f2; + h[3] = f3; + h[4] = f4; } /* @@ -212,11 +212,11 @@ fe25519_copy(fe25519 h, const fe25519 f) static inline int fe25519_isnegative(const fe25519 f) { - unsigned char s[32]; + unsigned char s[32]; - fe25519_tobytes(s, f); + fe25519_tobytes(s, f); - return s[0] & 1; + return s[0] & 1; } /* @@ -227,11 +227,11 @@ fe25519_isnegative(const fe25519 f) static inline int fe25519_iszero(const fe25519 f) { - unsigned char s[32]; + unsigned char s[32]; - fe25519_tobytes(s, f); + fe25519_tobytes(s, f); - return sodium_is_zero(s, 32); + return sodium_is_zero(s, 32); } /* @@ -242,87 +242,87 @@ fe25519_iszero(const fe25519 f) static void fe25519_mul(fe25519 h, const fe25519 f, const fe25519 g) { - const uint64_t mask = 0x7ffffffffffffULL; - uint128_t r0, r1, r2, r3, r4, carry; - uint64_t f0, f1, f2, f3, f4; - uint64_t f1_19, f2_19, f3_19, f4_19; - uint64_t g0, g1, g2, g3, g4; - uint64_t r00, r01, r02, r03, r04; - - f0 = f[0]; - f1 = f[1]; - f2 = f[2]; - f3 = f[3]; - f4 = f[4]; - - g0 = g[0]; - g1 = g[1]; - g2 = g[2]; - g3 = g[3]; - g4 = g[4]; - - f1_19 = 19ULL * f1; - f2_19 = 19ULL * f2; - f3_19 = 19ULL * f3; - f4_19 = 19ULL * f4; - - r0 = ((uint128_t) f0 ) * ((uint128_t) g0); - r0 += ((uint128_t) f1_19) * ((uint128_t) g4); - r0 += ((uint128_t) f2_19) * ((uint128_t) g3); - r0 += ((uint128_t) f3_19) * ((uint128_t) g2); - r0 += ((uint128_t) f4_19) * ((uint128_t) g1); - - r1 = ((uint128_t) f0 ) * ((uint128_t) g1); - r1 += ((uint128_t) f1 ) * ((uint128_t) g0); - r1 += ((uint128_t) f2_19) * ((uint128_t) g4); - r1 += ((uint128_t) f3_19) * ((uint128_t) g3); - r1 += ((uint128_t) f4_19) * ((uint128_t) g2); - - r2 = ((uint128_t) f0 ) * ((uint128_t) g2); - r2 += ((uint128_t) f1 ) * ((uint128_t) g1); - r2 += ((uint128_t) f2 ) * ((uint128_t) g0); - r2 += ((uint128_t) f3_19) * ((uint128_t) g4); - r2 += ((uint128_t) f4_19) * ((uint128_t) g3); - - r3 = ((uint128_t) f0 ) * ((uint128_t) g3); - r3 += ((uint128_t) f1 ) * ((uint128_t) g2); - r3 += ((uint128_t) f2 ) * ((uint128_t) g1); - r3 += ((uint128_t) f3 ) * ((uint128_t) g0); - r3 += ((uint128_t) f4_19) * ((uint128_t) g4); - - r4 = ((uint128_t) f0 ) * ((uint128_t) g4); - r4 += ((uint128_t) f1 ) * ((uint128_t) g3); - r4 += ((uint128_t) f2 ) * ((uint128_t) g2); - r4 += ((uint128_t) f3 ) * ((uint128_t) g1); - r4 += ((uint128_t) f4 ) * ((uint128_t) g0); - - r00 = ((uint64_t) r0) & mask; - carry = r0 >> 51; - r1 += carry; - r01 = ((uint64_t) r1) & mask; - carry = r1 >> 51; - r2 += carry; - r02 = ((uint64_t) r2) & mask; - carry = r2 >> 51; - r3 += carry; - r03 = ((uint64_t) r3) & mask; - carry = r3 >> 51; - r4 += carry; - r04 = ((uint64_t) r4) & mask; - carry = r4 >> 51; - r00 += 19ULL * (uint64_t) carry; - carry = r00 >> 51; - r00 &= mask; - r01 += (uint64_t) carry; - carry = r01 >> 51; - r01 &= mask; - r02 += (uint64_t) carry; - - h[0] = r00; - h[1] = r01; - h[2] = r02; - h[3] = r03; - h[4] = r04; + const uint64_t mask = 0x7ffffffffffffULL; + uint128_t r0, r1, r2, r3, r4, carry; + uint64_t f0, f1, f2, f3, f4; + uint64_t f1_19, f2_19, f3_19, f4_19; + uint64_t g0, g1, g2, g3, g4; + uint64_t r00, r01, r02, r03, r04; + + f0 = f[0]; + f1 = f[1]; + f2 = f[2]; + f3 = f[3]; + f4 = f[4]; + + g0 = g[0]; + g1 = g[1]; + g2 = g[2]; + g3 = g[3]; + g4 = g[4]; + + f1_19 = 19ULL * f1; + f2_19 = 19ULL * f2; + f3_19 = 19ULL * f3; + f4_19 = 19ULL * f4; + + r0 = ((uint128_t)f0) * ((uint128_t)g0); + r0 += ((uint128_t)f1_19) * ((uint128_t)g4); + r0 += ((uint128_t)f2_19) * ((uint128_t)g3); + r0 += ((uint128_t)f3_19) * ((uint128_t)g2); + r0 += ((uint128_t)f4_19) * ((uint128_t)g1); + + r1 = ((uint128_t)f0) * ((uint128_t)g1); + r1 += ((uint128_t)f1) * ((uint128_t)g0); + r1 += ((uint128_t)f2_19) * ((uint128_t)g4); + r1 += ((uint128_t)f3_19) * ((uint128_t)g3); + r1 += ((uint128_t)f4_19) * ((uint128_t)g2); + + r2 = ((uint128_t)f0) * ((uint128_t)g2); + r2 += ((uint128_t)f1) * ((uint128_t)g1); + r2 += ((uint128_t)f2) * ((uint128_t)g0); + r2 += ((uint128_t)f3_19) * ((uint128_t)g4); + r2 += ((uint128_t)f4_19) * ((uint128_t)g3); + + r3 = ((uint128_t)f0) * ((uint128_t)g3); + r3 += ((uint128_t)f1) * ((uint128_t)g2); + r3 += ((uint128_t)f2) * ((uint128_t)g1); + r3 += ((uint128_t)f3) * ((uint128_t)g0); + r3 += ((uint128_t)f4_19) * ((uint128_t)g4); + + r4 = ((uint128_t)f0) * ((uint128_t)g4); + r4 += ((uint128_t)f1) * ((uint128_t)g3); + r4 += ((uint128_t)f2) * ((uint128_t)g2); + r4 += ((uint128_t)f3) * ((uint128_t)g1); + r4 += ((uint128_t)f4) * ((uint128_t)g0); + + r00 = ((uint64_t)r0) & mask; + carry = r0 >> 51; + r1 += carry; + r01 = ((uint64_t)r1) & mask; + carry = r1 >> 51; + r2 += carry; + r02 = ((uint64_t)r2) & mask; + carry = r2 >> 51; + r3 += carry; + r03 = ((uint64_t)r3) & mask; + carry = r3 >> 51; + r4 += carry; + r04 = ((uint64_t)r4) & mask; + carry = r4 >> 51; + r00 += 19ULL * (uint64_t)carry; + carry = r00 >> 51; + r00 &= mask; + r01 += (uint64_t)carry; + carry = r01 >> 51; + r01 &= mask; + r02 += (uint64_t)carry; + + h[0] = r00; + h[1] = r01; + h[2] = r02; + h[3] = r03; + h[4] = r04; } /* @@ -333,75 +333,75 @@ fe25519_mul(fe25519 h, const fe25519 f, const fe25519 g) static void fe25519_sq(fe25519 h, const fe25519 f) { - const uint64_t mask = 0x7ffffffffffffULL; - uint128_t r0, r1, r2, r3, r4, carry; - uint64_t f0, f1, f2, f3, f4; - uint64_t f0_2, f1_2, f1_38, f2_38, f3_38, f3_19, f4_19; - uint64_t r00, r01, r02, r03, r04; - - f0 = f[0]; - f1 = f[1]; - f2 = f[2]; - f3 = f[3]; - f4 = f[4]; - - f0_2 = f0 << 1; - f1_2 = f1 << 1; - - f1_38 = 38ULL * f1; - f2_38 = 38ULL * f2; - f3_38 = 38ULL * f3; - - f3_19 = 19ULL * f3; - f4_19 = 19ULL * f4; - - r0 = ((uint128_t) f0 ) * ((uint128_t) f0); - r0 += ((uint128_t) f1_38) * ((uint128_t) f4); - r0 += ((uint128_t) f2_38) * ((uint128_t) f3); - - r1 = ((uint128_t) f0_2 ) * ((uint128_t) f1); - r1 += ((uint128_t) f2_38) * ((uint128_t) f4); - r1 += ((uint128_t) f3_19) * ((uint128_t) f3); - - r2 = ((uint128_t) f0_2 ) * ((uint128_t) f2); - r2 += ((uint128_t) f1 ) * ((uint128_t) f1); - r2 += ((uint128_t) f3_38) * ((uint128_t) f4); - - r3 = ((uint128_t) f0_2 ) * ((uint128_t) f3); - r3 += ((uint128_t) f1_2 ) * ((uint128_t) f2); - r3 += ((uint128_t) f4_19) * ((uint128_t) f4); - - r4 = ((uint128_t) f0_2 ) * ((uint128_t) f4); - r4 += ((uint128_t) f1_2 ) * ((uint128_t) f3); - r4 += ((uint128_t) f2 ) * ((uint128_t) f2); - - r00 = ((uint64_t) r0) & mask; - carry = r0 >> 51; - r1 += carry; - r01 = ((uint64_t) r1) & mask; - carry = r1 >> 51; - r2 += carry; - r02 = ((uint64_t) r2) & mask; - carry = r2 >> 51; - r3 += carry; - r03 = ((uint64_t) r3) & mask; - carry = r3 >> 51; - r4 += carry; - r04 = ((uint64_t) r4) & mask; - carry = r4 >> 51; - r00 += 19ULL * (uint64_t) carry; - carry = r00 >> 51; - r00 &= mask; - r01 += (uint64_t) carry; - carry = r01 >> 51; - r01 &= mask; - r02 += (uint64_t) carry; - - h[0] = r00; - h[1] = r01; - h[2] = r02; - h[3] = r03; - h[4] = r04; + const uint64_t mask = 0x7ffffffffffffULL; + uint128_t r0, r1, r2, r3, r4, carry; + uint64_t f0, f1, f2, f3, f4; + uint64_t f0_2, f1_2, f1_38, f2_38, f3_38, f3_19, f4_19; + uint64_t r00, r01, r02, r03, r04; + + f0 = f[0]; + f1 = f[1]; + f2 = f[2]; + f3 = f[3]; + f4 = f[4]; + + f0_2 = f0 << 1; + f1_2 = f1 << 1; + + f1_38 = 38ULL * f1; + f2_38 = 38ULL * f2; + f3_38 = 38ULL * f3; + + f3_19 = 19ULL * f3; + f4_19 = 19ULL * f4; + + r0 = ((uint128_t)f0) * ((uint128_t)f0); + r0 += ((uint128_t)f1_38) * ((uint128_t)f4); + r0 += ((uint128_t)f2_38) * ((uint128_t)f3); + + r1 = ((uint128_t)f0_2) * ((uint128_t)f1); + r1 += ((uint128_t)f2_38) * ((uint128_t)f4); + r1 += ((uint128_t)f3_19) * ((uint128_t)f3); + + r2 = ((uint128_t)f0_2) * ((uint128_t)f2); + r2 += ((uint128_t)f1) * ((uint128_t)f1); + r2 += ((uint128_t)f3_38) * ((uint128_t)f4); + + r3 = ((uint128_t)f0_2) * ((uint128_t)f3); + r3 += ((uint128_t)f1_2) * ((uint128_t)f2); + r3 += ((uint128_t)f4_19) * ((uint128_t)f4); + + r4 = ((uint128_t)f0_2) * ((uint128_t)f4); + r4 += ((uint128_t)f1_2) * ((uint128_t)f3); + r4 += ((uint128_t)f2) * ((uint128_t)f2); + + r00 = ((uint64_t)r0) & mask; + carry = r0 >> 51; + r1 += carry; + r01 = ((uint64_t)r1) & mask; + carry = r1 >> 51; + r2 += carry; + r02 = ((uint64_t)r2) & mask; + carry = r2 >> 51; + r3 += carry; + r03 = ((uint64_t)r3) & mask; + carry = r3 >> 51; + r4 += carry; + r04 = ((uint64_t)r4) & mask; + carry = r4 >> 51; + r00 += 19ULL * (uint64_t)carry; + carry = r00 >> 51; + r00 &= mask; + r01 += (uint64_t)carry; + carry = r01 >> 51; + r01 &= mask; + r02 += (uint64_t)carry; + + h[0] = r00; + h[1] = r01; + h[2] = r02; + h[3] = r03; + h[4] = r04; } /* @@ -412,107 +412,107 @@ fe25519_sq(fe25519 h, const fe25519 f) static void fe25519_sq2(fe25519 h, const fe25519 f) { - const uint64_t mask = 0x7ffffffffffffULL; - uint128_t r0, r1, r2, r3, r4, carry; - uint64_t f0, f1, f2, f3, f4; - uint64_t f0_2, f1_2, f1_38, f2_38, f3_38, f3_19, f4_19; - uint64_t r00, r01, r02, r03, r04; - - f0 = f[0]; - f1 = f[1]; - f2 = f[2]; - f3 = f[3]; - f4 = f[4]; - - f0_2 = f0 << 1; - f1_2 = f1 << 1; - - f1_38 = 38ULL * f1; - f2_38 = 38ULL * f2; - f3_38 = 38ULL * f3; - - f3_19 = 19ULL * f3; - f4_19 = 19ULL * f4; - - r0 = ((uint128_t) f0 ) * ((uint128_t) f0); - r0 += ((uint128_t) f1_38) * ((uint128_t) f4); - r0 += ((uint128_t) f2_38) * ((uint128_t) f3); - - r1 = ((uint128_t) f0_2 ) * ((uint128_t) f1); - r1 += ((uint128_t) f2_38) * ((uint128_t) f4); - r1 += ((uint128_t) f3_19) * ((uint128_t) f3); - - r2 = ((uint128_t) f0_2 ) * ((uint128_t) f2); - r2 += ((uint128_t) f1 ) * ((uint128_t) f1); - r2 += ((uint128_t) f3_38) * ((uint128_t) f4); - - r3 = ((uint128_t) f0_2 ) * ((uint128_t) f3); - r3 += ((uint128_t) f1_2 ) * ((uint128_t) f2); - r3 += ((uint128_t) f4_19) * ((uint128_t) f4); - - r4 = ((uint128_t) f0_2 ) * ((uint128_t) f4); - r4 += ((uint128_t) f1_2 ) * ((uint128_t) f3); - r4 += ((uint128_t) f2 ) * ((uint128_t) f2); - - r0 <<= 1; - r1 <<= 1; - r2 <<= 1; - r3 <<= 1; - r4 <<= 1; - - r00 = ((uint64_t) r0) & mask; - carry = r0 >> 51; - r1 += carry; - r01 = ((uint64_t) r1) & mask; - carry = r1 >> 51; - r2 += carry; - r02 = ((uint64_t) r2) & mask; - carry = r2 >> 51; - r3 += carry; - r03 = ((uint64_t) r3) & mask; - carry = r3 >> 51; - r4 += carry; - r04 = ((uint64_t) r4) & mask; - carry = r4 >> 51; - r00 += 19ULL * (uint64_t) carry; - carry = r00 >> 51; - r00 &= mask; - r01 += (uint64_t) carry; - carry = r01 >> 51; - r01 &= mask; - r02 += (uint64_t) carry; - - h[0] = r00; - h[1] = r01; - h[2] = r02; - h[3] = r03; - h[4] = r04; + const uint64_t mask = 0x7ffffffffffffULL; + uint128_t r0, r1, r2, r3, r4, carry; + uint64_t f0, f1, f2, f3, f4; + uint64_t f0_2, f1_2, f1_38, f2_38, f3_38, f3_19, f4_19; + uint64_t r00, r01, r02, r03, r04; + + f0 = f[0]; + f1 = f[1]; + f2 = f[2]; + f3 = f[3]; + f4 = f[4]; + + f0_2 = f0 << 1; + f1_2 = f1 << 1; + + f1_38 = 38ULL * f1; + f2_38 = 38ULL * f2; + f3_38 = 38ULL * f3; + + f3_19 = 19ULL * f3; + f4_19 = 19ULL * f4; + + r0 = ((uint128_t)f0) * ((uint128_t)f0); + r0 += ((uint128_t)f1_38) * ((uint128_t)f4); + r0 += ((uint128_t)f2_38) * ((uint128_t)f3); + + r1 = ((uint128_t)f0_2) * ((uint128_t)f1); + r1 += ((uint128_t)f2_38) * ((uint128_t)f4); + r1 += ((uint128_t)f3_19) * ((uint128_t)f3); + + r2 = ((uint128_t)f0_2) * ((uint128_t)f2); + r2 += ((uint128_t)f1) * ((uint128_t)f1); + r2 += ((uint128_t)f3_38) * ((uint128_t)f4); + + r3 = ((uint128_t)f0_2) * ((uint128_t)f3); + r3 += ((uint128_t)f1_2) * ((uint128_t)f2); + r3 += ((uint128_t)f4_19) * ((uint128_t)f4); + + r4 = ((uint128_t)f0_2) * ((uint128_t)f4); + r4 += ((uint128_t)f1_2) * ((uint128_t)f3); + r4 += ((uint128_t)f2) * ((uint128_t)f2); + + r0 <<= 1; + r1 <<= 1; + r2 <<= 1; + r3 <<= 1; + r4 <<= 1; + + r00 = ((uint64_t)r0) & mask; + carry = r0 >> 51; + r1 += carry; + r01 = ((uint64_t)r1) & mask; + carry = r1 >> 51; + r2 += carry; + r02 = ((uint64_t)r2) & mask; + carry = r2 >> 51; + r3 += carry; + r03 = ((uint64_t)r3) & mask; + carry = r3 >> 51; + r4 += carry; + r04 = ((uint64_t)r4) & mask; + carry = r4 >> 51; + r00 += 19ULL * (uint64_t)carry; + carry = r00 >> 51; + r00 &= mask; + r01 += (uint64_t)carry; + carry = r01 >> 51; + r01 &= mask; + r02 += (uint64_t)carry; + + h[0] = r00; + h[1] = r01; + h[2] = r02; + h[3] = r03; + h[4] = r04; } static void fe25519_scalar_product(fe25519 h, const fe25519 f, uint32_t n) { - const uint64_t mask = 0x7ffffffffffffULL; - uint128_t a; - uint128_t sn = (uint128_t) n; - uint64_t h0, h1, h2, h3, h4; - - a = f[0] * sn; - h0 = ((uint64_t) a) & mask; - a = f[1] * sn + ((uint64_t) (a >> 51)); - h1 = ((uint64_t) a) & mask; - a = f[2] * sn + ((uint64_t) (a >> 51)); - h2 = ((uint64_t) a) & mask; - a = f[3] * sn + ((uint64_t) (a >> 51)); - h3 = ((uint64_t) a) & mask; - a = f[4] * sn + ((uint64_t) (a >> 51)); - h4 = ((uint64_t) a) & mask; - - h0 += (a >> 51) * 19ULL; - - h[0] = h0; - h[1] = h1; - h[2] = h2; - h[3] = h3; - h[4] = h4; + const uint64_t mask = 0x7ffffffffffffULL; + uint128_t a; + uint128_t sn = (uint128_t)n; + uint64_t h0, h1, h2, h3, h4; + + a = f[0] * sn; + h0 = ((uint64_t)a) & mask; + a = f[1] * sn + ((uint64_t)(a >> 51)); + h1 = ((uint64_t)a) & mask; + a = f[2] * sn + ((uint64_t)(a >> 51)); + h2 = ((uint64_t)a) & mask; + a = f[3] * sn + ((uint64_t)(a >> 51)); + h3 = ((uint64_t)a) & mask; + a = f[4] * sn + ((uint64_t)(a >> 51)); + h4 = ((uint64_t)a) & mask; + + h0 += (a >> 51) * 19ULL; + + h[0] = h0; + h[1] = h1; + h[2] = h2; + h[3] = h3; + h[4] = h4; } diff --git a/crypto/include/sodium/private/implementations.h b/crypto/include/sodium/private/implementations.h index c7237f851..6fe38a60e 100644 --- a/crypto/include/sodium/private/implementations.h +++ b/crypto/include/sodium/private/implementations.h @@ -1,11 +1,17 @@ #ifndef implementations_H #define implementations_H -int _crypto_generichash_blake2b_pick_best_implementation(void); -int _crypto_onetimeauth_poly1305_pick_best_implementation(void); -int _crypto_pwhash_argon2_pick_best_implementation(void); -int _crypto_scalarmult_curve25519_pick_best_implementation(void); -int _crypto_stream_chacha20_pick_best_implementation(void); -int _crypto_stream_salsa20_pick_best_implementation(void); +int +_crypto_generichash_blake2b_pick_best_implementation(void); +int +_crypto_onetimeauth_poly1305_pick_best_implementation(void); +int +_crypto_pwhash_argon2_pick_best_implementation(void); +int +_crypto_scalarmult_curve25519_pick_best_implementation(void); +int +_crypto_stream_chacha20_pick_best_implementation(void); +int +_crypto_stream_salsa20_pick_best_implementation(void); #endif diff --git a/crypto/include/sodium/private/mutex.h b/crypto/include/sodium/private/mutex.h index 322b6742b..90c8127b0 100644 --- a/crypto/include/sodium/private/mutex.h +++ b/crypto/include/sodium/private/mutex.h @@ -1,7 +1,9 @@ #ifndef mutex_H #define mutex_H 1 -extern int sodium_crit_enter(void); -extern int sodium_crit_leave(void); +extern int +sodium_crit_enter(void); +extern int +sodium_crit_leave(void); #endif diff --git a/crypto/include/sodium/private/sse2_64_32.h b/crypto/include/sodium/private/sse2_64_32.h index d0455b41b..124d2b804 100644 --- a/crypto/include/sodium/private/sse2_64_32.h +++ b/crypto/include/sodium/private/sse2_64_32.h @@ -4,46 +4,53 @@ #include "common.h" #ifdef HAVE_INTRIN_H -# include +#include #endif -#if defined(HAVE_EMMINTRIN_H) && \ - !(defined(__amd64) || defined(__amd64__) || defined(__x86_64__) || \ - defined(_M_X64) || defined(_M_AMD64)) +#if defined(HAVE_EMMINTRIN_H) \ + && !(defined(__amd64) || defined(__amd64__) || defined(__x86_64__) \ + || defined(_M_X64) || defined(_M_AMD64)) -# include -# include +#include +#include -# ifndef _mm_set_epi64x -# define _mm_set_epi64x(Q0, Q1) sodium__mm_set_epi64x((Q0), (Q1)) +#ifndef _mm_set_epi64x +#define _mm_set_epi64x(Q0, Q1) sodium__mm_set_epi64x((Q0), (Q1)) static inline __m128i sodium__mm_set_epi64x(int64_t q1, int64_t q0) { - union { int64_t as64; int32_t as32[2]; } x0, x1; - x0.as64 = q0; x1.as64 = q1; - return _mm_set_epi32(x1.as32[1], x1.as32[0], x0.as32[1], x0.as32[0]); + union { + int64_t as64; + int32_t as32[2]; + } x0, x1; + x0.as64 = q0; + x1.as64 = q1; + return _mm_set_epi32(x1.as32[1], x1.as32[0], x0.as32[1], x0.as32[0]); } -# endif +#endif -# ifndef _mm_set1_epi64x -# define _mm_set1_epi64x(Q) sodium__mm_set1_epi64x(Q) +#ifndef _mm_set1_epi64x +#define _mm_set1_epi64x(Q) sodium__mm_set1_epi64x(Q) static inline __m128i sodium__mm_set1_epi64x(int64_t q) { - return _mm_set_epi64x(q, q); + return _mm_set_epi64x(q, q); } -# endif +#endif -# ifndef _mm_cvtsi64_si128 -# define _mm_cvtsi64_si128(Q) sodium__mm_cvtsi64_si128(Q) +#ifndef _mm_cvtsi64_si128 +#define _mm_cvtsi64_si128(Q) sodium__mm_cvtsi64_si128(Q) static inline __m128i sodium__mm_cvtsi64_si128(int64_t q) { - union { int64_t as64; int32_t as32[2]; } x; - x.as64 = q; - return _mm_setr_epi32(x.as32[0], x.as32[1], 0, 0); + union { + int64_t as64; + int32_t as32[2]; + } x; + x.as64 = q; + return _mm_setr_epi32(x.as32[0], x.as32[1], 0, 0); } -# endif +#endif #endif diff --git a/crypto/include/sodium/randombytes_nativeclient.h b/crypto/include/sodium/randombytes_nativeclient.h index 5158d8c3c..8e71e0c56 100644 --- a/crypto/include/sodium/randombytes_nativeclient.h +++ b/crypto/include/sodium/randombytes_nativeclient.h @@ -4,19 +4,21 @@ #ifdef __native_client__ -# include "export.h" -# include "randombytes.h" +#include "export.h" +#include "randombytes.h" -# ifdef __cplusplus -extern "C" { -# endif +#ifdef __cplusplus +extern "C" +{ +#endif -SODIUM_EXPORT -extern struct randombytes_implementation randombytes_nativeclient_implementation; + SODIUM_EXPORT + extern struct randombytes_implementation + randombytes_nativeclient_implementation; -# ifdef __cplusplus +#ifdef __cplusplus } -# endif +#endif #endif diff --git a/crypto/include/sodium/runtime.h b/crypto/include/sodium/runtime.h index 7f15d58e7..18a0e4de5 100644 --- a/crypto/include/sodium/runtime.h +++ b/crypto/include/sodium/runtime.h @@ -5,45 +5,59 @@ #include "export.h" #ifdef __cplusplus -extern "C" { +extern "C" +{ #endif -SODIUM_EXPORT_WEAK -int sodium_runtime_has_neon(void); + SODIUM_EXPORT_WEAK + int + sodium_runtime_has_neon(void); -SODIUM_EXPORT_WEAK -int sodium_runtime_has_sse2(void); + SODIUM_EXPORT_WEAK + int + sodium_runtime_has_sse2(void); -SODIUM_EXPORT_WEAK -int sodium_runtime_has_sse3(void); + SODIUM_EXPORT_WEAK + int + sodium_runtime_has_sse3(void); -SODIUM_EXPORT_WEAK -int sodium_runtime_has_ssse3(void); + SODIUM_EXPORT_WEAK + int + sodium_runtime_has_ssse3(void); -SODIUM_EXPORT_WEAK -int sodium_runtime_has_sse41(void); + SODIUM_EXPORT_WEAK + int + sodium_runtime_has_sse41(void); -SODIUM_EXPORT_WEAK -int sodium_runtime_has_avx(void); + SODIUM_EXPORT_WEAK + int + sodium_runtime_has_avx(void); -SODIUM_EXPORT_WEAK -int sodium_runtime_has_avx2(void); + SODIUM_EXPORT_WEAK + int + sodium_runtime_has_avx2(void); -SODIUM_EXPORT_WEAK -int sodium_runtime_has_avx512f(void); + SODIUM_EXPORT_WEAK + int + sodium_runtime_has_avx512f(void); -SODIUM_EXPORT_WEAK -int sodium_runtime_has_pclmul(void); + SODIUM_EXPORT_WEAK + int + sodium_runtime_has_pclmul(void); -SODIUM_EXPORT_WEAK -int sodium_runtime_has_aesni(void); + SODIUM_EXPORT_WEAK + int + sodium_runtime_has_aesni(void); -SODIUM_EXPORT_WEAK -int sodium_runtime_has_rdrand(void); + SODIUM_EXPORT_WEAK + int + sodium_runtime_has_rdrand(void); -/* ------------------------------------------------------------------------- */ + /* ------------------------------------------------------------------------- + */ -int _sodium_runtime_get_cpu_features(void); + int + _sodium_runtime_get_cpu_features(void); #ifdef __cplusplus } diff --git a/crypto/include/sodium/utils.h b/crypto/include/sodium/utils.h index 46eb331cf..e17366b01 100644 --- a/crypto/include/sodium/utils.h +++ b/crypto/include/sodium/utils.h @@ -7,161 +7,188 @@ #include "export.h" #ifdef __cplusplus -extern "C" { +extern "C" +{ #endif #ifndef SODIUM_C99 -# if defined(__cplusplus) || !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L -# define SODIUM_C99(X) -# else -# define SODIUM_C99(X) X -# endif +#if defined(__cplusplus) || !defined(__STDC_VERSION__) \ + || __STDC_VERSION__ < 199901L +#define SODIUM_C99(X) +#else +#define SODIUM_C99(X) X +#endif #endif -SODIUM_EXPORT -void sodium_memzero(void * const pnt, const size_t len); - -SODIUM_EXPORT -void sodium_stackzero(const size_t len); - -/* - * WARNING: sodium_memcmp() must be used to verify if two secret keys - * are equal, in constant time. - * It returns 0 if the keys are equal, and -1 if they differ. - * This function is not designed for lexicographical comparisons. - */ -SODIUM_EXPORT -int sodium_memcmp(const void * const b1_, const void * const b2_, size_t len) - __attribute__ ((warn_unused_result)); - -/* - * sodium_compare() returns -1 if b1_ < b2_, 1 if b1_ > b2_ and 0 if b1_ == b2_ - * It is suitable for lexicographical comparisons, or to compare nonces - * and counters stored in little-endian format. - * However, it is slower than sodium_memcmp(). - */ -SODIUM_EXPORT -int sodium_compare(const unsigned char *b1_, const unsigned char *b2_, - size_t len) - __attribute__ ((warn_unused_result)); - -SODIUM_EXPORT -int sodium_is_zero(const unsigned char *n, const size_t nlen); - -SODIUM_EXPORT -void sodium_increment(unsigned char *n, const size_t nlen); - -SODIUM_EXPORT -void sodium_add(unsigned char *a, const unsigned char *b, const size_t len); - -SODIUM_EXPORT -char *sodium_bin2hex(char * const hex, const size_t hex_maxlen, - const unsigned char * const bin, const size_t bin_len); - -SODIUM_EXPORT -int sodium_hex2bin(unsigned char * const bin, const size_t bin_maxlen, - const char * const hex, const size_t hex_len, - const char * const ignore, size_t * const bin_len, - const char ** const hex_end); - -#define sodium_base64_VARIANT_ORIGINAL 1 + SODIUM_EXPORT + void + sodium_memzero(void *const pnt, const size_t len); + + SODIUM_EXPORT + void + sodium_stackzero(const size_t len); + + /* + * WARNING: sodium_memcmp() must be used to verify if two secret keys + * are equal, in constant time. + * It returns 0 if the keys are equal, and -1 if they differ. + * This function is not designed for lexicographical comparisons. + */ + SODIUM_EXPORT + int + sodium_memcmp(const void *const b1_, const void *const b2_, size_t len) + __attribute__((warn_unused_result)); + + /* + * sodium_compare() returns -1 if b1_ < b2_, 1 if b1_ > b2_ and 0 if b1_ == + * b2_ It is suitable for lexicographical comparisons, or to compare nonces + * and counters stored in little-endian format. + * However, it is slower than sodium_memcmp(). + */ + SODIUM_EXPORT + int + sodium_compare(const unsigned char *b1_, const unsigned char *b2_, size_t len) + __attribute__((warn_unused_result)); + + SODIUM_EXPORT + int + sodium_is_zero(const unsigned char *n, const size_t nlen); + + SODIUM_EXPORT + void + sodium_increment(unsigned char *n, const size_t nlen); + + SODIUM_EXPORT + void + sodium_add(unsigned char *a, const unsigned char *b, const size_t len); + + SODIUM_EXPORT + char * + sodium_bin2hex(char *const hex, const size_t hex_maxlen, + const unsigned char *const bin, const size_t bin_len); + + SODIUM_EXPORT + int + sodium_hex2bin(unsigned char *const bin, const size_t bin_maxlen, + const char *const hex, const size_t hex_len, + const char *const ignore, size_t *const bin_len, + const char **const hex_end); + +#define sodium_base64_VARIANT_ORIGINAL 1 #define sodium_base64_VARIANT_ORIGINAL_NO_PADDING 3 -#define sodium_base64_VARIANT_URLSAFE 5 -#define sodium_base64_VARIANT_URLSAFE_NO_PADDING 7 +#define sodium_base64_VARIANT_URLSAFE 5 +#define sodium_base64_VARIANT_URLSAFE_NO_PADDING 7 /* * Computes the required length to encode BIN_LEN bytes as a base64 string * using the given variant. The computed length includes a trailing \0. */ -#define sodium_base64_ENCODED_LEN(BIN_LEN, VARIANT) \ - (((BIN_LEN) / 3U) * 4U + \ - ((((BIN_LEN) - ((BIN_LEN) / 3U) * 3U) | (((BIN_LEN) - ((BIN_LEN) / 3U) * 3U) >> 1)) & 1U) * \ - (4U - (~((((VARIANT) & 2U) >> 1) - 1U) & (3U - ((BIN_LEN) - ((BIN_LEN) / 3U) * 3U)))) + 1U) - -SODIUM_EXPORT -size_t sodium_base64_encoded_len(const size_t bin_len, const int variant); - -SODIUM_EXPORT -char *sodium_bin2base64(char * const b64, const size_t b64_maxlen, - const unsigned char * const bin, const size_t bin_len, - const int variant); - -SODIUM_EXPORT -int sodium_base642bin(unsigned char * const bin, const size_t bin_maxlen, - const char * const b64, const size_t b64_len, - const char * const ignore, size_t * const bin_len, - const char ** const b64_end, const int variant); - -SODIUM_EXPORT -int sodium_mlock(void * const addr, const size_t len); - -SODIUM_EXPORT -int sodium_munlock(void * const addr, const size_t len); - -/* WARNING: sodium_malloc() and sodium_allocarray() are not general-purpose - * allocation functions. - * - * They return a pointer to a region filled with 0xd0 bytes, immediately - * followed by a guard page. - * As a result, accessing a single byte after the requested allocation size - * will intentionally trigger a segmentation fault. - * - * A canary and an additional guard page placed before the beginning of the - * region may also kill the process if a buffer underflow is detected. - * - * The memory layout is: - * [unprotected region size (read only)][guard page (no access)][unprotected pages (read/write)][guard page (no access)] - * With the layout of the unprotected pages being: - * [optional padding][16-bytes canary][user region] - * - * However: - * - These functions are significantly slower than standard functions - * - Each allocation requires 3 or 4 additional pages - * - The returned address will not be aligned if the allocation size is not - * a multiple of the required alignment. For this reason, these functions - * are designed to store data, such as secret keys and messages. - * - * sodium_malloc() can be used to allocate any libsodium data structure. - * - * The crypto_generichash_state structure is packed and its length is - * either 357 or 361 bytes. For this reason, when using sodium_malloc() to - * allocate a crypto_generichash_state structure, padding must be added in - * order to ensure proper alignment. crypto_generichash_statebytes() - * returns the rounded up structure size, and should be prefered to sizeof(): - * state = sodium_malloc(crypto_generichash_statebytes()); - */ - -SODIUM_EXPORT -void *sodium_malloc(const size_t size) - __attribute__ ((malloc)); - -SODIUM_EXPORT -void *sodium_allocarray(size_t count, size_t size) - __attribute__ ((malloc)); - -SODIUM_EXPORT -void sodium_free(void *ptr); - -SODIUM_EXPORT -int sodium_mprotect_noaccess(void *ptr); - -SODIUM_EXPORT -int sodium_mprotect_readonly(void *ptr); - -SODIUM_EXPORT -int sodium_mprotect_readwrite(void *ptr); - -SODIUM_EXPORT -int sodium_pad(size_t *padded_buflen_p, unsigned char *buf, - size_t unpadded_buflen, size_t blocksize, size_t max_buflen); - -SODIUM_EXPORT -int sodium_unpad(size_t *unpadded_buflen_p, const unsigned char *buf, - size_t padded_buflen, size_t blocksize); - -/* -------- */ - -int _sodium_alloc_init(void); +#define sodium_base64_ENCODED_LEN(BIN_LEN, VARIANT) \ + (((BIN_LEN) / 3U) * 4U \ + + ((((BIN_LEN) - ((BIN_LEN) / 3U) * 3U) \ + | (((BIN_LEN) - ((BIN_LEN) / 3U) * 3U) >> 1)) \ + & 1U) \ + * (4U \ + - (~((((VARIANT)&2U) >> 1) - 1U) \ + & (3U - ((BIN_LEN) - ((BIN_LEN) / 3U) * 3U)))) \ + + 1U) + + SODIUM_EXPORT + size_t + sodium_base64_encoded_len(const size_t bin_len, const int variant); + + SODIUM_EXPORT + char * + sodium_bin2base64(char *const b64, const size_t b64_maxlen, + const unsigned char *const bin, const size_t bin_len, + const int variant); + + SODIUM_EXPORT + int + sodium_base642bin(unsigned char *const bin, const size_t bin_maxlen, + const char *const b64, const size_t b64_len, + const char *const ignore, size_t *const bin_len, + const char **const b64_end, const int variant); + + SODIUM_EXPORT + int + sodium_mlock(void *const addr, const size_t len); + + SODIUM_EXPORT + int + sodium_munlock(void *const addr, const size_t len); + + /* WARNING: sodium_malloc() and sodium_allocarray() are not general-purpose + * allocation functions. + * + * They return a pointer to a region filled with 0xd0 bytes, immediately + * followed by a guard page. + * As a result, accessing a single byte after the requested allocation size + * will intentionally trigger a segmentation fault. + * + * A canary and an additional guard page placed before the beginning of the + * region may also kill the process if a buffer underflow is detected. + * + * The memory layout is: + * [unprotected region size (read only)][guard page (no access)][unprotected + * pages (read/write)][guard page (no access)] With the layout of the + * unprotected pages being: [optional padding][16-bytes canary][user region] + * + * However: + * - These functions are significantly slower than standard functions + * - Each allocation requires 3 or 4 additional pages + * - The returned address will not be aligned if the allocation size is not + * a multiple of the required alignment. For this reason, these functions + * are designed to store data, such as secret keys and messages. + * + * sodium_malloc() can be used to allocate any libsodium data structure. + * + * The crypto_generichash_state structure is packed and its length is + * either 357 or 361 bytes. For this reason, when using sodium_malloc() to + * allocate a crypto_generichash_state structure, padding must be added in + * order to ensure proper alignment. crypto_generichash_statebytes() + * returns the rounded up structure size, and should be prefered to sizeof(): + * state = sodium_malloc(crypto_generichash_statebytes()); + */ + + SODIUM_EXPORT + void * + sodium_malloc(const size_t size) __attribute__((malloc)); + + SODIUM_EXPORT + void * + sodium_allocarray(size_t count, size_t size) __attribute__((malloc)); + + SODIUM_EXPORT + void + sodium_free(void *ptr); + + SODIUM_EXPORT + int + sodium_mprotect_noaccess(void *ptr); + + SODIUM_EXPORT + int + sodium_mprotect_readonly(void *ptr); + + SODIUM_EXPORT + int + sodium_mprotect_readwrite(void *ptr); + + SODIUM_EXPORT + int + sodium_pad(size_t *padded_buflen_p, unsigned char *buf, + size_t unpadded_buflen, size_t blocksize, size_t max_buflen); + + SODIUM_EXPORT + int + sodium_unpad(size_t *unpadded_buflen_p, const unsigned char *buf, + size_t padded_buflen, size_t blocksize); + + /* -------- */ + + int + _sodium_alloc_init(void); #ifdef __cplusplus } diff --git a/crypto/libntrup/src/avx/int32_sort.c b/crypto/libntrup/src/avx/int32_sort.c index cd8974df0..89c2c2aa6 100644 --- a/crypto/libntrup/src/avx/int32_sort.c +++ b/crypto/libntrup/src/avx/int32_sort.c @@ -4,424 +4,463 @@ typedef crypto_int32 int32; -static inline void minmax(int32 *x,int32 *y) +static inline void +minmax(int32 *x, int32 *y) { - asm("movl (%0),%%eax;movl (%1),%%ebx;cmpl %%ebx,%%eax;mov %%eax,%%edx;cmovg %%ebx,%%eax;cmovg %%edx,%%ebx;movl %%eax,(%0);movl %%ebx,(%1)" - : : "r"(x),"r"(y) : "%eax","%ebx","%edx"); + asm("movl (%0),%%eax;movl (%1),%%ebx;cmpl %%ebx,%%eax;mov %%eax,%%edx;cmovg " + "%%ebx,%%eax;cmovg %%edx,%%ebx;movl %%eax,(%0);movl %%ebx,(%1)" + : + : "r"(x), "r"(y) + : "%eax", "%ebx", "%edx"); } /* sort x0,x2; sort x1,x3; ... sort x13, x15 */ -static inline void minmax02through1315(int32 *x) +static inline void +minmax02through1315(int32 *x) { - __m256i a = _mm256_loadu_si256((__m256i *) x); - __m256i b = _mm256_loadu_si256((__m256i *) (x + 8)); - __m256i c = _mm256_unpacklo_epi64(a,b); /* a01b01a45b45 */ - __m256i d = _mm256_unpackhi_epi64(a,b); /* a23b23a67b67 */ - __m256i g = _mm256_min_epi32(c,d); - __m256i h = _mm256_max_epi32(c,d); - a = _mm256_unpacklo_epi64(g,h); - b = _mm256_unpackhi_epi64(g,h); - _mm256_storeu_si256((__m256i *) x,a); - _mm256_storeu_si256((__m256i *) (x + 8),b); + __m256i a = _mm256_loadu_si256((__m256i *)x); + __m256i b = _mm256_loadu_si256((__m256i *)(x + 8)); + __m256i c = _mm256_unpacklo_epi64(a, b); /* a01b01a45b45 */ + __m256i d = _mm256_unpackhi_epi64(a, b); /* a23b23a67b67 */ + __m256i g = _mm256_min_epi32(c, d); + __m256i h = _mm256_max_epi32(c, d); + a = _mm256_unpacklo_epi64(g, h); + b = _mm256_unpackhi_epi64(g, h); + _mm256_storeu_si256((__m256i *)x, a); + _mm256_storeu_si256((__m256i *)(x + 8), b); } /* sort x0,x2; sort x1,x3; sort x4,x6; sort x5,x7 */ -static inline void minmax02134657(int32 *x) +static inline void +minmax02134657(int32 *x) { - __m256i a = _mm256_loadu_si256((__m256i *) x); - __m256i b = _mm256_shuffle_epi32(a,0x4e); - __m256i c = _mm256_cmpgt_epi32(a,b); - c = _mm256_shuffle_epi32(c,0x44); + __m256i a = _mm256_loadu_si256((__m256i *)x); + __m256i b = _mm256_shuffle_epi32(a, 0x4e); + __m256i c = _mm256_cmpgt_epi32(a, b); + c = _mm256_shuffle_epi32(c, 0x44); __m256i abc = c & (a ^ b); a ^= abc; - _mm256_storeu_si256((__m256i *) x,a); + _mm256_storeu_si256((__m256i *)x, a); } -static void multiminmax2plus2( - int32 *x, - int n) +static void +multiminmax2plus2(int32 *x, int n) { - while (n >= 16) { + while(n >= 16) + { minmax02through1315(x); n -= 16; x += 16; } - if (n >= 8) { + if(n >= 8) + { minmax02134657(x); n -= 8; x += 8; } - if (n >= 4) { - minmax(x,x + 2); - minmax(x + 1,x + 3); + if(n >= 4) + { + minmax(x, x + 2); + minmax(x + 1, x + 3); n -= 4; x += 4; } - if (n > 0) { - minmax(x,x + 2); - if (n > 1) minmax(x + 1,x + 3); + if(n > 0) + { + minmax(x, x + 2); + if(n > 1) + minmax(x + 1, x + 3); } } -static void multiminmax2plus6( - int32 *x, - int n) +static void +multiminmax2plus6(int32 *x, int n) { - while (n >= 4) { - minmax(x,x + 6); - minmax(x + 1,x + 7); + while(n >= 4) + { + minmax(x, x + 6); + minmax(x + 1, x + 7); n -= 4; x += 4; } - if (n > 0) { - minmax(x,x + 6); - if (n > 1) minmax(x + 1,x + 7); + if(n > 0) + { + minmax(x, x + 6); + if(n > 1) + minmax(x + 1, x + 7); } } -static void multiminmax2plus14( - int32 *x, - int n) +static void +multiminmax2plus14(int32 *x, int n) { - while (n >= 8) { - minmax(x,x + 14); - minmax(x + 1,x + 15); - minmax(x + 4,x + 18); - minmax(x + 5,x + 19); + while(n >= 8) + { + minmax(x, x + 14); + minmax(x + 1, x + 15); + minmax(x + 4, x + 18); + minmax(x + 5, x + 19); n -= 8; x += 8; } - if (n >= 4) { - minmax(x,x + 14); - minmax(x + 1,x + 15); + if(n >= 4) + { + minmax(x, x + 14); + minmax(x + 1, x + 15); n -= 4; x += 4; } - if (n > 0) { - minmax(x,x + 14); - if (n > 1) minmax(x + 1,x + 15); + if(n > 0) + { + minmax(x, x + 14); + if(n > 1) + minmax(x + 1, x + 15); } } /* sort x[i],y[i] for i in 0,1,4,5,8,9,12,13 */ /* all of x0...x15 and y0...y15 must exist; no aliasing */ -static inline void minmax0145891213(int32 *x,int32 *y) +static inline void +minmax0145891213(int32 *x, int32 *y) { - __m256i a01234567 = _mm256_loadu_si256((__m256i *) x); - __m256i a89101112131415 = _mm256_loadu_si256((__m256i *) (x + 8)); - __m256i b01234567 = _mm256_loadu_si256((__m256i *) y); - __m256i b89101112131415 = _mm256_loadu_si256((__m256i *) (y + 8)); + __m256i a01234567 = _mm256_loadu_si256((__m256i *)x); + __m256i a89101112131415 = _mm256_loadu_si256((__m256i *)(x + 8)); + __m256i b01234567 = _mm256_loadu_si256((__m256i *)y); + __m256i b89101112131415 = _mm256_loadu_si256((__m256i *)(y + 8)); - __m256i a0189451213 = _mm256_unpacklo_epi64(a01234567,a89101112131415); - __m256i b0189451213 = _mm256_unpacklo_epi64(b01234567,b89101112131415); - __m256i c0189451213 = _mm256_min_epi32(a0189451213,b0189451213); - __m256i d0189451213 = _mm256_max_epi32(a0189451213,b0189451213); + __m256i a0189451213 = _mm256_unpacklo_epi64(a01234567, a89101112131415); + __m256i b0189451213 = _mm256_unpacklo_epi64(b01234567, b89101112131415); + __m256i c0189451213 = _mm256_min_epi32(a0189451213, b0189451213); + __m256i d0189451213 = _mm256_max_epi32(a0189451213, b0189451213); - __m256i c01234567 = _mm256_blend_epi32(a01234567,c0189451213,0x33); - __m256i d01234567 = _mm256_blend_epi32(b01234567,d0189451213,0x33); - __m256i c89101112131415 = _mm256_unpackhi_epi64(c0189451213,a89101112131415); - __m256i d89101112131415 = _mm256_unpackhi_epi64(d0189451213,b89101112131415); + __m256i c01234567 = _mm256_blend_epi32(a01234567, c0189451213, 0x33); + __m256i d01234567 = _mm256_blend_epi32(b01234567, d0189451213, 0x33); + __m256i c89101112131415 = _mm256_unpackhi_epi64(c0189451213, a89101112131415); + __m256i d89101112131415 = _mm256_unpackhi_epi64(d0189451213, b89101112131415); - _mm256_storeu_si256((__m256i *) x,c01234567); - _mm256_storeu_si256((__m256i *) (x + 8),c89101112131415); - _mm256_storeu_si256((__m256i *) y,d01234567); - _mm256_storeu_si256((__m256i *) (y + 8),d89101112131415); + _mm256_storeu_si256((__m256i *)x, c01234567); + _mm256_storeu_si256((__m256i *)(x + 8), c89101112131415); + _mm256_storeu_si256((__m256i *)y, d01234567); + _mm256_storeu_si256((__m256i *)(y + 8), d89101112131415); } /* offset >= 30 */ -static void multiminmax2plusmore( - int32 *x, - int n, - int offset) +static void +multiminmax2plusmore(int32 *x, int n, int offset) { - while (n >= 16) { - minmax0145891213(x,x + offset); + while(n >= 16) + { + minmax0145891213(x, x + offset); n -= 16; x += 16; } - if (n >= 8) { - minmax(x,x + offset); - minmax(x + 1,x + 1 + offset); - minmax(x + 4,x + 4 + offset); - minmax(x + 5,x + 5 + offset); + if(n >= 8) + { + minmax(x, x + offset); + minmax(x + 1, x + 1 + offset); + minmax(x + 4, x + 4 + offset); + minmax(x + 5, x + 5 + offset); n -= 8; x += 8; } - if (n >= 4) { - minmax(x,x + offset); - minmax(x + 1,x + 1 + offset); + if(n >= 4) + { + minmax(x, x + offset); + minmax(x + 1, x + 1 + offset); n -= 4; x += 4; } - if (n > 0) { - minmax(x,x + offset); - if (n > 1) minmax(x + 1,x + 1 + offset); + if(n > 0) + { + minmax(x, x + offset); + if(n > 1) + minmax(x + 1, x + 1 + offset); } } /* sort x0,x1; ... sort x14, x15 */ -static inline void minmax01through1415(int32 *x) +static inline void +minmax01through1415(int32 *x) { - __m256i a = _mm256_loadu_si256((__m256i *) x); - __m256i b = _mm256_loadu_si256((__m256i *) (x + 8)); - __m256i c = _mm256_unpacklo_epi32(a,b); /* ab0ab1ab4ab5 */ - __m256i d = _mm256_unpackhi_epi32(a,b); /* ab2ab3ab6ab7 */ - __m256i e = _mm256_unpacklo_epi32(c,d); /* a02b02a46b46 */ - __m256i f = _mm256_unpackhi_epi32(c,d); /* a13b13a57b57 */ - __m256i g = _mm256_min_epi32(e,f); /* a02b02a46b46 */ - __m256i h = _mm256_max_epi32(e,f); /* a13b13a57b57 */ - a = _mm256_unpacklo_epi32(g,h); - b = _mm256_unpackhi_epi32(g,h); - _mm256_storeu_si256((__m256i *) x,a); - _mm256_storeu_si256((__m256i *) (x + 8),b); + __m256i a = _mm256_loadu_si256((__m256i *)x); + __m256i b = _mm256_loadu_si256((__m256i *)(x + 8)); + __m256i c = _mm256_unpacklo_epi32(a, b); /* ab0ab1ab4ab5 */ + __m256i d = _mm256_unpackhi_epi32(a, b); /* ab2ab3ab6ab7 */ + __m256i e = _mm256_unpacklo_epi32(c, d); /* a02b02a46b46 */ + __m256i f = _mm256_unpackhi_epi32(c, d); /* a13b13a57b57 */ + __m256i g = _mm256_min_epi32(e, f); /* a02b02a46b46 */ + __m256i h = _mm256_max_epi32(e, f); /* a13b13a57b57 */ + a = _mm256_unpacklo_epi32(g, h); + b = _mm256_unpackhi_epi32(g, h); + _mm256_storeu_si256((__m256i *)x, a); + _mm256_storeu_si256((__m256i *)(x + 8), b); } /* sort x0,x1; sort x2,x3; sort x4,x5; sort x6,x7 */ -static inline void minmax01234567(int32 *x) +static inline void +minmax01234567(int32 *x) { - __m256i a = _mm256_loadu_si256((__m256i *) x); - __m256i b = _mm256_shuffle_epi32(a,0xb1); - __m256i c = _mm256_cmpgt_epi32(a,b); - c = _mm256_shuffle_epi32(c,0xa0); + __m256i a = _mm256_loadu_si256((__m256i *)x); + __m256i b = _mm256_shuffle_epi32(a, 0xb1); + __m256i c = _mm256_cmpgt_epi32(a, b); + c = _mm256_shuffle_epi32(c, 0xa0); __m256i abc = c & (a ^ b); a ^= abc; - _mm256_storeu_si256((__m256i *) x,a); + _mm256_storeu_si256((__m256i *)x, a); } -static void multiminmax1plus1( - int32 *x, - int n) +static void +multiminmax1plus1(int32 *x, int n) { - while (n >= 16) { + while(n >= 16) + { minmax01through1415(x); n -= 16; x += 16; } - if (n >= 8) { + if(n >= 8) + { minmax01234567(x); n -= 8; x += 8; } - if (n >= 4) { - minmax(x,x + 1); - minmax(x + 2,x + 3); + if(n >= 4) + { + minmax(x, x + 1); + minmax(x + 2, x + 3); n -= 4; x += 4; } - if (n >= 2) { - minmax(x,x + 1); + if(n >= 2) + { + minmax(x, x + 1); n -= 2; x += 2; } - if (n > 0) - minmax(x,x + 1); + if(n > 0) + minmax(x, x + 1); } -static void multiminmax1( - int32 *x, - int n, - int offset) +static void +multiminmax1(int32 *x, int n, int offset) { - while (n >= 16) { - minmax(x,x + offset); - minmax(x + 2,x + 2 + offset); - minmax(x + 4,x + 4 + offset); - minmax(x + 6,x + 6 + offset); - minmax(x + 8,x + 8 + offset); - minmax(x + 10,x + 10 + offset); - minmax(x + 12,x + 12 + offset); - minmax(x + 14,x + 14 + offset); + while(n >= 16) + { + minmax(x, x + offset); + minmax(x + 2, x + 2 + offset); + minmax(x + 4, x + 4 + offset); + minmax(x + 6, x + 6 + offset); + minmax(x + 8, x + 8 + offset); + minmax(x + 10, x + 10 + offset); + minmax(x + 12, x + 12 + offset); + minmax(x + 14, x + 14 + offset); n -= 16; x += 16; } - if (n >= 8) { - minmax(x,x + offset); - minmax(x + 2,x + 2 + offset); - minmax(x + 4,x + 4 + offset); - minmax(x + 6,x + 6 + offset); + if(n >= 8) + { + minmax(x, x + offset); + minmax(x + 2, x + 2 + offset); + minmax(x + 4, x + 4 + offset); + minmax(x + 6, x + 6 + offset); n -= 8; x += 8; } - if (n >= 4) { - minmax(x,x + offset); - minmax(x + 2,x + 2 + offset); + if(n >= 4) + { + minmax(x, x + offset); + minmax(x + 2, x + 2 + offset); n -= 4; x += 4; } - if (n >= 2) { - minmax(x,x + offset); + if(n >= 2) + { + minmax(x, x + offset); n -= 2; x += 2; } - if (n > 0) - minmax(x,x + offset); + if(n > 0) + minmax(x, x + offset); } /* sort x[i],y[i] for i in 0,2,4,6,8,10,12,14 */ /* all of x0...x15 and y0...y15 must exist; no aliasing */ -static inline void minmax02468101214(int32 *x,int32 *y) +static inline void +minmax02468101214(int32 *x, int32 *y) { - __m256i a01234567 = _mm256_loadu_si256((__m256i *) x); - __m256i a89101112131415 = _mm256_loadu_si256((__m256i *) (x + 8)); - __m256i b01234567 = _mm256_loadu_si256((__m256i *) y); - __m256i b89101112131415 = _mm256_loadu_si256((__m256i *) (y + 8)); + __m256i a01234567 = _mm256_loadu_si256((__m256i *)x); + __m256i a89101112131415 = _mm256_loadu_si256((__m256i *)(x + 8)); + __m256i b01234567 = _mm256_loadu_si256((__m256i *)y); + __m256i b89101112131415 = _mm256_loadu_si256((__m256i *)(y + 8)); - __m256i a0819412513 = _mm256_unpacklo_epi32(a01234567,a89101112131415); - __m256i a210311614715 = _mm256_unpackhi_epi32(a01234567,a89101112131415); - __m256i a02810461214 = _mm256_unpacklo_epi32(a0819412513,a210311614715); - __m256i a13911571315 = _mm256_unpackhi_epi32(a0819412513,a210311614715); + __m256i a0819412513 = _mm256_unpacklo_epi32(a01234567, a89101112131415); + __m256i a210311614715 = _mm256_unpackhi_epi32(a01234567, a89101112131415); + __m256i a02810461214 = _mm256_unpacklo_epi32(a0819412513, a210311614715); + __m256i a13911571315 = _mm256_unpackhi_epi32(a0819412513, a210311614715); - __m256i b0819412513 = _mm256_unpacklo_epi32(b01234567,b89101112131415); - __m256i b210311614715 = _mm256_unpackhi_epi32(b01234567,b89101112131415); - __m256i b02810461214 = _mm256_unpacklo_epi32(b0819412513,b210311614715); - __m256i b13911571315 = _mm256_unpackhi_epi32(b0819412513,b210311614715); + __m256i b0819412513 = _mm256_unpacklo_epi32(b01234567, b89101112131415); + __m256i b210311614715 = _mm256_unpackhi_epi32(b01234567, b89101112131415); + __m256i b02810461214 = _mm256_unpacklo_epi32(b0819412513, b210311614715); + __m256i b13911571315 = _mm256_unpackhi_epi32(b0819412513, b210311614715); - __m256i c02810461214 = _mm256_min_epi32(a02810461214,b02810461214); - __m256i d02810461214 = _mm256_max_epi32(a02810461214,b02810461214); + __m256i c02810461214 = _mm256_min_epi32(a02810461214, b02810461214); + __m256i d02810461214 = _mm256_max_epi32(a02810461214, b02810461214); - __m256i c01234567 = _mm256_unpacklo_epi32(c02810461214,a13911571315); - __m256i c89101112131415 = _mm256_unpackhi_epi32(c02810461214,a13911571315); - __m256i d01234567 = _mm256_unpacklo_epi32(d02810461214,b13911571315); - __m256i d89101112131415 = _mm256_unpackhi_epi32(d02810461214,b13911571315); - - _mm256_storeu_si256((__m256i *) x,c01234567); - _mm256_storeu_si256((__m256i *) (x + 8),c89101112131415); - _mm256_storeu_si256((__m256i *) y,d01234567); - _mm256_storeu_si256((__m256i *) (y + 8),d89101112131415); + __m256i c01234567 = _mm256_unpacklo_epi32(c02810461214, a13911571315); + __m256i c89101112131415 = _mm256_unpackhi_epi32(c02810461214, a13911571315); + __m256i d01234567 = _mm256_unpacklo_epi32(d02810461214, b13911571315); + __m256i d89101112131415 = _mm256_unpackhi_epi32(d02810461214, b13911571315); + + _mm256_storeu_si256((__m256i *)x, c01234567); + _mm256_storeu_si256((__m256i *)(x + 8), c89101112131415); + _mm256_storeu_si256((__m256i *)y, d01234567); + _mm256_storeu_si256((__m256i *)(y + 8), d89101112131415); } /* assumes offset >= 31 */ -static void multiminmax1plusmore( - int32 *x, - int n, - int offset) +static void +multiminmax1plusmore(int32 *x, int n, int offset) { - while (n >= 16) { - minmax02468101214(x,x + offset); + while(n >= 16) + { + minmax02468101214(x, x + offset); n -= 16; x += 16; } - if (n >= 8) { - minmax(x,x + offset); - minmax(x + 2,x + 2 + offset); - minmax(x + 4,x + 4 + offset); - minmax(x + 6,x + 6 + offset); + if(n >= 8) + { + minmax(x, x + offset); + minmax(x + 2, x + 2 + offset); + minmax(x + 4, x + 4 + offset); + minmax(x + 6, x + 6 + offset); n -= 8; x += 8; } - if (n >= 4) { - minmax(x,x + offset); - minmax(x + 2,x + 2 + offset); + if(n >= 4) + { + minmax(x, x + offset); + minmax(x + 2, x + 2 + offset); n -= 4; x += 4; } - if (n >= 2) { - minmax(x,x + offset); + if(n >= 2) + { + minmax(x, x + offset); n -= 2; x += 2; } - if (n > 0) - minmax(x,x + offset); + if(n > 0) + minmax(x, x + offset); } /* sort x0,y0; sort x1,y1; ...; sort x7,y7 */ -static inline void minmax8(int32 *x,int32 *y) +static inline void +minmax8(int32 *x, int32 *y) { - __m256i a = _mm256_loadu_si256((__m256i *) x); - __m256i b = _mm256_loadu_si256((__m256i *) y); - _mm256_storeu_si256((__m256i *) x,_mm256_min_epi32(a,b)); - _mm256_storeu_si256((__m256i *) y,_mm256_max_epi32(a,b)); + __m256i a = _mm256_loadu_si256((__m256i *)x); + __m256i b = _mm256_loadu_si256((__m256i *)y); + _mm256_storeu_si256((__m256i *)x, _mm256_min_epi32(a, b)); + _mm256_storeu_si256((__m256i *)y, _mm256_max_epi32(a, b)); } /* assumes p >= 8; implies offset >= 8 */ -static void multiminmax_atleast8(int p, - int32 *x, - int n, - int offset) +static void +multiminmax_atleast8(int p, int32 *x, int n, int offset) { int i; - while (n >= 2 * p) { - for (i = 0;i < p;i += 8) - minmax8(x + i,x + i + offset); + while(n >= 2 * p) + { + for(i = 0; i < p; i += 8) + minmax8(x + i, x + i + offset); n -= 2 * p; x += 2 * p; } - for (i = 0;i + 8 <= n;i += 8) { - if (i & p) return; - minmax8(x + i,x + i + offset); + for(i = 0; i + 8 <= n; i += 8) + { + if(i & p) + return; + minmax8(x + i, x + i + offset); } - for (;i < n;++i) { - if (i & p) return; - minmax(x + i,x + i + offset); + for(; i < n; ++i) + { + if(i & p) + return; + minmax(x + i, x + i + offset); } } /* sort x0,y0; sort x1,y1; sort x2,y2; sort x3,y3 */ -static inline void minmax4(int32 *x,int32 *y) +static inline void +minmax4(int32 *x, int32 *y) { - __m128i a = _mm_loadu_si128((__m128i *) x); - __m128i b = _mm_loadu_si128((__m128i *) y); - _mm_storeu_si128((__m128i *) x,_mm_min_epi32(a,b)); - _mm_storeu_si128((__m128i *) y,_mm_max_epi32(a,b)); + __m128i a = _mm_loadu_si128((__m128i *)x); + __m128i b = _mm_loadu_si128((__m128i *)y); + _mm_storeu_si128((__m128i *)x, _mm_min_epi32(a, b)); + _mm_storeu_si128((__m128i *)y, _mm_max_epi32(a, b)); } -static void multiminmax4( - int32 *x, - int n, - int offset) +static void +multiminmax4(int32 *x, int n, int offset) { int i; - while (n >= 8) { - minmax4(x,x + offset); + while(n >= 8) + { + minmax4(x, x + offset); n -= 8; x += 8; } - if (n >= 4) - minmax4(x,x + offset); + if(n >= 4) + minmax4(x, x + offset); else - for (i = 0;i < n;++i) - minmax(x + i,x + i + offset); + for(i = 0; i < n; ++i) + minmax(x + i, x + i + offset); } -void int32_sort(int32 *x,int n) +void +int32_sort(int32 *x, int n) { - int top,p,q; + int top, p, q; - if (n < 2) return; + if(n < 2) + return; top = 1; - while (top < n - top) top += top; + while(top < n - top) + top += top; - for (p = top;p >= 8;p >>= 1) { - multiminmax_atleast8(p,x,n - p,p); - for (q = top;q > p;q >>= 1) - multiminmax_atleast8(p,x + p,n - q,q - p); - } - if (p >= 4) { - multiminmax4(x,n - 4,4); - for (q = top;q > 4;q >>= 1) - multiminmax4(x + 4,n - q,q - 4); - } - if (p >= 2) { - multiminmax2plus2(x,n - 2); - for (q = top;q >= 32;q >>= 1) - multiminmax2plusmore(x + 2,n - q,q - 2); - if (q >= 16) - multiminmax2plus14(x + 2,n - 16); - if (q >= 8) - multiminmax2plus6(x + 2,n - 8); - if (q >= 4) - multiminmax2plus2(x + 2,n - 4); - } - multiminmax1plus1(x,n - 1); - for (q = top;q >= 32;q >>= 1) - multiminmax1plusmore(x + 1,n - q,q - 1); - if (q >= 16) - multiminmax1(x + 1,n - 16,15); - if (q >= 8) - multiminmax1(x + 1,n - 8,7); - if (q >= 4) - multiminmax1(x + 1,n - 4,3); - if (q >= 2) - multiminmax1plus1(x + 1,n - 2); + for(p = top; p >= 8; p >>= 1) + { + multiminmax_atleast8(p, x, n - p, p); + for(q = top; q > p; q >>= 1) + multiminmax_atleast8(p, x + p, n - q, q - p); + } + if(p >= 4) + { + multiminmax4(x, n - 4, 4); + for(q = top; q > 4; q >>= 1) + multiminmax4(x + 4, n - q, q - 4); + } + if(p >= 2) + { + multiminmax2plus2(x, n - 2); + for(q = top; q >= 32; q >>= 1) + multiminmax2plusmore(x + 2, n - q, q - 2); + if(q >= 16) + multiminmax2plus14(x + 2, n - 16); + if(q >= 8) + multiminmax2plus6(x + 2, n - 8); + if(q >= 4) + multiminmax2plus2(x + 2, n - 4); + } + multiminmax1plus1(x, n - 1); + for(q = top; q >= 32; q >>= 1) + multiminmax1plusmore(x + 1, n - q, q - 1); + if(q >= 16) + multiminmax1(x + 1, n - 16, 15); + if(q >= 8) + multiminmax1(x + 1, n - 8, 7); + if(q >= 4) + multiminmax1(x + 1, n - 4, 3); + if(q >= 2) + multiminmax1plus1(x + 1, n - 2); } #endif \ No newline at end of file diff --git a/crypto/libntrup/src/avx/r3.h b/crypto/libntrup/src/avx/r3.h index c2f2e5e27..aa0b03607 100644 --- a/crypto/libntrup/src/avx/r3.h +++ b/crypto/libntrup/src/avx/r3.h @@ -4,12 +4,15 @@ #include "small.h" #define r3_mult crypto_kem_sntrup4591761_avx_r3_mult -extern void r3_mult(small *,const small *,const small *); +extern void +r3_mult(small *, const small *, const small *); #define r3_recip crypto_kem_sntrup4591761_avx_r3_recip -extern int r3_recip(small *,const small *); +extern int +r3_recip(small *, const small *); #define r3_weightw_mask crypto_kem_sntrup4591761_avx_r3_weightw_mask -extern int r3_weightw_mask(const small *); +extern int +r3_weightw_mask(const small *); #endif diff --git a/crypto/libntrup/src/avx/r3_recip.c b/crypto/libntrup/src/avx/r3_recip.c index 76d8a91cd..839bb2ce7 100644 --- a/crypto/libntrup/src/avx/r3_recip.c +++ b/crypto/libntrup/src/avx/r3_recip.c @@ -6,91 +6,102 @@ #include "r3.h" /* caller must ensure that x-y does not overflow */ -static int smaller_mask(int x,int y) +static int +smaller_mask(int x, int y) { return (x - y) >> 31; } -static void vectormod3_product(small *z,int len,const small *x,const small c) +static void +vectormod3_product(small *z, int len, const small *x, const small c) { int i; int minusmask = c; - int plusmask = -c; + int plusmask = -c; __m256i minusvec, plusvec, zerovec; minusmask >>= 31; plusmask >>= 31; minusvec = _mm256_set1_epi32(minusmask); - plusvec = _mm256_set1_epi32(plusmask); - zerovec = _mm256_set1_epi32(0); - - while (len >= 32) { - __m256i xi = _mm256_loadu_si256((__m256i *) x); - xi = (xi & plusvec) | (_mm256_sub_epi8(zerovec,xi) & minusvec); - _mm256_storeu_si256((__m256i *) z,xi); + plusvec = _mm256_set1_epi32(plusmask); + zerovec = _mm256_set1_epi32(0); + + while(len >= 32) + { + __m256i xi = _mm256_loadu_si256((__m256i *)x); + xi = (xi & plusvec) | (_mm256_sub_epi8(zerovec, xi) & minusvec); + _mm256_storeu_si256((__m256i *)z, xi); x += 32; z += 32; len -= 32; } - for (i = 0;i < len;++i) z[i] = mod3_product(x[i],c); + for(i = 0; i < len; ++i) + z[i] = mod3_product(x[i], c); } -static void vectormod3_minusproduct(small *z,int len,const small *x,const small *y,const small c) +static void +vectormod3_minusproduct(small *z, int len, const small *x, const small *y, + const small c) { int i; int minusmask = c; - int plusmask = -c; + int plusmask = -c; __m256i minusvec, plusvec, zerovec, twovec, fourvec; minusmask >>= 31; plusmask >>= 31; minusvec = _mm256_set1_epi32(minusmask); - plusvec = _mm256_set1_epi32(plusmask); - zerovec = _mm256_set1_epi32(0); - twovec = _mm256_set1_epi32(0x02020202); - fourvec = _mm256_set1_epi32(0x04040404); - - while (len >= 32) { - __m256i xi = _mm256_loadu_si256((__m256i *) x); - __m256i yi = _mm256_loadu_si256((__m256i *) y); + plusvec = _mm256_set1_epi32(plusmask); + zerovec = _mm256_set1_epi32(0); + twovec = _mm256_set1_epi32(0x02020202); + fourvec = _mm256_set1_epi32(0x04040404); + + while(len >= 32) + { + __m256i xi = _mm256_loadu_si256((__m256i *)x); + __m256i yi = _mm256_loadu_si256((__m256i *)y); __m256i r; - yi = (yi & plusvec) | (_mm256_sub_epi8(zerovec,yi) & minusvec); - xi = _mm256_sub_epi8(xi,yi); + yi = (yi & plusvec) | (_mm256_sub_epi8(zerovec, yi) & minusvec); + xi = _mm256_sub_epi8(xi, yi); - r = _mm256_add_epi8(xi,twovec); + r = _mm256_add_epi8(xi, twovec); r &= fourvec; - r = _mm256_srli_epi32(r,2); - xi = _mm256_sub_epi8(xi,r); - r = _mm256_add_epi8(r,r); - xi = _mm256_sub_epi8(xi,r); + r = _mm256_srli_epi32(r, 2); + xi = _mm256_sub_epi8(xi, r); + r = _mm256_add_epi8(r, r); + xi = _mm256_sub_epi8(xi, r); - r = _mm256_sub_epi8(twovec,xi); + r = _mm256_sub_epi8(twovec, xi); r &= fourvec; - r = _mm256_srli_epi32(r,2); - xi = _mm256_add_epi8(xi,r); - r = _mm256_add_epi8(r,r); - xi = _mm256_add_epi8(xi,r); + r = _mm256_srli_epi32(r, 2); + xi = _mm256_add_epi8(xi, r); + r = _mm256_add_epi8(r, r); + xi = _mm256_add_epi8(xi, r); - _mm256_storeu_si256((__m256i *) z,xi); + _mm256_storeu_si256((__m256i *)z, xi); x += 32; y += 32; z += 32; len -= 32; } - for (i = 0;i < len;++i) z[i] = mod3_minusproduct(x[i],y[i],c); + for(i = 0; i < len; ++i) + z[i] = mod3_minusproduct(x[i], y[i], c); } -static void vectormod3_shift(small *z,int len) +static void +vectormod3_shift(small *z, int len) { int i; - while (len >= 33) { - __m256i zi = _mm256_loadu_si256((__m256i *) (z + len - 33)); - _mm256_storeu_si256((__m256i *) (z + len - 32),zi); + while(len >= 33) + { + __m256i zi = _mm256_loadu_si256((__m256i *)(z + len - 33)); + _mm256_storeu_si256((__m256i *)(z + len - 32), zi); len -= 32; } - for (i = len - 1;i > 0;--i) z[i] = z[i - 1]; + for(i = len - 1; i > 0; --i) + z[i] = z[i - 1]; z[0] = 0; } @@ -100,12 +111,13 @@ or returning -1 if s is not invertible mod m r,s are polys of degree

= loops) break; + if(loop >= loops) + break; - c = mod3_quotient(g[p],f[p]); + c = mod3_quotient(g[p], f[p]); - vectormod3_minusproduct(g,768,g,f,c); - vectormod3_shift(g,769); + vectormod3_minusproduct(g, 768, g, f, c); + vectormod3_shift(g, 769); #ifdef SIMPLER - vectormod3_minusproduct(v,1536,v,u,c); - vectormod3_shift(v,1537); + vectormod3_minusproduct(v, 1536, v, u, c); + vectormod3_shift(v, 1537); #else - if (loop < p) { - vectormod3_minusproduct(v,loop + 1,v,u,c); - vectormod3_shift(v,loop + 2); - } else { - vectormod3_minusproduct(v + loop - p,p + 1,v + loop - p,u + loop - p,c); - vectormod3_shift(v + loop - p,p + 2); + if(loop < p) + { + vectormod3_minusproduct(v, loop + 1, v, u, c); + vectormod3_shift(v, loop + 2); + } + else + { + vectormod3_minusproduct(v + loop - p, p + 1, v + loop - p, u + loop - p, + c); + vectormod3_shift(v + loop - p, p + 2); } #endif @@ -171,24 +194,28 @@ int r3_recip(small *r,const small *s) ++loop; - swapmask = smaller_mask(e,d) & mod3_nonzero_mask(g[p]); - swap(&e,&d,sizeof e,swapmask); - swap(f,g,(p + 1) * sizeof(small),swapmask); + swapmask = smaller_mask(e, d) & mod3_nonzero_mask(g[p]); + swap(&e, &d, sizeof e, swapmask); + swap(f, g, (p + 1) * sizeof(small), swapmask); #ifdef SIMPLER - swap(u,v,1536 * sizeof(small),swapmask); + swap(u, v, 1536 * sizeof(small), swapmask); #else - if (loop < p) { - swap(u,v,(loop + 1) * sizeof(small),swapmask); - } else { - swap(u + loop - p,v + loop - p,(p + 1) * sizeof(small),swapmask); + if(loop < p) + { + swap(u, v, (loop + 1) * sizeof(small), swapmask); + } + else + { + swap(u + loop - p, v + loop - p, (p + 1) * sizeof(small), swapmask); } #endif } c = mod3_reciprocal(f[p]); - vectormod3_product(r,p,u + p,c); - for (i = p;i < 768;++i) r[i] = 0; - return smaller_mask(0,d); + vectormod3_product(r, p, u + p, c); + for(i = p; i < 768; ++i) + r[i] = 0; + return smaller_mask(0, d); } #endif \ No newline at end of file diff --git a/crypto/libntrup/src/avx/rq.h b/crypto/libntrup/src/avx/rq.h index f9adfb7a3..0660af350 100644 --- a/crypto/libntrup/src/avx/rq.h +++ b/crypto/libntrup/src/avx/rq.h @@ -5,27 +5,35 @@ #include "small.h" #define rq_encode crypto_kem_sntrup4591761_avx_rq_encode -extern void rq_encode(unsigned char *,const modq *); +extern void +rq_encode(unsigned char *, const modq *); #define rq_decode crypto_kem_sntrup4591761_avx_rq_decode -extern void rq_decode(modq *,const unsigned char *); +extern void +rq_decode(modq *, const unsigned char *); #define rq_roundencode crypto_kem_sntrup4591761_avx_rq_roundencode -extern void rq_roundencode(unsigned char *,const modq *); +extern void +rq_roundencode(unsigned char *, const modq *); #define rq_decoderounded crypto_kem_sntrup4591761_avx_rq_decoderounded -extern void rq_decoderounded(modq *,const unsigned char *); +extern void +rq_decoderounded(modq *, const unsigned char *); #define rq_round3 crypto_kem_sntrup4591761_avx_rq_round3 -extern void rq_round3(modq *,const modq *); +extern void +rq_round3(modq *, const modq *); #define rq_mod3 crypto_kem_sntrup4591761_avx_rq_mod3 -extern void rq_mod3(small *,const modq *); +extern void +rq_mod3(small *, const modq *); #define rq_mult crypto_kem_sntrup4591761_avx_rq_mult -extern void rq_mult(modq *,const modq *,const small *); +extern void +rq_mult(modq *, const modq *, const small *); #define rq_recip3 crypto_kem_sntrup4591761_avx_rq_recip3 -int rq_recip3(modq *,const small *); +int +rq_recip3(modq *, const small *); #endif diff --git a/crypto/libntrup/src/avx/rq_mod3.c b/crypto/libntrup/src/avx/rq_mod3.c index 6f631d938..6a972acda 100644 --- a/crypto/libntrup/src/avx/rq_mod3.c +++ b/crypto/libntrup/src/avx/rq_mod3.c @@ -12,47 +12,57 @@ // 32-bit hosts only #ifndef __amd64__ -#define _mm_extract_epi64(X, N) (__extension__ ({ __v2di __a = (__v2di)(X); \ - __a[N];})) +#define _mm_extract_epi64(X, N) \ + (__extension__({ \ + __v2di __a = (__v2di)(X); \ + __a[N]; \ + })) #endif -static inline __m256i squeeze(__m256i x) +static inline __m256i +squeeze(__m256i x) { - __m256i q = _mm256_mulhrs_epi16(x,v7); - q = _mm256_mullo_epi16(q,v4591_16); - return _mm256_sub_epi16(x,q); + __m256i q = _mm256_mulhrs_epi16(x, v7); + q = _mm256_mullo_epi16(q, v4591_16); + return _mm256_sub_epi16(x, q); } -static inline __m256i freeze(__m256i x) +static inline __m256i +freeze(__m256i x) { __m256i mask, x2296, x4591; - x4591 = _mm256_add_epi16(x,v4591_16); - mask = _mm256_srai_epi16(x,15); - x = _mm256_blendv_epi8(x,x4591,mask); - x2296 = _mm256_sub_epi16(x,v2296_16); - mask = _mm256_srai_epi16(x2296,15); - x4591 = _mm256_sub_epi16(x,v4591_16); - x = _mm256_blendv_epi8(x4591,x,mask); + x4591 = _mm256_add_epi16(x, v4591_16); + mask = _mm256_srai_epi16(x, 15); + x = _mm256_blendv_epi8(x, x4591, mask); + x2296 = _mm256_sub_epi16(x, v2296_16); + mask = _mm256_srai_epi16(x2296, 15); + x4591 = _mm256_sub_epi16(x, v4591_16); + x = _mm256_blendv_epi8(x4591, x, mask); return x; } -void rq_mod3(small *g,const modq *f) +void +rq_mod3(small *g, const modq *f) { int i; - for (i = 0;i < 768;i += 16) { - __m256i x = _mm256_loadu_si256((__m256i *) &f[i]); + for(i = 0; i < 768; i += 16) + { + __m256i x = _mm256_loadu_si256((__m256i *)&f[i]); __m256i q; - x = _mm256_mullo_epi16(x,v3); + x = _mm256_mullo_epi16(x, v3); x = squeeze(x); x = freeze(x); - q = _mm256_mulhrs_epi16(x,v10923_16); - x = _mm256_sub_epi16(x,q); - q = _mm256_add_epi16(q,q); - x = _mm256_sub_epi16(x,q); /* g0 g1 ... g15 */ - x = _mm256_packs_epi16(x,x); /* g0 ... g7 g0 ... g7 g8 ... g15 g8 ... g15 */ - 0[(long long *) &g[i]] = _mm_extract_epi64(_mm256_extracti128_si256(x,0),0); - 1[(long long *) &g[i]] = _mm_extract_epi64(_mm256_extracti128_si256(x,1),0); + q = _mm256_mulhrs_epi16(x, v10923_16); + x = _mm256_sub_epi16(x, q); + q = _mm256_add_epi16(q, q); + x = _mm256_sub_epi16(x, q); /* g0 g1 ... g15 */ + x = _mm256_packs_epi16(x, + x); /* g0 ... g7 g0 ... g7 g8 ... g15 g8 ... g15 */ + 0 [(long long *)&g[i]] = + _mm_extract_epi64(_mm256_extracti128_si256(x, 0), 0); + 1 [(long long *)&g[i]] = + _mm_extract_epi64(_mm256_extracti128_si256(x, 1), 0); } } #endif \ No newline at end of file diff --git a/crypto/libntrup/src/avx/rq_recip3.c b/crypto/libntrup/src/avx/rq_recip3.c index 0e666e1be..5b4e96d6b 100644 --- a/crypto/libntrup/src/avx/rq_recip3.c +++ b/crypto/libntrup/src/avx/rq_recip3.c @@ -10,93 +10,103 @@ #define v29234_16 _mm256_set1_epi16(29234) /* caller must ensure that x-y does not overflow */ -static int smaller_mask(int x,int y) +static int +smaller_mask(int x, int y) { return (x - y) >> 31; } -static inline __m256i product(__m256i x,__m256i y) +static inline __m256i +product(__m256i x, __m256i y) { __m256i lo, hi, r0, r1, t0, t1, t, s0, s1; - lo = _mm256_mullo_epi16(x,y); - hi = _mm256_mulhi_epi16(x,y); - r0 = _mm256_unpacklo_epi16(lo,hi); - r1 = _mm256_unpackhi_epi16(lo,hi); - - t0 = _mm256_srai_epi32(r0,16); - t1 = _mm256_srai_epi32(r1,16); - t = _mm256_packs_epi32(t0,t1); - t = _mm256_mulhrs_epi16(t,v29234_16); - lo = _mm256_mullo_epi16(t,v4591_16); - hi = _mm256_mulhi_epi16(t,v4591_16); - s0 = _mm256_unpacklo_epi16(lo,hi); - s1 = _mm256_unpackhi_epi16(lo,hi); - s0 = _mm256_slli_epi32(s0,4); - s1 = _mm256_slli_epi32(s1,4); - r0 = _mm256_sub_epi32(r0,s0); - r1 = _mm256_sub_epi32(r1,s1); - - t0 = _mm256_srai_epi32(r0,8); - t1 = _mm256_srai_epi32(r1,8); - t = _mm256_packs_epi32(t0,t1); - t = _mm256_mulhrs_epi16(t,v1827_16); - lo = _mm256_mullo_epi16(t,v4591_16); - hi = _mm256_mulhi_epi16(t,v4591_16); - s0 = _mm256_unpacklo_epi16(lo,hi); - s1 = _mm256_unpackhi_epi16(lo,hi); - r0 = _mm256_sub_epi32(r0,s0); - r1 = _mm256_sub_epi32(r1,s1); - - x = _mm256_packs_epi32(r0,r1); + lo = _mm256_mullo_epi16(x, y); + hi = _mm256_mulhi_epi16(x, y); + r0 = _mm256_unpacklo_epi16(lo, hi); + r1 = _mm256_unpackhi_epi16(lo, hi); + + t0 = _mm256_srai_epi32(r0, 16); + t1 = _mm256_srai_epi32(r1, 16); + t = _mm256_packs_epi32(t0, t1); + t = _mm256_mulhrs_epi16(t, v29234_16); + lo = _mm256_mullo_epi16(t, v4591_16); + hi = _mm256_mulhi_epi16(t, v4591_16); + s0 = _mm256_unpacklo_epi16(lo, hi); + s1 = _mm256_unpackhi_epi16(lo, hi); + s0 = _mm256_slli_epi32(s0, 4); + s1 = _mm256_slli_epi32(s1, 4); + r0 = _mm256_sub_epi32(r0, s0); + r1 = _mm256_sub_epi32(r1, s1); + + t0 = _mm256_srai_epi32(r0, 8); + t1 = _mm256_srai_epi32(r1, 8); + t = _mm256_packs_epi32(t0, t1); + t = _mm256_mulhrs_epi16(t, v1827_16); + lo = _mm256_mullo_epi16(t, v4591_16); + hi = _mm256_mulhi_epi16(t, v4591_16); + s0 = _mm256_unpacklo_epi16(lo, hi); + s1 = _mm256_unpackhi_epi16(lo, hi); + r0 = _mm256_sub_epi32(r0, s0); + r1 = _mm256_sub_epi32(r1, s1); + + x = _mm256_packs_epi32(r0, r1); return x; } -static inline __m256i minusproduct(__m256i x,__m256i y,__m256i z) +static inline __m256i +minusproduct(__m256i x, __m256i y, __m256i z) { __m256i t; - x = _mm256_sub_epi16(x,product(y,z)); - t = _mm256_mulhrs_epi16(x,v7); - t = _mm256_mullo_epi16(t,v4591_16); - x = _mm256_sub_epi16(x,t); + x = _mm256_sub_epi16(x, product(y, z)); + t = _mm256_mulhrs_epi16(x, v7); + t = _mm256_mullo_epi16(t, v4591_16); + x = _mm256_sub_epi16(x, t); return x; } -static void vectormodq_product(modq *z,int len,const modq *x,const modq c) +static void +vectormodq_product(modq *z, int len, const modq *x, const modq c) { __m256i cvec = _mm256_set1_epi16(c); - while (len >= 16) { - __m256i xi = _mm256_loadu_si256((__m256i *) x); - xi = product(xi,cvec); - _mm256_storeu_si256((__m256i *) z,xi); + while(len >= 16) + { + __m256i xi = _mm256_loadu_si256((__m256i *)x); + xi = product(xi, cvec); + _mm256_storeu_si256((__m256i *)z, xi); x += 16; z += 16; len -= 16; } - while (len > 0) { - *z = modq_product(*x,c); + while(len > 0) + { + *z = modq_product(*x, c); ++x; ++z; --len; } } -static void vectormodq_minusproduct(modq *z,int len,const modq *x,const modq *y,const modq c) +static void +vectormodq_minusproduct(modq *z, int len, const modq *x, const modq *y, + const modq c) { __m256i cvec = _mm256_set1_epi16(c); - while (len >= 16) { - __m256i xi = _mm256_loadu_si256((__m256i *) x); - __m256i yi = _mm256_loadu_si256((__m256i *) y); - xi = minusproduct(xi,yi,cvec); - _mm256_storeu_si256((__m256i *) z,xi); + while(len >= 16) + { + __m256i xi = _mm256_loadu_si256((__m256i *)x); + __m256i yi = _mm256_loadu_si256((__m256i *)y); + xi = minusproduct(xi, yi, cvec); + _mm256_storeu_si256((__m256i *)z, xi); x += 16; y += 16; z += 16; len -= 16; } - while (len > 0) { - *z = modq_minusproduct(*x,*y,c); + while(len > 0) + { + *z = modq_minusproduct(*x, *y, c); ++x; ++y; ++z; @@ -104,15 +114,18 @@ static void vectormodq_minusproduct(modq *z,int len,const modq *x,const modq *y, } } -static void vectormodq_shift(modq *z,int len) +static void +vectormodq_shift(modq *z, int len) { int i; - while (len >= 17) { - __m256i zi = _mm256_loadu_si256((__m256i *) (z + len - 17)); - _mm256_storeu_si256((__m256i *) (z + len - 16),zi); + while(len >= 17) + { + __m256i zi = _mm256_loadu_si256((__m256i *)(z + len - 17)); + _mm256_storeu_si256((__m256i *)(z + len - 16), zi); len -= 16; } - for (i = len - 1;i > 0;--i) z[i] = z[i - 1]; + for(i = len - 1; i > 0; --i) + z[i] = z[i - 1]; z[0] = 0; } @@ -122,9 +135,10 @@ or returning -1 if s is not invertible mod m r,s are polys of degree

= loops) break; + if(loop >= loops) + break; - c = modq_quotient(g[p],f[p]); + c = modq_quotient(g[p], f[p]); - vectormodq_minusproduct(g,768,g,f,c); - vectormodq_shift(g,769); + vectormodq_minusproduct(g, 768, g, f, c); + vectormodq_shift(g, 769); #ifdef SIMPLER - vectormodq_minusproduct(v,1536,v,u,c); - vectormodq_shift(v,1537); + vectormodq_minusproduct(v, 1536, v, u, c); + vectormodq_shift(v, 1537); #else - if (loop < p) { - vectormodq_minusproduct(v,loop + 1,v,u,c); - vectormodq_shift(v,loop + 2); - } else { - vectormodq_minusproduct(v + loop - p,p + 1,v + loop - p,u + loop - p,c); - vectormodq_shift(v + loop - p,p + 2); + if(loop < p) + { + vectormodq_minusproduct(v, loop + 1, v, u, c); + vectormodq_shift(v, loop + 2); + } + else + { + vectormodq_minusproduct(v + loop - p, p + 1, v + loop - p, u + loop - p, + c); + vectormodq_shift(v + loop - p, p + 2); } #endif @@ -193,25 +218,30 @@ int rq_recip3(modq *r,const small *s) ++loop; - swapmask = smaller_mask(e,d) & modq_nonzero_mask(g[p]); - swap(&e,&d,sizeof e,swapmask); - swap(f,g,768 * sizeof(modq),swapmask); + swapmask = smaller_mask(e, d) & modq_nonzero_mask(g[p]); + swap(&e, &d, sizeof e, swapmask); + swap(f, g, 768 * sizeof(modq), swapmask); #ifdef SIMPLER - swap(u,v,1536 * sizeof(modq),swapmask); + swap(u, v, 1536 * sizeof(modq), swapmask); #else - if (loop < p) { - swap(u,v,(loop + 1) * sizeof(modq),swapmask); - } else { - swap(u + loop - p,v + loop - p,(p + 1) * sizeof(modq),swapmask); + if(loop < p) + { + swap(u, v, (loop + 1) * sizeof(modq), swapmask); + } + else + { + swap(u + loop - p, v + loop - p, (p + 1) * sizeof(modq), swapmask); } #endif } c = modq_reciprocal(f[p]); - vectormodq_product(r,p,u + p,c); - for (i = 0;i < p;++i) r[i] = modq_freeze(r[i]); - for (i = p;i < 768;++i) r[i] = 0; - return smaller_mask(0,d); + vectormodq_product(r, p, u + p, c); + for(i = 0; i < p; ++i) + r[i] = modq_freeze(r[i]); + for(i = p; i < 768; ++i) + r[i] = 0; + return smaller_mask(0, d); } #endif \ No newline at end of file diff --git a/crypto/libntrup/src/avx/rq_round3.c b/crypto/libntrup/src/avx/rq_round3.c index d66d8c8ce..e6c6569b8 100644 --- a/crypto/libntrup/src/avx/rq_round3.c +++ b/crypto/libntrup/src/avx/rq_round3.c @@ -6,17 +6,19 @@ #define v3_16 _mm256_set1_epi16(3) #define v10923_16 _mm256_set1_epi16(10923) -void rq_round3(modq *h,const modq *f) +void +rq_round3(modq *h, const modq *f) { int i; - for (i = 0;i < 768;i += 16) { - __m256i x = _mm256_loadu_si256((__m256i *) &f[i]); + for(i = 0; i < 768; i += 16) + { + __m256i x = _mm256_loadu_si256((__m256i *)&f[i]); __m256i x2; - x = _mm256_mulhrs_epi16(x,v10923_16); - x2 = _mm256_add_epi16(x,x); - x = _mm256_add_epi16(x,x2); - _mm256_storeu_si256((__m256i *) &h[i],x); + x = _mm256_mulhrs_epi16(x, v10923_16); + x2 = _mm256_add_epi16(x, x); + x = _mm256_add_epi16(x, x2); + _mm256_storeu_si256((__m256i *)&h[i], x); } } #endif \ No newline at end of file diff --git a/crypto/libntrup/src/avx/rq_rounded.c b/crypto/libntrup/src/avx/rq_rounded.c index da35da348..512f59732 100644 --- a/crypto/libntrup/src/avx/rq_rounded.c +++ b/crypto/libntrup/src/avx/rq_rounded.c @@ -164,35 +164,40 @@ rq_decoderounded(modq *f, const unsigned char *c) /* x is f0 + f1*1536 + f2*1536^2 */ /* with each f between 0 and 1530 */ - f2 = x + f2 = + x * _mm256_set1_pd( - 0.00000042385525173611114052197733521876177320564238470979034900665283203125); + 0.00000042385525173611114052197733521876177320564238470979034900665283203125); f2 = floor(f2); x -= f2 * _mm256_set1_pd(2359296.0); - f1 = x + f1 = + x * _mm256_set1_pd( - 0.00065104166666666673894681149903362893383018672466278076171875); + 0.00065104166666666673894681149903362893383018672466278076171875); f1 = floor(f1); x -= f1 * _mm256_set1_pd(1536.0); f0 = x; - f2 -= _mm256_set1_pd(1531.0) + f2 -= + _mm256_set1_pd(1531.0) * floor( - f2 - * _mm256_set1_pd( - 0.0006531678641410842804659875326933615724556148052215576171875)); - f1 -= _mm256_set1_pd(1531.0) + f2 + * _mm256_set1_pd( + 0.0006531678641410842804659875326933615724556148052215576171875)); + f1 -= + _mm256_set1_pd(1531.0) * floor( - f1 - * _mm256_set1_pd( - 0.0006531678641410842804659875326933615724556148052215576171875)); - f0 -= _mm256_set1_pd(1531.0) + f1 + * _mm256_set1_pd( + 0.0006531678641410842804659875326933615724556148052215576171875)); + f0 -= + _mm256_set1_pd(1531.0) * floor( - f0 - * _mm256_set1_pd( - 0.0006531678641410842804659875326933615724556148052215576171875)); + f0 + * _mm256_set1_pd( + 0.0006531678641410842804659875326933615724556148052215576171875)); f2 *= _mm256_set1_pd(3.0); f2 -= _mm256_set1_pd(2295.0); diff --git a/crypto/libntrup/src/avx/swap.c b/crypto/libntrup/src/avx/swap.c index f0d37b424..569470639 100644 --- a/crypto/libntrup/src/avx/swap.c +++ b/crypto/libntrup/src/avx/swap.c @@ -2,30 +2,33 @@ #include #include "swap.h" -void swap(void *x,void *y,int bytes,int mask) +void +swap(void *x, void *y, int bytes, int mask) { - char c = mask; + char c = mask; __m256i maskvec = _mm256_set1_epi32(mask); - - while (bytes >= 32) { - __m256i xi = _mm256_loadu_si256(x); - __m256i yi = _mm256_loadu_si256(y); - __m256i xinew = _mm256_blendv_epi8(xi,yi,maskvec); - __m256i yinew = _mm256_blendv_epi8(yi,xi,maskvec); - _mm256_storeu_si256(x,xinew); - _mm256_storeu_si256(y,yinew); - x = 32 + (char *) x; - y = 32 + (char *) y; + + while(bytes >= 32) + { + __m256i xi = _mm256_loadu_si256(x); + __m256i yi = _mm256_loadu_si256(y); + __m256i xinew = _mm256_blendv_epi8(xi, yi, maskvec); + __m256i yinew = _mm256_blendv_epi8(yi, xi, maskvec); + _mm256_storeu_si256(x, xinew); + _mm256_storeu_si256(y, yinew); + x = 32 + (char *)x; + y = 32 + (char *)y; bytes -= 32; } - while (bytes > 0) { - char xi = *(char *) x; - char yi = *(char *) y; - char t = c & (xi ^ yi); + while(bytes > 0) + { + char xi = *(char *)x; + char yi = *(char *)y; + char t = c & (xi ^ yi); xi ^= t; yi ^= t; - *(char *) x = xi; - *(char *) y = yi; + *(char *)x = xi; + *(char *)y = yi; ++x; ++y; --bytes; diff --git a/crypto/libntrup/src/avx/swap.h b/crypto/libntrup/src/avx/swap.h index dcfba2c4a..1f56ccf1e 100644 --- a/crypto/libntrup/src/avx/swap.h +++ b/crypto/libntrup/src/avx/swap.h @@ -2,6 +2,7 @@ #define swap_h #define swap crypto_kem_sntrup4591761_avx_swap -extern void swap(void *,void *,int,int); +extern void +swap(void *, void *, int, int); #endif diff --git a/crypto/libntrup/src/ntru.cpp b/crypto/libntrup/src/ntru.cpp index 812154707..c27d38402 100644 --- a/crypto/libntrup/src/ntru.cpp +++ b/crypto/libntrup/src/ntru.cpp @@ -1,7 +1,7 @@ #include #include -#include // printf +#include // printf #if __AVX2__ #include diff --git a/crypto/libntrup/src/ref/params.h b/crypto/libntrup/src/ref/params.h index 7458ed8c5..c55d435e1 100644 --- a/crypto/libntrup/src/ref/params.h +++ b/crypto/libntrup/src/ref/params.h @@ -7,7 +7,7 @@ #define qshift 2295 #define p 761 #ifdef _MSC_VER -#define LOOPS 2*p+1 +#define LOOPS 2 * p + 1 #endif #define w 286 diff --git a/crypto/libntrup/src/ref/r3.h b/crypto/libntrup/src/ref/r3.h index 4308dd935..07a5fb4b9 100644 --- a/crypto/libntrup/src/ref/r3.h +++ b/crypto/libntrup/src/ref/r3.h @@ -4,9 +4,11 @@ #include "small.h" #define r3_mult crypto_kem_sntrup4591761_ref_r3_mult -extern void r3_mult(small *,const small *,const small *); +extern void +r3_mult(small *, const small *, const small *); #define r3_recip crypto_kem_sntrup4591761_ref_r3_recip -extern int r3_recip(small *,const small *); +extern int +r3_recip(small *, const small *); #endif diff --git a/crypto/libntrup/src/ref/r3_mult.c b/crypto/libntrup/src/ref/r3_mult.c index 0a4273dce..8569315b1 100644 --- a/crypto/libntrup/src/ref/r3_mult.c +++ b/crypto/libntrup/src/ref/r3_mult.c @@ -2,30 +2,34 @@ #include "mod3.h" #include "r3.h" -void r3_mult(small *h,const small *f,const small *g) +void +r3_mult(small *h, const small *f, const small *g) { small fg[p + p - 1]; small result; int i, j; - for (i = 0;i < p;++i) { + for(i = 0; i < p; ++i) + { result = 0; - for (j = 0;j <= i;++j) - result = mod3_plusproduct(result,f[j],g[i - j]); + for(j = 0; j <= i; ++j) + result = mod3_plusproduct(result, f[j], g[i - j]); fg[i] = result; } - for (i = p;i < p + p - 1;++i) { + for(i = p; i < p + p - 1; ++i) + { result = 0; - for (j = i - p + 1;j < p;++j) - result = mod3_plusproduct(result,f[j],g[i - j]); + for(j = i - p + 1; j < p; ++j) + result = mod3_plusproduct(result, f[j], g[i - j]); fg[i] = result; } - for (i = p + p - 2;i >= p;--i) { - fg[i - p] = mod3_sum(fg[i - p],fg[i]); - fg[i - p + 1] = mod3_sum(fg[i - p + 1],fg[i]); + for(i = p + p - 2; i >= p; --i) + { + fg[i - p] = mod3_sum(fg[i - p], fg[i]); + fg[i - p + 1] = mod3_sum(fg[i - p + 1], fg[i]); } - for (i = 0;i < p;++i) + for(i = 0; i < p; ++i) h[i] = fg[i]; } diff --git a/crypto/libntrup/src/ref/rq.h b/crypto/libntrup/src/ref/rq.h index c8007896b..69d800d95 100644 --- a/crypto/libntrup/src/ref/rq.h +++ b/crypto/libntrup/src/ref/rq.h @@ -5,24 +5,31 @@ #include "small.h" #define rq_encode crypto_kem_sntrup4591761_ref_rq_encode -extern void rq_encode(unsigned char *,const modq *); +extern void +rq_encode(unsigned char *, const modq *); #define rq_decode crypto_kem_sntrup4591761_ref_rq_decode -extern void rq_decode(modq *,const unsigned char *); +extern void +rq_decode(modq *, const unsigned char *); #define rq_encoderounded crypto_kem_sntrup4591761_ref_rq_encoderounded -extern void rq_encoderounded(unsigned char *,const modq *); +extern void +rq_encoderounded(unsigned char *, const modq *); #define rq_decoderounded crypto_kem_sntrup4591761_ref_rq_decoderounded -extern void rq_decoderounded(modq *,const unsigned char *); +extern void +rq_decoderounded(modq *, const unsigned char *); #define rq_round3 crypto_kem_sntrup4591761_ref_rq_round -extern void rq_round3(modq *,const modq *); +extern void +rq_round3(modq *, const modq *); #define rq_mult crypto_kem_sntrup4591761_ref_rq_mult -extern void rq_mult(modq *,const modq *,const small *); +extern void +rq_mult(modq *, const modq *, const small *); #define rq_recip3 crypto_kem_sntrup4591761_ref_rq_recip3 -int rq_recip3(modq *,const small *); +int +rq_recip3(modq *, const small *); #endif diff --git a/crypto/libntrup/src/ref/rq_mult.c b/crypto/libntrup/src/ref/rq_mult.c index 86dc7da03..e8be2f2cf 100644 --- a/crypto/libntrup/src/ref/rq_mult.c +++ b/crypto/libntrup/src/ref/rq_mult.c @@ -1,30 +1,34 @@ #include "params.h" #include "rq.h" -void rq_mult(modq *h,const modq *f,const small *g) +void +rq_mult(modq *h, const modq *f, const small *g) { modq fg[p + p - 1]; modq result; int i, j; - for (i = 0;i < p;++i) { + for(i = 0; i < p; ++i) + { result = 0; - for (j = 0;j <= i;++j) - result = modq_plusproduct(result,f[j],g[i - j]); + for(j = 0; j <= i; ++j) + result = modq_plusproduct(result, f[j], g[i - j]); fg[i] = result; } - for (i = p;i < p + p - 1;++i) { + for(i = p; i < p + p - 1; ++i) + { result = 0; - for (j = i - p + 1;j < p;++j) - result = modq_plusproduct(result,f[j],g[i - j]); + for(j = i - p + 1; j < p; ++j) + result = modq_plusproduct(result, f[j], g[i - j]); fg[i] = result; } - for (i = p + p - 2;i >= p;--i) { - fg[i - p] = modq_sum(fg[i - p],fg[i]); - fg[i - p + 1] = modq_sum(fg[i - p + 1],fg[i]); + for(i = p + p - 2; i >= p; --i) + { + fg[i - p] = modq_sum(fg[i - p], fg[i]); + fg[i - p + 1] = modq_sum(fg[i - p + 1], fg[i]); } - for (i = 0;i < p;++i) + for(i = 0; i < p; ++i) h[i] = fg[i]; } diff --git a/crypto/libntrup/src/ref/rq_round3.c b/crypto/libntrup/src/ref/rq_round3.c index c972e8e4e..41853a30a 100644 --- a/crypto/libntrup/src/ref/rq_round3.c +++ b/crypto/libntrup/src/ref/rq_round3.c @@ -1,10 +1,11 @@ #include "params.h" #include "rq.h" -void rq_round3(modq *h,const modq *f) +void +rq_round3(modq *h, const modq *f) { int i; - for (i = 0;i < p;++i) + for(i = 0; i < p; ++i) h[i] = ((21846 * (f[i] + 2295) + 32768) >> 16) * 3 - 2295; } diff --git a/crypto/libntrup/src/ref/small.c b/crypto/libntrup/src/ref/small.c index 270dcbe28..209de2369 100644 --- a/crypto/libntrup/src/ref/small.c +++ b/crypto/libntrup/src/ref/small.c @@ -4,34 +4,41 @@ /* XXX: these functions rely on p mod 4 = 1 */ /* all coefficients in -1, 0, 1 */ -void small_encode(unsigned char *c,const small *f) +void +small_encode(unsigned char *c, const small *f) { small c0; int i; - for (i = 0;i < p/4;++i) { + for(i = 0; i < p / 4; ++i) + { c0 = *f++ + 1; c0 += (*f++ + 1) << 2; c0 += (*f++ + 1) << 4; c0 += (*f++ + 1) << 6; *c++ = c0; } - c0 = *f++ + 1; + c0 = *f++ + 1; *c++ = c0; } -void small_decode(small *f,const unsigned char *c) +void +small_decode(small *f, const unsigned char *c) { unsigned char c0; int i; - for (i = 0;i < p/4;++i) { - c0 = *c++; - *f++ = ((small) (c0 & 3)) - 1; c0 >>= 2; - *f++ = ((small) (c0 & 3)) - 1; c0 >>= 2; - *f++ = ((small) (c0 & 3)) - 1; c0 >>= 2; - *f++ = ((small) (c0 & 3)) - 1; + for(i = 0; i < p / 4; ++i) + { + c0 = *c++; + *f++ = ((small)(c0 & 3)) - 1; + c0 >>= 2; + *f++ = ((small)(c0 & 3)) - 1; + c0 >>= 2; + *f++ = ((small)(c0 & 3)) - 1; + c0 >>= 2; + *f++ = ((small)(c0 & 3)) - 1; } - c0 = *c++; - *f++ = ((small) (c0 & 3)) - 1; + c0 = *c++; + *f++ = ((small)(c0 & 3)) - 1; } diff --git a/crypto/libntrup/src/ref/swap.c b/crypto/libntrup/src/ref/swap.c index 76cfb2d07..2051f1c5d 100644 --- a/crypto/libntrup/src/ref/swap.c +++ b/crypto/libntrup/src/ref/swap.c @@ -1,19 +1,21 @@ #include "swap.h" -void swap(void *x,void *y,int bytes,int mask) +void +swap(void *x, void *y, int bytes, int mask) { int i; char xi, yi, c, t; c = mask; - - for (i = 0;i < bytes;++i) { - xi = i[(char *) x]; - yi = i[(char *) y]; - t = c & (xi ^ yi); + + for(i = 0; i < bytes; ++i) + { + xi = i[(char *)x]; + yi = i[(char *)y]; + t = c & (xi ^ yi); xi ^= t; yi ^= t; - i[(char *) x] = xi; - i[(char *) y] = yi; + i[(char *)x] = xi; + i[(char *)y] = yi; } } diff --git a/crypto/libntrup/src/ref/swap.h b/crypto/libntrup/src/ref/swap.h index fe13c4e52..2c3f3a8a5 100644 --- a/crypto/libntrup/src/ref/swap.h +++ b/crypto/libntrup/src/ref/swap.h @@ -2,6 +2,7 @@ #define swap_h #define swap crypto_kem_sntrup4591761_ref_swap -extern void swap(void *,void *,int,int); +extern void +swap(void *, void *, int, int); #endif diff --git a/crypto/libsodium/init.c b/crypto/libsodium/init.c index f5232c79d..525942265 100644 --- a/crypto/libsodium/init.c +++ b/crypto/libsodium/init.c @@ -36,7 +36,7 @@ sodium_init(void) return -1; /* LCOV_EXCL_LINE */ } /* if we're here, we already started properly */ - return initialized ? 0: -1; + return initialized ? 0 : -1; } _sodium_runtime_get_cpu_features(); _crypto_generichash_blake2b_pick_best_implementation(); diff --git a/crypto/salsa20/core_salsa_ref.c b/crypto/salsa20/core_salsa_ref.c index f470d3c87..cd643b595 100644 --- a/crypto/salsa20/core_salsa_ref.c +++ b/crypto/salsa20/core_salsa_ref.c @@ -10,116 +10,116 @@ crypto_core_salsa(unsigned char *out, const unsigned char *in, const unsigned char *k, const unsigned char *c, const int rounds) { - uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, - x15; - uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, - j15; - int i; + uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; + uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; + int i; - j0 = x0 = 0x61707865; - j5 = x5 = 0x3320646e; - j10 = x10 = 0x79622d32; - j15 = x15 = 0x6b206574; - if (c != NULL) { - j0 = x0 = LOAD32_LE(c + 0); - j5 = x5 = LOAD32_LE(c + 4); - j10 = x10 = LOAD32_LE(c + 8); - j15 = x15 = LOAD32_LE(c + 12); - } - j1 = x1 = LOAD32_LE(k + 0); - j2 = x2 = LOAD32_LE(k + 4); - j3 = x3 = LOAD32_LE(k + 8); - j4 = x4 = LOAD32_LE(k + 12); - j11 = x11 = LOAD32_LE(k + 16); - j12 = x12 = LOAD32_LE(k + 20); - j13 = x13 = LOAD32_LE(k + 24); - j14 = x14 = LOAD32_LE(k + 28); + j0 = x0 = 0x61707865; + j5 = x5 = 0x3320646e; + j10 = x10 = 0x79622d32; + j15 = x15 = 0x6b206574; + if(c != NULL) + { + j0 = x0 = LOAD32_LE(c + 0); + j5 = x5 = LOAD32_LE(c + 4); + j10 = x10 = LOAD32_LE(c + 8); + j15 = x15 = LOAD32_LE(c + 12); + } + j1 = x1 = LOAD32_LE(k + 0); + j2 = x2 = LOAD32_LE(k + 4); + j3 = x3 = LOAD32_LE(k + 8); + j4 = x4 = LOAD32_LE(k + 12); + j11 = x11 = LOAD32_LE(k + 16); + j12 = x12 = LOAD32_LE(k + 20); + j13 = x13 = LOAD32_LE(k + 24); + j14 = x14 = LOAD32_LE(k + 28); - j6 = x6 = LOAD32_LE(in + 0); - j7 = x7 = LOAD32_LE(in + 4); - j8 = x8 = LOAD32_LE(in + 8); - j9 = x9 = LOAD32_LE(in + 12); + j6 = x6 = LOAD32_LE(in + 0); + j7 = x7 = LOAD32_LE(in + 4); + j8 = x8 = LOAD32_LE(in + 8); + j9 = x9 = LOAD32_LE(in + 12); - for (i = 0; i < rounds; i += 2) { - x4 ^= ROTL32(x0 + x12, 7); - x8 ^= ROTL32(x4 + x0, 9); - x12 ^= ROTL32(x8 + x4, 13); - x0 ^= ROTL32(x12 + x8, 18); - x9 ^= ROTL32(x5 + x1, 7); - x13 ^= ROTL32(x9 + x5, 9); - x1 ^= ROTL32(x13 + x9, 13); - x5 ^= ROTL32(x1 + x13, 18); - x14 ^= ROTL32(x10 + x6, 7); - x2 ^= ROTL32(x14 + x10, 9); - x6 ^= ROTL32(x2 + x14, 13); - x10 ^= ROTL32(x6 + x2, 18); - x3 ^= ROTL32(x15 + x11, 7); - x7 ^= ROTL32(x3 + x15, 9); - x11 ^= ROTL32(x7 + x3, 13); - x15 ^= ROTL32(x11 + x7, 18); - x1 ^= ROTL32(x0 + x3, 7); - x2 ^= ROTL32(x1 + x0, 9); - x3 ^= ROTL32(x2 + x1, 13); - x0 ^= ROTL32(x3 + x2, 18); - x6 ^= ROTL32(x5 + x4, 7); - x7 ^= ROTL32(x6 + x5, 9); - x4 ^= ROTL32(x7 + x6, 13); - x5 ^= ROTL32(x4 + x7, 18); - x11 ^= ROTL32(x10 + x9, 7); - x8 ^= ROTL32(x11 + x10, 9); - x9 ^= ROTL32(x8 + x11, 13); - x10 ^= ROTL32(x9 + x8, 18); - x12 ^= ROTL32(x15 + x14, 7); - x13 ^= ROTL32(x12 + x15, 9); - x14 ^= ROTL32(x13 + x12, 13); - x15 ^= ROTL32(x14 + x13, 18); - } - STORE32_LE(out + 0, x0 + j0); - STORE32_LE(out + 4, x1 + j1); - STORE32_LE(out + 8, x2 + j2); - STORE32_LE(out + 12, x3 + j3); - STORE32_LE(out + 16, x4 + j4); - STORE32_LE(out + 20, x5 + j5); - STORE32_LE(out + 24, x6 + j6); - STORE32_LE(out + 28, x7 + j7); - STORE32_LE(out + 32, x8 + j8); - STORE32_LE(out + 36, x9 + j9); - STORE32_LE(out + 40, x10 + j10); - STORE32_LE(out + 44, x11 + j11); - STORE32_LE(out + 48, x12 + j12); - STORE32_LE(out + 52, x13 + j13); - STORE32_LE(out + 56, x14 + j14); - STORE32_LE(out + 60, x15 + j15); + for(i = 0; i < rounds; i += 2) + { + x4 ^= ROTL32(x0 + x12, 7); + x8 ^= ROTL32(x4 + x0, 9); + x12 ^= ROTL32(x8 + x4, 13); + x0 ^= ROTL32(x12 + x8, 18); + x9 ^= ROTL32(x5 + x1, 7); + x13 ^= ROTL32(x9 + x5, 9); + x1 ^= ROTL32(x13 + x9, 13); + x5 ^= ROTL32(x1 + x13, 18); + x14 ^= ROTL32(x10 + x6, 7); + x2 ^= ROTL32(x14 + x10, 9); + x6 ^= ROTL32(x2 + x14, 13); + x10 ^= ROTL32(x6 + x2, 18); + x3 ^= ROTL32(x15 + x11, 7); + x7 ^= ROTL32(x3 + x15, 9); + x11 ^= ROTL32(x7 + x3, 13); + x15 ^= ROTL32(x11 + x7, 18); + x1 ^= ROTL32(x0 + x3, 7); + x2 ^= ROTL32(x1 + x0, 9); + x3 ^= ROTL32(x2 + x1, 13); + x0 ^= ROTL32(x3 + x2, 18); + x6 ^= ROTL32(x5 + x4, 7); + x7 ^= ROTL32(x6 + x5, 9); + x4 ^= ROTL32(x7 + x6, 13); + x5 ^= ROTL32(x4 + x7, 18); + x11 ^= ROTL32(x10 + x9, 7); + x8 ^= ROTL32(x11 + x10, 9); + x9 ^= ROTL32(x8 + x11, 13); + x10 ^= ROTL32(x9 + x8, 18); + x12 ^= ROTL32(x15 + x14, 7); + x13 ^= ROTL32(x12 + x15, 9); + x14 ^= ROTL32(x13 + x12, 13); + x15 ^= ROTL32(x14 + x13, 18); + } + STORE32_LE(out + 0, x0 + j0); + STORE32_LE(out + 4, x1 + j1); + STORE32_LE(out + 8, x2 + j2); + STORE32_LE(out + 12, x3 + j3); + STORE32_LE(out + 16, x4 + j4); + STORE32_LE(out + 20, x5 + j5); + STORE32_LE(out + 24, x6 + j6); + STORE32_LE(out + 28, x7 + j7); + STORE32_LE(out + 32, x8 + j8); + STORE32_LE(out + 36, x9 + j9); + STORE32_LE(out + 40, x10 + j10); + STORE32_LE(out + 44, x11 + j11); + STORE32_LE(out + 48, x12 + j12); + STORE32_LE(out + 52, x13 + j13); + STORE32_LE(out + 56, x14 + j14); + STORE32_LE(out + 60, x15 + j15); } int crypto_core_salsa20(unsigned char *out, const unsigned char *in, const unsigned char *k, const unsigned char *c) { - crypto_core_salsa(out, in, k, c, 20); - return 0; + crypto_core_salsa(out, in, k, c, 20); + return 0; } size_t crypto_core_salsa20_outputbytes(void) { - return crypto_core_salsa20_OUTPUTBYTES; + return crypto_core_salsa20_OUTPUTBYTES; } size_t crypto_core_salsa20_inputbytes(void) { - return crypto_core_salsa20_INPUTBYTES; + return crypto_core_salsa20_INPUTBYTES; } size_t crypto_core_salsa20_keybytes(void) { - return crypto_core_salsa20_KEYBYTES; + return crypto_core_salsa20_KEYBYTES; } size_t crypto_core_salsa20_constbytes(void) { - return crypto_core_salsa20_CONSTBYTES; + return crypto_core_salsa20_CONSTBYTES; } diff --git a/crypto/salsa20/ref/salsa20_ref.c b/crypto/salsa20/ref/salsa20_ref.c index 674b1d951..1d9b2fa73 100644 --- a/crypto/salsa20/ref/salsa20_ref.c +++ b/crypto/salsa20/ref/salsa20_ref.c @@ -13,7 +13,6 @@ Public domain. #include "../stream_salsa20.h" #include "salsa20_ref.h" - static int stream_ref(unsigned char *c, unsigned long long clen, const unsigned char *n, const unsigned char *k) @@ -132,4 +131,3 @@ struct crypto_stream_salsa20_implementation SODIUM_C99(.stream =) stream_ref, SODIUM_C99(.stream_xor_ic =) stream_ref_xor_ic, }; - diff --git a/crypto/salsa20/stream_salsa20.h b/crypto/salsa20/stream_salsa20.h index 1949d3811..99523d3a6 100644 --- a/crypto/salsa20/stream_salsa20.h +++ b/crypto/salsa20/stream_salsa20.h @@ -4,13 +4,13 @@ #include -typedef struct crypto_stream_salsa20_implementation { - int (*stream)(unsigned char *c, unsigned long long clen, - const unsigned char *n, const unsigned char *k); - int (*stream_xor_ic)(unsigned char *c, const unsigned char *m, - unsigned long long mlen, - const unsigned char *n, uint64_t ic, - const unsigned char *k); +typedef struct crypto_stream_salsa20_implementation +{ + int (*stream)(unsigned char *c, unsigned long long clen, + const unsigned char *n, const unsigned char *k); + int (*stream_xor_ic)(unsigned char *c, const unsigned char *m, + unsigned long long mlen, const unsigned char *n, + uint64_t ic, const unsigned char *k); } crypto_stream_salsa20_implementation; #endif diff --git a/crypto/salsa20/xmm6int/u0.h b/crypto/salsa20/xmm6int/u0.h index b2d416805..eaf6b8aa0 100644 --- a/crypto/salsa20/xmm6int/u0.h +++ b/crypto/salsa20/xmm6int/u0.h @@ -1,195 +1,199 @@ -if (bytes > 0) { - __m128i diag0 = _mm_loadu_si128((__m128i *) (x + 0)); - __m128i diag1 = _mm_loadu_si128((__m128i *) (x + 4)); - __m128i diag2 = _mm_loadu_si128((__m128i *) (x + 8)); - __m128i diag3 = _mm_loadu_si128((__m128i *) (x + 12)); - __m128i a0, a1, a2, a3, a4, a5, a6, a7; - __m128i b0, b1, b2, b3, b4, b5, b6, b7; - uint8_t partialblock[64]; - - unsigned int i; - - a0 = diag1; - for (i = 0; i < ROUNDS; i += 4) { - a0 = _mm_add_epi32(a0, diag0); - a1 = diag0; - b0 = a0; - a0 = _mm_slli_epi32(a0, 7); - b0 = _mm_srli_epi32(b0, 25); - diag3 = _mm_xor_si128(diag3, a0); - - diag3 = _mm_xor_si128(diag3, b0); - - a1 = _mm_add_epi32(a1, diag3); - a2 = diag3; - b1 = a1; - a1 = _mm_slli_epi32(a1, 9); - b1 = _mm_srli_epi32(b1, 23); - diag2 = _mm_xor_si128(diag2, a1); - diag3 = _mm_shuffle_epi32(diag3, 0x93); - diag2 = _mm_xor_si128(diag2, b1); - - a2 = _mm_add_epi32(a2, diag2); - a3 = diag2; - b2 = a2; - a2 = _mm_slli_epi32(a2, 13); - b2 = _mm_srli_epi32(b2, 19); - diag1 = _mm_xor_si128(diag1, a2); - diag2 = _mm_shuffle_epi32(diag2, 0x4e); - diag1 = _mm_xor_si128(diag1, b2); - - a3 = _mm_add_epi32(a3, diag1); - a4 = diag3; - b3 = a3; - a3 = _mm_slli_epi32(a3, 18); - b3 = _mm_srli_epi32(b3, 14); - diag0 = _mm_xor_si128(diag0, a3); - diag1 = _mm_shuffle_epi32(diag1, 0x39); - diag0 = _mm_xor_si128(diag0, b3); - - a4 = _mm_add_epi32(a4, diag0); - a5 = diag0; - b4 = a4; - a4 = _mm_slli_epi32(a4, 7); - b4 = _mm_srli_epi32(b4, 25); - diag1 = _mm_xor_si128(diag1, a4); - - diag1 = _mm_xor_si128(diag1, b4); - - a5 = _mm_add_epi32(a5, diag1); - a6 = diag1; - b5 = a5; - a5 = _mm_slli_epi32(a5, 9); - b5 = _mm_srli_epi32(b5, 23); - diag2 = _mm_xor_si128(diag2, a5); - diag1 = _mm_shuffle_epi32(diag1, 0x93); - diag2 = _mm_xor_si128(diag2, b5); - - a6 = _mm_add_epi32(a6, diag2); - a7 = diag2; - b6 = a6; - a6 = _mm_slli_epi32(a6, 13); - b6 = _mm_srli_epi32(b6, 19); - diag3 = _mm_xor_si128(diag3, a6); - diag2 = _mm_shuffle_epi32(diag2, 0x4e); - diag3 = _mm_xor_si128(diag3, b6); - - a7 = _mm_add_epi32(a7, diag3); - a0 = diag1; - b7 = a7; - a7 = _mm_slli_epi32(a7, 18); - b7 = _mm_srli_epi32(b7, 14); - diag0 = _mm_xor_si128(diag0, a7); - diag3 = _mm_shuffle_epi32(diag3, 0x39); - diag0 = _mm_xor_si128(diag0, b7); - - a0 = _mm_add_epi32(a0, diag0); - a1 = diag0; - b0 = a0; - a0 = _mm_slli_epi32(a0, 7); - b0 = _mm_srli_epi32(b0, 25); - diag3 = _mm_xor_si128(diag3, a0); - - diag3 = _mm_xor_si128(diag3, b0); - - a1 = _mm_add_epi32(a1, diag3); - a2 = diag3; - b1 = a1; - a1 = _mm_slli_epi32(a1, 9); - b1 = _mm_srli_epi32(b1, 23); - diag2 = _mm_xor_si128(diag2, a1); - diag3 = _mm_shuffle_epi32(diag3, 0x93); - diag2 = _mm_xor_si128(diag2, b1); - - a2 = _mm_add_epi32(a2, diag2); - a3 = diag2; - b2 = a2; - a2 = _mm_slli_epi32(a2, 13); - b2 = _mm_srli_epi32(b2, 19); - diag1 = _mm_xor_si128(diag1, a2); - diag2 = _mm_shuffle_epi32(diag2, 0x4e); - diag1 = _mm_xor_si128(diag1, b2); - - a3 = _mm_add_epi32(a3, diag1); - a4 = diag3; - b3 = a3; - a3 = _mm_slli_epi32(a3, 18); - b3 = _mm_srli_epi32(b3, 14); - diag0 = _mm_xor_si128(diag0, a3); - diag1 = _mm_shuffle_epi32(diag1, 0x39); - diag0 = _mm_xor_si128(diag0, b3); - - a4 = _mm_add_epi32(a4, diag0); - a5 = diag0; - b4 = a4; - a4 = _mm_slli_epi32(a4, 7); - b4 = _mm_srli_epi32(b4, 25); - diag1 = _mm_xor_si128(diag1, a4); - - diag1 = _mm_xor_si128(diag1, b4); - - a5 = _mm_add_epi32(a5, diag1); - a6 = diag1; - b5 = a5; - a5 = _mm_slli_epi32(a5, 9); - b5 = _mm_srli_epi32(b5, 23); - diag2 = _mm_xor_si128(diag2, a5); - diag1 = _mm_shuffle_epi32(diag1, 0x93); - diag2 = _mm_xor_si128(diag2, b5); - - a6 = _mm_add_epi32(a6, diag2); - a7 = diag2; - b6 = a6; - a6 = _mm_slli_epi32(a6, 13); - b6 = _mm_srli_epi32(b6, 19); - diag3 = _mm_xor_si128(diag3, a6); - diag2 = _mm_shuffle_epi32(diag2, 0x4e); - diag3 = _mm_xor_si128(diag3, b6); - - a7 = _mm_add_epi32(a7, diag3); - a0 = diag1; - b7 = a7; - a7 = _mm_slli_epi32(a7, 18); - b7 = _mm_srli_epi32(b7, 14); - diag0 = _mm_xor_si128(diag0, a7); - diag3 = _mm_shuffle_epi32(diag3, 0x39); - diag0 = _mm_xor_si128(diag0, b7); - } - - diag0 = _mm_add_epi32(diag0, _mm_loadu_si128((__m128i *) (x + 0))); - diag1 = _mm_add_epi32(diag1, _mm_loadu_si128((__m128i *) (x + 4))); - diag2 = _mm_add_epi32(diag2, _mm_loadu_si128((__m128i *) (x + 8))); - diag3 = _mm_add_epi32(diag3, _mm_loadu_si128((__m128i *) (x + 12))); - -#define ONEQUAD_SHUFFLE(A, B, C, D) \ - do { \ - uint32_t in##A = _mm_cvtsi128_si32(diag0); \ - uint32_t in##B = _mm_cvtsi128_si32(diag1); \ - uint32_t in##C = _mm_cvtsi128_si32(diag2); \ - uint32_t in##D = _mm_cvtsi128_si32(diag3); \ - diag0 = _mm_shuffle_epi32(diag0, 0x39); \ - diag1 = _mm_shuffle_epi32(diag1, 0x39); \ - diag2 = _mm_shuffle_epi32(diag2, 0x39); \ - diag3 = _mm_shuffle_epi32(diag3, 0x39); \ - *(uint32_t *) (partialblock + (A * 4)) = in##A; \ - *(uint32_t *) (partialblock + (B * 4)) = in##B; \ - *(uint32_t *) (partialblock + (C * 4)) = in##C; \ - *(uint32_t *) (partialblock + (D * 4)) = in##D; \ - } while (0) +if(bytes > 0) +{ + __m128i diag0 = _mm_loadu_si128((__m128i *)(x + 0)); + __m128i diag1 = _mm_loadu_si128((__m128i *)(x + 4)); + __m128i diag2 = _mm_loadu_si128((__m128i *)(x + 8)); + __m128i diag3 = _mm_loadu_si128((__m128i *)(x + 12)); + __m128i a0, a1, a2, a3, a4, a5, a6, a7; + __m128i b0, b1, b2, b3, b4, b5, b6, b7; + uint8_t partialblock[64]; + + unsigned int i; + + a0 = diag1; + for(i = 0; i < ROUNDS; i += 4) + { + a0 = _mm_add_epi32(a0, diag0); + a1 = diag0; + b0 = a0; + a0 = _mm_slli_epi32(a0, 7); + b0 = _mm_srli_epi32(b0, 25); + diag3 = _mm_xor_si128(diag3, a0); + + diag3 = _mm_xor_si128(diag3, b0); + + a1 = _mm_add_epi32(a1, diag3); + a2 = diag3; + b1 = a1; + a1 = _mm_slli_epi32(a1, 9); + b1 = _mm_srli_epi32(b1, 23); + diag2 = _mm_xor_si128(diag2, a1); + diag3 = _mm_shuffle_epi32(diag3, 0x93); + diag2 = _mm_xor_si128(diag2, b1); + + a2 = _mm_add_epi32(a2, diag2); + a3 = diag2; + b2 = a2; + a2 = _mm_slli_epi32(a2, 13); + b2 = _mm_srli_epi32(b2, 19); + diag1 = _mm_xor_si128(diag1, a2); + diag2 = _mm_shuffle_epi32(diag2, 0x4e); + diag1 = _mm_xor_si128(diag1, b2); + + a3 = _mm_add_epi32(a3, diag1); + a4 = diag3; + b3 = a3; + a3 = _mm_slli_epi32(a3, 18); + b3 = _mm_srli_epi32(b3, 14); + diag0 = _mm_xor_si128(diag0, a3); + diag1 = _mm_shuffle_epi32(diag1, 0x39); + diag0 = _mm_xor_si128(diag0, b3); + + a4 = _mm_add_epi32(a4, diag0); + a5 = diag0; + b4 = a4; + a4 = _mm_slli_epi32(a4, 7); + b4 = _mm_srli_epi32(b4, 25); + diag1 = _mm_xor_si128(diag1, a4); + + diag1 = _mm_xor_si128(diag1, b4); + + a5 = _mm_add_epi32(a5, diag1); + a6 = diag1; + b5 = a5; + a5 = _mm_slli_epi32(a5, 9); + b5 = _mm_srli_epi32(b5, 23); + diag2 = _mm_xor_si128(diag2, a5); + diag1 = _mm_shuffle_epi32(diag1, 0x93); + diag2 = _mm_xor_si128(diag2, b5); + + a6 = _mm_add_epi32(a6, diag2); + a7 = diag2; + b6 = a6; + a6 = _mm_slli_epi32(a6, 13); + b6 = _mm_srli_epi32(b6, 19); + diag3 = _mm_xor_si128(diag3, a6); + diag2 = _mm_shuffle_epi32(diag2, 0x4e); + diag3 = _mm_xor_si128(diag3, b6); + + a7 = _mm_add_epi32(a7, diag3); + a0 = diag1; + b7 = a7; + a7 = _mm_slli_epi32(a7, 18); + b7 = _mm_srli_epi32(b7, 14); + diag0 = _mm_xor_si128(diag0, a7); + diag3 = _mm_shuffle_epi32(diag3, 0x39); + diag0 = _mm_xor_si128(diag0, b7); + + a0 = _mm_add_epi32(a0, diag0); + a1 = diag0; + b0 = a0; + a0 = _mm_slli_epi32(a0, 7); + b0 = _mm_srli_epi32(b0, 25); + diag3 = _mm_xor_si128(diag3, a0); + + diag3 = _mm_xor_si128(diag3, b0); + + a1 = _mm_add_epi32(a1, diag3); + a2 = diag3; + b1 = a1; + a1 = _mm_slli_epi32(a1, 9); + b1 = _mm_srli_epi32(b1, 23); + diag2 = _mm_xor_si128(diag2, a1); + diag3 = _mm_shuffle_epi32(diag3, 0x93); + diag2 = _mm_xor_si128(diag2, b1); + + a2 = _mm_add_epi32(a2, diag2); + a3 = diag2; + b2 = a2; + a2 = _mm_slli_epi32(a2, 13); + b2 = _mm_srli_epi32(b2, 19); + diag1 = _mm_xor_si128(diag1, a2); + diag2 = _mm_shuffle_epi32(diag2, 0x4e); + diag1 = _mm_xor_si128(diag1, b2); + + a3 = _mm_add_epi32(a3, diag1); + a4 = diag3; + b3 = a3; + a3 = _mm_slli_epi32(a3, 18); + b3 = _mm_srli_epi32(b3, 14); + diag0 = _mm_xor_si128(diag0, a3); + diag1 = _mm_shuffle_epi32(diag1, 0x39); + diag0 = _mm_xor_si128(diag0, b3); + + a4 = _mm_add_epi32(a4, diag0); + a5 = diag0; + b4 = a4; + a4 = _mm_slli_epi32(a4, 7); + b4 = _mm_srli_epi32(b4, 25); + diag1 = _mm_xor_si128(diag1, a4); + + diag1 = _mm_xor_si128(diag1, b4); + + a5 = _mm_add_epi32(a5, diag1); + a6 = diag1; + b5 = a5; + a5 = _mm_slli_epi32(a5, 9); + b5 = _mm_srli_epi32(b5, 23); + diag2 = _mm_xor_si128(diag2, a5); + diag1 = _mm_shuffle_epi32(diag1, 0x93); + diag2 = _mm_xor_si128(diag2, b5); + + a6 = _mm_add_epi32(a6, diag2); + a7 = diag2; + b6 = a6; + a6 = _mm_slli_epi32(a6, 13); + b6 = _mm_srli_epi32(b6, 19); + diag3 = _mm_xor_si128(diag3, a6); + diag2 = _mm_shuffle_epi32(diag2, 0x4e); + diag3 = _mm_xor_si128(diag3, b6); + + a7 = _mm_add_epi32(a7, diag3); + a0 = diag1; + b7 = a7; + a7 = _mm_slli_epi32(a7, 18); + b7 = _mm_srli_epi32(b7, 14); + diag0 = _mm_xor_si128(diag0, a7); + diag3 = _mm_shuffle_epi32(diag3, 0x39); + diag0 = _mm_xor_si128(diag0, b7); + } + + diag0 = _mm_add_epi32(diag0, _mm_loadu_si128((__m128i *)(x + 0))); + diag1 = _mm_add_epi32(diag1, _mm_loadu_si128((__m128i *)(x + 4))); + diag2 = _mm_add_epi32(diag2, _mm_loadu_si128((__m128i *)(x + 8))); + diag3 = _mm_add_epi32(diag3, _mm_loadu_si128((__m128i *)(x + 12))); + +#define ONEQUAD_SHUFFLE(A, B, C, D) \ + do \ + { \ + uint32_t in##A = _mm_cvtsi128_si32(diag0); \ + uint32_t in##B = _mm_cvtsi128_si32(diag1); \ + uint32_t in##C = _mm_cvtsi128_si32(diag2); \ + uint32_t in##D = _mm_cvtsi128_si32(diag3); \ + diag0 = _mm_shuffle_epi32(diag0, 0x39); \ + diag1 = _mm_shuffle_epi32(diag1, 0x39); \ + diag2 = _mm_shuffle_epi32(diag2, 0x39); \ + diag3 = _mm_shuffle_epi32(diag3, 0x39); \ + *(uint32_t *)(partialblock + (A * 4)) = in##A; \ + *(uint32_t *)(partialblock + (B * 4)) = in##B; \ + *(uint32_t *)(partialblock + (C * 4)) = in##C; \ + *(uint32_t *)(partialblock + (D * 4)) = in##D; \ + } while(0) #define ONEQUAD(A, B, C, D) ONEQUAD_SHUFFLE(A, B, C, D) - ONEQUAD(0, 12, 8, 4); - ONEQUAD(5, 1, 13, 9); - ONEQUAD(10, 6, 2, 14); - ONEQUAD(15, 11, 7, 3); + ONEQUAD(0, 12, 8, 4); + ONEQUAD(5, 1, 13, 9); + ONEQUAD(10, 6, 2, 14); + ONEQUAD(15, 11, 7, 3); #undef ONEQUAD #undef ONEQUAD_SHUFFLE - for (i = 0; i < bytes; i++) { - c[i] = m[i] ^ partialblock[i]; - } + for(i = 0; i < bytes; i++) + { + c[i] = m[i] ^ partialblock[i]; + } - sodium_memzero(partialblock, sizeof partialblock); + sodium_memzero(partialblock, sizeof partialblock); } diff --git a/crypto/salsa20/xmm6int/u1.h b/crypto/salsa20/xmm6int/u1.h index c245d9565..235891a82 100644 --- a/crypto/salsa20/xmm6int/u1.h +++ b/crypto/salsa20/xmm6int/u1.h @@ -1,207 +1,211 @@ -while (bytes >= 64) { - __m128i diag0 = _mm_loadu_si128((__m128i *) (x + 0)); - __m128i diag1 = _mm_loadu_si128((__m128i *) (x + 4)); - __m128i diag2 = _mm_loadu_si128((__m128i *) (x + 8)); - __m128i diag3 = _mm_loadu_si128((__m128i *) (x + 12)); - __m128i a0, a1, a2, a3, a4, a5, a6, a7; - __m128i b0, b1, b2, b3, b4, b5, b6, b7; - - uint32_t in8; - uint32_t in9; - int i; - - a0 = diag1; - for (i = 0; i < ROUNDS; i += 4) { - a0 = _mm_add_epi32(a0, diag0); - a1 = diag0; - b0 = a0; - a0 = _mm_slli_epi32(a0, 7); - b0 = _mm_srli_epi32(b0, 25); - diag3 = _mm_xor_si128(diag3, a0); - - diag3 = _mm_xor_si128(diag3, b0); - - a1 = _mm_add_epi32(a1, diag3); - a2 = diag3; - b1 = a1; - a1 = _mm_slli_epi32(a1, 9); - b1 = _mm_srli_epi32(b1, 23); - diag2 = _mm_xor_si128(diag2, a1); - diag3 = _mm_shuffle_epi32(diag3, 0x93); - diag2 = _mm_xor_si128(diag2, b1); - - a2 = _mm_add_epi32(a2, diag2); - a3 = diag2; - b2 = a2; - a2 = _mm_slli_epi32(a2, 13); - b2 = _mm_srli_epi32(b2, 19); - diag1 = _mm_xor_si128(diag1, a2); - diag2 = _mm_shuffle_epi32(diag2, 0x4e); - diag1 = _mm_xor_si128(diag1, b2); - - a3 = _mm_add_epi32(a3, diag1); - a4 = diag3; - b3 = a3; - a3 = _mm_slli_epi32(a3, 18); - b3 = _mm_srli_epi32(b3, 14); - diag0 = _mm_xor_si128(diag0, a3); - diag1 = _mm_shuffle_epi32(diag1, 0x39); - diag0 = _mm_xor_si128(diag0, b3); - - a4 = _mm_add_epi32(a4, diag0); - a5 = diag0; - b4 = a4; - a4 = _mm_slli_epi32(a4, 7); - b4 = _mm_srli_epi32(b4, 25); - diag1 = _mm_xor_si128(diag1, a4); - - diag1 = _mm_xor_si128(diag1, b4); - - a5 = _mm_add_epi32(a5, diag1); - a6 = diag1; - b5 = a5; - a5 = _mm_slli_epi32(a5, 9); - b5 = _mm_srli_epi32(b5, 23); - diag2 = _mm_xor_si128(diag2, a5); - diag1 = _mm_shuffle_epi32(diag1, 0x93); - diag2 = _mm_xor_si128(diag2, b5); - - a6 = _mm_add_epi32(a6, diag2); - a7 = diag2; - b6 = a6; - a6 = _mm_slli_epi32(a6, 13); - b6 = _mm_srli_epi32(b6, 19); - diag3 = _mm_xor_si128(diag3, a6); - diag2 = _mm_shuffle_epi32(diag2, 0x4e); - diag3 = _mm_xor_si128(diag3, b6); - - a7 = _mm_add_epi32(a7, diag3); - a0 = diag1; - b7 = a7; - a7 = _mm_slli_epi32(a7, 18); - b7 = _mm_srli_epi32(b7, 14); - diag0 = _mm_xor_si128(diag0, a7); - diag3 = _mm_shuffle_epi32(diag3, 0x39); - diag0 = _mm_xor_si128(diag0, b7); - - a0 = _mm_add_epi32(a0, diag0); - a1 = diag0; - b0 = a0; - a0 = _mm_slli_epi32(a0, 7); - b0 = _mm_srli_epi32(b0, 25); - diag3 = _mm_xor_si128(diag3, a0); - - diag3 = _mm_xor_si128(diag3, b0); - - a1 = _mm_add_epi32(a1, diag3); - a2 = diag3; - b1 = a1; - a1 = _mm_slli_epi32(a1, 9); - b1 = _mm_srli_epi32(b1, 23); - diag2 = _mm_xor_si128(diag2, a1); - diag3 = _mm_shuffle_epi32(diag3, 0x93); - diag2 = _mm_xor_si128(diag2, b1); - - a2 = _mm_add_epi32(a2, diag2); - a3 = diag2; - b2 = a2; - a2 = _mm_slli_epi32(a2, 13); - b2 = _mm_srli_epi32(b2, 19); - diag1 = _mm_xor_si128(diag1, a2); - diag2 = _mm_shuffle_epi32(diag2, 0x4e); - diag1 = _mm_xor_si128(diag1, b2); - - a3 = _mm_add_epi32(a3, diag1); - a4 = diag3; - b3 = a3; - a3 = _mm_slli_epi32(a3, 18); - b3 = _mm_srli_epi32(b3, 14); - diag0 = _mm_xor_si128(diag0, a3); - diag1 = _mm_shuffle_epi32(diag1, 0x39); - diag0 = _mm_xor_si128(diag0, b3); - - a4 = _mm_add_epi32(a4, diag0); - a5 = diag0; - b4 = a4; - a4 = _mm_slli_epi32(a4, 7); - b4 = _mm_srli_epi32(b4, 25); - diag1 = _mm_xor_si128(diag1, a4); - - diag1 = _mm_xor_si128(diag1, b4); - - a5 = _mm_add_epi32(a5, diag1); - a6 = diag1; - b5 = a5; - a5 = _mm_slli_epi32(a5, 9); - b5 = _mm_srli_epi32(b5, 23); - diag2 = _mm_xor_si128(diag2, a5); - diag1 = _mm_shuffle_epi32(diag1, 0x93); - diag2 = _mm_xor_si128(diag2, b5); - - a6 = _mm_add_epi32(a6, diag2); - a7 = diag2; - b6 = a6; - a6 = _mm_slli_epi32(a6, 13); - b6 = _mm_srli_epi32(b6, 19); - diag3 = _mm_xor_si128(diag3, a6); - diag2 = _mm_shuffle_epi32(diag2, 0x4e); - diag3 = _mm_xor_si128(diag3, b6); - - a7 = _mm_add_epi32(a7, diag3); - a0 = diag1; - b7 = a7; - a7 = _mm_slli_epi32(a7, 18); - b7 = _mm_srli_epi32(b7, 14); - diag0 = _mm_xor_si128(diag0, a7); - diag3 = _mm_shuffle_epi32(diag3, 0x39); - diag0 = _mm_xor_si128(diag0, b7); - } - - diag0 = _mm_add_epi32(diag0, _mm_loadu_si128((__m128i *) (x + 0))); - diag1 = _mm_add_epi32(diag1, _mm_loadu_si128((__m128i *) (x + 4))); - diag2 = _mm_add_epi32(diag2, _mm_loadu_si128((__m128i *) (x + 8))); - diag3 = _mm_add_epi32(diag3, _mm_loadu_si128((__m128i *) (x + 12))); - -#define ONEQUAD_SHUFFLE(A, B, C, D) \ - do { \ - uint32_t in##A = _mm_cvtsi128_si32(diag0); \ - uint32_t in##B = _mm_cvtsi128_si32(diag1); \ - uint32_t in##C = _mm_cvtsi128_si32(diag2); \ - uint32_t in##D = _mm_cvtsi128_si32(diag3); \ - diag0 = _mm_shuffle_epi32(diag0, 0x39); \ - diag1 = _mm_shuffle_epi32(diag1, 0x39); \ - diag2 = _mm_shuffle_epi32(diag2, 0x39); \ - diag3 = _mm_shuffle_epi32(diag3, 0x39); \ - in##A ^= *(uint32_t *) (m + (A * 4)); \ - in##B ^= *(uint32_t *) (m + (B * 4)); \ - in##C ^= *(uint32_t *) (m + (C * 4)); \ - in##D ^= *(uint32_t *) (m + (D * 4)); \ - *(uint32_t *) (c + (A * 4)) = in##A; \ - *(uint32_t *) (c + (B * 4)) = in##B; \ - *(uint32_t *) (c + (C * 4)) = in##C; \ - *(uint32_t *) (c + (D * 4)) = in##D; \ - } while (0) +while(bytes >= 64) +{ + __m128i diag0 = _mm_loadu_si128((__m128i *)(x + 0)); + __m128i diag1 = _mm_loadu_si128((__m128i *)(x + 4)); + __m128i diag2 = _mm_loadu_si128((__m128i *)(x + 8)); + __m128i diag3 = _mm_loadu_si128((__m128i *)(x + 12)); + __m128i a0, a1, a2, a3, a4, a5, a6, a7; + __m128i b0, b1, b2, b3, b4, b5, b6, b7; + + uint32_t in8; + uint32_t in9; + int i; + + a0 = diag1; + for(i = 0; i < ROUNDS; i += 4) + { + a0 = _mm_add_epi32(a0, diag0); + a1 = diag0; + b0 = a0; + a0 = _mm_slli_epi32(a0, 7); + b0 = _mm_srli_epi32(b0, 25); + diag3 = _mm_xor_si128(diag3, a0); + + diag3 = _mm_xor_si128(diag3, b0); + + a1 = _mm_add_epi32(a1, diag3); + a2 = diag3; + b1 = a1; + a1 = _mm_slli_epi32(a1, 9); + b1 = _mm_srli_epi32(b1, 23); + diag2 = _mm_xor_si128(diag2, a1); + diag3 = _mm_shuffle_epi32(diag3, 0x93); + diag2 = _mm_xor_si128(diag2, b1); + + a2 = _mm_add_epi32(a2, diag2); + a3 = diag2; + b2 = a2; + a2 = _mm_slli_epi32(a2, 13); + b2 = _mm_srli_epi32(b2, 19); + diag1 = _mm_xor_si128(diag1, a2); + diag2 = _mm_shuffle_epi32(diag2, 0x4e); + diag1 = _mm_xor_si128(diag1, b2); + + a3 = _mm_add_epi32(a3, diag1); + a4 = diag3; + b3 = a3; + a3 = _mm_slli_epi32(a3, 18); + b3 = _mm_srli_epi32(b3, 14); + diag0 = _mm_xor_si128(diag0, a3); + diag1 = _mm_shuffle_epi32(diag1, 0x39); + diag0 = _mm_xor_si128(diag0, b3); + + a4 = _mm_add_epi32(a4, diag0); + a5 = diag0; + b4 = a4; + a4 = _mm_slli_epi32(a4, 7); + b4 = _mm_srli_epi32(b4, 25); + diag1 = _mm_xor_si128(diag1, a4); + + diag1 = _mm_xor_si128(diag1, b4); + + a5 = _mm_add_epi32(a5, diag1); + a6 = diag1; + b5 = a5; + a5 = _mm_slli_epi32(a5, 9); + b5 = _mm_srli_epi32(b5, 23); + diag2 = _mm_xor_si128(diag2, a5); + diag1 = _mm_shuffle_epi32(diag1, 0x93); + diag2 = _mm_xor_si128(diag2, b5); + + a6 = _mm_add_epi32(a6, diag2); + a7 = diag2; + b6 = a6; + a6 = _mm_slli_epi32(a6, 13); + b6 = _mm_srli_epi32(b6, 19); + diag3 = _mm_xor_si128(diag3, a6); + diag2 = _mm_shuffle_epi32(diag2, 0x4e); + diag3 = _mm_xor_si128(diag3, b6); + + a7 = _mm_add_epi32(a7, diag3); + a0 = diag1; + b7 = a7; + a7 = _mm_slli_epi32(a7, 18); + b7 = _mm_srli_epi32(b7, 14); + diag0 = _mm_xor_si128(diag0, a7); + diag3 = _mm_shuffle_epi32(diag3, 0x39); + diag0 = _mm_xor_si128(diag0, b7); + + a0 = _mm_add_epi32(a0, diag0); + a1 = diag0; + b0 = a0; + a0 = _mm_slli_epi32(a0, 7); + b0 = _mm_srli_epi32(b0, 25); + diag3 = _mm_xor_si128(diag3, a0); + + diag3 = _mm_xor_si128(diag3, b0); + + a1 = _mm_add_epi32(a1, diag3); + a2 = diag3; + b1 = a1; + a1 = _mm_slli_epi32(a1, 9); + b1 = _mm_srli_epi32(b1, 23); + diag2 = _mm_xor_si128(diag2, a1); + diag3 = _mm_shuffle_epi32(diag3, 0x93); + diag2 = _mm_xor_si128(diag2, b1); + + a2 = _mm_add_epi32(a2, diag2); + a3 = diag2; + b2 = a2; + a2 = _mm_slli_epi32(a2, 13); + b2 = _mm_srli_epi32(b2, 19); + diag1 = _mm_xor_si128(diag1, a2); + diag2 = _mm_shuffle_epi32(diag2, 0x4e); + diag1 = _mm_xor_si128(diag1, b2); + + a3 = _mm_add_epi32(a3, diag1); + a4 = diag3; + b3 = a3; + a3 = _mm_slli_epi32(a3, 18); + b3 = _mm_srli_epi32(b3, 14); + diag0 = _mm_xor_si128(diag0, a3); + diag1 = _mm_shuffle_epi32(diag1, 0x39); + diag0 = _mm_xor_si128(diag0, b3); + + a4 = _mm_add_epi32(a4, diag0); + a5 = diag0; + b4 = a4; + a4 = _mm_slli_epi32(a4, 7); + b4 = _mm_srli_epi32(b4, 25); + diag1 = _mm_xor_si128(diag1, a4); + + diag1 = _mm_xor_si128(diag1, b4); + + a5 = _mm_add_epi32(a5, diag1); + a6 = diag1; + b5 = a5; + a5 = _mm_slli_epi32(a5, 9); + b5 = _mm_srli_epi32(b5, 23); + diag2 = _mm_xor_si128(diag2, a5); + diag1 = _mm_shuffle_epi32(diag1, 0x93); + diag2 = _mm_xor_si128(diag2, b5); + + a6 = _mm_add_epi32(a6, diag2); + a7 = diag2; + b6 = a6; + a6 = _mm_slli_epi32(a6, 13); + b6 = _mm_srli_epi32(b6, 19); + diag3 = _mm_xor_si128(diag3, a6); + diag2 = _mm_shuffle_epi32(diag2, 0x4e); + diag3 = _mm_xor_si128(diag3, b6); + + a7 = _mm_add_epi32(a7, diag3); + a0 = diag1; + b7 = a7; + a7 = _mm_slli_epi32(a7, 18); + b7 = _mm_srli_epi32(b7, 14); + diag0 = _mm_xor_si128(diag0, a7); + diag3 = _mm_shuffle_epi32(diag3, 0x39); + diag0 = _mm_xor_si128(diag0, b7); + } + + diag0 = _mm_add_epi32(diag0, _mm_loadu_si128((__m128i *)(x + 0))); + diag1 = _mm_add_epi32(diag1, _mm_loadu_si128((__m128i *)(x + 4))); + diag2 = _mm_add_epi32(diag2, _mm_loadu_si128((__m128i *)(x + 8))); + diag3 = _mm_add_epi32(diag3, _mm_loadu_si128((__m128i *)(x + 12))); + +#define ONEQUAD_SHUFFLE(A, B, C, D) \ + do \ + { \ + uint32_t in##A = _mm_cvtsi128_si32(diag0); \ + uint32_t in##B = _mm_cvtsi128_si32(diag1); \ + uint32_t in##C = _mm_cvtsi128_si32(diag2); \ + uint32_t in##D = _mm_cvtsi128_si32(diag3); \ + diag0 = _mm_shuffle_epi32(diag0, 0x39); \ + diag1 = _mm_shuffle_epi32(diag1, 0x39); \ + diag2 = _mm_shuffle_epi32(diag2, 0x39); \ + diag3 = _mm_shuffle_epi32(diag3, 0x39); \ + in##A ^= *(uint32_t *)(m + (A * 4)); \ + in##B ^= *(uint32_t *)(m + (B * 4)); \ + in##C ^= *(uint32_t *)(m + (C * 4)); \ + in##D ^= *(uint32_t *)(m + (D * 4)); \ + *(uint32_t *)(c + (A * 4)) = in##A; \ + *(uint32_t *)(c + (B * 4)) = in##B; \ + *(uint32_t *)(c + (C * 4)) = in##C; \ + *(uint32_t *)(c + (D * 4)) = in##D; \ + } while(0) #define ONEQUAD(A, B, C, D) ONEQUAD_SHUFFLE(A, B, C, D) - ONEQUAD(0, 12, 8, 4); - ONEQUAD(5, 1, 13, 9); - ONEQUAD(10, 6, 2, 14); - ONEQUAD(15, 11, 7, 3); + ONEQUAD(0, 12, 8, 4); + ONEQUAD(5, 1, 13, 9); + ONEQUAD(10, 6, 2, 14); + ONEQUAD(15, 11, 7, 3); #undef ONEQUAD #undef ONEQUAD_SHUFFLE - in8 = x[8]; - in9 = x[13]; - in8++; - if (in8 == 0) { - in9++; - } - x[8] = in8; - x[13] = in9; - - c += 64; - m += 64; - bytes -= 64; + in8 = x[8]; + in9 = x[13]; + in8++; + if(in8 == 0) + { + in9++; + } + x[8] = in8; + x[13] = in9; + + c += 64; + m += 64; + bytes -= 64; } diff --git a/crypto/salsa20/xmm6int/u4.h b/crypto/salsa20/xmm6int/u4.h index 61d935fc9..dcff7457d 100644 --- a/crypto/salsa20/xmm6int/u4.h +++ b/crypto/salsa20/xmm6int/u4.h @@ -1,547 +1,548 @@ -if (bytes >= 256) { - __m128i y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12, y13, y14, - y15; - __m128i z0, z1, z2, z3, z4, z5, z6, z7, z8, z9, z10, z11, z12, z13, z14, - z15; - __m128i orig0, orig1, orig2, orig3, orig4, orig5, orig6, orig7, orig8, - orig9, orig10, orig11, orig12, orig13, orig14, orig15; - - uint32_t in8; - uint32_t in9; - int i; - - /* element broadcast immediate for _mm_shuffle_epi32 are in order: - 0x00, 0x55, 0xaa, 0xff */ - z0 = _mm_loadu_si128((__m128i *) (x + 0)); - z5 = _mm_shuffle_epi32(z0, 0x55); - z10 = _mm_shuffle_epi32(z0, 0xaa); - z15 = _mm_shuffle_epi32(z0, 0xff); - z0 = _mm_shuffle_epi32(z0, 0x00); - z1 = _mm_loadu_si128((__m128i *) (x + 4)); - z6 = _mm_shuffle_epi32(z1, 0xaa); - z11 = _mm_shuffle_epi32(z1, 0xff); - z12 = _mm_shuffle_epi32(z1, 0x00); - z1 = _mm_shuffle_epi32(z1, 0x55); - z2 = _mm_loadu_si128((__m128i *) (x + 8)); - z7 = _mm_shuffle_epi32(z2, 0xff); - z13 = _mm_shuffle_epi32(z2, 0x55); - z2 = _mm_shuffle_epi32(z2, 0xaa); - /* no z8 -> first half of the nonce, will fill later */ - z3 = _mm_loadu_si128((__m128i *) (x + 12)); - z4 = _mm_shuffle_epi32(z3, 0x00); - z14 = _mm_shuffle_epi32(z3, 0xaa); - z3 = _mm_shuffle_epi32(z3, 0xff); - /* no z9 -> second half of the nonce, will fill later */ - orig0 = z0; - orig1 = z1; - orig2 = z2; - orig3 = z3; - orig4 = z4; - orig5 = z5; - orig6 = z6; - orig7 = z7; - orig10 = z10; - orig11 = z11; - orig12 = z12; - orig13 = z13; - orig14 = z14; - orig15 = z15; - - while (bytes >= 256) { - /* vector implementation for z8 and z9 */ - /* not sure if it helps for only 4 blocks */ - const __m128i addv8 = _mm_set_epi64x(1, 0); - const __m128i addv9 = _mm_set_epi64x(3, 2); - __m128i t8, t9; - uint64_t in89; - - in8 = x[8]; - in9 = x[13]; - in89 = ((uint64_t) in8) | (((uint64_t) in9) << 32); - t8 = _mm_set1_epi64x(in89); - t9 = _mm_set1_epi64x(in89); - - z8 = _mm_add_epi64(addv8, t8); - z9 = _mm_add_epi64(addv9, t9); - - t8 = _mm_unpacklo_epi32(z8, z9); - t9 = _mm_unpackhi_epi32(z8, z9); - - z8 = _mm_unpacklo_epi32(t8, t9); - z9 = _mm_unpackhi_epi32(t8, t9); - - orig8 = z8; - orig9 = z9; - - in89 += 4; - - x[8] = in89 & 0xFFFFFFFF; - x[13] = (in89 >> 32) & 0xFFFFFFFF; - - z5 = orig5; - z10 = orig10; - z15 = orig15; - z14 = orig14; - z3 = orig3; - z6 = orig6; - z11 = orig11; - z1 = orig1; - - z7 = orig7; - z13 = orig13; - z2 = orig2; - z9 = orig9; - z0 = orig0; - z12 = orig12; - z4 = orig4; - z8 = orig8; - - for (i = 0; i < ROUNDS; i += 2) { - /* the inner loop is a direct translation (regexp search/replace) - * from the amd64-xmm6 ASM */ - __m128i r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, - r14, r15; - - y4 = z12; - y4 = _mm_add_epi32(y4, z0); - r4 = y4; - y4 = _mm_slli_epi32(y4, 7); - z4 = _mm_xor_si128(z4, y4); - r4 = _mm_srli_epi32(r4, 25); - z4 = _mm_xor_si128(z4, r4); - - y9 = z1; - y9 = _mm_add_epi32(y9, z5); - r9 = y9; - y9 = _mm_slli_epi32(y9, 7); - z9 = _mm_xor_si128(z9, y9); - r9 = _mm_srli_epi32(r9, 25); - z9 = _mm_xor_si128(z9, r9); - - y8 = z0; - y8 = _mm_add_epi32(y8, z4); - r8 = y8; - y8 = _mm_slli_epi32(y8, 9); - z8 = _mm_xor_si128(z8, y8); - r8 = _mm_srli_epi32(r8, 23); - z8 = _mm_xor_si128(z8, r8); - - y13 = z5; - y13 = _mm_add_epi32(y13, z9); - r13 = y13; - y13 = _mm_slli_epi32(y13, 9); - z13 = _mm_xor_si128(z13, y13); - r13 = _mm_srli_epi32(r13, 23); - z13 = _mm_xor_si128(z13, r13); - - y12 = z4; - y12 = _mm_add_epi32(y12, z8); - r12 = y12; - y12 = _mm_slli_epi32(y12, 13); - z12 = _mm_xor_si128(z12, y12); - r12 = _mm_srli_epi32(r12, 19); - z12 = _mm_xor_si128(z12, r12); - - y1 = z9; - y1 = _mm_add_epi32(y1, z13); - r1 = y1; - y1 = _mm_slli_epi32(y1, 13); - z1 = _mm_xor_si128(z1, y1); - r1 = _mm_srli_epi32(r1, 19); - z1 = _mm_xor_si128(z1, r1); - - y0 = z8; - y0 = _mm_add_epi32(y0, z12); - r0 = y0; - y0 = _mm_slli_epi32(y0, 18); - z0 = _mm_xor_si128(z0, y0); - r0 = _mm_srli_epi32(r0, 14); - z0 = _mm_xor_si128(z0, r0); - - y5 = z13; - y5 = _mm_add_epi32(y5, z1); - r5 = y5; - y5 = _mm_slli_epi32(y5, 18); - z5 = _mm_xor_si128(z5, y5); - r5 = _mm_srli_epi32(r5, 14); - z5 = _mm_xor_si128(z5, r5); - - y14 = z6; - y14 = _mm_add_epi32(y14, z10); - r14 = y14; - y14 = _mm_slli_epi32(y14, 7); - z14 = _mm_xor_si128(z14, y14); - r14 = _mm_srli_epi32(r14, 25); - z14 = _mm_xor_si128(z14, r14); - - y3 = z11; - y3 = _mm_add_epi32(y3, z15); - r3 = y3; - y3 = _mm_slli_epi32(y3, 7); - z3 = _mm_xor_si128(z3, y3); - r3 = _mm_srli_epi32(r3, 25); - z3 = _mm_xor_si128(z3, r3); - - y2 = z10; - y2 = _mm_add_epi32(y2, z14); - r2 = y2; - y2 = _mm_slli_epi32(y2, 9); - z2 = _mm_xor_si128(z2, y2); - r2 = _mm_srli_epi32(r2, 23); - z2 = _mm_xor_si128(z2, r2); - - y7 = z15; - y7 = _mm_add_epi32(y7, z3); - r7 = y7; - y7 = _mm_slli_epi32(y7, 9); - z7 = _mm_xor_si128(z7, y7); - r7 = _mm_srli_epi32(r7, 23); - z7 = _mm_xor_si128(z7, r7); - - y6 = z14; - y6 = _mm_add_epi32(y6, z2); - r6 = y6; - y6 = _mm_slli_epi32(y6, 13); - z6 = _mm_xor_si128(z6, y6); - r6 = _mm_srli_epi32(r6, 19); - z6 = _mm_xor_si128(z6, r6); - - y11 = z3; - y11 = _mm_add_epi32(y11, z7); - r11 = y11; - y11 = _mm_slli_epi32(y11, 13); - z11 = _mm_xor_si128(z11, y11); - r11 = _mm_srli_epi32(r11, 19); - z11 = _mm_xor_si128(z11, r11); - - y10 = z2; - y10 = _mm_add_epi32(y10, z6); - r10 = y10; - y10 = _mm_slli_epi32(y10, 18); - z10 = _mm_xor_si128(z10, y10); - r10 = _mm_srli_epi32(r10, 14); - z10 = _mm_xor_si128(z10, r10); - - y1 = z3; - y1 = _mm_add_epi32(y1, z0); - r1 = y1; - y1 = _mm_slli_epi32(y1, 7); - z1 = _mm_xor_si128(z1, y1); - r1 = _mm_srli_epi32(r1, 25); - z1 = _mm_xor_si128(z1, r1); - - y15 = z7; - y15 = _mm_add_epi32(y15, z11); - r15 = y15; - y15 = _mm_slli_epi32(y15, 18); - z15 = _mm_xor_si128(z15, y15); - r15 = _mm_srli_epi32(r15, 14); - z15 = _mm_xor_si128(z15, r15); - - y6 = z4; - y6 = _mm_add_epi32(y6, z5); - r6 = y6; - y6 = _mm_slli_epi32(y6, 7); - z6 = _mm_xor_si128(z6, y6); - r6 = _mm_srli_epi32(r6, 25); - z6 = _mm_xor_si128(z6, r6); - - y2 = z0; - y2 = _mm_add_epi32(y2, z1); - r2 = y2; - y2 = _mm_slli_epi32(y2, 9); - z2 = _mm_xor_si128(z2, y2); - r2 = _mm_srli_epi32(r2, 23); - z2 = _mm_xor_si128(z2, r2); - - y7 = z5; - y7 = _mm_add_epi32(y7, z6); - r7 = y7; - y7 = _mm_slli_epi32(y7, 9); - z7 = _mm_xor_si128(z7, y7); - r7 = _mm_srli_epi32(r7, 23); - z7 = _mm_xor_si128(z7, r7); - - y3 = z1; - y3 = _mm_add_epi32(y3, z2); - r3 = y3; - y3 = _mm_slli_epi32(y3, 13); - z3 = _mm_xor_si128(z3, y3); - r3 = _mm_srli_epi32(r3, 19); - z3 = _mm_xor_si128(z3, r3); - - y4 = z6; - y4 = _mm_add_epi32(y4, z7); - r4 = y4; - y4 = _mm_slli_epi32(y4, 13); - z4 = _mm_xor_si128(z4, y4); - r4 = _mm_srli_epi32(r4, 19); - z4 = _mm_xor_si128(z4, r4); - - y0 = z2; - y0 = _mm_add_epi32(y0, z3); - r0 = y0; - y0 = _mm_slli_epi32(y0, 18); - z0 = _mm_xor_si128(z0, y0); - r0 = _mm_srli_epi32(r0, 14); - z0 = _mm_xor_si128(z0, r0); - - y5 = z7; - y5 = _mm_add_epi32(y5, z4); - r5 = y5; - y5 = _mm_slli_epi32(y5, 18); - z5 = _mm_xor_si128(z5, y5); - r5 = _mm_srli_epi32(r5, 14); - z5 = _mm_xor_si128(z5, r5); - - y11 = z9; - y11 = _mm_add_epi32(y11, z10); - r11 = y11; - y11 = _mm_slli_epi32(y11, 7); - z11 = _mm_xor_si128(z11, y11); - r11 = _mm_srli_epi32(r11, 25); - z11 = _mm_xor_si128(z11, r11); - - y12 = z14; - y12 = _mm_add_epi32(y12, z15); - r12 = y12; - y12 = _mm_slli_epi32(y12, 7); - z12 = _mm_xor_si128(z12, y12); - r12 = _mm_srli_epi32(r12, 25); - z12 = _mm_xor_si128(z12, r12); - - y8 = z10; - y8 = _mm_add_epi32(y8, z11); - r8 = y8; - y8 = _mm_slli_epi32(y8, 9); - z8 = _mm_xor_si128(z8, y8); - r8 = _mm_srli_epi32(r8, 23); - z8 = _mm_xor_si128(z8, r8); - - y13 = z15; - y13 = _mm_add_epi32(y13, z12); - r13 = y13; - y13 = _mm_slli_epi32(y13, 9); - z13 = _mm_xor_si128(z13, y13); - r13 = _mm_srli_epi32(r13, 23); - z13 = _mm_xor_si128(z13, r13); - - y9 = z11; - y9 = _mm_add_epi32(y9, z8); - r9 = y9; - y9 = _mm_slli_epi32(y9, 13); - z9 = _mm_xor_si128(z9, y9); - r9 = _mm_srli_epi32(r9, 19); - z9 = _mm_xor_si128(z9, r9); - - y14 = z12; - y14 = _mm_add_epi32(y14, z13); - r14 = y14; - y14 = _mm_slli_epi32(y14, 13); - z14 = _mm_xor_si128(z14, y14); - r14 = _mm_srli_epi32(r14, 19); - z14 = _mm_xor_si128(z14, r14); - - y10 = z8; - y10 = _mm_add_epi32(y10, z9); - r10 = y10; - y10 = _mm_slli_epi32(y10, 18); - z10 = _mm_xor_si128(z10, y10); - r10 = _mm_srli_epi32(r10, 14); - z10 = _mm_xor_si128(z10, r10); - - y15 = z13; - y15 = _mm_add_epi32(y15, z14); - r15 = y15; - y15 = _mm_slli_epi32(y15, 18); - z15 = _mm_xor_si128(z15, y15); - r15 = _mm_srli_epi32(r15, 14); - z15 = _mm_xor_si128(z15, r15); - } +if(bytes >= 256) +{ + __m128i y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12, y13, y14, y15; + __m128i z0, z1, z2, z3, z4, z5, z6, z7, z8, z9, z10, z11, z12, z13, z14, z15; + __m128i orig0, orig1, orig2, orig3, orig4, orig5, orig6, orig7, orig8, orig9, + orig10, orig11, orig12, orig13, orig14, orig15; + + uint32_t in8; + uint32_t in9; + int i; + + /* element broadcast immediate for _mm_shuffle_epi32 are in order: + 0x00, 0x55, 0xaa, 0xff */ + z0 = _mm_loadu_si128((__m128i *)(x + 0)); + z5 = _mm_shuffle_epi32(z0, 0x55); + z10 = _mm_shuffle_epi32(z0, 0xaa); + z15 = _mm_shuffle_epi32(z0, 0xff); + z0 = _mm_shuffle_epi32(z0, 0x00); + z1 = _mm_loadu_si128((__m128i *)(x + 4)); + z6 = _mm_shuffle_epi32(z1, 0xaa); + z11 = _mm_shuffle_epi32(z1, 0xff); + z12 = _mm_shuffle_epi32(z1, 0x00); + z1 = _mm_shuffle_epi32(z1, 0x55); + z2 = _mm_loadu_si128((__m128i *)(x + 8)); + z7 = _mm_shuffle_epi32(z2, 0xff); + z13 = _mm_shuffle_epi32(z2, 0x55); + z2 = _mm_shuffle_epi32(z2, 0xaa); + /* no z8 -> first half of the nonce, will fill later */ + z3 = _mm_loadu_si128((__m128i *)(x + 12)); + z4 = _mm_shuffle_epi32(z3, 0x00); + z14 = _mm_shuffle_epi32(z3, 0xaa); + z3 = _mm_shuffle_epi32(z3, 0xff); + /* no z9 -> second half of the nonce, will fill later */ + orig0 = z0; + orig1 = z1; + orig2 = z2; + orig3 = z3; + orig4 = z4; + orig5 = z5; + orig6 = z6; + orig7 = z7; + orig10 = z10; + orig11 = z11; + orig12 = z12; + orig13 = z13; + orig14 = z14; + orig15 = z15; + + while(bytes >= 256) + { + /* vector implementation for z8 and z9 */ + /* not sure if it helps for only 4 blocks */ + const __m128i addv8 = _mm_set_epi64x(1, 0); + const __m128i addv9 = _mm_set_epi64x(3, 2); + __m128i t8, t9; + uint64_t in89; + + in8 = x[8]; + in9 = x[13]; + in89 = ((uint64_t)in8) | (((uint64_t)in9) << 32); + t8 = _mm_set1_epi64x(in89); + t9 = _mm_set1_epi64x(in89); + + z8 = _mm_add_epi64(addv8, t8); + z9 = _mm_add_epi64(addv9, t9); + + t8 = _mm_unpacklo_epi32(z8, z9); + t9 = _mm_unpackhi_epi32(z8, z9); + + z8 = _mm_unpacklo_epi32(t8, t9); + z9 = _mm_unpackhi_epi32(t8, t9); + + orig8 = z8; + orig9 = z9; + + in89 += 4; + + x[8] = in89 & 0xFFFFFFFF; + x[13] = (in89 >> 32) & 0xFFFFFFFF; + + z5 = orig5; + z10 = orig10; + z15 = orig15; + z14 = orig14; + z3 = orig3; + z6 = orig6; + z11 = orig11; + z1 = orig1; + + z7 = orig7; + z13 = orig13; + z2 = orig2; + z9 = orig9; + z0 = orig0; + z12 = orig12; + z4 = orig4; + z8 = orig8; + + for(i = 0; i < ROUNDS; i += 2) + { + /* the inner loop is a direct translation (regexp search/replace) + * from the amd64-xmm6 ASM */ + __m128i r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, + r15; + + y4 = z12; + y4 = _mm_add_epi32(y4, z0); + r4 = y4; + y4 = _mm_slli_epi32(y4, 7); + z4 = _mm_xor_si128(z4, y4); + r4 = _mm_srli_epi32(r4, 25); + z4 = _mm_xor_si128(z4, r4); + + y9 = z1; + y9 = _mm_add_epi32(y9, z5); + r9 = y9; + y9 = _mm_slli_epi32(y9, 7); + z9 = _mm_xor_si128(z9, y9); + r9 = _mm_srli_epi32(r9, 25); + z9 = _mm_xor_si128(z9, r9); + + y8 = z0; + y8 = _mm_add_epi32(y8, z4); + r8 = y8; + y8 = _mm_slli_epi32(y8, 9); + z8 = _mm_xor_si128(z8, y8); + r8 = _mm_srli_epi32(r8, 23); + z8 = _mm_xor_si128(z8, r8); + + y13 = z5; + y13 = _mm_add_epi32(y13, z9); + r13 = y13; + y13 = _mm_slli_epi32(y13, 9); + z13 = _mm_xor_si128(z13, y13); + r13 = _mm_srli_epi32(r13, 23); + z13 = _mm_xor_si128(z13, r13); + + y12 = z4; + y12 = _mm_add_epi32(y12, z8); + r12 = y12; + y12 = _mm_slli_epi32(y12, 13); + z12 = _mm_xor_si128(z12, y12); + r12 = _mm_srli_epi32(r12, 19); + z12 = _mm_xor_si128(z12, r12); + + y1 = z9; + y1 = _mm_add_epi32(y1, z13); + r1 = y1; + y1 = _mm_slli_epi32(y1, 13); + z1 = _mm_xor_si128(z1, y1); + r1 = _mm_srli_epi32(r1, 19); + z1 = _mm_xor_si128(z1, r1); + + y0 = z8; + y0 = _mm_add_epi32(y0, z12); + r0 = y0; + y0 = _mm_slli_epi32(y0, 18); + z0 = _mm_xor_si128(z0, y0); + r0 = _mm_srli_epi32(r0, 14); + z0 = _mm_xor_si128(z0, r0); + + y5 = z13; + y5 = _mm_add_epi32(y5, z1); + r5 = y5; + y5 = _mm_slli_epi32(y5, 18); + z5 = _mm_xor_si128(z5, y5); + r5 = _mm_srli_epi32(r5, 14); + z5 = _mm_xor_si128(z5, r5); + + y14 = z6; + y14 = _mm_add_epi32(y14, z10); + r14 = y14; + y14 = _mm_slli_epi32(y14, 7); + z14 = _mm_xor_si128(z14, y14); + r14 = _mm_srli_epi32(r14, 25); + z14 = _mm_xor_si128(z14, r14); + + y3 = z11; + y3 = _mm_add_epi32(y3, z15); + r3 = y3; + y3 = _mm_slli_epi32(y3, 7); + z3 = _mm_xor_si128(z3, y3); + r3 = _mm_srli_epi32(r3, 25); + z3 = _mm_xor_si128(z3, r3); + + y2 = z10; + y2 = _mm_add_epi32(y2, z14); + r2 = y2; + y2 = _mm_slli_epi32(y2, 9); + z2 = _mm_xor_si128(z2, y2); + r2 = _mm_srli_epi32(r2, 23); + z2 = _mm_xor_si128(z2, r2); + + y7 = z15; + y7 = _mm_add_epi32(y7, z3); + r7 = y7; + y7 = _mm_slli_epi32(y7, 9); + z7 = _mm_xor_si128(z7, y7); + r7 = _mm_srli_epi32(r7, 23); + z7 = _mm_xor_si128(z7, r7); + + y6 = z14; + y6 = _mm_add_epi32(y6, z2); + r6 = y6; + y6 = _mm_slli_epi32(y6, 13); + z6 = _mm_xor_si128(z6, y6); + r6 = _mm_srli_epi32(r6, 19); + z6 = _mm_xor_si128(z6, r6); + + y11 = z3; + y11 = _mm_add_epi32(y11, z7); + r11 = y11; + y11 = _mm_slli_epi32(y11, 13); + z11 = _mm_xor_si128(z11, y11); + r11 = _mm_srli_epi32(r11, 19); + z11 = _mm_xor_si128(z11, r11); + + y10 = z2; + y10 = _mm_add_epi32(y10, z6); + r10 = y10; + y10 = _mm_slli_epi32(y10, 18); + z10 = _mm_xor_si128(z10, y10); + r10 = _mm_srli_epi32(r10, 14); + z10 = _mm_xor_si128(z10, r10); + + y1 = z3; + y1 = _mm_add_epi32(y1, z0); + r1 = y1; + y1 = _mm_slli_epi32(y1, 7); + z1 = _mm_xor_si128(z1, y1); + r1 = _mm_srli_epi32(r1, 25); + z1 = _mm_xor_si128(z1, r1); + + y15 = z7; + y15 = _mm_add_epi32(y15, z11); + r15 = y15; + y15 = _mm_slli_epi32(y15, 18); + z15 = _mm_xor_si128(z15, y15); + r15 = _mm_srli_epi32(r15, 14); + z15 = _mm_xor_si128(z15, r15); + + y6 = z4; + y6 = _mm_add_epi32(y6, z5); + r6 = y6; + y6 = _mm_slli_epi32(y6, 7); + z6 = _mm_xor_si128(z6, y6); + r6 = _mm_srli_epi32(r6, 25); + z6 = _mm_xor_si128(z6, r6); + + y2 = z0; + y2 = _mm_add_epi32(y2, z1); + r2 = y2; + y2 = _mm_slli_epi32(y2, 9); + z2 = _mm_xor_si128(z2, y2); + r2 = _mm_srli_epi32(r2, 23); + z2 = _mm_xor_si128(z2, r2); + + y7 = z5; + y7 = _mm_add_epi32(y7, z6); + r7 = y7; + y7 = _mm_slli_epi32(y7, 9); + z7 = _mm_xor_si128(z7, y7); + r7 = _mm_srli_epi32(r7, 23); + z7 = _mm_xor_si128(z7, r7); + + y3 = z1; + y3 = _mm_add_epi32(y3, z2); + r3 = y3; + y3 = _mm_slli_epi32(y3, 13); + z3 = _mm_xor_si128(z3, y3); + r3 = _mm_srli_epi32(r3, 19); + z3 = _mm_xor_si128(z3, r3); + + y4 = z6; + y4 = _mm_add_epi32(y4, z7); + r4 = y4; + y4 = _mm_slli_epi32(y4, 13); + z4 = _mm_xor_si128(z4, y4); + r4 = _mm_srli_epi32(r4, 19); + z4 = _mm_xor_si128(z4, r4); + + y0 = z2; + y0 = _mm_add_epi32(y0, z3); + r0 = y0; + y0 = _mm_slli_epi32(y0, 18); + z0 = _mm_xor_si128(z0, y0); + r0 = _mm_srli_epi32(r0, 14); + z0 = _mm_xor_si128(z0, r0); + + y5 = z7; + y5 = _mm_add_epi32(y5, z4); + r5 = y5; + y5 = _mm_slli_epi32(y5, 18); + z5 = _mm_xor_si128(z5, y5); + r5 = _mm_srli_epi32(r5, 14); + z5 = _mm_xor_si128(z5, r5); + + y11 = z9; + y11 = _mm_add_epi32(y11, z10); + r11 = y11; + y11 = _mm_slli_epi32(y11, 7); + z11 = _mm_xor_si128(z11, y11); + r11 = _mm_srli_epi32(r11, 25); + z11 = _mm_xor_si128(z11, r11); + + y12 = z14; + y12 = _mm_add_epi32(y12, z15); + r12 = y12; + y12 = _mm_slli_epi32(y12, 7); + z12 = _mm_xor_si128(z12, y12); + r12 = _mm_srli_epi32(r12, 25); + z12 = _mm_xor_si128(z12, r12); + + y8 = z10; + y8 = _mm_add_epi32(y8, z11); + r8 = y8; + y8 = _mm_slli_epi32(y8, 9); + z8 = _mm_xor_si128(z8, y8); + r8 = _mm_srli_epi32(r8, 23); + z8 = _mm_xor_si128(z8, r8); + + y13 = z15; + y13 = _mm_add_epi32(y13, z12); + r13 = y13; + y13 = _mm_slli_epi32(y13, 9); + z13 = _mm_xor_si128(z13, y13); + r13 = _mm_srli_epi32(r13, 23); + z13 = _mm_xor_si128(z13, r13); + + y9 = z11; + y9 = _mm_add_epi32(y9, z8); + r9 = y9; + y9 = _mm_slli_epi32(y9, 13); + z9 = _mm_xor_si128(z9, y9); + r9 = _mm_srli_epi32(r9, 19); + z9 = _mm_xor_si128(z9, r9); + + y14 = z12; + y14 = _mm_add_epi32(y14, z13); + r14 = y14; + y14 = _mm_slli_epi32(y14, 13); + z14 = _mm_xor_si128(z14, y14); + r14 = _mm_srli_epi32(r14, 19); + z14 = _mm_xor_si128(z14, r14); + + y10 = z8; + y10 = _mm_add_epi32(y10, z9); + r10 = y10; + y10 = _mm_slli_epi32(y10, 18); + z10 = _mm_xor_si128(z10, y10); + r10 = _mm_srli_epi32(r10, 14); + z10 = _mm_xor_si128(z10, r10); + + y15 = z13; + y15 = _mm_add_epi32(y15, z14); + r15 = y15; + y15 = _mm_slli_epi32(y15, 18); + z15 = _mm_xor_si128(z15, y15); + r15 = _mm_srli_epi32(r15, 14); + z15 = _mm_xor_si128(z15, r15); + } /* store data ; this macro replicates the original amd64-xmm6 code */ -#define ONEQUAD_SHUFFLE(A, B, C, D) \ - z##A = _mm_add_epi32(z##A, orig##A); \ - z##B = _mm_add_epi32(z##B, orig##B); \ - z##C = _mm_add_epi32(z##C, orig##C); \ - z##D = _mm_add_epi32(z##D, orig##D); \ - in##A = _mm_cvtsi128_si32(z##A); \ - in##B = _mm_cvtsi128_si32(z##B); \ - in##C = _mm_cvtsi128_si32(z##C); \ - in##D = _mm_cvtsi128_si32(z##D); \ - z##A = _mm_shuffle_epi32(z##A, 0x39); \ - z##B = _mm_shuffle_epi32(z##B, 0x39); \ - z##C = _mm_shuffle_epi32(z##C, 0x39); \ - z##D = _mm_shuffle_epi32(z##D, 0x39); \ - \ - in##A ^= *(uint32_t *) (m + 0); \ - in##B ^= *(uint32_t *) (m + 4); \ - in##C ^= *(uint32_t *) (m + 8); \ - in##D ^= *(uint32_t *) (m + 12); \ - \ - *(uint32_t *) (c + 0) = in##A; \ - *(uint32_t *) (c + 4) = in##B; \ - *(uint32_t *) (c + 8) = in##C; \ - *(uint32_t *) (c + 12) = in##D; \ - \ - in##A = _mm_cvtsi128_si32(z##A); \ - in##B = _mm_cvtsi128_si32(z##B); \ - in##C = _mm_cvtsi128_si32(z##C); \ - in##D = _mm_cvtsi128_si32(z##D); \ - z##A = _mm_shuffle_epi32(z##A, 0x39); \ - z##B = _mm_shuffle_epi32(z##B, 0x39); \ - z##C = _mm_shuffle_epi32(z##C, 0x39); \ - z##D = _mm_shuffle_epi32(z##D, 0x39); \ - \ - in##A ^= *(uint32_t *) (m + 64); \ - in##B ^= *(uint32_t *) (m + 68); \ - in##C ^= *(uint32_t *) (m + 72); \ - in##D ^= *(uint32_t *) (m + 76); \ - *(uint32_t *) (c + 64) = in##A; \ - *(uint32_t *) (c + 68) = in##B; \ - *(uint32_t *) (c + 72) = in##C; \ - *(uint32_t *) (c + 76) = in##D; \ - \ - in##A = _mm_cvtsi128_si32(z##A); \ - in##B = _mm_cvtsi128_si32(z##B); \ - in##C = _mm_cvtsi128_si32(z##C); \ - in##D = _mm_cvtsi128_si32(z##D); \ - z##A = _mm_shuffle_epi32(z##A, 0x39); \ - z##B = _mm_shuffle_epi32(z##B, 0x39); \ - z##C = _mm_shuffle_epi32(z##C, 0x39); \ - z##D = _mm_shuffle_epi32(z##D, 0x39); \ - \ - in##A ^= *(uint32_t *) (m + 128); \ - in##B ^= *(uint32_t *) (m + 132); \ - in##C ^= *(uint32_t *) (m + 136); \ - in##D ^= *(uint32_t *) (m + 140); \ - *(uint32_t *) (c + 128) = in##A; \ - *(uint32_t *) (c + 132) = in##B; \ - *(uint32_t *) (c + 136) = in##C; \ - *(uint32_t *) (c + 140) = in##D; \ - \ - in##A = _mm_cvtsi128_si32(z##A); \ - in##B = _mm_cvtsi128_si32(z##B); \ - in##C = _mm_cvtsi128_si32(z##C); \ - in##D = _mm_cvtsi128_si32(z##D); \ - \ - in##A ^= *(uint32_t *) (m + 192); \ - in##B ^= *(uint32_t *) (m + 196); \ - in##C ^= *(uint32_t *) (m + 200); \ - in##D ^= *(uint32_t *) (m + 204); \ - *(uint32_t *) (c + 192) = in##A; \ - *(uint32_t *) (c + 196) = in##B; \ - *(uint32_t *) (c + 200) = in##C; \ - *(uint32_t *) (c + 204) = in##D +#define ONEQUAD_SHUFFLE(A, B, C, D) \ + z##A = _mm_add_epi32(z##A, orig##A); \ + z##B = _mm_add_epi32(z##B, orig##B); \ + z##C = _mm_add_epi32(z##C, orig##C); \ + z##D = _mm_add_epi32(z##D, orig##D); \ + in##A = _mm_cvtsi128_si32(z##A); \ + in##B = _mm_cvtsi128_si32(z##B); \ + in##C = _mm_cvtsi128_si32(z##C); \ + in##D = _mm_cvtsi128_si32(z##D); \ + z##A = _mm_shuffle_epi32(z##A, 0x39); \ + z##B = _mm_shuffle_epi32(z##B, 0x39); \ + z##C = _mm_shuffle_epi32(z##C, 0x39); \ + z##D = _mm_shuffle_epi32(z##D, 0x39); \ + \ + in##A ^= *(uint32_t *)(m + 0); \ + in##B ^= *(uint32_t *)(m + 4); \ + in##C ^= *(uint32_t *)(m + 8); \ + in##D ^= *(uint32_t *)(m + 12); \ + \ + *(uint32_t *)(c + 0) = in##A; \ + *(uint32_t *)(c + 4) = in##B; \ + *(uint32_t *)(c + 8) = in##C; \ + *(uint32_t *)(c + 12) = in##D; \ + \ + in##A = _mm_cvtsi128_si32(z##A); \ + in##B = _mm_cvtsi128_si32(z##B); \ + in##C = _mm_cvtsi128_si32(z##C); \ + in##D = _mm_cvtsi128_si32(z##D); \ + z##A = _mm_shuffle_epi32(z##A, 0x39); \ + z##B = _mm_shuffle_epi32(z##B, 0x39); \ + z##C = _mm_shuffle_epi32(z##C, 0x39); \ + z##D = _mm_shuffle_epi32(z##D, 0x39); \ + \ + in##A ^= *(uint32_t *)(m + 64); \ + in##B ^= *(uint32_t *)(m + 68); \ + in##C ^= *(uint32_t *)(m + 72); \ + in##D ^= *(uint32_t *)(m + 76); \ + *(uint32_t *)(c + 64) = in##A; \ + *(uint32_t *)(c + 68) = in##B; \ + *(uint32_t *)(c + 72) = in##C; \ + *(uint32_t *)(c + 76) = in##D; \ + \ + in##A = _mm_cvtsi128_si32(z##A); \ + in##B = _mm_cvtsi128_si32(z##B); \ + in##C = _mm_cvtsi128_si32(z##C); \ + in##D = _mm_cvtsi128_si32(z##D); \ + z##A = _mm_shuffle_epi32(z##A, 0x39); \ + z##B = _mm_shuffle_epi32(z##B, 0x39); \ + z##C = _mm_shuffle_epi32(z##C, 0x39); \ + z##D = _mm_shuffle_epi32(z##D, 0x39); \ + \ + in##A ^= *(uint32_t *)(m + 128); \ + in##B ^= *(uint32_t *)(m + 132); \ + in##C ^= *(uint32_t *)(m + 136); \ + in##D ^= *(uint32_t *)(m + 140); \ + *(uint32_t *)(c + 128) = in##A; \ + *(uint32_t *)(c + 132) = in##B; \ + *(uint32_t *)(c + 136) = in##C; \ + *(uint32_t *)(c + 140) = in##D; \ + \ + in##A = _mm_cvtsi128_si32(z##A); \ + in##B = _mm_cvtsi128_si32(z##B); \ + in##C = _mm_cvtsi128_si32(z##C); \ + in##D = _mm_cvtsi128_si32(z##D); \ + \ + in##A ^= *(uint32_t *)(m + 192); \ + in##B ^= *(uint32_t *)(m + 196); \ + in##C ^= *(uint32_t *)(m + 200); \ + in##D ^= *(uint32_t *)(m + 204); \ + *(uint32_t *)(c + 192) = in##A; \ + *(uint32_t *)(c + 196) = in##B; \ + *(uint32_t *)(c + 200) = in##C; \ + *(uint32_t *)(c + 204) = in##D /* store data ; this macro replaces shuffle+mov by a direct extract; not much * difference */ -#define ONEQUAD_EXTRACT(A, B, C, D) \ - z##A = _mm_add_epi32(z##A, orig##A); \ - z##B = _mm_add_epi32(z##B, orig##B); \ - z##C = _mm_add_epi32(z##C, orig##C); \ - z##D = _mm_add_epi32(z##D, orig##D); \ - in##A = _mm_cvtsi128_si32(z##A); \ - in##B = _mm_cvtsi128_si32(z##B); \ - in##C = _mm_cvtsi128_si32(z##C); \ - in##D = _mm_cvtsi128_si32(z##D); \ - in##A ^= *(uint32_t *) (m + 0); \ - in##B ^= *(uint32_t *) (m + 4); \ - in##C ^= *(uint32_t *) (m + 8); \ - in##D ^= *(uint32_t *) (m + 12); \ - *(uint32_t *) (c + 0) = in##A; \ - *(uint32_t *) (c + 4) = in##B; \ - *(uint32_t *) (c + 8) = in##C; \ - *(uint32_t *) (c + 12) = in##D; \ - \ - in##A = _mm_extract_epi32(z##A, 1); \ - in##B = _mm_extract_epi32(z##B, 1); \ - in##C = _mm_extract_epi32(z##C, 1); \ - in##D = _mm_extract_epi32(z##D, 1); \ - \ - in##A ^= *(uint32_t *) (m + 64); \ - in##B ^= *(uint32_t *) (m + 68); \ - in##C ^= *(uint32_t *) (m + 72); \ - in##D ^= *(uint32_t *) (m + 76); \ - *(uint32_t *) (c + 64) = in##A; \ - *(uint32_t *) (c + 68) = in##B; \ - *(uint32_t *) (c + 72) = in##C; \ - *(uint32_t *) (c + 76) = in##D; \ - \ - in##A = _mm_extract_epi32(z##A, 2); \ - in##B = _mm_extract_epi32(z##B, 2); \ - in##C = _mm_extract_epi32(z##C, 2); \ - in##D = _mm_extract_epi32(z##D, 2); \ - \ - in##A ^= *(uint32_t *) (m + 128); \ - in##B ^= *(uint32_t *) (m + 132); \ - in##C ^= *(uint32_t *) (m + 136); \ - in##D ^= *(uint32_t *) (m + 140); \ - *(uint32_t *) (c + 128) = in##A; \ - *(uint32_t *) (c + 132) = in##B; \ - *(uint32_t *) (c + 136) = in##C; \ - *(uint32_t *) (c + 140) = in##D; \ - \ - in##A = _mm_extract_epi32(z##A, 3); \ - in##B = _mm_extract_epi32(z##B, 3); \ - in##C = _mm_extract_epi32(z##C, 3); \ - in##D = _mm_extract_epi32(z##D, 3); \ - \ - in##A ^= *(uint32_t *) (m + 192); \ - in##B ^= *(uint32_t *) (m + 196); \ - in##C ^= *(uint32_t *) (m + 200); \ - in##D ^= *(uint32_t *) (m + 204); \ - *(uint32_t *) (c + 192) = in##A; \ - *(uint32_t *) (c + 196) = in##B; \ - *(uint32_t *) (c + 200) = in##C; \ - *(uint32_t *) (c + 204) = in##D +#define ONEQUAD_EXTRACT(A, B, C, D) \ + z##A = _mm_add_epi32(z##A, orig##A); \ + z##B = _mm_add_epi32(z##B, orig##B); \ + z##C = _mm_add_epi32(z##C, orig##C); \ + z##D = _mm_add_epi32(z##D, orig##D); \ + in##A = _mm_cvtsi128_si32(z##A); \ + in##B = _mm_cvtsi128_si32(z##B); \ + in##C = _mm_cvtsi128_si32(z##C); \ + in##D = _mm_cvtsi128_si32(z##D); \ + in##A ^= *(uint32_t *)(m + 0); \ + in##B ^= *(uint32_t *)(m + 4); \ + in##C ^= *(uint32_t *)(m + 8); \ + in##D ^= *(uint32_t *)(m + 12); \ + *(uint32_t *)(c + 0) = in##A; \ + *(uint32_t *)(c + 4) = in##B; \ + *(uint32_t *)(c + 8) = in##C; \ + *(uint32_t *)(c + 12) = in##D; \ + \ + in##A = _mm_extract_epi32(z##A, 1); \ + in##B = _mm_extract_epi32(z##B, 1); \ + in##C = _mm_extract_epi32(z##C, 1); \ + in##D = _mm_extract_epi32(z##D, 1); \ + \ + in##A ^= *(uint32_t *)(m + 64); \ + in##B ^= *(uint32_t *)(m + 68); \ + in##C ^= *(uint32_t *)(m + 72); \ + in##D ^= *(uint32_t *)(m + 76); \ + *(uint32_t *)(c + 64) = in##A; \ + *(uint32_t *)(c + 68) = in##B; \ + *(uint32_t *)(c + 72) = in##C; \ + *(uint32_t *)(c + 76) = in##D; \ + \ + in##A = _mm_extract_epi32(z##A, 2); \ + in##B = _mm_extract_epi32(z##B, 2); \ + in##C = _mm_extract_epi32(z##C, 2); \ + in##D = _mm_extract_epi32(z##D, 2); \ + \ + in##A ^= *(uint32_t *)(m + 128); \ + in##B ^= *(uint32_t *)(m + 132); \ + in##C ^= *(uint32_t *)(m + 136); \ + in##D ^= *(uint32_t *)(m + 140); \ + *(uint32_t *)(c + 128) = in##A; \ + *(uint32_t *)(c + 132) = in##B; \ + *(uint32_t *)(c + 136) = in##C; \ + *(uint32_t *)(c + 140) = in##D; \ + \ + in##A = _mm_extract_epi32(z##A, 3); \ + in##B = _mm_extract_epi32(z##B, 3); \ + in##C = _mm_extract_epi32(z##C, 3); \ + in##D = _mm_extract_epi32(z##D, 3); \ + \ + in##A ^= *(uint32_t *)(m + 192); \ + in##B ^= *(uint32_t *)(m + 196); \ + in##C ^= *(uint32_t *)(m + 200); \ + in##D ^= *(uint32_t *)(m + 204); \ + *(uint32_t *)(c + 192) = in##A; \ + *(uint32_t *)(c + 196) = in##B; \ + *(uint32_t *)(c + 200) = in##C; \ + *(uint32_t *)(c + 204) = in##D /* store data ; this macro first transpose data in-registers, and then store * them in memory. much faster with icc. */ -#define ONEQUAD_TRANSPOSE(A, B, C, D) \ - z##A = _mm_add_epi32(z##A, orig##A); \ - z##B = _mm_add_epi32(z##B, orig##B); \ - z##C = _mm_add_epi32(z##C, orig##C); \ - z##D = _mm_add_epi32(z##D, orig##D); \ - y##A = _mm_unpacklo_epi32(z##A, z##B); \ - y##B = _mm_unpacklo_epi32(z##C, z##D); \ - y##C = _mm_unpackhi_epi32(z##A, z##B); \ - y##D = _mm_unpackhi_epi32(z##C, z##D); \ - z##A = _mm_unpacklo_epi64(y##A, y##B); \ - z##B = _mm_unpackhi_epi64(y##A, y##B); \ - z##C = _mm_unpacklo_epi64(y##C, y##D); \ - z##D = _mm_unpackhi_epi64(y##C, y##D); \ - y##A = _mm_xor_si128(z##A, _mm_loadu_si128((__m128i *) (m + 0))); \ - _mm_storeu_si128((__m128i *) (c + 0), y##A); \ - y##B = _mm_xor_si128(z##B, _mm_loadu_si128((__m128i *) (m + 64))); \ - _mm_storeu_si128((__m128i *) (c + 64), y##B); \ - y##C = _mm_xor_si128(z##C, _mm_loadu_si128((__m128i *) (m + 128))); \ - _mm_storeu_si128((__m128i *) (c + 128), y##C); \ - y##D = _mm_xor_si128(z##D, _mm_loadu_si128((__m128i *) (m + 192))); \ - _mm_storeu_si128((__m128i *) (c + 192), y##D) +#define ONEQUAD_TRANSPOSE(A, B, C, D) \ + z##A = _mm_add_epi32(z##A, orig##A); \ + z##B = _mm_add_epi32(z##B, orig##B); \ + z##C = _mm_add_epi32(z##C, orig##C); \ + z##D = _mm_add_epi32(z##D, orig##D); \ + y##A = _mm_unpacklo_epi32(z##A, z##B); \ + y##B = _mm_unpacklo_epi32(z##C, z##D); \ + y##C = _mm_unpackhi_epi32(z##A, z##B); \ + y##D = _mm_unpackhi_epi32(z##C, z##D); \ + z##A = _mm_unpacklo_epi64(y##A, y##B); \ + z##B = _mm_unpackhi_epi64(y##A, y##B); \ + z##C = _mm_unpacklo_epi64(y##C, y##D); \ + z##D = _mm_unpackhi_epi64(y##C, y##D); \ + y##A = _mm_xor_si128(z##A, _mm_loadu_si128((__m128i *)(m + 0))); \ + _mm_storeu_si128((__m128i *)(c + 0), y##A); \ + y##B = _mm_xor_si128(z##B, _mm_loadu_si128((__m128i *)(m + 64))); \ + _mm_storeu_si128((__m128i *)(c + 64), y##B); \ + y##C = _mm_xor_si128(z##C, _mm_loadu_si128((__m128i *)(m + 128))); \ + _mm_storeu_si128((__m128i *)(c + 128), y##C); \ + y##D = _mm_xor_si128(z##D, _mm_loadu_si128((__m128i *)(m + 192))); \ + _mm_storeu_si128((__m128i *)(c + 192), y##D) #define ONEQUAD(A, B, C, D) ONEQUAD_TRANSPOSE(A, B, C, D) - ONEQUAD(0, 1, 2, 3); - m += 16; - c += 16; - ONEQUAD(4, 5, 6, 7); - m += 16; - c += 16; - ONEQUAD(8, 9, 10, 11); - m += 16; - c += 16; - ONEQUAD(12, 13, 14, 15); - m -= 48; - c -= 48; + ONEQUAD(0, 1, 2, 3); + m += 16; + c += 16; + ONEQUAD(4, 5, 6, 7); + m += 16; + c += 16; + ONEQUAD(8, 9, 10, 11); + m += 16; + c += 16; + ONEQUAD(12, 13, 14, 15); + m -= 48; + c -= 48; #undef ONEQUAD #undef ONEQUAD_TRANSPOSE #undef ONEQUAD_EXTRACT #undef ONEQUAD_SHUFFLE - bytes -= 256; - c += 256; - m += 256; - } + bytes -= 256; + c += 256; + m += 256; + } } diff --git a/crypto/salsa20/xmm6int/u8.h b/crypto/salsa20/xmm6int/u8.h index 467a96129..bc08f1fe8 100644 --- a/crypto/salsa20/xmm6int/u8.h +++ b/crypto/salsa20/xmm6int/u8.h @@ -1,476 +1,471 @@ -if (bytes >= 512) { - __m256i y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12, y13, y14, - y15; - - /* the naive way seems as fast (if not a bit faster) than the vector way */ - __m256i z0 = _mm256_set1_epi32(x[0]); - __m256i z5 = _mm256_set1_epi32(x[1]); - __m256i z10 = _mm256_set1_epi32(x[2]); - __m256i z15 = _mm256_set1_epi32(x[3]); - __m256i z12 = _mm256_set1_epi32(x[4]); - __m256i z1 = _mm256_set1_epi32(x[5]); - __m256i z6 = _mm256_set1_epi32(x[6]); - __m256i z11 = _mm256_set1_epi32(x[7]); - __m256i z8; /* useless */ - __m256i z13 = _mm256_set1_epi32(x[9]); - __m256i z2 = _mm256_set1_epi32(x[10]); - __m256i z7 = _mm256_set1_epi32(x[11]); - __m256i z4 = _mm256_set1_epi32(x[12]); - __m256i z9; /* useless */ - __m256i z14 = _mm256_set1_epi32(x[14]); - __m256i z3 = _mm256_set1_epi32(x[15]); - - __m256i orig0 = z0; - __m256i orig1 = z1; - __m256i orig2 = z2; - __m256i orig3 = z3; - __m256i orig4 = z4; - __m256i orig5 = z5; - __m256i orig6 = z6; - __m256i orig7 = z7; - __m256i orig8; - __m256i orig9; - __m256i orig10 = z10; - __m256i orig11 = z11; - __m256i orig12 = z12; - __m256i orig13 = z13; - __m256i orig14 = z14; - __m256i orig15 = z15; - - uint32_t in8; - uint32_t in9; - int i; - - while (bytes >= 512) { - /* vector implementation for z8 and z9 */ - /* faster than the naive version for 8 blocks */ - const __m256i addv8 = _mm256_set_epi64x(3, 2, 1, 0); - const __m256i addv9 = _mm256_set_epi64x(7, 6, 5, 4); - const __m256i permute = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0); - - __m256i t8, t9; - uint64_t in89; - - in8 = x[8]; - in9 = x[13]; /* see arrays above for the address translation */ - in89 = ((uint64_t) in8) | (((uint64_t) in9) << 32); - - z8 = z9 = _mm256_broadcastq_epi64(_mm_cvtsi64_si128(in89)); - - t8 = _mm256_add_epi64(addv8, z8); - t9 = _mm256_add_epi64(addv9, z9); - - z8 = _mm256_unpacklo_epi32(t8, t9); - z9 = _mm256_unpackhi_epi32(t8, t9); - - t8 = _mm256_unpacklo_epi32(z8, z9); - t9 = _mm256_unpackhi_epi32(z8, z9); - - /* required because unpack* are intra-lane */ - z8 = _mm256_permutevar8x32_epi32(t8, permute); - z9 = _mm256_permutevar8x32_epi32(t9, permute); - - orig8 = z8; - orig9 = z9; - - in89 += 8; - - x[8] = in89 & 0xFFFFFFFF; - x[13] = (in89 >> 32) & 0xFFFFFFFF; - - z5 = orig5; - z10 = orig10; - z15 = orig15; - z14 = orig14; - z3 = orig3; - z6 = orig6; - z11 = orig11; - z1 = orig1; - - z7 = orig7; - z13 = orig13; - z2 = orig2; - z9 = orig9; - z0 = orig0; - z12 = orig12; - z4 = orig4; - z8 = orig8; - - for (i = 0; i < ROUNDS; i += 2) { - /* the inner loop is a direct translation (regexp search/replace) - * from the amd64-xmm6 ASM */ - __m256i r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, - r14, r15; - - y4 = z12; - y4 = _mm256_add_epi32(y4, z0); - r4 = y4; - y4 = _mm256_slli_epi32(y4, 7); - z4 = _mm256_xor_si256(z4, y4); - r4 = _mm256_srli_epi32(r4, 25); - z4 = _mm256_xor_si256(z4, r4); - - y9 = z1; - y9 = _mm256_add_epi32(y9, z5); - r9 = y9; - y9 = _mm256_slli_epi32(y9, 7); - z9 = _mm256_xor_si256(z9, y9); - r9 = _mm256_srli_epi32(r9, 25); - z9 = _mm256_xor_si256(z9, r9); - - y8 = z0; - y8 = _mm256_add_epi32(y8, z4); - r8 = y8; - y8 = _mm256_slli_epi32(y8, 9); - z8 = _mm256_xor_si256(z8, y8); - r8 = _mm256_srli_epi32(r8, 23); - z8 = _mm256_xor_si256(z8, r8); - - y13 = z5; - y13 = _mm256_add_epi32(y13, z9); - r13 = y13; - y13 = _mm256_slli_epi32(y13, 9); - z13 = _mm256_xor_si256(z13, y13); - r13 = _mm256_srli_epi32(r13, 23); - z13 = _mm256_xor_si256(z13, r13); - - y12 = z4; - y12 = _mm256_add_epi32(y12, z8); - r12 = y12; - y12 = _mm256_slli_epi32(y12, 13); - z12 = _mm256_xor_si256(z12, y12); - r12 = _mm256_srli_epi32(r12, 19); - z12 = _mm256_xor_si256(z12, r12); - - y1 = z9; - y1 = _mm256_add_epi32(y1, z13); - r1 = y1; - y1 = _mm256_slli_epi32(y1, 13); - z1 = _mm256_xor_si256(z1, y1); - r1 = _mm256_srli_epi32(r1, 19); - z1 = _mm256_xor_si256(z1, r1); - - y0 = z8; - y0 = _mm256_add_epi32(y0, z12); - r0 = y0; - y0 = _mm256_slli_epi32(y0, 18); - z0 = _mm256_xor_si256(z0, y0); - r0 = _mm256_srli_epi32(r0, 14); - z0 = _mm256_xor_si256(z0, r0); - - y5 = z13; - y5 = _mm256_add_epi32(y5, z1); - r5 = y5; - y5 = _mm256_slli_epi32(y5, 18); - z5 = _mm256_xor_si256(z5, y5); - r5 = _mm256_srli_epi32(r5, 14); - z5 = _mm256_xor_si256(z5, r5); - - y14 = z6; - y14 = _mm256_add_epi32(y14, z10); - r14 = y14; - y14 = _mm256_slli_epi32(y14, 7); - z14 = _mm256_xor_si256(z14, y14); - r14 = _mm256_srli_epi32(r14, 25); - z14 = _mm256_xor_si256(z14, r14); - - y3 = z11; - y3 = _mm256_add_epi32(y3, z15); - r3 = y3; - y3 = _mm256_slli_epi32(y3, 7); - z3 = _mm256_xor_si256(z3, y3); - r3 = _mm256_srli_epi32(r3, 25); - z3 = _mm256_xor_si256(z3, r3); - - y2 = z10; - y2 = _mm256_add_epi32(y2, z14); - r2 = y2; - y2 = _mm256_slli_epi32(y2, 9); - z2 = _mm256_xor_si256(z2, y2); - r2 = _mm256_srli_epi32(r2, 23); - z2 = _mm256_xor_si256(z2, r2); - - y7 = z15; - y7 = _mm256_add_epi32(y7, z3); - r7 = y7; - y7 = _mm256_slli_epi32(y7, 9); - z7 = _mm256_xor_si256(z7, y7); - r7 = _mm256_srli_epi32(r7, 23); - z7 = _mm256_xor_si256(z7, r7); - - y6 = z14; - y6 = _mm256_add_epi32(y6, z2); - r6 = y6; - y6 = _mm256_slli_epi32(y6, 13); - z6 = _mm256_xor_si256(z6, y6); - r6 = _mm256_srli_epi32(r6, 19); - z6 = _mm256_xor_si256(z6, r6); - - y11 = z3; - y11 = _mm256_add_epi32(y11, z7); - r11 = y11; - y11 = _mm256_slli_epi32(y11, 13); - z11 = _mm256_xor_si256(z11, y11); - r11 = _mm256_srli_epi32(r11, 19); - z11 = _mm256_xor_si256(z11, r11); - - y10 = z2; - y10 = _mm256_add_epi32(y10, z6); - r10 = y10; - y10 = _mm256_slli_epi32(y10, 18); - z10 = _mm256_xor_si256(z10, y10); - r10 = _mm256_srli_epi32(r10, 14); - z10 = _mm256_xor_si256(z10, r10); - - y1 = z3; - y1 = _mm256_add_epi32(y1, z0); - r1 = y1; - y1 = _mm256_slli_epi32(y1, 7); - z1 = _mm256_xor_si256(z1, y1); - r1 = _mm256_srli_epi32(r1, 25); - z1 = _mm256_xor_si256(z1, r1); - - y15 = z7; - y15 = _mm256_add_epi32(y15, z11); - r15 = y15; - y15 = _mm256_slli_epi32(y15, 18); - z15 = _mm256_xor_si256(z15, y15); - r15 = _mm256_srli_epi32(r15, 14); - z15 = _mm256_xor_si256(z15, r15); - - y6 = z4; - y6 = _mm256_add_epi32(y6, z5); - r6 = y6; - y6 = _mm256_slli_epi32(y6, 7); - z6 = _mm256_xor_si256(z6, y6); - r6 = _mm256_srli_epi32(r6, 25); - z6 = _mm256_xor_si256(z6, r6); - - y2 = z0; - y2 = _mm256_add_epi32(y2, z1); - r2 = y2; - y2 = _mm256_slli_epi32(y2, 9); - z2 = _mm256_xor_si256(z2, y2); - r2 = _mm256_srli_epi32(r2, 23); - z2 = _mm256_xor_si256(z2, r2); - - y7 = z5; - y7 = _mm256_add_epi32(y7, z6); - r7 = y7; - y7 = _mm256_slli_epi32(y7, 9); - z7 = _mm256_xor_si256(z7, y7); - r7 = _mm256_srli_epi32(r7, 23); - z7 = _mm256_xor_si256(z7, r7); - - y3 = z1; - y3 = _mm256_add_epi32(y3, z2); - r3 = y3; - y3 = _mm256_slli_epi32(y3, 13); - z3 = _mm256_xor_si256(z3, y3); - r3 = _mm256_srli_epi32(r3, 19); - z3 = _mm256_xor_si256(z3, r3); - - y4 = z6; - y4 = _mm256_add_epi32(y4, z7); - r4 = y4; - y4 = _mm256_slli_epi32(y4, 13); - z4 = _mm256_xor_si256(z4, y4); - r4 = _mm256_srli_epi32(r4, 19); - z4 = _mm256_xor_si256(z4, r4); - - y0 = z2; - y0 = _mm256_add_epi32(y0, z3); - r0 = y0; - y0 = _mm256_slli_epi32(y0, 18); - z0 = _mm256_xor_si256(z0, y0); - r0 = _mm256_srli_epi32(r0, 14); - z0 = _mm256_xor_si256(z0, r0); - - y5 = z7; - y5 = _mm256_add_epi32(y5, z4); - r5 = y5; - y5 = _mm256_slli_epi32(y5, 18); - z5 = _mm256_xor_si256(z5, y5); - r5 = _mm256_srli_epi32(r5, 14); - z5 = _mm256_xor_si256(z5, r5); - - y11 = z9; - y11 = _mm256_add_epi32(y11, z10); - r11 = y11; - y11 = _mm256_slli_epi32(y11, 7); - z11 = _mm256_xor_si256(z11, y11); - r11 = _mm256_srli_epi32(r11, 25); - z11 = _mm256_xor_si256(z11, r11); - - y12 = z14; - y12 = _mm256_add_epi32(y12, z15); - r12 = y12; - y12 = _mm256_slli_epi32(y12, 7); - z12 = _mm256_xor_si256(z12, y12); - r12 = _mm256_srli_epi32(r12, 25); - z12 = _mm256_xor_si256(z12, r12); - - y8 = z10; - y8 = _mm256_add_epi32(y8, z11); - r8 = y8; - y8 = _mm256_slli_epi32(y8, 9); - z8 = _mm256_xor_si256(z8, y8); - r8 = _mm256_srli_epi32(r8, 23); - z8 = _mm256_xor_si256(z8, r8); - - y13 = z15; - y13 = _mm256_add_epi32(y13, z12); - r13 = y13; - y13 = _mm256_slli_epi32(y13, 9); - z13 = _mm256_xor_si256(z13, y13); - r13 = _mm256_srli_epi32(r13, 23); - z13 = _mm256_xor_si256(z13, r13); - - y9 = z11; - y9 = _mm256_add_epi32(y9, z8); - r9 = y9; - y9 = _mm256_slli_epi32(y9, 13); - z9 = _mm256_xor_si256(z9, y9); - r9 = _mm256_srli_epi32(r9, 19); - z9 = _mm256_xor_si256(z9, r9); - - y14 = z12; - y14 = _mm256_add_epi32(y14, z13); - r14 = y14; - y14 = _mm256_slli_epi32(y14, 13); - z14 = _mm256_xor_si256(z14, y14); - r14 = _mm256_srli_epi32(r14, 19); - z14 = _mm256_xor_si256(z14, r14); - - y10 = z8; - y10 = _mm256_add_epi32(y10, z9); - r10 = y10; - y10 = _mm256_slli_epi32(y10, 18); - z10 = _mm256_xor_si256(z10, y10); - r10 = _mm256_srli_epi32(r10, 14); - z10 = _mm256_xor_si256(z10, r10); - - y15 = z13; - y15 = _mm256_add_epi32(y15, z14); - r15 = y15; - y15 = _mm256_slli_epi32(y15, 18); - z15 = _mm256_xor_si256(z15, y15); - r15 = _mm256_srli_epi32(r15, 14); - z15 = _mm256_xor_si256(z15, r15); - } +if(bytes >= 512) +{ + __m256i y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12, y13, y14, y15; + + /* the naive way seems as fast (if not a bit faster) than the vector way */ + __m256i z0 = _mm256_set1_epi32(x[0]); + __m256i z5 = _mm256_set1_epi32(x[1]); + __m256i z10 = _mm256_set1_epi32(x[2]); + __m256i z15 = _mm256_set1_epi32(x[3]); + __m256i z12 = _mm256_set1_epi32(x[4]); + __m256i z1 = _mm256_set1_epi32(x[5]); + __m256i z6 = _mm256_set1_epi32(x[6]); + __m256i z11 = _mm256_set1_epi32(x[7]); + __m256i z8; /* useless */ + __m256i z13 = _mm256_set1_epi32(x[9]); + __m256i z2 = _mm256_set1_epi32(x[10]); + __m256i z7 = _mm256_set1_epi32(x[11]); + __m256i z4 = _mm256_set1_epi32(x[12]); + __m256i z9; /* useless */ + __m256i z14 = _mm256_set1_epi32(x[14]); + __m256i z3 = _mm256_set1_epi32(x[15]); + + __m256i orig0 = z0; + __m256i orig1 = z1; + __m256i orig2 = z2; + __m256i orig3 = z3; + __m256i orig4 = z4; + __m256i orig5 = z5; + __m256i orig6 = z6; + __m256i orig7 = z7; + __m256i orig8; + __m256i orig9; + __m256i orig10 = z10; + __m256i orig11 = z11; + __m256i orig12 = z12; + __m256i orig13 = z13; + __m256i orig14 = z14; + __m256i orig15 = z15; + + uint32_t in8; + uint32_t in9; + int i; + + while(bytes >= 512) + { + /* vector implementation for z8 and z9 */ + /* faster than the naive version for 8 blocks */ + const __m256i addv8 = _mm256_set_epi64x(3, 2, 1, 0); + const __m256i addv9 = _mm256_set_epi64x(7, 6, 5, 4); + const __m256i permute = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0); + + __m256i t8, t9; + uint64_t in89; + + in8 = x[8]; + in9 = x[13]; /* see arrays above for the address translation */ + in89 = ((uint64_t)in8) | (((uint64_t)in9) << 32); + + z8 = z9 = _mm256_broadcastq_epi64(_mm_cvtsi64_si128(in89)); + + t8 = _mm256_add_epi64(addv8, z8); + t9 = _mm256_add_epi64(addv9, z9); + + z8 = _mm256_unpacklo_epi32(t8, t9); + z9 = _mm256_unpackhi_epi32(t8, t9); + + t8 = _mm256_unpacklo_epi32(z8, z9); + t9 = _mm256_unpackhi_epi32(z8, z9); + + /* required because unpack* are intra-lane */ + z8 = _mm256_permutevar8x32_epi32(t8, permute); + z9 = _mm256_permutevar8x32_epi32(t9, permute); + + orig8 = z8; + orig9 = z9; + + in89 += 8; + + x[8] = in89 & 0xFFFFFFFF; + x[13] = (in89 >> 32) & 0xFFFFFFFF; + + z5 = orig5; + z10 = orig10; + z15 = orig15; + z14 = orig14; + z3 = orig3; + z6 = orig6; + z11 = orig11; + z1 = orig1; + + z7 = orig7; + z13 = orig13; + z2 = orig2; + z9 = orig9; + z0 = orig0; + z12 = orig12; + z4 = orig4; + z8 = orig8; + + for(i = 0; i < ROUNDS; i += 2) + { + /* the inner loop is a direct translation (regexp search/replace) + * from the amd64-xmm6 ASM */ + __m256i r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, + r15; + + y4 = z12; + y4 = _mm256_add_epi32(y4, z0); + r4 = y4; + y4 = _mm256_slli_epi32(y4, 7); + z4 = _mm256_xor_si256(z4, y4); + r4 = _mm256_srli_epi32(r4, 25); + z4 = _mm256_xor_si256(z4, r4); + + y9 = z1; + y9 = _mm256_add_epi32(y9, z5); + r9 = y9; + y9 = _mm256_slli_epi32(y9, 7); + z9 = _mm256_xor_si256(z9, y9); + r9 = _mm256_srli_epi32(r9, 25); + z9 = _mm256_xor_si256(z9, r9); + + y8 = z0; + y8 = _mm256_add_epi32(y8, z4); + r8 = y8; + y8 = _mm256_slli_epi32(y8, 9); + z8 = _mm256_xor_si256(z8, y8); + r8 = _mm256_srli_epi32(r8, 23); + z8 = _mm256_xor_si256(z8, r8); + + y13 = z5; + y13 = _mm256_add_epi32(y13, z9); + r13 = y13; + y13 = _mm256_slli_epi32(y13, 9); + z13 = _mm256_xor_si256(z13, y13); + r13 = _mm256_srli_epi32(r13, 23); + z13 = _mm256_xor_si256(z13, r13); + + y12 = z4; + y12 = _mm256_add_epi32(y12, z8); + r12 = y12; + y12 = _mm256_slli_epi32(y12, 13); + z12 = _mm256_xor_si256(z12, y12); + r12 = _mm256_srli_epi32(r12, 19); + z12 = _mm256_xor_si256(z12, r12); + + y1 = z9; + y1 = _mm256_add_epi32(y1, z13); + r1 = y1; + y1 = _mm256_slli_epi32(y1, 13); + z1 = _mm256_xor_si256(z1, y1); + r1 = _mm256_srli_epi32(r1, 19); + z1 = _mm256_xor_si256(z1, r1); + + y0 = z8; + y0 = _mm256_add_epi32(y0, z12); + r0 = y0; + y0 = _mm256_slli_epi32(y0, 18); + z0 = _mm256_xor_si256(z0, y0); + r0 = _mm256_srli_epi32(r0, 14); + z0 = _mm256_xor_si256(z0, r0); + + y5 = z13; + y5 = _mm256_add_epi32(y5, z1); + r5 = y5; + y5 = _mm256_slli_epi32(y5, 18); + z5 = _mm256_xor_si256(z5, y5); + r5 = _mm256_srli_epi32(r5, 14); + z5 = _mm256_xor_si256(z5, r5); + + y14 = z6; + y14 = _mm256_add_epi32(y14, z10); + r14 = y14; + y14 = _mm256_slli_epi32(y14, 7); + z14 = _mm256_xor_si256(z14, y14); + r14 = _mm256_srli_epi32(r14, 25); + z14 = _mm256_xor_si256(z14, r14); + + y3 = z11; + y3 = _mm256_add_epi32(y3, z15); + r3 = y3; + y3 = _mm256_slli_epi32(y3, 7); + z3 = _mm256_xor_si256(z3, y3); + r3 = _mm256_srli_epi32(r3, 25); + z3 = _mm256_xor_si256(z3, r3); + + y2 = z10; + y2 = _mm256_add_epi32(y2, z14); + r2 = y2; + y2 = _mm256_slli_epi32(y2, 9); + z2 = _mm256_xor_si256(z2, y2); + r2 = _mm256_srli_epi32(r2, 23); + z2 = _mm256_xor_si256(z2, r2); + + y7 = z15; + y7 = _mm256_add_epi32(y7, z3); + r7 = y7; + y7 = _mm256_slli_epi32(y7, 9); + z7 = _mm256_xor_si256(z7, y7); + r7 = _mm256_srli_epi32(r7, 23); + z7 = _mm256_xor_si256(z7, r7); + + y6 = z14; + y6 = _mm256_add_epi32(y6, z2); + r6 = y6; + y6 = _mm256_slli_epi32(y6, 13); + z6 = _mm256_xor_si256(z6, y6); + r6 = _mm256_srli_epi32(r6, 19); + z6 = _mm256_xor_si256(z6, r6); + + y11 = z3; + y11 = _mm256_add_epi32(y11, z7); + r11 = y11; + y11 = _mm256_slli_epi32(y11, 13); + z11 = _mm256_xor_si256(z11, y11); + r11 = _mm256_srli_epi32(r11, 19); + z11 = _mm256_xor_si256(z11, r11); + + y10 = z2; + y10 = _mm256_add_epi32(y10, z6); + r10 = y10; + y10 = _mm256_slli_epi32(y10, 18); + z10 = _mm256_xor_si256(z10, y10); + r10 = _mm256_srli_epi32(r10, 14); + z10 = _mm256_xor_si256(z10, r10); + + y1 = z3; + y1 = _mm256_add_epi32(y1, z0); + r1 = y1; + y1 = _mm256_slli_epi32(y1, 7); + z1 = _mm256_xor_si256(z1, y1); + r1 = _mm256_srli_epi32(r1, 25); + z1 = _mm256_xor_si256(z1, r1); + + y15 = z7; + y15 = _mm256_add_epi32(y15, z11); + r15 = y15; + y15 = _mm256_slli_epi32(y15, 18); + z15 = _mm256_xor_si256(z15, y15); + r15 = _mm256_srli_epi32(r15, 14); + z15 = _mm256_xor_si256(z15, r15); + + y6 = z4; + y6 = _mm256_add_epi32(y6, z5); + r6 = y6; + y6 = _mm256_slli_epi32(y6, 7); + z6 = _mm256_xor_si256(z6, y6); + r6 = _mm256_srli_epi32(r6, 25); + z6 = _mm256_xor_si256(z6, r6); + + y2 = z0; + y2 = _mm256_add_epi32(y2, z1); + r2 = y2; + y2 = _mm256_slli_epi32(y2, 9); + z2 = _mm256_xor_si256(z2, y2); + r2 = _mm256_srli_epi32(r2, 23); + z2 = _mm256_xor_si256(z2, r2); + + y7 = z5; + y7 = _mm256_add_epi32(y7, z6); + r7 = y7; + y7 = _mm256_slli_epi32(y7, 9); + z7 = _mm256_xor_si256(z7, y7); + r7 = _mm256_srli_epi32(r7, 23); + z7 = _mm256_xor_si256(z7, r7); + + y3 = z1; + y3 = _mm256_add_epi32(y3, z2); + r3 = y3; + y3 = _mm256_slli_epi32(y3, 13); + z3 = _mm256_xor_si256(z3, y3); + r3 = _mm256_srli_epi32(r3, 19); + z3 = _mm256_xor_si256(z3, r3); + + y4 = z6; + y4 = _mm256_add_epi32(y4, z7); + r4 = y4; + y4 = _mm256_slli_epi32(y4, 13); + z4 = _mm256_xor_si256(z4, y4); + r4 = _mm256_srli_epi32(r4, 19); + z4 = _mm256_xor_si256(z4, r4); + + y0 = z2; + y0 = _mm256_add_epi32(y0, z3); + r0 = y0; + y0 = _mm256_slli_epi32(y0, 18); + z0 = _mm256_xor_si256(z0, y0); + r0 = _mm256_srli_epi32(r0, 14); + z0 = _mm256_xor_si256(z0, r0); + + y5 = z7; + y5 = _mm256_add_epi32(y5, z4); + r5 = y5; + y5 = _mm256_slli_epi32(y5, 18); + z5 = _mm256_xor_si256(z5, y5); + r5 = _mm256_srli_epi32(r5, 14); + z5 = _mm256_xor_si256(z5, r5); + + y11 = z9; + y11 = _mm256_add_epi32(y11, z10); + r11 = y11; + y11 = _mm256_slli_epi32(y11, 7); + z11 = _mm256_xor_si256(z11, y11); + r11 = _mm256_srli_epi32(r11, 25); + z11 = _mm256_xor_si256(z11, r11); + + y12 = z14; + y12 = _mm256_add_epi32(y12, z15); + r12 = y12; + y12 = _mm256_slli_epi32(y12, 7); + z12 = _mm256_xor_si256(z12, y12); + r12 = _mm256_srli_epi32(r12, 25); + z12 = _mm256_xor_si256(z12, r12); + + y8 = z10; + y8 = _mm256_add_epi32(y8, z11); + r8 = y8; + y8 = _mm256_slli_epi32(y8, 9); + z8 = _mm256_xor_si256(z8, y8); + r8 = _mm256_srli_epi32(r8, 23); + z8 = _mm256_xor_si256(z8, r8); + + y13 = z15; + y13 = _mm256_add_epi32(y13, z12); + r13 = y13; + y13 = _mm256_slli_epi32(y13, 9); + z13 = _mm256_xor_si256(z13, y13); + r13 = _mm256_srli_epi32(r13, 23); + z13 = _mm256_xor_si256(z13, r13); + + y9 = z11; + y9 = _mm256_add_epi32(y9, z8); + r9 = y9; + y9 = _mm256_slli_epi32(y9, 13); + z9 = _mm256_xor_si256(z9, y9); + r9 = _mm256_srli_epi32(r9, 19); + z9 = _mm256_xor_si256(z9, r9); + + y14 = z12; + y14 = _mm256_add_epi32(y14, z13); + r14 = y14; + y14 = _mm256_slli_epi32(y14, 13); + z14 = _mm256_xor_si256(z14, y14); + r14 = _mm256_srli_epi32(r14, 19); + z14 = _mm256_xor_si256(z14, r14); + + y10 = z8; + y10 = _mm256_add_epi32(y10, z9); + r10 = y10; + y10 = _mm256_slli_epi32(y10, 18); + z10 = _mm256_xor_si256(z10, y10); + r10 = _mm256_srli_epi32(r10, 14); + z10 = _mm256_xor_si256(z10, r10); + + y15 = z13; + y15 = _mm256_add_epi32(y15, z14); + r15 = y15; + y15 = _mm256_slli_epi32(y15, 18); + z15 = _mm256_xor_si256(z15, y15); + r15 = _mm256_srli_epi32(r15, 14); + z15 = _mm256_xor_si256(z15, r15); + } /* store data ; this macro first transpose data in-registers, and then store * them in memory. much faster with icc. */ -#define ONEQUAD_TRANSPOSE(A, B, C, D) \ - { \ - __m128i t0, t1, t2, t3; \ - z##A = _mm256_add_epi32(z##A, orig##A); \ - z##B = _mm256_add_epi32(z##B, orig##B); \ - z##C = _mm256_add_epi32(z##C, orig##C); \ - z##D = _mm256_add_epi32(z##D, orig##D); \ - y##A = _mm256_unpacklo_epi32(z##A, z##B); \ - y##B = _mm256_unpacklo_epi32(z##C, z##D); \ - y##C = _mm256_unpackhi_epi32(z##A, z##B); \ - y##D = _mm256_unpackhi_epi32(z##C, z##D); \ - z##A = _mm256_unpacklo_epi64(y##A, y##B); \ - z##B = _mm256_unpackhi_epi64(y##A, y##B); \ - z##C = _mm256_unpacklo_epi64(y##C, y##D); \ - z##D = _mm256_unpackhi_epi64(y##C, y##D); \ - t0 = _mm_xor_si128(_mm256_extracti128_si256(z##A, 0), \ - _mm_loadu_si128((__m128i*) (m + 0))); \ - _mm_storeu_si128((__m128i*) (c + 0), t0); \ - t1 = _mm_xor_si128(_mm256_extracti128_si256(z##B, 0), \ - _mm_loadu_si128((__m128i*) (m + 64))); \ - _mm_storeu_si128((__m128i*) (c + 64), t1); \ - t2 = _mm_xor_si128(_mm256_extracti128_si256(z##C, 0), \ - _mm_loadu_si128((__m128i*) (m + 128))); \ - _mm_storeu_si128((__m128i*) (c + 128), t2); \ - t3 = _mm_xor_si128(_mm256_extracti128_si256(z##D, 0), \ - _mm_loadu_si128((__m128i*) (m + 192))); \ - _mm_storeu_si128((__m128i*) (c + 192), t3); \ - t0 = _mm_xor_si128(_mm256_extracti128_si256(z##A, 1), \ - _mm_loadu_si128((__m128i*) (m + 256))); \ - _mm_storeu_si128((__m128i*) (c + 256), t0); \ - t1 = _mm_xor_si128(_mm256_extracti128_si256(z##B, 1), \ - _mm_loadu_si128((__m128i*) (m + 320))); \ - _mm_storeu_si128((__m128i*) (c + 320), t1); \ - t2 = _mm_xor_si128(_mm256_extracti128_si256(z##C, 1), \ - _mm_loadu_si128((__m128i*) (m + 384))); \ - _mm_storeu_si128((__m128i*) (c + 384), t2); \ - t3 = _mm_xor_si128(_mm256_extracti128_si256(z##D, 1), \ - _mm_loadu_si128((__m128i*) (m + 448))); \ - _mm_storeu_si128((__m128i*) (c + 448), t3); \ - } +#define ONEQUAD_TRANSPOSE(A, B, C, D) \ + { \ + __m128i t0, t1, t2, t3; \ + z##A = _mm256_add_epi32(z##A, orig##A); \ + z##B = _mm256_add_epi32(z##B, orig##B); \ + z##C = _mm256_add_epi32(z##C, orig##C); \ + z##D = _mm256_add_epi32(z##D, orig##D); \ + y##A = _mm256_unpacklo_epi32(z##A, z##B); \ + y##B = _mm256_unpacklo_epi32(z##C, z##D); \ + y##C = _mm256_unpackhi_epi32(z##A, z##B); \ + y##D = _mm256_unpackhi_epi32(z##C, z##D); \ + z##A = _mm256_unpacklo_epi64(y##A, y##B); \ + z##B = _mm256_unpackhi_epi64(y##A, y##B); \ + z##C = _mm256_unpacklo_epi64(y##C, y##D); \ + z##D = _mm256_unpackhi_epi64(y##C, y##D); \ + t0 = _mm_xor_si128(_mm256_extracti128_si256(z##A, 0), \ + _mm_loadu_si128((__m128i*)(m + 0))); \ + _mm_storeu_si128((__m128i*)(c + 0), t0); \ + t1 = _mm_xor_si128(_mm256_extracti128_si256(z##B, 0), \ + _mm_loadu_si128((__m128i*)(m + 64))); \ + _mm_storeu_si128((__m128i*)(c + 64), t1); \ + t2 = _mm_xor_si128(_mm256_extracti128_si256(z##C, 0), \ + _mm_loadu_si128((__m128i*)(m + 128))); \ + _mm_storeu_si128((__m128i*)(c + 128), t2); \ + t3 = _mm_xor_si128(_mm256_extracti128_si256(z##D, 0), \ + _mm_loadu_si128((__m128i*)(m + 192))); \ + _mm_storeu_si128((__m128i*)(c + 192), t3); \ + t0 = _mm_xor_si128(_mm256_extracti128_si256(z##A, 1), \ + _mm_loadu_si128((__m128i*)(m + 256))); \ + _mm_storeu_si128((__m128i*)(c + 256), t0); \ + t1 = _mm_xor_si128(_mm256_extracti128_si256(z##B, 1), \ + _mm_loadu_si128((__m128i*)(m + 320))); \ + _mm_storeu_si128((__m128i*)(c + 320), t1); \ + t2 = _mm_xor_si128(_mm256_extracti128_si256(z##C, 1), \ + _mm_loadu_si128((__m128i*)(m + 384))); \ + _mm_storeu_si128((__m128i*)(c + 384), t2); \ + t3 = _mm_xor_si128(_mm256_extracti128_si256(z##D, 1), \ + _mm_loadu_si128((__m128i*)(m + 448))); \ + _mm_storeu_si128((__m128i*)(c + 448), t3); \ + } #define ONEQUAD(A, B, C, D) ONEQUAD_TRANSPOSE(A, B, C, D) -#define ONEQUAD_UNPCK(A, B, C, D) \ - { \ - z##A = _mm256_add_epi32(z##A, orig##A); \ - z##B = _mm256_add_epi32(z##B, orig##B); \ - z##C = _mm256_add_epi32(z##C, orig##C); \ - z##D = _mm256_add_epi32(z##D, orig##D); \ - y##A = _mm256_unpacklo_epi32(z##A, z##B); \ - y##B = _mm256_unpacklo_epi32(z##C, z##D); \ - y##C = _mm256_unpackhi_epi32(z##A, z##B); \ - y##D = _mm256_unpackhi_epi32(z##C, z##D); \ - z##A = _mm256_unpacklo_epi64(y##A, y##B); \ - z##B = _mm256_unpackhi_epi64(y##A, y##B); \ - z##C = _mm256_unpacklo_epi64(y##C, y##D); \ - z##D = _mm256_unpackhi_epi64(y##C, y##D); \ - } - -#define ONEOCTO(A, B, C, D, A2, B2, C2, D2) \ - { \ - ONEQUAD_UNPCK(A, B, C, D); \ - ONEQUAD_UNPCK(A2, B2, C2, D2); \ - y##A = _mm256_permute2x128_si256(z##A, z##A2, 0x20); \ - y##A2 = _mm256_permute2x128_si256(z##A, z##A2, 0x31); \ - y##B = _mm256_permute2x128_si256(z##B, z##B2, 0x20); \ - y##B2 = _mm256_permute2x128_si256(z##B, z##B2, 0x31); \ - y##C = _mm256_permute2x128_si256(z##C, z##C2, 0x20); \ - y##C2 = _mm256_permute2x128_si256(z##C, z##C2, 0x31); \ - y##D = _mm256_permute2x128_si256(z##D, z##D2, 0x20); \ - y##D2 = _mm256_permute2x128_si256(z##D, z##D2, 0x31); \ - y##A = _mm256_xor_si256(y##A, _mm256_loadu_si256((__m256i*) (m + 0))); \ - y##B = \ - _mm256_xor_si256(y##B, _mm256_loadu_si256((__m256i*) (m + 64))); \ - y##C = \ - _mm256_xor_si256(y##C, _mm256_loadu_si256((__m256i*) (m + 128))); \ - y##D = \ - _mm256_xor_si256(y##D, _mm256_loadu_si256((__m256i*) (m + 192))); \ - y##A2 = \ - _mm256_xor_si256(y##A2, _mm256_loadu_si256((__m256i*) (m + 256))); \ - y##B2 = \ - _mm256_xor_si256(y##B2, _mm256_loadu_si256((__m256i*) (m + 320))); \ - y##C2 = \ - _mm256_xor_si256(y##C2, _mm256_loadu_si256((__m256i*) (m + 384))); \ - y##D2 = \ - _mm256_xor_si256(y##D2, _mm256_loadu_si256((__m256i*) (m + 448))); \ - _mm256_storeu_si256((__m256i*) (c + 0), y##A); \ - _mm256_storeu_si256((__m256i*) (c + 64), y##B); \ - _mm256_storeu_si256((__m256i*) (c + 128), y##C); \ - _mm256_storeu_si256((__m256i*) (c + 192), y##D); \ - _mm256_storeu_si256((__m256i*) (c + 256), y##A2); \ - _mm256_storeu_si256((__m256i*) (c + 320), y##B2); \ - _mm256_storeu_si256((__m256i*) (c + 384), y##C2); \ - _mm256_storeu_si256((__m256i*) (c + 448), y##D2); \ - } - - ONEOCTO(0, 1, 2, 3, 4, 5, 6, 7); - m += 32; - c += 32; - ONEOCTO(8, 9, 10, 11, 12, 13, 14, 15); - m -= 32; - c -= 32; +#define ONEQUAD_UNPCK(A, B, C, D) \ + { \ + z##A = _mm256_add_epi32(z##A, orig##A); \ + z##B = _mm256_add_epi32(z##B, orig##B); \ + z##C = _mm256_add_epi32(z##C, orig##C); \ + z##D = _mm256_add_epi32(z##D, orig##D); \ + y##A = _mm256_unpacklo_epi32(z##A, z##B); \ + y##B = _mm256_unpacklo_epi32(z##C, z##D); \ + y##C = _mm256_unpackhi_epi32(z##A, z##B); \ + y##D = _mm256_unpackhi_epi32(z##C, z##D); \ + z##A = _mm256_unpacklo_epi64(y##A, y##B); \ + z##B = _mm256_unpackhi_epi64(y##A, y##B); \ + z##C = _mm256_unpacklo_epi64(y##C, y##D); \ + z##D = _mm256_unpackhi_epi64(y##C, y##D); \ + } + +#define ONEOCTO(A, B, C, D, A2, B2, C2, D2) \ + { \ + ONEQUAD_UNPCK(A, B, C, D); \ + ONEQUAD_UNPCK(A2, B2, C2, D2); \ + y##A = _mm256_permute2x128_si256(z##A, z##A2, 0x20); \ + y##A2 = _mm256_permute2x128_si256(z##A, z##A2, 0x31); \ + y##B = _mm256_permute2x128_si256(z##B, z##B2, 0x20); \ + y##B2 = _mm256_permute2x128_si256(z##B, z##B2, 0x31); \ + y##C = _mm256_permute2x128_si256(z##C, z##C2, 0x20); \ + y##C2 = _mm256_permute2x128_si256(z##C, z##C2, 0x31); \ + y##D = _mm256_permute2x128_si256(z##D, z##D2, 0x20); \ + y##D2 = _mm256_permute2x128_si256(z##D, z##D2, 0x31); \ + y##A = _mm256_xor_si256(y##A, _mm256_loadu_si256((__m256i*)(m + 0))); \ + y##B = _mm256_xor_si256(y##B, _mm256_loadu_si256((__m256i*)(m + 64))); \ + y##C = _mm256_xor_si256(y##C, _mm256_loadu_si256((__m256i*)(m + 128))); \ + y##D = _mm256_xor_si256(y##D, _mm256_loadu_si256((__m256i*)(m + 192))); \ + y##A2 = _mm256_xor_si256(y##A2, _mm256_loadu_si256((__m256i*)(m + 256))); \ + y##B2 = _mm256_xor_si256(y##B2, _mm256_loadu_si256((__m256i*)(m + 320))); \ + y##C2 = _mm256_xor_si256(y##C2, _mm256_loadu_si256((__m256i*)(m + 384))); \ + y##D2 = _mm256_xor_si256(y##D2, _mm256_loadu_si256((__m256i*)(m + 448))); \ + _mm256_storeu_si256((__m256i*)(c + 0), y##A); \ + _mm256_storeu_si256((__m256i*)(c + 64), y##B); \ + _mm256_storeu_si256((__m256i*)(c + 128), y##C); \ + _mm256_storeu_si256((__m256i*)(c + 192), y##D); \ + _mm256_storeu_si256((__m256i*)(c + 256), y##A2); \ + _mm256_storeu_si256((__m256i*)(c + 320), y##B2); \ + _mm256_storeu_si256((__m256i*)(c + 384), y##C2); \ + _mm256_storeu_si256((__m256i*)(c + 448), y##D2); \ + } + + ONEOCTO(0, 1, 2, 3, 4, 5, 6, 7); + m += 32; + c += 32; + ONEOCTO(8, 9, 10, 11, 12, 13, 14, 15); + m -= 32; + c -= 32; #undef ONEQUAD #undef ONEQUAD_TRANSPOSE #undef ONEQUAD_UNPCK #undef ONEOCTO - bytes -= 512; - c += 512; - m += 512; - } + bytes -= 512; + c += 512; + m += 512; + } }