mirror of
https://github.com/oxen-io/lokinet.git
synced 2024-11-17 15:25:35 +00:00
90 lines
2.6 KiB
C
90 lines
2.6 KiB
C
|
|
#include <stdint.h>
|
|
#include <string.h>
|
|
|
|
#include "blake2.h"
|
|
#include <sodium/private/common.h>
|
|
#include <sodium/private/sse2_64_32.h>
|
|
|
|
#if defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H)
|
|
|
|
#ifdef __GNUC__
|
|
#pragma GCC target("sse2")
|
|
#pragma GCC target("ssse3")
|
|
#endif
|
|
|
|
#include <emmintrin.h>
|
|
#include <tmmintrin.h>
|
|
|
|
#include "blake2b-compress-ssse3.h"
|
|
|
|
CRYPTO_ALIGN(64)
|
|
static const uint64_t blake2b_IV[8] = {
|
|
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 0x3c6ef372fe94f82bULL,
|
|
0xa54ff53a5f1d36f1ULL, 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
|
|
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL};
|
|
|
|
int
|
|
blake2b_compress_ssse3(blake2b_state *S,
|
|
const uint8_t block[BLAKE2B_BLOCKBYTES])
|
|
{
|
|
__m128i row1l, row1h;
|
|
__m128i row2l, row2h;
|
|
__m128i row3l, row3h;
|
|
__m128i row4l, row4h;
|
|
__m128i b0, b1;
|
|
__m128i t0, t1;
|
|
const __m128i r16 =
|
|
_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
|
|
const __m128i r24 =
|
|
_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
|
|
const uint64_t m0 = ((uint64_t *)block)[0];
|
|
const uint64_t m1 = ((uint64_t *)block)[1];
|
|
const uint64_t m2 = ((uint64_t *)block)[2];
|
|
const uint64_t m3 = ((uint64_t *)block)[3];
|
|
const uint64_t m4 = ((uint64_t *)block)[4];
|
|
const uint64_t m5 = ((uint64_t *)block)[5];
|
|
const uint64_t m6 = ((uint64_t *)block)[6];
|
|
const uint64_t m7 = ((uint64_t *)block)[7];
|
|
const uint64_t m8 = ((uint64_t *)block)[8];
|
|
const uint64_t m9 = ((uint64_t *)block)[9];
|
|
const uint64_t m10 = ((uint64_t *)block)[10];
|
|
const uint64_t m11 = ((uint64_t *)block)[11];
|
|
const uint64_t m12 = ((uint64_t *)block)[12];
|
|
const uint64_t m13 = ((uint64_t *)block)[13];
|
|
const uint64_t m14 = ((uint64_t *)block)[14];
|
|
const uint64_t m15 = ((uint64_t *)block)[15];
|
|
|
|
row1l = LOADU(&S->h[0]);
|
|
row1h = LOADU(&S->h[2]);
|
|
row2l = LOADU(&S->h[4]);
|
|
row2h = LOADU(&S->h[6]);
|
|
row3l = LOADU(&blake2b_IV[0]);
|
|
row3h = LOADU(&blake2b_IV[2]);
|
|
row4l = _mm_xor_si128(LOADU(&blake2b_IV[4]), LOADU(&S->t[0]));
|
|
row4h = _mm_xor_si128(LOADU(&blake2b_IV[6]), LOADU(&S->f[0]));
|
|
ROUND(0);
|
|
ROUND(1);
|
|
ROUND(2);
|
|
ROUND(3);
|
|
ROUND(4);
|
|
ROUND(5);
|
|
ROUND(6);
|
|
ROUND(7);
|
|
ROUND(8);
|
|
ROUND(9);
|
|
ROUND(10);
|
|
ROUND(11);
|
|
row1l = _mm_xor_si128(row3l, row1l);
|
|
row1h = _mm_xor_si128(row3h, row1h);
|
|
STOREU(&S->h[0], _mm_xor_si128(LOADU(&S->h[0]), row1l));
|
|
STOREU(&S->h[2], _mm_xor_si128(LOADU(&S->h[2]), row1h));
|
|
row2l = _mm_xor_si128(row4l, row2l);
|
|
row2h = _mm_xor_si128(row4h, row2h);
|
|
STOREU(&S->h[4], _mm_xor_si128(LOADU(&S->h[4]), row2l));
|
|
STOREU(&S->h[6], _mm_xor_si128(LOADU(&S->h[6]), row2h));
|
|
return 0;
|
|
}
|
|
|
|
#endif
|