make format crypto/

pull/787/head
Rick V 5 years ago
parent 842fe039bc
commit 595b15d538
No known key found for this signature in database
GPG Key ID: C0EDC8723FDC3465

@ -20,12 +20,12 @@ LOADU64(const void *p)
}
#define ROTATE16 \
_mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, \
3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)
_mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, \
4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)
#define ROTATE24 \
_mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, \
4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)
_mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, \
5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)
#define ADD(a, b) _mm256_add_epi64(a, b)
#define SUB(a, b) _mm256_sub_epi64(a, b)
@ -40,7 +40,8 @@ LOADU64(const void *p)
#define ROT63(x) _mm256_or_si256(_mm256_srli_epi64((x), 63), ADD((x), (x)))
#define BLAKE2B_G1_V1(a, b, c, d, m) \
do { \
do \
{ \
a = ADD(a, m); \
a = ADD(a, b); \
d = XOR(d, a); \
@ -51,7 +52,8 @@ LOADU64(const void *p)
} while(0)
#define BLAKE2B_G2_V1(a, b, c, d, m) \
do { \
do \
{ \
a = ADD(a, m); \
a = ADD(a, b); \
d = XOR(d, a); \
@ -62,14 +64,16 @@ LOADU64(const void *p)
} while(0)
#define BLAKE2B_DIAG_V1(a, b, c, d) \
do { \
do \
{ \
d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(2, 1, 0, 3)); \
c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1, 0, 3, 2)); \
b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(0, 3, 2, 1)); \
} while(0)
#define BLAKE2B_UNDIAG_V1(a, b, c, d) \
do { \
do \
{ \
d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(0, 3, 2, 1)); \
c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1, 0, 3, 2)); \
b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(2, 1, 0, 3)); \
@ -78,7 +82,8 @@ LOADU64(const void *p)
#include "blake2b-load-avx2.h"
#define BLAKE2B_ROUND_V1(a, b, c, d, r, m) \
do { \
do \
{ \
__m256i b0; \
BLAKE2B_LOAD_MSG_##r##_1(b0); \
BLAKE2B_G1_V1(a, b, c, d, b0); \
@ -93,7 +98,8 @@ LOADU64(const void *p)
} while(0)
#define BLAKE2B_ROUNDS_V1(a, b, c, d, m) \
do { \
do \
{ \
BLAKE2B_ROUND_V1(a, b, c, d, 0, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 1, (m)); \
BLAKE2B_ROUND_V1(a, b, c, d, 2, (m)); \
@ -120,13 +126,13 @@ LOADU64(const void *p)
__m256i t0, t1;
#define BLAKE2B_COMPRESS_V1(a, b, m, t0, t1, f0, f1) \
do { \
do \
{ \
DECLARE_MESSAGE_WORDS(m) \
const __m256i iv0 = a; \
const __m256i iv1 = b; \
__m256i c = LOAD(&blake2b_IV[0]); \
__m256i d = \
XOR(LOAD(&blake2b_IV[4]), _mm256_set_epi64x(f1, f0, t1, t0)); \
__m256i d = XOR(LOAD(&blake2b_IV[4]), _mm256_set_epi64x(f1, f0, t1, t0)); \
BLAKE2B_ROUNDS_V1(a, b, c, d, m); \
a = XOR(a, c); \
b = XOR(b, d); \

@ -6,14 +6,11 @@
#define STOREU(p, r) _mm_storeu_si128((__m128i *)(void *)(p), r)
#define _mm_roti_epi64(x, c) \
(-(c) == 32) \
? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
: (-(c) == 24) \
? _mm_shuffle_epi8((x), r24) \
(-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
: (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \
: (-(c) == 16) \
? _mm_shuffle_epi8((x), r16) \
: (-(c) == 63) \
? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
: (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
_mm_add_epi64((x), (x))) \
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
_mm_slli_epi64((x), 64 - (-(c))))

@ -6,14 +6,11 @@
#define STOREU(p, r) _mm_storeu_si128((__m128i *)(void *)(p), r)
#define _mm_roti_epi64(x, c) \
(-(c) == 32) \
? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
: (-(c) == 24) \
? _mm_shuffle_epi8((x), r24) \
(-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
: (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \
: (-(c) == 16) \
? _mm_shuffle_epi8((x), r16) \
: (-(c) == 63) \
? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
: (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
_mm_add_epi64((x), (x))) \
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
_mm_slli_epi64((x), 64 - (-(c))))

@ -2,336 +2,384 @@
#define blake2b_load_avx2_H
#define BLAKE2B_LOAD_MSG_0_1(b0) \
do { \
do \
{ \
t0 = _mm256_unpacklo_epi64(m0, m1); \
t1 = _mm256_unpacklo_epi64(m2, m3); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_0_2(b0) \
do { \
do \
{ \
t0 = _mm256_unpackhi_epi64(m0, m1); \
t1 = _mm256_unpackhi_epi64(m2, m3); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_0_3(b0) \
do { \
do \
{ \
t0 = _mm256_unpacklo_epi64(m4, m5); \
t1 = _mm256_unpacklo_epi64(m6, m7); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_0_4(b0) \
do { \
do \
{ \
t0 = _mm256_unpackhi_epi64(m4, m5); \
t1 = _mm256_unpackhi_epi64(m6, m7); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_1_1(b0) \
do { \
do \
{ \
t0 = _mm256_unpacklo_epi64(m7, m2); \
t1 = _mm256_unpackhi_epi64(m4, m6); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_1_2(b0) \
do { \
do \
{ \
t0 = _mm256_unpacklo_epi64(m5, m4); \
t1 = _mm256_alignr_epi8(m3, m7, 8); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_1_3(b0) \
do { \
do \
{ \
t0 = _mm256_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \
t1 = _mm256_unpackhi_epi64(m5, m2); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_1_4(b0) \
do { \
do \
{ \
t0 = _mm256_unpacklo_epi64(m6, m1); \
t1 = _mm256_unpackhi_epi64(m3, m1); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_2_1(b0) \
do { \
do \
{ \
t0 = _mm256_alignr_epi8(m6, m5, 8); \
t1 = _mm256_unpackhi_epi64(m2, m7); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_2_2(b0) \
do { \
do \
{ \
t0 = _mm256_unpacklo_epi64(m4, m0); \
t1 = _mm256_blend_epi32(m6, m1, 0x33); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_2_3(b0) \
do { \
do \
{ \
t0 = _mm256_blend_epi32(m1, m5, 0x33); \
t1 = _mm256_unpackhi_epi64(m3, m4); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_2_4(b0) \
do { \
do \
{ \
t0 = _mm256_unpacklo_epi64(m7, m3); \
t1 = _mm256_alignr_epi8(m2, m0, 8); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_3_1(b0) \
do { \
do \
{ \
t0 = _mm256_unpackhi_epi64(m3, m1); \
t1 = _mm256_unpackhi_epi64(m6, m5); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_3_2(b0) \
do { \
do \
{ \
t0 = _mm256_unpackhi_epi64(m4, m0); \
t1 = _mm256_unpacklo_epi64(m6, m7); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_3_3(b0) \
do { \
do \
{ \
t0 = _mm256_blend_epi32(m2, m1, 0x33); \
t1 = _mm256_blend_epi32(m7, m2, 0x33); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_3_4(b0) \
do { \
do \
{ \
t0 = _mm256_unpacklo_epi64(m3, m5); \
t1 = _mm256_unpacklo_epi64(m0, m4); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_4_1(b0) \
do { \
do \
{ \
t0 = _mm256_unpackhi_epi64(m4, m2); \
t1 = _mm256_unpacklo_epi64(m1, m5); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_4_2(b0) \
do { \
do \
{ \
t0 = _mm256_blend_epi32(m3, m0, 0x33); \
t1 = _mm256_blend_epi32(m7, m2, 0x33); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_4_3(b0) \
do { \
do \
{ \
t0 = _mm256_blend_epi32(m5, m7, 0x33); \
t1 = _mm256_blend_epi32(m1, m3, 0x33); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_4_4(b0) \
do { \
do \
{ \
t0 = _mm256_alignr_epi8(m6, m0, 8); \
t1 = _mm256_blend_epi32(m6, m4, 0x33); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_5_1(b0) \
do { \
do \
{ \
t0 = _mm256_unpacklo_epi64(m1, m3); \
t1 = _mm256_unpacklo_epi64(m0, m4); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_5_2(b0) \
do { \
do \
{ \
t0 = _mm256_unpacklo_epi64(m6, m5); \
t1 = _mm256_unpackhi_epi64(m5, m1); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_5_3(b0) \
do { \
do \
{ \
t0 = _mm256_blend_epi32(m3, m2, 0x33); \
t1 = _mm256_unpackhi_epi64(m7, m0); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_5_4(b0) \
do { \
do \
{ \
t0 = _mm256_unpackhi_epi64(m6, m2); \
t1 = _mm256_blend_epi32(m4, m7, 0x33); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_6_1(b0) \
do { \
do \
{ \
t0 = _mm256_blend_epi32(m0, m6, 0x33); \
t1 = _mm256_unpacklo_epi64(m7, m2); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_6_2(b0) \
do { \
do \
{ \
t0 = _mm256_unpackhi_epi64(m2, m7); \
t1 = _mm256_alignr_epi8(m5, m6, 8); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_6_3(b0) \
do { \
do \
{ \
t0 = _mm256_unpacklo_epi64(m0, m3); \
t1 = _mm256_shuffle_epi32(m4, _MM_SHUFFLE(1, 0, 3, 2)); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_6_4(b0) \
do { \
do \
{ \
t0 = _mm256_unpackhi_epi64(m3, m1); \
t1 = _mm256_blend_epi32(m5, m1, 0x33); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_7_1(b0) \
do { \
do \
{ \
t0 = _mm256_unpackhi_epi64(m6, m3); \
t1 = _mm256_blend_epi32(m1, m6, 0x33); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_7_2(b0) \
do { \
do \
{ \
t0 = _mm256_alignr_epi8(m7, m5, 8); \
t1 = _mm256_unpackhi_epi64(m0, m4); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_7_3(b0) \
do { \
do \
{ \
t0 = _mm256_unpackhi_epi64(m2, m7); \
t1 = _mm256_unpacklo_epi64(m4, m1); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_7_4(b0) \
do { \
do \
{ \
t0 = _mm256_unpacklo_epi64(m0, m2); \
t1 = _mm256_unpacklo_epi64(m3, m5); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_8_1(b0) \
do { \
do \
{ \
t0 = _mm256_unpacklo_epi64(m3, m7); \
t1 = _mm256_alignr_epi8(m0, m5, 8); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_8_2(b0) \
do { \
do \
{ \
t0 = _mm256_unpackhi_epi64(m7, m4); \
t1 = _mm256_alignr_epi8(m4, m1, 8); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_8_3(b0) \
do { \
do \
{ \
t0 = m6; \
t1 = _mm256_alignr_epi8(m5, m0, 8); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_8_4(b0) \
do { \
do \
{ \
t0 = _mm256_blend_epi32(m3, m1, 0x33); \
t1 = m2; \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_9_1(b0) \
do { \
do \
{ \
t0 = _mm256_unpacklo_epi64(m5, m4); \
t1 = _mm256_unpackhi_epi64(m3, m0); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_9_2(b0) \
do { \
do \
{ \
t0 = _mm256_unpacklo_epi64(m1, m2); \
t1 = _mm256_blend_epi32(m2, m3, 0x33); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_9_3(b0) \
do { \
do \
{ \
t0 = _mm256_unpackhi_epi64(m7, m4); \
t1 = _mm256_unpackhi_epi64(m1, m6); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_9_4(b0) \
do { \
do \
{ \
t0 = _mm256_alignr_epi8(m7, m5, 8); \
t1 = _mm256_unpacklo_epi64(m6, m0); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_10_1(b0) \
do { \
do \
{ \
t0 = _mm256_unpacklo_epi64(m0, m1); \
t1 = _mm256_unpacklo_epi64(m2, m3); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_10_2(b0) \
do { \
do \
{ \
t0 = _mm256_unpackhi_epi64(m0, m1); \
t1 = _mm256_unpackhi_epi64(m2, m3); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_10_3(b0) \
do { \
do \
{ \
t0 = _mm256_unpacklo_epi64(m4, m5); \
t1 = _mm256_unpacklo_epi64(m6, m7); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_10_4(b0) \
do { \
do \
{ \
t0 = _mm256_unpackhi_epi64(m4, m5); \
t1 = _mm256_unpackhi_epi64(m6, m7); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_11_1(b0) \
do { \
do \
{ \
t0 = _mm256_unpacklo_epi64(m7, m2); \
t1 = _mm256_unpackhi_epi64(m4, m6); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_11_2(b0) \
do { \
do \
{ \
t0 = _mm256_unpacklo_epi64(m5, m4); \
t1 = _mm256_alignr_epi8(m3, m7, 8); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_11_3(b0) \
do { \
do \
{ \
t0 = _mm256_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \
t1 = _mm256_unpackhi_epi64(m5, m2); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
} while(0)
#define BLAKE2B_LOAD_MSG_11_4(b0) \
do { \
do \
{ \
t0 = _mm256_unpacklo_epi64(m6, m1); \
t1 = _mm256_unpackhi_epi64(m3, m1); \
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \

@ -17,289 +17,337 @@
#define blake2b_load_sse41_H
#define LOAD_MSG_0_1(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m1); \
b1 = _mm_unpacklo_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_0_2(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpackhi_epi64(m0, m1); \
b1 = _mm_unpackhi_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_0_3(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpacklo_epi64(m4, m5); \
b1 = _mm_unpacklo_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_0_4(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m5); \
b1 = _mm_unpackhi_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_1_1(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpacklo_epi64(m7, m2); \
b1 = _mm_unpackhi_epi64(m4, m6); \
} while(0)
#define LOAD_MSG_1_2(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpacklo_epi64(m5, m4); \
b1 = _mm_alignr_epi8(m3, m7, 8); \
} while(0)
#define LOAD_MSG_1_3(b0, b1) \
do { \
do \
{ \
b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \
b1 = _mm_unpackhi_epi64(m5, m2); \
} while(0)
#define LOAD_MSG_1_4(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpacklo_epi64(m6, m1); \
b1 = _mm_unpackhi_epi64(m3, m1); \
} while(0)
#define LOAD_MSG_2_1(b0, b1) \
do { \
do \
{ \
b0 = _mm_alignr_epi8(m6, m5, 8); \
b1 = _mm_unpackhi_epi64(m2, m7); \
} while(0)
#define LOAD_MSG_2_2(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpacklo_epi64(m4, m0); \
b1 = _mm_blend_epi16(m1, m6, 0xF0); \
} while(0)
#define LOAD_MSG_2_3(b0, b1) \
do { \
do \
{ \
b0 = _mm_blend_epi16(m5, m1, 0xF0); \
b1 = _mm_unpackhi_epi64(m3, m4); \
} while(0)
#define LOAD_MSG_2_4(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpacklo_epi64(m7, m3); \
b1 = _mm_alignr_epi8(m2, m0, 8); \
} while(0)
#define LOAD_MSG_3_1(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpackhi_epi64(m3, m1); \
b1 = _mm_unpackhi_epi64(m6, m5); \
} while(0)
#define LOAD_MSG_3_2(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m0); \
b1 = _mm_unpacklo_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_3_3(b0, b1) \
do { \
do \
{ \
b0 = _mm_blend_epi16(m1, m2, 0xF0); \
b1 = _mm_blend_epi16(m2, m7, 0xF0); \
} while(0)
#define LOAD_MSG_3_4(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpacklo_epi64(m3, m5); \
b1 = _mm_unpacklo_epi64(m0, m4); \
} while(0)
#define LOAD_MSG_4_1(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m2); \
b1 = _mm_unpacklo_epi64(m1, m5); \
} while(0)
#define LOAD_MSG_4_2(b0, b1) \
do { \
do \
{ \
b0 = _mm_blend_epi16(m0, m3, 0xF0); \
b1 = _mm_blend_epi16(m2, m7, 0xF0); \
} while(0)
#define LOAD_MSG_4_3(b0, b1) \
do { \
do \
{ \
b0 = _mm_blend_epi16(m7, m5, 0xF0); \
b1 = _mm_blend_epi16(m3, m1, 0xF0); \
} while(0)
#define LOAD_MSG_4_4(b0, b1) \
do { \
do \
{ \
b0 = _mm_alignr_epi8(m6, m0, 8); \
b1 = _mm_blend_epi16(m4, m6, 0xF0); \
} while(0)
#define LOAD_MSG_5_1(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpacklo_epi64(m1, m3); \
b1 = _mm_unpacklo_epi64(m0, m4); \
} while(0)
#define LOAD_MSG_5_2(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpacklo_epi64(m6, m5); \
b1 = _mm_unpackhi_epi64(m5, m1); \
} while(0)
#define LOAD_MSG_5_3(b0, b1) \
do { \
do \
{ \
b0 = _mm_blend_epi16(m2, m3, 0xF0); \
b1 = _mm_unpackhi_epi64(m7, m0); \
} while(0)
#define LOAD_MSG_5_4(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpackhi_epi64(m6, m2); \
b1 = _mm_blend_epi16(m7, m4, 0xF0); \
} while(0)
#define LOAD_MSG_6_1(b0, b1) \
do { \
do \
{ \
b0 = _mm_blend_epi16(m6, m0, 0xF0); \
b1 = _mm_unpacklo_epi64(m7, m2); \
} while(0)
#define LOAD_MSG_6_2(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpackhi_epi64(m2, m7); \
b1 = _mm_alignr_epi8(m5, m6, 8); \
} while(0)
#define LOAD_MSG_6_3(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m3); \
b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1, 0, 3, 2)); \
} while(0)
#define LOAD_MSG_6_4(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpackhi_epi64(m3, m1); \
b1 = _mm_blend_epi16(m1, m5, 0xF0); \
} while(0)
#define LOAD_MSG_7_1(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpackhi_epi64(m6, m3); \
b1 = _mm_blend_epi16(m6, m1, 0xF0); \
} while(0)
#define LOAD_MSG_7_2(b0, b1) \
do { \
do \
{ \
b0 = _mm_alignr_epi8(m7, m5, 8); \
b1 = _mm_unpackhi_epi64(m0, m4); \
} while(0)
#define LOAD_MSG_7_3(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpackhi_epi64(m2, m7); \
b1 = _mm_unpacklo_epi64(m4, m1); \
} while(0)
#define LOAD_MSG_7_4(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m2); \
b1 = _mm_unpacklo_epi64(m3, m5); \
} while(0)
#define LOAD_MSG_8_1(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpacklo_epi64(m3, m7); \
b1 = _mm_alignr_epi8(m0, m5, 8); \
} while(0)
#define LOAD_MSG_8_2(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpackhi_epi64(m7, m4); \
b1 = _mm_alignr_epi8(m4, m1, 8); \
} while(0)
#define LOAD_MSG_8_3(b0, b1) \
do { \
do \
{ \
b0 = m6; \
b1 = _mm_alignr_epi8(m5, m0, 8); \
} while(0)
#define LOAD_MSG_8_4(b0, b1) \
do { \
do \
{ \
b0 = _mm_blend_epi16(m1, m3, 0xF0); \
b1 = m2; \
} while(0)
#define LOAD_MSG_9_1(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpacklo_epi64(m5, m4); \
b1 = _mm_unpackhi_epi64(m3, m0); \
} while(0)
#define LOAD_MSG_9_2(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpacklo_epi64(m1, m2); \
b1 = _mm_blend_epi16(m3, m2, 0xF0); \
} while(0)
#define LOAD_MSG_9_3(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpackhi_epi64(m7, m4); \
b1 = _mm_unpackhi_epi64(m1, m6); \
} while(0)
#define LOAD_MSG_9_4(b0, b1) \
do { \
do \
{ \
b0 = _mm_alignr_epi8(m7, m5, 8); \
b1 = _mm_unpacklo_epi64(m6, m0); \
} while(0)
#define LOAD_MSG_10_1(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m1); \
b1 = _mm_unpacklo_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_10_2(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpackhi_epi64(m0, m1); \
b1 = _mm_unpackhi_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_10_3(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpacklo_epi64(m4, m5); \
b1 = _mm_unpacklo_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_10_4(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m5); \
b1 = _mm_unpackhi_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_11_1(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpacklo_epi64(m7, m2); \
b1 = _mm_unpackhi_epi64(m4, m6); \
} while(0)
#define LOAD_MSG_11_2(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpacklo_epi64(m5, m4); \
b1 = _mm_alignr_epi8(m3, m7, 8); \
} while(0)
#define LOAD_MSG_11_3(b0, b1) \
do { \
do \
{ \
b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \
b1 = _mm_unpackhi_epi64(m5, m2); \
} while(0)
#define LOAD_MSG_11_4(b0, b1) \
do { \
do \
{ \
b0 = _mm_unpacklo_epi64(m6, m1); \
b1 = _mm_unpackhi_epi64(m3, m1); \
} while(0)

@ -24,9 +24,11 @@
#ifndef __amd64__
#ifdef __clang__
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("sse2")))
#else
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __target__("sse2")))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __target__("sse2")))
#endif
static __inline__ __m128i __DEFAULT_FN_ATTRS

@ -1,4 +1,5 @@
if (bytes > 0) {
if(bytes > 0)
{
__m128i x_0, x_1, x_2, x_3;
__m128i t_1;
const __m128i rot16 =
@ -14,7 +15,8 @@ if (bytes > 0) {
x_2 = _mm_loadu_si128((__m128i*)(x + 8));
x_3 = _mm_loadu_si128((__m128i*)(x + 12));
for (i = 0; i < ROUNDS; i += 2) {
for(i = 0; i < ROUNDS; i += 2)
{
x_0 = _mm_add_epi32(x_0, x_1);
x_3 = _mm_xor_si128(x_3, x_0);
x_3 = _mm_shuffle_epi8(x_3, rot16);
@ -78,7 +80,8 @@ if (bytes > 0) {
_mm_storeu_si128((__m128i*)(partialblock + 32), x_2);
_mm_storeu_si128((__m128i*)(partialblock + 48), x_3);
for (i = 0; i < bytes; i++) {
for(i = 0; i < bytes; i++)
{
c[i] = m[i] ^ partialblock[i];
}

@ -1,4 +1,5 @@
while (bytes >= 64) {
while(bytes >= 64)
{
__m128i x_0, x_1, x_2, x_3;
__m128i t_1;
const __m128i rot16 =
@ -15,7 +16,8 @@ while (bytes >= 64) {
x_2 = _mm_loadu_si128((__m128i*)(x + 8));
x_3 = _mm_loadu_si128((__m128i*)(x + 12));
for (i = 0; i < ROUNDS; i += 2) {
for(i = 0; i < ROUNDS; i += 2)
{
x_0 = _mm_add_epi32(x_0, x_1);
x_3 = _mm_xor_si128(x_3, x_0);
x_3 = _mm_shuffle_epi8(x_3, rot16);
@ -86,7 +88,8 @@ while (bytes >= 64) {
in12 = x[12];
in13 = x[13];
in12++;
if (in12 == 0) {
if(in12 == 0)
{
in13++;
}
x[12] = in12;

@ -20,7 +20,8 @@
#define VEC4_QUARTERROUND(A, B, C, D) VEC4_QUARTERROUND_SHUFFLE(A, B, C, D)
if (bytes >= 256) {
if(bytes >= 256)
{
/* constant for shuffling bytes (replacing multiple-of-8 rotates) */
__m128i rot16 =
_mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
@ -65,7 +66,8 @@ if (bytes >= 256) {
uint32_t in12, in13;
int i;
while (bytes >= 256) {
while(bytes >= 256)
{
const __m128i addv12 = _mm_set_epi64x(1, 0);
const __m128i addv13 = _mm_set_epi64x(3, 2);
__m128i t12, t13;
@ -109,7 +111,8 @@ if (bytes >= 256) {
x[12] = in1213 & 0xFFFFFFFF;
x[13] = (in1213 >> 32) & 0xFFFFFFFF;
for (i = 0; i < ROUNDS; i += 2) {
for(i = 0; i < ROUNDS; i += 2)
{
VEC4_QUARTERROUND(0, 4, 8, 12);
VEC4_QUARTERROUND(1, 5, 9, 13);
VEC4_QUARTERROUND(2, 6, 10, 14);

@ -123,17 +123,17 @@
#define VEC8_ROUND(A1, B1, C1, D1, A2, B2, C2, D2, A3, B3, C3, D3, A4, B4, C4, \
D4) \
VEC8_ROUND_SEQ(A1, B1, C1, D1, A2, B2, C2, D2, A3, B3, C3, D3, A4, B4, C4, \
D4)
VEC8_ROUND_SEQ(A1, B1, C1, D1, A2, B2, C2, D2, A3, B3, C3, D3, A4, B4, C4, D4)
if (bytes >= 512) {
if(bytes >= 512)
{
/* constant for shuffling bytes (replacing multiple-of-8 rotates) */
__m256i rot16 =
_mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
_mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2, 13,
12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
__m256i rot8 =
_mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
_mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3, 14,
13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
uint32_t in12, in13;
/* the naive way seems as fast (if not a bit faster) than the vector way */
@ -173,7 +173,8 @@ if (bytes >= 512) {
__m256i t_0, t_1, t_2, t_3, t_4, t_5, t_6, t_7, t_8, t_9, t_10, t_11, t_12,
t_13, t_14, t_15;
while (bytes >= 512) {
while(bytes >= 512)
{
const __m256i addv12 = _mm256_set_epi64x(3, 2, 1, 0);
const __m256i addv13 = _mm256_set_epi64x(7, 6, 5, 4);
const __m256i permute = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
@ -223,7 +224,8 @@ if (bytes >= 512) {
x[12] = in1213 & 0xFFFFFFFF;
x[13] = (in1213 >> 32) & 0xFFFFFFFF;
for (i = 0; i < ROUNDS; i += 2) {
for(i = 0; i < ROUNDS; i += 2)
{
VEC8_ROUND(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15);
VEC8_ROUND(0, 5, 10, 15, 1, 6, 11, 12, 2, 7, 8, 13, 3, 4, 9, 14);
}
@ -299,22 +301,18 @@ if (bytes >= 512) {
t_##C2 = _mm256_permute2x128_si256(x_##C, x_##C2, 0x31); \
t_##D = _mm256_permute2x128_si256(x_##D, x_##D2, 0x20); \
t_##D2 = _mm256_permute2x128_si256(x_##D, x_##D2, 0x31); \
t_##A = \
_mm256_xor_si256(t_##A, _mm256_loadu_si256((__m256i*) (m + 0))); \
t_##B = \
_mm256_xor_si256(t_##B, _mm256_loadu_si256((__m256i*) (m + 64))); \
t_##C = \
_mm256_xor_si256(t_##C, _mm256_loadu_si256((__m256i*) (m + 128))); \
t_##D = \
_mm256_xor_si256(t_##D, _mm256_loadu_si256((__m256i*) (m + 192))); \
t_##A2 = _mm256_xor_si256(t_##A2, \
_mm256_loadu_si256((__m256i*) (m + 256))); \
t_##B2 = _mm256_xor_si256(t_##B2, \
_mm256_loadu_si256((__m256i*) (m + 320))); \
t_##C2 = _mm256_xor_si256(t_##C2, \
_mm256_loadu_si256((__m256i*) (m + 384))); \
t_##D2 = _mm256_xor_si256(t_##D2, \
_mm256_loadu_si256((__m256i*) (m + 448))); \
t_##A = _mm256_xor_si256(t_##A, _mm256_loadu_si256((__m256i*)(m + 0))); \
t_##B = _mm256_xor_si256(t_##B, _mm256_loadu_si256((__m256i*)(m + 64))); \
t_##C = _mm256_xor_si256(t_##C, _mm256_loadu_si256((__m256i*)(m + 128))); \
t_##D = _mm256_xor_si256(t_##D, _mm256_loadu_si256((__m256i*)(m + 192))); \
t_##A2 = \
_mm256_xor_si256(t_##A2, _mm256_loadu_si256((__m256i*)(m + 256))); \
t_##B2 = \
_mm256_xor_si256(t_##B2, _mm256_loadu_si256((__m256i*)(m + 320))); \
t_##C2 = \
_mm256_xor_si256(t_##C2, _mm256_loadu_si256((__m256i*)(m + 384))); \
t_##D2 = \
_mm256_xor_si256(t_##D2, _mm256_loadu_si256((__m256i*)(m + 448))); \
_mm256_storeu_si256((__m256i*)(c + 0), t_##A); \
_mm256_storeu_si256((__m256i*)(c + 64), t_##B); \
_mm256_storeu_si256((__m256i*)(c + 128), t_##C); \

@ -4,19 +4,18 @@
#include <stdint.h>
typedef struct crypto_stream_chacha20_implementation {
typedef struct crypto_stream_chacha20_implementation
{
int (*stream)(unsigned char *c, unsigned long long clen,
const unsigned char *n, const unsigned char *k);
int (*stream_ietf)(unsigned char *c, unsigned long long clen,
const unsigned char *n, const unsigned char *k);
int (*stream_xor_ic)(unsigned char *c, const unsigned char *m,
unsigned long long mlen,
const unsigned char *n, uint64_t ic,
const unsigned char *k);
unsigned long long mlen, const unsigned char *n,
uint64_t ic, const unsigned char *k);
int (*stream_ietf_xor_ic)(unsigned char *c, const unsigned char *m,
unsigned long long mlen,
const unsigned char *n, uint32_t ic,
const unsigned char *k);
unsigned long long mlen, const unsigned char *n,
uint32_t ic, const unsigned char *k);
} crypto_stream_chacha20_implementation;
#endif

@ -74,8 +74,8 @@ randombytes_stir(void)
try
{
var window_ = 'object' == = typeof window ? window : self;
var crypto_ = typeof window_.crypto != = 'undefined' ? window_.crypto
: window_.msCrypto;
var crypto_ = typeof window_.crypto != =
'undefined' ? window_.crypto : window_.msCrypto;
var randomValuesStandard = function()
{
var buf = new Uint32Array(1);

File diff suppressed because it is too large Load Diff

@ -1,40 +1,52 @@
{
{ 25967493, -14356035, 29566456, 3660896, -12694345, 4014787, 27544626, -11754271, -6079156, 2047605 },
{ -12545711, 934262, -2722910, 3049990, -727428, 9406986, 12720692, 5043384, 19500929, -15469378 },
{ -8738181, 4489570, 9688441, -14785194, 10184609, -12363380, 29287919, 11864899, -24514362, -4438546 }
},
{
{ 15636291, -9688557, 24204773, -7912398, 616977, -16685262, 27787600, -14772189, 28944400, -1550024 },
{ 16568933, 4717097, -11556148, -1102322, 15682896, -11807043, 16354577, -11775962, 7689662, 11199574 },
{ 30464156, -5976125, -11779434, -15670865, 23220365, 15915852, 7512774, 10017326, -17749093, -9920357 }
},
{
{ 10861363, 11473154, 27284546, 1981175, -30064349, 12577861, 32867885, 14515107, -15438304, 10819380 },
{ 4708026, 6336745, 20377586, 9066809, -11272109, 6594696, -25653668, 12483688, -12668491, 5581306 },
{ 19563160, 16186464, -29386857, 4097519, 10237984, -4348115, 28542350, 13850243, -23678021, -15815942 }
},
{
{ 5153746, 9909285, 1723747, -2777874, 30523605, 5516873, 19480852, 5230134, -23952439, -15175766 },
{ -30269007, -3463509, 7665486, 10083793, 28475525, 1649722, 20654025, 16520125, 30598449, 7715701 },
{ 28881845, 14381568, 9657904, 3680757, -20181635, 7843316, -31400660, 1370708, 29794553, -1409300 }
},
{
{ -22518993, -6692182, 14201702, -8745502, -23510406, 8844726, 18474211, -1361450, -13062696, 13821877 },
{ -6455177, -7839871, 3374702, -4740862, -27098617, -10571707, 31655028, -7212327, 18853322, -14220951 },
{ 4566830, -12963868, -28974889, -12240689, -7602672, -2830569, -8514358, -10431137, 2207753, -3209784 }
},
{
{ -25154831, -4185821, 29681144, 7868801, -6854661, -9423865, -12437364, -663000, -31111463, -16132436 },
{ 25576264, -2703214, 7349804, -11814844, 16472782, 9300885, 3844789, 15725684, 171356, 6466918 },
{ 23103977, 13316479, 9739013, -16149481, 817875, -15038942, 8965339, -14088058, -30714912, 16193877 }
},
{
{ -33521811, 3180713, -2394130, 14003687, -16903474, -16270840, 17238398, 4729455, -18074513, 9256800 },
{ -25182317, -4174131, 32336398, 5036987, -21236817, 11360617, 22616405, 9761698, -19827198, 630305 },
{ -13720693, 2639453, -24237460, -7406481, 9494427, -5774029, -6554551, -15960994, -2449256, -14291300 }
},
{
{ -3151181, -5046075, 9282714, 6866145, -31907062, -863023, -18940575, 15033784, 25105118, -7894876 },
{ -24326370, 15950226, -31801215, -14592823, -11662737, -5090925, 1573892, -2625887, 2198790, -15804619 },
{ -3099351, 10324967, -2241613, 7453183, -5446979, -2735503, -13812022, -16236442, -32461234, -12290683 }
{{25967493, -14356035, 29566456, 3660896, -12694345, 4014787, 27544626,
-11754271, -6079156, 2047605},
{-12545711, 934262, -2722910, 3049990, -727428, 9406986, 12720692, 5043384,
19500929, -15469378},
{-8738181, 4489570, 9688441, -14785194, 10184609, -12363380, 29287919,
11864899, -24514362, -4438546}},
{{15636291, -9688557, 24204773, -7912398, 616977, -16685262, 27787600,
-14772189, 28944400, -1550024},
{16568933, 4717097, -11556148, -1102322, 15682896, -11807043, 16354577,
-11775962, 7689662, 11199574},
{30464156, -5976125, -11779434, -15670865, 23220365, 15915852, 7512774,
10017326, -17749093, -9920357}},
{{10861363, 11473154, 27284546, 1981175, -30064349, 12577861, 32867885,
14515107, -15438304, 10819380},
{4708026, 6336745, 20377586, 9066809, -11272109, 6594696, -25653668,
12483688, -12668491, 5581306},
{19563160, 16186464, -29386857, 4097519, 10237984, -4348115, 28542350,
13850243, -23678021, -15815942}},
{{5153746, 9909285, 1723747, -2777874, 30523605, 5516873, 19480852, 5230134,
-23952439, -15175766},
{-30269007, -3463509, 7665486, 10083793, 28475525, 1649722, 20654025,
16520125, 30598449, 7715701},
{28881845, 14381568, 9657904, 3680757, -20181635, 7843316, -31400660,
1370708, 29794553, -1409300}},
{{-22518993, -6692182, 14201702, -8745502, -23510406, 8844726, 18474211,
-1361450, -13062696, 13821877},
{-6455177, -7839871, 3374702, -4740862, -27098617, -10571707, 31655028,
-7212327, 18853322, -14220951},
{4566830, -12963868, -28974889, -12240689, -7602672, -2830569, -8514358,
-10431137, 2207753, -3209784}},
{{-25154831, -4185821, 29681144, 7868801, -6854661, -9423865, -12437364,
-663000, -31111463, -16132436},
{25576264, -2703214, 7349804, -11814844, 16472782, 9300885, 3844789,
15725684, 171356, 6466918},
{23103977, 13316479, 9739013, -16149481, 817875, -15038942, 8965339,
-14088058, -30714912, 16193877}},
{{-33521811, 3180713, -2394130, 14003687, -16903474, -16270840, 17238398,
4729455, -18074513, 9256800},
{-25182317, -4174131, 32336398, 5036987, -21236817, 11360617, 22616405,
9761698, -19827198, 630305},
{-13720693, 2639453, -24237460, -7406481, 9494427, -5774029, -6554551,
-15960994, -2449256, -14291300}},
{
{-3151181, -5046075, 9282714, 6866145, -31907062,
-863023, -18940575, 15033784, 25105118, -7894876},
{-24326370, 15950226, -31801215, -14592823, -11662737,
-5090925, 1573892, -2625887, 2198790, -15804619},
{
-3099351, 10324967, -2241613, 7453183, -5446979, -2735503, -13812022,
-16236442, -32461234, -12290683
}
}

@ -1,20 +1,18 @@
/* 37095705934669439343138083508754565189542113879843219016388785533085940283555 */
static const fe25519 d = {
-10913610, 13857413, -15372611, 6949391, 114729, -8787816, -6275908, -3247719, -18696448, -12055116
};
/* 37095705934669439343138083508754565189542113879843219016388785533085940283555
*/
static const fe25519 d = {-10913610, 13857413, -15372611, 6949391, 114729,
-8787816, -6275908, -3247719, -18696448, -12055116};
/* 2 * d =
* 16295367250680780974490674513165176452449235426866156013048779062215315747161
*/
static const fe25519 d2 = {
-21827239, -5839606, -30745221, 13898782, 229458, 15978800, -12551817, -6495438, 29715968, 9444199 };
static const fe25519 d2 = {-21827239, -5839606, -30745221, 13898782, 229458,
15978800, -12551817, -6495438, 29715968, 9444199};
/* sqrt(-1) */
static const fe25519 sqrtm1 = {
-32595792, -7943725, 9377950, 3500415, 12389472, -272473, -25146209, -2005654, 326686, 11406482
};
static const fe25519 sqrtm1 = {-32595792, -7943725, 9377950, 3500415,
12389472, -272473, -25146209, -2005654,
326686, 11406482};
/* A = 486662 */
static const fe25519 curve25519_A = {
486662, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
static const fe25519 curve25519_A = {486662, 0, 0, 0, 0, 0, 0, 0, 0, 0};

@ -111,7 +111,8 @@ fe25519_reduce(fe25519 h, const fe25519 f)
int32_t h9 = f[9];
int32_t q;
int32_t carry0, carry1, carry2, carry3, carry4, carry5, carry6, carry7, carry8, carry9;
int32_t carry0, carry1, carry2, carry3, carry4, carry5, carry6, carry7,
carry8, carry9;
q = (19 * h9 + ((uint32_t)1L << 24)) >> 25;
q = (h0 + q) >> 26;

@ -17,4 +17,3 @@
#define REDMASK51 crypto_scalarmult_curve25519_sandy2x_REDMASK51
#endif /* ifndef consts_namespace_H */

@ -31,7 +31,8 @@ crypto_scalarmult_curve25519_sandy2x(unsigned char *q, const unsigned char *n,
fe51 z_51;
unsigned int i;
for (i = 0; i < 32; i++) {
for(i = 0; i < 32; i++)
{
t[i] = n[i];
}
t[0] &= 248;
@ -77,7 +78,8 @@ crypto_scalarmult_curve25519_sandy2x_base(unsigned char *q,
fe51 z_51;
unsigned int i;
for (i = 0;i < 32; i++) {
for(i = 0; i < 32; i++)
{
t[i] = n[i];
}
t[0] &= 248;
@ -108,7 +110,6 @@ crypto_scalarmult_curve25519_sandy2x_base(unsigned char *q,
struct crypto_scalarmult_curve25519_implementation
crypto_scalarmult_curve25519_sandy2x_implementation = {
SODIUM_C99(.mult =) crypto_scalarmult_curve25519_sandy2x,
SODIUM_C99(.mult_base = ) crypto_scalarmult_curve25519_sandy2x_base
};
SODIUM_C99(.mult_base =) crypto_scalarmult_curve25519_sandy2x_base};
#endif

@ -21,6 +21,7 @@ Bounds on each t[i] vary depending on context.
#define fe_frombytes crypto_scalarmult_curve25519_sandy2x_fe_frombytes
extern void fe_frombytes(fe, const unsigned char *);
extern void
fe_frombytes(fe, const unsigned char *);
#endif

@ -9,7 +9,8 @@
#define fe51_H
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
#include <stdint.h>
@ -20,13 +21,16 @@ extern "C" {
typedef struct
{
uint64_t v[5];
}
fe51;
extern void fe51_pack(unsigned char *, const fe51 *);
extern void fe51_mul(fe51 *, const fe51 *, const fe51 *);
extern void fe51_nsquare(fe51 *, const fe51 *, int);
extern void fe51_invert(fe51 *, const fe51 *);
} fe51;
extern void
fe51_pack(unsigned char *, const fe51 *);
extern void
fe51_mul(fe51 *, const fe51 *, const fe51 *);
extern void
fe51_nsquare(fe51 *, const fe51 *, int);
extern void
fe51_invert(fe51 *, const fe51 *);
#ifdef __cplusplus
}

@ -13,4 +13,3 @@
#define fe51_invert crypto_scalarmult_curve25519_sandy2x_fe51_invert
#endif /* ifndef fe51_namespace_H */

@ -51,17 +51,37 @@ fe_frombytes(fe h, const unsigned char *s)
uint64_t carry8;
uint64_t carry9;
carry9 = h9 >> 25; h0 += carry9 * 19; h9 &= 0x1FFFFFF;
carry1 = h1 >> 25; h2 += carry1; h1 &= 0x1FFFFFF;
carry3 = h3 >> 25; h4 += carry3; h3 &= 0x1FFFFFF;
carry5 = h5 >> 25; h6 += carry5; h5 &= 0x1FFFFFF;
carry7 = h7 >> 25; h8 += carry7; h7 &= 0x1FFFFFF;
carry9 = h9 >> 25;
h0 += carry9 * 19;
h9 &= 0x1FFFFFF;
carry1 = h1 >> 25;
h2 += carry1;
h1 &= 0x1FFFFFF;
carry3 = h3 >> 25;
h4 += carry3;
h3 &= 0x1FFFFFF;
carry5 = h5 >> 25;
h6 += carry5;
h5 &= 0x1FFFFFF;
carry7 = h7 >> 25;
h8 += carry7;
h7 &= 0x1FFFFFF;
carry0 = h0 >> 26; h1 += carry0; h0 &= 0x3FFFFFF;
carry2 = h2 >> 26; h3 += carry2; h2 &= 0x3FFFFFF;
carry4 = h4 >> 26; h5 += carry4; h4 &= 0x3FFFFFF;
carry6 = h6 >> 26; h7 += carry6; h6 &= 0x3FFFFFF;
carry8 = h8 >> 26; h9 += carry8; h8 &= 0x3FFFFFF;
carry0 = h0 >> 26;
h1 += carry0;
h0 &= 0x3FFFFFF;
carry2 = h2 >> 26;
h3 += carry2;
h2 &= 0x3FFFFFF;
carry4 = h4 >> 26;
h5 += carry4;
h4 &= 0x3FFFFFF;
carry6 = h6 >> 26;
h7 += carry6;
h6 &= 0x3FFFFFF;
carry8 = h8 >> 26;
h9 += carry8;
h8 &= 0x3FFFFFF;
h[0] = h0;
h[1] = h1;

@ -2,17 +2,18 @@
#define ladder_H
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
#include "fe.h"
#include "ladder_namespace.h"
extern void ladder(fe *, const unsigned char *);
extern void
ladder(fe *, const unsigned char *);
#ifdef __cplusplus
}
#endif
#endif /* ifndef ladder_H */

@ -2,17 +2,18 @@
#define ladder_base_H
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
#include "fe.h"
#include "ladder_base_namespace.h"
extern void ladder_base(fe *, const unsigned char *);
extern void
ladder_base(fe *, const unsigned char *);
#ifdef __cplusplus
}
#endif
#endif /* ifndef ladder_base_H */

@ -5,4 +5,3 @@
#define _ladder_base _crypto_scalarmult_curve25519_sandy2x_ladder_base
#endif /* ifndef ladder_base_namespace_H */

@ -5,4 +5,3 @@
#define _ladder _crypto_scalarmult_curve25519_sandy2x_ladder
#endif /* ifndef ladder_namespace_H */

@ -2,9 +2,9 @@
#ifndef scalarmult_poly1305_H
#define scalarmult_poly1305_H
typedef struct crypto_scalarmult_curve25519_implementation {
int (*mult)(unsigned char *q, const unsigned char *n,
const unsigned char *p);
typedef struct crypto_scalarmult_curve25519_implementation
{
int (*mult)(unsigned char *q, const unsigned char *n, const unsigned char *p);
int (*mult_base)(unsigned char *q, const unsigned char *n);
} crypto_scalarmult_curve25519_implementation;

@ -1,18 +1,17 @@
#ifndef sign_ed25519_ref10_H
#define sign_ed25519_ref10_H
void _crypto_sign_ed25519_ref10_hinit(crypto_hash_sha512_state *hs,
int prehashed);
void
_crypto_sign_ed25519_ref10_hinit(crypto_hash_sha512_state *hs, int prehashed);
int _crypto_sign_ed25519_detached(unsigned char *sig,
unsigned long long *siglen_p,
const unsigned char *m,
unsigned long long mlen,
int
_crypto_sign_ed25519_detached(unsigned char *sig, unsigned long long *siglen_p,
const unsigned char *m, unsigned long long mlen,
const unsigned char *sk, int prehashed);
int _crypto_sign_ed25519_verify_detached(const unsigned char *sig,
int
_crypto_sign_ed25519_verify_detached(const unsigned char *sig,
const unsigned char *m,
unsigned long long mlen,
const unsigned char *pk,
int prehashed);
const unsigned char *pk, int prehashed);
#endif

@ -1,12 +1,22 @@
int crypto_kem_enc_ref(unsigned char *cstr, unsigned char *k, const unsigned char *pk);
int
crypto_kem_enc_ref(unsigned char *cstr, unsigned char *k,
const unsigned char *pk);
int crypto_kem_dec_ref(unsigned char *k, const unsigned char *cstr, const unsigned char *sk);
int
crypto_kem_dec_ref(unsigned char *k, const unsigned char *cstr,
const unsigned char *sk);
int crypto_kem_keypair_ref(unsigned char *pk, unsigned char * sk);
int
crypto_kem_keypair_ref(unsigned char *pk, unsigned char *sk);
int crypto_kem_enc_avx2(unsigned char *cstr, unsigned char *k, const unsigned char *pk);
int
crypto_kem_enc_avx2(unsigned char *cstr, unsigned char *k,
const unsigned char *pk);
int crypto_kem_dec_avx2(unsigned char *k, const unsigned char *cstr, const unsigned char *sk);
int
crypto_kem_dec_avx2(unsigned char *k, const unsigned char *cstr,
const unsigned char *sk);
int crypto_kem_keypair_avx2(unsigned char *pk, unsigned char * sk);
int
crypto_kem_keypair_avx2(unsigned char *pk, unsigned char *sk);

@ -71,13 +71,20 @@ store64_le(uint8_t dst[8], uint64_t w)
#ifdef NATIVE_LITTLE_ENDIAN
memcpy(dst, &w, sizeof w);
#else
dst[0] = (uint8_t) w; w >>= 8;
dst[1] = (uint8_t) w; w >>= 8;
dst[2] = (uint8_t) w; w >>= 8;
dst[3] = (uint8_t) w; w >>= 8;
dst[4] = (uint8_t) w; w >>= 8;
dst[5] = (uint8_t) w; w >>= 8;
dst[6] = (uint8_t) w; w >>= 8;
dst[0] = (uint8_t)w;
w >>= 8;
dst[1] = (uint8_t)w;
w >>= 8;
dst[2] = (uint8_t)w;
w >>= 8;
dst[3] = (uint8_t)w;
w >>= 8;
dst[4] = (uint8_t)w;
w >>= 8;
dst[5] = (uint8_t)w;
w >>= 8;
dst[6] = (uint8_t)w;
w >>= 8;
dst[7] = (uint8_t)w;
#endif
}
@ -106,9 +113,12 @@ store32_le(uint8_t dst[4], uint32_t w)
#ifdef NATIVE_LITTLE_ENDIAN
memcpy(dst, &w, sizeof w);
#else
dst[0] = (uint8_t) w; w >>= 8;
dst[1] = (uint8_t) w; w >>= 8;
dst[2] = (uint8_t) w; w >>= 8;
dst[0] = (uint8_t)w;
w >>= 8;
dst[1] = (uint8_t)w;
w >>= 8;
dst[2] = (uint8_t)w;
w >>= 8;
dst[3] = (uint8_t)w;
#endif
}
@ -143,13 +153,20 @@ store64_be(uint8_t dst[8], uint64_t w)
#ifdef NATIVE_BIG_ENDIAN
memcpy(dst, &w, sizeof w);
#else
dst[7] = (uint8_t) w; w >>= 8;
dst[6] = (uint8_t) w; w >>= 8;
dst[5] = (uint8_t) w; w >>= 8;
dst[4] = (uint8_t) w; w >>= 8;
dst[3] = (uint8_t) w; w >>= 8;
dst[2] = (uint8_t) w; w >>= 8;
dst[1] = (uint8_t) w; w >>= 8;
dst[7] = (uint8_t)w;
w >>= 8;
dst[6] = (uint8_t)w;
w >>= 8;
dst[5] = (uint8_t)w;
w >>= 8;
dst[4] = (uint8_t)w;
w >>= 8;
dst[3] = (uint8_t)w;
w >>= 8;
dst[2] = (uint8_t)w;
w >>= 8;
dst[1] = (uint8_t)w;
w >>= 8;
dst[0] = (uint8_t)w;
#endif
}
@ -178,9 +195,12 @@ store32_be(uint8_t dst[4], uint32_t w)
#ifdef NATIVE_BIG_ENDIAN
memcpy(dst, &w, sizeof w);
#else
dst[3] = (uint8_t) w; w >>= 8;
dst[2] = (uint8_t) w; w >>= 8;
dst[1] = (uint8_t) w; w >>= 8;
dst[3] = (uint8_t)w;
w >>= 8;
dst[2] = (uint8_t)w;
w >>= 8;
dst[1] = (uint8_t)w;
w >>= 8;
dst[0] = (uint8_t)w;
#endif
}
@ -191,7 +211,8 @@ xor_buf(unsigned char *out, const unsigned char *in, size_t n)
{
size_t i;
for (i = 0; i < n; i++) {
for(i = 0; i < n; i++)
{
out[i] ^= in[i];
}
}
@ -211,8 +232,8 @@ xor_buf(unsigned char *out, const unsigned char *in, size_t n)
#endif
#endif
#if defined(_MSC_VER) && \
(defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86))
#if defined(_MSC_VER) \
&& (defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86))
#include <intrin.h>
@ -234,8 +255,10 @@ xor_buf(unsigned char *out, const unsigned char *in, size_t n)
#endif
#ifdef HAVE_LIBCTGRIND
extern void ct_poison (const void *, size_t);
extern void ct_unpoison(const void *, size_t);
extern void
ct_poison(const void *, size_t);
extern void
ct_unpoison(const void *, size_t);
#define POISON(X, L) ct_poison((X), (L))
#define UNPOISON(X, L) ct_unpoison((X), (L))
#else

@ -17,103 +17,126 @@
#ifdef __GNUC__
#pragma GCC diagnostic ignored "-Wlong-long"
#endif
extern "C" {
extern "C"
{
#endif
#define crypto_box_SEEDBYTES crypto_box_curve25519xsalsa20poly1305_SEEDBYTES
SODIUM_EXPORT
size_t crypto_box_seedbytes(void);
size_t
crypto_box_seedbytes(void);
#define crypto_box_PUBLICKEYBYTES crypto_box_curve25519xsalsa20poly1305_PUBLICKEYBYTES
#define crypto_box_PUBLICKEYBYTES \
crypto_box_curve25519xsalsa20poly1305_PUBLICKEYBYTES
SODIUM_EXPORT
size_t crypto_box_publickeybytes(void);
size_t
crypto_box_publickeybytes(void);
#define crypto_box_SECRETKEYBYTES crypto_box_curve25519xsalsa20poly1305_SECRETKEYBYTES
#define crypto_box_SECRETKEYBYTES \
crypto_box_curve25519xsalsa20poly1305_SECRETKEYBYTES
SODIUM_EXPORT
size_t crypto_box_secretkeybytes(void);
size_t
crypto_box_secretkeybytes(void);
#define crypto_box_NONCEBYTES crypto_box_curve25519xsalsa20poly1305_NONCEBYTES
SODIUM_EXPORT
size_t crypto_box_noncebytes(void);
size_t
crypto_box_noncebytes(void);
#define crypto_box_MACBYTES crypto_box_curve25519xsalsa20poly1305_MACBYTES
SODIUM_EXPORT
size_t crypto_box_macbytes(void);
size_t
crypto_box_macbytes(void);
#define crypto_box_MESSAGEBYTES_MAX crypto_box_curve25519xsalsa20poly1305_MESSAGEBYTES_MAX
#define crypto_box_MESSAGEBYTES_MAX \
crypto_box_curve25519xsalsa20poly1305_MESSAGEBYTES_MAX
SODIUM_EXPORT
size_t crypto_box_messagebytes_max(void);
size_t
crypto_box_messagebytes_max(void);
#define crypto_box_PRIMITIVE "curve25519xsalsa20poly1305"
SODIUM_EXPORT
const char *crypto_box_primitive(void);
const char *
crypto_box_primitive(void);
SODIUM_EXPORT
int crypto_box_seed_keypair(unsigned char *pk, unsigned char *sk,
int
crypto_box_seed_keypair(unsigned char *pk, unsigned char *sk,
const unsigned char *seed);
SODIUM_EXPORT
int crypto_box_keypair(unsigned char *pk, unsigned char *sk);
int
crypto_box_keypair(unsigned char *pk, unsigned char *sk);
SODIUM_EXPORT
int crypto_box_easy(unsigned char *c, const unsigned char *m,
int
crypto_box_easy(unsigned char *c, const unsigned char *m,
unsigned long long mlen, const unsigned char *n,
const unsigned char *pk, const unsigned char *sk)
__attribute__((warn_unused_result));
SODIUM_EXPORT
int crypto_box_open_easy(unsigned char *m, const unsigned char *c,
int
crypto_box_open_easy(unsigned char *m, const unsigned char *c,
unsigned long long clen, const unsigned char *n,
const unsigned char *pk, const unsigned char *sk)
__attribute__((warn_unused_result));
SODIUM_EXPORT
int crypto_box_detached(unsigned char *c, unsigned char *mac,
int
crypto_box_detached(unsigned char *c, unsigned char *mac,
const unsigned char *m, unsigned long long mlen,
const unsigned char *n, const unsigned char *pk,
const unsigned char *sk)
__attribute__((warn_unused_result));
SODIUM_EXPORT
int crypto_box_open_detached(unsigned char *m, const unsigned char *c,
const unsigned char *mac,
unsigned long long clen,
const unsigned char *n,
const unsigned char *pk,
int
crypto_box_open_detached(unsigned char *m, const unsigned char *c,
const unsigned char *mac, unsigned long long clen,
const unsigned char *n, const unsigned char *pk,
const unsigned char *sk)
__attribute__((warn_unused_result));
/* -- Precomputation interface -- */
#define crypto_box_BEFORENMBYTES crypto_box_curve25519xsalsa20poly1305_BEFORENMBYTES
#define crypto_box_BEFORENMBYTES \
crypto_box_curve25519xsalsa20poly1305_BEFORENMBYTES
SODIUM_EXPORT
size_t crypto_box_beforenmbytes(void);
size_t
crypto_box_beforenmbytes(void);
SODIUM_EXPORT
int crypto_box_beforenm(unsigned char *k, const unsigned char *pk,
int
crypto_box_beforenm(unsigned char *k, const unsigned char *pk,
const unsigned char *sk)
__attribute__((warn_unused_result));
SODIUM_EXPORT
int crypto_box_easy_afternm(unsigned char *c, const unsigned char *m,
int
crypto_box_easy_afternm(unsigned char *c, const unsigned char *m,
unsigned long long mlen, const unsigned char *n,
const unsigned char *k);
SODIUM_EXPORT
int crypto_box_open_easy_afternm(unsigned char *m, const unsigned char *c,
int
crypto_box_open_easy_afternm(unsigned char *m, const unsigned char *c,
unsigned long long clen, const unsigned char *n,
const unsigned char *k)
__attribute__((warn_unused_result));
SODIUM_EXPORT
int crypto_box_detached_afternm(unsigned char *c, unsigned char *mac,
int
crypto_box_detached_afternm(unsigned char *c, unsigned char *mac,
const unsigned char *m, unsigned long long mlen,
const unsigned char *n, const unsigned char *k);
SODIUM_EXPORT
int crypto_box_open_detached_afternm(unsigned char *m, const unsigned char *c,
int
crypto_box_open_detached_afternm(unsigned char *m, const unsigned char *c,
const unsigned char *mac,
unsigned long long clen, const unsigned char *n,
unsigned long long clen,
const unsigned char *n,
const unsigned char *k)
__attribute__((warn_unused_result));
@ -121,47 +144,56 @@ int crypto_box_open_detached_afternm(unsigned char *m, const unsigned char *c,
#define crypto_box_SEALBYTES (crypto_box_PUBLICKEYBYTES + crypto_box_MACBYTES)
SODIUM_EXPORT
size_t crypto_box_sealbytes(void);
size_t
crypto_box_sealbytes(void);
SODIUM_EXPORT
int crypto_box_seal(unsigned char *c, const unsigned char *m,
int
crypto_box_seal(unsigned char *c, const unsigned char *m,
unsigned long long mlen, const unsigned char *pk);
SODIUM_EXPORT
int crypto_box_seal_open(unsigned char *m, const unsigned char *c,
unsigned long long clen,
const unsigned char *pk, const unsigned char *sk)
int
crypto_box_seal_open(unsigned char *m, const unsigned char *c,
unsigned long long clen, const unsigned char *pk,
const unsigned char *sk)
__attribute__((warn_unused_result));
/* -- NaCl compatibility interface ; Requires padding -- */
#define crypto_box_ZEROBYTES crypto_box_curve25519xsalsa20poly1305_ZEROBYTES
SODIUM_EXPORT
size_t crypto_box_zerobytes(void);
size_t
crypto_box_zerobytes(void);
#define crypto_box_BOXZEROBYTES crypto_box_curve25519xsalsa20poly1305_BOXZEROBYTES
#define crypto_box_BOXZEROBYTES \
crypto_box_curve25519xsalsa20poly1305_BOXZEROBYTES
SODIUM_EXPORT
size_t crypto_box_boxzerobytes(void);
size_t
crypto_box_boxzerobytes(void);
SODIUM_EXPORT
int crypto_box(unsigned char *c, const unsigned char *m,
unsigned long long mlen, const unsigned char *n,
const unsigned char *pk, const unsigned char *sk)
__attribute__ ((warn_unused_result));
int
crypto_box(unsigned char *c, const unsigned char *m, unsigned long long mlen,
const unsigned char *n, const unsigned char *pk,
const unsigned char *sk) __attribute__((warn_unused_result));
SODIUM_EXPORT
int crypto_box_open(unsigned char *m, const unsigned char *c,
int
crypto_box_open(unsigned char *m, const unsigned char *c,
unsigned long long clen, const unsigned char *n,
const unsigned char *pk, const unsigned char *sk)
__attribute__((warn_unused_result));
SODIUM_EXPORT
int crypto_box_afternm(unsigned char *c, const unsigned char *m,
int
crypto_box_afternm(unsigned char *c, const unsigned char *m,
unsigned long long mlen, const unsigned char *n,
const unsigned char *k);
SODIUM_EXPORT
int crypto_box_open_afternm(unsigned char *m, const unsigned char *c,
int
crypto_box_open_afternm(unsigned char *m, const unsigned char *c,
unsigned long long clen, const unsigned char *n,
const unsigned char *k)
__attribute__((warn_unused_result));

@ -9,50 +9,63 @@
#ifdef __GNUC__
#pragma GCC diagnostic ignored "-Wlong-long"
#endif
extern "C" {
extern "C"
{
#endif
#define crypto_box_curve25519xsalsa20poly1305_SEEDBYTES 32U
SODIUM_EXPORT
size_t crypto_box_curve25519xsalsa20poly1305_seedbytes(void);
size_t
crypto_box_curve25519xsalsa20poly1305_seedbytes(void);
#define crypto_box_curve25519xsalsa20poly1305_PUBLICKEYBYTES 32U
SODIUM_EXPORT
size_t crypto_box_curve25519xsalsa20poly1305_publickeybytes(void);
size_t
crypto_box_curve25519xsalsa20poly1305_publickeybytes(void);
#define crypto_box_curve25519xsalsa20poly1305_SECRETKEYBYTES 32U
SODIUM_EXPORT
size_t crypto_box_curve25519xsalsa20poly1305_secretkeybytes(void);
size_t
crypto_box_curve25519xsalsa20poly1305_secretkeybytes(void);
#define crypto_box_curve25519xsalsa20poly1305_BEFORENMBYTES 32U
SODIUM_EXPORT
size_t crypto_box_curve25519xsalsa20poly1305_beforenmbytes(void);
size_t
crypto_box_curve25519xsalsa20poly1305_beforenmbytes(void);
#define crypto_box_curve25519xsalsa20poly1305_NONCEBYTES 24U
SODIUM_EXPORT
size_t crypto_box_curve25519xsalsa20poly1305_noncebytes(void);
size_t
crypto_box_curve25519xsalsa20poly1305_noncebytes(void);
#define crypto_box_curve25519xsalsa20poly1305_MACBYTES 16U
SODIUM_EXPORT
size_t crypto_box_curve25519xsalsa20poly1305_macbytes(void);
size_t
crypto_box_curve25519xsalsa20poly1305_macbytes(void);
/* Only for the libsodium API - The NaCl compatibility API would require BOXZEROBYTES extra bytes */
/* Only for the libsodium API - The NaCl compatibility API would require
* BOXZEROBYTES extra bytes */
#define crypto_box_curve25519xsalsa20poly1305_MESSAGEBYTES_MAX \
(crypto_stream_xsalsa20_MESSAGEBYTES_MAX - crypto_box_curve25519xsalsa20poly1305_MACBYTES)
(crypto_stream_xsalsa20_MESSAGEBYTES_MAX \
- crypto_box_curve25519xsalsa20poly1305_MACBYTES)
SODIUM_EXPORT
size_t crypto_box_curve25519xsalsa20poly1305_messagebytes_max(void);
size_t
crypto_box_curve25519xsalsa20poly1305_messagebytes_max(void);
SODIUM_EXPORT
int crypto_box_curve25519xsalsa20poly1305_seed_keypair(unsigned char *pk,
int
crypto_box_curve25519xsalsa20poly1305_seed_keypair(unsigned char *pk,
unsigned char *sk,
const unsigned char *seed);
SODIUM_EXPORT
int crypto_box_curve25519xsalsa20poly1305_keypair(unsigned char *pk,
int
crypto_box_curve25519xsalsa20poly1305_keypair(unsigned char *pk,
unsigned char *sk);
SODIUM_EXPORT
int crypto_box_curve25519xsalsa20poly1305_beforenm(unsigned char *k,
int
crypto_box_curve25519xsalsa20poly1305_beforenm(unsigned char *k,
const unsigned char *pk,
const unsigned char *sk)
__attribute__((warn_unused_result));
@ -61,41 +74,41 @@ int crypto_box_curve25519xsalsa20poly1305_beforenm(unsigned char *k,
#define crypto_box_curve25519xsalsa20poly1305_BOXZEROBYTES 16U
SODIUM_EXPORT
size_t crypto_box_curve25519xsalsa20poly1305_boxzerobytes(void);
size_t
crypto_box_curve25519xsalsa20poly1305_boxzerobytes(void);
#define crypto_box_curve25519xsalsa20poly1305_ZEROBYTES \
(crypto_box_curve25519xsalsa20poly1305_BOXZEROBYTES + \
crypto_box_curve25519xsalsa20poly1305_MACBYTES)
(crypto_box_curve25519xsalsa20poly1305_BOXZEROBYTES \
+ crypto_box_curve25519xsalsa20poly1305_MACBYTES)
SODIUM_EXPORT
size_t crypto_box_curve25519xsalsa20poly1305_zerobytes(void);
size_t
crypto_box_curve25519xsalsa20poly1305_zerobytes(void);
SODIUM_EXPORT
int crypto_box_curve25519xsalsa20poly1305(unsigned char *c,
const unsigned char *m,
unsigned long long mlen,
const unsigned char *n,
const unsigned char *pk,
const unsigned char *sk)
int
crypto_box_curve25519xsalsa20poly1305(
unsigned char *c, const unsigned char *m, unsigned long long mlen,
const unsigned char *n, const unsigned char *pk, const unsigned char *sk)
__attribute__((warn_unused_result));
SODIUM_EXPORT
int crypto_box_curve25519xsalsa20poly1305_open(unsigned char *m,
const unsigned char *c,
unsigned long long clen,
const unsigned char *n,
const unsigned char *pk,
const unsigned char *sk)
int
crypto_box_curve25519xsalsa20poly1305_open(
unsigned char *m, const unsigned char *c, unsigned long long clen,
const unsigned char *n, const unsigned char *pk, const unsigned char *sk)
__attribute__((warn_unused_result));
SODIUM_EXPORT
int crypto_box_curve25519xsalsa20poly1305_afternm(unsigned char *c,
int
crypto_box_curve25519xsalsa20poly1305_afternm(unsigned char *c,
const unsigned char *m,
unsigned long long mlen,
const unsigned char *n,
const unsigned char *k);
SODIUM_EXPORT
int crypto_box_curve25519xsalsa20poly1305_open_afternm(unsigned char *m,
int
crypto_box_curve25519xsalsa20poly1305_open_afternm(unsigned char *m,
const unsigned char *c,
unsigned long long clen,
const unsigned char *n,

@ -5,27 +5,33 @@
#include "export.h"
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
#define crypto_core_hchacha20_OUTPUTBYTES 32U
SODIUM_EXPORT
size_t crypto_core_hchacha20_outputbytes(void);
size_t
crypto_core_hchacha20_outputbytes(void);
#define crypto_core_hchacha20_INPUTBYTES 16U
SODIUM_EXPORT
size_t crypto_core_hchacha20_inputbytes(void);
size_t
crypto_core_hchacha20_inputbytes(void);
#define crypto_core_hchacha20_KEYBYTES 32U
SODIUM_EXPORT
size_t crypto_core_hchacha20_keybytes(void);
size_t
crypto_core_hchacha20_keybytes(void);
#define crypto_core_hchacha20_CONSTBYTES 16U
SODIUM_EXPORT
size_t crypto_core_hchacha20_constbytes(void);
size_t
crypto_core_hchacha20_constbytes(void);
SODIUM_EXPORT
int crypto_core_hchacha20(unsigned char *out, const unsigned char *in,
int
crypto_core_hchacha20(unsigned char *out, const unsigned char *in,
const unsigned char *k, const unsigned char *c);
#ifdef __cplusplus

@ -5,27 +5,33 @@
#include "export.h"
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
#define crypto_core_salsa20_OUTPUTBYTES 64U
SODIUM_EXPORT
size_t crypto_core_salsa20_outputbytes(void);
size_t
crypto_core_salsa20_outputbytes(void);
#define crypto_core_salsa20_INPUTBYTES 16U
SODIUM_EXPORT
size_t crypto_core_salsa20_inputbytes(void);
size_t
crypto_core_salsa20_inputbytes(void);
#define crypto_core_salsa20_KEYBYTES 32U
SODIUM_EXPORT
size_t crypto_core_salsa20_keybytes(void);
size_t
crypto_core_salsa20_keybytes(void);
#define crypto_core_salsa20_CONSTBYTES 16U
SODIUM_EXPORT
size_t crypto_core_salsa20_constbytes(void);
size_t
crypto_core_salsa20_constbytes(void);
SODIUM_EXPORT
int crypto_core_salsa20(unsigned char *out, const unsigned char *in,
int
crypto_core_salsa20(unsigned char *out, const unsigned char *in,
const unsigned char *k, const unsigned char *c);
#ifdef __cplusplus

@ -10,63 +10,76 @@
#ifdef __GNUC__
#pragma GCC diagnostic ignored "-Wlong-long"
#endif
extern "C" {
extern "C"
{
#endif
#define crypto_generichash_BYTES_MIN crypto_generichash_blake2b_BYTES_MIN
SODIUM_EXPORT
size_t crypto_generichash_bytes_min(void);
size_t
crypto_generichash_bytes_min(void);
#define crypto_generichash_BYTES_MAX crypto_generichash_blake2b_BYTES_MAX
SODIUM_EXPORT
size_t crypto_generichash_bytes_max(void);
size_t
crypto_generichash_bytes_max(void);
#define crypto_generichash_BYTES crypto_generichash_blake2b_BYTES
SODIUM_EXPORT
size_t crypto_generichash_bytes(void);
size_t
crypto_generichash_bytes(void);
#define crypto_generichash_KEYBYTES_MIN crypto_generichash_blake2b_KEYBYTES_MIN
SODIUM_EXPORT
size_t crypto_generichash_keybytes_min(void);
size_t
crypto_generichash_keybytes_min(void);
#define crypto_generichash_KEYBYTES_MAX crypto_generichash_blake2b_KEYBYTES_MAX
SODIUM_EXPORT
size_t crypto_generichash_keybytes_max(void);
size_t
crypto_generichash_keybytes_max(void);
#define crypto_generichash_KEYBYTES crypto_generichash_blake2b_KEYBYTES
SODIUM_EXPORT
size_t crypto_generichash_keybytes(void);
size_t
crypto_generichash_keybytes(void);
#define crypto_generichash_PRIMITIVE "blake2b"
SODIUM_EXPORT
const char *crypto_generichash_primitive(void);
const char *
crypto_generichash_primitive(void);
typedef crypto_generichash_blake2b_state crypto_generichash_state;
SODIUM_EXPORT
size_t crypto_generichash_statebytes(void);
size_t
crypto_generichash_statebytes(void);
SODIUM_EXPORT
int crypto_generichash(unsigned char *out, size_t outlen,
const unsigned char *in, unsigned long long inlen,
const unsigned char *key, size_t keylen);
int
crypto_generichash(unsigned char *out, size_t outlen, const unsigned char *in,
unsigned long long inlen, const unsigned char *key,
size_t keylen);
SODIUM_EXPORT
int crypto_generichash_init(crypto_generichash_state *state,
const unsigned char *key,
const size_t keylen, const size_t outlen);
int
crypto_generichash_init(crypto_generichash_state *state,
const unsigned char *key, const size_t keylen,
const size_t outlen);
SODIUM_EXPORT
int crypto_generichash_update(crypto_generichash_state *state,
const unsigned char *in,
unsigned long long inlen);
int
crypto_generichash_update(crypto_generichash_state *state,
const unsigned char *in, unsigned long long inlen);
SODIUM_EXPORT
int crypto_generichash_final(crypto_generichash_state *state,
unsigned char *out, const size_t outlen);
int
crypto_generichash_final(crypto_generichash_state *state, unsigned char *out,
const size_t outlen);
SODIUM_EXPORT
void crypto_generichash_keygen(unsigned char k[crypto_generichash_KEYBYTES]);
void
crypto_generichash_keygen(unsigned char k[crypto_generichash_KEYBYTES]);
#ifdef __cplusplus
}

@ -11,7 +11,8 @@
#ifdef __GNUC__
#pragma GCC diagnostic ignored "-Wlong-long"
#endif
extern "C" {
extern "C"
{
#endif
#if defined(__IBMC__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC)
@ -20,7 +21,8 @@ extern "C" {
#pragma pack(push, 1)
#endif
typedef struct CRYPTO_ALIGN(64) crypto_generichash_blake2b_state {
typedef struct CRYPTO_ALIGN(64) crypto_generichash_blake2b_state
{
uint64_t h[8];
uint64_t t[2];
uint64_t f[2];
@ -37,78 +39,89 @@ typedef struct CRYPTO_ALIGN(64) crypto_generichash_blake2b_state {
#define crypto_generichash_blake2b_BYTES_MIN 16U
SODIUM_EXPORT
size_t crypto_generichash_blake2b_bytes_min(void);
size_t
crypto_generichash_blake2b_bytes_min(void);
#define crypto_generichash_blake2b_BYTES_MAX 64U
SODIUM_EXPORT
size_t crypto_generichash_blake2b_bytes_max(void);
size_t
crypto_generichash_blake2b_bytes_max(void);
#define crypto_generichash_blake2b_BYTES 32U
SODIUM_EXPORT
size_t crypto_generichash_blake2b_bytes(void);
size_t
crypto_generichash_blake2b_bytes(void);
#define crypto_generichash_blake2b_KEYBYTES_MIN 16U
SODIUM_EXPORT
size_t crypto_generichash_blake2b_keybytes_min(void);
size_t
crypto_generichash_blake2b_keybytes_min(void);
#define crypto_generichash_blake2b_KEYBYTES_MAX 64U
SODIUM_EXPORT
size_t crypto_generichash_blake2b_keybytes_max(void);
size_t
crypto_generichash_blake2b_keybytes_max(void);
#define crypto_generichash_blake2b_KEYBYTES 32U
SODIUM_EXPORT
size_t crypto_generichash_blake2b_keybytes(void);
size_t
crypto_generichash_blake2b_keybytes(void);
#define crypto_generichash_blake2b_SALTBYTES 16U
SODIUM_EXPORT
size_t crypto_generichash_blake2b_saltbytes(void);
size_t
crypto_generichash_blake2b_saltbytes(void);
#define crypto_generichash_blake2b_PERSONALBYTES 16U
SODIUM_EXPORT
size_t crypto_generichash_blake2b_personalbytes(void);
size_t
crypto_generichash_blake2b_personalbytes(void);
SODIUM_EXPORT
size_t crypto_generichash_blake2b_statebytes(void);
size_t
crypto_generichash_blake2b_statebytes(void);
SODIUM_EXPORT
int crypto_generichash_blake2b(unsigned char *out, size_t outlen,
const unsigned char *in,
unsigned long long inlen,
int
crypto_generichash_blake2b(unsigned char *out, size_t outlen,
const unsigned char *in, unsigned long long inlen,
const unsigned char *key, size_t keylen);
SODIUM_EXPORT
int crypto_generichash_blake2b_salt_personal(unsigned char *out, size_t outlen,
const unsigned char *in,
unsigned long long inlen,
const unsigned char *key,
size_t keylen,
const unsigned char *salt,
const unsigned char *personal);
int
crypto_generichash_blake2b_salt_personal(
unsigned char *out, size_t outlen, const unsigned char *in,
unsigned long long inlen, const unsigned char *key, size_t keylen,
const unsigned char *salt, const unsigned char *personal);
SODIUM_EXPORT
int crypto_generichash_blake2b_init(crypto_generichash_blake2b_state *state,
const unsigned char *key,
const size_t keylen, const size_t outlen);
int
crypto_generichash_blake2b_init(crypto_generichash_blake2b_state *state,
const unsigned char *key, const size_t keylen,
const size_t outlen);
SODIUM_EXPORT
int crypto_generichash_blake2b_init_salt_personal(crypto_generichash_blake2b_state *state,
const unsigned char *key,
const size_t keylen, const size_t outlen,
const unsigned char *salt,
int
crypto_generichash_blake2b_init_salt_personal(
crypto_generichash_blake2b_state *state, const unsigned char *key,
const size_t keylen, const size_t outlen, const unsigned char *salt,
const unsigned char *personal);
SODIUM_EXPORT
int crypto_generichash_blake2b_update(crypto_generichash_blake2b_state *state,
int
crypto_generichash_blake2b_update(crypto_generichash_blake2b_state *state,
const unsigned char *in,
unsigned long long inlen);
SODIUM_EXPORT
int crypto_generichash_blake2b_final(crypto_generichash_blake2b_state *state,
unsigned char *out,
const size_t outlen);
int
crypto_generichash_blake2b_final(crypto_generichash_blake2b_state *state,
unsigned char *out, const size_t outlen);
SODIUM_EXPORT
void crypto_generichash_blake2b_keygen(unsigned char k[crypto_generichash_blake2b_KEYBYTES]);
void
crypto_generichash_blake2b_keygen(
unsigned char k[crypto_generichash_blake2b_KEYBYTES]);
#ifdef __cplusplus
}

@ -7,23 +7,28 @@
#include "export.h"
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
#define crypto_scalarmult_BYTES crypto_scalarmult_curve25519_BYTES
SODIUM_EXPORT
size_t crypto_scalarmult_bytes(void);
size_t
crypto_scalarmult_bytes(void);
#define crypto_scalarmult_SCALARBYTES crypto_scalarmult_curve25519_SCALARBYTES
SODIUM_EXPORT
size_t crypto_scalarmult_scalarbytes(void);
size_t
crypto_scalarmult_scalarbytes(void);
#define crypto_scalarmult_PRIMITIVE "curve25519"
SODIUM_EXPORT
const char *crypto_scalarmult_primitive(void);
const char *
crypto_scalarmult_primitive(void);
SODIUM_EXPORT
int crypto_scalarmult_base(unsigned char *q, const unsigned char *n);
int
crypto_scalarmult_base(unsigned char *q, const unsigned char *n);
/*
* NOTE: Do not use the result of this function directly.
@ -34,9 +39,9 @@ int crypto_scalarmult_base(unsigned char *q, const unsigned char *n);
* Or unless this is not an option, use the crypto_kx() API instead.
*/
SODIUM_EXPORT
int crypto_scalarmult(unsigned char *q, const unsigned char *n,
const unsigned char *p)
__attribute__ ((warn_unused_result));
int
crypto_scalarmult(unsigned char *q, const unsigned char *n,
const unsigned char *p) __attribute__((warn_unused_result));
#ifdef __cplusplus
}

@ -6,16 +6,19 @@
#include "export.h"
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
#define crypto_scalarmult_curve25519_BYTES 32U
SODIUM_EXPORT
size_t crypto_scalarmult_curve25519_bytes(void);
size_t
crypto_scalarmult_curve25519_bytes(void);
#define crypto_scalarmult_curve25519_SCALARBYTES 32U
SODIUM_EXPORT
size_t crypto_scalarmult_curve25519_scalarbytes(void);
size_t
crypto_scalarmult_curve25519_scalarbytes(void);
/*
* NOTE: Do not use the result of this function directly.
@ -26,12 +29,14 @@ size_t crypto_scalarmult_curve25519_scalarbytes(void);
* Or unless this is not an option, use the crypto_kx() API instead.
*/
SODIUM_EXPORT
int crypto_scalarmult_curve25519(unsigned char *q, const unsigned char *n,
int
crypto_scalarmult_curve25519(unsigned char *q, const unsigned char *n,
const unsigned char *p)
__attribute__((warn_unused_result));
SODIUM_EXPORT
int crypto_scalarmult_curve25519_base(unsigned char *q, const unsigned char *n);
int
crypto_scalarmult_curve25519_base(unsigned char *q, const unsigned char *n);
#ifdef __cplusplus
}

@ -7,16 +7,19 @@
#include "export.h"
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
#define crypto_scalarmult_ed25519_BYTES 32U
SODIUM_EXPORT
size_t crypto_scalarmult_ed25519_bytes(void);
size_t
crypto_scalarmult_ed25519_bytes(void);
#define crypto_scalarmult_ed25519_SCALARBYTES 32U
SODIUM_EXPORT
size_t crypto_scalarmult_ed25519_scalarbytes(void);
size_t
crypto_scalarmult_ed25519_scalarbytes(void);
/*
* NOTE: Do not use the result of this function directly.
@ -27,12 +30,14 @@ size_t crypto_scalarmult_ed25519_scalarbytes(void);
* Or unless this is not an option, use the crypto_kx() API instead.
*/
SODIUM_EXPORT
int crypto_scalarmult_ed25519(unsigned char *q, const unsigned char *n,
int
crypto_scalarmult_ed25519(unsigned char *q, const unsigned char *n,
const unsigned char *p)
__attribute__((warn_unused_result));
SODIUM_EXPORT
int crypto_scalarmult_ed25519_base(unsigned char *q, const unsigned char *n);
int
crypto_scalarmult_ed25519_base(unsigned char *q, const unsigned char *n);
#ifdef __cplusplus
}

@ -17,82 +17,97 @@
#ifdef __GNUC__
#pragma GCC diagnostic ignored "-Wlong-long"
#endif
extern "C" {
extern "C"
{
#endif
typedef crypto_sign_ed25519ph_state crypto_sign_state;
SODIUM_EXPORT
size_t crypto_sign_statebytes(void);
size_t
crypto_sign_statebytes(void);
#define crypto_sign_BYTES crypto_sign_ed25519_BYTES
SODIUM_EXPORT
size_t crypto_sign_bytes(void);
size_t
crypto_sign_bytes(void);
#define crypto_sign_SEEDBYTES crypto_sign_ed25519_SEEDBYTES
SODIUM_EXPORT
size_t crypto_sign_seedbytes(void);
size_t
crypto_sign_seedbytes(void);
#define crypto_sign_PUBLICKEYBYTES crypto_sign_ed25519_PUBLICKEYBYTES
SODIUM_EXPORT
size_t crypto_sign_publickeybytes(void);
size_t
crypto_sign_publickeybytes(void);
#define crypto_sign_SECRETKEYBYTES crypto_sign_ed25519_SECRETKEYBYTES
SODIUM_EXPORT
size_t crypto_sign_secretkeybytes(void);
size_t
crypto_sign_secretkeybytes(void);
#define crypto_sign_MESSAGEBYTES_MAX crypto_sign_ed25519_MESSAGEBYTES_MAX
SODIUM_EXPORT
size_t crypto_sign_messagebytes_max(void);
size_t
crypto_sign_messagebytes_max(void);
#define crypto_sign_PRIMITIVE "ed25519"
SODIUM_EXPORT
const char *crypto_sign_primitive(void);
const char *
crypto_sign_primitive(void);
SODIUM_EXPORT
int crypto_sign_seed_keypair(unsigned char *pk, unsigned char *sk,
int
crypto_sign_seed_keypair(unsigned char *pk, unsigned char *sk,
const unsigned char *seed);
SODIUM_EXPORT
int crypto_sign_keypair(unsigned char *pk, unsigned char *sk);
int
crypto_sign_keypair(unsigned char *pk, unsigned char *sk);
SODIUM_EXPORT
int crypto_sign(unsigned char *sm, unsigned long long *smlen_p,
int
crypto_sign(unsigned char *sm, unsigned long long *smlen_p,
const unsigned char *m, unsigned long long mlen,
const unsigned char *sk);
SODIUM_EXPORT
int crypto_sign_open(unsigned char *m, unsigned long long *mlen_p,
int
crypto_sign_open(unsigned char *m, unsigned long long *mlen_p,
const unsigned char *sm, unsigned long long smlen,
const unsigned char *pk)
__attribute__ ((warn_unused_result));
const unsigned char *pk) __attribute__((warn_unused_result));
SODIUM_EXPORT
int crypto_sign_detached(unsigned char *sig, unsigned long long *siglen_p,
int
crypto_sign_detached(unsigned char *sig, unsigned long long *siglen_p,
const unsigned char *m, unsigned long long mlen,
const unsigned char *sk);
SODIUM_EXPORT
int crypto_sign_verify_detached(const unsigned char *sig,
const unsigned char *m,
unsigned long long mlen,
const unsigned char *pk)
int
crypto_sign_verify_detached(const unsigned char *sig, const unsigned char *m,
unsigned long long mlen, const unsigned char *pk)
__attribute__((warn_unused_result));
SODIUM_EXPORT
int crypto_sign_init(crypto_sign_state *state);
int
crypto_sign_init(crypto_sign_state *state);
SODIUM_EXPORT
int crypto_sign_update(crypto_sign_state *state,
const unsigned char *m, unsigned long long mlen);
int
crypto_sign_update(crypto_sign_state *state, const unsigned char *m,
unsigned long long mlen);
SODIUM_EXPORT
int crypto_sign_final_create(crypto_sign_state *state, unsigned char *sig,
int
crypto_sign_final_create(crypto_sign_state *state, unsigned char *sig,
unsigned long long *siglen_p,
const unsigned char *sk);
SODIUM_EXPORT
int crypto_sign_final_verify(crypto_sign_state *state, unsigned char *sig,
int
crypto_sign_final_verify(crypto_sign_state *state, unsigned char *sig,
const unsigned char *pk)
__attribute__((warn_unused_result));

@ -9,100 +9,119 @@
#ifdef __GNUC__
#pragma GCC diagnostic ignored "-Wlong-long"
#endif
extern "C" {
extern "C"
{
#endif
typedef struct crypto_sign_ed25519ph_state {
typedef struct crypto_sign_ed25519ph_state
{
crypto_hash_sha512_state hs;
} crypto_sign_ed25519ph_state;
SODIUM_EXPORT
size_t crypto_sign_ed25519ph_statebytes(void);
size_t
crypto_sign_ed25519ph_statebytes(void);
#define crypto_sign_ed25519_BYTES 64U
SODIUM_EXPORT
size_t crypto_sign_ed25519_bytes(void);
size_t
crypto_sign_ed25519_bytes(void);
#define crypto_sign_ed25519_SEEDBYTES 32U
SODIUM_EXPORT
size_t crypto_sign_ed25519_seedbytes(void);
size_t
crypto_sign_ed25519_seedbytes(void);
#define crypto_sign_ed25519_PUBLICKEYBYTES 32U
SODIUM_EXPORT
size_t crypto_sign_ed25519_publickeybytes(void);
size_t
crypto_sign_ed25519_publickeybytes(void);
#define crypto_sign_ed25519_SECRETKEYBYTES (32U + 32U)
SODIUM_EXPORT
size_t crypto_sign_ed25519_secretkeybytes(void);
size_t
crypto_sign_ed25519_secretkeybytes(void);
#define crypto_sign_ed25519_MESSAGEBYTES_MAX (SODIUM_SIZE_MAX - crypto_sign_ed25519_BYTES)
#define crypto_sign_ed25519_MESSAGEBYTES_MAX \
(SODIUM_SIZE_MAX - crypto_sign_ed25519_BYTES)
SODIUM_EXPORT
size_t crypto_sign_ed25519_messagebytes_max(void);
size_t
crypto_sign_ed25519_messagebytes_max(void);
SODIUM_EXPORT
int crypto_sign_ed25519(unsigned char *sm, unsigned long long *smlen_p,
int
crypto_sign_ed25519(unsigned char *sm, unsigned long long *smlen_p,
const unsigned char *m, unsigned long long mlen,
const unsigned char *sk);
SODIUM_EXPORT
int crypto_sign_ed25519_open(unsigned char *m, unsigned long long *mlen_p,
int
crypto_sign_ed25519_open(unsigned char *m, unsigned long long *mlen_p,
const unsigned char *sm, unsigned long long smlen,
const unsigned char *pk)
__attribute__((warn_unused_result));
SODIUM_EXPORT
int crypto_sign_ed25519_detached(unsigned char *sig,
unsigned long long *siglen_p,
const unsigned char *m,
unsigned long long mlen,
int
crypto_sign_ed25519_detached(unsigned char *sig, unsigned long long *siglen_p,
const unsigned char *m, unsigned long long mlen,
const unsigned char *sk);
SODIUM_EXPORT
int crypto_sign_ed25519_verify_detached(const unsigned char *sig,
int
crypto_sign_ed25519_verify_detached(const unsigned char *sig,
const unsigned char *m,
unsigned long long mlen,
const unsigned char *pk)
__attribute__((warn_unused_result));
SODIUM_EXPORT
int crypto_sign_ed25519_keypair(unsigned char *pk, unsigned char *sk);
int
crypto_sign_ed25519_keypair(unsigned char *pk, unsigned char *sk);
SODIUM_EXPORT
int crypto_sign_ed25519_seed_keypair(unsigned char *pk, unsigned char *sk,
int
crypto_sign_ed25519_seed_keypair(unsigned char *pk, unsigned char *sk,
const unsigned char *seed);
SODIUM_EXPORT
int crypto_sign_ed25519_pk_to_curve25519(unsigned char *curve25519_pk,
int
crypto_sign_ed25519_pk_to_curve25519(unsigned char *curve25519_pk,
const unsigned char *ed25519_pk)
__attribute__((warn_unused_result));
SODIUM_EXPORT
int crypto_sign_ed25519_sk_to_curve25519(unsigned char *curve25519_sk,
int
crypto_sign_ed25519_sk_to_curve25519(unsigned char *curve25519_sk,
const unsigned char *ed25519_sk);
SODIUM_EXPORT
int crypto_sign_ed25519_sk_to_seed(unsigned char *seed,
const unsigned char *sk);
int
crypto_sign_ed25519_sk_to_seed(unsigned char *seed, const unsigned char *sk);
SODIUM_EXPORT
int crypto_sign_ed25519_sk_to_pk(unsigned char *pk, const unsigned char *sk);
int
crypto_sign_ed25519_sk_to_pk(unsigned char *pk, const unsigned char *sk);
SODIUM_EXPORT
int crypto_sign_ed25519ph_init(crypto_sign_ed25519ph_state *state);
int
crypto_sign_ed25519ph_init(crypto_sign_ed25519ph_state *state);
SODIUM_EXPORT
int crypto_sign_ed25519ph_update(crypto_sign_ed25519ph_state *state,
const unsigned char *m,
unsigned long long mlen);
int
crypto_sign_ed25519ph_update(crypto_sign_ed25519ph_state *state,
const unsigned char *m, unsigned long long mlen);
SODIUM_EXPORT
int crypto_sign_ed25519ph_final_create(crypto_sign_ed25519ph_state *state,
int
crypto_sign_ed25519ph_final_create(crypto_sign_ed25519ph_state *state,
unsigned char *sig,
unsigned long long *siglen_p,
const unsigned char *sk);
SODIUM_EXPORT
int crypto_sign_ed25519ph_final_verify(crypto_sign_ed25519ph_state *state,
int
crypto_sign_ed25519ph_final_verify(crypto_sign_ed25519ph_state *state,
unsigned char *sig,
const unsigned char *pk)
__attribute__((warn_unused_result));

@ -19,16 +19,19 @@
#ifdef __GNUC__
#pragma GCC diagnostic ignored "-Wlong-long"
#endif
extern "C" {
extern "C"
{
#endif
#define crypto_sign_edwards25519sha512batch_BYTES 64U
#define crypto_sign_edwards25519sha512batch_PUBLICKEYBYTES 32U
#define crypto_sign_edwards25519sha512batch_SECRETKEYBYTES (32U + 32U)
#define crypto_sign_edwards25519sha512batch_MESSAGEBYTES_MAX (SODIUM_SIZE_MAX - crypto_sign_edwards25519sha512batch_BYTES)
#define crypto_sign_edwards25519sha512batch_MESSAGEBYTES_MAX \
(SODIUM_SIZE_MAX - crypto_sign_edwards25519sha512batch_BYTES)
SODIUM_EXPORT
int crypto_sign_edwards25519sha512batch(unsigned char *sm,
int
crypto_sign_edwards25519sha512batch(unsigned char *sm,
unsigned long long *smlen_p,
const unsigned char *m,
unsigned long long mlen,
@ -36,7 +39,8 @@ int crypto_sign_edwards25519sha512batch(unsigned char *sm,
__attribute__((deprecated));
SODIUM_EXPORT
int crypto_sign_edwards25519sha512batch_open(unsigned char *m,
int
crypto_sign_edwards25519sha512batch_open(unsigned char *m,
unsigned long long *mlen_p,
const unsigned char *sm,
unsigned long long smlen,
@ -44,7 +48,8 @@ int crypto_sign_edwards25519sha512batch_open(unsigned char *m,
__attribute__((deprecated));
SODIUM_EXPORT
int crypto_sign_edwards25519sha512batch_keypair(unsigned char *pk,
int
crypto_sign_edwards25519sha512batch_keypair(unsigned char *pk,
unsigned char *sk)
__attribute__((deprecated));

@ -17,79 +17,100 @@
#ifdef __GNUC__
#pragma GCC diagnostic ignored "-Wlong-long"
#endif
extern "C" {
extern "C"
{
#endif
#define crypto_stream_chacha20_KEYBYTES 32U
SODIUM_EXPORT
size_t crypto_stream_chacha20_keybytes(void);
size_t
crypto_stream_chacha20_keybytes(void);
#define crypto_stream_chacha20_NONCEBYTES 8U
SODIUM_EXPORT
size_t crypto_stream_chacha20_noncebytes(void);
size_t
crypto_stream_chacha20_noncebytes(void);
#define crypto_stream_chacha20_MESSAGEBYTES_MAX SODIUM_SIZE_MAX
SODIUM_EXPORT
size_t crypto_stream_chacha20_messagebytes_max(void);
size_t
crypto_stream_chacha20_messagebytes_max(void);
/* ChaCha20 with a 64-bit nonce and a 64-bit counter, as originally designed */
/* ChaCha20 with a 64-bit nonce and a 64-bit counter, as originally designed
*/
SODIUM_EXPORT
int crypto_stream_chacha20(unsigned char *c, unsigned long long clen,
int
crypto_stream_chacha20(unsigned char *c, unsigned long long clen,
const unsigned char *n, const unsigned char *k);
SODIUM_EXPORT
int crypto_stream_chacha20_xor(unsigned char *c, const unsigned char *m,
int
crypto_stream_chacha20_xor(unsigned char *c, const unsigned char *m,
unsigned long long mlen, const unsigned char *n,
const unsigned char *k);
SODIUM_EXPORT
int crypto_stream_chacha20_xor_ic(unsigned char *c, const unsigned char *m,
unsigned long long mlen,
const unsigned char *n, uint64_t ic,
const unsigned char *k);
int
crypto_stream_chacha20_xor_ic(unsigned char *c, const unsigned char *m,
unsigned long long mlen, const unsigned char *n,
uint64_t ic, const unsigned char *k);
SODIUM_EXPORT
void crypto_stream_chacha20_keygen(unsigned char k[crypto_stream_chacha20_KEYBYTES]);
void
crypto_stream_chacha20_keygen(
unsigned char k[crypto_stream_chacha20_KEYBYTES]);
/* ChaCha20 with a 96-bit nonce and a 32-bit counter (IETF) */
#define crypto_stream_chacha20_ietf_KEYBYTES 32U
SODIUM_EXPORT
size_t crypto_stream_chacha20_ietf_keybytes(void);
size_t
crypto_stream_chacha20_ietf_keybytes(void);
#define crypto_stream_chacha20_ietf_NONCEBYTES 12U
SODIUM_EXPORT
size_t crypto_stream_chacha20_ietf_noncebytes(void);
size_t
crypto_stream_chacha20_ietf_noncebytes(void);
#define crypto_stream_chacha20_ietf_MESSAGEBYTES_MAX \
SODIUM_MIN(SODIUM_SIZE_MAX, 64ULL * (1ULL << 32))
SODIUM_EXPORT
size_t crypto_stream_chacha20_ietf_messagebytes_max(void);
size_t
crypto_stream_chacha20_ietf_messagebytes_max(void);
SODIUM_EXPORT
int crypto_stream_chacha20_ietf(unsigned char *c, unsigned long long clen,
int
crypto_stream_chacha20_ietf(unsigned char *c, unsigned long long clen,
const unsigned char *n, const unsigned char *k);
SODIUM_EXPORT
int crypto_stream_chacha20_ietf_xor(unsigned char *c, const unsigned char *m,
unsigned long long mlen, const unsigned char *n,
int
crypto_stream_chacha20_ietf_xor(unsigned char *c, const unsigned char *m,
unsigned long long mlen,
const unsigned char *n,
const unsigned char *k);
SODIUM_EXPORT
int crypto_stream_chacha20_ietf_xor_ic(unsigned char *c, const unsigned char *m,
int
crypto_stream_chacha20_ietf_xor_ic(unsigned char *c, const unsigned char *m,
unsigned long long mlen,
const unsigned char *n, uint32_t ic,
const unsigned char *k);
SODIUM_EXPORT
void crypto_stream_chacha20_ietf_keygen(unsigned char k[crypto_stream_chacha20_ietf_KEYBYTES]);
void
crypto_stream_chacha20_ietf_keygen(
unsigned char k[crypto_stream_chacha20_ietf_KEYBYTES]);
/* Aliases */
#define crypto_stream_chacha20_IETF_KEYBYTES crypto_stream_chacha20_ietf_KEYBYTES
#define crypto_stream_chacha20_IETF_NONCEBYTES crypto_stream_chacha20_ietf_NONCEBYTES
#define crypto_stream_chacha20_IETF_MESSAGEBYTES_MAX crypto_stream_chacha20_ietf_MESSAGEBYTES_MAX
#define crypto_stream_chacha20_IETF_KEYBYTES \
crypto_stream_chacha20_ietf_KEYBYTES
#define crypto_stream_chacha20_IETF_NONCEBYTES \
crypto_stream_chacha20_ietf_NONCEBYTES
#define crypto_stream_chacha20_IETF_MESSAGEBYTES_MAX \
crypto_stream_chacha20_ietf_MESSAGEBYTES_MAX
#ifdef __cplusplus
}

@ -17,38 +17,45 @@
#ifdef __GNUC__
#pragma GCC diagnostic ignored "-Wlong-long"
#endif
extern "C" {
extern "C"
{
#endif
#define crypto_stream_salsa20_KEYBYTES 32U
SODIUM_EXPORT
size_t crypto_stream_salsa20_keybytes(void);
size_t
crypto_stream_salsa20_keybytes(void);
#define crypto_stream_salsa20_NONCEBYTES 8U
SODIUM_EXPORT
size_t crypto_stream_salsa20_noncebytes(void);
size_t
crypto_stream_salsa20_noncebytes(void);
#define crypto_stream_salsa20_MESSAGEBYTES_MAX SODIUM_SIZE_MAX
SODIUM_EXPORT
size_t crypto_stream_salsa20_messagebytes_max(void);
size_t
crypto_stream_salsa20_messagebytes_max(void);
SODIUM_EXPORT
int crypto_stream_salsa20(unsigned char *c, unsigned long long clen,
int
crypto_stream_salsa20(unsigned char *c, unsigned long long clen,
const unsigned char *n, const unsigned char *k);
SODIUM_EXPORT
int crypto_stream_salsa20_xor(unsigned char *c, const unsigned char *m,
int
crypto_stream_salsa20_xor(unsigned char *c, const unsigned char *m,
unsigned long long mlen, const unsigned char *n,
const unsigned char *k);
SODIUM_EXPORT
int crypto_stream_salsa20_xor_ic(unsigned char *c, const unsigned char *m,
unsigned long long mlen,
const unsigned char *n, uint64_t ic,
const unsigned char *k);
int
crypto_stream_salsa20_xor_ic(unsigned char *c, const unsigned char *m,
unsigned long long mlen, const unsigned char *n,
uint64_t ic, const unsigned char *k);
SODIUM_EXPORT
void crypto_stream_salsa20_keygen(unsigned char k[crypto_stream_salsa20_KEYBYTES]);
void
crypto_stream_salsa20_keygen(unsigned char k[crypto_stream_salsa20_KEYBYTES]);
#ifdef __cplusplus
}

@ -17,38 +17,46 @@
#ifdef __GNUC__
#pragma GCC diagnostic ignored "-Wlong-long"
#endif
extern "C" {
extern "C"
{
#endif
#define crypto_stream_xsalsa20_KEYBYTES 32U
SODIUM_EXPORT
size_t crypto_stream_xsalsa20_keybytes(void);
size_t
crypto_stream_xsalsa20_keybytes(void);
#define crypto_stream_xsalsa20_NONCEBYTES 24U
SODIUM_EXPORT
size_t crypto_stream_xsalsa20_noncebytes(void);
size_t
crypto_stream_xsalsa20_noncebytes(void);
#define crypto_stream_xsalsa20_MESSAGEBYTES_MAX SODIUM_SIZE_MAX
SODIUM_EXPORT
size_t crypto_stream_xsalsa20_messagebytes_max(void);
size_t
crypto_stream_xsalsa20_messagebytes_max(void);
SODIUM_EXPORT
int crypto_stream_xsalsa20(unsigned char *c, unsigned long long clen,
int
crypto_stream_xsalsa20(unsigned char *c, unsigned long long clen,
const unsigned char *n, const unsigned char *k);
SODIUM_EXPORT
int crypto_stream_xsalsa20_xor(unsigned char *c, const unsigned char *m,
int
crypto_stream_xsalsa20_xor(unsigned char *c, const unsigned char *m,
unsigned long long mlen, const unsigned char *n,
const unsigned char *k);
SODIUM_EXPORT
int crypto_stream_xsalsa20_xor_ic(unsigned char *c, const unsigned char *m,
unsigned long long mlen,
const unsigned char *n, uint64_t ic,
const unsigned char *k);
int
crypto_stream_xsalsa20_xor_ic(unsigned char *c, const unsigned char *m,
unsigned long long mlen, const unsigned char *n,
uint64_t ic, const unsigned char *k);
SODIUM_EXPORT
void crypto_stream_xsalsa20_keygen(unsigned char k[crypto_stream_xsalsa20_KEYBYTES]);
void
crypto_stream_xsalsa20_keygen(
unsigned char k[crypto_stream_xsalsa20_KEYBYTES]);
#ifdef __cplusplus
}

@ -71,13 +71,20 @@ store64_le(uint8_t dst[8], uint64_t w)
#ifdef NATIVE_LITTLE_ENDIAN
memcpy(dst, &w, sizeof w);
#else
dst[0] = (uint8_t) w; w >>= 8;
dst[1] = (uint8_t) w; w >>= 8;
dst[2] = (uint8_t) w; w >>= 8;
dst[3] = (uint8_t) w; w >>= 8;
dst[4] = (uint8_t) w; w >>= 8;
dst[5] = (uint8_t) w; w >>= 8;
dst[6] = (uint8_t) w; w >>= 8;
dst[0] = (uint8_t)w;
w >>= 8;
dst[1] = (uint8_t)w;
w >>= 8;
dst[2] = (uint8_t)w;
w >>= 8;
dst[3] = (uint8_t)w;
w >>= 8;
dst[4] = (uint8_t)w;
w >>= 8;
dst[5] = (uint8_t)w;
w >>= 8;
dst[6] = (uint8_t)w;
w >>= 8;
dst[7] = (uint8_t)w;
#endif
}
@ -106,9 +113,12 @@ store32_le(uint8_t dst[4], uint32_t w)
#ifdef NATIVE_LITTLE_ENDIAN
memcpy(dst, &w, sizeof w);
#else
dst[0] = (uint8_t) w; w >>= 8;
dst[1] = (uint8_t) w; w >>= 8;
dst[2] = (uint8_t) w; w >>= 8;
dst[0] = (uint8_t)w;
w >>= 8;
dst[1] = (uint8_t)w;
w >>= 8;
dst[2] = (uint8_t)w;
w >>= 8;
dst[3] = (uint8_t)w;
#endif
}
@ -143,13 +153,20 @@ store64_be(uint8_t dst[8], uint64_t w)
#ifdef NATIVE_BIG_ENDIAN
memcpy(dst, &w, sizeof w);
#else
dst[7] = (uint8_t) w; w >>= 8;
dst[6] = (uint8_t) w; w >>= 8;
dst[5] = (uint8_t) w; w >>= 8;
dst[4] = (uint8_t) w; w >>= 8;
dst[3] = (uint8_t) w; w >>= 8;
dst[2] = (uint8_t) w; w >>= 8;
dst[1] = (uint8_t) w; w >>= 8;
dst[7] = (uint8_t)w;
w >>= 8;
dst[6] = (uint8_t)w;
w >>= 8;
dst[5] = (uint8_t)w;
w >>= 8;
dst[4] = (uint8_t)w;
w >>= 8;
dst[3] = (uint8_t)w;
w >>= 8;
dst[2] = (uint8_t)w;
w >>= 8;
dst[1] = (uint8_t)w;
w >>= 8;
dst[0] = (uint8_t)w;
#endif
}
@ -178,9 +195,12 @@ store32_be(uint8_t dst[4], uint32_t w)
#ifdef NATIVE_BIG_ENDIAN
memcpy(dst, &w, sizeof w);
#else
dst[3] = (uint8_t) w; w >>= 8;
dst[2] = (uint8_t) w; w >>= 8;
dst[1] = (uint8_t) w; w >>= 8;
dst[3] = (uint8_t)w;
w >>= 8;
dst[2] = (uint8_t)w;
w >>= 8;
dst[1] = (uint8_t)w;
w >>= 8;
dst[0] = (uint8_t)w;
#endif
}
@ -191,7 +211,8 @@ xor_buf(unsigned char *out, const unsigned char *in, size_t n)
{
size_t i;
for (i = 0; i < n; i++) {
for(i = 0; i < n; i++)
{
out[i] ^= in[i];
}
}
@ -211,8 +232,8 @@ xor_buf(unsigned char *out, const unsigned char *in, size_t n)
#endif
#endif
#if defined(_MSC_VER) && \
(defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86))
#if defined(_MSC_VER) \
&& (defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86))
#include <intrin.h>
@ -234,8 +255,10 @@ xor_buf(unsigned char *out, const unsigned char *in, size_t n)
#endif
#ifdef HAVE_LIBCTGRIND
extern void ct_poison (const void *, size_t);
extern void ct_unpoison(const void *, size_t);
extern void
ct_poison(const void *, size_t);
extern void
ct_unpoison(const void *, size_t);
#define POISON(X, L) ct_poison((X), (L))
#define UNPOISON(X, L) ct_unpoison((X), (L))
#else

@ -15,9 +15,12 @@ typedef uint64_t fe25519[5];
typedef int32_t fe25519[10];
#endif
void fe25519_invert(fe25519 out, const fe25519 z);
void fe25519_frombytes(fe25519 h, const unsigned char *s);
void fe25519_tobytes(unsigned char *s, const fe25519 h);
void
fe25519_invert(fe25519 out, const fe25519 z);
void
fe25519_frombytes(fe25519 h, const unsigned char *s);
void
fe25519_tobytes(unsigned char *s, const fe25519 h);
#ifdef HAVE_TI_MODE
#include "ed25519_ref10_fe_51.h"
@ -25,7 +28,6 @@ void fe25519_tobytes(unsigned char *s, const fe25519 h);
#include "ed25519_ref10_fe_25_5.h"
#endif
/*
ge means group element.
@ -40,86 +42,109 @@ void fe25519_tobytes(unsigned char *s, const fe25519 h);
ge25519_precomp (Duif): (y+x,y-x,2dxy)
*/
typedef struct {
typedef struct
{
fe25519 X;
fe25519 Y;
fe25519 Z;
} ge25519_p2;
typedef struct {
typedef struct
{
fe25519 X;
fe25519 Y;
fe25519 Z;
fe25519 T;
} ge25519_p3;
typedef struct {
typedef struct
{
fe25519 X;
fe25519 Y;
fe25519 Z;
fe25519 T;
} ge25519_p1p1;
typedef struct {
typedef struct
{
fe25519 yplusx;
fe25519 yminusx;
fe25519 xy2d;
} ge25519_precomp;
typedef struct {
typedef struct
{
fe25519 YplusX;
fe25519 YminusX;
fe25519 Z;
fe25519 T2d;
} ge25519_cached;
void ge25519_tobytes(unsigned char *s, const ge25519_p2 *h);
void
ge25519_tobytes(unsigned char *s, const ge25519_p2 *h);
void ge25519_p3_tobytes(unsigned char *s, const ge25519_p3 *h);
void
ge25519_p3_tobytes(unsigned char *s, const ge25519_p3 *h);
int ge25519_frombytes(ge25519_p3 *h, const unsigned char *s);
int
ge25519_frombytes(ge25519_p3 *h, const unsigned char *s);
int ge25519_frombytes_negate_vartime(ge25519_p3 *h, const unsigned char *s);
int
ge25519_frombytes_negate_vartime(ge25519_p3 *h, const unsigned char *s);
void ge25519_p3_to_cached(ge25519_cached *r, const ge25519_p3 *p);
void
ge25519_p3_to_cached(ge25519_cached *r, const ge25519_p3 *p);
void ge25519_p1p1_to_p2(ge25519_p2 *r, const ge25519_p1p1 *p);
void
ge25519_p1p1_to_p2(ge25519_p2 *r, const ge25519_p1p1 *p);
void ge25519_p1p1_to_p3(ge25519_p3 *r, const ge25519_p1p1 *p);
void
ge25519_p1p1_to_p3(ge25519_p3 *r, const ge25519_p1p1 *p);
void ge25519_add(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_cached *q);
void
ge25519_add(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_cached *q);
void ge25519_sub(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_cached *q);
void
ge25519_sub(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_cached *q);
void ge25519_scalarmult_base(ge25519_p3 *h, const unsigned char *a);
void
ge25519_scalarmult_base(ge25519_p3 *h, const unsigned char *a);
void ge25519_double_scalarmult_vartime(ge25519_p2 *r, const unsigned char *a,
const ge25519_p3 *A,
const unsigned char *b);
void
ge25519_double_scalarmult_vartime(ge25519_p2 *r, const unsigned char *a,
const ge25519_p3 *A, const unsigned char *b);
void ge25519_scalarmult(ge25519_p3 *h, const unsigned char *a,
const ge25519_p3 *p);
void
ge25519_scalarmult(ge25519_p3 *h, const unsigned char *a, const ge25519_p3 *p);
int ge25519_is_canonical(const unsigned char *s);
int
ge25519_is_canonical(const unsigned char *s);
int ge25519_is_on_curve(const ge25519_p3 *p);
int
ge25519_is_on_curve(const ge25519_p3 *p);
int ge25519_is_on_main_subgroup(const ge25519_p3 *p);
int
ge25519_is_on_main_subgroup(const ge25519_p3 *p);
int ge25519_has_small_order(const unsigned char s[32]);
int
ge25519_has_small_order(const unsigned char s[32]);
void ge25519_from_uniform(unsigned char s[32], const unsigned char r[32]);
void
ge25519_from_uniform(unsigned char s[32], const unsigned char r[32]);
/*
The set of scalars is \Z/l
where l = 2^252 + 27742317777372353535851937790883648493.
*/
void sc25519_reduce(unsigned char *s);
void
sc25519_reduce(unsigned char *s);
void sc25519_muladd(unsigned char *s, const unsigned char *a,
const unsigned char *b, const unsigned char *c);
void
sc25519_muladd(unsigned char *s, const unsigned char *a, const unsigned char *b,
const unsigned char *c);
int sc25519_is_canonical(const unsigned char *s);
int
sc25519_is_canonical(const unsigned char *s);
#endif

@ -1,11 +1,17 @@
#ifndef implementations_H
#define implementations_H
int _crypto_generichash_blake2b_pick_best_implementation(void);
int _crypto_onetimeauth_poly1305_pick_best_implementation(void);
int _crypto_pwhash_argon2_pick_best_implementation(void);
int _crypto_scalarmult_curve25519_pick_best_implementation(void);
int _crypto_stream_chacha20_pick_best_implementation(void);
int _crypto_stream_salsa20_pick_best_implementation(void);
int
_crypto_generichash_blake2b_pick_best_implementation(void);
int
_crypto_onetimeauth_poly1305_pick_best_implementation(void);
int
_crypto_pwhash_argon2_pick_best_implementation(void);
int
_crypto_scalarmult_curve25519_pick_best_implementation(void);
int
_crypto_stream_chacha20_pick_best_implementation(void);
int
_crypto_stream_salsa20_pick_best_implementation(void);
#endif

@ -1,7 +1,9 @@
#ifndef mutex_H
#define mutex_H 1
extern int sodium_crit_enter(void);
extern int sodium_crit_leave(void);
extern int
sodium_crit_enter(void);
extern int
sodium_crit_leave(void);
#endif

@ -7,9 +7,9 @@
#include <intrin.h>
#endif
#if defined(HAVE_EMMINTRIN_H) && \
!(defined(__amd64) || defined(__amd64__) || defined(__x86_64__) || \
defined(_M_X64) || defined(_M_AMD64))
#if defined(HAVE_EMMINTRIN_H) \
&& !(defined(__amd64) || defined(__amd64__) || defined(__x86_64__) \
|| defined(_M_X64) || defined(_M_AMD64))
#include <emmintrin.h>
#include <stdint.h>
@ -19,8 +19,12 @@
static inline __m128i
sodium__mm_set_epi64x(int64_t q1, int64_t q0)
{
union { int64_t as64; int32_t as32[2]; } x0, x1;
x0.as64 = q0; x1.as64 = q1;
union {
int64_t as64;
int32_t as32[2];
} x0, x1;
x0.as64 = q0;
x1.as64 = q1;
return _mm_set_epi32(x1.as32[1], x1.as32[0], x0.as32[1], x0.as32[0]);
}
#endif
@ -39,7 +43,10 @@ sodium__mm_set1_epi64x(int64_t q)
static inline __m128i
sodium__mm_cvtsi64_si128(int64_t q)
{
union { int64_t as64; int32_t as32[2]; } x;
union {
int64_t as64;
int32_t as32[2];
} x;
x.as64 = q;
return _mm_setr_epi32(x.as32[0], x.as32[1], 0, 0);
}

@ -8,11 +8,13 @@
#include "randombytes.h"
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
SODIUM_EXPORT
extern struct randombytes_implementation randombytes_nativeclient_implementation;
extern struct randombytes_implementation
randombytes_nativeclient_implementation;
#ifdef __cplusplus
}

@ -5,45 +5,59 @@
#include "export.h"
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
SODIUM_EXPORT_WEAK
int sodium_runtime_has_neon(void);
int
sodium_runtime_has_neon(void);
SODIUM_EXPORT_WEAK
int sodium_runtime_has_sse2(void);
int
sodium_runtime_has_sse2(void);
SODIUM_EXPORT_WEAK
int sodium_runtime_has_sse3(void);
int
sodium_runtime_has_sse3(void);
SODIUM_EXPORT_WEAK
int sodium_runtime_has_ssse3(void);
int
sodium_runtime_has_ssse3(void);
SODIUM_EXPORT_WEAK
int sodium_runtime_has_sse41(void);
int
sodium_runtime_has_sse41(void);
SODIUM_EXPORT_WEAK
int sodium_runtime_has_avx(void);
int
sodium_runtime_has_avx(void);
SODIUM_EXPORT_WEAK
int sodium_runtime_has_avx2(void);
int
sodium_runtime_has_avx2(void);
SODIUM_EXPORT_WEAK
int sodium_runtime_has_avx512f(void);
int
sodium_runtime_has_avx512f(void);
SODIUM_EXPORT_WEAK
int sodium_runtime_has_pclmul(void);
int
sodium_runtime_has_pclmul(void);
SODIUM_EXPORT_WEAK
int sodium_runtime_has_aesni(void);
int
sodium_runtime_has_aesni(void);
SODIUM_EXPORT_WEAK
int sodium_runtime_has_rdrand(void);
int
sodium_runtime_has_rdrand(void);
/* ------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------
*/
int _sodium_runtime_get_cpu_features(void);
int
_sodium_runtime_get_cpu_features(void);
#ifdef __cplusplus
}

@ -7,11 +7,13 @@
#include "export.h"
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
#ifndef SODIUM_C99
# if defined(__cplusplus) || !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L
#if defined(__cplusplus) || !defined(__STDC_VERSION__) \
|| __STDC_VERSION__ < 199901L
#define SODIUM_C99(X)
#else
#define SODIUM_C99(X) X
@ -19,10 +21,12 @@ extern "C" {
#endif
SODIUM_EXPORT
void sodium_memzero(void * const pnt, const size_t len);
void
sodium_memzero(void *const pnt, const size_t len);
SODIUM_EXPORT
void sodium_stackzero(const size_t len);
void
sodium_stackzero(const size_t len);
/*
* WARNING: sodium_memcmp() must be used to verify if two secret keys
@ -31,35 +35,41 @@ void sodium_stackzero(const size_t len);
* This function is not designed for lexicographical comparisons.
*/
SODIUM_EXPORT
int sodium_memcmp(const void * const b1_, const void * const b2_, size_t len)
int
sodium_memcmp(const void *const b1_, const void *const b2_, size_t len)
__attribute__((warn_unused_result));
/*
* sodium_compare() returns -1 if b1_ < b2_, 1 if b1_ > b2_ and 0 if b1_ == b2_
* It is suitable for lexicographical comparisons, or to compare nonces
* sodium_compare() returns -1 if b1_ < b2_, 1 if b1_ > b2_ and 0 if b1_ ==
* b2_ It is suitable for lexicographical comparisons, or to compare nonces
* and counters stored in little-endian format.
* However, it is slower than sodium_memcmp().
*/
SODIUM_EXPORT
int sodium_compare(const unsigned char *b1_, const unsigned char *b2_,
size_t len)
int
sodium_compare(const unsigned char *b1_, const unsigned char *b2_, size_t len)
__attribute__((warn_unused_result));
SODIUM_EXPORT
int sodium_is_zero(const unsigned char *n, const size_t nlen);
int
sodium_is_zero(const unsigned char *n, const size_t nlen);
SODIUM_EXPORT
void sodium_increment(unsigned char *n, const size_t nlen);
void
sodium_increment(unsigned char *n, const size_t nlen);
SODIUM_EXPORT
void sodium_add(unsigned char *a, const unsigned char *b, const size_t len);
void
sodium_add(unsigned char *a, const unsigned char *b, const size_t len);
SODIUM_EXPORT
char *sodium_bin2hex(char * const hex, const size_t hex_maxlen,
char *
sodium_bin2hex(char *const hex, const size_t hex_maxlen,
const unsigned char *const bin, const size_t bin_len);
SODIUM_EXPORT
int sodium_hex2bin(unsigned char * const bin, const size_t bin_maxlen,
int
sodium_hex2bin(unsigned char *const bin, const size_t bin_maxlen,
const char *const hex, const size_t hex_len,
const char *const ignore, size_t *const bin_len,
const char **const hex_end);
@ -74,29 +84,39 @@ int sodium_hex2bin(unsigned char * const bin, const size_t bin_maxlen,
* using the given variant. The computed length includes a trailing \0.
*/
#define sodium_base64_ENCODED_LEN(BIN_LEN, VARIANT) \
(((BIN_LEN) / 3U) * 4U + \
((((BIN_LEN) - ((BIN_LEN) / 3U) * 3U) | (((BIN_LEN) - ((BIN_LEN) / 3U) * 3U) >> 1)) & 1U) * \
(4U - (~((((VARIANT) & 2U) >> 1) - 1U) & (3U - ((BIN_LEN) - ((BIN_LEN) / 3U) * 3U)))) + 1U)
(((BIN_LEN) / 3U) * 4U \
+ ((((BIN_LEN) - ((BIN_LEN) / 3U) * 3U) \
| (((BIN_LEN) - ((BIN_LEN) / 3U) * 3U) >> 1)) \
& 1U) \
* (4U \
- (~((((VARIANT)&2U) >> 1) - 1U) \
& (3U - ((BIN_LEN) - ((BIN_LEN) / 3U) * 3U)))) \
+ 1U)
SODIUM_EXPORT
size_t sodium_base64_encoded_len(const size_t bin_len, const int variant);
size_t
sodium_base64_encoded_len(const size_t bin_len, const int variant);
SODIUM_EXPORT
char *sodium_bin2base64(char * const b64, const size_t b64_maxlen,
char *
sodium_bin2base64(char *const b64, const size_t b64_maxlen,
const unsigned char *const bin, const size_t bin_len,
const int variant);
SODIUM_EXPORT
int sodium_base642bin(unsigned char * const bin, const size_t bin_maxlen,
int
sodium_base642bin(unsigned char *const bin, const size_t bin_maxlen,
const char *const b64, const size_t b64_len,
const char *const ignore, size_t *const bin_len,
const char **const b64_end, const int variant);
SODIUM_EXPORT
int sodium_mlock(void * const addr, const size_t len);
int
sodium_mlock(void *const addr, const size_t len);
SODIUM_EXPORT
int sodium_munlock(void * const addr, const size_t len);
int
sodium_munlock(void *const addr, const size_t len);
/* WARNING: sodium_malloc() and sodium_allocarray() are not general-purpose
* allocation functions.
@ -110,9 +130,9 @@ int sodium_munlock(void * const addr, const size_t len);
* region may also kill the process if a buffer underflow is detected.
*
* The memory layout is:
* [unprotected region size (read only)][guard page (no access)][unprotected pages (read/write)][guard page (no access)]
* With the layout of the unprotected pages being:
* [optional padding][16-bytes canary][user region]
* [unprotected region size (read only)][guard page (no access)][unprotected
* pages (read/write)][guard page (no access)] With the layout of the
* unprotected pages being: [optional padding][16-bytes canary][user region]
*
* However:
* - These functions are significantly slower than standard functions
@ -132,36 +152,43 @@ int sodium_munlock(void * const addr, const size_t len);
*/
SODIUM_EXPORT
void *sodium_malloc(const size_t size)
__attribute__ ((malloc));
void *
sodium_malloc(const size_t size) __attribute__((malloc));
SODIUM_EXPORT
void *sodium_allocarray(size_t count, size_t size)
__attribute__ ((malloc));
void *
sodium_allocarray(size_t count, size_t size) __attribute__((malloc));
SODIUM_EXPORT
void sodium_free(void *ptr);
void
sodium_free(void *ptr);
SODIUM_EXPORT
int sodium_mprotect_noaccess(void *ptr);
int
sodium_mprotect_noaccess(void *ptr);
SODIUM_EXPORT
int sodium_mprotect_readonly(void *ptr);
int
sodium_mprotect_readonly(void *ptr);
SODIUM_EXPORT
int sodium_mprotect_readwrite(void *ptr);
int
sodium_mprotect_readwrite(void *ptr);
SODIUM_EXPORT
int sodium_pad(size_t *padded_buflen_p, unsigned char *buf,
int
sodium_pad(size_t *padded_buflen_p, unsigned char *buf,
size_t unpadded_buflen, size_t blocksize, size_t max_buflen);
SODIUM_EXPORT
int sodium_unpad(size_t *unpadded_buflen_p, const unsigned char *buf,
int
sodium_unpad(size_t *unpadded_buflen_p, const unsigned char *buf,
size_t padded_buflen, size_t blocksize);
/* -------- */
int _sodium_alloc_init(void);
int
_sodium_alloc_init(void);
#ifdef __cplusplus
}

@ -4,14 +4,19 @@
typedef crypto_int32 int32;
static inline void minmax(int32 *x,int32 *y)
static inline void
minmax(int32 *x, int32 *y)
{
asm("movl (%0),%%eax;movl (%1),%%ebx;cmpl %%ebx,%%eax;mov %%eax,%%edx;cmovg %%ebx,%%eax;cmovg %%edx,%%ebx;movl %%eax,(%0);movl %%ebx,(%1)"
: : "r"(x),"r"(y) : "%eax","%ebx","%edx");
asm("movl (%0),%%eax;movl (%1),%%ebx;cmpl %%ebx,%%eax;mov %%eax,%%edx;cmovg "
"%%ebx,%%eax;cmovg %%edx,%%ebx;movl %%eax,(%0);movl %%ebx,(%1)"
:
: "r"(x), "r"(y)
: "%eax", "%ebx", "%edx");
}
/* sort x0,x2; sort x1,x3; ... sort x13, x15 */
static inline void minmax02through1315(int32 *x)
static inline void
minmax02through1315(int32 *x)
{
__m256i a = _mm256_loadu_si256((__m256i *)x);
__m256i b = _mm256_loadu_si256((__m256i *)(x + 8));
@ -26,7 +31,8 @@ static inline void minmax02through1315(int32 *x)
}
/* sort x0,x2; sort x1,x3; sort x4,x6; sort x5,x7 */
static inline void minmax02134657(int32 *x)
static inline void
minmax02134657(int32 *x)
{
__m256i a = _mm256_loadu_si256((__m256i *)x);
__m256i b = _mm256_shuffle_epi32(a, 0x4e);
@ -37,53 +43,59 @@ static inline void minmax02134657(int32 *x)
_mm256_storeu_si256((__m256i *)x, a);
}
static void multiminmax2plus2(
int32 *x,
int n)
static void
multiminmax2plus2(int32 *x, int n)
{
while(n >= 16)
{
while (n >= 16) {
minmax02through1315(x);
n -= 16;
x += 16;
}
if (n >= 8) {
if(n >= 8)
{
minmax02134657(x);
n -= 8;
x += 8;
}
if (n >= 4) {
if(n >= 4)
{
minmax(x, x + 2);
minmax(x + 1, x + 3);
n -= 4;
x += 4;
}
if (n > 0) {
if(n > 0)
{
minmax(x, x + 2);
if (n > 1) minmax(x + 1,x + 3);
if(n > 1)
minmax(x + 1, x + 3);
}
}
static void multiminmax2plus6(
int32 *x,
int n)
static void
multiminmax2plus6(int32 *x, int n)
{
while(n >= 4)
{
while (n >= 4) {
minmax(x, x + 6);
minmax(x + 1, x + 7);
n -= 4;
x += 4;
}
if (n > 0) {
if(n > 0)
{
minmax(x, x + 6);
if (n > 1) minmax(x + 1,x + 7);
if(n > 1)
minmax(x + 1, x + 7);
}
}
static void multiminmax2plus14(
int32 *x,
int n)
static void
multiminmax2plus14(int32 *x, int n)
{
while(n >= 8)
{
while (n >= 8) {
minmax(x, x + 14);
minmax(x + 1, x + 15);
minmax(x + 4, x + 18);
@ -91,21 +103,25 @@ static void multiminmax2plus14(
n -= 8;
x += 8;
}
if (n >= 4) {
if(n >= 4)
{
minmax(x, x + 14);
minmax(x + 1, x + 15);
n -= 4;
x += 4;
}
if (n > 0) {
if(n > 0)
{
minmax(x, x + 14);
if (n > 1) minmax(x + 1,x + 15);
if(n > 1)
minmax(x + 1, x + 15);
}
}
/* sort x[i],y[i] for i in 0,1,4,5,8,9,12,13 */
/* all of x0...x15 and y0...y15 must exist; no aliasing */
static inline void minmax0145891213(int32 *x,int32 *y)
static inline void
minmax0145891213(int32 *x, int32 *y)
{
__m256i a01234567 = _mm256_loadu_si256((__m256i *)x);
__m256i a89101112131415 = _mm256_loadu_si256((__m256i *)(x + 8));
@ -129,17 +145,17 @@ static inline void minmax0145891213(int32 *x,int32 *y)
}
/* offset >= 30 */
static void multiminmax2plusmore(
int32 *x,
int n,
int offset)
static void
multiminmax2plusmore(int32 *x, int n, int offset)
{
while(n >= 16)
{
while (n >= 16) {
minmax0145891213(x, x + offset);
n -= 16;
x += 16;
}
if (n >= 8) {
if(n >= 8)
{
minmax(x, x + offset);
minmax(x + 1, x + 1 + offset);
minmax(x + 4, x + 4 + offset);
@ -147,20 +163,24 @@ static void multiminmax2plusmore(
n -= 8;
x += 8;
}
if (n >= 4) {
if(n >= 4)
{
minmax(x, x + offset);
minmax(x + 1, x + 1 + offset);
n -= 4;
x += 4;
}
if (n > 0) {
if(n > 0)
{
minmax(x, x + offset);
if (n > 1) minmax(x + 1,x + 1 + offset);
if(n > 1)
minmax(x + 1, x + 1 + offset);
}
}
/* sort x0,x1; ... sort x14, x15 */
static inline void minmax01through1415(int32 *x)
static inline void
minmax01through1415(int32 *x)
{
__m256i a = _mm256_loadu_si256((__m256i *)x);
__m256i b = _mm256_loadu_si256((__m256i *)(x + 8));
@ -177,7 +197,8 @@ static inline void minmax01through1415(int32 *x)
}
/* sort x0,x1; sort x2,x3; sort x4,x5; sort x6,x7 */
static inline void minmax01234567(int32 *x)
static inline void
minmax01234567(int32 *x)
{
__m256i a = _mm256_loadu_si256((__m256i *)x);
__m256i b = _mm256_shuffle_epi32(a, 0xb1);
@ -188,27 +209,30 @@ static inline void minmax01234567(int32 *x)
_mm256_storeu_si256((__m256i *)x, a);
}
static void multiminmax1plus1(
int32 *x,
int n)
static void
multiminmax1plus1(int32 *x, int n)
{
while(n >= 16)
{
while (n >= 16) {
minmax01through1415(x);
n -= 16;
x += 16;
}
if (n >= 8) {
if(n >= 8)
{
minmax01234567(x);
n -= 8;
x += 8;
}
if (n >= 4) {
if(n >= 4)
{
minmax(x, x + 1);
minmax(x + 2, x + 3);
n -= 4;
x += 4;
}
if (n >= 2) {
if(n >= 2)
{
minmax(x, x + 1);
n -= 2;
x += 2;
@ -217,12 +241,11 @@ static void multiminmax1plus1(
minmax(x, x + 1);
}
static void multiminmax1(
int32 *x,
int n,
int offset)
static void
multiminmax1(int32 *x, int n, int offset)
{
while(n >= 16)
{
while (n >= 16) {
minmax(x, x + offset);
minmax(x + 2, x + 2 + offset);
minmax(x + 4, x + 4 + offset);
@ -234,7 +257,8 @@ static void multiminmax1(
n -= 16;
x += 16;
}
if (n >= 8) {
if(n >= 8)
{
minmax(x, x + offset);
minmax(x + 2, x + 2 + offset);
minmax(x + 4, x + 4 + offset);
@ -242,13 +266,15 @@ static void multiminmax1(
n -= 8;
x += 8;
}
if (n >= 4) {
if(n >= 4)
{
minmax(x, x + offset);
minmax(x + 2, x + 2 + offset);
n -= 4;
x += 4;
}
if (n >= 2) {
if(n >= 2)
{
minmax(x, x + offset);
n -= 2;
x += 2;
@ -259,7 +285,8 @@ static void multiminmax1(
/* sort x[i],y[i] for i in 0,2,4,6,8,10,12,14 */
/* all of x0...x15 and y0...y15 must exist; no aliasing */
static inline void minmax02468101214(int32 *x,int32 *y)
static inline void
minmax02468101214(int32 *x, int32 *y)
{
__m256i a01234567 = _mm256_loadu_si256((__m256i *)x);
__m256i a89101112131415 = _mm256_loadu_si256((__m256i *)(x + 8));
@ -291,17 +318,17 @@ static inline void minmax02468101214(int32 *x,int32 *y)
}
/* assumes offset >= 31 */
static void multiminmax1plusmore(
int32 *x,
int n,
int offset)
static void
multiminmax1plusmore(int32 *x, int n, int offset)
{
while(n >= 16)
{
while (n >= 16) {
minmax02468101214(x, x + offset);
n -= 16;
x += 16;
}
if (n >= 8) {
if(n >= 8)
{
minmax(x, x + offset);
minmax(x + 2, x + 2 + offset);
minmax(x + 4, x + 4 + offset);
@ -309,13 +336,15 @@ static void multiminmax1plusmore(
n -= 8;
x += 8;
}
if (n >= 4) {
if(n >= 4)
{
minmax(x, x + offset);
minmax(x + 2, x + 2 + offset);
n -= 4;
x += 4;
}
if (n >= 2) {
if(n >= 2)
{
minmax(x, x + offset);
n -= 2;
x += 2;
@ -325,7 +354,8 @@ static void multiminmax1plusmore(
}
/* sort x0,y0; sort x1,y1; ...; sort x7,y7 */
static inline void minmax8(int32 *x,int32 *y)
static inline void
minmax8(int32 *x, int32 *y)
{
__m256i a = _mm256_loadu_si256((__m256i *)x);
__m256i b = _mm256_loadu_si256((__m256i *)y);
@ -334,30 +364,34 @@ static inline void minmax8(int32 *x,int32 *y)
}
/* assumes p >= 8; implies offset >= 8 */
static void multiminmax_atleast8(int p,
int32 *x,
int n,
int offset)
static void
multiminmax_atleast8(int p, int32 *x, int n, int offset)
{
int i;
while (n >= 2 * p) {
while(n >= 2 * p)
{
for(i = 0; i < p; i += 8)
minmax8(x + i, x + i + offset);
n -= 2 * p;
x += 2 * p;
}
for (i = 0;i + 8 <= n;i += 8) {
if (i & p) return;
for(i = 0; i + 8 <= n; i += 8)
{
if(i & p)
return;
minmax8(x + i, x + i + offset);
}
for (;i < n;++i) {
if (i & p) return;
for(; i < n; ++i)
{
if(i & p)
return;
minmax(x + i, x + i + offset);
}
}
/* sort x0,y0; sort x1,y1; sort x2,y2; sort x3,y3 */
static inline void minmax4(int32 *x,int32 *y)
static inline void
minmax4(int32 *x, int32 *y)
{
__m128i a = _mm_loadu_si128((__m128i *)x);
__m128i b = _mm_loadu_si128((__m128i *)y);
@ -365,13 +399,12 @@ static inline void minmax4(int32 *x,int32 *y)
_mm_storeu_si128((__m128i *)y, _mm_max_epi32(a, b));
}
static void multiminmax4(
int32 *x,
int n,
int offset)
static void
multiminmax4(int32 *x, int n, int offset)
{
int i;
while (n >= 8) {
while(n >= 8)
{
minmax4(x, x + offset);
n -= 8;
x += 8;
@ -383,25 +416,31 @@ static void multiminmax4(
minmax(x + i, x + i + offset);
}
void int32_sort(int32 *x,int n)
void
int32_sort(int32 *x, int n)
{
int top, p, q;
if (n < 2) return;
if(n < 2)
return;
top = 1;
while (top < n - top) top += top;
while(top < n - top)
top += top;
for (p = top;p >= 8;p >>= 1) {
for(p = top; p >= 8; p >>= 1)
{
multiminmax_atleast8(p, x, n - p, p);
for(q = top; q > p; q >>= 1)
multiminmax_atleast8(p, x + p, n - q, q - p);
}
if (p >= 4) {
if(p >= 4)
{
multiminmax4(x, n - 4, 4);
for(q = top; q > 4; q >>= 1)
multiminmax4(x + 4, n - q, q - 4);
}
if (p >= 2) {
if(p >= 2)
{
multiminmax2plus2(x, n - 2);
for(q = top; q >= 32; q >>= 1)
multiminmax2plusmore(x + 2, n - q, q - 2);

@ -4,12 +4,15 @@
#include "small.h"
#define r3_mult crypto_kem_sntrup4591761_avx_r3_mult
extern void r3_mult(small *,const small *,const small *);
extern void
r3_mult(small *, const small *, const small *);
#define r3_recip crypto_kem_sntrup4591761_avx_r3_recip
extern int r3_recip(small *,const small *);
extern int
r3_recip(small *, const small *);
#define r3_weightw_mask crypto_kem_sntrup4591761_avx_r3_weightw_mask
extern int r3_weightw_mask(const small *);
extern int
r3_weightw_mask(const small *);
#endif

@ -6,12 +6,14 @@
#include "r3.h"
/* caller must ensure that x-y does not overflow */
static int smaller_mask(int x,int y)
static int
smaller_mask(int x, int y)
{
return (x - y) >> 31;
}
static void vectormod3_product(small *z,int len,const small *x,const small c)
static void
vectormod3_product(small *z, int len, const small *x, const small c)
{
int i;
int minusmask = c;
@ -24,7 +26,8 @@ static void vectormod3_product(small *z,int len,const small *x,const small c)
plusvec = _mm256_set1_epi32(plusmask);
zerovec = _mm256_set1_epi32(0);
while (len >= 32) {
while(len >= 32)
{
__m256i xi = _mm256_loadu_si256((__m256i *)x);
xi = (xi & plusvec) | (_mm256_sub_epi8(zerovec, xi) & minusvec);
_mm256_storeu_si256((__m256i *)z, xi);
@ -33,10 +36,13 @@ static void vectormod3_product(small *z,int len,const small *x,const small c)
len -= 32;
}
for (i = 0;i < len;++i) z[i] = mod3_product(x[i],c);
for(i = 0; i < len; ++i)
z[i] = mod3_product(x[i], c);
}
static void vectormod3_minusproduct(small *z,int len,const small *x,const small *y,const small c)
static void
vectormod3_minusproduct(small *z, int len, const small *x, const small *y,
const small c)
{
int i;
int minusmask = c;
@ -51,7 +57,8 @@ static void vectormod3_minusproduct(small *z,int len,const small *x,const small
twovec = _mm256_set1_epi32(0x02020202);
fourvec = _mm256_set1_epi32(0x04040404);
while (len >= 32) {
while(len >= 32)
{
__m256i xi = _mm256_loadu_si256((__m256i *)x);
__m256i yi = _mm256_loadu_si256((__m256i *)y);
__m256i r;
@ -79,18 +86,22 @@ static void vectormod3_minusproduct(small *z,int len,const small *x,const small
len -= 32;
}
for (i = 0;i < len;++i) z[i] = mod3_minusproduct(x[i],y[i],c);
for(i = 0; i < len; ++i)
z[i] = mod3_minusproduct(x[i], y[i], c);
}
static void vectormod3_shift(small *z,int len)
static void
vectormod3_shift(small *z, int len)
{
int i;
while (len >= 33) {
while(len >= 33)
{
__m256i zi = _mm256_loadu_si256((__m256i *)(z + len - 33));
_mm256_storeu_si256((__m256i *)(z + len - 32), zi);
len -= 32;
}
for (i = len - 1;i > 0;--i) z[i] = z[i - 1];
for(i = len - 1; i > 0; --i)
z[i] = z[i - 1];
z[0] = 0;
}
@ -100,7 +111,8 @@ or returning -1 if s is not invertible mod m
r,s are polys of degree <p
m is x^p-x-1
*/
int r3_recip(small *r,const small *s)
int
r3_recip(small *r, const small *s)
{
const int loops = 2 * p + 1;
int loop;
@ -114,23 +126,28 @@ int r3_recip(small *r,const small *s)
int e = p;
int swapmask;
for (i = 2;i < p;++i) f[i] = 0;
for(i = 2; i < p; ++i)
f[i] = 0;
f[0] = -1;
f[1] = -1;
f[p] = 1;
/* generalization: can initialize f to any polynomial m */
/* requirements: m has degree exactly p, nonzero constant coefficient */
for (i = 0;i < p;++i) g[i] = s[i];
for(i = 0; i < p; ++i)
g[i] = s[i];
g[p] = 0;
for (i = 0;i <= loops;++i) u[i] = 0;
for(i = 0; i <= loops; ++i)
u[i] = 0;
v[0] = 1;
for (i = 1;i <= loops;++i) v[i] = 0;
for(i = 1; i <= loops; ++i)
v[i] = 0;
loop = 0;
for (;;) {
for(;;)
{
/* e == -1 or d + e + loop <= 2*p */
/* f has degree p: i.e., f[p]!=0 */
@ -141,13 +158,15 @@ int r3_recip(small *r,const small *s)
/* u has degree <=loop (so it fits in loop+1 coefficients) */
/* u[i]==0 for i < p-d */
/* if invertible: u[i]==0 for i < loop-p (so can look at just p+1 coefficients) */
/* if invertible: u[i]==0 for i < loop-p (so can look at just p+1
* coefficients) */
/* v has degree <=loop (so it fits in loop+1 coefficients) */
/* v[i]==0 for i < p-e */
/* v[i]==0 for i < loop-p (so can look at just p+1 coefficients) */
if (loop >= loops) break;
if(loop >= loops)
break;
c = mod3_quotient(g[p], f[p]);
@ -158,11 +177,15 @@ int r3_recip(small *r,const small *s)
vectormod3_minusproduct(v, 1536, v, u, c);
vectormod3_shift(v, 1537);
#else
if (loop < p) {
if(loop < p)
{
vectormod3_minusproduct(v, loop + 1, v, u, c);
vectormod3_shift(v, loop + 2);
} else {
vectormod3_minusproduct(v + loop - p,p + 1,v + loop - p,u + loop - p,c);
}
else
{
vectormod3_minusproduct(v + loop - p, p + 1, v + loop - p, u + loop - p,
c);
vectormod3_shift(v + loop - p, p + 2);
}
#endif
@ -178,9 +201,12 @@ int r3_recip(small *r,const small *s)
#ifdef SIMPLER
swap(u, v, 1536 * sizeof(small), swapmask);
#else
if (loop < p) {
if(loop < p)
{
swap(u, v, (loop + 1) * sizeof(small), swapmask);
} else {
}
else
{
swap(u + loop - p, v + loop - p, (p + 1) * sizeof(small), swapmask);
}
#endif
@ -188,7 +214,8 @@ int r3_recip(small *r,const small *s)
c = mod3_reciprocal(f[p]);
vectormod3_product(r, p, u + p, c);
for (i = p;i < 768;++i) r[i] = 0;
for(i = p; i < 768; ++i)
r[i] = 0;
return smaller_mask(0, d);
}
#endif

@ -5,27 +5,35 @@
#include "small.h"
#define rq_encode crypto_kem_sntrup4591761_avx_rq_encode
extern void rq_encode(unsigned char *,const modq *);
extern void
rq_encode(unsigned char *, const modq *);
#define rq_decode crypto_kem_sntrup4591761_avx_rq_decode
extern void rq_decode(modq *,const unsigned char *);
extern void
rq_decode(modq *, const unsigned char *);
#define rq_roundencode crypto_kem_sntrup4591761_avx_rq_roundencode
extern void rq_roundencode(unsigned char *,const modq *);
extern void
rq_roundencode(unsigned char *, const modq *);
#define rq_decoderounded crypto_kem_sntrup4591761_avx_rq_decoderounded
extern void rq_decoderounded(modq *,const unsigned char *);
extern void
rq_decoderounded(modq *, const unsigned char *);
#define rq_round3 crypto_kem_sntrup4591761_avx_rq_round3
extern void rq_round3(modq *,const modq *);
extern void
rq_round3(modq *, const modq *);
#define rq_mod3 crypto_kem_sntrup4591761_avx_rq_mod3
extern void rq_mod3(small *,const modq *);
extern void
rq_mod3(small *, const modq *);
#define rq_mult crypto_kem_sntrup4591761_avx_rq_mult
extern void rq_mult(modq *,const modq *,const small *);
extern void
rq_mult(modq *, const modq *, const small *);
#define rq_recip3 crypto_kem_sntrup4591761_avx_rq_recip3
int rq_recip3(modq *,const small *);
int
rq_recip3(modq *, const small *);
#endif

@ -12,18 +12,23 @@
// 32-bit hosts only
#ifndef __amd64__
#define _mm_extract_epi64(X, N) (__extension__ ({ __v2di __a = (__v2di)(X); \
__a[N];}))
#define _mm_extract_epi64(X, N) \
(__extension__({ \
__v2di __a = (__v2di)(X); \
__a[N]; \
}))
#endif
static inline __m256i squeeze(__m256i x)
static inline __m256i
squeeze(__m256i x)
{
__m256i q = _mm256_mulhrs_epi16(x, v7);
q = _mm256_mullo_epi16(q, v4591_16);
return _mm256_sub_epi16(x, q);
}
static inline __m256i freeze(__m256i x)
static inline __m256i
freeze(__m256i x)
{
__m256i mask, x2296, x4591;
x4591 = _mm256_add_epi16(x, v4591_16);
@ -36,11 +41,13 @@ static inline __m256i freeze(__m256i x)
return x;
}
void rq_mod3(small *g,const modq *f)
void
rq_mod3(small *g, const modq *f)
{
int i;
for (i = 0;i < 768;i += 16) {
for(i = 0; i < 768; i += 16)
{
__m256i x = _mm256_loadu_si256((__m256i *)&f[i]);
__m256i q;
x = _mm256_mullo_epi16(x, v3);
@ -50,9 +57,12 @@ void rq_mod3(small *g,const modq *f)
x = _mm256_sub_epi16(x, q);
q = _mm256_add_epi16(q, q);
x = _mm256_sub_epi16(x, q); /* g0 g1 ... g15 */
x = _mm256_packs_epi16(x,x); /* g0 ... g7 g0 ... g7 g8 ... g15 g8 ... g15 */
0[(long long *) &g[i]] = _mm_extract_epi64(_mm256_extracti128_si256(x,0),0);
1[(long long *) &g[i]] = _mm_extract_epi64(_mm256_extracti128_si256(x,1),0);
x = _mm256_packs_epi16(x,
x); /* g0 ... g7 g0 ... g7 g8 ... g15 g8 ... g15 */
0 [(long long *)&g[i]] =
_mm_extract_epi64(_mm256_extracti128_si256(x, 0), 0);
1 [(long long *)&g[i]] =
_mm_extract_epi64(_mm256_extracti128_si256(x, 1), 0);
}
}
#endif

@ -10,12 +10,14 @@
#define v29234_16 _mm256_set1_epi16(29234)
/* caller must ensure that x-y does not overflow */
static int smaller_mask(int x,int y)
static int
smaller_mask(int x, int y)
{
return (x - y) >> 31;
}
static inline __m256i product(__m256i x,__m256i y)
static inline __m256i
product(__m256i x, __m256i y)
{
__m256i lo, hi, r0, r1, t0, t1, t, s0, s1;
@ -52,7 +54,8 @@ static inline __m256i product(__m256i x,__m256i y)
return x;
}
static inline __m256i minusproduct(__m256i x,__m256i y,__m256i z)
static inline __m256i
minusproduct(__m256i x, __m256i y, __m256i z)
{
__m256i t;
@ -63,10 +66,12 @@ static inline __m256i minusproduct(__m256i x,__m256i y,__m256i z)
return x;
}
static void vectormodq_product(modq *z,int len,const modq *x,const modq c)
static void
vectormodq_product(modq *z, int len, const modq *x, const modq c)
{
__m256i cvec = _mm256_set1_epi16(c);
while (len >= 16) {
while(len >= 16)
{
__m256i xi = _mm256_loadu_si256((__m256i *)x);
xi = product(xi, cvec);
_mm256_storeu_si256((__m256i *)z, xi);
@ -74,7 +79,8 @@ static void vectormodq_product(modq *z,int len,const modq *x,const modq c)
z += 16;
len -= 16;
}
while (len > 0) {
while(len > 0)
{
*z = modq_product(*x, c);
++x;
++z;
@ -82,10 +88,13 @@ static void vectormodq_product(modq *z,int len,const modq *x,const modq c)
}
}
static void vectormodq_minusproduct(modq *z,int len,const modq *x,const modq *y,const modq c)
static void
vectormodq_minusproduct(modq *z, int len, const modq *x, const modq *y,
const modq c)
{
__m256i cvec = _mm256_set1_epi16(c);
while (len >= 16) {
while(len >= 16)
{
__m256i xi = _mm256_loadu_si256((__m256i *)x);
__m256i yi = _mm256_loadu_si256((__m256i *)y);
xi = minusproduct(xi, yi, cvec);
@ -95,7 +104,8 @@ static void vectormodq_minusproduct(modq *z,int len,const modq *x,const modq *y,
z += 16;
len -= 16;
}
while (len > 0) {
while(len > 0)
{
*z = modq_minusproduct(*x, *y, c);
++x;
++y;
@ -104,15 +114,18 @@ static void vectormodq_minusproduct(modq *z,int len,const modq *x,const modq *y,
}
}
static void vectormodq_shift(modq *z,int len)
static void
vectormodq_shift(modq *z, int len)
{
int i;
while (len >= 17) {
while(len >= 17)
{
__m256i zi = _mm256_loadu_si256((__m256i *)(z + len - 17));
_mm256_storeu_si256((__m256i *)(z + len - 16), zi);
len -= 16;
}
for (i = len - 1;i > 0;--i) z[i] = z[i - 1];
for(i = len - 1; i > 0; --i)
z[i] = z[i - 1];
z[0] = 0;
}
@ -122,7 +135,8 @@ or returning -1 if s is not invertible mod m
r,s are polys of degree <p
m is x^p-x-1
*/
int rq_recip3(modq *r,const small *s)
int
rq_recip3(modq *r, const small *s)
{
const int loops = 2 * p + 1;
int loop;
@ -136,23 +150,28 @@ int rq_recip3(modq *r,const small *s)
int e = p;
int swapmask;
for (i = 2;i < p;++i) f[i] = 0;
for(i = 2; i < p; ++i)
f[i] = 0;
f[0] = -1;
f[1] = -1;
f[p] = 1;
/* generalization: can initialize f to any polynomial m */
/* requirements: m has degree exactly p, nonzero constant coefficient */
for (i = 0;i < p;++i) g[i] = 3 * s[i];
for(i = 0; i < p; ++i)
g[i] = 3 * s[i];
g[p] = 0;
for (i = 0;i <= loops;++i) u[i] = 0;
for(i = 0; i <= loops; ++i)
u[i] = 0;
v[0] = 1;
for (i = 1;i <= loops;++i) v[i] = 0;
for(i = 1; i <= loops; ++i)
v[i] = 0;
loop = 0;
for (;;) {
for(;;)
{
/* e == -1 or d + e + loop <= 2*p */
/* f has degree p: i.e., f[p]!=0 */
@ -163,13 +182,15 @@ int rq_recip3(modq *r,const small *s)
/* u has degree <=loop (so it fits in loop+1 coefficients) */
/* u[i]==0 for i < p-d */
/* if invertible: u[i]==0 for i < loop-p (so can look at just p+1 coefficients) */
/* if invertible: u[i]==0 for i < loop-p (so can look at just p+1
* coefficients) */
/* v has degree <=loop (so it fits in loop+1 coefficients) */
/* v[i]==0 for i < p-e */
/* v[i]==0 for i < loop-p (so can look at just p+1 coefficients) */
if (loop >= loops) break;
if(loop >= loops)
break;
c = modq_quotient(g[p], f[p]);
@ -180,11 +201,15 @@ int rq_recip3(modq *r,const small *s)
vectormodq_minusproduct(v, 1536, v, u, c);
vectormodq_shift(v, 1537);
#else
if (loop < p) {
if(loop < p)
{
vectormodq_minusproduct(v, loop + 1, v, u, c);
vectormodq_shift(v, loop + 2);
} else {
vectormodq_minusproduct(v + loop - p,p + 1,v + loop - p,u + loop - p,c);
}
else
{
vectormodq_minusproduct(v + loop - p, p + 1, v + loop - p, u + loop - p,
c);
vectormodq_shift(v + loop - p, p + 2);
}
#endif
@ -200,9 +225,12 @@ int rq_recip3(modq *r,const small *s)
#ifdef SIMPLER
swap(u, v, 1536 * sizeof(modq), swapmask);
#else
if (loop < p) {
if(loop < p)
{
swap(u, v, (loop + 1) * sizeof(modq), swapmask);
} else {
}
else
{
swap(u + loop - p, v + loop - p, (p + 1) * sizeof(modq), swapmask);
}
#endif
@ -210,8 +238,10 @@ int rq_recip3(modq *r,const small *s)
c = modq_reciprocal(f[p]);
vectormodq_product(r, p, u + p, c);
for (i = 0;i < p;++i) r[i] = modq_freeze(r[i]);
for (i = p;i < 768;++i) r[i] = 0;
for(i = 0; i < p; ++i)
r[i] = modq_freeze(r[i]);
for(i = p; i < 768; ++i)
r[i] = 0;
return smaller_mask(0, d);
}
#endif

@ -6,11 +6,13 @@
#define v3_16 _mm256_set1_epi16(3)
#define v10923_16 _mm256_set1_epi16(10923)
void rq_round3(modq *h,const modq *f)
void
rq_round3(modq *h, const modq *f)
{
int i;
for (i = 0;i < 768;i += 16) {
for(i = 0; i < 768; i += 16)
{
__m256i x = _mm256_loadu_si256((__m256i *)&f[i]);
__m256i x2;
x = _mm256_mulhrs_epi16(x, v10923_16);

@ -164,13 +164,15 @@ rq_decoderounded(modq *f, const unsigned char *c)
/* x is f0 + f1*1536 + f2*1536^2 */
/* with each f between 0 and 1530 */
f2 = x
f2 =
x
* _mm256_set1_pd(
0.00000042385525173611114052197733521876177320564238470979034900665283203125);
f2 = floor(f2);
x -= f2 * _mm256_set1_pd(2359296.0);
f1 = x
f1 =
x
* _mm256_set1_pd(
0.00065104166666666673894681149903362893383018672466278076171875);
f1 = floor(f1);
@ -178,17 +180,20 @@ rq_decoderounded(modq *f, const unsigned char *c)
f0 = x;
f2 -= _mm256_set1_pd(1531.0)
f2 -=
_mm256_set1_pd(1531.0)
* floor(
f2
* _mm256_set1_pd(
0.0006531678641410842804659875326933615724556148052215576171875));
f1 -= _mm256_set1_pd(1531.0)
f1 -=
_mm256_set1_pd(1531.0)
* floor(
f1
* _mm256_set1_pd(
0.0006531678641410842804659875326933615724556148052215576171875));
f0 -= _mm256_set1_pd(1531.0)
f0 -=
_mm256_set1_pd(1531.0)
* floor(
f0
* _mm256_set1_pd(

@ -2,12 +2,14 @@
#include <immintrin.h>
#include "swap.h"
void swap(void *x,void *y,int bytes,int mask)
void
swap(void *x, void *y, int bytes, int mask)
{
char c = mask;
__m256i maskvec = _mm256_set1_epi32(mask);
while (bytes >= 32) {
while(bytes >= 32)
{
__m256i xi = _mm256_loadu_si256(x);
__m256i yi = _mm256_loadu_si256(y);
__m256i xinew = _mm256_blendv_epi8(xi, yi, maskvec);
@ -18,7 +20,8 @@ void swap(void *x,void *y,int bytes,int mask)
y = 32 + (char *)y;
bytes -= 32;
}
while (bytes > 0) {
while(bytes > 0)
{
char xi = *(char *)x;
char yi = *(char *)y;
char t = c & (xi ^ yi);

@ -2,6 +2,7 @@
#define swap_h
#define swap crypto_kem_sntrup4591761_avx_swap
extern void swap(void *,void *,int,int);
extern void
swap(void *, void *, int, int);
#endif

@ -4,9 +4,11 @@
#include "small.h"
#define r3_mult crypto_kem_sntrup4591761_ref_r3_mult
extern void r3_mult(small *,const small *,const small *);
extern void
r3_mult(small *, const small *, const small *);
#define r3_recip crypto_kem_sntrup4591761_ref_r3_recip
extern int r3_recip(small *,const small *);
extern int
r3_recip(small *, const small *);
#endif

@ -2,26 +2,30 @@
#include "mod3.h"
#include "r3.h"
void r3_mult(small *h,const small *f,const small *g)
void
r3_mult(small *h, const small *f, const small *g)
{
small fg[p + p - 1];
small result;
int i, j;
for (i = 0;i < p;++i) {
for(i = 0; i < p; ++i)
{
result = 0;
for(j = 0; j <= i; ++j)
result = mod3_plusproduct(result, f[j], g[i - j]);
fg[i] = result;
}
for (i = p;i < p + p - 1;++i) {
for(i = p; i < p + p - 1; ++i)
{
result = 0;
for(j = i - p + 1; j < p; ++j)
result = mod3_plusproduct(result, f[j], g[i - j]);
fg[i] = result;
}
for (i = p + p - 2;i >= p;--i) {
for(i = p + p - 2; i >= p; --i)
{
fg[i - p] = mod3_sum(fg[i - p], fg[i]);
fg[i - p + 1] = mod3_sum(fg[i - p + 1], fg[i]);
}

@ -5,24 +5,31 @@
#include "small.h"
#define rq_encode crypto_kem_sntrup4591761_ref_rq_encode
extern void rq_encode(unsigned char *,const modq *);
extern void
rq_encode(unsigned char *, const modq *);
#define rq_decode crypto_kem_sntrup4591761_ref_rq_decode
extern void rq_decode(modq *,const unsigned char *);
extern void
rq_decode(modq *, const unsigned char *);
#define rq_encoderounded crypto_kem_sntrup4591761_ref_rq_encoderounded
extern void rq_encoderounded(unsigned char *,const modq *);
extern void
rq_encoderounded(unsigned char *, const modq *);
#define rq_decoderounded crypto_kem_sntrup4591761_ref_rq_decoderounded
extern void rq_decoderounded(modq *,const unsigned char *);
extern void
rq_decoderounded(modq *, const unsigned char *);
#define rq_round3 crypto_kem_sntrup4591761_ref_rq_round
extern void rq_round3(modq *,const modq *);
extern void
rq_round3(modq *, const modq *);
#define rq_mult crypto_kem_sntrup4591761_ref_rq_mult
extern void rq_mult(modq *,const modq *,const small *);
extern void
rq_mult(modq *, const modq *, const small *);
#define rq_recip3 crypto_kem_sntrup4591761_ref_rq_recip3
int rq_recip3(modq *,const small *);
int
rq_recip3(modq *, const small *);
#endif

@ -1,26 +1,30 @@
#include "params.h"
#include "rq.h"
void rq_mult(modq *h,const modq *f,const small *g)
void
rq_mult(modq *h, const modq *f, const small *g)
{
modq fg[p + p - 1];
modq result;
int i, j;
for (i = 0;i < p;++i) {
for(i = 0; i < p; ++i)
{
result = 0;
for(j = 0; j <= i; ++j)
result = modq_plusproduct(result, f[j], g[i - j]);
fg[i] = result;
}
for (i = p;i < p + p - 1;++i) {
for(i = p; i < p + p - 1; ++i)
{
result = 0;
for(j = i - p + 1; j < p; ++j)
result = modq_plusproduct(result, f[j], g[i - j]);
fg[i] = result;
}
for (i = p + p - 2;i >= p;--i) {
for(i = p + p - 2; i >= p; --i)
{
fg[i - p] = modq_sum(fg[i - p], fg[i]);
fg[i - p + 1] = modq_sum(fg[i - p + 1], fg[i]);
}

@ -1,7 +1,8 @@
#include "params.h"
#include "rq.h"
void rq_round3(modq *h,const modq *f)
void
rq_round3(modq *h, const modq *f)
{
int i;

@ -4,12 +4,14 @@
/* XXX: these functions rely on p mod 4 = 1 */
/* all coefficients in -1, 0, 1 */
void small_encode(unsigned char *c,const small *f)
void
small_encode(unsigned char *c, const small *f)
{
small c0;
int i;
for (i = 0;i < p/4;++i) {
for(i = 0; i < p / 4; ++i)
{
c0 = *f++ + 1;
c0 += (*f++ + 1) << 2;
c0 += (*f++ + 1) << 4;
@ -20,16 +22,21 @@ void small_encode(unsigned char *c,const small *f)
*c++ = c0;
}
void small_decode(small *f,const unsigned char *c)
void
small_decode(small *f, const unsigned char *c)
{
unsigned char c0;
int i;
for (i = 0;i < p/4;++i) {
for(i = 0; i < p / 4; ++i)
{
c0 = *c++;
*f++ = ((small) (c0 & 3)) - 1; c0 >>= 2;
*f++ = ((small) (c0 & 3)) - 1; c0 >>= 2;
*f++ = ((small) (c0 & 3)) - 1; c0 >>= 2;
*f++ = ((small)(c0 & 3)) - 1;
c0 >>= 2;
*f++ = ((small)(c0 & 3)) - 1;
c0 >>= 2;
*f++ = ((small)(c0 & 3)) - 1;
c0 >>= 2;
*f++ = ((small)(c0 & 3)) - 1;
}
c0 = *c++;

@ -1,13 +1,15 @@
#include "swap.h"
void swap(void *x,void *y,int bytes,int mask)
void
swap(void *x, void *y, int bytes, int mask)
{
int i;
char xi, yi, c, t;
c = mask;
for (i = 0;i < bytes;++i) {
for(i = 0; i < bytes; ++i)
{
xi = i[(char *)x];
yi = i[(char *)y];
t = c & (xi ^ yi);

@ -2,6 +2,7 @@
#define swap_h
#define swap crypto_kem_sntrup4591761_ref_swap
extern void swap(void *,void *,int,int);
extern void
swap(void *, void *, int, int);
#endif

@ -10,17 +10,16 @@ crypto_core_salsa(unsigned char *out, const unsigned char *in,
const unsigned char *k, const unsigned char *c,
const int rounds)
{
uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14,
x15;
uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14,
j15;
uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
int i;
j0 = x0 = 0x61707865;
j5 = x5 = 0x3320646e;
j10 = x10 = 0x79622d32;
j15 = x15 = 0x6b206574;
if (c != NULL) {
if(c != NULL)
{
j0 = x0 = LOAD32_LE(c + 0);
j5 = x5 = LOAD32_LE(c + 4);
j10 = x10 = LOAD32_LE(c + 8);
@ -40,7 +39,8 @@ crypto_core_salsa(unsigned char *out, const unsigned char *in,
j8 = x8 = LOAD32_LE(in + 8);
j9 = x9 = LOAD32_LE(in + 12);
for (i = 0; i < rounds; i += 2) {
for(i = 0; i < rounds; i += 2)
{
x4 ^= ROTL32(x0 + x12, 7);
x8 ^= ROTL32(x4 + x0, 9);
x12 ^= ROTL32(x8 + x4, 13);

@ -13,7 +13,6 @@ Public domain.
#include "../stream_salsa20.h"
#include "salsa20_ref.h"
static int
stream_ref(unsigned char *c, unsigned long long clen, const unsigned char *n,
const unsigned char *k)
@ -132,4 +131,3 @@ struct crypto_stream_salsa20_implementation
SODIUM_C99(.stream =) stream_ref,
SODIUM_C99(.stream_xor_ic =) stream_ref_xor_ic,
};

@ -4,13 +4,13 @@
#include <stdint.h>
typedef struct crypto_stream_salsa20_implementation {
typedef struct crypto_stream_salsa20_implementation
{
int (*stream)(unsigned char *c, unsigned long long clen,
const unsigned char *n, const unsigned char *k);
int (*stream_xor_ic)(unsigned char *c, const unsigned char *m,
unsigned long long mlen,
const unsigned char *n, uint64_t ic,
const unsigned char *k);
unsigned long long mlen, const unsigned char *n,
uint64_t ic, const unsigned char *k);
} crypto_stream_salsa20_implementation;
#endif

@ -1,4 +1,5 @@
if (bytes > 0) {
if(bytes > 0)
{
__m128i diag0 = _mm_loadu_si128((__m128i *)(x + 0));
__m128i diag1 = _mm_loadu_si128((__m128i *)(x + 4));
__m128i diag2 = _mm_loadu_si128((__m128i *)(x + 8));
@ -10,7 +11,8 @@ if (bytes > 0) {
unsigned int i;
a0 = diag1;
for (i = 0; i < ROUNDS; i += 4) {
for(i = 0; i < ROUNDS; i += 4)
{
a0 = _mm_add_epi32(a0, diag0);
a1 = diag0;
b0 = a0;
@ -162,7 +164,8 @@ if (bytes > 0) {
diag3 = _mm_add_epi32(diag3, _mm_loadu_si128((__m128i *)(x + 12)));
#define ONEQUAD_SHUFFLE(A, B, C, D) \
do { \
do \
{ \
uint32_t in##A = _mm_cvtsi128_si32(diag0); \
uint32_t in##B = _mm_cvtsi128_si32(diag1); \
uint32_t in##C = _mm_cvtsi128_si32(diag2); \
@ -187,7 +190,8 @@ if (bytes > 0) {
#undef ONEQUAD
#undef ONEQUAD_SHUFFLE
for (i = 0; i < bytes; i++) {
for(i = 0; i < bytes; i++)
{
c[i] = m[i] ^ partialblock[i];
}

@ -1,4 +1,5 @@
while (bytes >= 64) {
while(bytes >= 64)
{
__m128i diag0 = _mm_loadu_si128((__m128i *)(x + 0));
__m128i diag1 = _mm_loadu_si128((__m128i *)(x + 4));
__m128i diag2 = _mm_loadu_si128((__m128i *)(x + 8));
@ -11,7 +12,8 @@ while (bytes >= 64) {
int i;
a0 = diag1;
for (i = 0; i < ROUNDS; i += 4) {
for(i = 0; i < ROUNDS; i += 4)
{
a0 = _mm_add_epi32(a0, diag0);
a1 = diag0;
b0 = a0;
@ -163,7 +165,8 @@ while (bytes >= 64) {
diag3 = _mm_add_epi32(diag3, _mm_loadu_si128((__m128i *)(x + 12)));
#define ONEQUAD_SHUFFLE(A, B, C, D) \
do { \
do \
{ \
uint32_t in##A = _mm_cvtsi128_si32(diag0); \
uint32_t in##B = _mm_cvtsi128_si32(diag1); \
uint32_t in##C = _mm_cvtsi128_si32(diag2); \
@ -195,7 +198,8 @@ while (bytes >= 64) {
in8 = x[8];
in9 = x[13];
in8++;
if (in8 == 0) {
if(in8 == 0)
{
in9++;
}
x[8] = in8;

@ -1,10 +1,9 @@
if (bytes >= 256) {
__m128i y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12, y13, y14,
y15;
__m128i z0, z1, z2, z3, z4, z5, z6, z7, z8, z9, z10, z11, z12, z13, z14,
z15;
__m128i orig0, orig1, orig2, orig3, orig4, orig5, orig6, orig7, orig8,
orig9, orig10, orig11, orig12, orig13, orig14, orig15;
if(bytes >= 256)
{
__m128i y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12, y13, y14, y15;
__m128i z0, z1, z2, z3, z4, z5, z6, z7, z8, z9, z10, z11, z12, z13, z14, z15;
__m128i orig0, orig1, orig2, orig3, orig4, orig5, orig6, orig7, orig8, orig9,
orig10, orig11, orig12, orig13, orig14, orig15;
uint32_t in8;
uint32_t in9;
@ -47,7 +46,8 @@ if (bytes >= 256) {
orig14 = z14;
orig15 = z15;
while (bytes >= 256) {
while(bytes >= 256)
{
/* vector implementation for z8 and z9 */
/* not sure if it helps for only 4 blocks */
const __m128i addv8 = _mm_set_epi64x(1, 0);
@ -96,11 +96,12 @@ if (bytes >= 256) {
z4 = orig4;
z8 = orig8;
for (i = 0; i < ROUNDS; i += 2) {
for(i = 0; i < ROUNDS; i += 2)
{
/* the inner loop is a direct translation (regexp search/replace)
* from the amd64-xmm6 ASM */
__m128i r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13,
r14, r15;
__m128i r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14,
r15;
y4 = z12;
y4 = _mm_add_epi32(y4, z0);

@ -1,6 +1,6 @@
if (bytes >= 512) {
__m256i y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12, y13, y14,
y15;
if(bytes >= 512)
{
__m256i y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12, y13, y14, y15;
/* the naive way seems as fast (if not a bit faster) than the vector way */
__m256i z0 = _mm256_set1_epi32(x[0]);
@ -41,7 +41,8 @@ if (bytes >= 512) {
uint32_t in9;
int i;
while (bytes >= 512) {
while(bytes >= 512)
{
/* vector implementation for z8 and z9 */
/* faster than the naive version for 8 blocks */
const __m256i addv8 = _mm256_set_epi64x(3, 2, 1, 0);
@ -96,11 +97,12 @@ if (bytes >= 512) {
z4 = orig4;
z8 = orig8;
for (i = 0; i < ROUNDS; i += 2) {
for(i = 0; i < ROUNDS; i += 2)
{
/* the inner loop is a direct translation (regexp search/replace)
* from the amd64-xmm6 ASM */
__m256i r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13,
r14, r15;
__m256i r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14,
r15;
y4 = z12;
y4 = _mm256_add_epi32(y4, z0);
@ -433,20 +435,13 @@ if (bytes >= 512) {
y##D = _mm256_permute2x128_si256(z##D, z##D2, 0x20); \
y##D2 = _mm256_permute2x128_si256(z##D, z##D2, 0x31); \
y##A = _mm256_xor_si256(y##A, _mm256_loadu_si256((__m256i*)(m + 0))); \
y##B = \
_mm256_xor_si256(y##B, _mm256_loadu_si256((__m256i*) (m + 64))); \
y##C = \
_mm256_xor_si256(y##C, _mm256_loadu_si256((__m256i*) (m + 128))); \
y##D = \
_mm256_xor_si256(y##D, _mm256_loadu_si256((__m256i*) (m + 192))); \
y##A2 = \
_mm256_xor_si256(y##A2, _mm256_loadu_si256((__m256i*) (m + 256))); \
y##B2 = \
_mm256_xor_si256(y##B2, _mm256_loadu_si256((__m256i*) (m + 320))); \
y##C2 = \
_mm256_xor_si256(y##C2, _mm256_loadu_si256((__m256i*) (m + 384))); \
y##D2 = \
_mm256_xor_si256(y##D2, _mm256_loadu_si256((__m256i*) (m + 448))); \
y##B = _mm256_xor_si256(y##B, _mm256_loadu_si256((__m256i*)(m + 64))); \
y##C = _mm256_xor_si256(y##C, _mm256_loadu_si256((__m256i*)(m + 128))); \
y##D = _mm256_xor_si256(y##D, _mm256_loadu_si256((__m256i*)(m + 192))); \
y##A2 = _mm256_xor_si256(y##A2, _mm256_loadu_si256((__m256i*)(m + 256))); \
y##B2 = _mm256_xor_si256(y##B2, _mm256_loadu_si256((__m256i*)(m + 320))); \
y##C2 = _mm256_xor_si256(y##C2, _mm256_loadu_si256((__m256i*)(m + 384))); \
y##D2 = _mm256_xor_si256(y##D2, _mm256_loadu_si256((__m256i*)(m + 448))); \
_mm256_storeu_si256((__m256i*)(c + 0), y##A); \
_mm256_storeu_si256((__m256i*)(c + 64), y##B); \
_mm256_storeu_si256((__m256i*)(c + 128), y##C); \

Loading…
Cancel
Save