From cf0fc3a4a9b37ba097a33c1526f79f3358e41373 Mon Sep 17 00:00:00 2001
From: orignal
Date: Sun, 25 Nov 2018 10:33:48 -0500
Subject: [PATCH] some performance improvements

---
 libi2pd/Crypto.cpp | 5 ++-
 libi2pd/Crypto.h | 79 +++++++++++++++++++++++++---------------------
 2 files changed, 45 insertions(+), 39 deletions(-)

diff --git a/libi2pd/Crypto.cpp b/libi2pd/Crypto.cpp
index 9b66c8c5..dcd89064 100644
--- a/libi2pd/Crypto.cpp
+++ b/libi2pd/Crypto.cpp
@@ -1130,9 +1130,8 @@ namespace crypto
 }
 // adLen and msgLen
 htole64buf (padding, adLen);
- polyHash.Update (padding, 8);
- htole64buf (padding, msgLen);
- polyHash.Update (padding, 8);
+ htole64buf (padding + 8, msgLen);
+ polyHash.Update (padding, 16);
 
 if (encrypt)
 // calculate Poly1305 tag and write in after encrypted data
diff --git a/libi2pd/Crypto.h b/libi2pd/Crypto.h
index cbfeb0e9..7f44195a 100644
--- a/libi2pd/Crypto.h
+++ b/libi2pd/Crypto.h
@@ -108,48 +108,55 @@ namespace crypto
 
 void operator^=(const ChipherBlock& other) // XOR
 {
+ if (!(((size_t)buf | (size_t)other.buf) & 0x0F)) // multiple of 16 ?
+ {
+ // try 128 bits if applicable
 #ifdef __AVX__
- if (i2p::cpu::avx)
- {
- __asm__
+ if (i2p::cpu::avx)
+ {
+ __asm__
+ (
+ "vmovaps (%[buf]), %%xmm0 \n"
+ "vmovaps (%[other]), %%xmm1 \n"
+ "vxorps %%xmm0, %%xmm1, %%xmm0 \n"
+ "vmovaps %%xmm0, (%[buf]) \n"
+ :
+ : [buf]"r"(buf), [other]"r"(other.buf)
+ : "%xmm0", "%xmm1", "memory"
+ );
+ }
+ else
+#endif
+ {
+#if defined(__SSE__) // SSE
+ __asm__
 (
- "vmovups (%[buf]), %%xmm0 \n"
- "vmovups (%[other]), %%xmm1 \n"
- "vxorps %%xmm0, %%xmm1, %%xmm0 \n"
- "vmovups %%xmm0, (%[buf]) \n"
+ "movaps (%[buf]), %%xmm0 \n"
+ "movaps (%[other]), %%xmm1 \n"
+ "pxor %%xmm1, %%xmm0 \n"
+ "movaps %%xmm0, (%[buf]) \n"
 :
 : [buf]"r"(buf), [other]"r"(other.buf)
 : "%xmm0", "%xmm1", "memory"
- );
- }
- else
-#endif
- {
-#if defined(__SSE__) // SSE
- __asm__
- (
- "movups (%[buf]), %%xmm0 \n"
- "movups (%[other]), %%xmm1 \n"
- "pxor %%xmm1, %%xmm0 \n"
- "movups %%xmm0, (%[buf]) \n"
- :
- : [buf]"r"(buf), [other]"r"(other.buf)
- : "%xmm0", "%xmm1", "memory"
- );
-#else
- if (!(((size_t)buf | (size_t)other.buf) & 0x03)) // multiple of 4 ?
- {
- // we are good to cast to uint32_t *
- for (int i = 0; i < 4; i++)
- ((uint32_t *)buf)[i] ^= ((uint32_t *)other.buf)[i];
- }
- else
- {
- for (int i = 0; i < 16; i++)
- buf[i] ^= other.buf[i];
+ );
+#else
+ // if not we always can cast to uint64_t *
+ ((uint64_t *)buf)[0] ^= ((uint64_t *)other.buf)[0];
+ ((uint64_t *)buf)[1] ^= ((uint64_t *)other.buf)[1];
+#endif
 }
-#endif
- }
+ }
+ else if (!(((size_t)buf | (size_t)other.buf) & 0x03)) // multiple of 4 ?
+ {
+ // we are good to cast to uint32_t *
+ for (int i = 0; i < 4; i++)
+ ((uint32_t *)buf)[i] ^= ((uint32_t *)other.buf)[i];
+ }
+ else
+ {
+ for (int i = 0; i < 16; i++)
+ buf[i] ^= other.buf[i];
+ }
 }
 };
 