diff --git a/aes.cpp b/aes.cpp
index ee7d455b..32605db4 100644
--- a/aes.cpp
+++ b/aes.cpp
@@ -47,9 +47,9 @@ namespace crypto
 		(
 			"movups (%[key]), %%xmm1 \n"
 			"movups 16(%[key]), %%xmm3 \n"
-			"movaps %%xmm1, (%[shed]) \n"
-			"movaps %%xmm3, 16(%[shed]) \n"
-			"lea 32(%[shed]), %%rcx \n"
+			"movaps %%xmm1, (%[sched]) \n"
+			"movaps %%xmm3, 16(%[sched]) \n"
+			"lea 32(%[sched]), %%rcx \n"
 			"aeskeygenassist $1, %%xmm3, %%xmm2 \n"
 			KeyExpansion256
 			"aeskeygenassist $2, %%xmm3, %%xmm2 \n"
@@ -75,58 +75,68 @@ namespace crypto
 			"pxor %%xmm2, %%xmm1 \n"
 			"movups %%xmm1, (%%rcx) \n"
 			: // output
-			: [key]"r"(key), [shed]"r"(m_KeySchedule) // input
+			: [key]"r"(key), [sched]"r"(m_KeySchedule) // input
 			: "%rcx", "%xmm1", "%xmm2", "%xmm3", "%xmm4" // clogged
 		);
 	}
 
+	// Encrypts the block held in xmm0 in place: xor with round key 0, 13 aesenc rounds,
+	// then aesenclast with the key at offset 224. The enclosing asm must provide a [sched] operand.
+	#define EncryptAES256 \
+		"pxor (%[sched]), %%xmm0 \n" \
+		"aesenc 16(%[sched]), %%xmm0 \n" \
+		"aesenc 32(%[sched]), %%xmm0 \n" \
+		"aesenc 48(%[sched]), %%xmm0 \n" \
+		"aesenc 64(%[sched]), %%xmm0 \n" \
+		"aesenc 80(%[sched]), %%xmm0 \n" \
+		"aesenc 96(%[sched]), %%xmm0 \n" \
+		"aesenc 112(%[sched]), %%xmm0 \n" \
+		"aesenc 128(%[sched]), %%xmm0 \n" \
+		"aesenc 144(%[sched]), %%xmm0 \n" \
+		"aesenc 160(%[sched]), %%xmm0 \n" \
+		"aesenc 176(%[sched]), %%xmm0 \n" \
+		"aesenc 192(%[sched]), %%xmm0 \n" \
+		"aesenc 208(%[sched]), %%xmm0 \n" \
+		"aesenclast 224(%[sched]), %%xmm0 \n"
+
 	void ECBEncryptionAESNI::Encrypt (const ChipherBlock * in, ChipherBlock * out)
 	{
 		__asm__
 		(
 			"movups (%[in]), %%xmm0 \n"
-			"pxor (%[shed]), %%xmm0 \n"
-			"aesenc 16(%[shed]), %%xmm0 \n"
-			"aesenc 32(%[shed]), %%xmm0 \n"
-			"aesenc 48(%[shed]), %%xmm0 \n"
-			"aesenc 64(%[shed]), %%xmm0 \n"
-			"aesenc 80(%[shed]), %%xmm0 \n"
-			"aesenc 96(%[shed]), %%xmm0 \n"
-			"aesenc 112(%[shed]), %%xmm0 \n"
-			"aesenc 128(%[shed]), %%xmm0 \n"
-			"aesenc 144(%[shed]), %%xmm0 \n"
-			"aesenc 160(%[shed]), %%xmm0 \n"
-			"aesenc 176(%[shed]), %%xmm0 \n"
-			"aesenc 192(%[shed]), %%xmm0 \n"
-			"aesenc 208(%[shed]), %%xmm0 \n"
-			"aesenclast 224(%[shed]), %%xmm0 \n"
+			EncryptAES256
 			"movups %%xmm0, (%[out]) \n"
-			: : [shed]"r"(m_KeySchedule), [in]"r"(in), [out]"r"(out) : "%xmm0"
+			: : [sched]"r"(m_KeySchedule), [in]"r"(in), [out]"r"(out) : "%xmm0"
 		);
 	}
 
+	// Decrypts the block held in xmm0 in place, walking the schedule backwards from offset 224
+	// down to 0 (13 aesdec rounds, then aesdeclast). The enclosing asm must provide a [sched] operand.
+	#define DecryptAES256 \
+		"pxor 224(%[sched]), %%xmm0 \n" \
+		"aesdec 208(%[sched]), %%xmm0 \n" \
+		"aesdec 192(%[sched]), %%xmm0 \n" \
+		"aesdec 176(%[sched]), %%xmm0 \n" \
+		"aesdec 160(%[sched]), %%xmm0 \n" \
+		"aesdec 144(%[sched]), %%xmm0 \n" \
+		"aesdec 128(%[sched]), %%xmm0 \n" \
+		"aesdec 112(%[sched]), %%xmm0 \n" \
+		"aesdec 96(%[sched]), %%xmm0 \n" \
+		"aesdec 80(%[sched]), %%xmm0 \n" \
+		"aesdec 64(%[sched]), %%xmm0 \n" \
+		"aesdec 48(%[sched]), %%xmm0 \n" \
+		"aesdec 32(%[sched]), %%xmm0 \n" \
+		"aesdec 16(%[sched]), %%xmm0 \n" \
+		"aesdeclast (%[sched]), %%xmm0 \n"
+
 	void ECBDecryptionAESNI::Decrypt (const ChipherBlock * in, ChipherBlock * out)
 	{
 		__asm__
 		(
 			"movups (%[in]), %%xmm0 \n"
-			"pxor 224(%[shed]), %%xmm0 \n"
-			"aesdec 208(%[shed]), %%xmm0 \n"
-			"aesdec 192(%[shed]), %%xmm0 \n"
-			"aesdec 176(%[shed]), %%xmm0 \n"
-			"aesdec 160(%[shed]), %%xmm0 \n"
-			"aesdec 144(%[shed]), %%xmm0 \n"
-			"aesdec 128(%[shed]), %%xmm0 \n"
-			"aesdec 112(%[shed]), %%xmm0 \n"
-			"aesdec 96(%[shed]), %%xmm0 \n"
-			"aesdec 80(%[shed]), %%xmm0 \n"
-			"aesdec 64(%[shed]), %%xmm0 \n"
-			"aesdec 48(%[shed]), %%xmm0 \n"
-			"aesdec 32(%[shed]), %%xmm0 \n"
-			"aesdec 16(%[shed]), %%xmm0 \n"
-			"aesdeclast (%[shed]), %%xmm0 \n"
+			DecryptAES256
 			"movups %%xmm0, (%[out]) \n"
-			: : [shed]"r"(m_KeySchedule), [in]"r"(in), [out]"r"(out) : "%xmm0"
+			: : [sched]"r"(m_KeySchedule), [in]"r"(in), [out]"r"(out) : "%xmm0"
 		);
 	}
 
@@ -163,12 +173,37 @@ namespace crypto
 
 	void CBCEncryption::Encrypt (int numBlocks, const ChipherBlock * in, ChipherBlock * out)
 	{
+#ifdef __x86_64__
+		// The asm loop tests its counter only after processing a block, so a zero
+		// count would wrap around; the portable loop below is a no-op in that case.
+		if (numBlocks <= 0) return;
+		// xmm1 carries the chaining value: loaded from m_LastBlock, replaced by each ciphertext block, stored back at the end.
+		__asm__
+		(
+			"movups (%[iv]), %%xmm1 \n"
+			"1: \n"
+			"movups (%[in]), %%xmm0 \n"
+			"pxor %%xmm1, %%xmm0 \n"
+			EncryptAES256
+			"movaps %%xmm0, %%xmm1 \n"
+			"movups %%xmm0, (%[out]) \n"
+			"add $16, %[in] \n"
+			"add $16, %[out] \n"
+			"dec %[num] \n"
+			"jnz 1b \n"
+			"movups %%xmm1, (%[iv]) \n"
+			: [in]"+r"(in), [out]"+r"(out), [num]"+r"(numBlocks) // modified by the loop, so read-write
+			: [iv]"r"(&m_LastBlock), [sched]"r"(m_ECBEncryption.GetKeySchedule ())
+			: "%xmm0", "%xmm1", "cc", "memory"
+		);
+#else
 		for (int i = 0; i < numBlocks; i++)
 		{
 			m_LastBlock ^= in[i];
 			m_ECBEncryption.Encrypt (&m_LastBlock, &m_LastBlock);
 			out[i] = m_LastBlock;
 		}
+#endif
 	}
 
 	bool CBCEncryption::Encrypt (const uint8_t * in, std::size_t len, uint8_t * out)
diff --git a/aes.h b/aes.h
index 09f9ba21..5e3fcf4b 100644
--- a/aes.h
+++ b/aes.h
@@ -28,7 +28,8 @@ namespace crypto
 		public:
 
 			ECBCryptoAESNI ();
-	
+			uint8_t * GetKeySchedule () { return m_KeySchedule; };
+
 		protected:
 
 			void ExpandKey (const uint8_t * key);
@@ -114,6 +115,7 @@ namespace crypto
 		private:
 
 			ChipherBlock m_LastBlock;
+			ECBEncryption m_ECBEncryption;
 	};
 
 