CPU: remove AVX code, switch to __builtin for AES detection (#1959)

* [cpu] remove avx detect and code blocks, try to switch to __builtin
* [cpu] use __builtin_* only on x86 systems
* [cpu] perform check in separate function
* [cpu] set AES definition on MSVC
* update x86 and aes support checks at compile time
* [cmake] update comment about AES on MSVC
This commit is contained in:
R4SAS 2023-08-31 16:52:51 +00:00 committed by GitHub
parent 32c5ff23a6
commit 7b6aa41ca8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 222 additions and 232 deletions

View File

@ -34,3 +34,6 @@ trim_trailing_whitespace = false
[*.yml] [*.yml]
indent_style = space indent_style = space
indent_size = 2 indent_size = 2
[*.patch]
trim_trailing_whitespace = false

View File

@ -197,14 +197,11 @@ endif()
# Note: AES-NI and AVX are available on x86-based CPUs. # Note: AES-NI and AVX are available on x86-based CPUs.
# There is also an ARM64 implementation, but currently we don't support it. # There is also an ARM64 implementation, but currently we don't support it.
# MSVC is not supported. # MSVC is not supported due to different ASM processing, so we hope OpenSSL has its own checks to run optimized code.
if(MSVC)
message(STATUS "AES-NI is not supported on MSVC, option was disabled")
set(WITH_AESNI OFF)
endif()
if(WITH_AESNI AND (ARCHITECTURE MATCHES "x86_64" OR ARCHITECTURE MATCHES "i386")) if(WITH_AESNI AND (ARCHITECTURE MATCHES "x86_64" OR ARCHITECTURE MATCHES "i386"))
if(NOT MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes")
endif()
add_definitions(-D__AES__) add_definitions(-D__AES__)
endif() endif()

View File

@ -280,8 +280,6 @@ verify = true
[cpuext] [cpuext]
## Use the CPU AES-NI instruction set for cryptography when available (default: true) ## Use the CPU AES-NI instruction set for cryptography when available (default: true)
# aesni = true # aesni = true
## Use CPU AVX instructions set when work with cryptography when available (default: true)
# avx = true
## Force usage of CPU instruction sets, even if they are not detected (default: false) ## Force usage of CPU instruction sets, even if they are not detected (default: false)
## DO NOT TOUCH this option if you don't really know what you are doing! ## DO NOT TOUCH this option if you don't really know what you are doing!
# force = false # force = false

View File

@ -150,12 +150,11 @@ namespace util
bool precomputation; i2p::config::GetOption("precomputation.elgamal", precomputation); bool precomputation; i2p::config::GetOption("precomputation.elgamal", precomputation);
bool aesni; i2p::config::GetOption("cpuext.aesni", aesni); bool aesni; i2p::config::GetOption("cpuext.aesni", aesni);
bool avx; i2p::config::GetOption("cpuext.avx", avx);
bool forceCpuExt; i2p::config::GetOption("cpuext.force", forceCpuExt); bool forceCpuExt; i2p::config::GetOption("cpuext.force", forceCpuExt);
bool ssu; i2p::config::GetOption("ssu", ssu); bool ssu; i2p::config::GetOption("ssu", ssu);
if (!ssu && i2p::config::IsDefault ("precomputation.elgamal")) if (!ssu && i2p::config::IsDefault ("precomputation.elgamal"))
precomputation = false; // we don't elgamal table if no ssu, unless it's specified explicitly precomputation = false; // we don't elgamal table if no ssu, unless it's specified explicitly
i2p::crypto::InitCrypto (precomputation, aesni, avx, forceCpuExt); i2p::crypto::InitCrypto (precomputation, aesni, forceCpuExt);
i2p::transport::InitAddressFromIface (); // get address4/6 from interfaces i2p::transport::InitAddressFromIface (); // get address4/6 from interfaces

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2013-2020, The PurpleI2P Project * Copyright (c) 2013-2023, The PurpleI2P Project
* *
* This file is part of Purple i2pd project and licensed under BSD3 * This file is part of Purple i2pd project and licensed under BSD3
* *
@ -7,52 +7,52 @@
*/ */
#include "CPU.h" #include "CPU.h"
#if defined(__x86_64__) || defined(__i386__)
#include <cpuid.h>
#endif
#include "Log.h" #include "Log.h"
#ifndef bit_AES #if defined(_MSC_VER)
#define bit_AES (1 << 25) #include <intrin.h>
#endif
#ifndef bit_AVX
#define bit_AVX (1 << 28)
#endif
#ifndef bit_AES
#define bit_AES (1 << 25)
#endif
#endif
namespace i2p namespace i2p
{ {
namespace cpu namespace cpu
{ {
bool aesni = false; bool aesni = false;
bool avx = false;
void Detect(bool AesSwitch, bool AvxSwitch, bool force) inline bool cpu_support_aes()
{ {
#if defined(__x86_64__) || defined(__i386__) #if (defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_IX86) || defined(__i386__))
int info[4]; #if defined(_MSC_VER)
__cpuid(0, info[0], info[1], info[2], info[3]); int cpu_info[4];
if (info[0] >= 0x00000001) { __cpuid(cpu_info, 1);
__cpuid(0x00000001, info[0], info[1], info[2], info[3]); return ((cpu_info[2] & bit_AES) != 0);
#if defined (_WIN32) && (WINVER == 0x0501) // WinXP #elif defined(__clang__)
if (AesSwitch && force) { // only if forced #if __clang_major__ >= 6
#else __builtin_cpu_init();
if ((info[2] & bit_AES && AesSwitch) || (AesSwitch && force)) {
#endif #endif
return __builtin_cpu_supports("aes");
#elif defined(__GNUC__)
__builtin_cpu_init();
return __builtin_cpu_supports("aes");
#else
return false;
#endif
#else
return false;
#endif
}
void Detect(bool AesSwitch, bool force)
{
if ((cpu_support_aes() && AesSwitch) || (AesSwitch && force)) {
aesni = true; aesni = true;
} }
#if defined (_WIN32) && (WINVER == 0x0501) // WinXP
if (AvxSwitch && force) { // only if forced
#else
if ((info[2] & bit_AVX && AvxSwitch) || (AvxSwitch && force)) {
#endif
avx = true;
}
}
#endif // defined(__x86_64__) || defined(__i386__)
LogPrint(eLogInfo, "AESNI ", (aesni ? "enabled" : "disabled")); LogPrint(eLogInfo, "AESNI ", (aesni ? "enabled" : "disabled"));
LogPrint(eLogInfo, "AVX ", (avx ? "enabled" : "disabled"));
} }
} }
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2013-2020, The PurpleI2P Project * Copyright (c) 2013-2023, The PurpleI2P Project
* *
* This file is part of Purple i2pd project and licensed under BSD3 * This file is part of Purple i2pd project and licensed under BSD3
* *
@ -14,9 +14,8 @@ namespace i2p
namespace cpu namespace cpu
{ {
extern bool aesni; extern bool aesni;
extern bool avx;
void Detect(bool AesSwitch, bool AvxSwitch, bool force); void Detect(bool AesSwitch, bool force);
} }
} }

View File

@ -193,7 +193,7 @@ namespace config {
options_description precomputation("Precomputation options"); options_description precomputation("Precomputation options");
precomputation.add_options() precomputation.add_options()
("precomputation.elgamal", ("precomputation.elgamal",
#if defined(__x86_64__) #if (defined(_M_AMD64) || defined(__x86_64__))
value<bool>()->default_value(false), value<bool>()->default_value(false),
#else #else
value<bool>()->default_value(true), value<bool>()->default_value(true),
@ -308,7 +308,7 @@ namespace config {
options_description cpuext("CPU encryption extensions options"); options_description cpuext("CPU encryption extensions options");
cpuext.add_options() cpuext.add_options()
("cpuext.aesni", bool_switch()->default_value(true), "Use auto detection for AESNI CPU extensions. If false, AESNI will be not used") ("cpuext.aesni", bool_switch()->default_value(true), "Use auto detection for AESNI CPU extensions. If false, AESNI will be not used")
("cpuext.avx", bool_switch()->default_value(true), "Use auto detection for AVX CPU extensions. If false, AVX will be not used") ("cpuext.avx", bool_switch()->default_value(false), "Deprecated option")
("cpuext.force", bool_switch()->default_value(false), "Force usage of CPU extensions. Useful when cpuinfo is not available on virtual machines") ("cpuext.force", bool_switch()->default_value(false), "Force usage of CPU extensions. Useful when cpuinfo is not available on virtual machines")
; ;

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2013-2022, The PurpleI2P Project * Copyright (c) 2013-2023, The PurpleI2P Project
* *
* This file is part of Purple i2pd project and licensed under BSD3 * This file is part of Purple i2pd project and licensed under BSD3
* *
@ -28,6 +28,12 @@
#include "I2PEndian.h" #include "I2PEndian.h"
#include "Log.h" #include "Log.h"
#if defined(__AES__) && !defined(_MSC_VER) && ((defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_IX86) || defined(__i386__)))
#define SUPPORTS_AES 1
#else
#define SUPPORTS_AES 0
#endif
namespace i2p namespace i2p
{ {
namespace crypto namespace crypto
@ -361,7 +367,7 @@ namespace crypto
BIGNUM * b1 = BN_CTX_get (ctx); BIGNUM * b1 = BN_CTX_get (ctx);
BIGNUM * b = BN_CTX_get (ctx); BIGNUM * b = BN_CTX_get (ctx);
// select random k // select random k
#if defined(__x86_64__) #if (defined(_M_AMD64) || defined(__x86_64__))
BN_rand (k, ELGAMAL_FULL_EXPONENT_NUM_BITS, -1, 1); // full exponent for x64 BN_rand (k, ELGAMAL_FULL_EXPONENT_NUM_BITS, -1, 1); // full exponent for x64
#else #else
BN_rand (k, ELGAMAL_SHORT_EXPONENT_NUM_BITS, -1, 1); // short exponent of 226 bits BN_rand (k, ELGAMAL_SHORT_EXPONENT_NUM_BITS, -1, 1); // short exponent of 226 bits
@ -428,7 +434,7 @@ namespace crypto
void GenerateElGamalKeyPair (uint8_t * priv, uint8_t * pub) void GenerateElGamalKeyPair (uint8_t * priv, uint8_t * pub)
{ {
#if defined(__x86_64__) || defined(__i386__) || defined(_MSC_VER) #if (defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_IX86) || defined(__i386__)) || defined(_MSC_VER)
RAND_bytes (priv, 256); RAND_bytes (priv, 256);
#else #else
// lower 226 bits (28 bytes and 2 bits) only. short exponent // lower 226 bits (28 bytes and 2 bits) only. short exponent
@ -555,7 +561,7 @@ namespace crypto
} }
// AES // AES
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) #if SUPPORTS_AES
#define KeyExpansion256(round0,round1) \ #define KeyExpansion256(round0,round1) \
"pshufd $0xff, %%xmm2, %%xmm2 \n" \ "pshufd $0xff, %%xmm2, %%xmm2 \n" \
"movaps %%xmm1, %%xmm4 \n" \ "movaps %%xmm1, %%xmm4 \n" \
@ -580,7 +586,7 @@ namespace crypto
"movaps %%xmm3, "#round1"(%[sched]) \n" "movaps %%xmm3, "#round1"(%[sched]) \n"
#endif #endif
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) #if SUPPORTS_AES
void ECBCryptoAESNI::ExpandKey (const AESKey& key) void ECBCryptoAESNI::ExpandKey (const AESKey& key)
{ {
__asm__ __asm__
@ -621,7 +627,7 @@ namespace crypto
#endif #endif
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) #if SUPPORTS_AES
#define EncryptAES256(sched) \ #define EncryptAES256(sched) \
"pxor (%["#sched"]), %%xmm0 \n" \ "pxor (%["#sched"]), %%xmm0 \n" \
"aesenc 16(%["#sched"]), %%xmm0 \n" \ "aesenc 16(%["#sched"]), %%xmm0 \n" \
@ -642,7 +648,7 @@ namespace crypto
void ECBEncryption::Encrypt (const ChipherBlock * in, ChipherBlock * out) void ECBEncryption::Encrypt (const ChipherBlock * in, ChipherBlock * out)
{ {
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) #if SUPPORTS_AES
if(i2p::cpu::aesni) if(i2p::cpu::aesni)
{ {
__asm__ __asm__
@ -650,7 +656,9 @@ namespace crypto
"movups (%[in]), %%xmm0 \n" "movups (%[in]), %%xmm0 \n"
EncryptAES256(sched) EncryptAES256(sched)
"movups %%xmm0, (%[out]) \n" "movups %%xmm0, (%[out]) \n"
: : [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out) : "%xmm0", "memory" :
: [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out)
: "%xmm0", "memory"
); );
} }
else else
@ -660,7 +668,7 @@ namespace crypto
} }
} }
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) #if SUPPORTS_AES
#define DecryptAES256(sched) \ #define DecryptAES256(sched) \
"pxor 224(%["#sched"]), %%xmm0 \n" \ "pxor 224(%["#sched"]), %%xmm0 \n" \
"aesdec 208(%["#sched"]), %%xmm0 \n" \ "aesdec 208(%["#sched"]), %%xmm0 \n" \
@ -681,7 +689,7 @@ namespace crypto
void ECBDecryption::Decrypt (const ChipherBlock * in, ChipherBlock * out) void ECBDecryption::Decrypt (const ChipherBlock * in, ChipherBlock * out)
{ {
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) #if SUPPORTS_AES
if(i2p::cpu::aesni) if(i2p::cpu::aesni)
{ {
__asm__ __asm__
@ -689,7 +697,9 @@ namespace crypto
"movups (%[in]), %%xmm0 \n" "movups (%[in]), %%xmm0 \n"
DecryptAES256(sched) DecryptAES256(sched)
"movups %%xmm0, (%[out]) \n" "movups %%xmm0, (%[out]) \n"
: : [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out) : "%xmm0", "memory" :
: [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out)
: "%xmm0", "memory"
); );
} }
else else
@ -699,7 +709,7 @@ namespace crypto
} }
} }
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) #if SUPPORTS_AES
#define CallAESIMC(offset) \ #define CallAESIMC(offset) \
"movaps "#offset"(%[shed]), %%xmm0 \n" \ "movaps "#offset"(%[shed]), %%xmm0 \n" \
"aesimc %%xmm0, %%xmm0 \n" \ "aesimc %%xmm0, %%xmm0 \n" \
@ -708,7 +718,7 @@ namespace crypto
void ECBEncryption::SetKey (const AESKey& key) void ECBEncryption::SetKey (const AESKey& key)
{ {
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) #if SUPPORTS_AES
if(i2p::cpu::aesni) if(i2p::cpu::aesni)
{ {
ExpandKey (key); ExpandKey (key);
@ -722,7 +732,7 @@ namespace crypto
void ECBDecryption::SetKey (const AESKey& key) void ECBDecryption::SetKey (const AESKey& key)
{ {
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) #if SUPPORTS_AES
if(i2p::cpu::aesni) if(i2p::cpu::aesni)
{ {
ExpandKey (key); // expand encryption key first ExpandKey (key); // expand encryption key first
@ -742,7 +752,9 @@ namespace crypto
CallAESIMC(176) CallAESIMC(176)
CallAESIMC(192) CallAESIMC(192)
CallAESIMC(208) CallAESIMC(208)
: : [shed]"r"(GetKeySchedule ()) : "%xmm0", "memory" :
: [shed]"r"(GetKeySchedule ())
: "%xmm0", "memory"
); );
} }
else else
@ -754,7 +766,7 @@ namespace crypto
void CBCEncryption::Encrypt (int numBlocks, const ChipherBlock * in, ChipherBlock * out) void CBCEncryption::Encrypt (int numBlocks, const ChipherBlock * in, ChipherBlock * out)
{ {
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) #if SUPPORTS_AES
if(i2p::cpu::aesni) if(i2p::cpu::aesni)
{ {
__asm__ __asm__
@ -799,7 +811,7 @@ namespace crypto
void CBCEncryption::Encrypt (const uint8_t * in, uint8_t * out) void CBCEncryption::Encrypt (const uint8_t * in, uint8_t * out)
{ {
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) #if SUPPORTS_AES
if(i2p::cpu::aesni) if(i2p::cpu::aesni)
{ {
__asm__ __asm__
@ -823,7 +835,7 @@ namespace crypto
void CBCDecryption::Decrypt (int numBlocks, const ChipherBlock * in, ChipherBlock * out) void CBCDecryption::Decrypt (int numBlocks, const ChipherBlock * in, ChipherBlock * out)
{ {
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) #if SUPPORTS_AES
if(i2p::cpu::aesni) if(i2p::cpu::aesni)
{ {
__asm__ __asm__
@ -869,7 +881,7 @@ namespace crypto
void CBCDecryption::Decrypt (const uint8_t * in, uint8_t * out) void CBCDecryption::Decrypt (const uint8_t * in, uint8_t * out)
{ {
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) #if SUPPORTS_AES
if(i2p::cpu::aesni) if(i2p::cpu::aesni)
{ {
__asm__ __asm__
@ -893,7 +905,7 @@ namespace crypto
void TunnelEncryption::Encrypt (const uint8_t * in, uint8_t * out) void TunnelEncryption::Encrypt (const uint8_t * in, uint8_t * out)
{ {
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) #if SUPPORTS_AES
if(i2p::cpu::aesni) if(i2p::cpu::aesni)
{ {
__asm__ __asm__
@ -934,7 +946,7 @@ namespace crypto
void TunnelDecryption::Decrypt (const uint8_t * in, uint8_t * out) void TunnelDecryption::Decrypt (const uint8_t * in, uint8_t * out)
{ {
#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) #if SUPPORTS_AES
if(i2p::cpu::aesni) if(i2p::cpu::aesni)
{ {
__asm__ __asm__
@ -1285,9 +1297,9 @@ namespace crypto
} }
}*/ }*/
void InitCrypto (bool precomputation, bool aesni, bool avx, bool force) void InitCrypto (bool precomputation, bool aesni, bool force)
{ {
i2p::cpu::Detect (aesni, avx, force); i2p::cpu::Detect (aesni, force);
#if LEGACY_OPENSSL #if LEGACY_OPENSSL
SSL_library_init (); SSL_library_init ();
#endif #endif
@ -1297,7 +1309,7 @@ namespace crypto
CRYPTO_set_locking_callback (OpensslLockingCallback);*/ CRYPTO_set_locking_callback (OpensslLockingCallback);*/
if (precomputation) if (precomputation)
{ {
#if defined(__x86_64__) #if (defined(_M_AMD64) || defined(__x86_64__))
g_ElggTable = new BIGNUM * [ELGAMAL_FULL_EXPONENT_NUM_BYTES][255]; g_ElggTable = new BIGNUM * [ELGAMAL_FULL_EXPONENT_NUM_BYTES][255];
PrecalculateElggTable (g_ElggTable, ELGAMAL_FULL_EXPONENT_NUM_BYTES); PrecalculateElggTable (g_ElggTable, ELGAMAL_FULL_EXPONENT_NUM_BYTES);
#else #else
@ -1312,7 +1324,7 @@ namespace crypto
if (g_ElggTable) if (g_ElggTable)
{ {
DestroyElggTable (g_ElggTable, DestroyElggTable (g_ElggTable,
#if defined(__x86_64__) #if (defined(_M_AMD64) || defined(__x86_64__))
ELGAMAL_FULL_EXPONENT_NUM_BYTES ELGAMAL_FULL_EXPONENT_NUM_BYTES
#else #else
ELGAMAL_SHORT_EXPONENT_NUM_BYTES ELGAMAL_SHORT_EXPONENT_NUM_BYTES

View File

@ -307,7 +307,7 @@ namespace crypto
void InitNoiseIKState (NoiseSymmetricState& state, const uint8_t * pub); // Noise_IK (ratchets) void InitNoiseIKState (NoiseSymmetricState& state, const uint8_t * pub); // Noise_IK (ratchets)
// init and terminate // init and terminate
void InitCrypto (bool precomputation, bool aesni, bool avx, bool force); void InitCrypto (bool precomputation, bool aesni, bool force);
void TerminateCrypto (); void TerminateCrypto ();
} }
} }

View File

@ -803,29 +803,12 @@ namespace data
XORMetric operator^(const IdentHash& key1, const IdentHash& key2) XORMetric operator^(const IdentHash& key1, const IdentHash& key2)
{ {
XORMetric m; XORMetric m;
#if (defined(__x86_64__) || defined(__i386__)) && defined(__AVX__) // not all X86 targets supports AVX (like old Pentium, see #1600)
if(i2p::cpu::avx)
{
__asm__
(
"vmovups %1, %%ymm0 \n"
"vmovups %2, %%ymm1 \n"
"vxorps %%ymm0, %%ymm1, %%ymm1 \n"
"vmovups %%ymm1, %0 \n"
: "=m"(*m.metric)
: "m"(*key1), "m"(*key2)
: "memory", "%xmm0", "%xmm1" // should be replaced by %ymm0/1 once supported by compiler
);
}
else
#endif
{
const uint64_t * hash1 = key1.GetLL (), * hash2 = key2.GetLL (); const uint64_t * hash1 = key1.GetLL (), * hash2 = key2.GetLL ();
m.metric_ll[0] = hash1[0] ^ hash2[0]; m.metric_ll[0] = hash1[0] ^ hash2[0];
m.metric_ll[1] = hash1[1] ^ hash2[1]; m.metric_ll[1] = hash1[1] ^ hash2[1];
m.metric_ll[2] = hash1[2] ^ hash2[2]; m.metric_ll[2] = hash1[2] ^ hash2[2];
m.metric_ll[3] = hash1[3] ^ hash2[3]; m.metric_ll[3] = hash1[3] ^ hash2[3];
}
return m; return m;
} }

View File

@ -38,9 +38,8 @@ namespace api
bool precomputation; i2p::config::GetOption("precomputation.elgamal", precomputation); bool precomputation; i2p::config::GetOption("precomputation.elgamal", precomputation);
bool aesni; i2p::config::GetOption("cpuext.aesni", aesni); bool aesni; i2p::config::GetOption("cpuext.aesni", aesni);
bool avx; i2p::config::GetOption("cpuext.avx", avx);
bool forceCpuExt; i2p::config::GetOption("cpuext.force", forceCpuExt); bool forceCpuExt; i2p::config::GetOption("cpuext.force", forceCpuExt);
i2p::crypto::InitCrypto (precomputation, aesni, avx, forceCpuExt); i2p::crypto::InitCrypto (precomputation, aesni, forceCpuExt);
int netID; i2p::config::GetOption("netid", netID); int netID; i2p::config::GetOption("netid", netID);
i2p::context.SetNetID (netID); i2p::context.SetNetID (netID);