@@ -19,33 +19,27 @@
 std::string s_implementations_search_path = ".";
 
-static bool has_at_least_minimal_hardware() {
-#if defined(__x86_64__) || defined(_M_X64)
-    #ifndef _MSC_VER
-        return __builtin_cpu_supports("avx");
-    #else
-        int cpuInfo[4];
-        __cpuid(cpuInfo, 1);
-        return cpuInfo[2] & (1 << 28);
-    #endif
-#else
-    return true; // Don't know how to handle non-x86_64
-#endif
-}
-
+#if !(defined(__x86_64__) || defined(_M_X64))
+    // irrelevant on non-x86_64
+    #define cpu_supports_avx()  -1
+    #define cpu_supports_avx2() -1
+#elif defined(_MSC_VER)
+    // MSVC
+    static int get_cpu_info(int func_id, int reg_id) {
+        int info[4];
+        __cpuid(info, func_id);
+        return info[reg_id];
+    }
-static bool requires_avxonly() {
-#if defined(__x86_64__) || defined(_M_X64)
-    #ifndef _MSC_VER
-        return !__builtin_cpu_supports("avx2");
-    #else
-        int cpuInfo[4];
-        __cpuidex(cpuInfo, 7, 0);
-        return !(cpuInfo[1] & (1 << 5));
-    #endif
+    // AVX via EAX=1: Processor Info and Feature Bits, bit 28 of ECX
+    #define cpu_supports_avx()  (get_cpu_info(1, 2) & (1 << 28))
+    // AVX2 via EAX=7, ECX=0: Extended Features, bit 5 of EBX
+    #define cpu_supports_avx2() (get_cpu_info(7, 1) & (1 << 5))
 #else
-    return false; // Don't know how to handle non-x86_64
+    // gcc/clang
+    #define cpu_supports_avx()  __builtin_cpu_supports("avx")
+    #define cpu_supports_avx2() __builtin_cpu_supports("avx2")
 #endif
-}
 
 LLModel::Implementation::Implementation(Dlhandle &&dlhandle_)
     : m_dlhandle(new Dlhandle(std::move(dlhandle_))) {
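This hunk folds the two ad-hoc helpers into one pair of cpu_supports_avx()/cpu_supports_avx2() macros shared by all call sites. A standalone probe built on the same primitives can sanity-check the detection logic; this is a minimal sketch for the gcc/clang branch on x86-64 (the file and function names are illustrative, not part of the patch):

    // probe.cpp -- illustrative only; mirrors the gcc/clang branch of the macros.
    // Build with: g++ probe.cpp -o probe
    #include <cstdio>

    int main() {
        // __builtin_cpu_supports consults the same CPUID bits the MSVC branch
        // reads directly: EAX=1, ECX bit 28 for AVX; EAX=7, EBX bit 5 for AVX2.
        std::printf("AVX:  %d\n", __builtin_cpu_supports("avx") != 0);
        std::printf("AVX2: %d\n", __builtin_cpu_supports("avx2") != 0);
    }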
@@ -71,21 +65,25 @@ LLModel::Implementation::Implementation(Implementation &&o)
 }
 
 LLModel::Implementation::~Implementation() {
-    if (m_dlhandle) delete m_dlhandle;
+    delete m_dlhandle;
 }
 
-bool LLModel::Implementation::isImplementation(const Dlhandle &dl) {
+static bool isImplementation(const Dlhandle &dl) {
     return dl.get<bool(uint32_t)>("is_g4a_backend_model_implementation");
 }
 
 const std::vector<LLModel::Implementation> &LLModel::Implementation::implementationList() {
+    if (cpu_supports_avx() == 0) {
+        throw std::runtime_error("CPU does not support AVX");
+    }
+
     // NOTE: allocated on heap so we leak intentionally on exit so we have a chance to clean up the
     // individual models without the cleanup of the static list interfering
     static auto *libs = new std::vector<Implementation>([]() {
         std::vector<Implementation> fres;
 
-        std::string impl_name_re = "(bert|gptj|llamamodel-mainline)";
-        if (requires_avxonly()) {
+        std::string impl_name_re = "(gptj|llamamodel-mainline)";
+        if (cpu_supports_avx2() == 0) {
             impl_name_re += "-avxonly";
         } else {
             impl_name_re += "-(default|metal)";
@@ -107,9 +105,8 @@ const std::vector<LLModel::Implementation> &LLModel::Implementation::implementat
             // Add to list if model implementation
             try {
                 Dlhandle dl(p.string());
-                if (!Implementation::isImplementation(dl)) {
+                if (!isImplementation(dl))
                     continue;
-                }
                 fres.emplace_back(Implementation(std::move(dl)));
             } catch (...) {}
         }
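For reference, the pattern composed above is matched against the backend library file names found in the search path; note that bert is dropped from the alternation in this change. A hedged sketch of what the pattern accepts on a machine without AVX2 (the file names here are hypothetical):

    #include <iostream>
    #include <regex>
    #include <string>

    int main() {
        // What implementationList() builds when cpu_supports_avx2() == 0.
        std::regex re("(gptj|llamamodel-mainline)-avxonly");
        for (const std::string name : {"gptj-avxonly", "llamamodel-mainline-default", "bert-avxonly"})
            std::cout << name << ": " << std::regex_search(name, re) << "\n";  // prints 1, 0, 0
    }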
@@ -134,18 +131,13 @@ const LLModel::Implementation* LLModel::Implementation::implementation(const cha
         return &i;
     }
 
-    if (!buildVariantMatched) {
-        std::cerr << "LLModel ERROR: Could not find any implementations for build variant: " << buildVariant << "\n";
-    }
-    return nullptr;
+    if (!buildVariantMatched)
+        throw std::runtime_error("Could not find any implementations for build variant: " + buildVariant);
+    return nullptr; // unsupported model format
 }
 
 LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::string buildVariant, int n_ctx) {
-    if (!has_at_least_minimal_hardware()) {
-        std::cerr << "LLModel ERROR: CPU does not support AVX\n";
-        return nullptr;
-    }
-
     // Get correct implementation
     const Implementation *impl = nullptr;
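Since implementationList() and implementation() now signal hard failures by throwing instead of logging to stderr, a caller of construct() would wrap it accordingly. A sketch under the assumption that the frontend wants to degrade gracefully (tryLoad and the context size are placeholders; the header path follows the backend's llmodel.h):

    #include <iostream>
    #include <stdexcept>
    #include <string>
    #include "llmodel.h"

    LLModel *tryLoad(const std::string &modelPath) {
        try {
            // Throws on an unsupported CPU or an unmatched build variant;
            // returns nullptr for an unsupported model format.
            return LLModel::Implementation::construct(modelPath, "auto", 2048);
        } catch (const std::runtime_error &e) {
            std::cerr << "backend unavailable: " << e.what() << "\n";
            return nullptr;
        }
    }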
@@ -178,7 +170,7 @@ LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::s
     if (!impl) {
         //TODO: Auto-detect CUDA/OpenCL
         if (buildVariant == "auto") {
-            if (requires_avxonly()) {
+            if (cpu_supports_avx2() == 0) {
                 buildVariant = "avxonly";
             } else {
                 buildVariant = "default";
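The "auto" branch now keys directly off cpu_supports_avx2(); the same selection, written as a standalone helper for clarity (the helper name is hypothetical):

    // Mirrors the selection above: prefer the AVX2 build, fall back to AVX-only.
    static std::string pickCpuVariant() {
        return cpu_supports_avx2() == 0 ? "avxonly" : "default";
    }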
@@ -196,15 +188,24 @@ LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::s
 LLModel *LLModel::Implementation::constructDefaultLlama() {
     static std::unique_ptr<LLModel> llama([]() -> LLModel * {
+        const std::vector<LLModel::Implementation> *impls;
+        try {
+            impls = &implementationList();
+        } catch (const std::runtime_error &e) {
+            std::cerr << __func__ << ": implementationList failed: " << e.what() << "\n";
+            return nullptr;
+        }
+
         const LLModel::Implementation *impl = nullptr;
-        for (const auto &i : implementationList()) {
+        for (const auto &i : *impls) {
             if (i.m_buildVariant == "metal" || i.m_modelType != "LLaMA") continue;
             impl = &i;
         }
         if (!impl) {
-            std::cerr << "LLModel ERROR: Could not find CPU LLaMA implementation\n";
+            std::cerr << __func__ << ": could not find llama.cpp implementation\n";
             return nullptr;
         }
         auto fres = impl->m_construct();
         fres->m_implementation = impl;
         return fres;
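constructDefaultLlama() caches its result in a function-local static initialized by an immediately-invoked lambda, so the fallible lookup runs at most once and a nullptr result is remembered across calls. The same pattern in isolation, with a stand-in type (Expensive and defaultExpensive are illustrative names):

    #include <iostream>
    #include <memory>

    struct Expensive { /* ... */ };

    Expensive *defaultExpensive() {
        // The lambda body runs exactly once, on the first call (C++11 magic
        // statics make this thread-safe); later calls reuse the stored result,
        // including a cached nullptr if construction failed.
        static std::unique_ptr<Expensive> inst([]() -> Expensive * {
            std::cout << "constructing once\n";
            return new Expensive;
        }());
        return inst.get();
    }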
@@ -240,3 +241,7 @@ void LLModel::Implementation::setImplementationsSearchPath(const std::string& pa
 const std::string &LLModel::Implementation::implementationsSearchPath() {
     return s_implementations_search_path;
 }
+
+bool LLModel::Implementation::hasSupportedCPU() {
+    return cpu_supports_avx() != 0;
+}
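The new hasSupportedCPU() accessor lets front-ends check the AVX requirement up front instead of catching the throw from implementationList(). A hedged usage fragment, assumed to sit early in an application's startup path (the message text is illustrative):

    // Fail fast before touching the implementation list.
    if (!LLModel::Implementation::hasSupportedCPU()) {
        std::cerr << "Incompatible hardware: this build requires a CPU with AVX support.\n";
        std::exit(1);
    }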