From 3acbef14b7c2436fe033cae9036e695d77461a16 Mon Sep 17 00:00:00 2001
From: Jared Van Bortel
Date: Wed, 13 Dec 2023 12:11:09 -0500
Subject: [PATCH] fix AVX support by removing direct linking to AVX2 libs (#1750)

---
 gpt4all-backend/CMakeLists.txt     |  2 --
 gpt4all-backend/llmodel.cpp        | 23 ++++++++++++++++++++++-
 gpt4all-backend/llmodel.h          | 22 ++++++++++++----------
 gpt4all-backend/llmodel_shared.cpp | 27 ---------------------------
 gpt4all-chat/CMakeLists.txt        |  2 +-
 gpt4all-chat/mysettings.cpp        |  2 +-
 6 files changed, 36 insertions(+), 42 deletions(-)

diff --git a/gpt4all-backend/CMakeLists.txt b/gpt4all-backend/CMakeLists.txt
index 8f5ae2fa..39152a2e 100644
--- a/gpt4all-backend/CMakeLists.txt
+++ b/gpt4all-backend/CMakeLists.txt
@@ -114,8 +114,6 @@ add_library(llmodel
     llmodel_c.h llmodel_c.cpp
     dlhandle.h
 )
-target_link_libraries(llmodel PRIVATE ggml-mainline-default)
-target_compile_definitions(llmodel PRIVATE GGML_BUILD_VARIANT="default")
 target_compile_definitions(llmodel PRIVATE LIB_FILE_EXT="${CMAKE_SHARED_LIBRARY_SUFFIX}")
 
 set_target_properties(llmodel PROPERTIES
diff --git a/gpt4all-backend/llmodel.cpp b/gpt4all-backend/llmodel.cpp
index df70c9c9..cb7cfd86 100644
--- a/gpt4all-backend/llmodel.cpp
+++ b/gpt4all-backend/llmodel.cpp
@@ -82,7 +82,7 @@ const std::vector<LLModel::Implementation> &LLModel::Implementation::implementat
     static auto* libs = new std::vector<Implementation>([] () {
         std::vector<Implementation> fres;
 
-        std::string impl_name_re = "(bert|llama|gptj|llamamodel-mainline)";
+        std::string impl_name_re = "(bert|gptj|llamamodel-mainline)";
         if (requires_avxonly()) {
             impl_name_re += "-avxonly";
         } else {
@@ -186,6 +186,27 @@ LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::s
     return fres;
 }
 
+LLModel *LLModel::Implementation::constructCpuLlama() {
+    const LLModel::Implementation *impl = nullptr;
+    for (const auto &i : implementationList()) {
+        if (i.m_buildVariant == "metal" || i.m_modelType != "LLaMA") continue;
+        impl = &i;
+    }
+    if (!impl) {
+        std::cerr << "LLModel ERROR: Could not find CPU LLaMA implementation\n";
+        return nullptr;
+    }
+    auto fres = impl->m_construct();
+    fres->m_implementation = impl;
+    return fres;
+}
+
+std::vector<LLModel::GPUDevice> LLModel::Implementation::availableGPUDevices() {
+    static LLModel *cpuLlama = LLModel::Implementation::constructCpuLlama(); // (memory leak)
+    if (cpuLlama) { return cpuLlama->availableGPUDevices(0); }
+    return {};
+}
+
 void LLModel::Implementation::setImplementationsSearchPath(const std::string& path) {
     s_implementations_search_path = path;
 }
diff --git a/gpt4all-backend/llmodel.h b/gpt4all-backend/llmodel.h
index 5fdabc30..f11c4c74 100644
--- a/gpt4all-backend/llmodel.h
+++ b/gpt4all-backend/llmodel.h
@@ -15,6 +15,15 @@ class Dlhandle;
 class LLModel {
 public:
     using Token = int32_t;
+
+    struct GPUDevice {
+        int index = 0;
+        int type = 0;
+        size_t heapSize = 0;
+        std::string name;
+        std::string vendor;
+    };
+
     class Implementation {
     public:
         Implementation(Dlhandle&&);
@@ -29,14 +38,16 @@ public:
         static const std::vector<Implementation>& implementationList();
         static const Implementation *implementation(const char *fname, const std::string& buildVariant);
        static LLModel *construct(const std::string &modelPath, std::string buildVariant = "auto");
+        static std::vector<GPUDevice> availableGPUDevices();
         static void setImplementationsSearchPath(const std::string& path);
         static const std::string& implementationsSearchPath();
 
     private:
+        static LLModel *constructCpuLlama();
+
         bool (*m_magicMatch)(const char *fname);
         LLModel *(*m_construct)();
 
-    private:
         std::string_view m_modelType;
         std::string_view m_buildVariant;
         Dlhandle *m_dlhandle;
@@ -58,14 +69,6 @@ public:
         int32_t n_last_batch_tokens = 0;
     };
 
-    struct GPUDevice {
-        int index = 0;
-        int type = 0;
-        size_t heapSize = 0;
-        std::string name;
-        std::string vendor;
-    };
-
     explicit LLModel() {}
     virtual ~LLModel() {}
 
@@ -106,7 +109,6 @@ public:
     virtual bool initializeGPUDevice(int /*device*/) { return false; }
     virtual bool hasGPUDevice() { return false; }
     virtual bool usingGPUDevice() { return false; }
-    static std::vector<GPUDevice> availableGPUDevices();
 
 protected:
     // These are pure virtual because subclasses need to implement as the default implementation of
diff --git a/gpt4all-backend/llmodel_shared.cpp b/gpt4all-backend/llmodel_shared.cpp
index 74e69786..13c3706c 100644
--- a/gpt4all-backend/llmodel_shared.cpp
+++ b/gpt4all-backend/llmodel_shared.cpp
@@ -4,10 +4,6 @@
 #include 
 #include 
 
-#ifdef GGML_USE_KOMPUTE
-#include "ggml-vulkan.h"
-#endif
-
 void LLModel::recalculateContext(PromptContext &promptCtx, std::function<bool(bool)> recalculate) {
     size_t i = 0;
     promptCtx.n_past = 0;
@@ -177,26 +173,3 @@ std::vector<float> LLModel::embedding(const std::string &/*text*/)
     }
     return std::vector<float>();
 }
-
-std::vector<LLModel::GPUDevice> LLModel::availableGPUDevices()
-{
-#if defined(GGML_USE_KOMPUTE)
-    std::vector<ggml_vk_device> vkDevices = ggml_vk_available_devices(0);
-
-    std::vector<LLModel::GPUDevice> devices;
-    for(const auto& vkDevice : vkDevices) {
-        LLModel::GPUDevice device;
-        device.index = vkDevice.index;
-        device.type = vkDevice.type;
-        device.heapSize = vkDevice.heapSize;
-        device.name = vkDevice.name;
-        device.vendor = vkDevice.vendor;
-
-        devices.push_back(device);
-    }
-
-    return devices;
-#else
-    return std::vector<LLModel::GPUDevice>();
-#endif
-}
diff --git a/gpt4all-chat/CMakeLists.txt b/gpt4all-chat/CMakeLists.txt
index 333e5f51..657845fa 100644
--- a/gpt4all-chat/CMakeLists.txt
+++ b/gpt4all-chat/CMakeLists.txt
@@ -173,7 +173,7 @@ else()
         PRIVATE Qt6::Quick Qt6::Svg Qt6::HttpServer Qt6::Sql Qt6::Pdf)
 endif()
 target_link_libraries(chat
-    PRIVATE llmodel bert-default)
+    PRIVATE llmodel)
 
 set(COMPONENT_NAME_MAIN ${PROJECT_NAME})
 set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/install)
diff --git a/gpt4all-chat/mysettings.cpp b/gpt4all-chat/mysettings.cpp
index 60cc3765..3183a321 100644
--- a/gpt4all-chat/mysettings.cpp
+++ b/gpt4all-chat/mysettings.cpp
@@ -64,7 +64,7 @@ MySettings::MySettings()
 {
     QSettings::setDefaultFormat(QSettings::IniFormat);
 
-    std::vector<LLModel::GPUDevice> devices = LLModel::availableGPUDevices();
+    std::vector<LLModel::GPUDevice> devices = LLModel::Implementation::availableGPUDevices();
     QVector<QString> deviceList{ "Auto" };
     for (LLModel::GPUDevice &d : devices)
         deviceList << QString::fromStdString(d.name);
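
Note (not part of the upstream patch): the sketch below shows how a caller can enumerate GPU devices through the relocated API once this patch is applied, under the assumption that the program is built against gpt4all-backend's llmodel.h and linked with the llmodel library. The standalone main() and the "./lib" search path are illustrative assumptions; availableGPUDevices(), setImplementationsSearchPath(), and the GPUDevice fields are taken from the diff above.

    // Hypothetical caller, for illustration only.
    #include "llmodel.h"

    #include <iostream>
    #include <vector>

    int main() {
        // Assumption: the per-variant implementation libraries are in ./lib.
        LLModel::Implementation::setImplementationsSearchPath("./lib");

        // After this patch, GPU enumeration is a static member of LLModel::Implementation,
        // which lazily constructs a CPU LLaMA backend; the old static
        // LLModel::availableGPUDevices() no longer exists.
        std::vector<LLModel::GPUDevice> devices = LLModel::Implementation::availableGPUDevices();
        for (const LLModel::GPUDevice &d : devices)
            std::cout << d.index << ": " << d.name << " (" << d.vendor << ") heapSize=" << d.heapSize << "\n";
        return 0;
    }

Routing device enumeration through a dynamically loaded llama implementation is what lets the llmodel library itself drop the direct link to the AVX2-built ggml-mainline-default target, per the commit subject.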