diff --git a/gpt4all-backend/llama.cpp-mainline b/gpt4all-backend/llama.cpp-mainline index 7ff671e1..703ef9c1 160000 --- a/gpt4all-backend/llama.cpp-mainline +++ b/gpt4all-backend/llama.cpp-mainline @@ -1 +1 @@ -Subproject commit 7ff671e149464d1a52b4f9e50a7819bc49e8fdaa +Subproject commit 703ef9c1252aff4f6c4e1fdc60fffe6ab9def377 diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp index b92f6e87..ecae5f0e 100644 --- a/gpt4all-backend/llamamodel.cpp +++ b/gpt4all-backend/llamamodel.cpp @@ -168,6 +168,10 @@ bool LLamaModel::loadModel(const std::string &modelPath) d_ptr->ctx = llama_init_from_file(modelPath.c_str(), d_ptr->params); if (!d_ptr->ctx) { +#ifdef GGML_USE_KOMPUTE + // Explicitly free the device so the next load doesn't use it + ggml_vk_free_device(); +#endif std::cerr << "LLAMA ERROR: failed to load model from " << modelPath << std::endl; return false; } @@ -194,7 +198,7 @@ int32_t LLamaModel::threadCount() const { LLamaModel::~LLamaModel() { - if(d_ptr->ctx) { + if (d_ptr->ctx) { llama_free(d_ptr->ctx); } } diff --git a/gpt4all-chat/chatllm.cpp b/gpt4all-chat/chatllm.cpp index 5d6ea6d3..74208c17 100644 --- a/gpt4all-chat/chatllm.cpp +++ b/gpt4all-chat/chatllm.cpp @@ -294,9 +294,15 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo) emit reportDevice(actualDevice); bool success = m_llModelInfo.model->loadModel(filePath.toStdString()); + if (!success && actualDevice != "CPU") { + emit reportDevice("CPU"); + success = m_llModelInfo.model->loadModel(filePath.toStdString()); + } + MySettings::globalInstance()->setAttemptModelLoad(QString()); if (!success) { - delete std::exchange(m_llModelInfo.model, nullptr); + delete m_llModelInfo.model; + m_llModelInfo.model = nullptr; if (!m_isServer) LLModelStore::globalInstance()->releaseModel(m_llModelInfo); // release back into the store m_llModelInfo = LLModelInfo(); @@ -317,7 +323,8 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo) case 'S': m_llModelType = LLModelType::STARCODER_; 
break; default: { - delete std::exchange(m_llModelInfo.model, nullptr); + delete m_llModelInfo.model; + m_llModelInfo.model = nullptr; if (!m_isServer) LLModelStore::globalInstance()->releaseModel(m_llModelInfo); // release back into the store m_llModelInfo = LLModelInfo();