diff --git a/gpt4all-backend/llama.cpp-mainline b/gpt4all-backend/llama.cpp-mainline index 7ff671e1..703ef9c1 160000 --- a/gpt4all-backend/llama.cpp-mainline +++ b/gpt4all-backend/llama.cpp-mainline @@ -1 +1 @@ -Subproject commit 7ff671e149464d1a52b4f9e50a7819bc49e8fdaa +Subproject commit 703ef9c1252aff4f6c4e1fdc60fffe6ab9def377 diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp index b92f6e87..ecae5f0e 100644 --- a/gpt4all-backend/llamamodel.cpp +++ b/gpt4all-backend/llamamodel.cpp @@ -168,6 +168,10 @@ bool LLamaModel::loadModel(const std::string &modelPath) d_ptr->ctx = llama_init_from_file(modelPath.c_str(), d_ptr->params); if (!d_ptr->ctx) { +#ifdef GGML_USE_KOMPUTE + // Explicitly free the device so the next load doesn't use it + ggml_vk_free_device(); +#endif std::cerr << "LLAMA ERROR: failed to load model from " << modelPath << std::endl; return false; } @@ -194,7 +198,7 @@ int32_t LLamaModel::threadCount() const { LLamaModel::~LLamaModel() { - if(d_ptr->ctx) { + if (d_ptr->ctx) { llama_free(d_ptr->ctx); } } diff --git a/gpt4all-chat/chatllm.cpp b/gpt4all-chat/chatllm.cpp index 5d6ea6d3..74208c17 100644 --- a/gpt4all-chat/chatllm.cpp +++ b/gpt4all-chat/chatllm.cpp @@ -294,9 +294,15 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo) emit reportDevice(actualDevice); bool success = m_llModelInfo.model->loadModel(filePath.toStdString()); + if (!success && actualDevice != "CPU") { + emit reportDevice("CPU"); + success = m_llModelInfo.model->loadModel(filePath.toStdString()); + } + MySettings::globalInstance()->setAttemptModelLoad(QString()); if (!success) { - delete std::exchange(m_llModelInfo.model, nullptr); + delete m_llModelInfo.model; + m_llModelInfo.model = nullptr; if (!m_isServer) LLModelStore::globalInstance()->releaseModel(m_llModelInfo); // release back into the store m_llModelInfo = LLModelInfo(); @@ -317,7 +323,8 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo) case 'S': m_llModelType = LLModelType::STARCODER_; 
break; default: { - delete std::exchange(m_llModelInfo.model, nullptr); + delete m_llModelInfo.model; + m_llModelInfo.model = nullptr; if (!m_isServer) LLModelStore::globalInstance()->releaseModel(m_llModelInfo); // release back into the store m_llModelInfo = LLModelInfo();