From 1a0088227670b3ca17f57d5c0fc0535aa34d56b9 Mon Sep 17 00:00:00 2001
From: Jared Van Bortel
Date: Tue, 25 Jun 2024 11:04:01 -0400
Subject: [PATCH] embllm: fix use of llama ctx before loading (#2465)

This fixes a regression in PR #2396.

Signed-off-by: Jared Van Bortel
---
 gpt4all-chat/embllm.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/gpt4all-chat/embllm.cpp b/gpt4all-chat/embllm.cpp
index b16b1616..d42fcec5 100644
--- a/gpt4all-chat/embllm.cpp
+++ b/gpt4all-chat/embllm.cpp
@@ -84,10 +84,6 @@ bool EmbeddingLLMWorker::loadModel()
         return false;
     }
 
-    // FIXME(jared): the user may want this to take effect without having to restart
-    int n_threads = MySettings::globalInstance()->threadCount();
-    m_model->setThreadCount(n_threads);
-
     // NOTE: explicitly loads model on CPU to avoid GPU OOM
     // TODO(cebtenzzre): support GPU-accelerated embeddings
     bool success = m_model->loadModel(filePath.toStdString(), 2048, 0);
@@ -104,6 +100,11 @@ bool EmbeddingLLMWorker::loadModel()
         m_model = nullptr;
         return false;
     }
+
+    // FIXME(jared): the user may want this to take effect without having to restart
+    int n_threads = MySettings::globalInstance()->threadCount();
+    m_model->setThreadCount(n_threads);
+
     return true;
 }
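
For readers unfamiliar with the bug, below is a minimal sketch of the ordering hazard the patch fixes. It assumes, as the commit title suggests, that setThreadCount() forwards the value to the backend's llama context, which only exists once loadModel() has succeeded; the FakeBackend type and all member names here are illustrative placeholders, not the actual LLModel API.

#include <cstdio>
#include <memory>
#include <string>

// Illustrative stand-in for the embedding backend: the llama context is
// created only when loadModel() succeeds, so it is null before that.
struct FakeBackend {
    struct Ctx { int n_threads = 4; };
    std::unique_ptr<Ctx> ctx;                   // null until the model is loaded

    bool loadModel(const std::string &path) {
        ctx = std::make_unique<Ctx>();          // context comes into existence here
        std::printf("loaded %s\n", path.c_str());
        return true;
    }

    void setThreadCount(int n) {
        // Before the patch, the caller reached this while ctx was still null;
        // touching it there is the "use of llama ctx before loading".
        if (ctx)
            ctx->n_threads = n;
    }
};

int main() {
    FakeBackend model;
    // Regression order: configuring threads before the context exists.
    // model.setThreadCount(8);                 // would touch a null context
    // Fixed order, as in the patch: load first, then configure threads.
    if (model.loadModel("some-embedding-model.gguf"))   // placeholder path
        model.setThreadCount(8);
}

Moving the setThreadCount() call after a successful loadModel(), as the diff does, guarantees the context exists before it is touched; the FIXME about the setting requiring a restart to take effect is unchanged by the patch.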