From 1a0088227670b3ca17f57d5c0fc0535aa34d56b9 Mon Sep 17 00:00:00 2001
From: Jared Van Bortel
Date: Tue, 25 Jun 2024 11:04:01 -0400
Subject: [PATCH] embllm: fix use of llama ctx before loading (#2465)

This fixes a regression in PR #2396.

Signed-off-by: Jared Van Bortel
---
 gpt4all-chat/embllm.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/gpt4all-chat/embllm.cpp b/gpt4all-chat/embllm.cpp
index b16b1616..d42fcec5 100644
--- a/gpt4all-chat/embllm.cpp
+++ b/gpt4all-chat/embllm.cpp
@@ -84,10 +84,6 @@ bool EmbeddingLLMWorker::loadModel()
         return false;
     }
 
-    // FIXME(jared): the user may want this to take effect without having to restart
-    int n_threads = MySettings::globalInstance()->threadCount();
-    m_model->setThreadCount(n_threads);
-
     // NOTE: explicitly loads model on CPU to avoid GPU OOM
     // TODO(cebtenzzre): support GPU-accelerated embeddings
     bool success = m_model->loadModel(filePath.toStdString(), 2048, 0);
@@ -104,6 +100,11 @@ bool EmbeddingLLMWorker::loadModel()
         m_model = nullptr;
         return false;
     }
+
+    // FIXME(jared): the user may want this to take effect without having to restart
+    int n_threads = MySettings::globalInstance()->threadCount();
+    m_model->setThreadCount(n_threads);
+
     return true;
 }
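
For readers unfamiliar with the bug, below is a minimal sketch of the ordering hazard the patch fixes. It assumes, as the commit title suggests, that setThreadCount() forwards the value to the backend's llama context, which only exists once loadModel() has succeeded; the FakeBackend type and all member names here are illustrative placeholders, not the actual LLModel API.

#include <cstdio>
#include <memory>
#include <string>

// Illustrative stand-in for the embedding backend: the llama context is
// created only when loadModel() succeeds, so it is null before that.
struct FakeBackend {
    struct Ctx { int n_threads = 4; };
    std::unique_ptr<Ctx> ctx;                   // null until the model is loaded

    bool loadModel(const std::string &path) {
        ctx = std::make_unique<Ctx>();          // context comes into existence here
        std::printf("loaded %s\n", path.c_str());
        return true;
    }

    void setThreadCount(int n) {
        // Before the patch, the caller reached this while ctx was still null;
        // touching it there is the "use of llama ctx before loading".
        if (ctx)
            ctx->n_threads = n;
    }
};

int main() {
    FakeBackend model;
    // Regression order: configuring threads before the context exists.
    // model.setThreadCount(8);                 // would touch a null context
    // Fixed order, as in the patch: load first, then configure threads.
    if (model.loadModel("some-embedding-model.gguf"))   // placeholder path
        model.setThreadCount(8);
}

Moving the setThreadCount() call after a successful loadModel(), as the diff does, guarantees the context exists before it is touched; the FIXME about the setting requiring a restart to take effect is unchanged by the patch.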