From 813ccaf5d10a27c23f9fedf43d9e2a87bf64d8b8 Mon Sep 17 00:00:00 2001
From: Jared Van Bortel
Date: Fri, 30 Aug 2024 12:30:24 -0400
Subject: [PATCH] server: do not process the system prompt twice for new models (#2924)

Signed-off-by: Jared Van Bortel
---
 gpt4all-chat/CHANGELOG.md    |  8 +++-----
 gpt4all-chat/src/chat.cpp    |  1 -
 gpt4all-chat/src/chat.h      |  1 -
 gpt4all-chat/src/chatllm.cpp | 21 ++++++++++++++-------
 gpt4all-chat/src/server.cpp  |  8 ++++----
 5 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/gpt4all-chat/CHANGELOG.md b/gpt4all-chat/CHANGELOG.md
index 6f1457dd..47926f05 100644
--- a/gpt4all-chat/CHANGELOG.md
+++ b/gpt4all-chat/CHANGELOG.md
@@ -8,9 +8,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
 ### Added
 - Use greedy sampling when temperature is set to zero ([#2854](https://github.com/nomic-ai/gpt4all/pull/2854))
-- Use configured system prompt in server mode and ignore system messages ([#2921](https://github.com/nomic-ai/gpt4all/pull/2921))
+- Use configured system prompt in server mode and ignore system messages ([#2921](https://github.com/nomic-ai/gpt4all/pull/2921), [#2924](https://github.com/nomic-ai/gpt4all/pull/2924))
 
 ### Changed
+- The offline update button now directs users to the offline installer releases page. (by [@3Simplex](https://github.com/3Simplex) in [#2888](https://github.com/nomic-ai/gpt4all/pull/2888))
+- Change the website link on the home page to point to the new URL ([#2915](https://github.com/nomic-ai/gpt4all/pull/2915))
 - Smaller default window size, dynamic minimum size, and scaling tweaks ([#2904](https://github.com/nomic-ai/gpt4all/pull/2904))
 - Only allow a single instance of program to be run at a time ([#2923](https://github.com/nomic-ai/gpt4all/pull/2923]))
 
@@ -24,10 +26,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 - Fixed typo in several files. (by [@3Simplex](https://github.com/3Simplex) in [#2916](https://github.com/nomic-ai/gpt4all/pull/2916))
 - Fix the antenna icon tooltip when using the local server ([#2922](https://github.com/nomic-ai/gpt4all/pull/2922))
 
-### Changed
-- The offline update button now directs users to the offline installer releases page. (by [@3Simplex](https://github.com/3Simplex) in [#2888](https://github.com/nomic-ai/gpt4all/pull/2888))
-- Change the website link on the home page to point to the new URL ([#2915](https://github.com/nomic-ai/gpt4all/pull/2915))
-
 ## [3.2.1] - 2024-08-13
 
 ### Fixed
diff --git a/gpt4all-chat/src/chat.cpp b/gpt4all-chat/src/chat.cpp
index a44022c0..d9a66091 100644
--- a/gpt4all-chat/src/chat.cpp
+++ b/gpt4all-chat/src/chat.cpp
@@ -74,7 +74,6 @@ void Chat::connectLLM()
     connect(this, &Chat::promptRequested, m_llmodel, &ChatLLM::prompt, Qt::QueuedConnection);
     connect(this, &Chat::modelChangeRequested, m_llmodel, &ChatLLM::modelChangeRequested, Qt::QueuedConnection);
     connect(this, &Chat::loadDefaultModelRequested, m_llmodel, &ChatLLM::loadDefaultModel, Qt::QueuedConnection);
-    connect(this, &Chat::loadModelRequested, m_llmodel, &ChatLLM::loadModel, Qt::QueuedConnection);
     connect(this, &Chat::generateNameRequested, m_llmodel, &ChatLLM::generateName, Qt::QueuedConnection);
     connect(this, &Chat::regenerateResponseRequested, m_llmodel, &ChatLLM::regenerateResponse, Qt::QueuedConnection);
     connect(this, &Chat::resetResponseRequested, m_llmodel, &ChatLLM::resetResponse, Qt::QueuedConnection);
diff --git a/gpt4all-chat/src/chat.h b/gpt4all-chat/src/chat.h
index 065c624e..e3cdc756 100644
--- a/gpt4all-chat/src/chat.h
+++ b/gpt4all-chat/src/chat.h
@@ -146,7 +146,6 @@ Q_SIGNALS:
     void modelInfoChanged();
     void restoringFromTextChanged();
     void loadDefaultModelRequested();
-    void loadModelRequested(const ModelInfo &modelInfo);
     void generateNameRequested();
     void modelLoadingErrorChanged();
     void isServerChanged();
diff --git a/gpt4all-chat/src/chatllm.cpp b/gpt4all-chat/src/chatllm.cpp
index 86e0026c..fd9316f5 100644
--- a/gpt4all-chat/src/chatllm.cpp
+++ b/gpt4all-chat/src/chatllm.cpp
@@ -249,9 +249,11 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
     // and what the type and name of that model is. I've tried to comment extensively in this method
     // to provide an overview of what we're doing here.
 
-    // We're already loaded with this model
-    if (isModelLoaded() && this->modelInfo() == modelInfo)
-        return true;
+    if (isModelLoaded() && this->modelInfo() == modelInfo) {
+        // already acquired -> keep it and reset
+        resetContext();
+        return true; // already loaded
+    }
 
     // reset status
     emit modelLoadingPercentageChanged(std::numeric_limits<float>::min()); // small non-zero positive value
@@ -659,20 +661,25 @@ void ChatLLM::setModelInfo(const ModelInfo &modelInfo)
     emit modelInfoChanged(modelInfo);
 }
 
-void ChatLLM::acquireModel() {
+void ChatLLM::acquireModel()
+{
     m_llModelInfo = LLModelStore::globalInstance()->acquireModel();
     emit loadedModelInfoChanged();
 }
 
-void ChatLLM::resetModel() {
+void ChatLLM::resetModel()
+{
     m_llModelInfo = {};
     emit loadedModelInfoChanged();
 }
 
 void ChatLLM::modelChangeRequested(const ModelInfo &modelInfo)
 {
-    m_shouldBeLoaded = true;
-    loadModel(modelInfo);
+    // ignore attempts to switch to the same model twice
+    if (!isModelLoaded() || this->modelInfo() != modelInfo) {
+        m_shouldBeLoaded = true;
+        loadModel(modelInfo);
+    }
 }
 
 bool ChatLLM::handlePrompt(int32_t token)
diff --git a/gpt4all-chat/src/server.cpp b/gpt4all-chat/src/server.cpp
index 227c30a9..1da962f5 100644
--- a/gpt4all-chat/src/server.cpp
+++ b/gpt4all-chat/src/server.cpp
@@ -361,14 +361,14 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
     if (modelInfo.filename().isEmpty()) {
         std::cerr << "ERROR: couldn't load default model " << modelRequested.toStdString() << std::endl;
         return QHttpServerResponse(QHttpServerResponder::StatusCode::BadRequest);
-    } else if (!loadModel(modelInfo)) {
+    }
+
+    // NB: this resets the context, regardless of whether this model is already loaded
+    if (!loadModel(modelInfo)) {
         std::cerr << "ERROR: couldn't load model " << modelInfo.name().toStdString() << std::endl;
         return QHttpServerResponse(QHttpServerResponder::StatusCode::InternalServerError);
     }
 
-    // don't remember any context
-    resetContext();
-
     const QString promptTemplate = modelInfo.promptTemplate();
     const float top_k = modelInfo.topK();
     const int n_batch = modelInfo.promptBatchSize();
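Note on the resulting behaviour, with an illustrative sketch (not part of the patch above): previously Server::handleCompletionRequest() called loadModel() and then resetContext(), so a model loaded fresh for a request ended up processing the configured system prompt twice. After this change, ChatLLM::loadModel() resets the context itself when the requested model is already loaded, and the server relies on that single reset. The plain C++ below is a minimal approximation of that control flow under stated assumptions: MockChatLLM and handleCompletionRequest are hypothetical stand-ins, and resetContext() here represents whatever work re-processes the system prompt; it is not the project's actual Qt API.

#include <iostream>
#include <string>

// Hypothetical stand-in for ChatLLM; resetContext() models the work that
// re-processes the configured system prompt for the next request.
struct MockChatLLM {
    std::string loadedModel;
    int systemPromptRuns = 0;   // how many times the system prompt was processed

    bool isModelLoaded() const { return !loadedModel.empty(); }
    void resetContext() { ++systemPromptRuns; }

    // Post-patch behaviour: loadModel() always leaves a fresh context, whether
    // the model was already loaded or has to be loaded now.
    bool loadModel(const std::string &model) {
        if (isModelLoaded() && loadedModel == model) {
            resetContext();          // already loaded -> just reset the context
            return true;
        }
        loadedModel = model;         // pretend the weights are loaded here
        resetContext();              // a freshly loaded model also starts clean
        return true;
    }
};

// Post-patch server flow: rely on loadModel() for the reset instead of calling
// resetContext() again (the old flow did both, so a brand-new model ran the
// system prompt twice).
bool handleCompletionRequest(MockChatLLM &llm, const std::string &model) {
    if (!llm.loadModel(model))
        return false;                // the real server returns an error response here
    // ... build the prompt and generate the completion ...
    return true;
}

int main() {
    MockChatLLM llm;
    handleCompletionRequest(llm, "model-a");   // first request: model gets loaded
    handleCompletionRequest(llm, "model-a");   // second request: same model reused
    std::cout << "system prompt processed " << llm.systemPromptRuns
              << " times across 2 requests\n"; // prints 2, i.e. once per request
}

For the same reason, modelChangeRequested() in chatllm.cpp now ignores a request to switch to the model that is already loaded, so the now context-resetting loadModel() is not invoked for a no-op model change.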