From 813ccaf5d10a27c23f9fedf43d9e2a87bf64d8b8 Mon Sep 17 00:00:00 2001
From: Jared Van Bortel
Date: Fri, 30 Aug 2024 12:30:24 -0400
Subject: [PATCH] server: do not process the system prompt twice for new models (#2924)

Signed-off-by: Jared Van Bortel
---
 gpt4all-chat/CHANGELOG.md    |  8 +++-----
 gpt4all-chat/src/chat.cpp    |  1 -
 gpt4all-chat/src/chat.h      |  1 -
 gpt4all-chat/src/chatllm.cpp | 21 ++++++++++++++-------
 gpt4all-chat/src/server.cpp  |  8 ++++----
 5 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/gpt4all-chat/CHANGELOG.md b/gpt4all-chat/CHANGELOG.md
index 6f1457dd..47926f05 100644
--- a/gpt4all-chat/CHANGELOG.md
+++ b/gpt4all-chat/CHANGELOG.md
@@ -8,9 +8,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
 ### Added
 - Use greedy sampling when temperature is set to zero ([#2854](https://github.com/nomic-ai/gpt4all/pull/2854))
-- Use configured system prompt in server mode and ignore system messages ([#2921](https://github.com/nomic-ai/gpt4all/pull/2921))
+- Use configured system prompt in server mode and ignore system messages ([#2921](https://github.com/nomic-ai/gpt4all/pull/2921), [#2924](https://github.com/nomic-ai/gpt4all/pull/2924))
 
 ### Changed
+- The offline update button now directs users to the offline installer releases page. (by [@3Simplex](https://github.com/3Simplex) in [#2888](https://github.com/nomic-ai/gpt4all/pull/2888))
+- Change the website link on the home page to point to the new URL ([#2915](https://github.com/nomic-ai/gpt4all/pull/2915))
 - Smaller default window size, dynamic minimum size, and scaling tweaks ([#2904](https://github.com/nomic-ai/gpt4all/pull/2904))
 - Only allow a single instance of program to be run at a time ([#2923](https://github.com/nomic-ai/gpt4all/pull/2923]))
 
@@ -24,10 +26,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 - Fixed typo in several files. (by [@3Simplex](https://github.com/3Simplex) in [#2916](https://github.com/nomic-ai/gpt4all/pull/2916))
 - Fix the antenna icon tooltip when using the local server ([#2922](https://github.com/nomic-ai/gpt4all/pull/2922))
 
-### Changed
-- The offline update button now directs users to the offline installer releases page. (by [@3Simplex](https://github.com/3Simplex) in [#2888](https://github.com/nomic-ai/gpt4all/pull/2888))
-- Change the website link on the home page to point to the new URL ([#2915](https://github.com/nomic-ai/gpt4all/pull/2915))
-
 ## [3.2.1] - 2024-08-13
 
 ### Fixed
diff --git a/gpt4all-chat/src/chat.cpp b/gpt4all-chat/src/chat.cpp
index a44022c0..d9a66091 100644
--- a/gpt4all-chat/src/chat.cpp
+++ b/gpt4all-chat/src/chat.cpp
@@ -74,7 +74,6 @@ void Chat::connectLLM()
     connect(this, &Chat::promptRequested, m_llmodel, &ChatLLM::prompt, Qt::QueuedConnection);
     connect(this, &Chat::modelChangeRequested, m_llmodel, &ChatLLM::modelChangeRequested, Qt::QueuedConnection);
     connect(this, &Chat::loadDefaultModelRequested, m_llmodel, &ChatLLM::loadDefaultModel, Qt::QueuedConnection);
-    connect(this, &Chat::loadModelRequested, m_llmodel, &ChatLLM::loadModel, Qt::QueuedConnection);
     connect(this, &Chat::generateNameRequested, m_llmodel, &ChatLLM::generateName, Qt::QueuedConnection);
     connect(this, &Chat::regenerateResponseRequested, m_llmodel, &ChatLLM::regenerateResponse, Qt::QueuedConnection);
     connect(this, &Chat::resetResponseRequested, m_llmodel, &ChatLLM::resetResponse, Qt::QueuedConnection);
diff --git a/gpt4all-chat/src/chat.h b/gpt4all-chat/src/chat.h
index 065c624e..e3cdc756 100644
--- a/gpt4all-chat/src/chat.h
+++ b/gpt4all-chat/src/chat.h
@@ -146,7 +146,6 @@ Q_SIGNALS:
     void modelInfoChanged();
     void restoringFromTextChanged();
     void loadDefaultModelRequested();
-    void loadModelRequested(const ModelInfo &modelInfo);
     void generateNameRequested();
     void modelLoadingErrorChanged();
     void isServerChanged();
diff --git a/gpt4all-chat/src/chatllm.cpp b/gpt4all-chat/src/chatllm.cpp
index 86e0026c..fd9316f5 100644
--- a/gpt4all-chat/src/chatllm.cpp
+++ b/gpt4all-chat/src/chatllm.cpp
@@ -249,9 +249,11 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
     // and what the type and name of that model is. I've tried to comment extensively in this method
     // to provide an overview of what we're doing here.
 
-    // We're already loaded with this model
-    if (isModelLoaded() && this->modelInfo() == modelInfo)
-        return true;
+    if (isModelLoaded() && this->modelInfo() == modelInfo) {
+        // already acquired -> keep it and reset
+        resetContext();
+        return true; // already loaded
+    }
 
     // reset status
     emit modelLoadingPercentageChanged(std::numeric_limits<float>::min()); // small non-zero positive value
@@ -659,20 +661,25 @@ void ChatLLM::setModelInfo(const ModelInfo &modelInfo)
     emit modelInfoChanged(modelInfo);
 }
 
-void ChatLLM::acquireModel() {
+void ChatLLM::acquireModel()
+{
     m_llModelInfo = LLModelStore::globalInstance()->acquireModel();
     emit loadedModelInfoChanged();
 }
 
-void ChatLLM::resetModel() {
+void ChatLLM::resetModel()
+{
     m_llModelInfo = {};
     emit loadedModelInfoChanged();
 }
 
 void ChatLLM::modelChangeRequested(const ModelInfo &modelInfo)
 {
-    m_shouldBeLoaded = true;
-    loadModel(modelInfo);
+    // ignore attempts to switch to the same model twice
+    if (!isModelLoaded() || this->modelInfo() != modelInfo) {
+        m_shouldBeLoaded = true;
+        loadModel(modelInfo);
+    }
 }
 
 bool ChatLLM::handlePrompt(int32_t token)
diff --git a/gpt4all-chat/src/server.cpp b/gpt4all-chat/src/server.cpp
index 227c30a9..1da962f5 100644
--- a/gpt4all-chat/src/server.cpp
+++ b/gpt4all-chat/src/server.cpp
@@ -361,14 +361,14 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
     if (modelInfo.filename().isEmpty()) {
         std::cerr << "ERROR: couldn't load default model " << modelRequested.toStdString() << std::endl;
         return QHttpServerResponse(QHttpServerResponder::StatusCode::BadRequest);
-    } else if (!loadModel(modelInfo)) {
+    }
+
+    // NB: this resets the context, regardless of whether this model is already loaded
+    if (!loadModel(modelInfo)) {
         std::cerr << "ERROR: couldn't load model " << modelInfo.name().toStdString() << std::endl;
         return QHttpServerResponse(QHttpServerResponder::StatusCode::InternalServerError);
     }
 
-    // don't remember any context
-    resetContext();
-
     const QString promptTemplate = modelInfo.promptTemplate();
     const float top_k = modelInfo.topK();
     const int n_batch = modelInfo.promptBatchSize();
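Note on the resulting behaviour, with an illustrative sketch (not part of the patch above): previously Server::handleCompletionRequest() called loadModel() and then resetContext(), so a model loaded fresh for a request ended up processing the configured system prompt twice. After this change, ChatLLM::loadModel() resets the context itself when the requested model is already loaded, and the server relies on that single reset. The plain C++ below is a minimal approximation of that control flow under stated assumptions: MockChatLLM and handleCompletionRequest are hypothetical stand-ins, and resetContext() here represents whatever work re-processes the system prompt; it is not the project's actual Qt API.

#include <iostream>
#include <string>

// Hypothetical stand-in for ChatLLM; resetContext() models the work that
// re-processes the configured system prompt for the next request.
struct MockChatLLM {
    std::string loadedModel;
    int systemPromptRuns = 0;   // how many times the system prompt was processed

    bool isModelLoaded() const { return !loadedModel.empty(); }
    void resetContext() { ++systemPromptRuns; }

    // Post-patch behaviour: loadModel() always leaves a fresh context, whether
    // the model was already loaded or has to be loaded now.
    bool loadModel(const std::string &model) {
        if (isModelLoaded() && loadedModel == model) {
            resetContext();          // already loaded -> just reset the context
            return true;
        }
        loadedModel = model;         // pretend the weights are loaded here
        resetContext();              // a freshly loaded model also starts clean
        return true;
    }
};

// Post-patch server flow: rely on loadModel() for the reset instead of calling
// resetContext() again (the old flow did both, so a brand-new model ran the
// system prompt twice).
bool handleCompletionRequest(MockChatLLM &llm, const std::string &model) {
    if (!llm.loadModel(model))
        return false;                // the real server returns an error response here
    // ... build the prompt and generate the completion ...
    return true;
}

int main() {
    MockChatLLM llm;
    handleCompletionRequest(llm, "model-a");   // first request: model gets loaded
    handleCompletionRequest(llm, "model-a");   // second request: same model reused
    std::cout << "system prompt processed " << llm.systemPromptRuns
              << " times across 2 requests\n"; // prints 2, i.e. once per request
}

For the same reason, modelChangeRequested() in chatllm.cpp now ignores a request to switch to the model that is already loaded, so the now context-resetting loadModel() is not invoked for a no-op model change.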