The server has different lifetime management than the other chats.

2024-11-18 03:25:46 +00:00 · 2023-05-13 19:33:19 -04:00 · 2023-05-13 19:33:19 -04:00 · b71c0ac3bd
commit b71c0ac3bd
parent ddc24acf33
6 changed files with 24 additions and 19 deletions
--- a/gpt4all-chat/chatlistmodel.h
+++ b/gpt4all-chat/chatlistmodel.h
@ -171,9 +171,8 @@ public:
            return;
        }

-        if (m_currentChat)
+        if (m_currentChat && m_currentChat != m_serverChat)
            m_currentChat->unloadModel();
-
        m_currentChat = chat;
        if (!m_currentChat->isModelLoaded() && m_currentChat != m_serverChat)
            m_currentChat->reloadModel();
--- a/gpt4all-chat/chatllm.cpp
+++ b/gpt4all-chat/chatllm.cpp
@ -81,13 +81,14 @@ void LLModelStore::releaseModel(const LLModelInfo &info)
    m_condition.wakeAll();
 }

-ChatLLM::ChatLLM(Chat *parent)
+ChatLLM::ChatLLM(Chat *parent, bool isServer)
    : QObject{nullptr}
    , m_promptResponseTokens(0)
    , m_promptTokens(0)
    , m_responseLogits(0)
    , m_isRecalc(false)
    , m_chat(parent)
+    , m_isServer(isServer)
 {
    moveToThread(&m_llmThread);
    connect(this, &ChatLLM::sendStartup, Network::globalInstance(), &Network::sendStartup);
@ -151,7 +152,7 @@ bool ChatLLM::loadModel(const QString &modelName)
        delete m_modelInfo.model;
        m_modelInfo.model = nullptr;
        emit isModelLoadedChanged();
-    } else {
+    } else if (!m_isServer) {
        // This is a blocking call that tries to retrieve the model we need from the model store.
        // If it succeeds, then we just have to restore state. If the store has never had a model
        // returned to it, then the modelInfo.model pointer should be null which will happen on startup
@ -163,7 +164,9 @@ bool ChatLLM::loadModel(const QString &modelName)
        // store, that our state was changed to not be loaded. If this is the case, release the model
        // back into the store and quit loading
        if (!m_shouldBeLoaded) {
+#if defined(DEBUG_MODEL_LOADING)
            qDebug() << "no longer need model" << m_chat->id() << m_modelInfo.model;
+#endif
            LLModelStore::globalInstance()->releaseModel(m_modelInfo);
            m_modelInfo = LLModelInfo();
            emit isModelLoadedChanged();
@ -232,7 +235,8 @@ bool ChatLLM::loadModel(const QString &modelName)
        } else
            emit sendModelLoaded();
    } else {
-        LLModelStore::globalInstance()->releaseModel(m_modelInfo); // release back into the store
+        if (!m_isServer)
+            LLModelStore::globalInstance()->releaseModel(m_modelInfo); // release back into the store
        const QString error = QString("Could not find model %1").arg(modelName);
        emit modelLoadingError(error);
    }
@ -436,7 +440,7 @@ void ChatLLM::forceUnloadModel()

 void ChatLLM::unloadModel()
 {
-    if (!isModelLoaded())
+    if (!isModelLoaded() || m_isServer)
        return;

    saveState();
@ -450,7 +454,7 @@ void ChatLLM::unloadModel()

 void ChatLLM::reloadModel()
 {
-    if (isModelLoaded())
+    if (isModelLoaded() || m_isServer)
        return;

 #if defined(DEBUG_MODEL_LOADING)
--- a/gpt4all-chat/chatllm.h
+++ b/gpt4all-chat/chatllm.h
@ -31,7 +31,7 @@ class ChatLLM : public QObject
    Q_PROPERTY(QString generatedName READ generatedName NOTIFY generatedNameChanged)

 public:
-    ChatLLM(Chat *parent);
+    ChatLLM(Chat *parent, bool isServer = false);
    virtual ~ChatLLM();

    bool isModelLoaded() const;
@ -87,12 +87,7 @@ Q_SIGNALS:
    void shouldBeLoadedChanged();

 protected:
-    LLModel::PromptContext m_ctx;
-    quint32 m_promptTokens;
-    quint32 m_promptResponseTokens;
    void resetContextProtected();
-
-private:
    bool handlePrompt(int32_t token);
    bool handleResponse(int32_t token, const std::string &response);
    bool handleRecalculate(bool isRecalc);
@ -102,7 +97,10 @@ private:
    void saveState();
    void restoreState();

-private:
+protected:
+    LLModel::PromptContext m_ctx;
+    quint32 m_promptTokens;
+    quint32 m_promptResponseTokens;
    LLModelInfo m_modelInfo;
    LLModelType m_modelType;
    std::string m_response;
@ -115,6 +113,7 @@ private:
    std::atomic<bool> m_stopGenerating;
    std::atomic<bool> m_shouldBeLoaded;
    bool m_isRecalc;
+    bool m_isServer;
 };

 #endif // CHATLLM_H
--- a/gpt4all-chat/qml/ChatDrawer.qml
+++ b/gpt4all-chat/qml/ChatDrawer.qml
@ -69,6 +69,7 @@ Drawer {
            anchors.bottom: checkForUpdatesButton.top
            anchors.bottomMargin: 10
            ScrollBar.vertical.policy: ScrollBar.AlwaysOn
+            clip: true

            ListView {
                id: conversationList
--- a/gpt4all-chat/qml/SettingsDialog.qml
+++ b/gpt4all-chat/qml/SettingsDialog.qml
@ -820,7 +820,7 @@ Dialog {
                            settings.sync()
                        }

-                        ToolTip.text: qsTr("WARNING: This enables the gui to act as a local web server for AI API requests")
+                        ToolTip.text: qsTr("WARNING: This enables the gui to act as a local web server for AI API requests and will increase your RAM usage as well")
                        ToolTip.visible: hovered

                        background: Rectangle {
--- a/gpt4all-chat/server.cpp
+++ b/gpt4all-chat/server.cpp
@ -52,7 +52,7 @@ static inline QJsonObject modelToJson(const ModelInfo &info)
 }

 Server::Server(Chat *chat)
-    : ChatLLM(chat)
+    : ChatLLM(chat, true /*isServer*/)
    , m_chat(chat)
    , m_server(nullptr)
 {
@ -170,13 +170,15 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
        }
    }

+    setShouldBeLoaded(true);
+
    if (!foundModel) {
        if (!loadDefaultModel()) {
-            std::cerr << "ERROR: couldn't load default model" << model.toStdString() << std::endl;
+            std::cerr << "ERROR: couldn't load default model " << model.toStdString() << std::endl;
            return QHttpServerResponse(QHttpServerResponder::StatusCode::BadRequest);
        }
    } else if (!loadModel(model)) {
-        std::cerr << "ERROR: couldn't load model" << model.toStdString() << std::endl;
+        std::cerr << "ERROR: couldn't load model " << model.toStdString() << std::endl;
        return QHttpServerResponse(QHttpServerResponder::StatusCode::InternalServerError);
    }

@ -308,7 +310,7 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
            repeat_last_n,
            LLM::globalInstance()->threadCount())) {

-            std::cerr << "ERROR: couldn't prompt model" << model.toStdString() << std::endl;
+            std::cerr << "ERROR: couldn't prompt model " << model.toStdString() << std::endl;
            return QHttpServerResponse(QHttpServerResponder::StatusCode::InternalServerError);
        }
        QString echoedPrompt = actualPrompt;