diff --git a/gpt4all-chat/chatlistmodel.h b/gpt4all-chat/chatlistmodel.h
index 8c60b7a9..881b2cd9 100644
--- a/gpt4all-chat/chatlistmodel.h
+++ b/gpt4all-chat/chatlistmodel.h
@@ -171,9 +171,8 @@ public:
             return;
         }
 
-        if (m_currentChat)
+        if (m_currentChat && m_currentChat != m_serverChat)
             m_currentChat->unloadModel();
-
         m_currentChat = chat;
         if (!m_currentChat->isModelLoaded() && m_currentChat != m_serverChat)
             m_currentChat->reloadModel();
diff --git a/gpt4all-chat/chatllm.cpp b/gpt4all-chat/chatllm.cpp
index 0e8b08cb..e3268189 100644
--- a/gpt4all-chat/chatllm.cpp
+++ b/gpt4all-chat/chatllm.cpp
@@ -81,13 +81,14 @@ void LLModelStore::releaseModel(const LLModelInfo &info)
     m_condition.wakeAll();
 }
 
-ChatLLM::ChatLLM(Chat *parent)
+ChatLLM::ChatLLM(Chat *parent, bool isServer)
     : QObject{nullptr}
     , m_promptResponseTokens(0)
     , m_promptTokens(0)
     , m_responseLogits(0)
     , m_isRecalc(false)
     , m_chat(parent)
+    , m_isServer(isServer)
 {
     moveToThread(&m_llmThread);
     connect(this, &ChatLLM::sendStartup, Network::globalInstance(), &Network::sendStartup);
@@ -151,7 +152,7 @@ bool ChatLLM::loadModel(const QString &modelName)
         delete m_modelInfo.model;
         m_modelInfo.model = nullptr;
         emit isModelLoadedChanged();
-    } else {
+    } else if (!m_isServer) {
         // This is a blocking call that tries to retrieve the model we need from the model store.
         // If it succeeds, then we just have to restore state. If the store has never had a model
         // returned to it, then the modelInfo.model pointer should be null which will happen on startup
@@ -163,7 +164,9 @@ bool ChatLLM::loadModel(const QString &modelName)
         // store, that our state was changed to not be loaded. If this is the case, release the model
         // back into the store and quit loading
         if (!m_shouldBeLoaded) {
+#if defined(DEBUG_MODEL_LOADING)
             qDebug() << "no longer need model" << m_chat->id() << m_modelInfo.model;
+#endif
             LLModelStore::globalInstance()->releaseModel(m_modelInfo);
             m_modelInfo = LLModelInfo();
             emit isModelLoadedChanged();
@@ -232,7 +235,8 @@ bool ChatLLM::loadModel(const QString &modelName)
         } else
             emit sendModelLoaded();
     } else {
-        LLModelStore::globalInstance()->releaseModel(m_modelInfo); // release back into the store
+        if (!m_isServer)
+            LLModelStore::globalInstance()->releaseModel(m_modelInfo); // release back into the store
         const QString error = QString("Could not find model %1").arg(modelName);
         emit modelLoadingError(error);
     }
@@ -436,7 +440,7 @@ void ChatLLM::forceUnloadModel()
 
 void ChatLLM::unloadModel()
 {
-    if (!isModelLoaded())
+    if (!isModelLoaded() || m_isServer)
         return;
 
     saveState();
@@ -450,7 +454,7 @@ void ChatLLM::unloadModel()
 
 void ChatLLM::reloadModel()
 {
-    if (isModelLoaded())
+    if (isModelLoaded() || m_isServer)
         return;
 
 #if defined(DEBUG_MODEL_LOADING)
diff --git a/gpt4all-chat/chatllm.h b/gpt4all-chat/chatllm.h
index 4f6d39f7..0b1eb343 100644
--- a/gpt4all-chat/chatllm.h
+++ b/gpt4all-chat/chatllm.h
@@ -31,7 +31,7 @@ class ChatLLM : public QObject
     Q_PROPERTY(QString generatedName READ generatedName NOTIFY generatedNameChanged)
 
 public:
-    ChatLLM(Chat *parent);
+    ChatLLM(Chat *parent, bool isServer = false);
     virtual ~ChatLLM();
 
     bool isModelLoaded() const;
@@ -87,12 +87,7 @@ Q_SIGNALS:
     void shouldBeLoadedChanged();
 
 protected:
-    LLModel::PromptContext m_ctx;
-    quint32 m_promptTokens;
-    quint32 m_promptResponseTokens;
     void resetContextProtected();
-
-private:
     bool handlePrompt(int32_t token);
     bool handleResponse(int32_t token, const std::string &response);
     bool handleRecalculate(bool isRecalc);
@@ -102,7 +97,10 @@ private:
     void saveState();
     void restoreState();
 
-private:
+protected:
+    LLModel::PromptContext m_ctx;
+    quint32 m_promptTokens;
+    quint32 m_promptResponseTokens;
     LLModelInfo m_modelInfo;
     LLModelType m_modelType;
     std::string m_response;
@@ -115,6 +113,7 @@ private:
    std::atomic<bool> m_stopGenerating;
    std::atomic<bool> m_shouldBeLoaded;
    bool m_isRecalc;
+   bool m_isServer;
 };
 
 #endif // CHATLLM_H
diff --git a/gpt4all-chat/qml/ChatDrawer.qml b/gpt4all-chat/qml/ChatDrawer.qml
index 8db645e6..9df5e891 100644
--- a/gpt4all-chat/qml/ChatDrawer.qml
+++ b/gpt4all-chat/qml/ChatDrawer.qml
@@ -69,6 +69,7 @@ Drawer {
         anchors.bottom: checkForUpdatesButton.top
         anchors.bottomMargin: 10
         ScrollBar.vertical.policy: ScrollBar.AlwaysOn
+        clip: true
 
         ListView {
             id: conversationList
diff --git a/gpt4all-chat/qml/SettingsDialog.qml b/gpt4all-chat/qml/SettingsDialog.qml
index 7c2f6d59..87873fa8 100644
--- a/gpt4all-chat/qml/SettingsDialog.qml
+++ b/gpt4all-chat/qml/SettingsDialog.qml
@@ -820,7 +820,7 @@ Dialog {
                     settings.sync()
                 }
 
-                ToolTip.text: qsTr("WARNING: This enables the gui to act as a local web server for AI API requests")
+                ToolTip.text: qsTr("WARNING: This enables the gui to act as a local web server for AI API requests and will increase your RAM usage as well")
                 ToolTip.visible: hovered
 
                 background: Rectangle {
diff --git a/gpt4all-chat/server.cpp b/gpt4all-chat/server.cpp
index 1c2484df..3575883b 100644
--- a/gpt4all-chat/server.cpp
+++ b/gpt4all-chat/server.cpp
@@ -52,7 +52,7 @@ static inline QJsonObject modelToJson(const ModelInfo &info)
 }
 
 Server::Server(Chat *chat)
-    : ChatLLM(chat)
+    : ChatLLM(chat, true /*isServer*/)
     , m_chat(chat)
     , m_server(nullptr)
 {
@@ -170,13 +170,15 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
         }
     }
 
+    setShouldBeLoaded(true);
+
     if (!foundModel) {
         if (!loadDefaultModel()) {
-            std::cerr << "ERROR: couldn't load default model" << model.toStdString() << std::endl;
+            std::cerr << "ERROR: couldn't load default model " << model.toStdString() << std::endl;
             return QHttpServerResponse(QHttpServerResponder::StatusCode::BadRequest);
         }
     } else if (!loadModel(model)) {
-        std::cerr << "ERROR: couldn't load model" << model.toStdString() << std::endl;
+        std::cerr << "ERROR: couldn't load model " << model.toStdString() << std::endl;
         return QHttpServerResponse(QHttpServerResponder::StatusCode::InternalServerError);
     }
 
@@ -308,7 +310,7 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
             repeat_last_n,
             LLM::globalInstance()->threadCount())) {
 
-            std::cerr << "ERROR: couldn't prompt model" << model.toStdString() << std::endl;
+            std::cerr << "ERROR: couldn't prompt model " << model.toStdString() << std::endl;
             return QHttpServerResponse(QHttpServerResponder::StatusCode::InternalServerError);
         }
         QString echoedPrompt = actualPrompt;
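
For reference, here is a minimal client sketch (not part of this patch) showing how the completion route served by Server::handleCompletionRequest could be exercised once the local API server is enabled in settings. It assumes the application's default listen port of 4891 and an OpenAI-style /v1/completions path; the model name in the request body is a placeholder for whatever model is installed locally.

    // Hypothetical Qt client for the local completion endpoint.
    // Assumes port 4891 and the /v1/completions route; adjust to your build.
    #include <QCoreApplication>
    #include <QJsonDocument>
    #include <QJsonObject>
    #include <QNetworkAccessManager>
    #include <QNetworkReply>
    #include <QNetworkRequest>
    #include <QTextStream>
    #include <QUrl>

    int main(int argc, char *argv[])
    {
        QCoreApplication app(argc, argv);

        QNetworkAccessManager manager;
        QNetworkRequest request(QUrl("http://localhost:4891/v1/completions"));
        request.setHeader(QNetworkRequest::ContentTypeHeader, "application/json");

        // Minimal request body; "model" is a placeholder for an installed model.
        QJsonObject body;
        body["model"] = "gpt4all-j-v1.3-groovy";
        body["prompt"] = "Hello, world";
        body["max_tokens"] = 64;

        QNetworkReply *reply = manager.post(request, QJsonDocument(body).toJson());
        QObject::connect(reply, &QNetworkReply::finished, [&]() {
            // Print the raw JSON response from the server, then exit.
            QTextStream(stdout) << reply->readAll() << "\n";
            reply->deleteLater();
            app.quit();
        });

        return app.exec();
    }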