The server has different lifetime management than the other chats.

Adam Treat 2023-05-13 19:33:19 -04:00 committed by AT
parent ddc24acf33
commit b71c0ac3bd
6 changed files with 24 additions and 19 deletions
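In short: the server-backed chat must keep its model across chat switches, so the unload-on-switch and reload-on-switch paths now skip it. A minimal sketch of the guard pattern this commit introduces (ChatLLM, m_isServer, unloadModel() and reloadModel() follow the diff below; everything else here is illustrative, not the project's real code):

#include <iostream>

// Stand-in for the real ChatLLM; only the m_isServer guards mirror the diff.
class ChatLLM {
public:
    explicit ChatLLM(bool isServer = false) : m_isServer(isServer) {}

    void unloadModel() {
        // Ordinary chats release the model when the user switches away;
        // the server chat keeps it for incoming API requests.
        if (!m_isLoaded || m_isServer)
            return;
        m_isLoaded = false;
        std::cout << "model unloaded\n";
    }

    void reloadModel() {
        // Ordinary chats reload eagerly on switch; the server chat loads
        // lazily, only when a completion request arrives.
        if (m_isLoaded || m_isServer)
            return;
        m_isLoaded = true;
        std::cout << "model reloaded\n";
    }

private:
    bool m_isLoaded = false;
    bool m_isServer = false;
};

int main()
{
    ChatLLM normalChat;          // affected by chat switching
    ChatLLM serverChat(true);    // both calls below are no-ops for it
    normalChat.reloadModel();    // prints "model reloaded"
    serverChat.reloadModel();    // nothing happens
    normalChat.unloadModel();    // prints "model unloaded"
    serverChat.unloadModel();    // nothing happens
    return 0;
}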

View File

@@ -171,9 +171,8 @@ public:
             return;
         }
-        if (m_currentChat)
+        if (m_currentChat && m_currentChat != m_serverChat)
             m_currentChat->unloadModel();
         m_currentChat = chat;
         if (!m_currentChat->isModelLoaded() && m_currentChat != m_serverChat)
             m_currentChat->reloadModel();

View File

@@ -81,13 +81,14 @@ void LLModelStore::releaseModel(const LLModelInfo &info)
     m_condition.wakeAll();
 }
 
-ChatLLM::ChatLLM(Chat *parent)
+ChatLLM::ChatLLM(Chat *parent, bool isServer)
     : QObject{nullptr}
     , m_promptResponseTokens(0)
     , m_promptTokens(0)
     , m_responseLogits(0)
     , m_isRecalc(false)
     , m_chat(parent)
+    , m_isServer(isServer)
 {
     moveToThread(&m_llmThread);
     connect(this, &ChatLLM::sendStartup, Network::globalInstance(), &Network::sendStartup);
@@ -151,7 +152,7 @@ bool ChatLLM::loadModel(const QString &modelName)
         delete m_modelInfo.model;
         m_modelInfo.model = nullptr;
         emit isModelLoadedChanged();
-    } else {
+    } else if (!m_isServer) {
         // This is a blocking call that tries to retrieve the model we need from the model store.
         // If it succeeds, then we just have to restore state. If the store has never had a model
         // returned to it, then the modelInfo.model pointer should be null which will happen on startup
@@ -163,7 +164,9 @@ bool ChatLLM::loadModel(const QString &modelName)
         // store, that our state was changed to not be loaded. If this is the case, release the model
         // back into the store and quit loading
         if (!m_shouldBeLoaded) {
+#if defined(DEBUG_MODEL_LOADING)
             qDebug() << "no longer need model" << m_chat->id() << m_modelInfo.model;
+#endif
             LLModelStore::globalInstance()->releaseModel(m_modelInfo);
             m_modelInfo = LLModelInfo();
             emit isModelLoadedChanged();
@@ -232,6 +235,7 @@ bool ChatLLM::loadModel(const QString &modelName)
         } else
             emit sendModelLoaded();
     } else {
+        if (!m_isServer)
             LLModelStore::globalInstance()->releaseModel(m_modelInfo); // release back into the store
         const QString error = QString("Could not find model %1").arg(modelName);
         emit modelLoadingError(error);
@@ -436,7 +440,7 @@ void ChatLLM::forceUnloadModel()
 
 void ChatLLM::unloadModel()
 {
-    if (!isModelLoaded())
+    if (!isModelLoaded() || m_isServer)
         return;
 
     saveState();
@@ -450,7 +454,7 @@ void ChatLLM::unloadModel()
 
 void ChatLLM::reloadModel()
 {
-    if (isModelLoaded())
+    if (isModelLoaded() || m_isServer)
         return;
 
 #if defined(DEBUG_MODEL_LOADING)

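For context on the releaseModel()/m_condition.wakeAll() lines above: regular chats hand a single model instance back and forth through LLModelStore, so acquiring it blocks until another chat releases it, which is why the server, with its own lifetime, now bypasses the store entirely. A sketch of that blocking hand-off using QMutex and QWaitCondition, which the visible wakeAll() call suggests; acquireModel() and the struct contents are assumptions for illustration, not the project's actual API:

#include <QMutex>
#include <QMutexLocker>
#include <QWaitCondition>
#include <optional>

struct LLModelInfo { void *model = nullptr; };   // simplified stand-in

class LLModelStore {
public:
    // Blocks the calling chat until some other chat has released the model.
    LLModelInfo acquireModel()                   // name assumed for illustration
    {
        QMutexLocker locker(&m_mutex);
        while (!m_info.has_value())
            m_condition.wait(&m_mutex);          // woken by releaseModel()
        LLModelInfo info = *m_info;
        m_info.reset();
        return info;
    }

    // Mirrors the shape of the releaseModel()/wakeAll() lines in the diff.
    void releaseModel(const LLModelInfo &info)
    {
        QMutexLocker locker(&m_mutex);
        m_info = info;
        m_condition.wakeAll();
    }

private:
    QMutex m_mutex;
    QWaitCondition m_condition;
    std::optional<LLModelInfo> m_info;
};

Since the server never acquires from the store, it must also never release into it, which appears to be what the new !m_isServer checks around the store calls guard against.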
View File

@@ -31,7 +31,7 @@ class ChatLLM : public QObject
     Q_PROPERTY(QString generatedName READ generatedName NOTIFY generatedNameChanged)
 
 public:
-    ChatLLM(Chat *parent);
+    ChatLLM(Chat *parent, bool isServer = false);
     virtual ~ChatLLM();
 
     bool isModelLoaded() const;
@@ -87,12 +87,7 @@ Q_SIGNALS:
     void shouldBeLoadedChanged();
 
 protected:
-    LLModel::PromptContext m_ctx;
-    quint32 m_promptTokens;
-    quint32 m_promptResponseTokens;
     void resetContextProtected();
-
-private:
     bool handlePrompt(int32_t token);
     bool handleResponse(int32_t token, const std::string &response);
     bool handleRecalculate(bool isRecalc);
@@ -102,7 +97,10 @@ private:
     void saveState();
     void restoreState();
 
-private:
+protected:
+    LLModel::PromptContext m_ctx;
+    quint32 m_promptTokens;
+    quint32 m_promptResponseTokens;
     LLModelInfo m_modelInfo;
     LLModelType m_modelType;
     std::string m_response;
@@ -115,6 +113,7 @@ private:
     std::atomic<bool> m_stopGenerating;
    std::atomic<bool> m_shouldBeLoaded;
     bool m_isRecalc;
+    bool m_isServer;
 };
 
 #endif // CHATLLM_H

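The header change above moves the prompt context and token counters from private to protected; the point is that Server derives from ChatLLM and needs direct access to that state. A compressed sketch of the relationship (class shapes simplified; usageTokens() is a hypothetical helper, not from the repository):

#include <cstdint>

// Simplified shapes; only the member names and the protected visibility
// mirror the diff above (the real header uses quint32).
class ChatLLM {
public:
    explicit ChatLLM(bool isServer = false) : m_isServer(isServer) {}
    virtual ~ChatLLM() = default;

protected:                           // was private before this commit
    std::uint32_t m_promptTokens = 0;
    std::uint32_t m_promptResponseTokens = 0;

private:
    bool m_isServer;
};

class Server : public ChatLLM {
public:
    Server() : ChatLLM(/*isServer=*/true) {}

    // Hypothetical helper: a subclass can now read the counters directly.
    std::uint32_t usageTokens() const {
        return m_promptTokens + m_promptResponseTokens;
    }
};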
View File

@@ -69,6 +69,7 @@ Drawer {
         anchors.bottom: checkForUpdatesButton.top
         anchors.bottomMargin: 10
         ScrollBar.vertical.policy: ScrollBar.AlwaysOn
+        clip: true
 
         ListView {
             id: conversationList

View File

@@ -820,7 +820,7 @@ Dialog {
                     settings.sync()
                 }
-                ToolTip.text: qsTr("WARNING: This enables the gui to act as a local web server for AI API requests")
+                ToolTip.text: qsTr("WARNING: This enables the gui to act as a local web server for AI API requests and will increase your RAM usage as well")
                 ToolTip.visible: hovered
                 background: Rectangle {

View File

@@ -52,7 +52,7 @@ static inline QJsonObject modelToJson(const ModelInfo &info)
 }
 
 Server::Server(Chat *chat)
-    : ChatLLM(chat)
+    : ChatLLM(chat, true /*isServer*/)
     , m_chat(chat)
     , m_server(nullptr)
 {
@@ -170,13 +170,15 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
         }
     }
 
+    setShouldBeLoaded(true);
+
     if (!foundModel) {
         if (!loadDefaultModel()) {
-            std::cerr << "ERROR: couldn't load default model" << model.toStdString() << std::endl;
+            std::cerr << "ERROR: couldn't load default model " << model.toStdString() << std::endl;
             return QHttpServerResponse(QHttpServerResponder::StatusCode::BadRequest);
         }
     } else if (!loadModel(model)) {
-        std::cerr << "ERROR: couldn't load model" << model.toStdString() << std::endl;
+        std::cerr << "ERROR: couldn't load model " << model.toStdString() << std::endl;
         return QHttpServerResponse(QHttpServerResponder::StatusCode::InternalServerError);
     }
@@ -308,7 +310,7 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
             repeat_last_n,
             LLM::globalInstance()->threadCount())) {
-        std::cerr << "ERROR: couldn't prompt model" << model.toStdString() << std::endl;
+        std::cerr << "ERROR: couldn't prompt model " << model.toStdString() << std::endl;
         return QHttpServerResponse(QHttpServerResponder::StatusCode::InternalServerError);
     }
 
     QString echoedPrompt = actualPrompt;