The server has different lifetime management than the other chats.

This commit is contained in:
Adam Treat 2023-05-13 19:33:19 -04:00 committed by AT
parent ddc24acf33
commit b71c0ac3bd
6 changed files with 24 additions and 19 deletions

View File

@ -171,9 +171,8 @@ public:
return;
}
if (m_currentChat)
if (m_currentChat && m_currentChat != m_serverChat)
m_currentChat->unloadModel();
m_currentChat = chat;
if (!m_currentChat->isModelLoaded() && m_currentChat != m_serverChat)
m_currentChat->reloadModel();

View File

@ -81,13 +81,14 @@ void LLModelStore::releaseModel(const LLModelInfo &info)
m_condition.wakeAll();
}
ChatLLM::ChatLLM(Chat *parent)
ChatLLM::ChatLLM(Chat *parent, bool isServer)
: QObject{nullptr}
, m_promptResponseTokens(0)
, m_promptTokens(0)
, m_responseLogits(0)
, m_isRecalc(false)
, m_chat(parent)
, m_isServer(isServer)
{
moveToThread(&m_llmThread);
connect(this, &ChatLLM::sendStartup, Network::globalInstance(), &Network::sendStartup);
@ -151,7 +152,7 @@ bool ChatLLM::loadModel(const QString &modelName)
delete m_modelInfo.model;
m_modelInfo.model = nullptr;
emit isModelLoadedChanged();
} else {
} else if (!m_isServer) {
// This is a blocking call that tries to retrieve the model we need from the model store.
// If it succeeds, then we just have to restore state. If the store has never had a model
// returned to it, then the modelInfo.model pointer should be null which will happen on startup
@ -163,7 +164,9 @@ bool ChatLLM::loadModel(const QString &modelName)
// store, that our state was changed to not be loaded. If this is the case, release the model
// back into the store and quit loading
if (!m_shouldBeLoaded) {
#if defined(DEBUG_MODEL_LOADING)
qDebug() << "no longer need model" << m_chat->id() << m_modelInfo.model;
#endif
LLModelStore::globalInstance()->releaseModel(m_modelInfo);
m_modelInfo = LLModelInfo();
emit isModelLoadedChanged();
@ -232,7 +235,8 @@ bool ChatLLM::loadModel(const QString &modelName)
} else
emit sendModelLoaded();
} else {
LLModelStore::globalInstance()->releaseModel(m_modelInfo); // release back into the store
if (!m_isServer)
LLModelStore::globalInstance()->releaseModel(m_modelInfo); // release back into the store
const QString error = QString("Could not find model %1").arg(modelName);
emit modelLoadingError(error);
}
@ -436,7 +440,7 @@ void ChatLLM::forceUnloadModel()
void ChatLLM::unloadModel()
{
if (!isModelLoaded())
if (!isModelLoaded() || m_isServer)
return;
saveState();
@ -450,7 +454,7 @@ void ChatLLM::unloadModel()
void ChatLLM::reloadModel()
{
if (isModelLoaded())
if (isModelLoaded() || m_isServer)
return;
#if defined(DEBUG_MODEL_LOADING)

View File

@ -31,7 +31,7 @@ class ChatLLM : public QObject
Q_PROPERTY(QString generatedName READ generatedName NOTIFY generatedNameChanged)
public:
ChatLLM(Chat *parent);
ChatLLM(Chat *parent, bool isServer = false);
virtual ~ChatLLM();
bool isModelLoaded() const;
@ -87,12 +87,7 @@ Q_SIGNALS:
void shouldBeLoadedChanged();
protected:
LLModel::PromptContext m_ctx;
quint32 m_promptTokens;
quint32 m_promptResponseTokens;
void resetContextProtected();
private:
bool handlePrompt(int32_t token);
bool handleResponse(int32_t token, const std::string &response);
bool handleRecalculate(bool isRecalc);
@ -102,7 +97,10 @@ private:
void saveState();
void restoreState();
private:
protected:
LLModel::PromptContext m_ctx;
quint32 m_promptTokens;
quint32 m_promptResponseTokens;
LLModelInfo m_modelInfo;
LLModelType m_modelType;
std::string m_response;
@ -115,6 +113,7 @@ private:
std::atomic<bool> m_stopGenerating;
std::atomic<bool> m_shouldBeLoaded;
bool m_isRecalc;
bool m_isServer;
};
#endif // CHATLLM_H

View File

@ -69,6 +69,7 @@ Drawer {
anchors.bottom: checkForUpdatesButton.top
anchors.bottomMargin: 10
ScrollBar.vertical.policy: ScrollBar.AlwaysOn
clip: true
ListView {
id: conversationList

View File

@ -820,7 +820,7 @@ Dialog {
settings.sync()
}
ToolTip.text: qsTr("WARNING: This enables the gui to act as a local web server for AI API requests")
ToolTip.text: qsTr("WARNING: This enables the gui to act as a local web server for AI API requests and will increase your RAM usage as well")
ToolTip.visible: hovered
background: Rectangle {

View File

@ -52,7 +52,7 @@ static inline QJsonObject modelToJson(const ModelInfo &info)
}
Server::Server(Chat *chat)
: ChatLLM(chat)
: ChatLLM(chat, true /*isServer*/)
, m_chat(chat)
, m_server(nullptr)
{
@ -170,13 +170,15 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
}
}
setShouldBeLoaded(true);
if (!foundModel) {
if (!loadDefaultModel()) {
std::cerr << "ERROR: couldn't load default model" << model.toStdString() << std::endl;
std::cerr << "ERROR: couldn't load default model " << model.toStdString() << std::endl;
return QHttpServerResponse(QHttpServerResponder::StatusCode::BadRequest);
}
} else if (!loadModel(model)) {
std::cerr << "ERROR: couldn't load model" << model.toStdString() << std::endl;
std::cerr << "ERROR: couldn't load model " << model.toStdString() << std::endl;
return QHttpServerResponse(QHttpServerResponder::StatusCode::InternalServerError);
}
@ -308,7 +310,7 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
repeat_last_n,
LLM::globalInstance()->threadCount())) {
std::cerr << "ERROR: couldn't prompt model" << model.toStdString() << std::endl;
std::cerr << "ERROR: couldn't prompt model " << model.toStdString() << std::endl;
return QHttpServerResponse(QHttpServerResponder::StatusCode::InternalServerError);
}
QString echoedPrompt = actualPrompt;