https://github.com/nomic-ai/gpt4all
The server has different lifetime management than the other chats.
commit b71c0ac3bd (parent ddc24acf33)
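The gist of the change, as a minimal non-Qt sketch: the worker records at construction whether it backs the local API server, and the normal unload/reload lifetime paths become no-ops for that instance, so the server's model stays resident instead of cycling through the shared model store. (Worker, m_modelLoaded, and main are illustrative names, not identifiers from the repo.)

// Minimal sketch of the lifetime policy introduced below; not the project's code.
#include <iostream>

class Worker {
public:
    explicit Worker(bool isServer = false) : m_isServer(isServer) {}

    void unloadModel() {
        if (!m_modelLoaded || m_isServer)   // the server never gives its model up
            return;
        m_modelLoaded = false;
        std::cout << "model unloaded\n";
    }

    void reloadModel() {
        if (m_modelLoaded || m_isServer)    // the server loads on demand in its request handler instead
            return;
        m_modelLoaded = true;
        std::cout << "model reloaded\n";
    }

private:
    bool m_modelLoaded = false;
    bool m_isServer;
};

int main() {
    Worker regular;         // ordinary chat: unloaded/reloaded as the user switches chats
    Worker server(true);    // server chat: mirrors Server::Server passing true /*isServer*/
    regular.reloadModel();  // prints "model reloaded"
    server.reloadModel();   // no-op
    return 0;
}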
@@ -171,9 +171,8 @@ public:
             return;
         }
 
-        if (m_currentChat)
+        if (m_currentChat && m_currentChat != m_serverChat)
             m_currentChat->unloadModel();
-
         m_currentChat = chat;
         if (!m_currentChat->isModelLoaded() && m_currentChat != m_serverChat)
             m_currentChat->reloadModel();

@@ -81,13 +81,14 @@ void LLModelStore::releaseModel(const LLModelInfo &info)
     m_condition.wakeAll();
 }
 
-ChatLLM::ChatLLM(Chat *parent)
+ChatLLM::ChatLLM(Chat *parent, bool isServer)
     : QObject{nullptr}
     , m_promptResponseTokens(0)
     , m_promptTokens(0)
     , m_responseLogits(0)
     , m_isRecalc(false)
     , m_chat(parent)
+    , m_isServer(isServer)
 {
     moveToThread(&m_llmThread);
     connect(this, &ChatLLM::sendStartup, Network::globalInstance(), &Network::sendStartup);

@@ -151,7 +152,7 @@ bool ChatLLM::loadModel(const QString &modelName)
         delete m_modelInfo.model;
         m_modelInfo.model = nullptr;
         emit isModelLoadedChanged();
-    } else {
+    } else if (!m_isServer) {
         // This is a blocking call that tries to retrieve the model we need from the model store.
         // If it succeeds, then we just have to restore state. If the store has never had a model
         // returned to it, then the modelInfo.model pointer should be null which will happen on startup
@@ -163,7 +164,9 @@ bool ChatLLM::loadModel(const QString &modelName)
         // store, that our state was changed to not be loaded. If this is the case, release the model
         // back into the store and quit loading
         if (!m_shouldBeLoaded) {
+#if defined(DEBUG_MODEL_LOADING)
             qDebug() << "no longer need model" << m_chat->id() << m_modelInfo.model;
+#endif
             LLModelStore::globalInstance()->releaseModel(m_modelInfo);
             m_modelInfo = LLModelInfo();
             emit isModelLoadedChanged();
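The comments in this hunk describe the model-store protocol: a non-server ChatLLM blocks until a model is handed back to the store, and releases it again when it no longer needs it (only releaseModel() and m_condition.wakeAll() are visible in the hunks above). A rough sketch of that shape, assuming a QWaitCondition-based store; the class name, acquireModel(), and the container are illustrative, not the repo's actual LLModelStore internals.

#include <QMutex>
#include <QMutexLocker>
#include <QVector>
#include <QWaitCondition>

struct ModelSlot { void *model = nullptr; };     // stand-in for the real LLModelInfo

class ModelStoreSketch {
public:
    // Blocking call: waits until some worker has released a model back into the store.
    ModelSlot acquireModel() {
        QMutexLocker locker(&m_mutex);
        while (m_available.isEmpty())
            m_condition.wait(locker.mutex());
        return m_available.takeLast();           // may carry a null model pointer, e.g. on startup
    }

    // Hand a model back and wake any worker blocked in acquireModel().
    void releaseModel(const ModelSlot &slot) {
        QMutexLocker locker(&m_mutex);
        m_available.append(slot);
        m_condition.wakeAll();                   // corresponds to the wakeAll() in the hunk above
    }

private:
    QMutex m_mutex;
    QWaitCondition m_condition;
    QVector<ModelSlot> m_available;
};

With the new if (!m_isServer) guards, the server instance never takes part in this exchange and keeps its own model, which lines up with the tooltip change further down warning about increased RAM usage.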
@@ -232,6 +235,7 @@ bool ChatLLM::loadModel(const QString &modelName)
         } else
             emit sendModelLoaded();
     } else {
+        if (!m_isServer)
             LLModelStore::globalInstance()->releaseModel(m_modelInfo); // release back into the store
         const QString error = QString("Could not find model %1").arg(modelName);
         emit modelLoadingError(error);

@@ -436,7 +440,7 @@ void ChatLLM::forceUnloadModel()
 
 void ChatLLM::unloadModel()
 {
-    if (!isModelLoaded())
+    if (!isModelLoaded() || m_isServer)
         return;
 
     saveState();

@@ -450,7 +454,7 @@ void ChatLLM::unloadModel()
 
 void ChatLLM::reloadModel()
 {
-    if (isModelLoaded())
+    if (isModelLoaded() || m_isServer)
         return;
 
 #if defined(DEBUG_MODEL_LOADING)

@@ -31,7 +31,7 @@ class ChatLLM : public QObject
     Q_PROPERTY(QString generatedName READ generatedName NOTIFY generatedNameChanged)
 
 public:
-    ChatLLM(Chat *parent);
+    ChatLLM(Chat *parent, bool isServer = false);
     virtual ~ChatLLM();
 
     bool isModelLoaded() const;
@@ -87,12 +87,7 @@ Q_SIGNALS:
     void shouldBeLoadedChanged();
 
 protected:
-    LLModel::PromptContext m_ctx;
-    quint32 m_promptTokens;
-    quint32 m_promptResponseTokens;
     void resetContextProtected();
 
-private:
     bool handlePrompt(int32_t token);
     bool handleResponse(int32_t token, const std::string &response);
     bool handleRecalculate(bool isRecalc);

@@ -102,7 +97,10 @@ private:
     void saveState();
     void restoreState();
 
-private:
+protected:
+    LLModel::PromptContext m_ctx;
+    quint32 m_promptTokens;
+    quint32 m_promptResponseTokens;
     LLModelInfo m_modelInfo;
     LLModelType m_modelType;
     std::string m_response;

@@ -115,6 +113,7 @@ private:
     std::atomic<bool> m_stopGenerating;
     std::atomic<bool> m_shouldBeLoaded;
     bool m_isRecalc;
+    bool m_isServer;
 };
 
 #endif // CHATLLM_H
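A side effect of the private to protected moves in these header hunks is that the prompt bookkeeping (m_ctx, m_promptTokens, m_promptResponseTokens and the surrounding members) becomes reachable from subclasses, presumably so the Server class, which is constructed as a ChatLLM below, can reuse that state directly. A minimal illustration of why the access change matters; BaseWorker, ServerWorker and the token counting are made up for the example.

#include <cstdint>
#include <iostream>
#include <string>

class BaseWorker {
public:
    virtual ~BaseWorker() = default;
protected:                      // while these were private, a derived class could not touch them
    std::string m_response;
    uint32_t m_promptTokens = 0;
};

class ServerWorker : public BaseWorker {
public:
    void handleRequest(const std::string &prompt) {
        m_promptTokens += 1;                    // legal only because the members are protected
        m_response = "echo: " + prompt;
        std::cout << m_response << " (prompt #" << m_promptTokens << ")\n";
    }
};

int main() {
    ServerWorker server;
    server.handleRequest("hello");
    return 0;
}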
@@ -69,6 +69,7 @@ Drawer {
         anchors.bottom: checkForUpdatesButton.top
         anchors.bottomMargin: 10
         ScrollBar.vertical.policy: ScrollBar.AlwaysOn
+        clip: true
 
         ListView {
             id: conversationList

@@ -820,7 +820,7 @@ Dialog {
                     settings.sync()
                 }
 
-                ToolTip.text: qsTr("WARNING: This enables the gui to act as a local web server for AI API requests")
+                ToolTip.text: qsTr("WARNING: This enables the gui to act as a local web server for AI API requests and will increase your RAM usage as well")
                 ToolTip.visible: hovered
 
                 background: Rectangle {

@@ -52,7 +52,7 @@ static inline QJsonObject modelToJson(const ModelInfo &info)
 }
 
 Server::Server(Chat *chat)
-    : ChatLLM(chat)
+    : ChatLLM(chat, true /*isServer*/)
     , m_chat(chat)
     , m_server(nullptr)
 {

@@ -170,13 +170,15 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
         }
     }
 
+    setShouldBeLoaded(true);
+
     if (!foundModel) {
         if (!loadDefaultModel()) {
-            std::cerr << "ERROR: couldn't load default model" << model.toStdString() << std::endl;
+            std::cerr << "ERROR: couldn't load default model " << model.toStdString() << std::endl;
             return QHttpServerResponse(QHttpServerResponder::StatusCode::BadRequest);
         }
     } else if (!loadModel(model)) {
-        std::cerr << "ERROR: couldn't load model" << model.toStdString() << std::endl;
+        std::cerr << "ERROR: couldn't load model " << model.toStdString() << std::endl;
         return QHttpServerResponse(QHttpServerResponder::StatusCode::InternalServerError);
     }
 

@@ -308,7 +310,7 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
                       repeat_last_n,
                       LLM::globalInstance()->threadCount())) {
 
-        std::cerr << "ERROR: couldn't prompt model" << model.toStdString() << std::endl;
+        std::cerr << "ERROR: couldn't prompt model " << model.toStdString() << std::endl;
         return QHttpServerResponse(QHttpServerResponder::StatusCode::InternalServerError);
     }
     QString echoedPrompt = actualPrompt;