Mirror of https://github.com/nomic-ai/gpt4all
The server has different lifetime management than the other chats.
Commit: b71c0ac3bd (parent: ddc24acf33)
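Before the diff itself, here is a minimal, self-contained sketch of the lifetime split this commit introduces: the server chat is constructed with an isServer flag and opts out of the unload/reload that ordinary chats get when the user switches between them. The names ChatLLM, unloadModel(), reloadModel(), isModelLoaded() and m_isServer come from the diff below; the stubbed bodies and the main() driver are illustrative only, not the application's actual code.

// A minimal sketch of the m_isServer guard pattern from this commit.
#include <iostream>

class ChatLLM {
public:
    explicit ChatLLM(bool isServer = false) : m_isServer(isServer) {}

    bool isModelLoaded() const { return m_loaded; }

    void unloadModel() {
        // The server chat keeps its model resident between API requests,
        // so switching chats in the UI must never unload it.
        if (!isModelLoaded() || m_isServer)
            return;
        m_loaded = false;
        std::cout << "model unloaded\n";
    }

    void reloadModel() {
        // Likewise, the server never takes part in the eager reload that
        // runs when a chat becomes the current chat in the UI.
        if (isModelLoaded() || m_isServer)
            return;
        m_loaded = true;
        std::cout << "model reloaded\n";
    }

private:
    bool m_isServer = false;
    bool m_loaded = false;
};

int main() {
    ChatLLM uiChat;            // an ordinary chat: follows the UI's lifetime rules
    ChatLLM serverChat(true);  // the server chat: exempt from unload/reload

    uiChat.reloadModel();      // prints "model reloaded"
    uiChat.unloadModel();      // prints "model unloaded"
    serverChat.reloadModel();  // no-op: the server manages its own loading
    serverChat.unloadModel();  // no-op: the server's model stays put
}

In the application itself the flag is threaded through Server::Server via ChatLLM(chat, true /*isServer*/), and the server loads models on demand inside its completion handler (note the setShouldBeLoaded(true) call added there) rather than through reloadModel().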
@@ -171,9 +171,8 @@ public:
             return;
         }

-        if (m_currentChat)
+        if (m_currentChat && m_currentChat != m_serverChat)
             m_currentChat->unloadModel();

         m_currentChat = chat;
         if (!m_currentChat->isModelLoaded() && m_currentChat != m_serverChat)
             m_currentChat->reloadModel();
@@ -81,13 +81,14 @@ void LLModelStore::releaseModel(const LLModelInfo &info)
     m_condition.wakeAll();
 }

-ChatLLM::ChatLLM(Chat *parent)
+ChatLLM::ChatLLM(Chat *parent, bool isServer)
     : QObject{nullptr}
     , m_promptResponseTokens(0)
     , m_promptTokens(0)
     , m_responseLogits(0)
     , m_isRecalc(false)
     , m_chat(parent)
+    , m_isServer(isServer)
 {
     moveToThread(&m_llmThread);
     connect(this, &ChatLLM::sendStartup, Network::globalInstance(), &Network::sendStartup);
@@ -151,7 +152,7 @@ bool ChatLLM::loadModel(const QString &modelName)
         delete m_modelInfo.model;
         m_modelInfo.model = nullptr;
         emit isModelLoadedChanged();
-    } else {
+    } else if (!m_isServer) {
         // This is a blocking call that tries to retrieve the model we need from the model store.
         // If it succeeds, then we just have to restore state. If the store has never had a model
         // returned to it, then the modelInfo.model pointer should be null which will happen on startup
@@ -163,7 +164,9 @@ bool ChatLLM::loadModel(const QString &modelName)
         // store, that our state was changed to not be loaded. If this is the case, release the model
         // back into the store and quit loading
         if (!m_shouldBeLoaded) {
 #if defined(DEBUG_MODEL_LOADING)
             qDebug() << "no longer need model" << m_chat->id() << m_modelInfo.model;
 #endif
             LLModelStore::globalInstance()->releaseModel(m_modelInfo);
             m_modelInfo = LLModelInfo();
             emit isModelLoadedChanged();
@@ -232,7 +235,8 @@ bool ChatLLM::loadModel(const QString &modelName)
         } else
             emit sendModelLoaded();
     } else {
-        LLModelStore::globalInstance()->releaseModel(m_modelInfo); // release back into the store
+        if (!m_isServer)
+            LLModelStore::globalInstance()->releaseModel(m_modelInfo); // release back into the store
         const QString error = QString("Could not find model %1").arg(modelName);
         emit modelLoadingError(error);
     }
@@ -436,7 +440,7 @@ void ChatLLM::forceUnloadModel()

 void ChatLLM::unloadModel()
 {
-    if (!isModelLoaded())
+    if (!isModelLoaded() || m_isServer)
         return;

     saveState();
@@ -450,7 +454,7 @@ void ChatLLM::unloadModel()

 void ChatLLM::reloadModel()
 {
-    if (isModelLoaded())
+    if (isModelLoaded() || m_isServer)
         return;

 #if defined(DEBUG_MODEL_LOADING)
@@ -31,7 +31,7 @@ class ChatLLM : public QObject
     Q_PROPERTY(QString generatedName READ generatedName NOTIFY generatedNameChanged)

 public:
-    ChatLLM(Chat *parent);
+    ChatLLM(Chat *parent, bool isServer = false);
     virtual ~ChatLLM();

     bool isModelLoaded() const;
@@ -87,12 +87,7 @@ Q_SIGNALS:
     void shouldBeLoadedChanged();

-protected:
-    LLModel::PromptContext m_ctx;
-    quint32 m_promptTokens;
-    quint32 m_promptResponseTokens;
-    void resetContextProtected();

 private:
     bool handlePrompt(int32_t token);
     bool handleResponse(int32_t token, const std::string &response);
     bool handleRecalculate(bool isRecalc);
@@ -102,7 +97,10 @@ private:
     void saveState();
     void restoreState();

-private:
+protected:
+    LLModel::PromptContext m_ctx;
+    quint32 m_promptTokens;
+    quint32 m_promptResponseTokens;
     LLModelInfo m_modelInfo;
     LLModelType m_modelType;
     std::string m_response;
@@ -115,6 +113,7 @@ private:
     std::atomic<bool> m_stopGenerating;
     std::atomic<bool> m_shouldBeLoaded;
     bool m_isRecalc;
+    bool m_isServer;
 };

 #endif // CHATLLM_H
@@ -69,6 +69,7 @@ Drawer {
         anchors.bottom: checkForUpdatesButton.top
         anchors.bottomMargin: 10
         ScrollBar.vertical.policy: ScrollBar.AlwaysOn
         clip: true

         ListView {
             id: conversationList
@@ -820,7 +820,7 @@ Dialog {
                 settings.sync()
             }

-            ToolTip.text: qsTr("WARNING: This enables the gui to act as a local web server for AI API requests")
+            ToolTip.text: qsTr("WARNING: This enables the gui to act as a local web server for AI API requests and will increase your RAM usage as well")
             ToolTip.visible: hovered

             background: Rectangle {
@@ -52,7 +52,7 @@ static inline QJsonObject modelToJson(const ModelInfo &info)
 }

 Server::Server(Chat *chat)
-    : ChatLLM(chat)
+    : ChatLLM(chat, true /*isServer*/)
     , m_chat(chat)
     , m_server(nullptr)
 {
@@ -170,13 +170,15 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
         }
     }

+    setShouldBeLoaded(true);
+
     if (!foundModel) {
         if (!loadDefaultModel()) {
-            std::cerr << "ERROR: couldn't load default model" << model.toStdString() << std::endl;
+            std::cerr << "ERROR: couldn't load default model " << model.toStdString() << std::endl;
             return QHttpServerResponse(QHttpServerResponder::StatusCode::BadRequest);
         }
     } else if (!loadModel(model)) {
-        std::cerr << "ERROR: couldn't load model" << model.toStdString() << std::endl;
+        std::cerr << "ERROR: couldn't load model " << model.toStdString() << std::endl;
         return QHttpServerResponse(QHttpServerResponder::StatusCode::InternalServerError);
     }

@@ -308,7 +310,7 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
             repeat_last_n,
             LLM::globalInstance()->threadCount())) {

-        std::cerr << "ERROR: couldn't prompt model" << model.toStdString() << std::endl;
+        std::cerr << "ERROR: couldn't prompt model " << model.toStdString() << std::endl;
         return QHttpServerResponse(QHttpServerResponder::StatusCode::InternalServerError);
     }
     QString echoedPrompt = actualPrompt;
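For context on the releaseModel()/wakeAll() fragments above: the comments in ChatLLM::loadModel describe "a blocking call that tries to retrieve the model we need from the model store." The sketch below is a rough, assumed reconstruction of that hand-off pattern (a single shared slot guarded by a mutex and a wait condition). The acquireModel() name, the ModelInfo struct, and the use of std::condition_variable in place of Qt's QWaitCondition (whose wakeAll() appears in the diff) are illustrative assumptions, not the project's actual LLModelStore.

// Assumed sketch of a blocking, single-slot model store; not the real LLModelStore.
#include <condition_variable>
#include <mutex>
#include <optional>
#include <string>

struct ModelInfo {
    void *model = nullptr;   // stand-in for the loaded model handle; null until first load
    std::string fileName;
};

class ModelStore {
public:
    // Blocks until the slot holds something, then hands it to the caller.
    // On first startup the slot holds an empty ModelInfo, so model == nullptr,
    // matching the "should be null ... on startup" comment in the diff.
    ModelInfo acquireModel() {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_condition.wait(lock, [this] { return m_slot.has_value(); });
        ModelInfo info = *m_slot;
        m_slot.reset();
        return info;
    }

    // Returns a model to the store and wakes any thread blocked in acquireModel();
    // this mirrors releaseModel() calling m_condition.wakeAll() in the diff.
    void releaseModel(const ModelInfo &info) {
        {
            std::lock_guard<std::mutex> lock(m_mutex);
            m_slot = info;
        }
        m_condition.notify_all();
    }

private:
    std::mutex m_mutex;
    std::condition_variable m_condition;
    std::optional<ModelInfo> m_slot = ModelInfo{};  // pre-seeded empty slot
};

int main() {
    ModelStore store;
    ModelInfo info = store.acquireModel();   // returns the empty slot immediately
    info.fileName = "some-model.bin";        // hypothetical file name
    store.releaseModel(info);                // wakes any chat waiting for the model
}

The isServer guards in the diff effectively keep the server chat out of this acquire/release cycle, which is what keeps its model loaded when the UI switches chats.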