mirror of
https://github.com/nomic-ai/gpt4all
synced 2024-11-08 07:10:32 +00:00
server: do not process the system prompt twice for new models (#2924)
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
parent
2f02cd407f
commit
813ccaf5d1
@ -8,9 +8,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
|||||||
|
|
||||||
### Added
|
### Added
|
||||||
- Use greedy sampling when temperature is set to zero ([#2854](https://github.com/nomic-ai/gpt4all/pull/2854))
|
- Use greedy sampling when temperature is set to zero ([#2854](https://github.com/nomic-ai/gpt4all/pull/2854))
|
||||||
- Use configured system prompt in server mode and ignore system messages ([#2921](https://github.com/nomic-ai/gpt4all/pull/2921))
|
- Use configured system prompt in server mode and ignore system messages ([#2921](https://github.com/nomic-ai/gpt4all/pull/2921), [#2924](https://github.com/nomic-ai/gpt4all/pull/2924))
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
|
- The offline update button now directs users to the offline installer releases page. (by [@3Simplex](https://github.com/3Simplex) in [#2888](https://github.com/nomic-ai/gpt4all/pull/2888))
|
||||||
|
- Change the website link on the home page to point to the new URL ([#2915](https://github.com/nomic-ai/gpt4all/pull/2915))
|
||||||
- Smaller default window size, dynamic minimum size, and scaling tweaks ([#2904](https://github.com/nomic-ai/gpt4all/pull/2904))
|
- Smaller default window size, dynamic minimum size, and scaling tweaks ([#2904](https://github.com/nomic-ai/gpt4all/pull/2904))
|
||||||
- Only allow a single instance of program to be run at a time ([#2923](https://github.com/nomic-ai/gpt4all/pull/2923))
|
- Only allow a single instance of program to be run at a time ([#2923](https://github.com/nomic-ai/gpt4all/pull/2923))
|
||||||
|
|
||||||
@ -24,10 +26,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
|||||||
- Fixed typo in several files. (by [@3Simplex](https://github.com/3Simplex) in [#2916](https://github.com/nomic-ai/gpt4all/pull/2916))
|
- Fixed typo in several files. (by [@3Simplex](https://github.com/3Simplex) in [#2916](https://github.com/nomic-ai/gpt4all/pull/2916))
|
||||||
- Fix the antenna icon tooltip when using the local server ([#2922](https://github.com/nomic-ai/gpt4all/pull/2922))
|
- Fix the antenna icon tooltip when using the local server ([#2922](https://github.com/nomic-ai/gpt4all/pull/2922))
|
||||||
|
|
||||||
### Changed
|
|
||||||
- The offline update button now directs users to the offline installer releases page. (by [@3Simplex](https://github.com/3Simplex) in [#2888](https://github.com/nomic-ai/gpt4all/pull/2888))
|
|
||||||
- Change the website link on the home page to point to the new URL ([#2915](https://github.com/nomic-ai/gpt4all/pull/2915))
|
|
||||||
|
|
||||||
## [3.2.1] - 2024-08-13
|
## [3.2.1] - 2024-08-13
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
|
@ -74,7 +74,6 @@ void Chat::connectLLM()
|
|||||||
connect(this, &Chat::promptRequested, m_llmodel, &ChatLLM::prompt, Qt::QueuedConnection);
|
connect(this, &Chat::promptRequested, m_llmodel, &ChatLLM::prompt, Qt::QueuedConnection);
|
||||||
connect(this, &Chat::modelChangeRequested, m_llmodel, &ChatLLM::modelChangeRequested, Qt::QueuedConnection);
|
connect(this, &Chat::modelChangeRequested, m_llmodel, &ChatLLM::modelChangeRequested, Qt::QueuedConnection);
|
||||||
connect(this, &Chat::loadDefaultModelRequested, m_llmodel, &ChatLLM::loadDefaultModel, Qt::QueuedConnection);
|
connect(this, &Chat::loadDefaultModelRequested, m_llmodel, &ChatLLM::loadDefaultModel, Qt::QueuedConnection);
|
||||||
connect(this, &Chat::loadModelRequested, m_llmodel, &ChatLLM::loadModel, Qt::QueuedConnection);
|
|
||||||
connect(this, &Chat::generateNameRequested, m_llmodel, &ChatLLM::generateName, Qt::QueuedConnection);
|
connect(this, &Chat::generateNameRequested, m_llmodel, &ChatLLM::generateName, Qt::QueuedConnection);
|
||||||
connect(this, &Chat::regenerateResponseRequested, m_llmodel, &ChatLLM::regenerateResponse, Qt::QueuedConnection);
|
connect(this, &Chat::regenerateResponseRequested, m_llmodel, &ChatLLM::regenerateResponse, Qt::QueuedConnection);
|
||||||
connect(this, &Chat::resetResponseRequested, m_llmodel, &ChatLLM::resetResponse, Qt::QueuedConnection);
|
connect(this, &Chat::resetResponseRequested, m_llmodel, &ChatLLM::resetResponse, Qt::QueuedConnection);
|
||||||
|
@ -146,7 +146,6 @@ Q_SIGNALS:
|
|||||||
void modelInfoChanged();
|
void modelInfoChanged();
|
||||||
void restoringFromTextChanged();
|
void restoringFromTextChanged();
|
||||||
void loadDefaultModelRequested();
|
void loadDefaultModelRequested();
|
||||||
void loadModelRequested(const ModelInfo &modelInfo);
|
|
||||||
void generateNameRequested();
|
void generateNameRequested();
|
||||||
void modelLoadingErrorChanged();
|
void modelLoadingErrorChanged();
|
||||||
void isServerChanged();
|
void isServerChanged();
|
||||||
|
@ -249,9 +249,11 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
|
|||||||
// and what the type and name of that model is. I've tried to comment extensively in this method
|
// and what the type and name of that model is. I've tried to comment extensively in this method
|
||||||
// to provide an overview of what we're doing here.
|
// to provide an overview of what we're doing here.
|
||||||
|
|
||||||
// We're already loaded with this model
|
if (isModelLoaded() && this->modelInfo() == modelInfo) {
|
||||||
if (isModelLoaded() && this->modelInfo() == modelInfo)
|
// already acquired -> keep it and reset
|
||||||
return true;
|
resetContext();
|
||||||
|
return true; // already loaded
|
||||||
|
}
|
||||||
|
|
||||||
// reset status
|
// reset status
|
||||||
emit modelLoadingPercentageChanged(std::numeric_limits<float>::min()); // small non-zero positive value
|
emit modelLoadingPercentageChanged(std::numeric_limits<float>::min()); // small non-zero positive value
|
||||||
@ -659,20 +661,25 @@ void ChatLLM::setModelInfo(const ModelInfo &modelInfo)
|
|||||||
emit modelInfoChanged(modelInfo);
|
emit modelInfoChanged(modelInfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ChatLLM::acquireModel() {
|
void ChatLLM::acquireModel()
|
||||||
|
{
|
||||||
m_llModelInfo = LLModelStore::globalInstance()->acquireModel();
|
m_llModelInfo = LLModelStore::globalInstance()->acquireModel();
|
||||||
emit loadedModelInfoChanged();
|
emit loadedModelInfoChanged();
|
||||||
}
|
}
|
||||||
|
|
||||||
void ChatLLM::resetModel() {
|
void ChatLLM::resetModel()
|
||||||
|
{
|
||||||
m_llModelInfo = {};
|
m_llModelInfo = {};
|
||||||
emit loadedModelInfoChanged();
|
emit loadedModelInfoChanged();
|
||||||
}
|
}
|
||||||
|
|
||||||
void ChatLLM::modelChangeRequested(const ModelInfo &modelInfo)
|
void ChatLLM::modelChangeRequested(const ModelInfo &modelInfo)
|
||||||
{
|
{
|
||||||
|
// ignore attempts to switch to the same model twice
|
||||||
|
if (!isModelLoaded() || this->modelInfo() != modelInfo) {
|
||||||
m_shouldBeLoaded = true;
|
m_shouldBeLoaded = true;
|
||||||
loadModel(modelInfo);
|
loadModel(modelInfo);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ChatLLM::handlePrompt(int32_t token)
|
bool ChatLLM::handlePrompt(int32_t token)
|
||||||
|
@ -361,14 +361,14 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
|
|||||||
if (modelInfo.filename().isEmpty()) {
|
if (modelInfo.filename().isEmpty()) {
|
||||||
std::cerr << "ERROR: couldn't load default model " << modelRequested.toStdString() << std::endl;
|
std::cerr << "ERROR: couldn't load default model " << modelRequested.toStdString() << std::endl;
|
||||||
return QHttpServerResponse(QHttpServerResponder::StatusCode::BadRequest);
|
return QHttpServerResponse(QHttpServerResponder::StatusCode::BadRequest);
|
||||||
} else if (!loadModel(modelInfo)) {
|
}
|
||||||
|
|
||||||
|
// NB: this resets the context, regardless of whether this model is already loaded
|
||||||
|
if (!loadModel(modelInfo)) {
|
||||||
std::cerr << "ERROR: couldn't load model " << modelInfo.name().toStdString() << std::endl;
|
std::cerr << "ERROR: couldn't load model " << modelInfo.name().toStdString() << std::endl;
|
||||||
return QHttpServerResponse(QHttpServerResponder::StatusCode::InternalServerError);
|
return QHttpServerResponse(QHttpServerResponder::StatusCode::InternalServerError);
|
||||||
}
|
}
|
||||||
|
|
||||||
// don't remember any context
|
|
||||||
resetContext();
|
|
||||||
|
|
||||||
const QString promptTemplate = modelInfo.promptTemplate();
|
const QString promptTemplate = modelInfo.promptTemplate();
|
||||||
const float top_k = modelInfo.topK();
|
const float top_k = modelInfo.topK();
|
||||||
const int n_batch = modelInfo.promptBatchSize();
|
const int n_batch = modelInfo.promptBatchSize();
|
||||||
|
Loading…
Reference in New Issue
Block a user