gpt4all/gpt4all-chat/embllm.cpp

#include "embllm.h"
#include "modellist.h"
EmbeddingLLM::EmbeddingLLM()
    : QObject{nullptr}
    , m_model{nullptr}
{
}

EmbeddingLLM::~EmbeddingLLM()
{
    delete m_model;
    m_model = nullptr;
}
bool EmbeddingLLM::loadModel()
{
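    // Bail out early if no embedding models are installed.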
    const EmbeddingModels *embeddingModels = ModelList::globalInstance()->embeddingModels();
    if (!embeddingModels->count())
        return false;

    const ModelInfo defaultModel = embeddingModels->defaultModelInfo();
    QString filePath = defaultModel.dirpath + defaultModel.filename();
    QFileInfo fileInfo(filePath);
    if (!fileInfo.exists()) {
        qWarning() << "WARNING: Could not load sbert because file does not exist";
        m_model = nullptr;
        return false;
    }

    m_model = LLModel::Implementation::construct(filePath.toStdString());
    if (!m_model) { // defensive: guard against a null implementation
        qWarning() << "WARNING: Could not construct sbert";
        return false;
    }
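
    // Load with a 2048-token context window; the trailing 0 requests zero
    // GPU layers, i.e. CPU-only inference for the embedding model.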
    bool success = m_model->loadModel(filePath.toStdString(), 2048, 0);
    if (!success) {
        qWarning() << "WARNING: Could not load sbert";
        delete m_model;
        m_model = nullptr;
        return false;
    }

    if (m_model->implementation().modelType() != "Bert") {
        qWarning() << "WARNING: Model type is not sbert";
        delete m_model;
        m_model = nullptr;
        return false;
    }

    return true;
}
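
// Returns true if an embedding model is currently loaded.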
bool EmbeddingLLM::hasModel() const
{
    return m_model != nullptr;
}
std::vector<float> EmbeddingLLM::generateEmbeddings(const QString &text)
{
    if (!hasModel() && !loadModel()) {
        qWarning() << "WARNING: Could not load sbert model for embeddings";
        return std::vector<float>();
    }

    Q_ASSERT(hasModel());
    return m_model->embedding(text.toStdString());
}
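
// Minimal usage sketch (hypothetical standalone caller, not taken from the
// gpt4all-chat sources):
//
//     EmbeddingLLM embLLM;
//     std::vector<float> vec = embLLM.generateEmbeddings(QStringLiteral("hello world"));
//     if (!vec.empty())
//         qDebug() << "embedding dimension:" << vec.size();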