Only show GPU when we're actually using it.
commit 3076e0bf26
parent 1fa67a585c
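Taken together, the diff below adds a usingGPUDevice() query to the model interface and consults it in the chat layer, so the UI only reports a GPU when inference is actually running on one. The following is a minimal, self-contained C++ sketch of that pattern; the names used here (Model, MetalModel, reportedDevice) are simplified stand-ins for illustration, not the project's actual API.

#include <iostream>
#include <string>

// Base interface: assume CPU unless a backend overrides the query.
class Model {
public:
    virtual ~Model() = default;
    virtual bool usingGPUDevice() { return false; }
};

// A Metal-style backend: on the GPU whenever the accelerated path is compiled in.
class MetalModel : public Model {
public:
    bool usingGPUDevice() override {
#if defined(GGML_USE_METAL)
        return true;
#else
        return false;
#endif
    }
};

// Caller side: downgrade the reported device if the model fell back to CPU,
// e.g. because the requested acceleration is unavailable for this model.
std::string reportedDevice(Model &model, const std::string &requestedDevice) {
    if (requestedDevice != "CPU" && !model.usingGPUDevice())
        return "CPU";
    return requestedDevice;
}

int main() {
    MetalModel model;
    std::cout << "device: " << reportedDevice(model, "Metal") << "\n";
    return 0;
}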
@@ -337,6 +337,16 @@ bool LLamaModel::hasGPUDevice()
 #endif
 }
 
+bool LLamaModel::usingGPUDevice()
+{
+#if defined(GGML_USE_KOMPUTE)
+    return ggml_vk_using_vulkan();
+#elif defined(GGML_USE_METAL)
+    return true;
+#endif
+    return false;
+}
+
 #if defined(_WIN32)
 #define DLL_EXPORT __declspec(dllexport)
 #else
@@ -30,6 +30,7 @@ public:
     bool initializeGPUDevice(const GPUDevice &device) override;
     bool initializeGPUDevice(int device) override;
     bool hasGPUDevice() override;
+    bool usingGPUDevice() override;
 
 private:
     LLamaPrivate *d_ptr;
@@ -100,6 +100,7 @@ public:
     virtual bool initializeGPUDevice(const GPUDevice &/*device*/) { return false; }
     virtual bool initializeGPUDevice(int /*device*/) { return false; }
     virtual bool hasGPUDevice() { return false; }
+    virtual bool usingGPUDevice() { return false; }
 
 protected:
     // These are pure virtual because subclasses need to implement as the default implementation of
@@ -163,7 +163,7 @@ struct mpt_hparams {
     int32_t n_embd = 0; //max_seq_len
     int32_t n_head = 0; // n_heads
     int32_t n_layer = 0; //n_layers
     int32_t ftype = 0;
 };
 
 struct replit_layer {
@@ -220,7 +220,7 @@ static bool kv_cache_init(
     params.mem_size = cache.buf.size;
     params.mem_buffer = cache.buf.addr;
     params.no_alloc = false;
 
     cache.ctx = ggml_init(params);
     if (!cache.ctx) {
         fprintf(stderr, "%s: failed to allocate memory for kv cache\n", __func__);
@@ -503,7 +503,7 @@ bool replit_model_load(const std::string & fname, std::istream &fin, replit_mode
     }
 
     GGML_CHECK_BUF(ggml_metal_add_buffer(model.ctx_metal, "data", data_ptr, data_size, max_size));
     GGML_CHECK_BUF(ggml_metal_add_buffer(model.ctx_metal, "kv", ggml_get_mem_buffer(model.kv_self.ctx),
         ggml_get_mem_size(model.kv_self.ctx), 0));
     GGML_CHECK_BUF(ggml_metal_add_buffer(model.ctx_metal, "eval", model.eval_buf.addr, model.eval_buf.size, 0));
     GGML_CHECK_BUF(ggml_metal_add_buffer(model.ctx_metal, "scr0", model.scr0_buf.addr, model.scr0_buf.size, 0));
@@ -975,6 +975,14 @@ const std::vector<LLModel::Token> &Replit::endTokens() const
     return fres;
 }
 
+bool Replit::usingGPUDevice()
+{
+#if defined(GGML_USE_METAL)
+    return true;
+#endif
+    return false;
+}
+
 #if defined(_WIN32)
 #define DLL_EXPORT __declspec(dllexport)
 #else
@@ -27,6 +27,7 @@ public:
     size_t restoreState(const uint8_t *src) override;
     void setThreadCount(int32_t n_threads) override;
    int32_t threadCount() const override;
+    bool usingGPUDevice() override;
 
 private:
     ReplitPrivate *d_ptr;
@@ -302,6 +302,11 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                 m_llModelInfo = LLModelInfo();
                 emit modelLoadingError(QString("Could not load model due to invalid model file for %1").arg(modelInfo.filename()));
             } else {
+                // We might have had to fallback to CPU after load if the model is not possible to accelerate
+                // for instance if the quantization method is not supported on Vulkan yet
+                if (actualDevice != "CPU" && !m_llModelInfo.model->usingGPUDevice())
+                    emit reportDevice("CPU");
+
                 switch (m_llModelInfo.model->implementation().modelType()[0]) {
                 case 'L': m_llModelType = LLModelType::LLAMA_; break;
                 case 'G': m_llModelType = LLModelType::GPTJ_; break;