mirror of
https://github.com/nomic-ai/gpt4all
synced 2024-11-02 09:40:42 +00:00
Use F16 for kv cache on mpt.
This commit is contained in:
parent
dc559c1575
commit
eb77d5157b
@@ -347,7 +347,7 @@ bool mpt_model_load(const std::string &fname, std::istream &fin, mpt_model & mod
 const int n_mem = n_layer*n_ctx;
 const int n_elements = n_embd*n_mem;
 
-if (!kv_cache_init(hparams, model.kv_self, GGML_TYPE_F32, model.hparams.n_ctx)) {
+if (!kv_cache_init(hparams, model.kv_self, GGML_TYPE_F16, model.hparams.n_ctx)) {
     fprintf(stderr, "%s: kv_cache_init() failed for self-attention cache\n", __func__);
     ggml_free(ctx);
     return false;
Loading…
Reference in New Issue
Block a user