diff --git a/llmodel/mpt.cpp b/llmodel/mpt.cpp index bbd516e2..ffe3ebf0 100644 --- a/llmodel/mpt.cpp +++ b/llmodel/mpt.cpp @@ -347,7 +347,7 @@ bool mpt_model_load(const std::string &fname, std::istream &fin, mpt_model & mod const int n_mem = n_layer*n_ctx; const int n_elements = n_embd*n_mem; - if (!kv_cache_init(hparams, model.kv_self, GGML_TYPE_F32, model.hparams.n_ctx)) { + if (!kv_cache_init(hparams, model.kv_self, GGML_TYPE_F16, model.hparams.n_ctx)) { fprintf(stderr, "%s: kv_cache_init() failed for self-attention cache\n", __func__); ggml_free(ctx); return false;