Use F16 for the KV cache on MPT.

pull/520/head
Adam Treat 1 year ago
parent dc559c1575
commit eb77d5157b

@ -347,7 +347,7 @@ bool mpt_model_load(const std::string &fname, std::istream &fin, mpt_model & mod
const int n_mem = n_layer*n_ctx;
const int n_elements = n_embd*n_mem;
if (!kv_cache_init(hparams, model.kv_self, GGML_TYPE_F32, model.hparams.n_ctx)) {
if (!kv_cache_init(hparams, model.kv_self, GGML_TYPE_F16, model.hparams.n_ctx)) {
fprintf(stderr, "%s: kv_cache_init() failed for self-attention cache\n", __func__);
ggml_free(ctx);
return false;

Loading…
Cancel
Save