Use F16 for kv cache on mpt.

2024-11-02 09:40:42 +00:00 · 2023-05-08 12:07:34 -04:00 · 2023-05-08 12:07:34 -04:00 · 368886015d
commit 368886015d
parent 00804c4e3e
1 changed file with 1 addition and 1 deletion
--- a/llmodel/mpt.cpp
+++ b/llmodel/mpt.cpp
@@ -347,7 +347,7 @@ bool mpt_model_load(const std::string &fname, std::istream &fin, mpt_model & mod
        const int n_mem      = n_layer*n_ctx;
        const int n_elements = n_embd*n_mem;

-        if (!kv_cache_init(hparams, model.kv_self, GGML_TYPE_F32, model.hparams.n_ctx)) {
+        if (!kv_cache_init(hparams, model.kv_self, GGML_TYPE_F16, model.hparams.n_ctx)) {
            fprintf(stderr, "%s: kv_cache_init() failed for self-attention cache\n", __func__);
            ggml_free(ctx);
            return false;