diff --git a/llmodel/mpt.cpp b/llmodel/mpt.cpp
index bbd516e2..ffe3ebf0 100644
--- a/llmodel/mpt.cpp
+++ b/llmodel/mpt.cpp
@@ -347,7 +347,7 @@ bool mpt_model_load(const std::string &fname, std::istream &fin, mpt_model & mod
         const int n_mem      = n_layer*n_ctx;
         const int n_elements = n_embd*n_mem;
 
-        if (!kv_cache_init(hparams, model.kv_self, GGML_TYPE_F32, model.hparams.n_ctx)) {
+        if (!kv_cache_init(hparams, model.kv_self, GGML_TYPE_F16, model.hparams.n_ctx)) {
             fprintf(stderr, "%s: kv_cache_init() failed for self-attention cache\n", __func__);
             ggml_free(ctx);
             return false;