Use F16 for the KV cache on MPT.

pull/520/head
Adam Treat 1 year ago
parent dc559c1575
commit eb77d5157b

@ -347,7 +347,7 @@ bool mpt_model_load(const std::string &fname, std::istream &fin, mpt_model & mod
const int n_mem = n_layer*n_ctx;
const int n_elements = n_embd*n_mem;
if (!kv_cache_init(hparams, model.kv_self, GGML_TYPE_F32, model.hparams.n_ctx)) {
if (!kv_cache_init(hparams, model.kv_self, GGML_TYPE_F16, model.hparams.n_ctx)) {
fprintf(stderr, "%s: kv_cache_init() failed for self-attention cache\n", __func__);
ggml_free(ctx);
return false;

Loading…
Cancel
Save