mirror of
https://github.com/nomic-ai/gpt4all
synced 2024-11-02 09:40:42 +00:00
Match Helly's impl of kv cache.
This commit is contained in:
parent
eb77d5157b
commit
98aedd2173
@ -435,7 +435,6 @@ bool mpt_model_load(const std::string &fname, std::istream &fin, mpt_model & mod
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// load the model's weights from a file path
|
// load the model's weights from a file path
|
||||||
bool mpt_model_load(const std::string & fname, mpt_model & model, mpt_vocab & vocab) {
|
bool mpt_model_load(const std::string & fname, mpt_model & model, mpt_vocab & vocab) {
|
||||||
|
|
||||||
@ -523,10 +522,14 @@ bool mpt_eval(
|
|||||||
struct ggml_tensor * Kcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], 1*ggml_element_size(cur)*n_embd));
|
struct ggml_tensor * Kcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], 1*ggml_element_size(cur)*n_embd));
|
||||||
struct ggml_tensor * Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], 2*ggml_element_size(cur)*n_embd));
|
struct ggml_tensor * Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], 2*ggml_element_size(cur)*n_embd));
|
||||||
|
|
||||||
// store key and value to memory
|
// TODO: qk_ln? (seems to be False in MPT-7B configs)
|
||||||
if (N >= 1) {
|
{
|
||||||
|
Vcur = ggml_transpose(ctx0, Vcur);
|
||||||
|
|
||||||
struct ggml_tensor * k = ggml_view_1d(ctx0, model.kv_self.k, N*n_embd, (ggml_element_size(model.kv_self.k)*n_embd)*(il*n_ctx + n_past));
|
struct ggml_tensor * k = ggml_view_1d(ctx0, model.kv_self.k, N*n_embd, (ggml_element_size(model.kv_self.k)*n_embd)*(il*n_ctx + n_past));
|
||||||
struct ggml_tensor * v = ggml_view_1d(ctx0, model.kv_self.v, N*n_embd, (ggml_element_size(model.kv_self.v)*n_embd)*(il*n_ctx + n_past));
|
struct ggml_tensor * v = ggml_view_2d(ctx0, model.kv_self.v, N, n_embd,
|
||||||
|
( n_ctx)*ggml_element_size(model.kv_self.v),
|
||||||
|
(il*n_ctx)*ggml_element_size(model.kv_self.v)*n_embd + n_past*ggml_element_size(model.kv_self.v));
|
||||||
|
|
||||||
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
|
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
|
||||||
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
|
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
|
||||||
|
Loading…
Reference in New Issue
Block a user