|
|
|
@ -30,8 +30,6 @@
|
|
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
const char *modelType_ = "GPT-J";
|
|
|
|
|
|
|
|
|
|
static const size_t MB = 1024*1024;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// default hparams (GPT-J 6B)
|
|
|
|
@ -139,7 +137,7 @@ static bool kv_cache_init(
|
|
|
|
|
const int64_t n_mem = (int64_t)n_layer*n_ctx;
|
|
|
|
|
const int64_t n_elements = n_embd*n_mem;
|
|
|
|
|
|
|
|
|
|
cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*MB);
|
|
|
|
|
cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2_MiB);
|
|
|
|
|
|
|
|
|
|
struct ggml_init_params params;
|
|
|
|
|
params.mem_size = cache.buf.size;
|
|
|
|
@ -501,7 +499,7 @@ bool gptj_eval(
|
|
|
|
|
const int n_vocab = hparams.n_vocab;
|
|
|
|
|
const int n_rot = hparams.n_rot;
|
|
|
|
|
|
|
|
|
|
const size_t init_buf_size = 1024u*MB;
|
|
|
|
|
const size_t init_buf_size = 1024_MiB;
|
|
|
|
|
if (!model.buf.addr || model.buf.size < init_buf_size)
|
|
|
|
|
model.buf.resize(init_buf_size);
|
|
|
|
|
|
|
|
|
|