starcoder: use ggml_graph_plan

pull/1389/head
aaron miller 12 months ago committed by Adam Treat
parent 27a8b020c3
commit 33c22be2aa

@@ -132,6 +132,7 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
add_library(starcoder-${BUILD_VARIANT} SHARED
starcoder.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
target_compile_definitions(starcoder-${BUILD_VARIANT} PRIVATE LLAMA_VERSIONS=>=3 LLAMA_DATE=999999)
prepare_target(starcoder llama-mainline)
endif()
endforeach()

@@ -73,6 +73,7 @@ struct starcoder_model {
llm_buffer eval_buf;
llm_buffer scr0_buf;
llm_buffer scr1_buf;
+llm_buffer work_buf;
};
static bool kv_cache_init(
@@ -452,7 +453,7 @@ bool starcoder_model_load(const std::string & fname, starcoder_model & model, gp
// - embd_w: the predicted logits for the next token
//
bool starcoder_eval(
-const starcoder_model & model,
+starcoder_model & model,
const int n_threads,
const int n_past,
const std::vector<gpt_vocab::id> & embd_inp,
@@ -477,7 +478,6 @@ bool starcoder_eval(
struct ggml_context * ctx0 = ggml_init(eval_ctx_params);
struct ggml_cgraph gf = {};
-gf.n_threads = n_threads;
struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
memcpy(embd->data, embd_inp.data(), N*ggml_element_size(embd));
@@ -730,7 +730,7 @@ bool starcoder_eval(
// run the computation
ggml_build_forward_expand(&gf, inpL);
-ggml_graph_compute (ctx0, &gf);
+ggml_graph_compute_g4a(model.work_buf, &gf, n_threads);
//if (n_past%100 == 0) {
// ggml_graph_print (&gf);

Loading…
Cancel
Save