limit prompt batch size to 128

python-bindings-bugfix-2
Aaron Miller 1 year ago committed by AT
parent 958c8d4fa5
commit 7a5f6e4726

@ -9,6 +9,8 @@
#include <cstdint>
#include <limits>
// Upper bound on the prompt batch size: LLModel::prompt() clamps
// promptCtx.n_batch to at most this value before processing the prompt
// in batches.
#define LLMODEL_MAX_PROMPT_BATCH 128
class Dlhandle;
class LLModel {

@ -52,6 +52,7 @@ void LLModel::prompt(const std::string &prompt,
promptCtx.n_predict = std::min(promptCtx.n_predict, promptCtx.n_ctx - (int) embd_inp.size());
promptCtx.n_past = std::min(promptCtx.n_past, promptCtx.n_ctx);
promptCtx.n_batch = std::min(promptCtx.n_batch, LLMODEL_MAX_PROMPT_BATCH);
// process the prompt in batches
size_t i = 0;

Loading…
Cancel
Save