limit prompt batch size to 128

Aaron Miller 2023-06-30 16:13:25 -07:00 committed by AT
parent 958c8d4fa5
commit 7a5f6e4726
2 changed files with 3 additions and 0 deletions

@@ -9,6 +9,8 @@
#include <cstdint>
#include <limits>
#define LLMODEL_MAX_PROMPT_BATCH 128
class Dlhandle;
class LLModel {

@@ -52,6 +52,7 @@ void LLModel::prompt(const std::string &prompt,
promptCtx.n_predict = std::min(promptCtx.n_predict, promptCtx.n_ctx - (int) embd_inp.size());
promptCtx.n_past = std::min(promptCtx.n_past, promptCtx.n_ctx);
promptCtx.n_batch = std::min(promptCtx.n_batch, LLMODEL_MAX_PROMPT_BATCH);
// process the prompt in batches
size_t i = 0;
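
For context, below is a minimal, self-contained sketch of how a clamped n_batch bounds the batched prompt loop that follows this hunk. The PromptContext struct and eval_batch() helper are simplified stand-ins, not the project's actual API; only the std::min clamp against LLMODEL_MAX_PROMPT_BATCH mirrors the change in this commit.

// Sketch only: simplified stand-ins for the real PromptContext and eval call.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

#define LLMODEL_MAX_PROMPT_BATCH 128

struct PromptContext {
    int32_t n_batch = 512; // requested batch size, possibly larger than the cap
};

// Stand-in for the backend's token evaluation call.
static void eval_batch(const std::vector<int32_t> &batch) {
    std::printf("evaluating %zu tokens\n", batch.size());
}

int main() {
    PromptContext ctx;
    std::vector<int32_t> embd_inp(1000, 0); // pretend-tokenized prompt

    // The clamp added in this commit: never hand the backend more than
    // 128 prompt tokens per batch, regardless of what the caller requested.
    ctx.n_batch = std::min(ctx.n_batch, (int32_t) LLMODEL_MAX_PROMPT_BATCH);

    // Feed the prompt in n_batch-sized chunks.
    size_t i = 0;
    while (i < embd_inp.size()) {
        size_t batch_end = std::min(i + (size_t) ctx.n_batch, embd_inp.size());
        std::vector<int32_t> batch(embd_inp.begin() + i, embd_inp.begin() + batch_end);
        eval_batch(batch);
        i = batch_end;
    }
    return 0;
}

With the values above, the 1000-token prompt is evaluated in eight batches: seven full batches of 128 tokens and a final 104-token remainder, instead of one oversized 512-token batch.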