Don't crash when prompt is too large.

1 year ago · a3d97fa009
parent fbce5f2078
commit a3d97fa009
3 changed files with 17 additions and 1 deletion
--- a/llm.cpp
+++ b/llm.cpp
@ -252,6 +252,13 @@ bool LLMObject::handleResponse(int32_t token, const std::string &response)
    fflush(stdout);
 #endif
    // Check for error: a negative token is treated as a failure report rather
    // than generated text. The accompanying string is appended to m_response
    // and responseChanged() is emitted so listeners pick up the message.
    // NOTE(review): presumably this is the receiving end of the
    // response(-1, "...") calls made by the model prompt() code when the
    // prompt does not fit the context window, and returning false tells the
    // caller to stop generating — confirm against the callback contract.
    if (token < 0) {
        m_response.append(response);
        emit responseChanged();
        return false;
    }
    // Save the token to our prompt ctxt
    if (s_ctx.tokens.size() == s_ctx.n_ctx)
        s_ctx.tokens.erase(s_ctx.tokens.begin());
--- a/llmodel/gptj.cpp
+++ b/llmodel/gptj.cpp
@ -707,6 +707,13 @@ void GPTJ::prompt(const std::string &prompt,
    // save the context size
    promptCtx.n_ctx = d_ptr->model.hparams.n_ctx;
    // Reject prompts that do not fit in the context window (the check keeps a
    // margin of 4 tokens below n_ctx). The failure is reported through
    // response() with -1 as the token and a human-readable message, logged to
    // stderr with the actual sizes, and prompt() returns without evaluating.
    if ((int) embd_inp.size() > promptCtx.n_ctx - 4) {
        response(-1, "ERROR: The prompt size exceeds the context window size and cannot be processed.");
        // Spaces around the streamed numbers keep the log line readable.
        std::cerr << "GPT-J ERROR: The prompt is " << embd_inp.size() <<
            " tokens and the context window is " << promptCtx.n_ctx << "!\n";
        return;
    }
    promptCtx.n_predict = std::min(promptCtx.n_predict, promptCtx.n_ctx - (int) embd_inp.size());
    promptCtx.n_past = std::min(promptCtx.n_past, promptCtx.n_ctx);
--- a/llmodel/llamamodel.cpp
+++ b/llmodel/llamamodel.cpp
@ -102,7 +102,9 @@ void LLamaModel::prompt(const std::string &prompt,
    promptCtx.n_ctx = llama_n_ctx(d_ptr->ctx);
    // Reject prompts that do not fit in the context window (the check keeps a
    // margin of 4 tokens below n_ctx). The failure is reported through
    // response() with -1 as the token and a human-readable message, logged to
    // stderr with the actual sizes, and prompt() returns without evaluating.
    if ((int) embd_inp.size() > promptCtx.n_ctx - 4) {
-        std::cerr << "LLAMA ERROR: prompt is too long\n";
+        response(-1, "ERROR: The prompt size exceeds the context window size and cannot be processed.");
        std::cerr << "LLAMA ERROR: The prompt is " << embd_inp.size() <<
            " tokens and the context window is " << promptCtx.n_ctx << "!\n";
        return;
    }