diff --git a/llm.cpp b/llm.cpp
index 8741b303..515cfa13 100644
--- a/llm.cpp
+++ b/llm.cpp
@@ -101,6 +101,11 @@ bool LLMObject::handleResponse(const std::string &response)
         m_response.append(response);
         emit responseChanged();
     }
+
+    // Stop generation if we encounter prompt or response tokens
+    QString r = QString::fromStdString(m_response);
+    if (r.contains("### Prompt:") || r.contains("### Response:"))
+        return false;
     return !m_stopGenerating;
 }
 
@@ -109,11 +114,18 @@ bool LLMObject::prompt(const QString &prompt)
     if (!isModelLoaded())
         return false;
 
+    QString instructPrompt = QString("Below is a prompt for either a task to complete "
+                                     "or a piece of conversation. "
+                                     "Decide which and write an appropriate response to the prompt.\n"
+                                     "### Prompt:\n"
+                                     "%1\n"
+                                     "### Response:\n").arg(prompt);
+
     m_stopGenerating = false;
     auto func = std::bind(&LLMObject::handleResponse, this, std::placeholders::_1);
     emit responseStarted();
     qint32 logitsBefore = s_ctx.logits.size();
-    m_llmodel->prompt(prompt.toStdString(), func, s_ctx, 4096 /*number of chars to predict*/);
+    m_llmodel->prompt(instructPrompt.toStdString(), func, s_ctx, 4096 /*number of chars to predict*/);
     m_responseLogits += s_ctx.logits.size() - logitsBefore;
     emit responseStopped();
     return true;
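
For readers skimming the patch, below is a minimal standalone sketch of the stop-on-marker callback added in the first hunk. It uses std::string in place of QString so it builds without Qt, and handleMockResponse plus the simulated token stream in main() are hypothetical stand-ins for LLMObject::handleResponse and the real model loop; only the marker check itself mirrors the patch.

// Minimal sketch of the stop-on-marker callback above, with std::string in
// place of QString so it builds without Qt. handleMockResponse and the token
// stream in main() are hypothetical; only the marker check mirrors the patch.
#include <iostream>
#include <string>

static std::string g_response;          // accumulates streamed tokens
static bool g_stopGenerating = false;   // set from the UI in the real app

// Like LLMObject::handleResponse: returning false asks the model loop to
// stop emitting further tokens.
bool handleMockResponse(const std::string &token)
{
    if (!token.empty())
        g_response.append(token);

    // Stop if the accumulated response contains a template marker, i.e. the
    // model has started writing a new prompt/response turn on its own.
    if (g_response.find("### Prompt:") != std::string::npos ||
        g_response.find("### Response:") != std::string::npos)
        return false;

    return !g_stopGenerating;
}

int main()
{
    // Simulated stream: generation should halt once the marker arrives.
    const char *tokens[] = { "Hello", ", world!", "\n### Prompt:", " ignored" };
    for (const char *t : tokens) {
        if (!handleMockResponse(t)) {
            std::cout << "stop requested after token: \"" << t << "\"\n";
            break;
        }
    }
    std::cout << "response so far: " << g_response << "\n";
    return 0;
}

One detail the sketch makes visible: the token containing the marker is appended to the response (and, in the patch, already published via responseChanged()) before the check fires, so the marker text itself ends up in m_response; trimming it from the displayed response would have to happen elsewhere.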