diff --git a/llm.cpp b/llm.cpp
index 515cfa13..54f6bd22 100644
--- a/llm.cpp
+++ b/llm.cpp
@@ -109,23 +109,20 @@ bool LLMObject::handleResponse(const std::string &response)
     return !m_stopGenerating;
 }
 
-bool LLMObject::prompt(const QString &prompt)
+bool LLMObject::prompt(const QString &prompt, const QString &prompt_template, int32_t n_predict, int32_t top_k, float top_p,
+    float temp, int32_t n_batch)
 {
     if (!isModelLoaded())
         return false;
 
-    QString instructPrompt = QString("Below is a prompt for either a task to complete "
-                                     "or a piece of conversation."
-                                     "Decide which and write an appropriate response to the prompt.\n"
-                                     "### Prompt:\n"
-                                     "%1"
-                                     "### Response:\n").arg(prompt);
+    QString instructPrompt = prompt_template.arg(prompt);
 
     m_stopGenerating = false;
     auto func = std::bind(&LLMObject::handleResponse, this, std::placeholders::_1);
     emit responseStarted();
     qint32 logitsBefore = s_ctx.logits.size();
-    m_llmodel->prompt(instructPrompt.toStdString(), func, s_ctx, 4096 /*number of chars to predict*/);
+    qInfo() << instructPrompt << "\n";
+    m_llmodel->prompt(instructPrompt.toStdString(), func, s_ctx, n_predict, top_k, top_p, temp, n_batch);
     m_responseLogits += s_ctx.logits.size() - logitsBefore;
     emit responseStopped();
     return true;
@@ -152,9 +149,10 @@ bool LLM::isModelLoaded() const
     return m_llmodel->isModelLoaded();
 }
 
-void LLM::prompt(const QString &prompt)
+void LLM::prompt(const QString &prompt, const QString &prompt_template, int32_t n_predict, int32_t top_k, float top_p,
+    float temp, int32_t n_batch)
 {
-    emit promptRequested(prompt);
+    emit promptRequested(prompt, prompt_template, n_predict, top_k, top_p, temp, n_batch);
 }
 
 void LLM::resetResponse()
diff --git a/llm.h b/llm.h
index f5f03378..03db9f9a 100644
--- a/llm.h
+++ b/llm.h
@@ -26,7 +26,8 @@ public:
     QString modelName() const;
 
 public Q_SLOTS:
-    bool prompt(const QString &prompt);
+    bool prompt(const QString &prompt, const QString &prompt_template, int32_t n_predict, int32_t top_k, float top_p,
+        float temp, int32_t n_batch);
 
 Q_SIGNALS:
     void isModelLoadedChanged();
@@ -60,7 +61,8 @@ public:
     static LLM *globalInstance();
 
     Q_INVOKABLE bool isModelLoaded() const;
-    Q_INVOKABLE void prompt(const QString &prompt);
+    Q_INVOKABLE void prompt(const QString &prompt, const QString &prompt_template, int32_t n_predict, int32_t top_k, float top_p,
+        float temp, int32_t n_batch);
     Q_INVOKABLE void resetContext();
     Q_INVOKABLE void resetResponse();
     Q_INVOKABLE void stopGenerating();
@@ -76,7 +78,8 @@ Q_SIGNALS:
     void isModelLoadedChanged();
     void responseChanged();
     void responseInProgressChanged();
-    void promptRequested(const QString &prompt);
+    void promptRequested(const QString &prompt, const QString &prompt_template, int32_t n_predict, int32_t top_k, float top_p,
+        float temp, int32_t n_batch);
     void resetResponseRequested();
     void resetContextRequested();
     void modelNameChanged();
diff --git a/main.qml b/main.qml
index 6e7f26de..1cc6f0fb 100644
--- a/main.qml
+++ b/main.qml
@@ -591,7 +591,9 @@ Window {
                         if (listElement.name === qsTr("Response: ")) {
                             listElement.currentResponse = true
                             listElement.value = LLM.response
-                            LLM.prompt(listElement.prompt)
+                            LLM.prompt(listElement.prompt, settingsDialog.promptTemplate, settingsDialog.maxLength,
+                                       settingsDialog.topK, settingsDialog.topP, settingsDialog.temperature,
+                                       settingsDialog.promptBatchSize)
                         }
                     }
                 }
@@ -642,7 +644,8 @@ Window {
                     chatModel.append({"name": qsTr("Prompt: "), "currentResponse": false, "value": textInput.text})
                     chatModel.append({"name": qsTr("Response: "), "currentResponse": true,
                         "value": "", "prompt": prompt})
                     LLM.resetResponse()
-                    LLM.prompt(prompt)
+                    LLM.prompt(prompt, settingsDialog.promptTemplate, settingsDialog.maxLength, settingsDialog.topK,
+                               settingsDialog.topP, settingsDialog.temperature, settingsDialog.promptBatchSize)
                     textInput.text = ""
                 }