diff --git a/llm.cpp b/llm.cpp
index 515cfa13..54f6bd22 100644
--- a/llm.cpp
+++ b/llm.cpp
@@ -109,23 +109,20 @@ bool LLMObject::handleResponse(const std::string &response)
     return !m_stopGenerating;
 }
 
-bool LLMObject::prompt(const QString &prompt)
+bool LLMObject::prompt(const QString &prompt, const QString &prompt_template, int32_t n_predict, int32_t top_k, float top_p,
+    float temp, int32_t n_batch)
 {
     if (!isModelLoaded())
         return false;
 
-    QString instructPrompt = QString("Below is a prompt for either a task to complete "
-                                     "or a piece of conversation."
-                                     "Decide which and write an appropriate response to the prompt.\n"
-                                     "### Prompt:\n"
-                                     "%1"
-                                     "### Response:\n").arg(prompt);
+    QString instructPrompt = prompt_template.arg(prompt);
 
     m_stopGenerating = false;
     auto func = std::bind(&LLMObject::handleResponse, this, std::placeholders::_1);
     emit responseStarted();
     qint32 logitsBefore = s_ctx.logits.size();
-    m_llmodel->prompt(instructPrompt.toStdString(), func, s_ctx, 4096 /*number of chars to predict*/);
+    qInfo() << instructPrompt << "\n";
+    m_llmodel->prompt(instructPrompt.toStdString(), func, s_ctx, n_predict, top_k, top_p, temp, n_batch);
     m_responseLogits += s_ctx.logits.size() - logitsBefore;
     emit responseStopped();
     return true;
@@ -152,9 +149,10 @@ bool LLM::isModelLoaded() const
     return m_llmodel->isModelLoaded();
 }
 
-void LLM::prompt(const QString &prompt)
+void LLM::prompt(const QString &prompt, const QString &prompt_template, int32_t n_predict, int32_t top_k, float top_p,
+    float temp, int32_t n_batch)
 {
-    emit promptRequested(prompt);
+    emit promptRequested(prompt, prompt_template, n_predict, top_k, top_p, temp, n_batch);
 }
 
 void LLM::resetResponse()
diff --git a/llm.h b/llm.h
index f5f03378..03db9f9a 100644
--- a/llm.h
+++ b/llm.h
@@ -26,7 +26,8 @@ public:
     QString modelName() const;
 
 public Q_SLOTS:
-    bool prompt(const QString &prompt);
+    bool prompt(const QString &prompt, const QString &prompt_template, int32_t n_predict, int32_t top_k, float top_p,
+        float temp, int32_t n_batch);
 
 Q_SIGNALS:
     void isModelLoadedChanged();
@@ -60,7 +61,8 @@ public:
     static LLM *globalInstance();
 
     Q_INVOKABLE bool isModelLoaded() const;
-    Q_INVOKABLE void prompt(const QString &prompt);
+    Q_INVOKABLE void prompt(const QString &prompt, const QString &prompt_template, int32_t n_predict, int32_t top_k, float top_p,
+        float temp, int32_t n_batch);
     Q_INVOKABLE void resetContext();
     Q_INVOKABLE void resetResponse();
     Q_INVOKABLE void stopGenerating();
@@ -76,7 +78,8 @@ Q_SIGNALS:
     void isModelLoadedChanged();
     void responseChanged();
     void responseInProgressChanged();
-    void promptRequested(const QString &prompt);
+    void promptRequested(const QString &prompt, const QString &prompt_template, int32_t n_predict, int32_t top_k, float top_p,
+        float temp, int32_t n_batch);
     void resetResponseRequested();
     void resetContextRequested();
     void modelNameChanged();
diff --git a/main.qml b/main.qml
index 6e7f26de..1cc6f0fb 100644
--- a/main.qml
+++ b/main.qml
@@ -591,7 +591,9 @@ Window {
                         if (listElement.name === qsTr("Response: ")) {
                             listElement.currentResponse = true
                             listElement.value = LLM.response
-                            LLM.prompt(listElement.prompt)
+                            LLM.prompt(listElement.prompt, settingsDialog.promptTemplate, settingsDialog.maxLength,
+                                       settingsDialog.topK, settingsDialog.topP, settingsDialog.temperature,
+                                       settingsDialog.promptBatchSize)
                         }
                     }
                 }
@@ -642,7 +644,8 @@ Window {
                     chatModel.append({"name": qsTr("Prompt: "), "currentResponse": false, "value": textInput.text})
                     chatModel.append({"name": qsTr("Response: "), "currentResponse": true,
                         "value": "", "prompt": prompt})
                     LLM.resetResponse()
-                    LLM.prompt(prompt)
+                    LLM.prompt(prompt, settingsDialog.promptTemplate, settingsDialog.maxLength, settingsDialog.topK,
+                               settingsDialog.topP, settingsDialog.temperature, settingsDialog.promptBatchSize)
                     textInput.text = ""
                 }