diff --git a/llm.cpp b/llm.cpp
index 8741b303..515cfa13 100644
--- a/llm.cpp
+++ b/llm.cpp
@@ -101,6 +101,11 @@ bool LLMObject::handleResponse(const std::string &response)
         m_response.append(response);
         emit responseChanged();
     }
+
+    // Stop generation if we encounter prompt or response tokens
+    QString r = QString::fromStdString(m_response);
+    if (r.contains("### Prompt:") || r.contains("### Response:"))
+        return false;
     return !m_stopGenerating;
 }
 
@@ -109,11 +114,18 @@ bool LLMObject::prompt(const QString &prompt)
     if (!isModelLoaded())
         return false;
 
+    QString instructPrompt = QString("Below is a prompt for either a task to complete "
+                                     "or a piece of conversation. "
+                                     "Decide which and write an appropriate response to the prompt.\n"
+                                     "### Prompt:\n"
+                                     "%1\n"
+                                     "### Response:\n").arg(prompt);
+
     m_stopGenerating = false;
     auto func = std::bind(&LLMObject::handleResponse, this, std::placeholders::_1);
     emit responseStarted();
     qint32 logitsBefore = s_ctx.logits.size();
-    m_llmodel->prompt(prompt.toStdString(), func, s_ctx, 4096 /*number of chars to predict*/);
+    m_llmodel->prompt(instructPrompt.toStdString(), func, s_ctx, 4096 /*number of chars to predict*/);
     m_responseLogits += s_ctx.logits.size() - logitsBefore;
     emit responseStopped();
     return true;
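
For readers skimming the patch, below is a minimal standalone sketch of the stop-on-marker callback added in the first hunk. It uses std::string in place of QString so it builds without Qt, and handleMockResponse plus the simulated token stream in main() are hypothetical stand-ins for LLMObject::handleResponse and the real model loop; only the marker check itself mirrors the patch.

// Minimal sketch of the stop-on-marker callback above, with std::string in
// place of QString so it builds without Qt. handleMockResponse and the token
// stream in main() are hypothetical; only the marker check mirrors the patch.
#include <iostream>
#include <string>

static std::string g_response;          // accumulates streamed tokens
static bool g_stopGenerating = false;   // set from the UI in the real app

// Like LLMObject::handleResponse: returning false asks the model loop to
// stop emitting further tokens.
bool handleMockResponse(const std::string &token)
{
    if (!token.empty())
        g_response.append(token);

    // Stop if the accumulated response contains a template marker, i.e. the
    // model has started writing a new prompt/response turn on its own.
    if (g_response.find("### Prompt:") != std::string::npos ||
        g_response.find("### Response:") != std::string::npos)
        return false;

    return !g_stopGenerating;
}

int main()
{
    // Simulated stream: generation should halt once the marker arrives.
    const char *tokens[] = { "Hello", ", world!", "\n### Prompt:", " ignored" };
    for (const char *t : tokens) {
        if (!handleMockResponse(t)) {
            std::cout << "stop requested after token: \"" << t << "\"\n";
            break;
        }
    }
    std::cout << "response so far: " << g_response << "\n";
    return 0;
}

One detail the sketch makes visible: the token containing the marker is appended to the response (and, in the patch, already published via responseChanged()) before the check fires, so the marker text itself ends up in m_response; trimming it from the displayed response would have to happen elsewhere.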