diff --git a/gpt4all-chat/chatlistmodel.cpp b/gpt4all-chat/chatlistmodel.cpp
index 24e9a9bb..258b67c1 100644
--- a/gpt4all-chat/chatlistmodel.cpp
+++ b/gpt4all-chat/chatlistmodel.cpp
@@ -6,7 +6,7 @@
 #include
 
 #define CHAT_FORMAT_MAGIC 0xF5D553CC
-#define CHAT_FORMAT_VERSION 3
+#define CHAT_FORMAT_VERSION 4
 
 ChatListModel::ChatListModel(QObject *parent)
     : QAbstractListModel(parent)
diff --git a/gpt4all-chat/chatllm.cpp b/gpt4all-chat/chatllm.cpp
index 92d14376..f28c954d 100644
--- a/gpt4all-chat/chatllm.cpp
+++ b/gpt4all-chat/chatllm.cpp
@@ -91,7 +91,6 @@ ChatLLM::ChatLLM(Chat *parent, bool isServer)
     : QObject{nullptr}
     , m_promptResponseTokens(0)
     , m_promptTokens(0)
-    , m_responseLogits(0)
     , m_isRecalc(false)
     , m_chat(parent)
     , m_timer(nullptr)
@@ -300,12 +299,9 @@ void ChatLLM::regenerateResponse()
     else
         m_ctx.n_past -= m_promptResponseTokens;
     m_ctx.n_past = std::max(0, m_ctx.n_past);
-    // FIXME: This does not seem to be needed in my testing and llama models don't to it. Remove?
-    m_ctx.logits.erase(m_ctx.logits.end() -= m_responseLogits, m_ctx.logits.end());
     m_ctx.tokens.erase(m_ctx.tokens.end() -= m_promptResponseTokens, m_ctx.tokens.end());
     m_promptResponseTokens = 0;
     m_promptTokens = 0;
-    m_responseLogits = 0;
     m_response = std::string();
     emit responseChanged();
 }
@@ -314,7 +310,6 @@ void ChatLLM::resetResponse()
 {
     m_promptTokens = 0;
     m_promptResponseTokens = 0;
-    m_responseLogits = 0;
     m_response = std::string();
     emit responseChanged();
 }
@@ -368,7 +363,7 @@ void ChatLLM::modelNameChangeRequested(const QString &modelName)
 
 bool ChatLLM::handlePrompt(int32_t token)
 {
-    // m_promptResponseTokens and m_responseLogits are related to last prompt/response not
+    // m_promptResponseTokens is related to last prompt/response not
     // the entire context window which we can reset on regenerate prompt
 #if defined(DEBUG)
     qDebug() << "prompt process" << m_chat->id() << token;
@@ -393,7 +388,7 @@ bool ChatLLM::handleResponse(int32_t token, const std::string &response)
         return false;
     }
 
-    // m_promptResponseTokens and m_responseLogits are related to last prompt/response not
+    // m_promptResponseTokens is related to last prompt/response not
     // the entire context window which we can reset on regenerate prompt
     ++m_promptResponseTokens;
     m_timer->inc();
@@ -458,7 +453,6 @@ bool ChatLLM::prompt(const QString &prompt, const QString &prompt_template, int3
     fflush(stdout);
 #endif
     m_timer->stop();
-    m_responseLogits += m_ctx.logits.size() - logitsBefore;
     std::string trimmed = trim_whitespace(m_response);
     if (trimmed != m_response) {
         m_response = trimmed;
@@ -595,7 +589,10 @@ bool ChatLLM::serialize(QDataStream &stream, int version)
     stream << response();
     stream << generatedName();
     stream << m_promptResponseTokens;
-    stream << m_responseLogits;
+    if (version <= 3) {
+        int responseLogits = 0;
+        stream << responseLogits;
+    }
     stream << m_ctx.n_past;
     stream << quint64(m_ctx.logits.size());
     stream.writeRawData(reinterpret_cast<const char*>(m_ctx.logits.data()), m_ctx.logits.size() * sizeof(float));
@@ -624,7 +621,10 @@ bool ChatLLM::deserialize(QDataStream &stream, int version)
     stream >> nameResponse;
     m_nameResponse = nameResponse.toStdString();
     stream >> m_promptResponseTokens;
-    stream >> m_responseLogits;
+    if (version <= 3) {
+        int responseLogits;
+        stream >> responseLogits;
+    }
     stream >> m_ctx.n_past;
     quint64 logitsSize;
     stream >> logitsSize;
@@ -639,7 +639,6 @@ bool ChatLLM::deserialize(QDataStream &stream, int version)
         stream >> compressed;
         m_state = qUncompress(compressed);
     } else {
-        stream >> m_state;
     }
 
 #if defined(DEBUG)
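The version gates added to serialize()/deserialize() above are what keep chats saved under format version 3 loadable after the field removal: the writer emits a placeholder when asked to produce the old format, and the reader consumes and discards the obsolete value so the stream stays aligned. A minimal sketch of that pattern outside the gpt4all sources (the `SavedResponse` struct, the `loadSavedResponse` helper, and the field layout are hypothetical simplifications, not the project's API):

```cpp
#include <QDataStream>
#include <QString>

// Hypothetical, simplified mirror of the version-gated read in
// ChatLLM::deserialize above.
struct SavedResponse {
    QString response;
    qint32  promptResponseTokens = 0;
    qint32  nPast = 0;
};

bool loadSavedResponse(QDataStream &stream, int version, SavedResponse &out)
{
    stream >> out.response;
    stream >> out.promptResponseTokens;

    if (version <= 3) {
        // Format 3 and earlier stored a per-response logit count here;
        // format 4 dropped it, so consume the value and ignore it.
        qint32 obsoleteResponseLogits = 0;
        stream >> obsoleteResponseLogits;
    }

    stream >> out.nPast;
    return stream.status() == QDataStream::Ok;
}
```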
diff --git a/gpt4all-chat/chatllm.h b/gpt4all-chat/chatllm.h
index dd34f3f4..3cb0b7e2 100644
--- a/gpt4all-chat/chatllm.h
+++ b/gpt4all-chat/chatllm.h
@@ -150,7 +150,6 @@ protected:
     LLModelType m_modelType;
     std::string m_response;
     std::string m_nameResponse;
-    quint32 m_responseLogits;
     QString m_modelName;
     Chat *m_chat;
     TokenTimer *m_timer;
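Dropping the member from chatllm.h changes what gets written to disk, which is why CHAT_FORMAT_VERSION is bumped to 4: the version stored with each saved chat tells the reader whether the obsolete field is present. A sketch of the header gate this relies on, assuming the magic and version are written as 32-bit integers at the front of the file (the `readChatHeader` helper and exact header layout are assumptions for illustration, not code from this diff):

```cpp
#include <QDataStream>

#define CHAT_FORMAT_MAGIC 0xF5D553CC
#define CHAT_FORMAT_VERSION 4

// Hypothetical header check: reject foreign files via the magic number,
// refuse formats newer than this build understands, and hand the stored
// version down so version-gated reads can skip removed fields.
bool readChatHeader(QDataStream &stream, qint32 &version)
{
    quint32 magic = 0;
    stream >> magic;
    if (magic != CHAT_FORMAT_MAGIC)
        return false;

    stream >> version;
    if (version < 1 || version > CHAT_FORMAT_VERSION)
        return false;

    return stream.status() == QDataStream::Ok;
}
```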