diff --git a/gpt4all-chat/chatlistmodel.h b/gpt4all-chat/chatlistmodel.h
index 8c60b7a9..881b2cd9 100644
--- a/gpt4all-chat/chatlistmodel.h
+++ b/gpt4all-chat/chatlistmodel.h
@@ -171,9 +171,8 @@ public:
             return;
         }
 
-        if (m_currentChat)
+        if (m_currentChat && m_currentChat != m_serverChat)
             m_currentChat->unloadModel();
-
         m_currentChat = chat;
         if (!m_currentChat->isModelLoaded() && m_currentChat != m_serverChat)
             m_currentChat->reloadModel();
diff --git a/gpt4all-chat/chatllm.cpp b/gpt4all-chat/chatllm.cpp
index 0e8b08cb..e3268189 100644
--- a/gpt4all-chat/chatllm.cpp
+++ b/gpt4all-chat/chatllm.cpp
@@ -81,13 +81,14 @@ void LLModelStore::releaseModel(const LLModelInfo &info)
     m_condition.wakeAll();
 }
 
-ChatLLM::ChatLLM(Chat *parent)
+ChatLLM::ChatLLM(Chat *parent, bool isServer)
     : QObject{nullptr}
     , m_promptResponseTokens(0)
     , m_promptTokens(0)
     , m_responseLogits(0)
     , m_isRecalc(false)
     , m_chat(parent)
+    , m_isServer(isServer)
 {
     moveToThread(&m_llmThread);
     connect(this, &ChatLLM::sendStartup, Network::globalInstance(), &Network::sendStartup);
@@ -151,7 +152,7 @@ bool ChatLLM::loadModel(const QString &modelName)
         delete m_modelInfo.model;
         m_modelInfo.model = nullptr;
         emit isModelLoadedChanged();
-    } else {
+    } else if (!m_isServer) {
         // This is a blocking call that tries to retrieve the model we need from the model store.
         // If it succeeds, then we just have to restore state. If the store has never had a model
         // returned to it, then the modelInfo.model pointer should be null which will happen on startup
@@ -163,7 +164,9 @@ bool ChatLLM::loadModel(const QString &modelName)
         // store, that our state was changed to not be loaded. If this is the case, release the model
         // back into the store and quit loading
         if (!m_shouldBeLoaded) {
+#if defined(DEBUG_MODEL_LOADING)
             qDebug() << "no longer need model" << m_chat->id() << m_modelInfo.model;
+#endif
             LLModelStore::globalInstance()->releaseModel(m_modelInfo);
             m_modelInfo = LLModelInfo();
             emit isModelLoadedChanged();
@@ -232,7 +235,8 @@ bool ChatLLM::loadModel(const QString &modelName)
         } else
             emit sendModelLoaded();
     } else {
-        LLModelStore::globalInstance()->releaseModel(m_modelInfo); // release back into the store
+        if (!m_isServer)
+            LLModelStore::globalInstance()->releaseModel(m_modelInfo); // release back into the store
         const QString error = QString("Could not find model %1").arg(modelName);
         emit modelLoadingError(error);
     }
@@ -436,7 +440,7 @@ void ChatLLM::forceUnloadModel()
 
 void ChatLLM::unloadModel()
 {
-    if (!isModelLoaded())
+    if (!isModelLoaded() || m_isServer)
         return;
 
     saveState();
@@ -450,7 +454,7 @@ void ChatLLM::unloadModel()
 
 void ChatLLM::reloadModel()
 {
-    if (isModelLoaded())
+    if (isModelLoaded() || m_isServer)
         return;
 
 #if defined(DEBUG_MODEL_LOADING)
diff --git a/gpt4all-chat/chatllm.h b/gpt4all-chat/chatllm.h
index 4f6d39f7..0b1eb343 100644
--- a/gpt4all-chat/chatllm.h
+++ b/gpt4all-chat/chatllm.h
@@ -31,7 +31,7 @@ class ChatLLM : public QObject
     Q_PROPERTY(QString generatedName READ generatedName NOTIFY generatedNameChanged)
 
 public:
-    ChatLLM(Chat *parent);
+    ChatLLM(Chat *parent, bool isServer = false);
     virtual ~ChatLLM();
 
     bool isModelLoaded() const;
@@ -87,12 +87,7 @@ Q_SIGNALS:
     void shouldBeLoadedChanged();
 
 protected:
-    LLModel::PromptContext m_ctx;
-    quint32 m_promptTokens;
-    quint32 m_promptResponseTokens;
     void resetContextProtected();
-
-private:
     bool handlePrompt(int32_t token);
     bool handleResponse(int32_t token, const std::string &response);
     bool handleRecalculate(bool isRecalc);
@@ -102,7 +97,10 @@ private:
     void saveState();
     void restoreState();
 
-private:
+protected:
+    LLModel::PromptContext m_ctx;
+    quint32 m_promptTokens;
+    quint32 m_promptResponseTokens;
     LLModelInfo m_modelInfo;
     LLModelType m_modelType;
     std::string m_response;
@@ -115,6 +113,7 @@ private:
    std::atomic<bool> m_stopGenerating;
    std::atomic<bool> m_shouldBeLoaded;
    bool m_isRecalc;
+   bool m_isServer;
 };
 
 #endif // CHATLLM_H
diff --git a/gpt4all-chat/qml/ChatDrawer.qml b/gpt4all-chat/qml/ChatDrawer.qml
index 8db645e6..9df5e891 100644
--- a/gpt4all-chat/qml/ChatDrawer.qml
+++ b/gpt4all-chat/qml/ChatDrawer.qml
@@ -69,6 +69,7 @@ Drawer {
         anchors.bottom: checkForUpdatesButton.top
         anchors.bottomMargin: 10
         ScrollBar.vertical.policy: ScrollBar.AlwaysOn
+        clip: true
 
         ListView {
             id: conversationList
diff --git a/gpt4all-chat/qml/SettingsDialog.qml b/gpt4all-chat/qml/SettingsDialog.qml
index 7c2f6d59..87873fa8 100644
--- a/gpt4all-chat/qml/SettingsDialog.qml
+++ b/gpt4all-chat/qml/SettingsDialog.qml
@@ -820,7 +820,7 @@ Dialog {
                     settings.sync()
                 }
 
-                ToolTip.text: qsTr("WARNING: This enables the gui to act as a local web server for AI API requests")
+                ToolTip.text: qsTr("WARNING: This enables the gui to act as a local web server for AI API requests and will increase your RAM usage as well")
                 ToolTip.visible: hovered
 
                 background: Rectangle {
diff --git a/gpt4all-chat/server.cpp b/gpt4all-chat/server.cpp
index 1c2484df..3575883b 100644
--- a/gpt4all-chat/server.cpp
+++ b/gpt4all-chat/server.cpp
@@ -52,7 +52,7 @@ static inline QJsonObject modelToJson(const ModelInfo &info)
 }
 
 Server::Server(Chat *chat)
-    : ChatLLM(chat)
+    : ChatLLM(chat, true /*isServer*/)
     , m_chat(chat)
     , m_server(nullptr)
 {
@@ -170,13 +170,15 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
         }
     }
 
+    setShouldBeLoaded(true);
+
     if (!foundModel) {
         if (!loadDefaultModel()) {
-            std::cerr << "ERROR: couldn't load default model" << model.toStdString() << std::endl;
+            std::cerr << "ERROR: couldn't load default model " << model.toStdString() << std::endl;
             return QHttpServerResponse(QHttpServerResponder::StatusCode::BadRequest);
         }
     } else if (!loadModel(model)) {
-        std::cerr << "ERROR: couldn't load model" << model.toStdString() << std::endl;
+        std::cerr << "ERROR: couldn't load model " << model.toStdString() << std::endl;
         return QHttpServerResponse(QHttpServerResponder::StatusCode::InternalServerError);
     }
 
@@ -308,7 +310,7 @@ QHttpServerResponse Server::handleCompletionRequest(const QHttpServerRequest &re
             repeat_last_n,
             LLM::globalInstance()->threadCount())) {
 
-            std::cerr << "ERROR: couldn't prompt model" << model.toStdString() << std::endl;
+            std::cerr << "ERROR: couldn't prompt model " << model.toStdString() << std::endl;
             return QHttpServerResponse(QHttpServerResponder::StatusCode::InternalServerError);
         }
         QString echoedPrompt = actualPrompt;
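
For reference, here is a minimal client sketch (not part of this patch) showing how the completion route served by Server::handleCompletionRequest could be exercised once the local API server is enabled in settings. It assumes the application's default listen port of 4891 and an OpenAI-style /v1/completions path; the model name in the request body is a placeholder for whatever model is installed locally.

    // Hypothetical Qt client for the local completion endpoint.
    // Assumes port 4891 and the /v1/completions route; adjust to your build.
    #include <QCoreApplication>
    #include <QJsonDocument>
    #include <QJsonObject>
    #include <QNetworkAccessManager>
    #include <QNetworkReply>
    #include <QNetworkRequest>
    #include <QTextStream>
    #include <QUrl>

    int main(int argc, char *argv[])
    {
        QCoreApplication app(argc, argv);

        QNetworkAccessManager manager;
        QNetworkRequest request(QUrl("http://localhost:4891/v1/completions"));
        request.setHeader(QNetworkRequest::ContentTypeHeader, "application/json");

        // Minimal request body; "model" is a placeholder for an installed model.
        QJsonObject body;
        body["model"] = "gpt4all-j-v1.3-groovy";
        body["prompt"] = "Hello, world";
        body["max_tokens"] = 64;

        QNetworkReply *reply = manager.post(request, QJsonDocument(body).toJson());
        QObject::connect(reply, &QNetworkReply::finished, [&]() {
            // Print the raw JSON response from the server, then exit.
            QTextStream(stdout) << reply->readAll() << "\n";
            reply->deleteLater();
            app.quit();
        });

        return app.exec();
    }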