From 29e3e04fcf9a310b9d5b77688ef048ef0a4e6904 Mon Sep 17 00:00:00 2001
From: Aaron Miller
Date: Mon, 24 Apr 2023 12:24:55 -0700
Subject: [PATCH] persistent threadcount setting

threadcount is now on the Settings object and
gets reapplied after a model switch

A setting of 0 (or less) selects the default: the lesser of 4 and the
detected number of hardware threads, but never fewer than 1, because
std::thread::hardware_concurrency() may return 0. The desired count
starts at 0 so a sync request that arrives before the first
setThreadCount() call does not read an indeterminate value.
---
 llm.cpp                | 17 ++++++++++++++---
 llm.h                  |  2 ++
 qml/SettingsDialog.qml | 16 +++++++++++++---
 3 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/llm.cpp b/llm.cpp
index 11fa77bc..81981ae0 100644
--- a/llm.cpp
+++ b/llm.cpp
@@ -101,8 +101,10 @@ bool LLMObject::loadModelPrivate(const QString &modelName)
 }

 void LLMObject::setThreadCount(int32_t n_threads) {
-    m_llmodel->setThreadCount(n_threads);
-    emit threadCountChanged();
+    if (m_llmodel->threadCount() != n_threads) {
+        m_llmodel->setThreadCount(n_threads);
+        emit threadCountChanged();
+    }
 }

 int32_t LLMObject::threadCount() {
@@ -297,6 +299,7 @@ LLM::LLM()
     connect(m_llmodel, &LLMObject::modelNameChanged, this, &LLM::modelNameChanged, Qt::QueuedConnection);
     connect(m_llmodel, &LLMObject::modelListChanged, this, &LLM::modelListChanged, Qt::QueuedConnection);
     connect(m_llmodel, &LLMObject::threadCountChanged, this, &LLM::threadCountChanged, Qt::QueuedConnection);
+    connect(m_llmodel, &LLMObject::threadCountChanged, this, &LLM::syncThreadCount, Qt::QueuedConnection);

     connect(this, &LLM::promptRequested, m_llmodel, &LLMObject::prompt, Qt::QueuedConnection);

@@ -375,8 +378,16 @@ QList LLM::modelList() const
     return m_llmodel->modelList();
 }

+void LLM::syncThreadCount() {
+    emit setThreadCountRequested(m_desiredThreadCount);
+}
+
 void LLM::setThreadCount(int32_t n_threads) {
-    emit setThreadCountRequested(n_threads);
+    if (n_threads <= 0) {
+        n_threads = std::max(1, std::min(4, static_cast<int32_t>(std::thread::hardware_concurrency()))); // hardware_concurrency() may report 0
+    }
+    m_desiredThreadCount = n_threads;
+    syncThreadCount();
 }

 int32_t LLM::threadCount() {
diff --git a/llm.h b/llm.h
index 6dd99865..93987e14 100644
--- a/llm.h
+++ b/llm.h
@@ -82,6 +82,7 @@ public:
     Q_INVOKABLE void resetResponse();
     Q_INVOKABLE void resetContext();
     Q_INVOKABLE void stopGenerating();
+    Q_INVOKABLE void syncThreadCount();
     Q_INVOKABLE void setThreadCount(int32_t n_threads);
     Q_INVOKABLE int32_t threadCount();

@@ -116,6 +117,7 @@ private Q_SLOTS:

 private:
     LLMObject *m_llmodel;
+    int32_t m_desiredThreadCount = 0; // 0 until setThreadCount() stores a resolved value
     bool m_responseInProgress;

 private:
diff --git a/qml/SettingsDialog.qml b/qml/SettingsDialog.qml
index 3aa5f5d2..d8610536 100644
--- a/qml/SettingsDialog.qml
+++ b/qml/SettingsDialog.qml
@@ -31,6 +31,7 @@ Dialog {
     property int defaultTopK: 40
     property int defaultMaxLength: 4096
     property int defaultPromptBatchSize: 9
+    property int defaultThreadCount: 0
     property string defaultPromptTemplate: "The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
 ### Prompt:
 %1
@@ -42,6 +43,7 @@ Dialog {
     property alias maxLength: settings.maxLength
     property alias promptBatchSize: settings.promptBatchSize
     property alias promptTemplate: settings.promptTemplate
+    property alias threadCount: settings.threadCount

     Settings {
         id: settings
@@ -50,6 +52,7 @@ Dialog {
         property int topK: settingsDialog.defaultTopK
         property int maxLength: settingsDialog.defaultMaxLength
         property int promptBatchSize: settingsDialog.defaultPromptBatchSize
+        property int threadCount: settingsDialog.defaultThreadCount
         property string promptTemplate: settingsDialog.defaultPromptTemplate
     }

@@ -60,7 +63,13 @@ Dialog {
         settings.maxLength = defaultMaxLength;
         settings.promptBatchSize = defaultPromptBatchSize;
         settings.promptTemplate = defaultPromptTemplate;
+        settings.threadCount = defaultThreadCount
         settings.sync()
+        LLM.threadCount = settings.threadCount;
+    }
+
+    Component.onCompleted: {
+        LLM.threadCount = settings.threadCount;
     }

     Component.onDestruction: {
@@ -264,7 +273,7 @@ Dialog {
             Layout.column: 0
         }
         TextField {
-            text: LLM.threadCount.toString()
+            text: settingsDialog.threadCount.toString()
             color: theme.textColor
             background: Rectangle {
                 implicitWidth: 150
@@ -272,7 +281,7 @@ Dialog {
                 radius: 10
             }
             padding: 10
-            ToolTip.text: qsTr("Amount of processing threads to use")
+            ToolTip.text: qsTr("Amount of processing threads to use, a setting of 0 will use the lesser of 4 or your number of CPU threads")
             ToolTip.visible: hovered
             Layout.row: 5
             Layout.column: 1
@@ -280,10 +289,11 @@ Dialog {
             onAccepted: {
                 var val = parseInt(text)
                 if (!isNaN(val)) {
+                    settingsDialog.threadCount = val
                     LLM.threadCount = val
                     focus = false
                 } else {
-                    text = settingsDialog.nThreads.toString()
+                    text = settingsDialog.threadCount.toString()
                 }
             }
             Accessible.role: Accessible.EditableText