gpt4all/gpt4all-chat/chatllm.h

#ifndef CHATLLM_H
#define CHATLLM_H

#include <atomic>
#include <cstdint>
#include <string>

#include <QFileInfo>
#include <QObject>
#include <QThread>

#include "../gpt4all-backend/llmodel.h"

// Kinds of model a ChatLLM can be associated with (see ChatLLM::m_modelType).
// The numeric values below are exactly the ones the compiler assigned implicitly before;
// they are spelled out so that inserting or reordering an enumerator cannot shift them silently.
// NOTE(review): presumably these values are written by ChatLLM::serialize — confirm before renumbering.
enum LLModelType {
    MPT_     = 0,
    GPTJ_    = 1,
    LLAMA_   = 2,
    CHATGPT_ = 3,
};

// Pairs a raw LLModel pointer with a QFileInfo.
//
// NOTE: The model type and the model name are intentionally NOT part of this struct. They are
// kept by ChatLLM, which must be able to serialize that information even while it is in the
// unloaded state.
struct LLModelInfo {
    LLModel *model{nullptr};    // null until a model is assigned
    QFileInfo fileInfo;
};

class Chat;

// QObject-derived class constructed with a Chat pointer. It holds an LLModelInfo, an
// LLModel::PromptContext and a QThread, and exposes the model-loaded flag, the current
// response, the model name, the recalculation flag and a generated name as Qt properties.
// NOTE(review): the slots are presumably executed on m_llmThread — confirm in chatllm.cpp.
class ChatLLM : public QObject
{
    Q_OBJECT

    // Qt properties. Only modelName has a WRITE accessor; the rest are read-only and
    // each names the signal that announces a change.
    Q_PROPERTY(bool isModelLoaded READ isModelLoaded NOTIFY isModelLoadedChanged)
    Q_PROPERTY(QString response READ response NOTIFY responseChanged)
    Q_PROPERTY(QString modelName READ modelName WRITE setModelName NOTIFY modelNameChanged)
    Q_PROPERTY(bool isRecalc READ isRecalc NOTIFY recalcChanged)
    Q_PROPERTY(QString generatedName READ generatedName NOTIFY generatedNameChanged)

public:
    // isServer defaults to false, so existing single-argument call sites keep working.
    ChatLLM(Chat *parent, bool isServer = false);
    virtual ~ChatLLM();

    // --- model / response lifecycle ---------------------------------------------------------
    bool isModelLoaded() const;
    void regenerateResponse();
    void resetResponse();
    void resetContext();

    // Raises the atomic stop flag. This only sets the flag; whoever is generating must poll it.
    void stopGenerating() { m_stopGenerating.store(true); }

    // Atomic "should be loaded" flag: inline read, out-of-line write.
    bool shouldBeLoaded() const { return m_shouldBeLoaded.load(); }
    void setShouldBeLoaded(bool b);

    // --- property accessors -----------------------------------------------------------------
    QString response() const;
    QString modelName() const;

    void setModelName(const QString &modelName);

    bool isRecalc() const { return m_isRecalc; }

    // Converts the std::string name response to a QString on every call.
    QString generatedName() const { return QString::fromStdString(m_nameResponse); }

    // --- persistence ------------------------------------------------------------------------
    // Both take the stream to operate on and a format version, and report success as bool.
    bool serialize(QDataStream &stream, int version);
    bool deserialize(QDataStream &stream, int version);

public Q_SLOTS:
    // Prompt entry point; the parameters after prompt_template are the sampling/generation
    // settings for this request.
    bool prompt(const QString &prompt,
                const QString &prompt_template,
                int32_t n_predict,
                int32_t top_k,
                float top_p,
                float temp,
                int32_t n_batch,
                float repeat_penalty,
                int32_t repeat_penalty_tokens,
                int32_t n_threads);

    // Model loading / unloading.
    bool loadDefaultModel();
    bool loadModel(const QString &modelName);
    void modelNameChangeRequested(const QString &modelName);
    void forceUnloadModel();
    void unloadModel();
    void reloadModel();

    void generateName();

    // Reactions to changes signalled elsewhere.
    void handleChatIdChanged();
    void handleShouldBeLoadedChanged();

Q_SIGNALS:
    // NOTIFY signal of the isModelLoaded property.
    void isModelLoadedChanged();
    // Carries a human-readable error string.
    void modelLoadingError(const QString &error);
    // NOTIFY signal of the response property.
    void responseChanged();
    void responseStarted();
    void responseStopped();
    // NOTIFY signal of the modelName property.
    void modelNameChanged();
    // NOTIFY signal of the isRecalc property.
    void recalcChanged();
    void sendStartup();
    void sendModelLoaded();
    void sendResetContext();
    // NOTIFY signal of the generatedName property.
    void generatedNameChanged();
    void stateChanged();
    void threadStarted();
    void shouldBeLoadedChanged();

protected:
    void resetContextProtected();

    // Per-token / recalculation handlers for a regular prompt.
    // NOTE(review): presumably handed to the LLModel as callbacks — confirm in chatllm.cpp.
    bool handlePrompt(int32_t token);
    bool handleResponse(int32_t token, const std::string &response);
    bool handleRecalculate(bool isRecalc);

    // Same trio of handlers, used for name generation.
    bool handleNamePrompt(int32_t token);
    bool handleNameResponse(int32_t token, const std::string &response);
    bool handleNameRecalculate(bool isRecalc);

    void saveState();
    void restoreState();

    // --- data members (declaration order is significant for layout and initialization) ------
    LLModel::PromptContext m_ctx;
    quint32 m_promptTokens;
    quint32 m_promptResponseTokens;
    LLModelInfo m_modelInfo;
    LLModelType m_modelType;
    std::string m_response;
    std::string m_nameResponse;             // backing store for generatedName()
    quint32 m_responseLogits;
    QString m_modelName;
    Chat *m_chat;
    QByteArray m_state;
    QThread m_llmThread;
    std::atomic<bool> m_stopGenerating;     // set by stopGenerating()
    std::atomic<bool> m_shouldBeLoaded;     // read by shouldBeLoaded()
    bool m_isRecalc;                        // backing store for isRecalc()
    bool m_isServer;
    bool m_isChatGPT;
};

#endif // CHATLLM_H
Major refactor in prep for multiple conversations. 2023-05-01 13:10:05 +00:00			`#ifndef CHATLLM_H`
			`#define CHATLLM_H`

			`#include <QObject>`
			`#include <QThread>`
Much better memory mgmt for multi-threaded model loading/unloading. 2023-05-13 23:05:35 +00:00			`#include <QFileInfo>`
Major refactor in prep for multiple conversations. 2023-05-01 13:10:05 +00:00
Move the llmodel C API to new top-level directory and version it. 2023-05-10 15:46:40 +00:00			`#include "../gpt4all-backend/llmodel.h"`
Major refactor in prep for multiple conversations. 2023-05-01 13:10:05 +00:00
Much better memory mgmt for multi-threaded model loading/unloading. 2023-05-13 23:05:35 +00:00			`enum LLModelType {`
			`MPT_,`
			`GPTJ_,`
Preliminary support for chatgpt models. 2023-05-15 00:12:15 +00:00			`LLAMA_,`
			`CHATGPT_,`
Much better memory mgmt for multi-threaded model loading/unloading. 2023-05-13 23:05:35 +00:00			`};`

			`struct LLModelInfo {`
			`LLModel *model = nullptr;`
			`QFileInfo fileInfo;`
			`// NOTE: This does not store the model type or name on purpose as this is left for ChatLLM which`
			`// must be able to serialize the information even if it is in the unloaded state`
			`};`

First attempt at providing a persistent chat list experience. Limitations: 1) Context is not restored for gpt-j models 2) When you switch between different model types in an existing chat the context and all the conversation is lost 3) The settings are not chat or conversation specific 4) The sizes of the chat persisted files are very large due to how much data the llama.cpp backend tries to persist. Need to investigate how we can shrink this. 2023-05-04 19:31:41 +00:00			`class Chat;`
Major refactor in prep for multiple conversations. 2023-05-01 13:10:05 +00:00			`class ChatLLM : public QObject`
			`{`
			`Q_OBJECT`
			`Q_PROPERTY(bool isModelLoaded READ isModelLoaded NOTIFY isModelLoadedChanged)`
			`Q_PROPERTY(QString response READ response NOTIFY responseChanged)`
			`Q_PROPERTY(QString modelName READ modelName WRITE setModelName NOTIFY modelNameChanged)`
			`Q_PROPERTY(bool isRecalc READ isRecalc NOTIFY recalcChanged)`
Generate names via llm. 2023-05-02 15:19:17 +00:00			`Q_PROPERTY(QString generatedName READ generatedName NOTIFY generatedNameChanged)`
Major refactor in prep for multiple conversations. 2023-05-01 13:10:05 +00:00
			`public:`
The server has different lifetime mgmt than the other chats. 2023-05-13 23:33:19 +00:00			`ChatLLM(Chat *parent, bool isServer = false);`
Cleanup the chatllm properly. 2023-05-12 18:06:03 +00:00			`virtual ~ChatLLM();`
Major refactor in prep for multiple conversations. 2023-05-01 13:10:05 +00:00
			`bool isModelLoaded() const;`
			`void regenerateResponse();`
			`void resetResponse();`
			`void resetContext();`

			`void stopGenerating() { m_stopGenerating = true; }`

Much better memory mgmt for multi-threaded model loading/unloading. 2023-05-13 23:05:35 +00:00			`bool shouldBeLoaded() const { return m_shouldBeLoaded; }`
			`void setShouldBeLoaded(bool b);`

Major refactor in prep for multiple conversations. 2023-05-01 13:10:05 +00:00			`QString response() const;`
			`QString modelName() const;`

			`void setModelName(const QString &modelName);`

			`bool isRecalc() const { return m_isRecalc; }`

Generate names via llm. 2023-05-02 15:19:17 +00:00			`QString generatedName() const { return QString::fromStdString(m_nameResponse); }`

Convert the old format properly. 2023-05-08 09:52:57 +00:00			`bool serialize(QDataStream &stream, int version);`
			`bool deserialize(QDataStream &stream, int version);`
First attempt at providing a persistent chat list experience. Limitations: 1) Context is not restored for gpt-j models 2) When you switch between different model types in an existing chat the context and all the conversation is lost 3) The settings are not chat or conversation specific 4) The sizes of the chat persisted files are very large due to how much data the llama.cpp backend tries to persist. Need to investigate how we can shrink this. 2023-05-04 19:31:41 +00:00
Major refactor in prep for multiple conversations. 2023-05-01 13:10:05 +00:00			`public Q_SLOTS:`
First attempt at providing a persistent chat list experience. Limitations: 1) Context is not restored for gpt-j models 2) When you switch between different model types in an existing chat the context and all the conversation is lost 3) The settings are not chat or conversation specific 4) The sizes of the chat persisted files are very large due to how much data the llama.cpp backend tries to persist. Need to investigate how we can shrink this. 2023-05-04 19:31:41 +00:00			`bool prompt(const QString &prompt, const QString &prompt_template, int32_t n_predict,`
			`int32_t top_k, float top_p, float temp, int32_t n_batch, float repeat_penalty, int32_t repeat_penalty_tokens,`
			`int32_t n_threads);`
			`bool loadDefaultModel();`
			`bool loadModel(const QString &modelName);`
Major refactor in prep for multiple conversations. 2023-05-01 13:10:05 +00:00			`void modelNameChangeRequested(const QString &modelName);`
Much better memory mgmt for multi-threaded model loading/unloading. 2023-05-13 23:05:35 +00:00			`void forceUnloadModel();`
First attempt at providing a persistent chat list experience. Limitations: 1) Context is not restored for gpt-j models 2) When you switch between different model types in an existing chat the context and all the conversation is lost 3) The settings are not chat or conversation specific 4) The sizes of the chat persisted files are very large due to how much data the llama.cpp backend tries to persist. Need to investigate how we can shrink this. 2023-05-04 19:31:41 +00:00			`void unloadModel();`
Much better memory mgmt for multi-threaded model loading/unloading. 2023-05-13 23:05:35 +00:00			`void reloadModel();`
Generate names via llm. 2023-05-02 15:19:17 +00:00			`void generateName();`
First attempt at providing a persistent chat list experience. Limitations: 1) Context is not restored for gpt-j models 2) When you switch between different model types in an existing chat the context and all the conversation is lost 3) The settings are not chat or conversation specific 4) The sizes of the chat persisted files are very large due to how much data the llama.cpp backend tries to persist. Need to investigate how we can shrink this. 2023-05-04 19:31:41 +00:00			`void handleChatIdChanged();`
Much better memory mgmt for multi-threaded model loading/unloading. 2023-05-13 23:05:35 +00:00			`void handleShouldBeLoadedChanged();`
Major refactor in prep for multiple conversations. 2023-05-01 13:10:05 +00:00
			`Q_SIGNALS:`
			`void isModelLoadedChanged();`
Gracefully handle when we have a previous chat where the model that it used has gone away. 2023-05-09 00:51:03 +00:00			`void modelLoadingError(const QString &error);`
Major refactor in prep for multiple conversations. 2023-05-01 13:10:05 +00:00			`void responseChanged();`
			`void responseStarted();`
			`void responseStopped();`
			`void modelNameChanged();`
			`void recalcChanged();`
			`void sendStartup();`
			`void sendModelLoaded();`
			`void sendResetContext();`
Generate names via llm. 2023-05-02 15:19:17 +00:00			`void generatedNameChanged();`
First attempt at providing a persistent chat list experience. Limitations: 1) Context is not restored for gpt-j models 2) When you switch between different model types in an existing chat the context and all the conversation is lost 3) The settings are not chat or conversation specific 4) The sizes of the chat persisted files are very large due to how much data the llama.cpp backend tries to persist. Need to investigate how we can shrink this. 2023-05-04 19:31:41 +00:00			`void stateChanged();`
httpserver 2023-05-11 20:46:25 +00:00			`void threadStarted();`
Much better memory mgmt for multi-threaded model loading/unloading. 2023-05-13 23:05:35 +00:00			`void shouldBeLoadedChanged();`
httpserver 2023-05-11 20:46:25 +00:00
			`protected:`
			`void resetContextProtected();`
Major refactor in prep for multiple conversations. 2023-05-01 13:10:05 +00:00			`bool handlePrompt(int32_t token);`
			`bool handleResponse(int32_t token, const std::string &response);`
			`bool handleRecalculate(bool isRecalc);`
Generate names via llm. 2023-05-02 15:19:17 +00:00			`bool handleNamePrompt(int32_t token);`
			`bool handleNameResponse(int32_t token, const std::string &response);`
			`bool handleNameRecalculate(bool isRecalc);`
First attempt at providing a persistent chat list experience. Limitations: 1) Context is not restored for gpt-j models 2) When you switch between different model types in an existing chat the context and all the conversation is lost 3) The settings are not chat or conversation specific 4) The sizes of the chat persisted files are very large due to how much data the llama.cpp backend tries to persist. Need to investigate how we can shrink this. 2023-05-04 19:31:41 +00:00			`void saveState();`
			`void restoreState();`
Major refactor in prep for multiple conversations. 2023-05-01 13:10:05 +00:00
The server has different lifetime mgmt than the other chats. 2023-05-13 23:33:19 +00:00			`protected:`
			`LLModel::PromptContext m_ctx;`
			`quint32 m_promptTokens;`
			`quint32 m_promptResponseTokens;`
Much better memory mgmt for multi-threaded model loading/unloading. 2023-05-13 23:05:35 +00:00			`LLModelInfo m_modelInfo;`
			`LLModelType m_modelType;`
Major refactor in prep for multiple conversations. 2023-05-01 13:10:05 +00:00			`std::string m_response;`
Generate names via llm. 2023-05-02 15:19:17 +00:00			`std::string m_nameResponse;`
Major refactor in prep for multiple conversations. 2023-05-01 13:10:05 +00:00			`quint32 m_responseLogits;`
			`QString m_modelName;`
First attempt at providing a persistent chat list experience. Limitations: 1) Context is not restored for gpt-j models 2) When you switch between different model types in an existing chat the context and all the conversation is lost 3) The settings are not chat or conversation specific 4) The sizes of the chat persisted files are very large due to how much data the llama.cpp backend tries to persist. Need to investigate how we can shrink this. 2023-05-04 19:31:41 +00:00			`Chat *m_chat;`
			`QByteArray m_state;`
Major refactor in prep for multiple conversations. 2023-05-01 13:10:05 +00:00			`QThread m_llmThread;`
			`std::atomic<bool> m_stopGenerating;`
Much better memory mgmt for multi-threaded model loading/unloading. 2023-05-13 23:05:35 +00:00			`std::atomic<bool> m_shouldBeLoaded;`
Major refactor in prep for multiple conversations. 2023-05-01 13:10:05 +00:00			`bool m_isRecalc;`
The server has different lifetime mgmt than the other chats. 2023-05-13 23:33:19 +00:00			`bool m_isServer;`
Add large network icon background for chatgpt and server modes. 2023-05-15 18:08:08 +00:00			`bool m_isChatGPT;`
Major refactor in prep for multiple conversations. 2023-05-01 13:10:05 +00:00			`};`

			`#endif // CHATLLM_H`