remove llamacpp

main
Gustav von Zitzewitz 12 months ago
parent 14e4db82b5
commit 91743069a9

@ -18,7 +18,7 @@ This is an app that lets you ask questions about any data source by leveraging
- To run locally or deploy somewhere, execute `cp .env.template .env` and set credentials in the newly created `.env` file. Other options are manually setting of system environment variables, or storing them into `.streamlit/secrets.toml` when hosted via streamlit.
- If you have credentials set like explained above, you can just hit `submit` in the authentication without reentering your credentials in the app.
- To enable `Local Mode` (disabled for the demo) set `ENABLE_LOCAL_MODE` to `True` in `datachad/constants.py`. You need to have the model binaries downloaded and stored inside `./models/`
- Currently supported `Local Mode` OSS model is [GPT4All](https://gpt4all.io/models/ggml-gpt4all-j-v1.3-groovy.bin). To add more models update `datachad/models.py`
- If you are running `Local Mode` all your data stays locally on your machine. No API calls are made. Same with the embeddings database which stores its data to `./data/`
- Your data won't load? Feel free to open an Issue or PR and contribute!
- Yes, Chad in `DataChad` refers to the well-known [meme](https://www.google.com/search?q=chad+meme)

@ -155,7 +155,7 @@ def advanced_options_form() -> None:
# Sidebar with Authentication and Advanced Options
with st.sidebar:
mode = st.selectbox("Mode", MODES.values(), key="mode")
mode = st.selectbox("Mode", MODES.all(), key="mode")
if mode == MODES.LOCAL and not ENABLE_LOCAL_MODE:
st.error(LOCAL_MODE_DISABLED_HELP, icon=PAGE_ICON)
st.stop()

@ -13,13 +13,15 @@ TEMPERATURE = 0.7
# Generation limits.
MAX_TOKENS = 3357
MODEL_N_CTX = 1000

# Feature flags. NOTE: the diff artifact defined ENABLE_LOCAL_MODE twice;
# keep a single source of truth here.
ENABLE_ADVANCED_OPTIONS = True
ENABLE_LOCAL_MODE = False

# Paths and defaults.
GPT4ALL_MODEL_PATH = "models/ggml-gpt4all-j-v1.3-groovy.bin"
DATA_PATH = Path.cwd() / "data"
DEFAULT_DATA_SOURCE = "https://github.com/gustavz/DataChad.git"
MODEL_HELP = "Learn more about which models are supported [here](https://github.com/gustavz/DataChad/blob/main/datachad/models.py)"
LOCAL_MODE_DISABLED_HELP = """
This is a demo hosted with limited resources. Local Mode is not enabled.\n

@ -7,18 +7,15 @@ from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings.openai import Embeddings, OpenAIEmbeddings
from langchain.llms import GPT4All, LlamaCpp
from datachad.constants import GPT4ALL_MODEL_PATH
from datachad.utils import logger
class Enum:
    """Minimal enum-style base class.

    Subclasses declare their members as plain class attributes. The helpers
    below reflect over the subclass's own ``__dict__`` only (inherited names
    such as these classmethods do not appear in a subclass's ``__dict__``),
    filtering out private/dunder names.
    """

    @classmethod
    def all(cls):
        """Return the values of all public class attributes, in declaration order."""
        return [v for k, v in cls.__dict__.items() if not k.startswith("_")]

    # Backward-compatible alias: callers written before the values() -> all()
    # rename keep working.
    values = all

    @classmethod
    def dict(cls):
        """Return a name -> value mapping of all public class attributes."""
        return {k: v for k, v in cls.__dict__.items() if not k.startswith("_")}
@dataclass
class Model:
@ -32,28 +29,33 @@ class Model:
class MODES(Enum):
    """Application run modes; add further entries here as needed."""

    # Remote inference through the OpenAI API.
    OPENAI = "OpenAI"
    # Offline inference with locally stored model binaries.
    LOCAL = "Local"
class EMBEDDINGS(Enum):
    """Supported embedding backends; add further entries here as needed."""

    # Remote OpenAI embeddings.
    OPENAI = "openai"
    # Model name handed to HuggingFaceEmbeddings for local embedding.
    HUGGINGFACE = "all-MiniLM-L6-v2"
class MODELS(Enum):
    """Registry of the models selectable in the app.

    Members are ``Model`` instances. Add more models as needed; anything
    registered here must also be handled in ``get_model()``.
    """

    GPT35TURBO = Model(
        name="gpt-3.5-turbo", mode=MODES.OPENAI, embedding=EMBEDDINGS.OPENAI
    )
    GPT4 = Model(name="gpt-4", mode=MODES.OPENAI, embedding=EMBEDDINGS.OPENAI)
    GPT4ALL = Model(
        name="GPT4All",
        mode=MODES.LOCAL,
        embedding=EMBEDDINGS.HUGGINGFACE,
        path=GPT4ALL_MODEL_PATH,
    )

    @classmethod
    def for_mode(cls, mode):
        """Return every registered Model that runs in the given mode.

        The isinstance filter is required because ``all()`` also yields this
        classmethod (it is a public attribute of the class).
        """
        return [m for m in cls.all() if isinstance(m, Model) and m.mode == mode]
def get_model() -> BaseLanguageModel:
@ -70,13 +72,6 @@ def get_model() -> BaseLanguageModel:
temperature=st.session_state["temperature"],
openai_api_key=st.session_state["openai_api_key"],
)
case MODELS.LLAMACPP.name:
model = LlamaCpp(
model_path=st.session_state["model"].path,
n_ctx=st.session_state["model_n_ctx"],
temperature=st.session_state["temperature"],
verbose=True,
)
case MODELS.GPT4ALL.name:
model = GPT4All(
model=st.session_state["model"].path,
@ -85,7 +80,7 @@ def get_model() -> BaseLanguageModel:
temp=st.session_state["temperature"],
verbose=True,
)
# Add more models as needed
# Added models need to be cased here
case _default:
msg = f"Model {st.session_state['model']} not supported!"
logger.error(msg)
@ -102,7 +97,7 @@ def get_embeddings() -> Embeddings:
)
case EMBEDDINGS.HUGGINGFACE:
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDINGS.HUGGINGFACE)
# Add more embeddings as needed
# Added embeddings need to be cased here
case _default:
msg = f"Embeddings {st.session_state['embeddings']} not supported!"
logger.error(msg)

Loading…
Cancel
Save