diff --git a/README.md b/README.md index d680880..991b141 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ This is an app that let's you ask questions about any data source by leveraging - To run locally or deploy somewhere, execute `cp .env.template .env` and set credentials in the newly created `.env` file. Other options are manually setting of system environment variables, or storing them into `.streamlit/secrets.toml` when hosted via streamlit. - If you have credentials set like explained above, you can just hit `submit` in the authentication without reentering your credentials in the app. - To enable `Local Mode` (disabled for the demo) set `ENABLE_LOCAL_MODE` to `True` in `datachad/constants.py`. You need to have the model binaries downloaded and stored inside `./models/` -- Currently supported `Local Mode` OSS models are `GPT4All` and `LlamaCpp` +- The only currently supported `Local Mode` OSS model is [GPT4All](https://gpt4all.io/models/ggml-gpt4all-j-v1.3-groovy.bin). To add more models, update `datachad/models.py` - If you are running `Local Mode` all your data stays locally on your machine. No API calls are made. Same with the embeddings database which stores its data to `./data/` - Your data won't load? Feel free to open an Issue or PR and contribute! 
- Yes, Chad in `DataChad` refers to the well-known [meme](https://www.google.com/search?q=chad+meme) diff --git a/app.py b/app.py index 5d0270c..007e010 100644 --- a/app.py +++ b/app.py @@ -155,7 +155,7 @@ def advanced_options_form() -> None: # Sidebar with Authentication and Advanced Options with st.sidebar: - mode = st.selectbox("Mode", MODES.values(), key="mode") + mode = st.selectbox("Mode", MODES.all(), key="mode") if mode == MODES.LOCAL and not ENABLE_LOCAL_MODE: st.error(LOCAL_MODE_DISABLED_HELP, icon=PAGE_ICON) st.stop() diff --git a/datachad/constants.py b/datachad/constants.py index 714d996..1d275ac 100644 --- a/datachad/constants.py +++ b/datachad/constants.py @@ -13,13 +13,15 @@ TEMPERATURE = 0.7 MAX_TOKENS = 3357 MODEL_N_CTX = 1000 -ENABLE_LOCAL_MODE = False ENABLE_ADVANCED_OPTIONS = True +ENABLE_LOCAL_MODE = False +GPT4ALL_MODEL_PATH = "models/ggml-gpt4all-j-v1.3-groovy.bin" DATA_PATH = Path.cwd() / "data" DEFAULT_DATA_SOURCE = "https://github.com/gustavz/DataChad.git" +MODEL_HELP = "Learn more about which models are supported [here](https://github.com/gustavz/DataChad/blob/main/datachad/models.py)" LOCAL_MODE_DISABLED_HELP = """ This is a demo hosted with limited resources. 
Local Mode is not enabled.\n diff --git a/datachad/models.py b/datachad/models.py index 308ed28..fa8f561 100644 --- a/datachad/models.py +++ b/datachad/models.py @@ -7,18 +7,15 @@ from langchain.embeddings import HuggingFaceEmbeddings from langchain.embeddings.openai import Embeddings, OpenAIEmbeddings from langchain.llms import GPT4All, LlamaCpp +from datachad.constants import GPT4ALL_MODEL_PATH from datachad.utils import logger class Enum: @classmethod - def values(cls): + def all(cls): return [v for k, v in cls.__dict__.items() if not k.startswith("_")] - @classmethod - def dict(cls): - return {k: v for k, v in cls.__dict__.items() if not k.startswith("_")} - @dataclass class Model: @@ -32,28 +29,33 @@ class Model: class MODES(Enum): + # Add more modes as needed OPENAI = "OpenAI" LOCAL = "Local" class EMBEDDINGS(Enum): + # Add more embeddings as needed OPENAI = "openai" HUGGINGFACE = "all-MiniLM-L6-v2" class MODELS(Enum): - GPT35TURBO = Model("gpt-3.5-turbo", MODES.OPENAI, EMBEDDINGS.OPENAI) - GPT4 = Model("gpt-4", MODES.OPENAI, EMBEDDINGS.OPENAI) - LLAMACPP = Model( - "LLAMA", MODES.LOCAL, EMBEDDINGS.HUGGINGFACE, "models/llamacpp.bin" + # Add more models as needed + GPT35TURBO = Model( + name="gpt-3.5-turbo", mode=MODES.OPENAI, embedding=EMBEDDINGS.OPENAI ) + GPT4 = Model(name="gpt-4", mode=MODES.OPENAI, embedding=EMBEDDINGS.OPENAI) GPT4ALL = Model( - "GPT4All", MODES.LOCAL, EMBEDDINGS.HUGGINGFACE, "models/gpt4all.bin" + name="GPT4All", + mode=MODES.LOCAL, + embedding=EMBEDDINGS.HUGGINGFACE, + path=GPT4ALL_MODEL_PATH, ) @classmethod def for_mode(cls, mode): - return [v for v in cls.values() if isinstance(v, Model) and v.mode == mode] + return [m for m in cls.all() if isinstance(m, Model) and m.mode == mode] def get_model() -> BaseLanguageModel: @@ -70,13 +72,6 @@ def get_model() -> BaseLanguageModel: temperature=st.session_state["temperature"], openai_api_key=st.session_state["openai_api_key"], ) - case MODELS.LLAMACPP.name: - model = LlamaCpp( - 
model_path=st.session_state["model"].path, - n_ctx=st.session_state["model_n_ctx"], - temperature=st.session_state["temperature"], - verbose=True, - ) case MODELS.GPT4ALL.name: model = GPT4All( model=st.session_state["model"].path, @@ -85,7 +80,7 @@ def get_model() -> BaseLanguageModel: temp=st.session_state["temperature"], verbose=True, ) - # Add more models as needed + # Added models need to be cased here case _default: msg = f"Model {st.session_state['model']} not supported!" logger.error(msg) @@ -102,7 +97,7 @@ def get_embeddings() -> Embeddings: ) case EMBEDDINGS.HUGGINGFACE: embeddings = HuggingFaceEmbeddings(model_name=EMBEDDINGS.HUGGINGFACE) - # Add more embeddings as needed + # Added embeddings need to be cased here case _default: msg = f"Embeddings {st.session_state['embeddings']} not supported!" logger.error(msg)