diff --git a/README.md b/README.md index a47416b..62e4a91 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,6 @@ This is an app that let's you ask questions about any data source by leveraging ## Good to know - As default context this git repository is taken so you can directly start asking question about its functionality without chosing an own data source. -- To run locally or deploy somewhere, execute `cp .env.template .env` and set necessary keys in the newly created secrets file. Another option is to manually set environment variables -- your file won't load? Feel free to open an Issue or PR and contribute! +- To run locally or deploy somewhere, execute `cp .env.template .env` and set necessary keys in the newly created secrets file. Other options are setting environment variables manually or creating a `.streamlit/secrets.toml` file and storing credentials there. +- Your data won't load? Feel free to open an Issue or PR and contribute! - Finally, yes, Chad in `DataChad` refers to the well-known [meme](https://www.google.com/search?q=chad+meme) diff --git a/app.py b/app.py index d192082..24b28ee 100644 --- a/app.py +++ b/app.py @@ -2,7 +2,13 @@ import streamlit as st from dotenv import load_dotenv from streamlit_chat import message -from constants import APP_NAME, DEFAULT_DATA_SOURCE, PAGE_ICON +from constants import ( + ACTIVELOOP_HELP, + APP_NAME, + DEFAULT_DATA_SOURCE, + OPENAI_HELP, + PAGE_ICON, +) from utils import ( authenticate, build_chain_and_clear_history, @@ -47,15 +53,29 @@ if "activeloop_org_name" not in st.session_state: with st.sidebar: st.title("Authentication") with st.form("authentication"): - openai_api_key = st.text_input("OpenAI API Key", type="password") - activeloop_token = st.text_input("ActiveLoop Token", type="password") + openai_api_key = st.text_input( + "OpenAI API Key", + type="password", + help=OPENAI_HELP, + placeholder="This field is mandatory", + ) + activeloop_token = st.text_input( + "ActiveLoop Token", 
type="password", + help=ACTIVELOOP_HELP, + placeholder="Optional, using ours if empty", + ) activeloop_org_name = st.text_input( - "ActiveLoop Organisation Name", type="password" + "ActiveLoop Organisation Name", + type="password", + help=ACTIVELOOP_HELP, + placeholder="Optional, using ours if empty", ) submitted = st.form_submit_button("Submit") if submitted: authenticate(openai_api_key, activeloop_token, activeloop_org_name) + st.info("Learn how it works [here](https://github.com/gustavz/DataChad)") if not st.session_state["auth_ok"]: st.stop() @@ -81,12 +101,12 @@ data_source = st.text_input( # generate new chain for new data source / uploaded file # make sure to do this only once per input / on change if data_source and data_source != st.session_state["data_source"]: - logger.info(f"data source provided: '{data_source}'") + logger.info(f"Data source provided: '{data_source}'") build_chain_and_clear_history(data_source) st.session_state["data_source"] = data_source if uploaded_file and uploaded_file != st.session_state["uploaded_file"]: - logger.info(f"uploaded file: '{uploaded_file.name}'") + logger.info(f"Uploaded file: '{uploaded_file.name}'") data_source = save_uploaded_file(uploaded_file) build_chain_and_clear_history(data_source) delete_uploaded_file(uploaded_file) diff --git a/constants.py b/constants.py index 9b41ad3..8146b30 100644 --- a/constants.py +++ b/constants.py @@ -6,3 +6,14 @@ PAGE_ICON = "🤖" DATA_PATH = Path.cwd() / "data" DEFAULT_DATA_SOURCE = "git@github.com:gustavz/DataChad.git" + +OPENAI_HELP = """ +You can sign up for OpenAI's API [here](https://openai.com/blog/openai-api).\n +Once you are logged in, you find the API keys [here](https://platform.openai.com/account/api-keys) +""" + +ACTIVELOOP_HELP = """ +You can create an ActiveLoop account (including 500GB of free database storage) [here](https://www.activeloop.ai/).\n +Once you are logged in, you find the API token [here](https://app.activeloop.ai/profile/gustavz/apitoken).\n +The 
organisation name is your username, or you can create new organisations [here](https://app.activeloop.ai/organization/new/create) +""" diff --git a/utils.py b/utils.py index 744ac31..5ec2963 100644 --- a/utils.py +++ b/utils.py @@ -54,8 +54,16 @@ def authenticate(openai_api_key, activeloop_token, activeloop_org_name): # Validate all credentials are set and correct # Check for env variables to enable local dev and deployments with shared credentials openai_api_key = openai_api_key or os.environ.get("OPENAI_API_KEY") - activeloop_token = activeloop_token or os.environ.get("ACTIVELOOP_TOKEN") - activeloop_org_name = activeloop_org_name or os.environ.get("ACTIVELOOP_ORG_NAME") + activeloop_token = ( + activeloop_token + or os.environ.get("ACTIVELOOP_TOKEN") + or st.secrets.get("ACTIVELOOP_TOKEN") + ) + activeloop_org_name = ( + activeloop_org_name + or os.environ.get("ACTIVELOOP_ORG_NAME") + or st.secrets.get("ACTIVELOOP_ORG_NAME") + ) if not (openai_api_key and activeloop_token and activeloop_org_name): st.session_state["auth_ok"] = False st.error("Credentials neither set nor stored", icon=PAGE_ICON) @@ -93,7 +101,7 @@ def save_uploaded_file(uploaded_file): file = open(file_path, "wb") file.write(file_bytes) file.close() - logger.info(f"saved {file_path}") + logger.info(f"Saved {file_path}") return file_path @@ -102,7 +110,7 @@ def delete_uploaded_file(uploaded_file): file_path = DATA_PATH / uploaded_file.name if os.path.exists(DATA_PATH): os.remove(file_path) - logger.info(f"removed {file_path}") + logger.info(f"Removed {file_path}") def load_git(data_source): @@ -171,7 +179,7 @@ def load_any_data_source(data_source): if loader: text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0) docs = loader.load_and_split(text_splitter) - logger.info(f"loaded {len(docs)} document chucks") + logger.info(f"Loaded {len(docs)} document chucks") return docs error_msg = f"Failed to load {data_source}"