switch to .env credentials

pull/1/head
Gustav von Zitzewitz 1 year ago
parent 0554c6a402
commit 1e20e055dd

@ -0,0 +1,3 @@
OPENAI_API_KEY = your openai key
ACTIVELOOP_TOKEN = your activeloop key
ACTIVELOOP_ORG_NAME = your activeloop organization name

3
.gitignore vendored

@ -1,3 +1,4 @@
data
__pycache__
.streamlit/secrets.toml
.streamlit/secrets.toml
.env

@ -1,3 +0,0 @@
OPENAI_API_KEY = "your openai key"
ACTIVELOOP_TOKEN = "your activeloop key"
ACTIVELOOP_ORG_NAME = "your activeloop organization name"

@ -15,9 +15,5 @@ This is an app that lets you ask questions about any data source by leveraging
## Good to know
- By default, this git repository is used as the context, so you can directly start asking questions about its functionality without choosing your own data source.
- To run locally or deploy somewhere, execute:
```cp .streamlit/secret.toml.template .streamlit/secret.toml```
and set necessary keys in the newly created secrets file. Another option is to manually set environment variables
- To run locally or deploy somewhere, execute `cp .env.template .env` and set the necessary keys in the newly created `.env` file. Alternatively, you can set the environment variables manually.
- Yes, Chad in `DataChad` refers to the well-known [meme](https://www.google.com/search?q=chad+meme)

@ -1,16 +1,19 @@
import streamlit as st
from dotenv import load_dotenv
from streamlit_chat import message
from constants import APP_NAME, DEFAULT_DATA_SOURCE, PAGE_ICON
from utils import (
authenticate,
build_chain_and_clear_history,
delete_uploaded_file,
generate_response,
save_uploaded_file,
build_chain_and_clear_history,
validate_keys,
logger,
save_uploaded_file,
)
load_dotenv()
# Page options and header
st.set_option("client.showErrorDetails", True)
@ -33,22 +36,25 @@ if "data_source" not in st.session_state:
st.session_state["data_source"] = ""
if "uploaded_file" not in st.session_state:
st.session_state["uploaded_file"] = None
if "openai_api_key" not in st.session_state:
st.session_state["openai_api_key"] = None
if "activeloop_token" not in st.session_state:
st.session_state["activeloop_token"] = None
if "activeloop_org_name" not in st.session_state:
st.session_state["activeloop_org_name"] = None
# Sidebar
with st.sidebar:
st.title("Authentication")
with st.form("authentication"):
openai_key = st.text_input("OpenAI API Key", type="password", key="openai_key")
activeloop_token = st.text_input(
"ActiveLoop Token", type="password", key="activeloop_token"
)
openai_api_key = st.text_input("OpenAI API Key", type="password")
activeloop_token = st.text_input("ActiveLoop Token", type="password")
activeloop_org_name = st.text_input(
"ActiveLoop Organisation Name", type="password", key="activeloop_org_name"
"ActiveLoop Organisation Name", type="password"
)
submitted = st.form_submit_button("Submit")
if submitted:
validate_keys(openai_key, activeloop_token, activeloop_org_name)
authenticate(openai_api_key, activeloop_token, activeloop_org_name)
if not st.session_state["auth_ok"]:
st.stop()

@ -8,4 +8,5 @@ unstructured==0.6.5
pdf2image==1.16.3
pytesseract==0.3.10
beautifulsoup4==4.12.2
bs4==0.0.1
bs4==0.0.1
python-dotenv==1.0.0

@ -1,11 +1,11 @@
import logging
import os
import re
import logging
import shutil
import sys
import openai
import deeplake
import shutil
import openai
import streamlit as st
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
@ -27,8 +27,7 @@ from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import DeepLake
from constants import DATA_PATH, MODEL, PAGE_ICON, APP_NAME
from constants import APP_NAME, DATA_PATH, MODEL, PAGE_ICON
logger = logging.getLogger(APP_NAME)
@ -51,55 +50,36 @@ def configure_logger(debug=0):
configure_logger(0)
def validate_keys(openai_key, activeloop_token, activeloop_org_name):
# Validate all API related variables are set and correct
all_keys = [openai_key, activeloop_token, activeloop_org_name]
if any(all_keys):
if not all(all_keys):
st.session_state["auth_ok"] = False
st.error("You need to fill all fields", icon=PAGE_ICON)
st.stop()
os.environ["OPENAI_API_KEY"] = openai_key
os.environ["ACTIVELOOP_TOKEN"] = activeloop_token
os.environ["ACTIVELOOP_ORG_NAME"] = activeloop_org_name
else:
# Bypass for local development or deployments with stored credentials
# either env variables or streamlit secrets need to be set
try:
try:
assert os.environ.get("OPENAI_API_KEY")
assert os.environ.get("ACTIVELOOP_TOKEN")
assert os.environ.get("ACTIVELOOP_ORG_NAME")
except:
assert st.secrets.get("OPENAI_API_KEY")
assert st.secrets.get("ACTIVELOOP_TOKEN")
assert st.secrets.get("ACTIVELOOP_ORG_NAME")
os.environ["OPENAI_API_KEY"] = st.secrets.get("OPENAI_API_KEY")
os.environ["ACTIVELOOP_TOKEN"] = st.secrets.get("ACTIVELOOP_TOKEN")
os.environ["ACTIVELOOP_ORG_NAME"] = st.secrets.get(
"ACTIVELOOP_ORG_NAME"
)
except:
st.session_state["auth_ok"] = False
st.error("No credentials stored and nothing submitted", icon=PAGE_ICON)
st.stop()
def authenticate(openai_api_key, activeloop_token, activeloop_org_name):
# Validate all credentials are set and correct
# Check for env variables to enable local dev and deployments with shared credentials
openai_api_key = openai_api_key or os.environ.get("OPENAI_API_KEY")
activeloop_token = activeloop_token or os.environ.get("ACTIVELOOP_TOKEN")
activeloop_org_name = activeloop_org_name or os.environ.get("ACTIVELOOP_ORG_NAME")
if not (openai_api_key and activeloop_token and activeloop_org_name):
st.session_state["auth_ok"] = False
st.error("Credentials neither set nor stored", icon=PAGE_ICON)
st.stop()
try:
# Try to access openai and deeplake
with st.spinner("Authentifying..."):
openai.api_key = os.environ["OPENAI_API_KEY"]
openai.api_key = openai_api_key
openai.Model.list()
deeplake.exists(
f"hub://{os.environ['ACTIVELOOP_ORG_NAME']}/DataChad-Authentication-Check",
f"hub://{activeloop_org_name}/DataChad-Authentication-Check",
token=activeloop_token,
)
except Exception as e:
logger.error(f"Authentication failed with {e}")
st.session_state["auth_ok"] = False
st.error("Authentication failed", icon=PAGE_ICON)
st.stop()
logger.info("Authentification successful!")
# store credentials in the session state
st.session_state["auth_ok"] = True
st.session_state["openai_api_key"] = openai_api_key
st.session_state["activeloop_token"] = activeloop_token
st.session_state["activeloop_org_name"] = activeloop_org_name
logger.info("Authentification successful!")
def save_uploaded_file(uploaded_file):
@ -210,14 +190,19 @@ def clean_data_source_string(data_source):
def setup_vector_store(data_source):
# either load existing vector store or upload a new one to the hub
embeddings = OpenAIEmbeddings(disallowed_special=())
embeddings = OpenAIEmbeddings(
disallowed_special=(), openai_api_key=st.session_state["openai_api_key"]
)
data_source_name = clean_data_source_string(data_source)
dataset_path = f"hub://{os.environ['ACTIVELOOP_ORG_NAME']}/{data_source_name}"
if deeplake.exists(dataset_path):
dataset_path = f"hub://{st.session_state['activeloop_org_name']}/{data_source_name}"
if deeplake.exists(dataset_path, token=st.session_state["activeloop_token"]):
with st.spinner("Loading vector store..."):
logger.info(f"{dataset_path} exists -> loading")
vector_store = DeepLake(
dataset_path=dataset_path, read_only=True, embedding_function=embeddings
dataset_path=dataset_path,
read_only=True,
embedding_function=embeddings,
token=st.session_state["activeloop_token"],
)
else:
with st.spinner("Reading, embedding and uploading data to hub..."):
@ -226,7 +211,8 @@ def setup_vector_store(data_source):
vector_store = DeepLake.from_documents(
docs,
embeddings,
dataset_path=f"hub://{os.environ['ACTIVELOOP_ORG_NAME']}/{data_source_name}",
dataset_path=f"hub://{st.session_state['activeloop_org_name']}/{data_source_name}",
token=st.session_state["activeloop_token"],
)
return vector_store
@ -242,7 +228,9 @@ def get_chain(data_source):
"k": 10,
}
retriever.search_kwargs.update(search_kwargs)
model = ChatOpenAI(model_name=MODEL)
model = ChatOpenAI(
model_name=MODEL, openai_api_key=st.session_state["openai_api_key"]
)
with st.spinner("Building langchain..."):
chain = ConversationalRetrievalChain.from_llm(
model,

Loading…
Cancel
Save