switch to .env credentials

pull/1/head
Gustav von Zitzewitz 1 year ago
parent 0554c6a402
commit 1e20e055dd

@@ -0,0 +1,3 @@
+OPENAI_API_KEY = your openai key
+ACTIVELOOP_TOKEN = your activeloop key
+ACTIVELOOP_ORG_NAME = your activeloop organization name
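For orientation, the keys in this new template are consumed at app start-up: `load_dotenv()` (added to the app further down) copies the `.env` key/value pairs into the process environment, where the rest of the code reads them. A minimal, illustrative sketch, not part of the commit:

```python
import os

from dotenv import load_dotenv  # provided by the python-dotenv pin added in requirements.txt

# Read key/value pairs from a local .env file (created from this template) into
# os.environ; variables already exported in the shell are left untouched.
load_dotenv()

openai_api_key = os.environ.get("OPENAI_API_KEY")
activeloop_token = os.environ.get("ACTIVELOOP_TOKEN")
activeloop_org_name = os.environ.get("ACTIVELOOP_ORG_NAME")
```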

.gitignore vendored

@@ -1,3 +1,4 @@
 data
 __pycache__
 .streamlit/secrets.toml
+.env

@@ -1,3 +0,0 @@
-OPENAI_API_KEY = "your openai key"
-ACTIVELOOP_TOKEN = "your activeloop key"
-ACTIVELOOP_ORG_NAME = "your activeloop organization name"

@@ -15,9 +15,5 @@ This is an app that let's you ask questions about any data source by leveraging
 ## Good to know
 - As default context this git repository is taken so you can directly start asking question about its functionality without chosing an own data source.
-- To run locally or deploy somewhere, execute:
-```cp .streamlit/secret.toml.template .streamlit/secret.toml```
-and set necessary keys in the newly created secrets file. Another option is to manually set environment variables
+- To run locally or deploy somewhere, execute `cp .env.template .env` and set necessary keys in the newly created secrets file. Another option is to manually set environment variables
 - Yes, Chad in `DataChad` refers to the well-known [meme](https://www.google.com/search?q=chad+meme)

@@ -1,16 +1,19 @@
 import streamlit as st
+from dotenv import load_dotenv
 from streamlit_chat import message
 from constants import APP_NAME, DEFAULT_DATA_SOURCE, PAGE_ICON
 from utils import (
+    authenticate,
+    build_chain_and_clear_history,
     delete_uploaded_file,
     generate_response,
-    save_uploaded_file,
-    build_chain_and_clear_history,
-    validate_keys,
     logger,
+    save_uploaded_file,
 )
+load_dotenv()
 # Page options and header
 st.set_option("client.showErrorDetails", True)
@@ -33,22 +36,25 @@ if "data_source" not in st.session_state:
     st.session_state["data_source"] = ""
 if "uploaded_file" not in st.session_state:
     st.session_state["uploaded_file"] = None
+if "openai_api_key" not in st.session_state:
+    st.session_state["openai_api_key"] = None
+if "activeloop_token" not in st.session_state:
+    st.session_state["activeloop_token"] = None
+if "activeloop_org_name" not in st.session_state:
+    st.session_state["activeloop_org_name"] = None
 # Sidebar
 with st.sidebar:
     st.title("Authentication")
     with st.form("authentication"):
-        openai_key = st.text_input("OpenAI API Key", type="password", key="openai_key")
-        activeloop_token = st.text_input(
-            "ActiveLoop Token", type="password", key="activeloop_token"
-        )
+        openai_api_key = st.text_input("OpenAI API Key", type="password")
+        activeloop_token = st.text_input("ActiveLoop Token", type="password")
         activeloop_org_name = st.text_input(
-            "ActiveLoop Organisation Name", type="password", key="activeloop_org_name"
+            "ActiveLoop Organisation Name", type="password"
         )
         submitted = st.form_submit_button("Submit")
     if submitted:
-        validate_keys(openai_key, activeloop_token, activeloop_org_name)
+        authenticate(openai_api_key, activeloop_token, activeloop_org_name)
     if not st.session_state["auth_ok"]:
         st.stop()
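A note on the new session-state block above (an explanatory aside, not part of the commit): Streamlit re-runs the whole script on every widget interaction, so any key that later code reads must be seeded before its first use. The same initialization pattern, condensed:

```python
import streamlit as st

# Streamlit re-executes the script on each interaction; pre-seeding the keys
# guarantees that st.session_state["..."] lookups never raise before the user
# has submitted the authentication form.
for key in ("openai_api_key", "activeloop_token", "activeloop_org_name"):
    if key not in st.session_state:
        st.session_state[key] = None
```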

@@ -8,4 +8,5 @@ unstructured==0.6.5
 pdf2image==1.16.3
 pytesseract==0.3.10
 beautifulsoup4==4.12.2
 bs4==0.0.1
+python-dotenv==1.0.0
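The new pin is what provides the `dotenv` module imported in the app. A quick, hypothetical post-install sanity check (not part of the repository):

```python
# Hypothetical check after `pip install -r requirements.txt`.
from importlib.metadata import version

from dotenv import load_dotenv  # import fails if python-dotenv is missing

print(version("python-dotenv"))  # expected to print 1.0.0 with the pin above
```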

@ -1,11 +1,11 @@
import logging
import os import os
import re import re
import shutil
import logging
import sys import sys
import openai
import deeplake import deeplake
import shutil import openai
import streamlit as st import streamlit as st
from langchain.chains import ConversationalRetrievalChain from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI from langchain.chat_models import ChatOpenAI
@ -27,8 +27,7 @@ from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import DeepLake from langchain.vectorstores import DeepLake
from constants import DATA_PATH, MODEL, PAGE_ICON, APP_NAME from constants import APP_NAME, DATA_PATH, MODEL, PAGE_ICON
logger = logging.getLogger(APP_NAME) logger = logging.getLogger(APP_NAME)
@@ -51,55 +50,36 @@ def configure_logger(debug=0):
 configure_logger(0)
-def validate_keys(openai_key, activeloop_token, activeloop_org_name):
-    # Validate all API related variables are set and correct
-    all_keys = [openai_key, activeloop_token, activeloop_org_name]
-    if any(all_keys):
-        if not all(all_keys):
-            st.session_state["auth_ok"] = False
-            st.error("You need to fill all fields", icon=PAGE_ICON)
-            st.stop()
-        os.environ["OPENAI_API_KEY"] = openai_key
-        os.environ["ACTIVELOOP_TOKEN"] = activeloop_token
-        os.environ["ACTIVELOOP_ORG_NAME"] = activeloop_org_name
-    else:
-        # Bypass for local development or deployments with stored credentials
-        # either env variables or streamlit secrets need to be set
-        try:
-            try:
-                assert os.environ.get("OPENAI_API_KEY")
-                assert os.environ.get("ACTIVELOOP_TOKEN")
-                assert os.environ.get("ACTIVELOOP_ORG_NAME")
-            except:
-                assert st.secrets.get("OPENAI_API_KEY")
-                assert st.secrets.get("ACTIVELOOP_TOKEN")
-                assert st.secrets.get("ACTIVELOOP_ORG_NAME")
-                os.environ["OPENAI_API_KEY"] = st.secrets.get("OPENAI_API_KEY")
-                os.environ["ACTIVELOOP_TOKEN"] = st.secrets.get("ACTIVELOOP_TOKEN")
-                os.environ["ACTIVELOOP_ORG_NAME"] = st.secrets.get(
-                    "ACTIVELOOP_ORG_NAME"
-                )
-        except:
-            st.session_state["auth_ok"] = False
-            st.error("No credentials stored and nothing submitted", icon=PAGE_ICON)
-            st.stop()
+def authenticate(openai_api_key, activeloop_token, activeloop_org_name):
+    # Validate all credentials are set and correct
+    # Check for env variables to enable local dev and deployments with shared credentials
+    openai_api_key = openai_api_key or os.environ.get("OPENAI_API_KEY")
+    activeloop_token = activeloop_token or os.environ.get("ACTIVELOOP_TOKEN")
+    activeloop_org_name = activeloop_org_name or os.environ.get("ACTIVELOOP_ORG_NAME")
+    if not (openai_api_key and activeloop_token and activeloop_org_name):
+        st.session_state["auth_ok"] = False
+        st.error("Credentials neither set nor stored", icon=PAGE_ICON)
+        st.stop()
     try:
         # Try to access openai and deeplake
         with st.spinner("Authentifying..."):
-            openai.api_key = os.environ["OPENAI_API_KEY"]
+            openai.api_key = openai_api_key
             openai.Model.list()
             deeplake.exists(
-                f"hub://{os.environ['ACTIVELOOP_ORG_NAME']}/DataChad-Authentication-Check",
+                f"hub://{activeloop_org_name}/DataChad-Authentication-Check",
+                token=activeloop_token,
             )
     except Exception as e:
         logger.error(f"Authentication failed with {e}")
         st.session_state["auth_ok"] = False
         st.error("Authentication failed", icon=PAGE_ICON)
         st.stop()
-    logger.info("Authentification successful!")
+    # store credentials in the session state
     st.session_state["auth_ok"] = True
+    st.session_state["openai_api_key"] = openai_api_key
+    st.session_state["activeloop_token"] = activeloop_token
+    st.session_state["activeloop_org_name"] = activeloop_org_name
+    logger.info("Authentification successful!")
 def save_uploaded_file(uploaded_file):
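To make the new validation flow explicit (an illustrative aside, not part of the commit): values typed into the form take precedence, the environment fills any gaps, and the cheap API round-trips in the hunk above (`openai.Model.list()` and `deeplake.exists(...)`) act as live credential checks. Stripped of the Streamlit state handling, the check amounts to roughly:

```python
import os

import deeplake
import openai

# Stand-alone sketch of the check performed by authenticate() above.
openai_api_key = os.environ.get("OPENAI_API_KEY")
activeloop_token = os.environ.get("ACTIVELOOP_TOKEN")
activeloop_org_name = os.environ.get("ACTIVELOOP_ORG_NAME")

openai.api_key = openai_api_key
openai.Model.list()  # fails if the OpenAI key is rejected
deeplake.exists(
    f"hub://{activeloop_org_name}/DataChad-Authentication-Check",
    token=activeloop_token,  # fails if the token/organisation pair is rejected
)
```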
@@ -210,14 +190,19 @@ def clean_data_source_string(data_source):
 def setup_vector_store(data_source):
     # either load existing vector store or upload a new one to the hub
-    embeddings = OpenAIEmbeddings(disallowed_special=())
+    embeddings = OpenAIEmbeddings(
+        disallowed_special=(), openai_api_key=st.session_state["openai_api_key"]
+    )
     data_source_name = clean_data_source_string(data_source)
-    dataset_path = f"hub://{os.environ['ACTIVELOOP_ORG_NAME']}/{data_source_name}"
-    if deeplake.exists(dataset_path):
+    dataset_path = f"hub://{st.session_state['activeloop_org_name']}/{data_source_name}"
+    if deeplake.exists(dataset_path, token=st.session_state["activeloop_token"]):
         with st.spinner("Loading vector store..."):
             logger.info(f"{dataset_path} exists -> loading")
             vector_store = DeepLake(
-                dataset_path=dataset_path, read_only=True, embedding_function=embeddings
+                dataset_path=dataset_path,
+                read_only=True,
+                embedding_function=embeddings,
+                token=st.session_state["activeloop_token"],
             )
     else:
         with st.spinner("Reading, embedding and uploading data to hub..."):
@@ -226,7 +211,8 @@ def setup_vector_store(data_source):
             vector_store = DeepLake.from_documents(
                 docs,
                 embeddings,
-                dataset_path=f"hub://{os.environ['ACTIVELOOP_ORG_NAME']}/{data_source_name}",
+                dataset_path=f"hub://{st.session_state['activeloop_org_name']}/{data_source_name}",
+                token=st.session_state["activeloop_token"],
             )
     return vector_store
@@ -242,7 +228,9 @@ def get_chain(data_source):
         "k": 10,
     }
     retriever.search_kwargs.update(search_kwargs)
-    model = ChatOpenAI(model_name=MODEL)
+    model = ChatOpenAI(
+        model_name=MODEL, openai_api_key=st.session_state["openai_api_key"]
+    )
     with st.spinner("Building langchain..."):
         chain = ConversationalRetrievalChain.from_llm(
             model,
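Taken together, the utils.py hunks apply one pattern: the credentials stored in `st.session_state` by `authenticate()` are handed explicitly to every downstream client instead of being read from `os.environ`. A condensed, illustrative sketch of that pattern (the dataset name is a placeholder, not from the repo):

```python
import streamlit as st
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import DeepLake

from constants import MODEL

# Every client receives its credential explicitly; nothing relies on os.environ.
embeddings = OpenAIEmbeddings(
    disallowed_special=(), openai_api_key=st.session_state["openai_api_key"]
)
vector_store = DeepLake(
    dataset_path=f"hub://{st.session_state['activeloop_org_name']}/my-data-source",  # placeholder name
    read_only=True,
    embedding_function=embeddings,
    token=st.session_state["activeloop_token"],
)
model = ChatOpenAI(model_name=MODEL, openai_api_key=st.session_state["openai_api_key"])
```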
