switch from print to logger

pull/1/head
Gustav von Zitzewitz 1 year ago
parent 954a2a5859
commit 0554c6a402

@@ -8,6 +8,7 @@ from utils import (
     save_uploaded_file,
     build_chain_and_clear_history,
     validate_keys,
+    logger,
 )
@@ -74,12 +75,12 @@ data_source = st.text_input(
 # generate new chain for new data source / uploaded file
 # make sure to do this only once per input / on change
 if data_source and data_source != st.session_state["data_source"]:
-    print(f"data source provided: '{data_source}'")
+    logger.info(f"data source provided: '{data_source}'")
     build_chain_and_clear_history(data_source)
     st.session_state["data_source"] = data_source
 if uploaded_file and uploaded_file != st.session_state["uploaded_file"]:
-    print(f"uploaded file: '{uploaded_file.name}'")
+    logger.info(f"uploaded file: '{uploaded_file.name}'")
     data_source = save_uploaded_file(uploaded_file)
     build_chain_and_clear_history(data_source)
     delete_uploaded_file(uploaded_file)

@@ -1,6 +1,8 @@
 import os
 import re
+import logging
+import sys
 import openai
 import deeplake
 import shutil
@@ -25,14 +27,34 @@ from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import DeepLake
-from constants import DATA_PATH, MODEL, PAGE_ICON
+from constants import DATA_PATH, MODEL, PAGE_ICON, APP_NAME
+
+logger = logging.getLogger(APP_NAME)
+
+
+def configure_logger(debug=0):
+    log_level = logging.DEBUG if debug == 1 else logging.INFO
+    logger.setLevel(log_level)
+    stream_handler = logging.StreamHandler(stream=sys.stdout)
+    stream_handler.setLevel(log_level)
+    formatter = logging.Formatter("%(message)s")
+    stream_handler.setFormatter(formatter)
+    logger.addHandler(stream_handler)
+    logger.propagate = False
+
+
+configure_logger(0)
+
 
 def validate_keys(openai_key, activeloop_token, activeloop_org_name):
     # Validate all API related variables are set and correct
     all_keys = [openai_key, activeloop_token, activeloop_org_name]
     if any(all_keys):
-        print(f"{openai_key=}\n{activeloop_token=}\n{activeloop_org_name=}")
         if not all(all_keys):
             st.session_state["auth_ok"] = False
             st.error("You need to fill all fields", icon=PAGE_ICON)
@@ -44,31 +66,39 @@ def validate_keys(openai_key, activeloop_token, activeloop_org_name):
         # Bypass for local development or deployments with stored credentials
         # either env variables or streamlit secrets need to be set
         try:
-            assert os.environ.get("OPENAI_API_KEY")
-            assert os.environ.get("ACTIVELOOP_TOKEN")
-            assert os.environ.get("ACTIVELOOP_ORG_NAME")
-        except:
-            assert st.secrets.get("OPENAI_API_KEY")
-            assert st.secrets.get("ACTIVELOOP_TOKEN")
-            assert st.secrets.get("ACTIVELOOP_ORG_NAME")
-            os.environ["OPENAI_API_KEY"] = st.secrets.get("OPENAI_API_KEY")
-            os.environ["ACTIVELOOP_TOKEN"] = st.secrets.get("ACTIVELOOP_TOKEN")
-            os.environ["ACTIVELOOP_ORG_NAME"] = st.secrets.get("ACTIVELOOP_ORG_NAME")
+            try:
+                assert os.environ.get("OPENAI_API_KEY")
+                assert os.environ.get("ACTIVELOOP_TOKEN")
+                assert os.environ.get("ACTIVELOOP_ORG_NAME")
+            except:
+                assert st.secrets.get("OPENAI_API_KEY")
+                assert st.secrets.get("ACTIVELOOP_TOKEN")
+                assert st.secrets.get("ACTIVELOOP_ORG_NAME")
+                os.environ["OPENAI_API_KEY"] = st.secrets.get("OPENAI_API_KEY")
+                os.environ["ACTIVELOOP_TOKEN"] = st.secrets.get("ACTIVELOOP_TOKEN")
+                os.environ["ACTIVELOOP_ORG_NAME"] = st.secrets.get(
+                    "ACTIVELOOP_ORG_NAME"
+                )
+        except:
+            st.session_state["auth_ok"] = False
+            st.error("No credentials stored and nothing submitted", icon=PAGE_ICON)
+            st.stop()

         try:
             # Try to access openai and deeplake
             with st.spinner("Authentifying..."):
+                openai.api_key = os.environ["OPENAI_API_KEY"]
                 openai.Model.list()
                 deeplake.exists(
                     f"hub://{os.environ['ACTIVELOOP_ORG_NAME']}/DataChad-Authentication-Check",
                 )
         except Exception as e:
-            print(f"Authentication failed with {e}")
+            logger.error(f"Authentication failed with {e}")
             st.session_state["auth_ok"] = False
             st.error("Authentication failed", icon=PAGE_ICON)
             st.stop()
-        print("Authentification successful!")
+        logger.info("Authentification successful!")
         st.session_state["auth_ok"] = True
@@ -83,7 +113,7 @@ def save_uploaded_file(uploaded_file):
     file = open(file_path, "wb")
     file.write(file_bytes)
     file.close()
-    print(f"saved {file_path}")
+    logger.info(f"saved {file_path}")
     return file_path
@@ -92,7 +122,7 @@ def delete_uploaded_file(uploaded_file):
     file_path = DATA_PATH / uploaded_file.name
     if os.path.exists(DATA_PATH):
         os.remove(file_path)
-        print(f"removed {file_path}")
+        logger.info(f"removed {file_path}")


 def load_git(data_source):
@@ -110,7 +140,7 @@ def load_git(data_source):
             )
             break
         except Exception as e:
-            print(f"error loading git: {e}")
+            logger.error(f"error loading git: {e}")
     if os.path.exists(repo_path):
         # cleanup repo afterwards
         shutil.rmtree(repo_path)
@@ -161,12 +191,12 @@ def load_any_data_source(data_source):
     if loader:
         text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
         docs = loader.load_and_split(text_splitter)
-        print(f"loaded {len(docs)} document chucks")
+        logger.info(f"loaded {len(docs)} document chucks")
         return docs

     error_msg = f"Failed to load {data_source}"
     st.error(error_msg, icon=PAGE_ICON)
-    print(error_msg)
+    logger.info(error_msg)
     st.stop()
@@ -185,13 +215,13 @@ def setup_vector_store(data_source):
     dataset_path = f"hub://{os.environ['ACTIVELOOP_ORG_NAME']}/{data_source_name}"
     if deeplake.exists(dataset_path):
         with st.spinner("Loading vector store..."):
-            print(f"{dataset_path} exists -> loading")
+            logger.info(f"{dataset_path} exists -> loading")
             vector_store = DeepLake(
                 dataset_path=dataset_path, read_only=True, embedding_function=embeddings
             )
     else:
         with st.spinner("Reading, embedding and uploading data to hub..."):
-            print(f"{dataset_path} does not exist -> uploading")
+            logger.info(f"{dataset_path} does not exist -> uploading")
             docs = load_any_data_source(data_source)
             vector_store = DeepLake.from_documents(
                 docs,
@@ -221,7 +251,7 @@ def get_chain(data_source):
         verbose=True,
         max_tokens_limit=3375,
     )
-    print(f"{data_source} is ready to go!")
+    logger.info(f"{data_source} is ready to go!")
     return chain
@@ -238,6 +268,6 @@ def generate_response(prompt):
     response = st.session_state["chain"](
         {"question": prompt, "chat_history": st.session_state["chat_history"]}
     )
-    print(f"{response=}")
+    logger.info(f"{response=}")
     st.session_state["chat_history"].append((prompt, response["answer"]))
     return response["answer"]
