switch from print to logger

pull/1/head
Gustav von Zitzewitz 1 year ago
parent 954a2a5859
commit 0554c6a402

@ -8,6 +8,7 @@ from utils import (
save_uploaded_file,
build_chain_and_clear_history,
validate_keys,
logger,
)
@ -74,12 +75,12 @@ data_source = st.text_input(
# generate new chain for new data source / uploaded file
# make sure to do this only once per input / on change
if data_source and data_source != st.session_state["data_source"]:
print(f"data source provided: '{data_source}'")
logger.info(f"data source provided: '{data_source}'")
build_chain_and_clear_history(data_source)
st.session_state["data_source"] = data_source
if uploaded_file and uploaded_file != st.session_state["uploaded_file"]:
print(f"uploaded file: '{uploaded_file.name}'")
logger.info(f"uploaded file: '{uploaded_file.name}'")
data_source = save_uploaded_file(uploaded_file)
build_chain_and_clear_history(data_source)
delete_uploaded_file(uploaded_file)

@ -1,6 +1,8 @@
import os
import re
import logging
import sys
import openai
import deeplake
import shutil
@ -25,14 +27,34 @@ from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import DeepLake
from constants import DATA_PATH, MODEL, PAGE_ICON
from constants import DATA_PATH, MODEL, PAGE_ICON, APP_NAME
logger = logging.getLogger(APP_NAME)
def configure_logger(debug=0):
    """Configure the module-level ``logger`` to write bare messages to stdout.

    Args:
        debug: When equal to ``1`` (``True`` also compares equal) the logger
            and its handler use ``logging.DEBUG``; any other value selects
            ``logging.INFO``.

    The function is safe to call more than once: the handler installed by a
    previous call is removed before a new one is attached, so records are
    never emitted multiple times.
    """
    log_level = logging.DEBUG if debug == 1 else logging.INFO
    logger.setLevel(log_level)

    # Loggers are process-wide singletons keyed by name, so a repeated call
    # (or a module reload) would otherwise stack one more handler each time
    # and duplicate every log line. Only handlers created here are removed;
    # handlers attached by other code are left alone.
    for existing_handler in list(logger.handlers):
        if getattr(existing_handler, "_installed_by_configure_logger", False):
            logger.removeHandler(existing_handler)

    stream_handler = logging.StreamHandler(stream=sys.stdout)
    stream_handler.setLevel(log_level)
    stream_handler.setFormatter(logging.Formatter("%(message)s"))
    # Marker used above to recognise this handler on a later call.
    stream_handler._installed_by_configure_logger = True
    logger.addHandler(stream_handler)

    # Keep records from also reaching the root logger's handlers.
    logger.propagate = False
configure_logger(0)
def validate_keys(openai_key, activeloop_token, activeloop_org_name):
# Validate all API related variables are set and correct
all_keys = [openai_key, activeloop_token, activeloop_org_name]
if any(all_keys):
print(f"{openai_key=}\n{activeloop_token=}\n{activeloop_org_name=}")
if not all(all_keys):
st.session_state["auth_ok"] = False
st.error("You need to fill all fields", icon=PAGE_ICON)
@ -44,31 +66,39 @@ def validate_keys(openai_key, activeloop_token, activeloop_org_name):
# Bypass for local development or deployments with stored credentials
# either env variables or streamlit secrets need to be set
try:
assert os.environ.get("OPENAI_API_KEY")
assert os.environ.get("ACTIVELOOP_TOKEN")
assert os.environ.get("ACTIVELOOP_ORG_NAME")
except:
assert st.secrets.get("OPENAI_API_KEY")
assert st.secrets.get("ACTIVELOOP_TOKEN")
assert st.secrets.get("ACTIVELOOP_ORG_NAME")
try:
assert os.environ.get("OPENAI_API_KEY")
assert os.environ.get("ACTIVELOOP_TOKEN")
assert os.environ.get("ACTIVELOOP_ORG_NAME")
except:
assert st.secrets.get("OPENAI_API_KEY")
assert st.secrets.get("ACTIVELOOP_TOKEN")
assert st.secrets.get("ACTIVELOOP_ORG_NAME")
os.environ["OPENAI_API_KEY"] = st.secrets.get("OPENAI_API_KEY")
os.environ["ACTIVELOOP_TOKEN"] = st.secrets.get("ACTIVELOOP_TOKEN")
os.environ["ACTIVELOOP_ORG_NAME"] = st.secrets.get("ACTIVELOOP_ORG_NAME")
os.environ["OPENAI_API_KEY"] = st.secrets.get("OPENAI_API_KEY")
os.environ["ACTIVELOOP_TOKEN"] = st.secrets.get("ACTIVELOOP_TOKEN")
os.environ["ACTIVELOOP_ORG_NAME"] = st.secrets.get(
"ACTIVELOOP_ORG_NAME"
)
except:
st.session_state["auth_ok"] = False
st.error("No credentials stored and nothing submitted", icon=PAGE_ICON)
st.stop()
try:
# Try to access openai and deeplake
with st.spinner("Authentifying..."):
openai.api_key = os.environ["OPENAI_API_KEY"]
openai.Model.list()
deeplake.exists(
f"hub://{os.environ['ACTIVELOOP_ORG_NAME']}/DataChad-Authentication-Check",
)
except Exception as e:
print(f"Authentication failed with {e}")
logger.error(f"Authentication failed with {e}")
st.session_state["auth_ok"] = False
st.error("Authentication failed", icon=PAGE_ICON)
st.stop()
print("Authentification successful!")
logger.info("Authentification successful!")
st.session_state["auth_ok"] = True
@ -83,7 +113,7 @@ def save_uploaded_file(uploaded_file):
file = open(file_path, "wb")
file.write(file_bytes)
file.close()
print(f"saved {file_path}")
logger.info(f"saved {file_path}")
return file_path
@ -92,7 +122,7 @@ def delete_uploaded_file(uploaded_file):
file_path = DATA_PATH / uploaded_file.name
if os.path.exists(DATA_PATH):
os.remove(file_path)
print(f"removed {file_path}")
logger.info(f"removed {file_path}")
def load_git(data_source):
@ -110,7 +140,7 @@ def load_git(data_source):
)
break
except Exception as e:
print(f"error loading git: {e}")
logger.error(f"error loading git: {e}")
if os.path.exists(repo_path):
# cleanup repo afterwards
shutil.rmtree(repo_path)
@ -161,12 +191,12 @@ def load_any_data_source(data_source):
if loader:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = loader.load_and_split(text_splitter)
print(f"loaded {len(docs)} document chucks")
logger.info(f"loaded {len(docs)} document chucks")
return docs
error_msg = f"Failed to load {data_source}"
st.error(error_msg, icon=PAGE_ICON)
print(error_msg)
logger.info(error_msg)
st.stop()
@ -185,13 +215,13 @@ def setup_vector_store(data_source):
dataset_path = f"hub://{os.environ['ACTIVELOOP_ORG_NAME']}/{data_source_name}"
if deeplake.exists(dataset_path):
with st.spinner("Loading vector store..."):
print(f"{dataset_path} exists -> loading")
logger.info(f"{dataset_path} exists -> loading")
vector_store = DeepLake(
dataset_path=dataset_path, read_only=True, embedding_function=embeddings
)
else:
with st.spinner("Reading, embedding and uploading data to hub..."):
print(f"{dataset_path} does not exist -> uploading")
logger.info(f"{dataset_path} does not exist -> uploading")
docs = load_any_data_source(data_source)
vector_store = DeepLake.from_documents(
docs,
@ -221,7 +251,7 @@ def get_chain(data_source):
verbose=True,
max_tokens_limit=3375,
)
print(f"{data_source} is ready to go!")
logger.info(f"{data_source} is ready to go!")
return chain
@ -238,6 +268,6 @@ def generate_response(prompt):
response = st.session_state["chain"](
{"question": prompt, "chat_history": st.session_state["chat_history"]}
)
print(f"{response=}")
logger.info(f"{response=}")
st.session_state["chat_history"].append((prompt, response["answer"]))
return response["answer"]

Loading…
Cancel
Save