@@ -1,6 +1,8 @@
 import os
 import re
+import logging
+import sys
 import openai
 import deeplake
 import shutil
 
@@ -25,14 +27,34 @@ from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import DeepLake
 
-from constants import DATA_PATH, MODEL, PAGE_ICON
+from constants import DATA_PATH, MODEL, PAGE_ICON, APP_NAME
+
+logger = logging.getLogger(APP_NAME)
+
+
+def configure_logger(debug=0):
+    log_level = logging.DEBUG if debug == 1 else logging.INFO
+    logger.setLevel(log_level)
+
+    stream_handler = logging.StreamHandler(stream=sys.stdout)
+    stream_handler.setLevel(log_level)
+
+    formatter = logging.Formatter("%(message)s")
+    stream_handler.setFormatter(formatter)
+
+    logger.addHandler(stream_handler)
+    logger.propagate = False
+
+
+configure_logger(0)
+
 
 
 def validate_keys(openai_key, activeloop_token, activeloop_org_name):
     # Validate all API related variables are set and correct
     all_keys = [openai_key, activeloop_token, activeloop_org_name]
     if any(all_keys):
         print(f"{openai_key=}\n{activeloop_token=}\n{activeloop_org_name=}")
         if not all(all_keys):
             st.session_state["auth_ok"] = False
             st.error("You need to fill all fields", icon=PAGE_ICON)
@@ -44,31 +66,39 @@ def validate_keys(openai_key, activeloop_token, activeloop_org_name):
         # Bypass for local development or deployments with stored credentials
         # either env variables or streamlit secrets need to be set
-        try:
-            assert os.environ.get("OPENAI_API_KEY")
-            assert os.environ.get("ACTIVELOOP_TOKEN")
-            assert os.environ.get("ACTIVELOOP_ORG_NAME")
-        except:
-            assert st.secrets.get("OPENAI_API_KEY")
-            assert st.secrets.get("ACTIVELOOP_TOKEN")
-            assert st.secrets.get("ACTIVELOOP_ORG_NAME")
-            os.environ["OPENAI_API_KEY"] = st.secrets.get("OPENAI_API_KEY")
-            os.environ["ACTIVELOOP_TOKEN"] = st.secrets.get("ACTIVELOOP_TOKEN")
-            os.environ["ACTIVELOOP_ORG_NAME"] = st.secrets.get("ACTIVELOOP_ORG_NAME")
+        try:
+            try:
+                assert os.environ.get("OPENAI_API_KEY")
+                assert os.environ.get("ACTIVELOOP_TOKEN")
+                assert os.environ.get("ACTIVELOOP_ORG_NAME")
+            except:
+                assert st.secrets.get("OPENAI_API_KEY")
+                assert st.secrets.get("ACTIVELOOP_TOKEN")
+                assert st.secrets.get("ACTIVELOOP_ORG_NAME")
+                os.environ["OPENAI_API_KEY"] = st.secrets.get("OPENAI_API_KEY")
+                os.environ["ACTIVELOOP_TOKEN"] = st.secrets.get("ACTIVELOOP_TOKEN")
+                os.environ["ACTIVELOOP_ORG_NAME"] = st.secrets.get(
+                    "ACTIVELOOP_ORG_NAME"
+                )
+        except:
+            st.session_state["auth_ok"] = False
+            st.error("No credentials stored and nothing submitted", icon=PAGE_ICON)
+            st.stop()
+
 
     try:
         # Try to access openai and deeplake
         with st.spinner("Authentifying..."):
             openai.api_key = os.environ["OPENAI_API_KEY"]
             openai.Model.list()
             deeplake.exists(
                 f"hub://{os.environ['ACTIVELOOP_ORG_NAME']}/DataChad-Authentication-Check",
             )
     except Exception as e:
-        print(f"Authentication failed with {e}")
+        logger.error(f"Authentication failed with {e}")
         st.session_state["auth_ok"] = False
         st.error("Authentication failed", icon=PAGE_ICON)
         st.stop()
 
-    print("Authentification successful!")
+    logger.info("Authentication successful!")
     st.session_state["auth_ok"] = True
 
@@ -83,7 +113,7 @@ def save_uploaded_file(uploaded_file):
     file = open(file_path, "wb")
     file.write(file_bytes)
     file.close()
-    print(f"saved {file_path}")
+    logger.info(f"saved {file_path}")
     return file_path
 
 
@@ -92,7 +122,7 @@ def delete_uploaded_file(uploaded_file):
     file_path = DATA_PATH / uploaded_file.name
     if os.path.exists(DATA_PATH):
         os.remove(file_path)
-        print(f"removed {file_path}")
+        logger.info(f"removed {file_path}")
 
 
 def load_git(data_source):
@@ -110,7 +140,7 @@ def load_git(data_source):
             )
             break
         except Exception as e:
-            print(f"error loading git: {e}")
+            logger.error(f"error loading git: {e}")
     if os.path.exists(repo_path):
         # cleanup repo afterwards
         shutil.rmtree(repo_path)
@@ -161,12 +191,12 @@ def load_any_data_source(data_source):
     if loader:
         text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
         docs = loader.load_and_split(text_splitter)
-        print(f"loaded {len(docs)} document chucks")
+        logger.info(f"loaded {len(docs)} document chunks")
         return docs
 
     error_msg = f"Failed to load {data_source}"
     st.error(error_msg, icon=PAGE_ICON)
-    print(error_msg)
+    logger.error(error_msg)
     st.stop()
 
 
@@ -185,13 +215,13 @@ def setup_vector_store(data_source):
     dataset_path = f"hub://{os.environ['ACTIVELOOP_ORG_NAME']}/{data_source_name}"
     if deeplake.exists(dataset_path):
         with st.spinner("Loading vector store..."):
-            print(f"{dataset_path} exists -> loading")
+            logger.info(f"{dataset_path} exists -> loading")
             vector_store = DeepLake(
                 dataset_path=dataset_path, read_only=True, embedding_function=embeddings
             )
     else:
         with st.spinner("Reading, embedding and uploading data to hub..."):
-            print(f"{dataset_path} does not exist -> uploading")
+            logger.info(f"{dataset_path} does not exist -> uploading")
             docs = load_any_data_source(data_source)
             vector_store = DeepLake.from_documents(
                 docs,
@@ -221,7 +251,7 @@ def get_chain(data_source):
         verbose=True,
         max_tokens_limit=3375,
     )
-    print(f"{data_source} is ready to go!")
+    logger.info(f"{data_source} is ready to go!")
     return chain
 
 
@@ -238,6 +268,6 @@ def generate_response(prompt):
     response = st.session_state["chain"](
         {"question": prompt, "chat_history": st.session_state["chat_history"]}
     )
-    print(f"{response=}")
+    logger.info(f"{response=}")
     st.session_state["chat_history"].append((prompt, response["answer"]))
     return response["answer"]
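
Note on the logging setup this patch introduces: the sketch below is a minimal, self-contained version of how the new configure_logger is expected to behave; it is not part of the patch. APP_NAME here is a hypothetical placeholder, since the real value is imported from constants.py. One caveat worth flagging: as written in the patch, every call to configure_logger attaches a fresh StreamHandler, so calling it again (for example across Streamlit reruns) would print each record once per attached handler; the handlers.clear() guard below is one possible safeguard and is likewise not in the patch.

    import logging
    import sys

    APP_NAME = "datachad"  # hypothetical placeholder; the patch imports it from constants.py

    logger = logging.getLogger(APP_NAME)


    def configure_logger(debug=0):
        # Mirrors the patched function: INFO by default, DEBUG when debug == 1
        log_level = logging.DEBUG if debug == 1 else logging.INFO
        logger.setLevel(log_level)

        # Safeguard (not in the patch): drop stale handlers so repeated calls
        # do not duplicate every log line
        logger.handlers.clear()

        stream_handler = logging.StreamHandler(stream=sys.stdout)
        stream_handler.setLevel(log_level)
        stream_handler.setFormatter(logging.Formatter("%(message)s"))
        logger.addHandler(stream_handler)
        logger.propagate = False


    configure_logger(0)
    logger.info("visible at the default INFO level")        # emitted to stdout
    logger.debug("suppressed unless configured with debug=1")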