From 5c25f3f4f754f72e43c51c4cd0980d0c885cd0ef Mon Sep 17 00:00:00 2001 From: Gustav von Zitzewitz Date: Tue, 16 May 2023 16:38:38 +0200 Subject: [PATCH] Add error handling --- app.py | 1 + utils.py | 48 +++++++++++++++++++++++++++++++----------------- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/app.py b/app.py index 2ca4a61..dedce97 100644 --- a/app.py +++ b/app.py @@ -129,6 +129,7 @@ data_source = st.text_input( placeholder="Any path or url pointing to a file or directory of files", ) + # generate new chain for new data source / uploaded file # make sure to do this only once per input / on change if data_source and data_source != st.session_state["data_source"]: diff --git a/utils.py b/utils.py index db1466c..b5efdac 100644 --- a/utils.py +++ b/utils.py @@ -40,6 +40,7 @@ from constants import ( MAX_TOKENS, MODEL, PAGE_ICON, + REPO_URL, TEMPERATURE, K, ) @@ -195,6 +196,14 @@ def delete_uploaded_file(uploaded_file: UploadedFile) -> None: logger.info(f"Removed: {file_path}") +def handle_load_error(e: str = None) -> None: + e = e or f"No Loader found for your data source. Consider contributing: {REPO_URL}!"
+ error_msg = f"Failed to load {st.session_state['data_source']} with Error:\n{e}" + st.error(error_msg, icon=PAGE_ICON) + logger.info(error_msg) + st.stop() + + def load_git(data_source: str, chunk_size: int = CHUNK_SIZE) -> List[Document]: # We need to try both common main branches # Thank you github for the "master" to "main" switch @@ -213,11 +222,14 @@ def load_git(data_source: str, chunk_size: int = CHUNK_SIZE) -> List[Document]: ) break except Exception as e: - logger.error(f"Error loading git: {e}") + logger.info(f"Error loading git: {e}") if os.path.exists(repo_path): # cleanup repo afterwards shutil.rmtree(repo_path) - return docs + try: + return docs + except Exception as e: + handle_load_error() def load_any_data_source( @@ -264,7 +276,7 @@ def load_any_data_source( loader = PythonLoader(data_source) else: loader = UnstructuredFileLoader(data_source) - if loader: + try: # Chunk size is a major trade-off parameter to control result accuracy over computaion text_splitter = RecursiveCharacterTextSplitter( chunk_size=chunk_size, chunk_overlap=0 @@ -272,11 +284,8 @@ def load_any_data_source( docs = loader.load_and_split(text_splitter) logger.info(f"Loaded: {len(docs)} document chucks") return docs - - error_msg = f"Failed to load {data_source}" - st.error(error_msg, icon=PAGE_ICON) - logger.info(error_msg) - st.stop() + except Exception as e: + handle_load_error(e if loader else None) def clean_data_source_string(data_source_string: str) -> str: @@ -358,15 +367,20 @@ def build_chain( def update_chain() -> None: # Build chain with parameters from session state and store it back # Also delete chat history to not confuse the bot with old context - st.session_state["chain"] = build_chain( - data_source=st.session_state["data_source"], - k=st.session_state["k"], - fetch_k=st.session_state["fetch_k"], - chunk_size=st.session_state["chunk_size"], - temperature=st.session_state["temperature"], - max_tokens=st.session_state["max_tokens"], - ) - 
st.session_state["chat_history"] = [] + try: + st.session_state["chain"] = build_chain( + data_source=st.session_state["data_source"], + k=st.session_state["k"], + fetch_k=st.session_state["fetch_k"], + chunk_size=st.session_state["chunk_size"], + temperature=st.session_state["temperature"], + max_tokens=st.session_state["max_tokens"], + ) + st.session_state["chat_history"] = [] + except Exception as e: + msg = f"Failed to build chain for data source {st.session_state['data_source']} with error: {e}" + logger.error(msg) + st.error(msg, icon=PAGE_ICON) def update_usage(cb: OpenAICallbackHandler) -> None: