fix loading files bug

pull/7/head
Saryev Rustam 12 months ago
parent 70fee6d501
commit e22c850cae

@@ -1,6 +1,6 @@
[tool.poetry]
name = "talk-codebase"
version = "0.1.32"
version = "0.1.34"
description = "talk-codebase is a powerful tool for querying and analyzing codebases."
authors = ["Saryev Rustam <rustam1997@gmail.com>"]
readme = "README.md"

@@ -46,14 +46,9 @@ LOADER_MAPPING = {
".pdf": {
"loader": PDFMinerLoader,
"args": {}
}
},
".txt": {
"loader": TextLoader,
"args": {}
},
}
# Every allowed extension that has no dedicated loader falls back to a
# plain text loader reading the file as UTF-8.
for ext in ALLOW_FILES:
    LOADER_MAPPING.setdefault(
        ext, {"loader": TextLoader, "args": {"encoding": "utf8"}}
    )

@@ -74,7 +74,7 @@ class BaseLLM:
if index == MODEL_TYPES["OPENAI"]:
cost = calculate_cost(docs, self.config.get("model_name"))
approve = questionary.select(
f"Creating a vector store for {len(docs)} documents will cost ~${cost:.5f}. Do you want to continue?",
f"Creating a vector store will cost ~${cost:.5f}. Do you want to continue?",
choices=[
{"name": "Yes", "value": True},
{"name": "No", "value": False},
@@ -83,11 +83,11 @@ class BaseLLM:
if not approve:
exit(0)
spinners = Halo(text=f"Creating vector store for {len(docs)} documents", spinner='dots').start()
spinners = Halo(text=f"Creating vector store", spinner='dots').start()
db = FAISS.from_documents(texts, embeddings)
db.add_documents(texts)
db.save_local(index_path)
spinners.succeed(f"Created vector store for {len(docs)} documents")
spinners.succeed(f"Created vector store")
return db

@@ -42,10 +42,8 @@ class StreamStdOut(StreamingStdOutCallbackHandler):
sys.stdout.flush()
@Halo(text='📂 Loading files', spinner='dots')
def load_files(root_dir):
num_cpus = multiprocessing.cpu_count()
loaded_files = []
with multiprocessing.Pool(num_cpus) as pool:
futures = []
for file_path in glob.glob(os.path.join(root_dir, '**/*'), recursive=True):
@@ -56,16 +54,13 @@ def load_files(root_dir):
continue
for ext in LOADER_MAPPING:
if file_path.endswith(ext):
loader = LOADER_MAPPING[ext]['loader']
print('\r' + f'📂 Loading files: {file_path}')
args = LOADER_MAPPING[ext]['args']
load = loader(file_path, **args)
futures.append(pool.apply_async(load.load_and_split))
loaded_files.append(file_path)
loader = LOADER_MAPPING[ext]['loader'](file_path, *args)
futures.append(pool.apply_async(loader.load))
docs = []
for future in futures:
docs.extend(future.get())
print('\n' + '\n'.join([f'📄 {os.path.abspath(file_path)}:' for file_path in loaded_files]))
return docs

Loading…
Cancel
Save