fix loading files bug

pull/7/head
Saryev Rustam 12 months ago
parent 70fee6d501
commit e22c850cae

@@ -1,6 +1,6 @@
[tool.poetry]
name = "talk-codebase"
version = "0.1.32"
version = "0.1.34"
description = "talk-codebase is a powerful tool for querying and analyzing codebases."
authors = ["Saryev Rustam <rustam1997@gmail.com>"]
readme = "README.md"

@@ -46,14 +46,9 @@ LOADER_MAPPING = {
".pdf": {
"loader": PDFMinerLoader,
"args": {}
}
},
".txt": {
"loader": TextLoader,
"args": {}
},
}
# Every allowed extension that has no dedicated loader falls back to a
# plain text loader reading the file as UTF-8.
for ext in ALLOW_FILES:
    LOADER_MAPPING.setdefault(
        ext, {"loader": TextLoader, "args": {"encoding": "utf8"}}
    )

@@ -74,7 +74,7 @@ class BaseLLM:
if index == MODEL_TYPES["OPENAI"]:
cost = calculate_cost(docs, self.config.get("model_name"))
approve = questionary.select(
f"Creating a vector store for {len(docs)} documents will cost ~${cost:.5f}. Do you want to continue?",
f"Creating a vector store will cost ~${cost:.5f}. Do you want to continue?",
choices=[
{"name": "Yes", "value": True},
{"name": "No", "value": False},
@@ -83,11 +83,11 @@ class BaseLLM:
if not approve:
exit(0)
spinners = Halo(text=f"Creating vector store for {len(docs)} documents", spinner='dots').start()
spinners = Halo(text=f"Creating vector store", spinner='dots').start()
db = FAISS.from_documents(texts, embeddings)
db.add_documents(texts)
db.save_local(index_path)
spinners.succeed(f"Created vector store for {len(docs)} documents")
spinners.succeed(f"Created vector store")
return db

@@ -42,10 +42,8 @@ class StreamStdOut(StreamingStdOutCallbackHandler):
sys.stdout.flush()
@Halo(text='📂 Loading files', spinner='dots')
def load_files(root_dir):
num_cpus = multiprocessing.cpu_count()
loaded_files = []
with multiprocessing.Pool(num_cpus) as pool:
futures = []
for file_path in glob.glob(os.path.join(root_dir, '**/*'), recursive=True):
@@ -56,16 +54,13 @@ def load_files(root_dir):
continue
for ext in LOADER_MAPPING:
if file_path.endswith(ext):
loader = LOADER_MAPPING[ext]['loader']
print('\r' + f'📂 Loading files: {file_path}')
args = LOADER_MAPPING[ext]['args']
load = loader(file_path, **args)
futures.append(pool.apply_async(load.load_and_split))
loaded_files.append(file_path)
loader = LOADER_MAPPING[ext]['loader'](file_path, *args)
futures.append(pool.apply_async(loader.load))
docs = []
for future in futures:
docs.extend(future.get())
print('\n' + '\n'.join([f'📄 {os.path.abspath(file_path)}:' for file_path in loaded_files]))
return docs

Loading…
Cancel
Save