mirror of
https://github.com/rsaryev/talk-codebase
synced 2024-11-12 01:10:40 +00:00
fix loading files bug
This commit is contained in:
parent
70fee6d501
commit
e22c850cae
@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "talk-codebase"
|
||||
version = "0.1.32"
|
||||
version = "0.1.34"
|
||||
description = "talk-codebase is a powerful tool for querying and analyzing codebases."
|
||||
authors = ["Saryev Rustam <rustam1997@gmail.com>"]
|
||||
readme = "README.md"
|
||||
|
@ -46,14 +46,9 @@ LOADER_MAPPING = {
|
||||
".pdf": {
|
||||
"loader": PDFMinerLoader,
|
||||
"args": {}
|
||||
}
|
||||
},
|
||||
".txt": {
|
||||
"loader": TextLoader,
|
||||
"args": {}
|
||||
},
|
||||
}
|
||||
|
||||
for ext in ALLOW_FILES:
|
||||
if ext not in LOADER_MAPPING:
|
||||
LOADER_MAPPING[ext] = {
|
||||
"loader": TextLoader,
|
||||
"args": {
|
||||
"encoding": "utf8"
|
||||
}
|
||||
}
|
||||
|
@ -74,7 +74,7 @@ class BaseLLM:
|
||||
if index == MODEL_TYPES["OPENAI"]:
|
||||
cost = calculate_cost(docs, self.config.get("model_name"))
|
||||
approve = questionary.select(
|
||||
f"Creating a vector store for {len(docs)} documents will cost ~${cost:.5f}. Do you want to continue?",
|
||||
f"Creating a vector store will cost ~${cost:.5f}. Do you want to continue?",
|
||||
choices=[
|
||||
{"name": "Yes", "value": True},
|
||||
{"name": "No", "value": False},
|
||||
@ -83,11 +83,11 @@ class BaseLLM:
|
||||
if not approve:
|
||||
exit(0)
|
||||
|
||||
spinners = Halo(text=f"Creating vector store for {len(docs)} documents", spinner='dots').start()
|
||||
spinners = Halo(text=f"Creating vector store", spinner='dots').start()
|
||||
db = FAISS.from_documents(texts, embeddings)
|
||||
db.add_documents(texts)
|
||||
db.save_local(index_path)
|
||||
spinners.succeed(f"Created vector store for {len(docs)} documents")
|
||||
spinners.succeed(f"Created vector store")
|
||||
return db
|
||||
|
||||
|
||||
|
@ -42,10 +42,8 @@ class StreamStdOut(StreamingStdOutCallbackHandler):
|
||||
sys.stdout.flush()
|
||||
|
||||
|
||||
@Halo(text='📂 Loading files', spinner='dots')
|
||||
def load_files(root_dir):
|
||||
num_cpus = multiprocessing.cpu_count()
|
||||
loaded_files = []
|
||||
with multiprocessing.Pool(num_cpus) as pool:
|
||||
futures = []
|
||||
for file_path in glob.glob(os.path.join(root_dir, '**/*'), recursive=True):
|
||||
@ -56,16 +54,13 @@ def load_files(root_dir):
|
||||
continue
|
||||
for ext in LOADER_MAPPING:
|
||||
if file_path.endswith(ext):
|
||||
loader = LOADER_MAPPING[ext]['loader']
|
||||
print('\r' + f'📂 Loading files: {file_path}')
|
||||
args = LOADER_MAPPING[ext]['args']
|
||||
load = loader(file_path, **args)
|
||||
futures.append(pool.apply_async(load.load_and_split))
|
||||
loaded_files.append(file_path)
|
||||
loader = LOADER_MAPPING[ext]['loader'](file_path, *args)
|
||||
futures.append(pool.apply_async(loader.load))
|
||||
docs = []
|
||||
for future in futures:
|
||||
docs.extend(future.get())
|
||||
|
||||
print('\n' + '\n'.join([f'📄 {os.path.abspath(file_path)}:' for file_path in loaded_files]))
|
||||
return docs
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user