You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
talk-codebase/talk_codebase/utils.py

40 lines
1.3 KiB
Python

import os
import sys
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.document_loaders import TextLoader
from talk_codebase.consts import EXCLUDE_DIRS, EXCLUDE_FILES, ALLOW_FILES
class StreamStdOut(StreamingStdOutCallbackHandler):
def on_llm_new_token(self, token: str, **kwargs) -> None:
sys.stdout.write(token)
sys.stdout.flush()
def on_llm_start(self, serialized, prompts, **kwargs):
sys.stdout.write("🤖 ")
def on_llm_end(self, response, **kwargs):
sys.stdout.write("\n")
sys.stdout.flush()
def load_files(root_dir):
docs = []
for dirpath, dirnames, filenames in os.walk(root_dir):
if any(exclude_dir in dirpath for exclude_dir in EXCLUDE_DIRS):
continue
if not filenames:
continue
for file in filenames:
if any(file.endswith(allow_file) for allow_file in ALLOW_FILES) and not any(
file == exclude_file for exclude_file in EXCLUDE_FILES):
try:
loader = TextLoader(os.path.join(dirpath, file), encoding='utf-8')
docs.extend(loader.load_and_split())
except Exception as e:
print(f"Error loading file {file}: {e}")
print(f"🤖 Loaded {len(docs)} documents")
return docs