You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
langchain/langchain/document_loaders/directory.py

27 lines
871 B
Python

"""Loading logic for loading documents from a directory."""
from pathlib import Path
from typing import List
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
from langchain.document_loaders.unstructured import UnstructuredFileLoader
class DirectoryLoader(BaseLoader):
"""Loading logic for loading documents from a directory."""
def __init__(self, path: str, glob: str = "**/*"):
"""Initialize with path to directory and how to glob over it."""
self.path = path
self.glob = glob
def load(self) -> List[Document]:
"""Load documents."""
p = Path(self.path)
docs = []
for i in p.glob(self.glob):
if i.is_file():
sub_docs = UnstructuredFileLoader(str(i)).load()
docs.extend(sub_docs)
return docs