mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
Adds progress bar using tqdm to directory_loader (#3349)
Approach copied from `WebBaseLoader`. Does not assume the user has `tqdm` installed: it is imported lazily, and only when `show_progress` is enabled.
This commit is contained in:
parent
344e3508b1
commit
980cc41709
@ -68,6 +68,51 @@
|
|||||||
"len(docs)"
|
"len(docs)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "e633d62f",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Show a progress bar"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "43911860",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"By default, a progress bar is not shown. To show one, install the `tqdm` library (e.g. `pip install tqdm`) and set the `show_progress` parameter to `True`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 10,
|
||||||
|
"id": "bb93daac",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Requirement already satisfied: tqdm in /Users/jon/.pyenv/versions/3.9.16/envs/microbiome-app/lib/python3.9/site-packages (4.65.0)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"0it [00:00, ?it/s]\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"%pip install tqdm\n",
|
||||||
|
"loader = DirectoryLoader('../', glob=\"**/*.md\", show_progress=True)\n",
|
||||||
|
"docs = loader.load()"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "c5652850",
|
"id": "c5652850",
|
||||||
|
@ -35,6 +35,7 @@ class DirectoryLoader(BaseLoader):
|
|||||||
loader_cls: FILE_LOADER_TYPE = UnstructuredFileLoader,
|
loader_cls: FILE_LOADER_TYPE = UnstructuredFileLoader,
|
||||||
loader_kwargs: Union[dict, None] = None,
|
loader_kwargs: Union[dict, None] = None,
|
||||||
recursive: bool = False,
|
recursive: bool = False,
|
||||||
|
show_progress: bool = False,
|
||||||
):
|
):
|
||||||
"""Initialize with path to directory and how to glob over it."""
|
"""Initialize with path to directory and how to glob over it."""
|
||||||
if loader_kwargs is None:
|
if loader_kwargs is None:
|
||||||
@ -46,12 +47,30 @@ class DirectoryLoader(BaseLoader):
|
|||||||
self.loader_kwargs = loader_kwargs
|
self.loader_kwargs = loader_kwargs
|
||||||
self.silent_errors = silent_errors
|
self.silent_errors = silent_errors
|
||||||
self.recursive = recursive
|
self.recursive = recursive
|
||||||
|
self.show_progress = show_progress
|
||||||
|
|
||||||
def load(self) -> List[Document]:
|
def load(self) -> List[Document]:
|
||||||
"""Load documents."""
|
"""Load documents."""
|
||||||
p = Path(self.path)
|
p = Path(self.path)
|
||||||
docs = []
|
docs = []
|
||||||
items = p.rglob(self.glob) if self.recursive else p.glob(self.glob)
|
items = list(p.rglob(self.glob) if self.recursive else p.glob(self.glob))
|
||||||
|
|
||||||
|
pbar = None
|
||||||
|
if self.show_progress:
|
||||||
|
try:
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
pbar = tqdm(total=len(items))
|
||||||
|
except ImportError as e:
|
||||||
|
logger.warning(
|
||||||
|
"To log the progress of DirectoryLoader you need to install tqdm, "
|
||||||
|
"`pip install tqdm`"
|
||||||
|
)
|
||||||
|
if self.silent_errors:
|
||||||
|
logger.warning(e)
|
||||||
|
else:
|
||||||
|
raise e
|
||||||
|
|
||||||
for i in items:
|
for i in items:
|
||||||
if i.is_file():
|
if i.is_file():
|
||||||
if _is_visible(i.relative_to(p)) or self.load_hidden:
|
if _is_visible(i.relative_to(p)) or self.load_hidden:
|
||||||
@ -63,4 +82,11 @@ class DirectoryLoader(BaseLoader):
|
|||||||
logger.warning(e)
|
logger.warning(e)
|
||||||
else:
|
else:
|
||||||
raise e
|
raise e
|
||||||
|
finally:
|
||||||
|
if pbar:
|
||||||
|
pbar.update(1)
|
||||||
|
|
||||||
|
if pbar:
|
||||||
|
pbar.close()
|
||||||
|
|
||||||
return docs
|
return docs
|
||||||
|
Loading…
Reference in New Issue
Block a user