mirror of
https://github.com/hwchase17/langchain
synced 2024-11-10 01:10:59 +00:00
community: add flag to toggle progress bar (#24463)
- **Description:** Add a flag to determine whether to show a progress bar
- **Issue:** n/a
- **Dependencies:** n/a
- **Twitter handle:** n/a

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
parent
6b08a33fa4
commit
d98b830e4b
@ -41,6 +41,7 @@ class BlackboardLoader(WebBaseLoader):
|
||||
basic_auth: Optional[Tuple[str, str]] = None,
|
||||
cookies: Optional[dict] = None,
|
||||
continue_on_failure: bool = False,
|
||||
show_progress: bool = True,
|
||||
):
|
||||
"""Initialize with blackboard course url.
|
||||
|
||||
@ -56,12 +57,15 @@ class BlackboardLoader(WebBaseLoader):
|
||||
occurs loading a url, emitting a warning instead of raising an
|
||||
exception. Setting this to True makes the loader more robust, but also
|
||||
may result in missing data. Default: False
|
||||
show_progress: whether to show a progress bar while loading. Default: True
|
||||
|
||||
Raises:
|
||||
ValueError: If blackboard course url is invalid.
|
||||
"""
|
||||
super().__init__(
|
||||
web_paths=(blackboard_course_url), continue_on_failure=continue_on_failure
|
||||
web_paths=(blackboard_course_url),
|
||||
continue_on_failure=continue_on_failure,
|
||||
show_progress=show_progress,
|
||||
)
|
||||
# Get base url
|
||||
try:
|
||||
|
@ -20,6 +20,7 @@ class GitbookLoader(WebBaseLoader):
|
||||
base_url: Optional[str] = None,
|
||||
content_selector: str = "main",
|
||||
continue_on_failure: bool = False,
|
||||
show_progress: bool = True,
|
||||
):
|
||||
"""Initialize with web page and whether to load all paths.
|
||||
|
||||
@ -36,6 +37,7 @@ class GitbookLoader(WebBaseLoader):
|
||||
occurs loading a url, emitting a warning instead of raising an
|
||||
exception. Setting this to True makes the loader more robust, but also
|
||||
may result in missing data. Default: False
|
||||
show_progress: whether to show a progress bar while loading. Default: True
|
||||
"""
|
||||
self.base_url = base_url or web_page
|
||||
if self.base_url.endswith("/"):
|
||||
@ -43,7 +45,11 @@ class GitbookLoader(WebBaseLoader):
|
||||
if load_all_paths:
|
||||
# set web_path to the sitemap if we want to crawl all paths
|
||||
web_page = f"{self.base_url}/sitemap.xml"
|
||||
super().__init__(web_paths=(web_page,), continue_on_failure=continue_on_failure)
|
||||
super().__init__(
|
||||
web_paths=(web_page,),
|
||||
continue_on_failure=continue_on_failure,
|
||||
show_progress=show_progress,
|
||||
)
|
||||
self.load_all_paths = load_all_paths
|
||||
self.content_selector = content_selector
|
||||
|
||||
|
@ -58,6 +58,8 @@ class WebBaseLoader(BaseLoader):
|
||||
bs_get_text_kwargs: Optional[Dict[str, Any]] = None,
|
||||
bs_kwargs: Optional[Dict[str, Any]] = None,
|
||||
session: Any = None,
|
||||
*,
|
||||
show_progress: bool = True,
|
||||
) -> None:
|
||||
"""Initialize loader.
|
||||
|
||||
@ -69,6 +71,7 @@ class WebBaseLoader(BaseLoader):
|
||||
raise_for_status: Raise an exception if http status code denotes an error.
|
||||
bs_get_text_kwargs: kwargs for beautifulsoup4 get_text
|
||||
bs_kwargs: kwargs for beautifulsoup4 web page parsing
|
||||
show_progress: Show progress bar when loading pages.
|
||||
"""
|
||||
# web_path kept for backwards-compatibility.
|
||||
if web_path and web_paths:
|
||||
@ -91,6 +94,7 @@ class WebBaseLoader(BaseLoader):
|
||||
self.default_parser = default_parser
|
||||
self.requests_kwargs = requests_kwargs or {}
|
||||
self.raise_for_status = raise_for_status
|
||||
self.show_progress = show_progress
|
||||
self.bs_get_text_kwargs = bs_get_text_kwargs or {}
|
||||
self.bs_kwargs = bs_kwargs or {}
|
||||
if session:
|
||||
@ -177,11 +181,14 @@ class WebBaseLoader(BaseLoader):
|
||||
task = asyncio.ensure_future(self._fetch_with_rate_limit(url, semaphore))
|
||||
tasks.append(task)
|
||||
try:
|
||||
from tqdm.asyncio import tqdm_asyncio
|
||||
if self.show_progress:
|
||||
from tqdm.asyncio import tqdm_asyncio
|
||||
|
||||
return await tqdm_asyncio.gather(
|
||||
*tasks, desc="Fetching pages", ascii=True, mininterval=1
|
||||
)
|
||||
return await tqdm_asyncio.gather(
|
||||
*tasks, desc="Fetching pages", ascii=True, mininterval=1
|
||||
)
|
||||
else:
|
||||
return await asyncio.gather(*tasks)
|
||||
except ImportError:
|
||||
warnings.warn("For better logging of progress, `pip install tqdm`")
|
||||
return await asyncio.gather(*tasks)
|
||||
|
Loading…
Reference in New Issue
Block a user