mirror of
https://github.com/hwchase17/langchain
synced 2024-11-04 06:00:26 +00:00
Fix usage of AsyncHtmlLoader with an already running event loop (#10220)
This commit is contained in:
parent
ac2310a405
commit
5d8673a3c1
@ -1,7 +1,8 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import warnings
|
||||
from typing import Any, Dict, Iterator, List, Optional, Union
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from typing import Any, Dict, Iterator, List, Optional, Union, cast
|
||||
|
||||
import aiohttp
|
||||
import requests
|
||||
@ -129,9 +130,18 @@ class AsyncHtmlLoader(BaseLoader):
|
||||
def load(self) -> List[Document]:
|
||||
"""Load text from the url(s) in web_path."""
|
||||
|
||||
try:
|
||||
# Raises RuntimeError if there is no current event loop.
|
||||
asyncio.get_running_loop()
|
||||
# If there is a current event loop, we need to run the async code
|
||||
# in a separate loop, in a separate thread.
|
||||
with ThreadPoolExecutor(max_workers=1) as executor:
|
||||
future = executor.submit(asyncio.run, self.fetch_all(self.web_paths))
|
||||
results = future.result()
|
||||
except RuntimeError:
|
||||
results = asyncio.run(self.fetch_all(self.web_paths))
|
||||
docs = []
|
||||
for i, text in enumerate(results):
|
||||
for i, text in enumerate(cast(List[str], results)):
|
||||
metadata = {"source": self.web_paths[i]}
|
||||
docs.append(Document(page_content=text, metadata=metadata))
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user