Fix usage of AsyncHtmlLoader with an already running event loop (#10220)

pull/10233/head
Nuno Campos 1 year ago committed by GitHub
parent ac2310a405
commit 5d8673a3c1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -1,7 +1,8 @@
import asyncio
import logging
import warnings
-from typing import Any, Dict, Iterator, List, Optional, Union
+from concurrent.futures import ThreadPoolExecutor
+from typing import Any, Dict, Iterator, List, Optional, Union, cast
import aiohttp
import requests
@@ -129,9 +130,18 @@ class AsyncHtmlLoader(BaseLoader):
def load(self) -> List[Document]:
"""Load text from the url(s) in web_path."""
-results = asyncio.run(self.fetch_all(self.web_paths))
+try:
+# Raises RuntimeError if there is no current event loop.
+asyncio.get_running_loop()
+# If there is a current event loop, we need to run the async code
+# in a separate loop, in a separate thread.
+with ThreadPoolExecutor(max_workers=1) as executor:
+future = executor.submit(asyncio.run, self.fetch_all(self.web_paths))
+results = future.result()
+except RuntimeError:
+results = asyncio.run(self.fetch_all(self.web_paths))
docs = []
-for i, text in enumerate(results):
+for i, text in enumerate(cast(List[str], results)):
metadata = {"source": self.web_paths[i]}
docs.append(Document(page_content=text, metadata=metadata))

Loading…
Cancel
Save