Fix usage of AsyncHtmlLoader with an already running event loop (#10220)

This commit is contained in:
Nuno Campos 2023-09-05 15:25:28 +01:00 committed by GitHub
parent ac2310a405
commit 5d8673a3c1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,7 +1,8 @@
import asyncio
import logging
import warnings
from typing import Any, Dict, Iterator, List, Optional, Union
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Dict, Iterator, List, Optional, Union, cast
import aiohttp
import requests
@ -129,9 +130,18 @@ class AsyncHtmlLoader(BaseLoader):
def load(self) -> List[Document]:
"""Load text from the url(s) in web_path."""
try:
# Raises RuntimeError if there is no current event loop.
asyncio.get_running_loop()
# If there is a current event loop, we need to run the async code
# in a separate loop, in a separate thread.
with ThreadPoolExecutor(max_workers=1) as executor:
future = executor.submit(asyncio.run, self.fetch_all(self.web_paths))
results = future.result()
except RuntimeError:
results = asyncio.run(self.fetch_all(self.web_paths))
docs = []
for i, text in enumerate(results):
for i, text in enumerate(cast(List[str], results)):
metadata = {"source": self.web_paths[i]}
docs.append(Document(page_content=text, metadata=metadata))