From 5d8673a3c1935a05b2d8c38b7dd20b90d56f9714 Mon Sep 17 00:00:00 2001 From: Nuno Campos Date: Tue, 5 Sep 2023 15:25:28 +0100 Subject: [PATCH] Fix usage of AsyncHtmlLoader with an already running event loop (#10220) --- .../langchain/document_loaders/async_html.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/libs/langchain/langchain/document_loaders/async_html.py b/libs/langchain/langchain/document_loaders/async_html.py index 286319a5ee..80436bfec6 100644 --- a/libs/langchain/langchain/document_loaders/async_html.py +++ b/libs/langchain/langchain/document_loaders/async_html.py @@ -1,7 +1,8 @@ import asyncio import logging import warnings -from typing import Any, Dict, Iterator, List, Optional, Union +from concurrent.futures import ThreadPoolExecutor +from typing import Any, Dict, Iterator, List, Optional, Union, cast import aiohttp import requests @@ -129,9 +130,18 @@ class AsyncHtmlLoader(BaseLoader): def load(self) -> List[Document]: """Load text from the url(s) in web_path.""" - results = asyncio.run(self.fetch_all(self.web_paths)) + try: + # Raises RuntimeError if there is no current event loop. + asyncio.get_running_loop() + # If there is a current event loop, we need to run the async code + # in a separate loop, in a separate thread. + with ThreadPoolExecutor(max_workers=1) as executor: + future = executor.submit(asyncio.run, self.fetch_all(self.web_paths)) + results = future.result() + except RuntimeError: + results = asyncio.run(self.fetch_all(self.web_paths)) docs = [] - for i, text in enumerate(results): + for i, text in enumerate(cast(List[str], results)): metadata = {"source": self.web_paths[i]} docs.append(Document(page_content=text, metadata=metadata))