diff --git a/docs/docs/integrations/tools/apify.ipynb b/docs/docs/integrations/tools/apify.ipynb index 96e2fef7a9..76bf7ef164 100644 --- a/docs/docs/integrations/tools/apify.ipynb +++ b/docs/docs/integrations/tools/apify.ipynb @@ -83,7 +83,7 @@ "source": [ "loader = apify.call_actor(\n", " actor_id=\"apify/website-content-crawler\",\n", - " run_input={\"startUrls\": [{\"url\": \"https://python.langchain.com/en/latest/\"}]},\n", + " run_input={\"startUrls\": [{\"url\": \"https://python.langchain.com\"}]},\n", " dataset_mapping_function=lambda item: Document(\n", " page_content=item[\"text\"] or \"\", metadata={\"source\": item[\"url\"]}\n", " ),\n", diff --git a/libs/community/langchain_community/document_loaders/apify_dataset.py b/libs/community/langchain_community/document_loaders/apify_dataset.py index 1d7d7ce186..2805ff698c 100644 --- a/libs/community/langchain_community/document_loaders/apify_dataset.py +++ b/libs/community/langchain_community/document_loaders/apify_dataset.py @@ -60,7 +60,12 @@ class ApifyDatasetLoader(BaseLoader, BaseModel): try: from apify_client import ApifyClient - values["apify_client"] = ApifyClient() + client = ApifyClient() + # Tag the User-Agent; None default tolerates clients without httpx_client. + if httpx_client := getattr(client.http_client, "httpx_client", None): + httpx_client.headers["user-agent"] += "; Origin/langchain" + + values["apify_client"] = client except ImportError: raise ImportError( "Could not import apify-client Python package. 
" diff --git a/libs/community/langchain_community/utilities/apify.py b/libs/community/langchain_community/utilities/apify.py index 6f37f84f21..04b893d460 100644 --- a/libs/community/langchain_community/utilities/apify.py +++ b/libs/community/langchain_community/utilities/apify.py @@ -31,8 +31,19 @@ class ApifyWrapper(BaseModel): try: from apify_client import ApifyClient, ApifyClientAsync - values["apify_client"] = ApifyClient(apify_api_token) - values["apify_client_async"] = ApifyClientAsync(apify_api_token) + client = ApifyClient(apify_api_token) + # Tag the User-Agent; None default tolerates clients without the attribute. + if httpx_client := getattr(client.http_client, "httpx_client", None): + httpx_client.headers["user-agent"] += "; Origin/langchain" + + async_client = ApifyClientAsync(apify_api_token) + if httpx_async_client := getattr( + async_client.http_client, "httpx_async_client", None + ): + httpx_async_client.headers["user-agent"] += "; Origin/langchain" + + values["apify_client"] = client + values["apify_client_async"] = async_client except ImportError: raise ImportError( "Could not import apify-client Python package. "