From 9a858a9107b95524a19ed16d5438dc4f66b16a66 Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Thu, 21 Sep 2023 12:49:56 -0700 Subject: [PATCH] Bagatur/arxiv kwargs (#10903) support all arXiv api wrapper kwargs in loader --- .../langchain/document_loaders/arxiv.py | 25 +++++++------------ 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/libs/langchain/langchain/document_loaders/arxiv.py b/libs/langchain/langchain/document_loaders/arxiv.py index 0cfde95afa..a3da66223d 100644 --- a/libs/langchain/langchain/document_loaders/arxiv.py +++ b/libs/langchain/langchain/document_loaders/arxiv.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Any, List, Optional from langchain.docstore.document import Document from langchain.document_loaders.base import BaseLoader @@ -9,25 +9,18 @@ class ArxivLoader(BaseLoader): """Load a query result from `Arxiv`. The loader converts the original PDF format into the text. + + Args: + Supports all arguments of `ArxivAPIWrapper`. """ def __init__( - self, - query: str, - load_max_docs: Optional[int] = 100, - load_all_available_meta: Optional[bool] = False, + self, query: str, doc_content_chars_max: Optional[int] = None, **kwargs: Any ): self.query = query - """The query to be passed to the arxiv.org API.""" - self.load_max_docs = load_max_docs - """The maximum number of documents to load.""" - self.load_all_available_meta = load_all_available_meta - """Whether to load all available metadata.""" + self.client = ArxivAPIWrapper( + doc_content_chars_max=doc_content_chars_max, **kwargs + ) def load(self) -> List[Document]: - arxiv_client = ArxivAPIWrapper( - load_max_docs=self.load_max_docs, - load_all_available_meta=self.load_all_available_meta, - ) - docs = arxiv_client.load(self.query) - return docs + return self.client.load(self.query)