from typing import Any, List, Optional

from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader

# NOTE(review): ArxivAPIWrapper is used below, but its import is not visible
# in the reviewed excerpt -- confirm it is imported in this module.


class ArxivLoader(BaseLoader):
    """Load the documents returned by an `Arxiv` query.

    The loader converts the original PDF format into the text.

    Args:
        query: The query to be passed to the arxiv.org API.
        doc_content_chars_max: Forwarded unchanged to `ArxivAPIWrapper`
            (presumably a cap on document content length; `None` is passed
            through as-is -- confirm against `ArxivAPIWrapper`).
        **kwargs: Any further arguments supported by `ArxivAPIWrapper`;
            they are handed to its constructor untouched.
    """

    def __init__(
        self, query: str, doc_content_chars_max: Optional[int] = None, **kwargs: Any
    ):
        self.query = query
        # Gather every wrapper option in one mapping so the client is
        # configured exactly once, at construction time.
        wrapper_options = {"doc_content_chars_max": doc_content_chars_max, **kwargs}
        self.client = ArxivAPIWrapper(**wrapper_options)

    def load(self) -> List[Document]:
        """Run the stored query through the client and return its documents."""
        documents = self.client.load(self.query)
        return documents