diff --git a/langchain/utilities/arxiv.py b/langchain/utilities/arxiv.py
index 1c114813..92af5df8 100644
--- a/langchain/utilities/arxiv.py
+++ b/langchain/utilities/arxiv.py
@@ -1,7 +1,7 @@
 """Util that calls Arxiv."""
 import logging
 import os
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional
 
 from pydantic import BaseModel, Extra, root_validator
 
@@ -38,7 +38,8 @@ class ArxivAPIWrapper(BaseModel):
     ARXIV_MAX_QUERY_LENGTH = 300
     load_max_docs: int = 100
     load_all_available_meta: bool = False
-    doc_content_chars_max: int = 4000
+    # Maximum length of returned document content; None disables the limit.
+    doc_content_chars_max: Optional[int] = 4000
 
     class Config:
         """Configuration for this pydantic object."""
diff --git a/tests/integration_tests/utilities/test_arxiv.py b/tests/integration_tests/utilities/test_arxiv.py
index d55f6e39..0fc2fd6d 100644
--- a/tests/integration_tests/utilities/test_arxiv.py
+++ b/tests/integration_tests/utilities/test_arxiv.py
@@ -66,6 +66,26 @@ def test_load_returns_limited_docs() -> None:
     assert_docs(docs)
 
 
+def test_load_returns_limited_doc_content_chars() -> None:
+    """Test that page content is truncated to doc_content_chars_max characters."""
+
+    doc_content_chars_max = 100
+    api_client = ArxivAPIWrapper(doc_content_chars_max=doc_content_chars_max)
+    docs = api_client.load("1605.08386")
+    assert len(docs[0].page_content) == doc_content_chars_max
+
+
+def test_load_returns_unlimited_doc_content_chars() -> None:
+    """Test that doc_content_chars_max=None leaves page content untruncated."""
+
+    # Avoid pinning an exact length, which would break whenever the extracted
+    # text of the paper changes; exceeding the default cap proves "no limit".
+    default_doc_content_chars_max = 4000
+    api_client = ArxivAPIWrapper(doc_content_chars_max=None)
+    docs = api_client.load("1605.08386")
+    assert len(docs[0].page_content) > default_doc_content_chars_max
+
+
 def test_load_returns_full_set_of_metadata() -> None:
     """Test that returns several docs"""
     api_client = ArxivAPIWrapper(load_max_docs=1, load_all_available_meta=True)