ArxivAPIWrapper - doc_content_chars_max (#6063)

This PR refactors the `ArxivAPIWrapper` class, making the
`doc_content_chars_max` parameter optional. Additionally, tests have
been added to verify the behavior of the `doc_content_chars_max`
parameter.

Fixes #6027 (issue)
This commit is contained in:
hp0404 2023-06-16 08:16:42 +03:00 committed by GitHub
parent a9b97aa6f4
commit b01cf0dd54
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 20 additions and 2 deletions

View File

@ -1,7 +1,7 @@
"""Util that calls Arxiv.""" """Util that calls Arxiv."""
import logging import logging
import os import os
from typing import Any, Dict, List from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Extra, root_validator from pydantic import BaseModel, Extra, root_validator
@ -38,7 +38,7 @@ class ArxivAPIWrapper(BaseModel):
ARXIV_MAX_QUERY_LENGTH = 300 ARXIV_MAX_QUERY_LENGTH = 300
load_max_docs: int = 100 load_max_docs: int = 100
load_all_available_meta: bool = False load_all_available_meta: bool = False
doc_content_chars_max: int = 4000 doc_content_chars_max: Optional[int] = 4000
class Config: class Config:
"""Configuration for this pydantic object.""" """Configuration for this pydantic object."""

View File

@ -66,6 +66,24 @@ def test_load_returns_limited_docs() -> None:
assert_docs(docs) assert_docs(docs)
def test_load_returns_limited_doc_content_chars() -> None:
    """Check that `load` caps page content at `doc_content_chars_max`.

    Builds a wrapper with a 100-character limit, loads the entry
    "1605.08386", and expects the first returned document to contain
    exactly that many characters.
    """
    char_limit = 100
    wrapper = ArxivAPIWrapper(doc_content_chars_max=char_limit)
    first_doc = wrapper.load("1605.08386")[0]
    assert len(first_doc.page_content) == char_limit
def test_load_returns_unlimited_doc_content_chars() -> None:
    """Check that `load` does not truncate when the limit is `None`.

    Builds a wrapper with `doc_content_chars_max=None`, loads the entry
    "1605.08386", and compares the first document's content length with
    a fixed expected value.
    """
    wrapper = ArxivAPIWrapper(doc_content_chars_max=None)
    loaded = wrapper.load("1605.08386")
    # NOTE(review): 54337 is presumably the full extracted text length of
    # this paper at the time of writing -- confirm it stays stable across
    # PDF-parser versions.
    expected_length = 54337
    assert len(loaded[0].page_content) == expected_length
def test_load_returns_full_set_of_metadata() -> None: def test_load_returns_full_set_of_metadata() -> None:
"""Test that returns several docs""" """Test that returns several docs"""
api_client = ArxivAPIWrapper(load_max_docs=1, load_all_available_meta=True) api_client = ArxivAPIWrapper(load_max_docs=1, load_all_available_meta=True)