ArxivAPIWrapper - doc_content_chars_max (#6063)

This PR refactors the `ArxivAPIWrapper` class, making the
`doc_content_chars_max` parameter optional. Additionally, tests have
been added to verify the behavior of the `doc_content_chars_max`
parameter.

Fixes #6027 (issue)
searx_updates
hp0404 11 months ago committed by GitHub
parent a9b97aa6f4
commit b01cf0dd54
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,7 +1,7 @@
"""Util that calls Arxiv."""
import logging
import os
from typing import Any, Dict, List
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Extra, root_validator
@ -38,7 +38,7 @@ class ArxivAPIWrapper(BaseModel):
ARXIV_MAX_QUERY_LENGTH = 300
load_max_docs: int = 100
load_all_available_meta: bool = False
doc_content_chars_max: int = 4000
doc_content_chars_max: Optional[int] = 4000
class Config:
"""Configuration for this pydantic object."""

@ -66,6 +66,24 @@ def test_load_returns_limited_docs() -> None:
assert_docs(docs)
def test_load_returns_limited_doc_content_chars() -> None:
    """Check that loaded page content has exactly the configured length."""
    char_limit = 100
    wrapper = ArxivAPIWrapper(doc_content_chars_max=char_limit)
    loaded = wrapper.load("1605.08386")
    first_doc = loaded[0]
    # The first document's content length must equal the requested limit.
    assert len(first_doc.page_content) == char_limit
def test_load_returns_unlimited_doc_content_chars() -> None:
    """Check the page content length when the limit is set to ``None``."""
    wrapper = ArxivAPIWrapper(doc_content_chars_max=None)
    loaded = wrapper.load("1605.08386")
    # NOTE(review): 54337 is a hard-coded expected length, presumably the
    # full text size of arXiv entry 1605.08386 -- confirm if this fails.
    assert len(loaded[0].page_content) == 54337
def test_load_returns_full_set_of_metadata() -> None:
"""Test that returns several docs"""
api_client = ArxivAPIWrapper(load_max_docs=1, load_all_available_meta=True)

Loading…
Cancel
Save