community: refactor Arxiv search logic (#27084)

PR message:

Description:
This PR refactors the Arxiv API wrapper by extracting the Arxiv search
logic into a helper function (_fetch_results) to reduce code duplication
and improve maintainability. The helper function is used in methods like
get_summaries_as_docs, run, and lazy_load, streamlining the code and
making it easier to maintain in the future.

Issue:
This is a minor refactor, so no specific issue is being fixed.

Dependencies:
No new dependencies are introduced with this change.

Add tests and docs:
No new integrations were added, so no additional tests or docs are
necessary for this PR.

Lint and test:
I have run make format, make lint, and make test to ensure all checks
pass successfully.

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
Ahmet Yasin Aytar 2024-10-15 18:43:03 +03:00 committed by GitHub
parent 57fbc6bdf1
commit 443b37403d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -94,6 +94,16 @@ class ArxivAPIWrapper(BaseModel):
)
return values
def _fetch_results(self, query: str, max_results: Optional[int] = None) -> Any:
    """Fetch arxiv results for ``query``.

    Args:
        query: Either whitespace-separated arXiv identifiers or a free-text
            search query. Free-text queries are truncated to
            ``self.ARXIV_MAX_QUERY_LENGTH`` characters before being sent.
        max_results: Upper bound on the number of results requested.
            When omitted, ``self.top_k_results`` is used, so existing
            callers keep their behaviour; a caller governed by a different
            limit (for example ``self.load_max_docs``) can pass it here.

    Returns:
        Whatever ``self.arxiv_search(...).results()`` returns.
    """
    # ``None`` (not falsiness) selects the default so an explicit value is
    # always forwarded unchanged.
    limit = self.top_k_results if max_results is None else max_results
    if self.is_arxiv_identifier(query):
        # Identifier lookups go through ``id_list``, one entry per token.
        return self.arxiv_search(
            id_list=query.split(), max_results=limit
        ).results()
    return self.arxiv_search(
        query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=limit
    ).results()
def get_summaries_as_docs(self, query: str) -> List[Document]:
"""
Performs an arxiv search and returns list of
@ -107,16 +117,11 @@ class ArxivAPIWrapper(BaseModel):
query: a plaintext search query
"""
try:
if self.is_arxiv_identifier(query):
results = self.arxiv_search(
id_list=query.split(),
max_results=self.top_k_results,
).results()
else:
results = self.arxiv_search( # type: ignore
query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results
).results()
results = self._fetch_results(
query
) # Using helper function to fetch results
except self.arxiv_exceptions as ex:
logger.error(f"Arxiv exception: {ex}") # Added error logging
return [Document(page_content=f"Arxiv exception: {ex}")]
docs = [
Document(
@ -146,16 +151,11 @@ class ArxivAPIWrapper(BaseModel):
query: a plaintext search query
"""
try:
if self.is_arxiv_identifier(query):
results = self.arxiv_search(
id_list=query.split(),
max_results=self.top_k_results,
).results()
else:
results = self.arxiv_search( # type: ignore
query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results
).results()
results = self._fetch_results(
query
) # Using helper function to fetch results
except self.arxiv_exceptions as ex:
logger.error(f"Arxiv exception: {ex}") # Added error logging
return f"Arxiv exception: {ex}"
docs = [
f"Published: {result.updated.date()}\n"
@ -208,15 +208,9 @@ class ArxivAPIWrapper(BaseModel):
try:
# Remove the ":" and "-" from the query, as they can cause search problems
query = query.replace(":", "").replace("-", "")
if self.is_arxiv_identifier(query):
results = self.arxiv_search(
id_list=query[: self.ARXIV_MAX_QUERY_LENGTH].split(),
max_results=self.load_max_docs,
).results()
else:
results = self.arxiv_search( # type: ignore
query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.load_max_docs
).results()
results = self._fetch_results(
query
) # Using helper function to fetch results
except self.arxiv_exceptions as ex:
logger.debug("Error on arxiv: %s", ex)
return