mirror of
https://github.com/hwchase17/langchain
synced 2024-11-13 19:10:52 +00:00
community: refactor Arxiv search logic (#27084)
PR message: Description: This PR refactors the Arxiv API wrapper by extracting the Arxiv search logic into a helper function (`_fetch_results`) to reduce code duplication and improve maintainability. The helper function is used in methods like `get_summaries_as_docs`, `run`, and `lazy_load`, streamlining the code and making it easier to maintain in the future. Issue: This is a minor refactor, so no specific issue is being fixed. Dependencies: No new dependencies are introduced with this change. Add tests and docs: No new integrations were added, so no additional tests or docs are necessary for this PR. Lint and test: I have run `make format`, `make lint`, and `make test` to ensure all checks pass successfully. --------- Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
parent
57fbc6bdf1
commit
443b37403d
@ -94,6 +94,16 @@ class ArxivAPIWrapper(BaseModel):
|
||||
)
|
||||
return values
|
||||
|
||||
def _fetch_results(
    self, query: str, max_results: "int | None" = None
) -> Any:
    """Run an arxiv search for ``query`` and return its results.

    If ``self.is_arxiv_identifier(query)`` is true, the query is split on
    whitespace and passed to ``self.arxiv_search`` as ``id_list``.
    Otherwise the query is truncated to ``self.ARXIV_MAX_QUERY_LENGTH``
    characters and passed as a free-text search.

    Args:
        query: one or more whitespace-separated arxiv identifiers, or a
            plaintext search query.
        max_results: upper bound handed to ``self.arxiv_search``. When
            ``None`` (the default) ``self.top_k_results`` is used, which
            keeps the behaviour of existing callers. Callers that limit by
            a different setting (for example a loader bounded by
            ``self.load_max_docs``) should pass that value explicitly.

    Returns:
        Whatever ``self.arxiv_search(...).results()`` returns.

    Exceptions raised by the search are not caught here; callers are
    expected to handle them.
    """
    # An explicit ``None`` check so that a caller-supplied 0 is respected
    # instead of silently falling back to the default.
    limit = self.top_k_results if max_results is None else max_results

    if self.is_arxiv_identifier(query):
        return self.arxiv_search(
            id_list=query.split(), max_results=limit
        ).results()

    return self.arxiv_search(
        query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=limit
    ).results()
|
||||
|
||||
def get_summaries_as_docs(self, query: str) -> List[Document]:
|
||||
"""
|
||||
Performs an arxiv search and returns list of
|
||||
@ -107,16 +117,11 @@ class ArxivAPIWrapper(BaseModel):
|
||||
query: a plaintext search query
|
||||
"""
|
||||
try:
|
||||
if self.is_arxiv_identifier(query):
|
||||
results = self.arxiv_search(
|
||||
id_list=query.split(),
|
||||
max_results=self.top_k_results,
|
||||
).results()
|
||||
else:
|
||||
results = self.arxiv_search( # type: ignore
|
||||
query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results
|
||||
).results()
|
||||
results = self._fetch_results(
|
||||
query
|
||||
) # Using helper function to fetch results
|
||||
except self.arxiv_exceptions as ex:
|
||||
logger.error(f"Arxiv exception: {ex}") # Added error logging
|
||||
return [Document(page_content=f"Arxiv exception: {ex}")]
|
||||
docs = [
|
||||
Document(
|
||||
@ -146,16 +151,11 @@ class ArxivAPIWrapper(BaseModel):
|
||||
query: a plaintext search query
|
||||
"""
|
||||
try:
|
||||
if self.is_arxiv_identifier(query):
|
||||
results = self.arxiv_search(
|
||||
id_list=query.split(),
|
||||
max_results=self.top_k_results,
|
||||
).results()
|
||||
else:
|
||||
results = self.arxiv_search( # type: ignore
|
||||
query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results
|
||||
).results()
|
||||
results = self._fetch_results(
|
||||
query
|
||||
) # Using helper function to fetch results
|
||||
except self.arxiv_exceptions as ex:
|
||||
logger.error(f"Arxiv exception: {ex}") # Added error logging
|
||||
return f"Arxiv exception: {ex}"
|
||||
docs = [
|
||||
f"Published: {result.updated.date()}\n"
|
||||
@ -208,15 +208,9 @@ class ArxivAPIWrapper(BaseModel):
|
||||
try:
|
||||
# Remove the ":" and "-" from the query, as they can cause search problems
|
||||
query = query.replace(":", "").replace("-", "")
|
||||
if self.is_arxiv_identifier(query):
|
||||
results = self.arxiv_search(
|
||||
id_list=query[: self.ARXIV_MAX_QUERY_LENGTH].split(),
|
||||
max_results=self.load_max_docs,
|
||||
).results()
|
||||
else:
|
||||
results = self.arxiv_search( # type: ignore
|
||||
query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.load_max_docs
|
||||
).results()
|
||||
results = self._fetch_results(
|
||||
query
|
||||
) # Using helper function to fetch results
|
||||
except self.arxiv_exceptions as ex:
|
||||
logger.debug("Error on arxiv: %s", ex)
|
||||
return
|
||||
|
Loading…
Reference in New Issue
Block a user