mirror of
https://github.com/hwchase17/langchain
synced 2024-11-16 06:13:16 +00:00
community[patch]: Add Pagination to GitHubIssuesLoader for Efficient GitHub Issues Retrieval (#16934)
- **Description:** Add Pagination to GitHubIssuesLoader for Efficient GitHub Issues Retrieval
- **Issue:** [#16864](https://github.com/langchain-ai/langchain/issues/16864)

---------

Co-authored-by: root <root@ip-172-31-46-160.ap-southeast-1.compute.internal>
Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
b87d6f9f48
commit
37ef6ac113
@ -65,6 +65,12 @@ class GitHubIssuesLoader(BaseGitHubLoader):
|
|||||||
since: Optional[str] = None
|
since: Optional[str] = None
|
||||||
"""Only show notifications updated after the given time.
|
"""Only show notifications updated after the given time.
|
||||||
This is a timestamp in ISO 8601 format: YYYY-MM-DDTHH:MM:SSZ."""
|
This is a timestamp in ISO 8601 format: YYYY-MM-DDTHH:MM:SSZ."""
|
||||||
|
page: Optional[int] = None
|
||||||
|
"""The page number for paginated results.
|
||||||
|
Defaults to 1 in the GitHub API."""
|
||||||
|
per_page: Optional[int] = None
|
||||||
|
"""Number of items per page.
|
||||||
|
Defaults to 30 in the GitHub API."""
|
||||||
|
|
||||||
@validator("since", allow_reuse=True)
|
@validator("since", allow_reuse=True)
|
||||||
def validate_since(cls, v: Optional[str]) -> Optional[str]:
|
def validate_since(cls, v: Optional[str]) -> Optional[str]:
|
||||||
@ -112,7 +118,11 @@ class GitHubIssuesLoader(BaseGitHubLoader):
|
|||||||
if not self.include_prs and doc.metadata["is_pull_request"]:
|
if not self.include_prs and doc.metadata["is_pull_request"]:
|
||||||
continue
|
continue
|
||||||
yield doc
|
yield doc
|
||||||
if response.links and response.links.get("next"):
|
if (
|
||||||
|
response.links
|
||||||
|
and response.links.get("next")
|
||||||
|
and (not self.page and not self.per_page)
|
||||||
|
):
|
||||||
url = response.links["next"]["url"]
|
url = response.links["next"]["url"]
|
||||||
else:
|
else:
|
||||||
url = None
|
url = None
|
||||||
@ -176,6 +186,8 @@ class GitHubIssuesLoader(BaseGitHubLoader):
|
|||||||
"sort": self.sort,
|
"sort": self.sort,
|
||||||
"direction": self.direction,
|
"direction": self.direction,
|
||||||
"since": self.since,
|
"since": self.since,
|
||||||
|
"page": self.page,
|
||||||
|
"per_page": self.per_page,
|
||||||
}
|
}
|
||||||
query_params_list = [
|
query_params_list = [
|
||||||
f"{k}={v}" for k, v in query_params_dict.items() if v is not None
|
f"{k}={v}" for k, v in query_params_dict.items() if v is not None
|
||||||
|
@ -2,11 +2,17 @@ from langchain_community.document_loaders.github import GitHubIssuesLoader
|
|||||||
|
|
||||||
|
|
||||||
def test_issues_load() -> None:
|
def test_issues_load() -> None:
|
||||||
title = "DocumentLoader for GitHub"
|
title = " Add caching to BaseChatModel (issue #1644)"
|
||||||
loader = GitHubIssuesLoader(
|
loader = GitHubIssuesLoader(
|
||||||
repo="langchain-ai/langchain", creator="UmerHA", state="all"
|
repo="langchain-ai/langchain",
|
||||||
|
creator="UmerHA",
|
||||||
|
state="all",
|
||||||
|
per_page=3,
|
||||||
|
page=2,
|
||||||
|
access_token="""""",
|
||||||
)
|
)
|
||||||
docs = loader.load()
|
docs = loader.load()
|
||||||
titles = [d.metadata["title"] for d in docs]
|
titles = [d.metadata["title"] for d in docs]
|
||||||
assert title in titles
|
assert title in titles
|
||||||
assert all(doc.metadata["creator"] == "UmerHA" for doc in docs)
|
assert all(doc.metadata["creator"] == "UmerHA" for doc in docs)
|
||||||
|
assert len(docs) == 3
|
||||||
|
Loading…
Reference in New Issue
Block a user