diff --git a/libs/community/langchain_community/document_loaders/github.py b/libs/community/langchain_community/document_loaders/github.py
index 8a361a46e2..65f327d948 100644
--- a/libs/community/langchain_community/document_loaders/github.py
+++ b/libs/community/langchain_community/document_loaders/github.py
@@ -65,6 +65,12 @@ class GitHubIssuesLoader(BaseGitHubLoader):
     since: Optional[str] = None
     """Only show notifications updated after the given time.
     This is a timestamp in ISO 8601 format: YYYY-MM-DDTHH:MM:SSZ."""
+    page: Optional[int] = None
+    """The page number for paginated results.
+    Defaults to 1 in the GitHub API."""
+    per_page: Optional[int] = None
+    """Number of items per page.
+    Defaults to 30 in the GitHub API."""
 
     @validator("since", allow_reuse=True)
     def validate_since(cls, v: Optional[str]) -> Optional[str]:
@@ -112,7 +118,11 @@ class GitHubIssuesLoader(BaseGitHubLoader):
                 if not self.include_prs and doc.metadata["is_pull_request"]:
                     continue
                 yield doc
-            if response.links and response.links.get("next"):
+            if (
+                response.links
+                and response.links.get("next")
+                and (not self.page and not self.per_page)
+            ):
                 url = response.links["next"]["url"]
             else:
                 url = None
@@ -176,6 +186,8 @@ class GitHubIssuesLoader(BaseGitHubLoader):
             "sort": self.sort,
             "direction": self.direction,
             "since": self.since,
+            "page": self.page,
+            "per_page": self.per_page,
         }
         query_params_list = [
             f"{k}={v}" for k, v in query_params_dict.items() if v is not None
diff --git a/libs/community/tests/integration_tests/document_loaders/test_github.py b/libs/community/tests/integration_tests/document_loaders/test_github.py
index a3ad86a0f9..a711e2c142 100644
--- a/libs/community/tests/integration_tests/document_loaders/test_github.py
+++ b/libs/community/tests/integration_tests/document_loaders/test_github.py
@@ -2,11 +2,17 @@ from langchain_community.document_loaders.github import GitHubIssuesLoader
 
 
 def test_issues_load() -> None:
-    title = "DocumentLoader for GitHub"
+    title = " Add caching to BaseChatModel (issue #1644)"
     loader = GitHubIssuesLoader(
-        repo="langchain-ai/langchain", creator="UmerHA", state="all"
+        repo="langchain-ai/langchain",
+        creator="UmerHA",
+        state="all",
+        per_page=3,
+        page=2,
+        access_token="""""",
     )
     docs = loader.load()
     titles = [d.metadata["title"] for d in docs]
     assert title in titles
     assert all(doc.metadata["creator"] == "UmerHA" for doc in docs)
+    assert len(docs) == 3