Harrison/GitHub toolkit (#8047)

Co-authored-by: Trevor Dobbertin <trevordobbertin@gmail.com>
pull/8071/head
Harrison Chase 1 year ago committed by GitHub
parent ae8bc9e830
commit 1f3b987860
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,167 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# GitHub\n",
"\n",
"This notebook goes over how to use the GitHub tool.\n",
"The GitHub tool allows agents to interact with a given GitHub repository. It implements CRUD operations for modifying files and can read/comment on Issues. The tool wraps the [PyGitHub](https://github.com/PyGithub/PyGithub) library.\n",
"\n",
"In order to interact with the GitHub API you must create a [GitHub app](https://docs.github.com/en/apps/creating-github-apps/about-creating-github-apps/about-creating-github-apps). Next, you must set the following environment variables:\n",
"```\n",
"GITHUB_APP_ID\n",
"GITHUB_APP_PRIVATE_KEY\n",
"GITHUB_REPOSITORY\n",
"GITHUB_BRANCH\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "shellscript"
}
},
"outputs": [],
"source": [
"%pip install pygithub"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from langchain.agents import AgentType\n",
"from langchain.agents import initialize_agent\n",
"from langchain.agents.agent_toolkits.github.toolkit import GitHubToolkit\n",
"from langchain.llms import OpenAI\n",
"from langchain.utilities.github import GitHubAPIWrapper"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"os.environ[\"GITHUB_APP_ID\"] = \"your-github-app-id\"\n",
"os.environ[\"GITHUB_APP_PRIVATE_KEY\"] = \"/path/to/your/private/key\"\n",
"os.environ[\"GITHUB_REPOSITORY\"] = \"user/repo\"\n",
"os.environ[\"GITHUB_BRANCH\"] = \"branch-name\"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"your-openai-api-key\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"llm = OpenAI(temperature=0)\n",
"github = GitHubAPIWrapper()\n",
"toolkit = GitHubToolkit.from_github_api_wrapper(github)\n",
"agent = initialize_agent(\n",
" toolkit.get_tools(), llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m I need to figure out what issues need to be completed and how to complete them.\n",
"Action: Get Issues\n",
"Action Input: N/A\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3mFound 1 issues:\n",
"[{'title': 'Change the main script to print Hello AI!', 'number': 1}]\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I need to get more information about this issue.\n",
"Action: Get Issue\n",
"Action Input: 1\u001b[0m\n",
"Observation: \u001b[33;1m\u001b[1;3m{'title': 'Change the main script to print Hello AI!', 'body': None, 'comments': '[]'}\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I need to update the main script to print Hello AI!\n",
"Action: Update File\n",
"Action Input: main.py\n",
"OLD <<<<\n",
"print(\"Hello World!\")\n",
">>>> OLD\n",
"NEW <<<<\n",
"print(\"Hello AI!\")\n",
">>>> NEW\u001b[0m\n",
"Observation: \u001b[38;5;200m\u001b[1;3mFile content was not updated because the old content was not found. It may be helpful to use the read_file action to get the current file contents.\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I need to read the current file contents.\n",
"Action: Read File\n",
"Action Input: main.py\u001b[0m\n",
"Observation: \u001b[33;1m\u001b[1;3mprint(\"Hello world!\")\n",
"\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I need to update the main script to print Hello AI!\n",
"Action: Update File\n",
"Action Input: main.py\n",
"OLD <<<<\n",
"print(\"Hello world!\")\n",
">>>> OLD\n",
"NEW <<<<\n",
"print(\"Hello AI!\")\n",
">>>> NEW\u001b[0m\n",
"Observation: \u001b[38;5;200m\u001b[1;3mUpdated file main.py\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
"Final Answer: The main script has been updated to print \"Hello AI!\"\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"'The main script has been updated to print \"Hello AI!\"'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent.run(\n",
" \"You have the software engineering capabilities of a Google Principle engineer. You are tasked with completing issues on a github repository. Please look at the existing issues and complete them.\"\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

@ -0,0 +1,33 @@
"""GitHub Toolkit."""
from typing import List
from langchain.agents.agent_toolkits.base import BaseToolkit
from langchain.tools import BaseTool
from langchain.tools.github.tool import GitHubAction
from langchain.utilities.github import GitHubAPIWrapper
class GitHubToolkit(BaseToolkit):
    """Toolkit that bundles the GitHub API wrapper operations as agent tools."""

    tools: List[BaseTool] = []

    @classmethod
    def from_github_api_wrapper(
        cls, github_api_wrapper: GitHubAPIWrapper
    ) -> "GitHubToolkit":
        """Create a toolkit holding one ``GitHubAction`` per wrapper operation.

        Args:
            github_api_wrapper: Wrapper whose ``list()`` result supplies the
                ``name``, ``description`` and ``mode`` of every tool, and
                which each tool uses to execute its operation.

        Returns:
            A toolkit whose ``tools`` are the freshly built actions.
        """
        built_tools: List[BaseTool] = []
        for spec in github_api_wrapper.list():
            tool = GitHubAction(
                name=spec["name"],
                description=spec["description"],
                mode=spec["mode"],
                api_wrapper=github_api_wrapper,
            )
            built_tools.append(tool)
        return cls(tools=built_tools)

    def get_tools(self) -> List[BaseTool]:
        """Return the tools currently held by this toolkit."""
        return self.tools

@ -0,0 +1 @@
""" GitHub Tool """

@ -0,0 +1,56 @@
# flake8: noqa
# Tool descriptions shown to the LLM agent. The text of each constant is part
# of the agent's runtime prompt, so wording changes alter agent behavior.

# Description of the "Get Issues" operation.
# NOTE(review): the wording promises 5 issues; confirm the implementation
# actually caps the number of issues it returns.
GET_ISSUES_PROMPT = """
This tool will fetch a list of the repository's issues. It will return the title, and issue number of 5 issues. It takes no input.
"""
# Description of the "Get Issue" operation; the input is a single issue number.
GET_ISSUE_PROMPT = """
This tool will fetch the title, body, and comment thread of a specific issue. **VERY IMPORTANT**: You must specify the issue number as an integer.
"""
# Description of the "Comment on Issue" operation; the input format is
# "<issue number>", two newlines, then the comment text.
COMMENT_ON_ISSUE_PROMPT = """
This tool is useful when you need to comment on a GitHub issue. Simply pass in the issue number and the comment you would like to make. Please use this sparingly as we don't want to clutter the comment threads. **VERY IMPORTANT**: Your input to this tool MUST strictly follow these rules:
- First you must specify the issue number as an integer
- Then you must place two newlines
- Then you must specify your comment
"""
# Description of the "Create File" operation; the input is the file path on
# the first line followed by the file contents.
CREATE_FILE_PROMPT = """
This tool is a wrapper for the GitHub API, useful when you need to create a file in a GitHub repository. **VERY IMPORTANT**: Your input to this tool MUST strictly follow these rules:
- First you must specify which file to create by passing a full file path (**IMPORTANT**: the path must not start with a slash)
- Then you must specify the contents of the file
For example, if you would like to create a file called /test/test.txt with contents "test contents", you would pass in the following string:
test/test.txt
test contents
"""
# Description of the "Read File" operation; the input is just the file path.
READ_FILE_PROMPT = """
This tool is a wrapper for the GitHub API, useful when you need to read the contents of a file in a GitHub repository. Simply pass in the full file path of the file you would like to read. **IMPORTANT**: the path must not start with a slash
"""
# Description of the "Update File" operation; the input is the file path
# followed by OLD <<<< ... >>>> OLD and NEW <<<< ... >>>> NEW sections.
UPDATE_FILE_PROMPT = """
This tool is a wrapper for the GitHub API, useful when you need to update the contents of a file in a GitHub repository. **VERY IMPORTANT**: Your input to this tool MUST strictly follow these rules:
- First you must specify which file to modify by passing a full file path (**IMPORTANT**: the path must not start with a slash)
- Then you must specify the old contents which you would like to replace wrapped in OLD <<<< and >>>> OLD
- Then you must specify the new contents which you would like to replace the old contents with wrapped in NEW <<<< and >>>> NEW
For example, if you would like to replace the contents of the file /test/test.txt from "old contents" to "new contents", you would pass in the following string:
test/test.txt
OLD <<<<
old contents
>>>> OLD
NEW <<<<
new contents
>>>> NEW
"""
# Description of the "Delete File" operation; the input is just the file path.
DELETE_FILE_PROMPT = """
This tool is a wrapper for the GitHub API, useful when you need to delete a file in a GitHub repository. Simply pass in the full file path of the file you would like to delete. **IMPORTANT**: the path must not start with a slash
"""

@ -0,0 +1,64 @@
"""
This tool allows agents to interact with the pygithub library
and operate on a GitHub repository.
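To use this tool, you must first set as environment variables:
GITHUB_APP_ID
GITHUB_APP_PRIVATE_KEY -> path to the GitHub App's private key file
GITHUB_REPOSITORY -> format: {owner}/{repo}
GITHUB_BRANCH -> optional, defaults to "master"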
TODO: remove below
Below is a sample script that uses the Github tool:
```python
from langchain.agents import AgentType
from langchain.agents import initialize_agent
from langchain.agents.agent_toolkits.github.toolkit import GitHubToolkit
from langchain.llms import OpenAI
from langchain.utilities.github import GitHubAPIWrapper
llm = OpenAI(temperature=0)
github = GitHubAPIWrapper()
toolkit = GitHubToolkit.from_github_api_wrapper(github)
agent = initialize_agent(
toolkit.get_tools(), llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True
)
agent.run(
"{{Enter a prompt here to direct the agent}}"
)
```
"""
from typing import Optional
from pydantic import Field
from langchain.callbacks.manager import (
AsyncCallbackManagerForToolRun,
CallbackManagerForToolRun,
)
from langchain.tools.base import BaseTool
from langchain.utilities.github import GitHubAPIWrapper
class GitHubAction(BaseTool):
    """Tool that forwards one named operation to a ``GitHubAPIWrapper``."""

    # Wrapper that actually talks to GitHub; built from the environment
    # when the caller does not supply one.
    api_wrapper: GitHubAPIWrapper = Field(default_factory=GitHubAPIWrapper)
    # Operation identifier passed to ``api_wrapper.run``.
    mode: str
    name = ""
    description = ""

    def _run(
        self,
        instructions: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Execute this tool's operation synchronously.

        Args:
            instructions: Raw tool input, handed unchanged to the wrapper.
            run_manager: Optional callback manager (unused here).

        Returns:
            Whatever string the wrapper returns for this ``mode``.
        """
        wrapper = self.api_wrapper
        outcome = wrapper.run(self.mode, instructions)
        return outcome

    async def _arun(
        self,
        _: str,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Async entry point; always raises because async is unsupported.

        Raises:
            NotImplementedError: Unconditionally.
        """
        raise NotImplementedError("GitHubAction does not support async")

@ -0,0 +1,268 @@
"""Util that calls GitHub."""
import json
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Extra, root_validator
from langchain.tools.github.prompt import (
COMMENT_ON_ISSUE_PROMPT,
CREATE_FILE_PROMPT,
DELETE_FILE_PROMPT,
GET_ISSUE_PROMPT,
GET_ISSUES_PROMPT,
READ_FILE_PROMPT,
UPDATE_FILE_PROMPT,
)
from langchain.utils import get_from_dict_or_env
class GitHubAPIWrapper(BaseModel):
    """Wrapper for GitHub API.

    Authenticates as a GitHub App through PyGithub and exposes a fixed set of
    repository operations (issues and file CRUD) that are dispatched by mode
    name through :meth:`run`.
    """

    github: Any  #: :meta private:
    github_repo_instance: Any  #: :meta private:
    github_repository: Optional[str] = None
    github_app_id: Optional[str] = None
    github_app_private_key: Optional[str] = None
    github_branch: Optional[str] = None
    # One entry per supported operation: "mode" is the key understood by
    # run(), "name" and "description" are what the agent tool advertises.
    operations: List[Dict] = [
        {
            "mode": "get_issues",
            "name": "Get Issues",
            "description": GET_ISSUES_PROMPT,
        },
        {
            "mode": "get_issue",
            "name": "Get Issue",
            "description": GET_ISSUE_PROMPT,
        },
        {
            "mode": "comment_on_issue",
            "name": "Comment on Issue",
            "description": COMMENT_ON_ISSUE_PROMPT,
        },
        {
            "mode": "create_file",
            "name": "Create File",
            "description": CREATE_FILE_PROMPT,
        },
        {
            "mode": "read_file",
            "name": "Read File",
            "description": READ_FILE_PROMPT,
        },
        {
            "mode": "update_file",
            "name": "Update File",
            "description": UPDATE_FILE_PROMPT,
        },
        {
            "mode": "delete_file",
            "name": "Delete File",
            "description": DELETE_FILE_PROMPT,
        },
    ]

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    # NOTE: the name shadows the ``list`` builtin inside the class body; it is
    # kept because callers already depend on it.
    def list(self) -> List[Dict]:
        """Return the descriptors of every supported operation."""
        return self.operations

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key and python package exists in environment.

        Resolves the repository, app id, private-key path and branch from
        ``values`` or the matching ``GITHUB_*`` environment variables (the
        branch defaults to ``"master"``), reads the private key from disk,
        authenticates as the app's first installation and stores the
        resulting client and repository objects in ``values``.

        Raises:
            ImportError: If PyGithub is not installed.
        """
        github_repository = get_from_dict_or_env(
            values, "github_repository", "GITHUB_REPOSITORY"
        )

        github_app_id = get_from_dict_or_env(values, "github_app_id", "GITHUB_APP_ID")

        github_app_private_key = get_from_dict_or_env(
            values, "github_app_private_key", "GITHUB_APP_PRIVATE_KEY"
        )

        github_branch = get_from_dict_or_env(
            values, "github_branch", "GITHUB_BRANCH", default="master"
        )

        try:
            from github import Auth, GithubIntegration
        except ImportError:
            raise ImportError(
                "PyGithub is not installed. "
                "Please install it with `pip install PyGithub`"
            )

        # The configured value is a path to the key file, not the key itself.
        with open(github_app_private_key, "r") as f:
            private_key = f.read()

        auth = Auth.AppAuth(
            github_app_id,
            private_key,
        )
        gi = GithubIntegration(auth=auth)
        installation = gi.get_installations()[0]

        # create a GitHub instance:
        g = installation.get_github_for_installation()

        values["github"] = g
        values["github_repo_instance"] = g.get_repo(github_repository)
        values["github_repository"] = github_repository
        values["github_app_id"] = github_app_id
        values["github_app_private_key"] = github_app_private_key
        values["github_branch"] = github_branch

        return values

    def _get_contents(self, file_path: str) -> Any:
        """Fetch ``file_path`` from the configured branch.

        Reads must target the same branch the write operations commit to,
        otherwise content and sha come from a different branch than the one
        being modified. Falls back to the repository default branch when no
        branch is configured.
        """
        if self.github_branch:
            return self.github_repo_instance.get_contents(
                file_path, ref=self.github_branch
            )
        return self.github_repo_instance.get_contents(file_path)

    def parse_issues(self, issues: List[Any]) -> List[dict]:
        """Reduce issues to ``{"title": ..., "number": ...}`` dictionaries.

        Args:
            issues: Iterable of PyGithub issue objects (attribute access) or
                plain dictionaries (key access).

        Returns:
            One dictionary per issue, in input order.
        """
        parsed = []
        for issue in issues:
            # PyGithub objects are not subscriptable; only dicts use keys.
            if isinstance(issue, dict):
                title, number = issue["title"], issue["number"]
            else:
                title, number = issue.title, issue.number
            parsed.append({"title": title, "number": number})
        return parsed

    def get_issues(self) -> str:
        """Return a textual summary of the repository's open issues."""
        issues = self.github_repo_instance.get_issues(state="open")
        parsed_issues = self.parse_issues(issues)
        parsed_issues_str = (
            "Found " + str(len(parsed_issues)) + " issues:\n" + str(parsed_issues)
        )
        return parsed_issues_str

    def get_issue(self, issue_number: int) -> Dict[str, Any]:
        """Return title, body and comments of one issue.

        Args:
            issue_number: Number of the issue to fetch.

        Returns:
            ``{"title", "body", "comments"}`` where ``comments`` is the string
            form of a list of ``{"body", "user"}`` dictionaries, or a
            ``{"message": ...}`` dictionary when the issue has more than 10
            comments.
        """
        issue = self.github_repo_instance.get_issue(number=issue_number)
        # Fetch the comment listing once and reuse it for count and content.
        issue_comments = issue.get_comments()

        # If there are too many comments
        # We can't add them all to context so for now we'll just skip
        if issue_comments.totalCount > 10:
            return {
                "message": (
                    "There are too many comments to add them all to context. "
                    "Please visit the issue on GitHub to see them all."
                )
            }

        comments = [
            {"body": comment.body, "user": comment.user.login}
            for comment in issue_comments
        ]

        return {
            "title": issue.title,
            "body": issue.body,
            "comments": str(comments),
        }

    def comment_on_issue(self, comment_query: str) -> str:
        """Post a comment on an issue.

        Args:
            comment_query: The issue number, two newlines, then the comment.

        Returns:
            A confirmation message naming the issue.

        Raises:
            ValueError: If the text before the first blank line is not an
                integer.
        """
        # Split on the separator itself instead of slicing by the digit count,
        # so "01" or " 1" do not leak separator characters into the comment.
        number_text, _, comment = comment_query.partition("\n\n")
        issue_number = int(number_text)
        issue = self.github_repo_instance.get_issue(number=issue_number)
        issue.create_comment(comment)
        return "Commented on issue " + str(issue_number)

    def create_file(self, file_query: str) -> str:
        """Create a file on the configured branch.

        Args:
            file_query: The file path on the first line followed by the file
                contents; one optional blank line may separate the two.

        Returns:
            A confirmation message naming the created file.
        """
        file_path, _, file_contents = file_query.partition("\n")
        # Accept both "path\ncontents" and "path\n\ncontents" without losing
        # the first character of the contents.
        if file_contents.startswith("\n"):
            file_contents = file_contents[1:]
        self.github_repo_instance.create_file(
            path=file_path,
            message="Create " + file_path,
            content=file_contents,
            branch=self.github_branch,
        )
        return "Created file " + file_path

    def read_file(self, file_path: str) -> str:
        """Return the UTF-8 decoded contents of ``file_path``."""
        file = self._get_contents(file_path)
        return file.decoded_content.decode("utf-8")

    def update_file(self, file_query: str) -> str:
        """Replace a fragment of a file on the configured branch.

        Args:
            file_query: The file path on the first line, then the text to
                replace wrapped in ``OLD <<<<`` / ``>>>> OLD`` and the
                replacement wrapped in ``NEW <<<<`` / ``>>>> NEW``, e.g.::

                    test/test.txt
                    OLD <<<<
                    old contents
                    >>>> OLD
                    NEW <<<<
                    new contents
                    >>>> NEW

        Returns:
            A confirmation message, or an explanatory message when the
            replacement leaves the file unchanged.
        """
        file_path = file_query.split("\n")[0]
        old_file_contents = file_query.split("OLD <<<<")[1].split(">>>> OLD")[0].strip()
        new_file_contents = file_query.split("NEW <<<<")[1].split(">>>> NEW")[0].strip()

        # Fetch once so content and sha describe the same revision.
        file = self._get_contents(file_path)
        file_content = file.decoded_content.decode("utf-8")
        updated_file_content = file_content.replace(
            old_file_contents, new_file_contents
        )

        if file_content == updated_file_content:
            return (
                "File content was not updated because the old content was not found. "
                "It may be helpful to use the read_file action to get "
                "the current file contents."
            )

        self.github_repo_instance.update_file(
            path=file_path,
            message="Update " + file_path,
            content=updated_file_content,
            branch=self.github_branch,
            sha=file.sha,
        )
        return "Updated file " + file_path

    def delete_file(self, file_path: str) -> str:
        """Delete ``file_path`` from the configured branch."""
        file = self._get_contents(file_path)
        self.github_repo_instance.delete_file(
            path=file_path,
            message="Delete " + file_path,
            branch=self.github_branch,
            sha=file.sha,
        )
        return "Deleted file " + file_path

    def run(self, mode: str, query: str) -> str:
        """Dispatch ``query`` to the operation named by ``mode``.

        Args:
            mode: One of the ``"mode"`` values in ``operations``.
            query: Raw tool input; ignored by ``get_issues`` and converted to
                an integer for ``get_issue``.

        Returns:
            The operation's result as a string (JSON for ``get_issue``).

        Raises:
            ValueError: If ``mode`` is not a supported operation.
        """
        if mode == "get_issues":
            return self.get_issues()
        elif mode == "get_issue":
            return json.dumps(self.get_issue(int(query)))
        elif mode == "comment_on_issue":
            return self.comment_on_issue(query)
        elif mode == "create_file":
            return self.create_file(query)
        elif mode == "read_file":
            return self.read_file(query)
        elif mode == "update_file":
            return self.update_file(query)
        elif mode == "delete_file":
            return self.delete_file(query)
        else:
            raise ValueError("Invalid mode: " + mode)
Loading…
Cancel
Save