Add SceneXplain Tool (#3752)

This commit is contained in:
Zander Chase 2023-04-28 17:01:54 -07:00 committed by GitHub
parent 72c5c15f7f
commit 6c2b16e465
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 214 additions and 0 deletions

View File

@ -0,0 +1,116 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# SceneXplain\n",
"\n",
"\n",
"[SceneXplain](https://scenex.jina.ai/) is an ImageCaptioning service accessible through the SceneXplain Tool.\n",
"\n",
"To use this tool, you'll need to make an account and fetch your API Token [from the website](https://scenex.jina.ai/api). Then you can instantiate the tool."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from langchain.tools import SceneXplainTool\n",
"\n",
"\n",
"os.environ[\"SCENEX_API_KEY\"] = \"<YOUR_API_KEY>\"\n",
"tool = SceneXplainTool()\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Usage in an Agent\n",
"\n",
"The tool can be used in any LangChain agent as follows:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m\n",
"Thought: Do I need to use a tool? Yes\n",
"Action: Image Explainer\n",
"Action Input: https://storage.googleapis.com/causal-diffusion.appspot.com/imagePrompts%2F0rw369i5h9t%2Foriginal.png\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3mIn a charmingly whimsical scene, a young girl is seen braving the rain alongside her furry companion, the lovable Totoro. The two are depicted standing on a bustling street corner, where they are sheltered from the rain by a bright yellow umbrella. The girl, dressed in a cheerful yellow frock, holds onto the umbrella with both hands while gazing up at Totoro with an expression of wonder and delight.\n",
"\n",
"Totoro, meanwhile, stands tall and proud beside his young friend, holding his own umbrella aloft to protect them both from the downpour. His furry body is rendered in rich shades of grey and white, while his large ears and wide eyes lend him an endearing charm.\n",
"\n",
"In the background of the scene, a street sign can be seen jutting out from the pavement amidst a flurry of raindrops. A sign with Chinese characters adorns its surface, adding to the sense of cultural diversity and intrigue. Despite the dreary weather, there is an undeniable sense of joy and camaraderie in this heartwarming image.\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m Do I need to use a tool? No\n",
"AI: This image appears to be a still from the 1988 Japanese animated fantasy film My Neighbor Totoro. The film follows two young girls, Satsuki and Mei, as they explore the countryside and befriend the magical forest spirits, including the titular character Totoro.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"This image appears to be a still from the 1988 Japanese animated fantasy film My Neighbor Totoro. The film follows two young girls, Satsuki and Mei, as they explore the countryside and befriend the magical forest spirits, including the titular character Totoro.\n"
]
}
],
"source": [
"from langchain.llms import OpenAI\n",
"from langchain.agents import initialize_agent\n",
"from langchain.memory import ConversationBufferMemory\n",
"\n",
"llm = OpenAI(temperature=0)\n",
"memory = ConversationBufferMemory(memory_key=\"chat_history\")\n",
"tools = [\n",
" tool\n",
"]\n",
"\n",
"agent = initialize_agent(\n",
" tools, llm, memory=memory, agent=\"conversational-react-description\", verbose=True\n",
")\n",
"output = agent.run(\n",
" input=(\n",
" \"What is in this image https://storage.googleapis.com/causal-diffusion.appspot.com/imagePrompts%2F0rw369i5h9t%2Foriginal.png. \"\n",
" \"Is it movie or a game? If it is a movie, what is the name of the movie?\"\n",
" )\n",
")\n",
"\n",
"print(output)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -26,6 +26,7 @@ from langchain.tools.playwright import (
NavigateTool, NavigateTool,
) )
from langchain.tools.plugin import AIPluginTool from langchain.tools.plugin import AIPluginTool
from langchain.tools.scenexplain.tool import SceneXplainTool
from langchain.tools.shell.tool import ShellTool from langchain.tools.shell.tool import ShellTool
__all__ = [ __all__ = [
@ -59,4 +60,6 @@ __all__ = [
"ReadFileTool", "ReadFileTool",
"ShellTool", "ShellTool",
"WriteFileTool", "WriteFileTool",
"BaseTool",
"SceneXplainTool",
] ]

View File

@ -0,0 +1 @@
"""SceneXplain API toolkit."""

View File

@ -0,0 +1,26 @@
"""Tool for the SceneXplain API."""
from pydantic import Field
from langchain.tools.base import BaseTool
from langchain.utilities.scenexplain import SceneXplainAPIWrapper
class SceneXplainTool(BaseTool):
"""Tool that adds the capability to explain images."""
name = "Image Explainer"
description = (
"An Image Captioning Tool: Use this tool to generate a detailed caption "
"for an image. The input can be an image file of any format, and "
"the output will be a text description that covers every detail of the image."
)
api_wrapper: SceneXplainAPIWrapper = Field(default_factory=SceneXplainAPIWrapper)
def _run(self, query: str) -> str:
"""Use the tool."""
return self.api_wrapper.run(query)
async def _arun(self, query: str) -> str:
"""Use the tool asynchronously."""
raise NotImplementedError("SceneXplainTool does not support async")

View File

@ -0,0 +1,68 @@
"""Util that calls SceneXplain.
In order to set this up, you need API key for the SceneXplain API.
You can obtain a key by following the steps below.
- Sign up for a free account at https://scenex.jina.ai/.
- Navigate to the API Access page (https://scenex.jina.ai/api) and create a new API key.
"""
from typing import Dict
import requests
from pydantic import BaseModel, root_validator
from langchain.utils import get_from_dict_or_env
class SceneXplainAPIWrapper(BaseModel):
"""Wrapper for SceneXplain API.
In order to set this up, you need API key for the SceneXplain API.
You can obtain a key by following the steps below.
- Sign up for a free account at https://scenex.jina.ai/.
- Navigate to the API Access page (https://scenex.jina.ai/api)
and create a new API key.
"""
scenex_api_key: str
scenex_api_url: str = (
"https://us-central1-causal-diffusion.cloudfunctions.net/describe"
)
def _describe_image(self, image: str) -> str:
headers = {
"x-api-key": f"token {self.scenex_api_key}",
"content-type": "application/json",
}
payload = {
"data": [
{
"image": image,
"algorithm": "Ember",
"languages": ["en"],
}
]
}
response = requests.post(self.scenex_api_url, headers=headers, json=payload)
response.raise_for_status()
result = response.json().get("result", [])
img = result[0] if result else {}
return img.get("text", "")
@root_validator(pre=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that api key exists in environment."""
scenex_api_key = get_from_dict_or_env(
values, "scenex_api_key", "SCENEX_API_KEY"
)
values["scenex_api_key"] = scenex_api_key
return values
def run(self, image: str) -> str:
"""Run SceneXplain image explainer."""
description = self._describe_image(image)
if not description:
return "No description found."
return description