diff --git a/docs/extras/integrations/tools/dalle_image_generator.ipynb b/docs/extras/integrations/tools/dalle_image_generator.ipynb new file mode 100644 index 0000000000..83f424b15e --- /dev/null +++ b/docs/extras/integrations/tools/dalle_image_generator.ipynb @@ -0,0 +1,181 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Dall-E Image Generator\n", + "\n", + "This notebook shows how you can generate images from a prompt synthesized using an OpenAI LLM. The images are generated using Dall-E, which uses the same OpenAI API key as the LLM." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Needed if you would like to display images in the notebook\n", + "!pip install opencv-python scikit-image" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "q-k8wmp0zquh" + }, + "outputs": [], + "source": [ + "from langchain.llms import OpenAI\n", + "import os\n", + "os.environ[\"OPENAI_API_KEY\"] = \"\"" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run as a chain" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.utilities.dalle_image_generator import DallEAPIWrapper\n", + "from langchain.prompts import PromptTemplate\n", + "from langchain.chains import LLMChain\n", + "\n", + "llm = OpenAI(temperature=0.9)\n", + "prompt = PromptTemplate(\n", + " input_variables=[\"image_desc\"],\n", + " template=\"Generate a detailed prompt to generate an image based on the following description: {image_desc}\",\n", + ")\n", + "chain = LLMChain(llm=llm, prompt=prompt)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://oaidalleapiprodscus.blob.core.windows.net/private/org-rocrupyvzgcl4yf25rqq6d1v/user-WsxrbKyP2c8rfhCKWDyMfe8N/img-mg1OWiziXxQN1aR2XRsLNndg.png?st=2023-01-31T07%3A34%3A15Z&se=2023-01-31T09%3A34%3A15Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-01-30T22%3A19%3A44Z&ske=2023-01-31T22%3A19%3A44Z&sks=b&skv=2021-08-06&sig=XDPee5aEng%2BcbXq2mqhh39uHGZTBmJgGAerSd0g%2BMEs%3D\n" + ] + } + ], + "source": [ + "image_url = DallEAPIWrapper().run(chain.run(\"halloween night at a haunted museum\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# You can click on the link above to display the image for\n", + "# Or you can try the options below to display the image inline in this notebook\n", + "\n", + "try:\n", + " import google.colab\n", + " IN_COLAB = True\n", + "except:\n", + " IN_COLAB = False\n", + "\n", + "if IN_COLAB:\n", + " from google.colab.patches import cv2_imshow # for image display\n", + " from skimage import io\n", + "\n", + " image = io.imread(image_url) \n", + " cv2_imshow(image)\n", + "else:\n", + " import cv2\n", + " from skimage import io\n", + "\n", + " image = io.imread(image_url) \n", + " cv2.imshow('image', image)\n", + " cv2.waitKey(0) #wait for a keyboard input\n", + " cv2.destroyAllWindows()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run as a tool with an agent" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m What is the best way to turn this description into an image?\n", + "Action: Dall-E Image Generator\n", + "Action Input: A spooky Halloween night at a haunted museum\u001b[0mhttps://oaidalleapiprodscus.blob.core.windows.net/private/org-rocrupyvzgcl4yf25rqq6d1v/user-WsxrbKyP2c8rfhCKWDyMfe8N/img-ogKfqxxOS5KWVSj4gYySR6FY.png?st=2023-01-31T07%3A38%3A25Z&se=2023-01-31T09%3A38%3A25Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-01-30T22%3A19%3A36Z&ske=2023-01-31T22%3A19%3A36Z&sks=b&skv=2021-08-06&sig=XsomxxBfu2CP78SzR9lrWUlbask4wBNnaMsHamy4VvU%3D\n", + "\n", + "Observation: \u001b[36;1m\u001b[1;3mhttps://oaidalleapiprodscus.blob.core.windows.net/private/org-rocrupyvzgcl4yf25rqq6d1v/user-WsxrbKyP2c8rfhCKWDyMfe8N/img-ogKfqxxOS5KWVSj4gYySR6FY.png?st=2023-01-31T07%3A38%3A25Z&se=2023-01-31T09%3A38%3A25Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-01-30T22%3A19%3A36Z&ske=2023-01-31T22%3A19%3A36Z&sks=b&skv=2021-08-06&sig=XsomxxBfu2CP78SzR9lrWUlbask4wBNnaMsHamy4VvU%3D\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m With the image generated, I can now make my final answer.\n", + "Final Answer: An image of a Halloween night at a haunted museum can be seen here: https://oaidalleapiprodscus.blob.core.windows.net/private/org-rocrupyvzgcl4yf25rqq6d1v/user-WsxrbKyP2c8rfhCKWDyMfe8N/img-ogKfqxxOS5KWVSj4gYySR6FY.png?st=2023-01-31T07%3A38%3A25Z&se=2023-01-31T09%3A38%3A25Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-01-30T22\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + } + ], + "source": [ + "from langchain.agents import load_tools\n", + "from langchain.agents import initialize_agent\n", + "\n", + "tools = load_tools(['dalle-image-generator'])\n", + "agent = initialize_agent(tools, llm, agent=\"zero-shot-react-description\", verbose=True)\n", + "output = agent.run(\"Create an image of a halloween night at a haunted museum\")" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "langchain", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + }, + "vscode": { + "interpreter": { + "hash": "3570c8892273ffbeee7ead61dc7c022b73551d9f55fb2584ac0e8e8920b18a89" + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/libs/langchain/langchain/agents/load_tools.py b/libs/langchain/langchain/agents/load_tools.py index a7b8e86062..8fc93e45ae 100644 --- a/libs/langchain/langchain/agents/load_tools.py +++ b/libs/langchain/langchain/agents/load_tools.py @@ -11,6 +11,7 @@ from langchain.callbacks.manager import Callbacks from langchain.chains.api import news_docs, open_meteo_docs, podcast_docs, tmdb_docs from langchain.chains.api.base import APIChain from langchain.chains.llm_math.base import LLMMathChain +from langchain.utilities.dalle_image_generator import DallEAPIWrapper from langchain.utilities.requests import TextRequestsWrapper from langchain.tools.arxiv.tool import ArxivQueryRun from langchain.tools.golden_query.tool import GoldenQueryRun @@ -221,6 +222,14 @@ def _get_serpapi(**kwargs: Any) -> BaseTool: ) +def _get_dalle_image_generator(**kwargs: Any) -> Tool: + return Tool( + "Dall-E Image Generator", + DallEAPIWrapper(**kwargs).run, + "A wrapper around OpenAI DALL-E API. Useful for when you need to generate images from a text description. Input should be an image description.", + ) + + def _get_twilio(**kwargs: Any) -> BaseTool: return Tool( name="Text Message", @@ -305,6 +314,7 @@ _EXTRA_OPTIONAL_TOOLS: Dict[str, Tuple[Callable[[KwArg(Any)], BaseTool], List[st ["serper_api_key", "aiosession"], ), "serpapi": (_get_serpapi, ["serpapi_api_key", "aiosession"]), + "dalle-image-generator": (_get_dalle_image_generator, ["openai_api_key"]), "twilio": (_get_twilio, ["account_sid", "auth_token", "from_number"]), "searx-search": (_get_searx_search, ["searx_host", "engines", "aiosession"]), "wikipedia": (_get_wikipedia, ["top_k_results", "lang"]), diff --git a/libs/langchain/langchain/utilities/dalle_image_generator.py b/libs/langchain/langchain/utilities/dalle_image_generator.py new file mode 100644 index 0000000000..9f4d8c19c8 --- /dev/null +++ b/libs/langchain/langchain/utilities/dalle_image_generator.py @@ -0,0 +1,61 @@ +"""Util that calls OpenAI's Dall-E Image Generator.""" +from typing import Any, Dict, Optional + +from pydantic import BaseModel, Extra, root_validator + +from langchain.utils import get_from_dict_or_env + + +class DallEAPIWrapper(BaseModel): + """Wrapper for OpenAI's DALL-E Image Generator. + + Docs for using: + 1. pip install openai + 2. save your OPENAI_API_KEY in an environment variable + + """ + + client: Any #: :meta private: + openai_api_key: Optional[str] = None + """number of images to generate""" + n: int = 1 + """size of image to generate""" + size: str = "1024x1024" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def _dalle_image_url(self, prompt: str) -> str: + params = {"prompt": prompt, "n": self.n, "size": self.size} + response = self.client.create(**params) + return response["data"][0]["url"] + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + openai_api_key = get_from_dict_or_env( + values, "openai_api_key", "OPENAI_API_KEY" + ) + try: + import openai + + openai.api_key = openai_api_key + values["client"] = openai.Image + except ImportError: + raise ValueError( + "Could not import openai python package. " + "Please it install it with `pip install openai`." + ) + return values + + def run(self, query: str) -> str: + """Run query through OpenAI and parse result.""" + image_url = self._dalle_image_url(query) + + if image_url is None or image_url == "": + # We don't want to return the assumption alone if answer is empty + return "No image was generated" + else: + return image_url diff --git a/libs/langchain/tests/integration_tests/chains/test_dalle_agent.py b/libs/langchain/tests/integration_tests/chains/test_dalle_agent.py new file mode 100644 index 0000000000..246c2c2c6e --- /dev/null +++ b/libs/langchain/tests/integration_tests/chains/test_dalle_agent.py @@ -0,0 +1,16 @@ +"""Integration test for Dall-E image generator agent.""" +from langchain.agents import AgentType, initialize_agent, load_tools +from langchain.llms import OpenAI + + +def test_call() -> None: + """Test that the agent runs and returns output.""" + llm = OpenAI(temperature=0.9) + tools = load_tools(["dalle-image-generator"]) + + agent = initialize_agent( + tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True + ) + + output = agent.run("Create an image of a volcano island") + assert output is not None diff --git a/libs/langchain/tests/integration_tests/test_dalle.py b/libs/langchain/tests/integration_tests/test_dalle.py new file mode 100644 index 0000000000..d51cc13ff7 --- /dev/null +++ b/libs/langchain/tests/integration_tests/test_dalle.py @@ -0,0 +1,9 @@ +"""Integration test for DallE API Wrapper.""" +from langchain.utilities.dalle_image_generator import DallEAPIWrapper + + +def test_call() -> None: + """Test that call returns a URL in the output.""" + search = DallEAPIWrapper() + output = search.run("volcano island") + assert "https://oaidalleapi" in output