cr

Add a utility to generate an image from a prompt (OpenAI DALL-E) (#784)
This is a utility that allows you to generate an image from a prompt. It uses the OpenAI DALL-E image generator, which can take the same API key as the LLM. The output is a link to the generated image, which can be downloaded to a file or rendered depending on the use case. By default a single image of resolution 1024x1024 is generated.
2023-02-02 08:48:31 -08:00 · 2023-02-02 08:48:03 -08:00
5 changed files with 277 additions and 0 deletions
--- a/docs/modules/utils/examples/dalle_image_generator.ipynb
+++ b/docs/modules/utils/examples/dalle_image_generator.ipynb
@ -0,0 +1,181 @@
+{
+  "cells": [
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "# Dall-E Image Generator\n",
+        "\n",
+        "This notebook shows how you can generate images from a prompt synthesized using an OpenAI LLM. The images are generated using Dall-E, which uses the same OpenAI API key as the LLM."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Needed if you would like to display images in the notebook\n",
+        "%pip install opencv-python scikit-image"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 4,
+      "metadata": {
+        "id": "q-k8wmp0zquh"
+      },
+      "outputs": [],
+      "source": [
+        "from langchain.llms import OpenAI\n",
+        "import os\n",
+        "os.environ[\"OPENAI_API_KEY\"] = \"<your-key-here>\""
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Run as a chain"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 5,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from langchain.utilities.dalle_image_generator import DallEAPIWrapper\n",
+        "from langchain.prompts import PromptTemplate\n",
+        "from langchain.chains import LLMChain\n",
+        "\n",
+        "llm = OpenAI(temperature=0.9)\n",
+        "prompt = PromptTemplate(\n",
+        "    input_variables=[\"image_desc\"],\n",
+        "    template=\"Generate a detailed prompt to generate an image based on the following description: {image_desc}\",\n",
+        ")\n",
+        "chain = LLMChain(llm=llm, prompt=prompt)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 7,
+      "metadata": {},
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "https://oaidalleapiprodscus.blob.core.windows.net/private/org-rocrupyvzgcl4yf25rqq6d1v/user-WsxrbKyP2c8rfhCKWDyMfe8N/img-mg1OWiziXxQN1aR2XRsLNndg.png?st=2023-01-31T07%3A34%3A15Z&se=2023-01-31T09%3A34%3A15Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-01-30T22%3A19%3A44Z&ske=2023-01-31T22%3A19%3A44Z&sks=b&skv=2021-08-06&sig=XDPee5aEng%2BcbXq2mqhh39uHGZTBmJgGAerSd0g%2BMEs%3D\n"
+          ]
+        }
+      ],
+      "source": [
+        "image_url = DallEAPIWrapper().run(chain.run(\"halloween night at a haunted museum\"))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# You can click on the link above to display the image for\n",
+        "# Or you can try the options below to display the image inline in this notebook\n",
+        "\n",
+        "try:\n",
+        "    import google.colab\n",
+        "    IN_COLAB = True\n",
+        "except:\n",
+        "    IN_COLAB = False\n",
+        "\n",
+        "if IN_COLAB:\n",
+        "    from google.colab.patches import cv2_imshow # for image display\n",
+        "    from skimage import io\n",
+        "\n",
+        "    image = io.imread(image_url) \n",
+        "    cv2_imshow(image)\n",
+        "else:\n",
+        "    import cv2\n",
+        "    from skimage import io\n",
+        "\n",
+        "    image = io.imread(image_url) \n",
+        "    cv2.imshow('image', image)\n",
+        "    cv2.waitKey(0)   #wait for a keyboard input\n",
+        "    cv2.destroyAllWindows()\n"
+      ]
+    },
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Run as a tool with an agent"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 9,
+      "metadata": {},
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "\n",
+            "\n",
+            "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
+            "\u001b[32;1m\u001b[1;3m What is the best way to turn this description into an image?\n",
+            "Action: Dall-E Image Generator\n",
+            "Action Input: A spooky Halloween night at a haunted museum\u001b[0mhttps://oaidalleapiprodscus.blob.core.windows.net/private/org-rocrupyvzgcl4yf25rqq6d1v/user-WsxrbKyP2c8rfhCKWDyMfe8N/img-ogKfqxxOS5KWVSj4gYySR6FY.png?st=2023-01-31T07%3A38%3A25Z&se=2023-01-31T09%3A38%3A25Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-01-30T22%3A19%3A36Z&ske=2023-01-31T22%3A19%3A36Z&sks=b&skv=2021-08-06&sig=XsomxxBfu2CP78SzR9lrWUlbask4wBNnaMsHamy4VvU%3D\n",
+            "\n",
+            "Observation: \u001b[36;1m\u001b[1;3mhttps://oaidalleapiprodscus.blob.core.windows.net/private/org-rocrupyvzgcl4yf25rqq6d1v/user-WsxrbKyP2c8rfhCKWDyMfe8N/img-ogKfqxxOS5KWVSj4gYySR6FY.png?st=2023-01-31T07%3A38%3A25Z&se=2023-01-31T09%3A38%3A25Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-01-30T22%3A19%3A36Z&ske=2023-01-31T22%3A19%3A36Z&sks=b&skv=2021-08-06&sig=XsomxxBfu2CP78SzR9lrWUlbask4wBNnaMsHamy4VvU%3D\u001b[0m\n",
+            "Thought:\u001b[32;1m\u001b[1;3m With the image generated, I can now make my final answer.\n",
+            "Final Answer: An image of a Halloween night at a haunted museum can be seen here: https://oaidalleapiprodscus.blob.core.windows.net/private/org-rocrupyvzgcl4yf25rqq6d1v/user-WsxrbKyP2c8rfhCKWDyMfe8N/img-ogKfqxxOS5KWVSj4gYySR6FY.png?st=2023-01-31T07%3A38%3A25Z&se=2023-01-31T09%3A38%3A25Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-01-30T22\u001b[0m\n",
+            "\n",
+            "\u001b[1m> Finished chain.\u001b[0m\n"
+          ]
+        }
+      ],
+      "source": [
+        "from langchain.agents import load_tools\n",
+        "from langchain.agents import initialize_agent\n",
+        "\n",
+        "tools = load_tools(['dalle-image-generator'])\n",
+        "agent = initialize_agent(tools, llm, agent=\"zero-shot-react-description\", verbose=True)\n",
+        "output = agent.run(\"Create an image of a halloween night at a haunted museum\")"
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "langchain",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.9.16"
+    },
+    "vscode": {
+      "interpreter": {
+        "hash": "3570c8892273ffbeee7ead61dc7c022b73551d9f55fb2584ac0e8e8920b18a89"
+      }
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
--- a/langchain/agents/load_tools.py
+++ b/langchain/agents/load_tools.py
@ -12,6 +12,7 @@ from langchain.python import PythonREPL
 from langchain.requests import RequestsWrapper
 from langchain.serpapi import SerpAPIWrapper
 from langchain.utilities.bash import BashProcess
+from langchain.utilities.dalle_image_generator import DallEAPIWrapper
 from langchain.utilities.google_search import GoogleSearchAPIWrapper
 from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper

@ -130,6 +131,14 @@ def _get_google_search(**kwargs: Any) -> Tool:
    )


+def _get_dalle_image_generator(**kwargs: Any) -> Tool:
+    """Build the Dall-E tool; ``kwargs`` are forwarded to ``DallEAPIWrapper``."""
+    dalle = DallEAPIWrapper(**kwargs)
+    name = "Dall-E Image Generator"
+    description = "A wrapper around OpenAI DALL-E API. Useful for when you need to generate images from a text description. Input should be an image description."
+    return Tool(name, dalle.run, description)
+
+
 def _get_serpapi(**kwargs: Any) -> Tool:
    return Tool(
        "Search",
@ -145,6 +154,7 @@ _EXTRA_LLM_TOOLS = {
 _EXTRA_OPTIONAL_TOOLS = {
    "wolfram-alpha": (_get_wolfram_alpha, ["wolfram_alpha_appid"]),
    "google-search": (_get_google_search, ["google_api_key", "google_cse_id"]),
+    "dalle-image-generator": (_get_dalle_image_generator, ["openai_api_key"]),
    "serpapi": (_get_serpapi, ["serpapi_api_key"]),
 }

--- a/langchain/utilities/dalle_image_generator.py
+++ b/langchain/utilities/dalle_image_generator.py
@ -0,0 +1,61 @@
+"""Util that calls OpenAI's Dall-E Image Generator."""
+from typing import Any, Dict, Optional
+
+from pydantic import BaseModel, Extra, root_validator
+
+from langchain.utils import get_from_dict_or_env
+
+
+class DallEAPIWrapper(BaseModel):
+    """Wrapper for OpenAI's DALL-E Image Generator.
+
+    Docs for using:
+    1. pip install openai
+    2. save your OPENAI_API_KEY in an environment variable
+
+    """
+
+    client: Any  #: :meta private:
+    openai_api_key: Optional[str] = None
+    """OpenAI API key; resolved by ``get_from_dict_or_env`` (env var OPENAI_API_KEY)"""
+    n: int = 1
+    """number of images to generate"""
+    size: str = "1024x1024"
+    """size of image to generate"""
+
+    class Config:
+        """Configuration for this pydantic object."""
+
+        extra = Extra.forbid
+
+    def _dalle_image_url(self, prompt: str) -> str:
+        """Request image(s) for ``prompt`` and return the URL of the first one.
+
+        Returns an empty string when the response contains no images, so that
+        ``run`` can report it instead of failing with an ``IndexError``.
+        """
+        params = {"prompt": prompt, "n": self.n, "size": self.size}
+        response = self.client.create(**params)
+        images = response["data"]
+        if not images:
+            return ""
+        # Only the first URL is returned, even when ``n`` is greater than 1.
+        return images[0]["url"]
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that api key and python package exists in environment."""
+        openai_api_key = get_from_dict_or_env(
+            values, "openai_api_key", "OPENAI_API_KEY"
+        )
+        # Keep the try block limited to the import so that only a missing
+        # package is reported as "could not import".
+        try:
+            import openai
+        except ImportError:
+            raise ValueError(
+                "Could not import openai python package. "
+                "Please install it with `pip install openai`."
+            )
+        # NOTE: the key is set on the ``openai`` module itself, not on this
+        # instance.
+        openai.api_key = openai_api_key
+        values["client"] = openai.Image
+        return values
+
+    def run(self, query: str) -> str:
+        """Generate an image for ``query`` and return its URL.
+
+        Returns the string "No image was generated" when no URL is available.
+        """
+        image_url = self._dalle_image_url(query)
+
+        if not image_url:
+            return "No image was generated"
+        return image_url
--- a/tests/integration_tests/chains/test_dalle_agent.py
+++ b/tests/integration_tests/chains/test_dalle_agent.py
@ -0,0 +1,16 @@
+"""Integration test for Dall-E image generator agent."""
+from langchain.agents import initialize_agent, load_tools
+from langchain.llms import OpenAI
+
+
+def test_call() -> None:
+    """Run a zero-shot agent equipped with the Dall-E tool and check for output."""
+    model = OpenAI(temperature=0.9)
+    dalle_tools = load_tools(["dalle-image-generator"])
+
+    dalle_agent = initialize_agent(
+        dalle_tools, model, agent="zero-shot-react-description", verbose=True
+    )
+
+    answer = dalle_agent.run("Create an image of a volcano island")
+    assert answer is not None
--- a/tests/integration_tests/test_dalle.py
+++ b/tests/integration_tests/test_dalle.py
@ -0,0 +1,9 @@
+"""Integration test for DallE API Wrapper."""
+from langchain.utilities.dalle_image_generator import DallEAPIWrapper
+
+
+def test_call() -> None:
+    """Check that running the wrapper on a prompt yields a Dall-E image URL."""
+    dalle = DallEAPIWrapper()
+    image_url = dalle.run("volcano island")
+    assert "https://oaidalleapi" in image_url