refactor: handle file

adldotori 2023-03-18 06:05:02 +00:00
parent 073b22927a
commit 71e9489ff0
13 changed files with 306 additions and 194 deletions

2
.gitignore vendored

@ -4,4 +4,6 @@ __pycache__/
.env
image/
audio/
video/
dataframe/

16
Dockerfile Normal file

@ -0,0 +1,16 @@
FROM nvidia/cuda:11.7.0-runtime-ubuntu20.04
WORKDIR /app/
RUN \
apt-get update && \
apt-get install -y python3 python3-pip
# uvicorn is a Python package, not an apt package; install it with pip
RUN pip3 install uvicorn
RUN pip install --upgrade pip
COPY requirements.txt .
RUN pip install -r requirements.txt
COPY . .
ENTRYPOINT ["sleep", "infinity"]
# ENTRYPOINT ["python3", "-m", "uvicorn", "main:app", "--reload", "--host=0.0.0.0", "--port=8000"]
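
A typical build-and-run flow for this image might look like the following (the image tag is illustrative; GPU passthrough requires the NVIDIA Container Toolkit). Note that with the `sleep infinity` entrypoint, the container idles until the server is started manually:

```sh
docker build -t awesomegpt .
# expose the API port and pass the same env file used by docker-compose
docker run --gpus all --env-file .env -p 8000:8000 awesomegpt
```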

38
README.md Normal file

@ -0,0 +1,38 @@
# Usage
## S3
1. Create a bucket.
2. Turn off the "Block all public access" setting for the bucket. ![image](assets/block_public_access.png)
3. Add the following policy to the bucket's Bucket Policy, replacing `{your-bucket-name}` with your bucket name.
```json
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "AllowPublicRead",
"Effect": "Allow",
"Principal": {
"AWS": "*"
},
"Action": "s3:GetObject",
"Resource": "arn:aws:s3:::{your-bucket-name}/*"
}
]
}
```
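If you prefer the command line, the same policy can be applied with the AWS CLI (the bucket name is a placeholder):
```sh
# save the policy above as policy.json, then:
aws s3api put-bucket-policy --bucket your-bucket-name --policy file://policy.json
```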
## Environment
The following environment variable is required:
```
OPENAI_API_KEY
```
These are required only if you enable the corresponding search tools:
```
serpapi: SERPAPI_API_KEY
bing-search: BING_SEARCH_URL, BING_SUBSCRIPTION_KEY
```
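For example, a `.env` file consumed by docker-compose might look like this (all values are placeholders; the Bing URL shown is the usual v7 endpoint):
```
OPENAI_API_KEY=sk-...
SERPAPI_API_KEY=...
BING_SEARCH_URL=https://api.bing.microsoft.com/v7.0/search
BING_SUBSCRIPTION_KEY=...
```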

79
agent.py Normal file

@ -0,0 +1,79 @@
from typing import Dict, List, Tuple
from llm import ChatOpenAI
from langchain.agents import load_tools
from langchain.agents.agent import AgentExecutor
from langchain.agents.tools import Tool
from langchain.agents.initialize import initialize_agent
from langchain.chains.conversation.memory import ConversationBufferMemory
from utils import AWESOMEGPT_PREFIX, AWESOMEGPT_SUFFIX
from tools.cpu import (
RequestsGet,
WineDB,
ExitConversation,
)
from tools.gpu import (
ImageEditing,
InstructPix2Pix,
Text2Image,
ImageCaptioning,
VisualQuestionAnswering,
)
from handler import Handler, FileType
def get_agent() -> Tuple[AgentExecutor, Handler]:
print("Initializing AwesomeGPT")
llm = ChatOpenAI(temperature=0)
tools = [
*load_tools(
["python_repl", "terminal", "serpapi", "wikipedia", "bing-search"],
llm=llm,
),
]
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
models = {
"RequestsGet": RequestsGet(),
"WineDB": WineDB(),
"ExitConversation": ExitConversation(memory),
"Text2Image": Text2Image("cuda"),
"ImageEditing": ImageEditing("cuda"),
"InstructPix2Pix": InstructPix2Pix("cuda"),
"VisualQuestionAnswering": VisualQuestionAnswering("cuda"),
}
for _, instance in models.items():
for e in dir(instance):
if e.startswith("inference"):
func = getattr(instance, e)
tools.append(
Tool(name=func.name, description=func.description, func=func)
)
handle_models: Dict[FileType, ImageCaptioning] = {
FileType.IMAGE: ImageCaptioning("cuda"),
}
handler = Handler(
handle_func={
file_type: model.inference for file_type, model in handle_models.items()
}
)
return (
initialize_agent(
tools,
llm,
agent="chat-conversational-react-description",
verbose=True,
memory=memory,
agent_kwargs={
"system_message": AWESOMEGPT_PREFIX,
"human_message": AWESOMEGPT_SUFFIX,
},
),
handler,
)
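
For reference, a minimal sketch of how `get_agent()` is consumed (this mirrors `main.py`; the query string and file name are illustrative):

```python
from agent import get_agent

agent, handler = get_agent()
# the agent is a LangChain AgentExecutor and expects an "input" key
result = agent({"input": "What is in image/ab12cd34.png?"})
print(result["output"])
```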

assets/block_public_access.png Normal file
Binary file not shown (145 KiB)

16
docker-compose.yml Normal file

@ -0,0 +1,16 @@
version: "3"
services:
awesomegpt:
build:
dockerfile: Dockerfile
context: .
env_file:
- .env
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["3"] # You can choose which GPU to use
capabilities: [gpu]
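
Since the Dockerfile's ENTRYPOINT is `sleep infinity`, the API has to be started inside the running container; a plausible flow (the service name matches the compose file, and the uvicorn command mirrors the commented Dockerfile entrypoint):

```sh
docker compose up -d --build
docker compose exec awesomegpt python3 -m uvicorn main:app --host 0.0.0.0 --port 8000
```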

87
file.py

@ -1,87 +0,0 @@
import os
import requests
import uuid
from typing import Callable
from enum import Enum
from PIL import Image
import pandas as pd
from utils import IMAGE_PROMPT, DATAFRAME_PROMPT
from tools import IMAGE_MODEL
class FileType(Enum):
IMAGE = "image"
AUDIO = "audio"
VIDEO = "video"
DATAFRAME = "dataframe"
UNKNOWN = "unknown"
def handle(file_name: str) -> Callable:
"""
Parse file type from file name (ex. image, audio, video, dataframe, etc.)
"""
file_name = file_name.split("?")[0]
if file_name.endswith(".png") or file_name.endswith(".jpg"):
return handle_image
elif file_name.endswith(".mp3") or file_name.endswith(".wav"):
return handle_audio
elif file_name.endswith(".mp4") or file_name.endswith(".avi"):
return handle_video
elif file_name.endswith(".csv"):
return handle_dataframe
else:
return handle_unknown
def handle_image(i: int, file: str) -> str:
img_data = requests.get(file).content
filename = os.path.join("image", str(uuid.uuid4())[0:8] + ".png")
with open(filename, "wb") as f:
size = f.write(img_data)
print(f"Inputs: {file} ({size//1000}MB) => {filename}")
img = Image.open(filename)
width, height = img.size
ratio = min(512 / width, 512 / height)
width_new, height_new = (round(width * ratio), round(height * ratio))
img = img.resize((width_new, height_new))
img = img.convert("RGB")
img.save(filename, "PNG")
print(f"Resize image form {width}x{height} to {width_new}x{height_new}")
try:
description = IMAGE_MODEL.inference(filename)
except Exception as e:
return {"text": "image upload", "response": str(e), "additional": []}
return IMAGE_PROMPT.format(i=i, filename=filename, description=description)
def handle_audio(i: int, file: str) -> str:
return ""
def handle_video(i: int, file: str) -> str:
return ""
def handle_dataframe(i: int, file: str) -> str:
content = requests.get(file).content
filename = os.path.join("dataframe/", str(uuid.uuid4())[0:8] + ".csv")
with open(filename, "wb") as f:
size = f.write(content)
print(f"Inputs: {file} ({size//1000}MB) => {filename}")
df = pd.read_csv(filename)
try:
description = str(df.describe())
except Exception as e:
return {"text": "image upload", "response": str(e), "additional": []}
return DATAFRAME_PROMPT.format(i=i, filename=filename, description=description)
def handle_unknown(i: int, file: str) -> str:
return ""

89
handler.py Normal file

@ -0,0 +1,89 @@
import os
import requests
import uuid
from typing import Callable, Dict
from enum import Enum
from PIL import Image
import pandas as pd
from utils import IMAGE_PROMPT, DATAFRAME_PROMPT
class FileType(Enum):
IMAGE = "image"
AUDIO = "audio"
VIDEO = "video"
DATAFRAME = "dataframe"
UNKNOWN = "unknown"
class Handler:
def __init__(self, handle_func: Dict[FileType, Callable]):
self.handle_func = handle_func
def handle(self, i: int, file_name: str) -> str:
"""
Parse the file type from the file name (e.g. image, audio, video, dataframe) and dispatch to the matching handler.
"""
file_type = file_name.split("?")[0]
if file_type.endswith(".png") or file_type.endswith(".jpg"):
return self.handle_image(i, file_name)
elif file_type.endswith(".mp3") or file_type.endswith(".wav"):
return self.handle_audio(i, file_name)
elif file_type.endswith(".mp4") or file_type.endswith(".avi"):
return self.handle_video(i, file_name)
elif file_type.endswith(".csv"):
return self.handle_dataframe(i, file_name)
else:
return self.handle_unknown(i, file_name)
def handle_image(self, i: int, remote_filename: str) -> str:
img_data = requests.get(remote_filename).content
local_filename = os.path.join("image", str(uuid.uuid4())[0:8] + ".png")
with open(local_filename, "wb") as f:
size = f.write(img_data)
print(f"Inputs: {remote_filename} ({size//1000}MB) => {local_filename}")
img = Image.open(local_filename)
width, height = img.size
ratio = min(512 / width, 512 / height)
width_new, height_new = (round(width * ratio), round(height * ratio))
img = img.resize((width_new, height_new))
img = img.convert("RGB")
img.save(local_filename, "PNG")
print(f"Resize image form {width}x{height} to {width_new}x{height_new}")
try:
description = self.handle_func[FileType.IMAGE](local_filename)
except Exception as e:
return "Error: " + str(e)
return IMAGE_PROMPT.format(
i=i, filename=local_filename, description=description
)
def handle_audio(self, i: int, remote_filename: str) -> str:
return ""
def handle_video(self, i: int, remote_filename: str) -> str:
return ""
def handle_dataframe(self, i: int, remote_filename: str) -> str:
content = requests.get(remote_filename).content
local_filename = os.path.join("dataframe/", str(uuid.uuid4())[0:8] + ".csv")
with open(local_filename, "wb") as f:
size = f.write(content)
print(f"Inputs: {remote_filename} ({size//1000}MB) => {local_filename}")
df = pd.read_csv(local_filename)
try:
description = str(df.describe())
except Exception as e:
return "Error: " + str(e)
return DATAFRAME_PROMPT.format(
i=i, filename=local_filename, description=description
)
def handle_unknown(self, i: int, file: str) -> str:
return ""

84
main.py

@ -1,115 +1,73 @@
from typing import List, TypedDict, Callable
from typing import List, TypedDict
import re
from langchain.agents import load_tools
from langchain.agents.initialize import initialize_agent
from langchain.agents.tools import Tool
from fastapi import FastAPI
from pydantic import BaseModel
from dotenv import load_dotenv
from s3 import upload
from llm import ChatOpenAI
from file import handle
from utils import (
AWESOMEGPT_PREFIX,
AWESOMEGPT_SUFFIX,
ERROR_PROMPT,
)
from tools import AWESOME_MODEL, memory
load_dotenv()
from utils import ERROR_PROMPT
from agent import get_agent
app = FastAPI()
print("Initializing AwesomeGPT")
llm = ChatOpenAI(temperature=0)
tools = [
*load_tools(
["python_repl", "serpapi", "wikipedia", "bing-search"],
llm=llm,
),
]
for class_name, instance in AWESOME_MODEL.items():
for e in dir(instance):
if e.startswith("inference"):
func = getattr(instance, e)
tools.append(Tool(name=func.name, description=func.description, func=func))
agent = initialize_agent(
tools,
llm,
agent="chat-conversational-react-description",
verbose=True,
memory=memory,
agent_kwargs={
"system_message": AWESOMEGPT_PREFIX,
"human_message": AWESOMEGPT_SUFFIX,
},
)
agent, handler = get_agent()
class Request(BaseModel):
text: str
state: List[str]
files: List[str]
key: str
query: str
files: List[str]
class Response(TypedDict):
text: str
response: str
additional: List[str]
files: List[str]
@app.get("/")
async def index():
return {"message": "Hello World"}
return {"message": "Hello World. I'm AwesomeGPT."}
@app.post("/command")
async def command(request: Request) -> Response:
text = request.text
state = request.state
query = request.query
files = request.files
key = request.key
print("=============== Running =============")
print("Inputs:", text, state, files)
print("Inputs:", query, files)
# TODO - add state to memory (use key)
print("======>Previous memory:\n %s" % agent.memory)
promptedText = ""
promptedQuery = ""
for i, file in enumerate(files):
promptedText += handle(file)(i + 1, file)
promptedQuery += handler.handle(i + 1, file)
promptedText += text
promptedQuery += query
print("======>Prompted Text:\n %s" % promptedText)
print("======>Prompted Text:\n %s" % promptedQuery)
try:
res = agent({"input": promptedText})
res = agent({"input": promptedQuery})
except Exception as e:
try:
res = agent(
{
"input": ERROR_PROMPT.format(promptedText=promptedText, e=str(e)),
"input": ERROR_PROMPT.format(promptedQuery=promptedQuery, e=str(e)),
}
)
except Exception as e:
return {"text": promptedText, "response": str(e), "additional": []}
return {"response": str(e), "files": []}
images = re.findall("(image/\S*png)", res["output"])
dataframes = re.findall("(dataframe/\S*csv)", res["output"])
return {
"text": promptedText,
"response": res["output"],
"additional": [upload(image) for image in images],
"files": [upload(image) for image in images]
+ [upload(dataframe) for dataframe in dataframes],
}
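
An illustrative request against the `/command` endpoint (host and port assume the commented uvicorn entrypoint in the Dockerfile; the file URL is a placeholder):

```sh
curl -X POST http://localhost:8000/command \
  -H "Content-Type: application/json" \
  -d '{"key": "session-1", "query": "Describe this file", "files": ["https://example.com/cat.png"]}'
```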

requirements.txt

@ -6,6 +6,10 @@ langchain
fastapi
boto3
llama_index
torch==1.13.1+cu117
torch
transformers
diffusers
python_dotenv
google-search-results
psycopg2-binary
wikipedia

tools/cpu.py

@ -1,22 +1,12 @@
from langchain.chains.conversation.memory import ConversationBufferMemory
from utils import prompts
from env import settings
from vfm import (
ImageEditing,
InstructPix2Pix,
Text2Image,
ImageCaptioning,
VisualQuestionAnswering,
)
import requests
from llama_index.readers.database import DatabaseReader
from llama_index import GPTSimpleVectorIndex
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
from langchain.memory.chat_memory import BaseChatMemory
class RequestsGet:
@ -79,6 +69,9 @@ class WineDB:
class ExitConversation:
def __init__(self, memory: BaseChatMemory):
self.memory = memory
@prompts(
name="exit_conversation",
description="A tool to exit the conversation. "
@ -87,19 +80,5 @@ class ExitConversation:
)
def inference(self, query: str) -> str:
"""Run the tool."""
memory.chat_memory.messages = []
self.memory.chat_memory.messages = []
return ""
IMAGE_MODEL = ImageCaptioning("cuda:3")
AWESOME_MODEL = {
"RequestsGet": RequestsGet(),
"WineDB": WineDB(),
"ExitConversation": ExitConversation(),
"Text2Image": Text2Image("cuda:3"),
"ImageEditing": ImageEditing("cuda:3"),
"InstructPix2Pix": InstructPix2Pix("cuda:3"),
"VisualQuestionAnswering": VisualQuestionAnswering("cuda:3"),
}
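
The `@prompts` decorator used above is defined in `utils.py` and is read by `agent.py` through `func.name` / `func.description`; a plausible minimal form (the actual implementation may differ):

```python
def prompts(name: str, description: str):
    # attach tool metadata to the wrapped method; agent.py turns it into a langchain Tool
    def decorator(func):
        func.name = name
        func.description = description
        return func
    return decorator
```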

utils.py

@ -8,29 +8,44 @@ from langchain.output_parsers.base import BaseOutputParser
IMAGE_PROMPT = """
{i}th image: provide a figure named {filename}. The description is: {description}.
"""
{i}th file: provide a figure named {filename}. The description is: {description}.
DATAFRAME_PROMPT = """
{i}th dataframe: provide a dataframe named {filename}. The description is: {description}.
"""
IMAGE_SUFFIX = """
Please understand and answer the image based on this information. The image understanding is complete, so don't try to understand the image again.
"""
AWESOMEGPT_PREFIX = """Awesome GPT is designed to be able to assist with a wide range of text and visual related tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. Awesome GPT is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.
Awesome GPT is able to process and understand large amounts of text and images. As a language model, Awesome GPT can not directly read images, but it has a list of tools to finish different visual tasks.
AUDIO_PROMPT = """
{i}th file: provide an audio named {filename}. The description is: {description}.
Please understand and answer the audio based on this information. The audio understanding is complete, so don't try to understand the audio again.
"""
VIDEO_PROMPT = """
{i}th file: provide a video named {filename}. The description is: {description}.
Please understand and answer the video based on this information. The video understanding is complete, so don't try to understand the video again.
"""
DATAFRAME_PROMPT = """
{i}th file: provide a dataframe named {filename}. The description is: {description}.
You are able to use the dataframe to answer the question.
You have to act like a data analyst who can do an effective analysis through the dataframe.
"""
AWESOMEGPT_PREFIX = """Awesome GPT is designed to be able to assist with a wide range of text and visual related tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics.
Awesome GPT is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.
Awesome GPT is able to process and understand large amounts of various types of files. As a language model, Awesome GPT cannot directly read these files, but it has a list of tools to finish different tasks.
Each image will have a file name formed as "image/xxx.png"
Each audio will have a file name formed as "audio/xxx.mp3"
Each video will have a file name formed as "video/xxx.mp4"
Each dataframe will have a file name formed as "dataframe/xxx.csv"
Awesome GPT can invoke different tools to indirectly understand pictures. When talking about images, Awesome GPT is very strict to the file name and will never fabricate nonexistent files. When using tools to generate new image files, Awesome GPT is also known that the image may not be the same as the user's demand, and will use other visual question answering tools or description tools to observe the real image. Awesome GPT is able to use tools in a sequence, and is loyal to the tool observation outputs rather than faking the image content and image file name. It will remember to provide the file name from the last tool observation, if a new image is generated.
Human may provide new figures to Awesome GPT with a description. The description helps Awesome GPT to understand this image, but Awesome GPT should use tools to finish following tasks, rather than directly imagine from the description.
Awesome GPT can invoke different tools to indirectly understand files. When talking about files, Awesome GPT is very strict about file names and will never fabricate nonexistent files.
When using tools to generate new files, Awesome GPT is also aware that the generated file may not match the user's demand, and will use other visual question answering tools or description tools to observe the real file.
Awesome GPT is able to use tools in a sequence, and is loyal to the tool observation outputs rather than faking the file content and file name. It will remember to provide the file name from the last tool observation if a new file is generated.
Human may provide new files to Awesome GPT with a description. The description helps Awesome GPT to understand the file, but Awesome GPT should use tools to finish the following tasks, rather than directly imagining from the description.
Overall, Awesome GPT is a powerful visual dialogue assistant tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics."""
@ -38,7 +53,8 @@ AWESOMEGPT_SUFFIX = """TOOLS
------
Awesome GPT can ask the user to use tools to look up information that may be helpful in answering the user's original question.
You are very strict about filename correctness and will never fake a file name if it does not exist.
You will remember to provide the image file name loyally if it's provided in the last tool observation.
You will remember to provide the file name loyally if it's provided in the last tool observation.
The tools the human can use are:
{{tools}}
@ -51,10 +67,12 @@ Here is the user's input (remember to respond with a markdown code snippet of a
{{{{input}}}}"""
ERROR_PROMPT = "An error has occurred for the following text: \n{promptedText} Please explain this error.\n {e}"
ERROR_PROMPT = "An error has occurred for the following text: \n{promptedQuery} Please explain this error.\n {e}"
os.makedirs("image", exist_ok=True)
os.makedirs("audio", exist_ok=True)
os.makedirs("video", exist_ok=True)
os.makedirs("dataframe", exist_ok=True)