mirror of https://github.com/corca-ai/EVAL
refactor: handlers and tools
parent
2904d5fbf2
commit
83095ec2ce
@ -1,89 +0,0 @@
|
||||
import os
|
||||
import requests
|
||||
import uuid
|
||||
from typing import Callable, Dict
|
||||
from enum import Enum
|
||||
|
||||
from PIL import Image
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from utils import IMAGE_PROMPT, DATAFRAME_PROMPT
|
||||
|
||||
|
||||
class FileType(Enum):
    """Categories of files the Handler knows how to dispatch on."""

    IMAGE = "image"
    AUDIO = "audio"
    VIDEO = "video"
    DATAFRAME = "dataframe"
    UNKNOWN = "unknown"
|
||||
|
||||
|
||||
class Handler:
    """Dispatch a remote file to a type-specific handler based on its extension."""

    def __init__(self, handle_func: Dict[FileType, Callable]):
        # Map of FileType -> callable that turns a local file into a description.
        self.handle_func = handle_func

    def handle(self, i: int, file_name: str) -> str:
        """
        Parse file type from file name (ex. image, audio, video, dataframe, etc.)
        and dispatch to the matching handle_* method.
        """
        # Strip any URL query string so the extension checks see the bare path.
        base_name = file_name.split("?")[0]

        if base_name.endswith((".png", ".jpg")):
            return self.handle_image(i, file_name)
        elif base_name.endswith((".mp3", ".wav")):
            return self.handle_audio(i, file_name)
        elif base_name.endswith((".mp4", ".avi")):
            return self.handle_video(i, file_name)
        elif base_name.endswith(".csv"):
            return self.handle_dataframe(i, file_name)
        else:
            return self.handle_unknown(i, file_name)

    def handle_image(self, i: int, remote_filename: str) -> str:
        """Download an image, resize it in place, and describe it via the IMAGE handler."""
        img_data = requests.get(remote_filename).content
        local_filename = os.path.join("image", str(uuid.uuid4())[0:8] + ".png")
        with open(local_filename, "wb") as f:
            size = f.write(img_data)
        # size is a byte count, so // 1000 yields kilobytes (was mislabelled "MB").
        print(f"Inputs: {remote_filename} ({size//1000}KB) => {local_filename}")
        img = Image.open(local_filename)
        width, height = img.size
        # Scale so the larger dimension becomes 512 (small images are upscaled).
        ratio = min(512 / width, 512 / height)
        width_new, height_new = (round(width * ratio), round(height * ratio))
        img = img.resize((width_new, height_new))
        img = img.convert("RGB")
        img.save(local_filename, "PNG")
        print(f"Resize image from {width}x{height} to {width_new}x{height_new}")
        try:
            description = self.handle_func[FileType.IMAGE](local_filename)
        except Exception as e:
            # Surface handler failures as text instead of raising.
            return "Error: " + str(e)

        return IMAGE_PROMPT.format(
            i=i, filename=local_filename, description=description
        )

    def handle_audio(self, i: int, remote_filename: str) -> str:
        """Audio is not handled yet; returns an empty prompt."""
        return ""

    def handle_video(self, i: int, remote_filename: str) -> str:
        """Video is not handled yet; returns an empty prompt."""
        return ""

    def handle_dataframe(self, i: int, remote_filename: str) -> str:
        """Download a CSV, load it with pandas, and describe its summary statistics."""
        content = requests.get(remote_filename).content
        local_filename = os.path.join("dataframe/", str(uuid.uuid4())[0:8] + ".csv")
        with open(local_filename, "wb") as f:
            size = f.write(content)
        # size is a byte count, so // 1000 yields kilobytes (was mislabelled "MB").
        print(f"Inputs: {remote_filename} ({size//1000}KB) => {local_filename}")
        df = pd.read_csv(local_filename)
        try:
            description = str(df.describe())
        except Exception as e:
            return "Error: " + str(e)

        return DATAFRAME_PROMPT.format(
            i=i, filename=local_filename, description=description
        )

    def handle_unknown(self, i: int, file: str) -> str:
        """Unrecognized extensions produce no prompt."""
        return ""
|
@ -0,0 +1,75 @@
|
||||
import os
|
||||
import requests
|
||||
import uuid
|
||||
from typing import Dict
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class FileType(Enum):
    """File categories recognized by the handler framework.

    The member value doubles as the local directory name that
    FileHandler.download stores files under.
    """

    IMAGE = "image"
    AUDIO = "audio"
    VIDEO = "video"
    DATAFRAME = "dataframe"
    UNKNOWN = "unknown"

    @staticmethod
    def from_filename(url: str) -> "FileType":
        """Classify *url* by its file extension, ignoring any query string."""
        filename = url.split("?")[0]
        for suffixes, filetype in (
            ((".png", ".jpg"), FileType.IMAGE),
            ((".mp3", ".wav"), FileType.AUDIO),
            ((".mp4", ".avi"), FileType.VIDEO),
            ((".csv",), FileType.DATAFRAME),
        ):
            if filename.endswith(suffixes):
                return filetype
        return FileType.UNKNOWN

    @staticmethod
    def from_url(url: str) -> "FileType":
        """Classify a URL: strip the query string, then classify the path."""
        return FileType.from_filename(url.split("?")[0])

    def to_extension(self) -> str:
        """Return the canonical file extension for this file type."""
        return {
            FileType.IMAGE: ".png",
            FileType.AUDIO: ".mp3",
            FileType.VIDEO: ".mp4",
            FileType.DATAFRAME: ".csv",
        }.get(self, ".unknown")
|
||||
|
||||
|
||||
class BaseHandler:
    """Interface for file handlers: subclasses turn a local file into a prompt string."""

    def handle(self, filename: str) -> str:
        """Process *filename* and return a description prompt; subclasses must override."""
        raise NotImplementedError
|
||||
|
||||
|
||||
class FileHandler:
    """Download remote files and dispatch them to the handler registered for their FileType."""

    def __init__(self, handlers: Dict[FileType, BaseHandler]):
        # Map of FileType -> BaseHandler consulted by handle().
        self.handlers = handlers

    def register(self, filetype: FileType, handler: BaseHandler) -> "FileHandler":
        """Register *handler* for *filetype*; returns self so calls can be chained."""
        self.handlers[filetype] = handler
        return self

    def download(self, url: str) -> str:
        """Download *url* into "<filetype>/<8-char-uuid><ext>" and return the local path."""
        filetype = FileType.from_url(url)
        data = requests.get(url).content
        local_filename = os.path.join(
            filetype.value, str(uuid.uuid4())[0:8] + filetype.to_extension()
        )
        with open(local_filename, "wb") as f:
            size = f.write(data)
        # size is a byte count, so // 1000 yields kilobytes (was mislabelled "MB").
        print(f"Inputs: {url} ({size//1000}KB) => {local_filename}")
        return local_filename

    def handle(self, url: str) -> str:
        """Download *url* and run the matching handler.

        Any failure (download, missing handler, handler error) is returned as
        an "Error: ..." string so the agent loop can surface it to the LLM.
        """
        try:
            return self.handlers[FileType.from_url(url)].handle(self.download(url))
        except Exception as e:
            return "Error: " + str(e)
|
@ -0,0 +1,11 @@
|
||||
import pandas as pd
|
||||
from prompts.file import DATAFRAME_PROMPT
|
||||
|
||||
from .base import BaseHandler
|
||||
|
||||
|
||||
class CsvToDataframe(BaseHandler):
    """Summarize a local CSV file into the DATAFRAME_PROMPT template."""

    def handle(self, filename: str):
        """Load *filename* with pandas and return a prompt describing its statistics."""
        frame = pd.read_csv(filename)
        stats = str(frame.describe())
        return DATAFRAME_PROMPT.format(filename=filename, description=stats)
|
@ -0,0 +1,43 @@
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import (
|
||||
BlipProcessor,
|
||||
BlipForConditionalGeneration,
|
||||
)
|
||||
from prompts.file import IMAGE_PROMPT
|
||||
|
||||
from .base import BaseHandler
|
||||
|
||||
|
||||
class ImageCaptioning(BaseHandler):
    """Caption a local image file with the BLIP model and return the filled IMAGE_PROMPT."""

    def __init__(self, device):
        """Load the BLIP captioning model onto *device* (e.g. "cpu" or "cuda:0")."""
        print("Initializing ImageCaptioning to %s" % device)
        self.device = device
        # Half precision only pays off on GPU.
        self.torch_dtype = torch.float16 if "cuda" in device else torch.float32
        self.processor = BlipProcessor.from_pretrained(
            "Salesforce/blip-image-captioning-base"
        )
        self.model = BlipForConditionalGeneration.from_pretrained(
            "Salesforce/blip-image-captioning-base", torch_dtype=self.torch_dtype
        ).to(self.device)

    def handle(self, filename: str):
        """Resize the image in place (larger side -> 512px), caption it, return the prompt."""
        img = Image.open(filename)
        width, height = img.size
        # Scale so the larger dimension becomes 512 (small images are upscaled).
        ratio = min(512 / width, 512 / height)
        width_new, height_new = (round(width * ratio), round(height * ratio))
        img = img.resize((width_new, height_new))
        img = img.convert("RGB")
        img.save(filename, "PNG")
        print(f"Resize image from {width}x{height} to {width_new}x{height_new}")

        # Reuse the already-resized image instead of re-reading the file from disk.
        inputs = self.processor(img, return_tensors="pt").to(
            self.device, self.torch_dtype
        )
        out = self.model.generate(**inputs)
        description = self.processor.decode(out[0], skip_special_tokens=True)
        print(
            f"\nProcessed ImageCaptioning, Input Image: {filename}, Output Text: {description}"
        )

        return IMAGE_PROMPT.format(filename=filename, description=description)
|
@ -0,0 +1 @@
|
||||
# Template for reporting a tool/runtime failure back to the LLM.
# Placeholders: {promptedQuery} — the text that triggered the error; {e} — the exception.
ERROR_PROMPT = "An error has occurred for the following text: \n{promptedQuery} Please explain this error.\n {e}"
|
@ -0,0 +1,25 @@
|
||||
# Prompt templates injected into the conversation after a file has been handled.
# Each takes {filename} (local path of the downloaded file) and {description}
# (tool-generated summary). The handlers call .format(filename=..., description=...),
# so the templates must contain a {filename} placeholder.
IMAGE_PROMPT = """
provide a figure named {filename}. The description is: {description}.

Please understand and answer the image based on this information. The image understanding is complete, so don't try to understand the image again.
"""


AUDIO_PROMPT = """
provide a audio named {filename}. The description is: {description}.

Please understand and answer the audio based on this information. The audio understanding is complete, so don't try to understand the audio again.
"""

VIDEO_PROMPT = """
provide a video named {filename}. The description is: {description}.

Please understand and answer the video based on this information. The video understanding is complete, so don't try to understand the video again.
"""

DATAFRAME_PROMPT = """
provide a dataframe named {filename}. The description is: {description}.

You are able to use the dataframe to answer the question.
You have to act like an data analyst who can do an effective analysis through dataframe.
"""
|
@ -0,0 +1,33 @@
|
||||
# System-prompt prefix: describes the agent's capabilities and the
# "<type>/xxx.<ext>" file-naming convention to the LLM.
AWESOMEGPT_PREFIX = """Awesome GPT is designed to be able to assist with a wide range of text, visual related tasks, data analysis related tasks, auditory related tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics.
Awesome GPT is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.
Awesome GPT is able to process and understand large amounts of various types of files(image, audio, video, dataframe, etc.). As a language model, Awesome GPT can not directly read various types of files(text, image, audio, video, dataframe, etc.), but it has a list of tools to finish different visual tasks.

Each image will have a file name formed as "image/xxx.png"
Each audio will have a file name formed as "audio/xxx.mp3"
Each video will have a file name formed as "video/xxx.mp4"
Each dataframe will have a file name formed as "dataframe/xxx.csv"

Awesome GPT can invoke different tools to indirectly understand files(image, audio, video, dataframe, etc.). When talking about files(image, audio, video, dataframe, etc.), Awesome GPT is very strict to the file name and will never fabricate nonexistent files.
When using tools to generate new files, Awesome GPT is also known that the file(image, audio, video, dataframe, etc.) may not be the same as the user's demand, and will use other visual question answering tools or description tools to observe the real file.
Awesome GPT is able to use tools in a sequence, and is loyal to the tool observation outputs rather than faking the file content and file name. It will remember to provide the file name from the last tool observation, if a new file is generated.
Human may provide new figures to Awesome GPT with a description. The description helps Awesome GPT to understand this file, but Awesome GPT should use tools to finish following tasks, rather than directly imagine from the description.

Overall, Awesome GPT is a powerful visual dialogue assistant tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics."""

# System-prompt suffix listing the tools and the response format.
# NOTE(review): the mixed brace escaping looks intentional for multiple
# str.format passes — {format_instructions} fills on the first pass, {{tools}}
# survives one pass as {tools}, and {{{{input}}}} survives two passes — but
# confirm against where the agent framework formats this template.
AWESOMEGPT_SUFFIX = """TOOLS
------
Awesome GPT can ask the user to use tools to look up information that may be helpful in answering the users original question.
You are very strict to the filename correctness and will never fake a file name if it does not exist.
You will remember to provide the file name loyally if it's provided in the last tool observation.

The tools the human can use are:

{{tools}}

{format_instructions}

USER'S INPUT
--------------------
Here is the user's input (remember to respond with a markdown code snippet of a json blob with a single action, and NOTHING else):

{{{{input}}}}"""
|
@ -0,0 +1,19 @@
|
||||
from langchain.agents.tools import Tool, BaseTool
|
||||
|
||||
|
||||
def tool(name, description):
    """Decorator factory that marks a function as an agent tool.

    Attaches the ``name``, ``description`` and ``is_tool`` metadata that
    BaseToolSet.to_tools() scans for; the function itself is returned
    unchanged.
    """

    def decorator(func):
        setattr(func, "name", name)
        setattr(func, "description", description)
        setattr(func, "is_tool", True)
        return func

    return decorator
|
||||
|
||||
|
||||
class BaseToolSet:
    """Mixin for tool collections: methods decorated with @tool become langchain Tools."""

    # NOTE(review): declared without @classmethod; callers (ToolsFactory)
    # invoke it on instances, so `cls` is actually the instance here.
    def to_tools(cls) -> list[BaseTool]:
        """Collect every @tool-decorated method and wrap each in a langchain Tool.

        Bug fix: dir() yields attribute *names* (strings); the original code
        accessed .is_tool on the name itself, which always raised
        AttributeError. Resolve the attribute first, and use getattr with a
        default so undecorated attributes are skipped rather than crashing.
        """
        method_tools = [
            member
            for member in (getattr(cls, attr_name) for attr_name in dir(cls))
            if getattr(member, "is_tool", False)
        ]
        return [
            Tool(name=m.name, description=m.description, func=m) for m in method_tools
        ]
|
@ -0,0 +1,20 @@
|
||||
from typing import Optional
|
||||
|
||||
from langchain.llms.base import BaseLLM
|
||||
from langchain.agents import load_tools
|
||||
from langchain.agents.tools import BaseTool
|
||||
|
||||
from .base import BaseToolSet
|
||||
|
||||
|
||||
class ToolsFactory:
    """Builds flat lists of langchain tools from toolsets or built-in tool names."""

    @staticmethod
    def from_toolsets(toolsets: list[BaseToolSet]) -> list[BaseTool]:
        """Flatten the tools of every toolset into a single list, in order."""
        return [tool for toolset in toolsets for tool in toolset.to_tools()]

    @staticmethod
    def from_names(toolnames: list[str], llm: Optional[BaseLLM]) -> list[BaseTool]:
        """Load built-in langchain tools by name; *llm* is passed through for tools that need one."""
        return load_tools(toolnames, llm=llm)
|
Loading…
Reference in New Issue