LLaMA2 with JSON schema support template (#12435)

pull/12437/head
Lance Martin 7 months ago committed by GitHub
parent 134f085824
commit 05bbf943f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,13 @@
# Extraction with LLaMA2 Function Calling
This template shows how to do extraction of structured data from unstructured data, using LLaMA2 [fine-tuned for grammars and jsonschema](https://replicate.com/andreasjansson/llama-2-13b-chat-gguf).
Specify the schema you want to extract in `chain.py`.
By default, it will extract the title and author of papers.
## LLM
This template will use the `Replicate` [hosted version](https://replicate.com/andreasjansson/llama-2-13b-chat-gguf) of LLaMA2.
Be sure that `REPLICATE_API_TOKEN` is set in your environment.

@ -0,0 +1,71 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "9faf648c-541e-4368-82a8-96287dbf34de",
"metadata": {},
"source": [
"## Document Loading\n",
"\n",
"Load a blog post on agents."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "662a843a-49e8-40ec-bd32-0f44bc4159a1",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import WebBaseLoader\n",
"loader = WebBaseLoader(\"https://lilianweng.github.io/posts/2023-06-23-agent/\")\n",
"text = loader.load()"
]
},
{
"cell_type": "markdown",
"id": "67306dbd-d79c-4723-825e-7d88edb811ba",
"metadata": {},
"source": [
"## Run Template\n",
"\n",
"In `server.py`, set -\n",
"```\n",
"add_routes(app, chain_ext, path=\"/llama2_functions\")\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3668ba4b-254e-4a3b-bfb5-53242572cb1b",
"metadata": {},
"outputs": [],
"source": [
"from langserve.client import RemoteRunnable\n",
"llama2_function = RemoteRunnable('http://0.0.0.0:8001/llama2_functions')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,3 @@
# Re-export the chain so it can be imported directly from the package;
# `__all__` limits the package's public names to this one object.
from llama2_functions.chain import chain
__all__ = ["chain"]

@ -0,0 +1,47 @@
from langchain.chat_models import ChatOpenAI
from langchain.llms import Replicate
from langchain.prompts import ChatPromptTemplate
# LLM
# Pinned version of the LLaMA2 13B chat model hosted on Replicate. Calling it
# requires REPLICATE_API_TOKEN to be set in the environment.
replicate_id = "andreasjansson/llama-2-13b-chat-gguf:60ec5dda9ff9ee0b6f786c9d1157842e6ab3cc931139ad98fe99e08a35c5d4d4"  # noqa: E501
model = Replicate(
    model=replicate_id,
    model_kwargs={"temperature": 0.8, "max_length": 500, "top_p": 0.95},
)

# Prompt with output schema specification.
# Doubled braces ({{ }}) are escaped literal braces in the prompt template, so
# the JSON schema is sent verbatim; `{input}` in the human message is the only
# template variable.
template = """A article will be passed to you. Extract from it all papers that are mentioned by this article.
Do not extract the name of the article itself. If no papers are mentioned that's fine - you don't need to extract any! Just return an empty list.
Do not make up or guess ANY extra information. Only extract what exactly is in the text.
Respond with json that adheres to the following jsonschema:
{{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {{
"author": {{
"type": "string",
"description": "The author of the paper."
}},
"title": {{
"type": "string",
"description": "The title of the paper."
}}
}},
"required": ["author", "title"],
"additionalProperties": false
}}"""  # noqa: E501
prompt = ChatPromptTemplate.from_messages(
    [("system", template), ("human", "{input}")]
)

# Chain
# Pipe the prompt into the Replicate-hosted LLaMA2 model configured above.
# `model` must not be rebound to another LLM here: doing so silently discards
# the Replicate configuration and requires credentials for a different provider.
chain = prompt | model

File diff suppressed because it is too large Load Diff

@ -0,0 +1,19 @@
# Poetry packaging metadata for the `llama2-functions` template.
[tool.poetry]
name = "llama2-functions"
version = "0.1.0"
description = ""
authors = ["Lance Martin <lance@langchain.dev>"]
readme = "README.md"
# Runtime dependencies. langchain is capped below 0.1.
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
langchain = ">=0.0.313, <0.1"
# Replicate client package (presumably needed by langchain's Replicate LLM
# wrapper at call time; confirm).
replicate = ">=0.15.4"
# Module and attribute that expose the chain object (presumably read by
# LangServe tooling to locate it; confirm).
[tool.langserve]
export_module = "llama2_functions"
export_attr = "chain"
# Build with poetry-core (PEP 517 backend).
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

@ -12,7 +12,8 @@ from langchain.utilities import SQLDatabase
# File name and URL
file_name = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
url = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf"
url = ("https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/"
"mistral-7b-instruct-v0.1.Q4_K_M.gguf")
# Check if file is present in the current directory
if not os.path.exists(file_name):
print(f"'{file_name}' not found. Downloading...")

Loading…
Cancel
Save