initial
commit
472d14919f
@ -0,0 +1 @@
|
||||
.ipynb_checkpoints
|
@ -0,0 +1,348 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "2XVP2VXIL1"
|
||||
},
|
||||
"source": [
|
||||
"# Chains\n",
|
||||
"\n",
|
||||
"Chaining LLMs with each other or with other experts.\n",
|
||||
"\n",
|
||||
"## Getting Started\n",
|
||||
"\n",
|
||||
"- Using the simple LLM chain\n",
|
||||
"- Creating sequential chains\n",
|
||||
"- Creating a custom chain\n",
|
||||
"\n",
|
||||
"### Why Use Chains ?\n",
|
||||
"\n",
|
||||
"- combine multiple components together\n",
|
||||
"- ex: take user input, format with PromptTemplate, pass formatted text to LLM.\n",
|
||||
"\n",
|
||||
"## Query an LLM with LLMChain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "DPRWRo3fl7"
|
||||
},
|
||||
"source": [
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"import pprint as pp\n",
|
||||
"\n",
|
||||
"llm = OpenAI(temperature=0.9)\n",
|
||||
"prompt = PromptTemplate(\n",
|
||||
" input_variables=[\"product\"],\n",
|
||||
" template=\"What is a good name for a company that makes {product}\"\n",
|
||||
" )"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "tOpTb9idHh"
|
||||
},
|
||||
"source": [
|
||||
"We can now create a simple chain that takes user input, formats it, and passes it to the LLM"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "QXu2N1dEEC"
|
||||
},
|
||||
"source": [
|
||||
"from langchain.chains import LLMChain\n",
|
||||
"chain = LLMChain(llm=llm, prompt=prompt, output_key='company_name')\n",
|
||||
"\n",
|
||||
"# run the chain only specifying input variables\n",
|
||||
"print(chain.run(\"hand crafted handbags\"))\n",
|
||||
"\n",
|
||||
"# NOTE: we pass data to the run of the entry chain (see sequence under)"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "\n\nUrban Crafts Co.\n"
|
||||
}
|
||||
],
|
||||
"execution_count": 1
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "Kv6bj1l9I3"
|
||||
},
|
||||
"source": [
|
||||
"## Combining chains with SequentialChain\n",
|
||||
"\n",
|
||||
"Chains that execute their links in predefined order.\n",
|
||||
"\n",
|
||||
"- SimpleSequentialChain: simplest form, each step has a single input/output. \n",
|
||||
"Output of one step is input to next.\n",
|
||||
"- SequentialChain: More advanced, multiple inputs/outputs.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Following tutorial uses SimpleSequentialChain and SequentialChain, each chain's output is input to the next one.\n",
|
||||
"This sequential chain will:\n",
|
||||
" 1. create company name for a product. We just use LLMChain for that\n",
|
||||
" 2. Create a catchphrase for the product. We will use a new LLMChain for the catchphrase, as shown below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "BMZLsdY9VP"
|
||||
},
|
||||
"source": [
|
||||
"second_prompt = PromptTemplate(\n",
|
||||
" input_variables=[\"company_name\"],\n",
|
||||
" template=\"Write a catchphrase for the following company: {company_name}\",\n",
|
||||
" )\n",
|
||||
"chain_two = LLMChain(llm=llm, prompt=second_prompt, output_key='catchphrase')"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "epQHxmeWCP"
|
||||
},
|
||||
"source": [
|
||||
"We now combine the two chains to create company name and catch phrase."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "SHwDHjVCxb"
|
||||
},
|
||||
"source": [
|
||||
"from langchain.chains import SimpleSequentialChain, SequentialChain"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "lKgp9HR0VX"
|
||||
},
|
||||
"source": [
|
||||
"full_chain = SimpleSequentialChain(\n",
|
||||
" chains=[chain, chain_two], verbose=True,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"print(full_chain.run(\"hand crafted handbags\"))"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "RiYcYwJhdC"
|
||||
},
|
||||
"source": [
|
||||
"---\n",
|
||||
"\n",
|
||||
"In the third prompt we create a small advertisement with the title and the product description"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "RhnqOumOtX"
|
||||
},
|
||||
"source": [
|
||||
"ad_template = \"\"\"Create a small advertisement destined for reddit. \n",
|
||||
"The advertisement is for a company with the following details:\n",
|
||||
"\n",
|
||||
"name: {company_name}\n",
|
||||
"product: {product}\n",
|
||||
"catchphrase: {catchphrase}\n",
|
||||
"\n",
|
||||
"advertisement:\n",
|
||||
"\"\"\"\n",
|
||||
"ad_prompt = PromptTemplate(\n",
|
||||
" input_variables=[\"product\", \"company_name\", \"catchphrase\"],\n",
|
||||
" template=ad_template,\n",
|
||||
" )"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "MsQnieyxgL"
|
||||
},
|
||||
"source": [
|
||||
"# Connect the three chains together\n",
|
||||
"\n",
|
||||
"ad_chain = LLMChain(llm=llm, prompt=ad_prompt, output_key='advertisement')"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "4PYfwOxTlq"
|
||||
},
|
||||
"source": [
|
||||
"final_chain = SequentialChain(\n",
|
||||
" chains=[chain, chain_two, ad_chain],\n",
|
||||
" input_variables=['product'],\n",
|
||||
" output_variables=['advertisement'],\n",
|
||||
" verbose=True\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"ad = final_chain.run('Professional Cat Cuddler')"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "\n\n\u001b[1m> Entering new SequentialChain chain...\u001b[0m\n\n\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
}
|
||||
],
|
||||
"execution_count": 2
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "2akm8eB1EV"
|
||||
},
|
||||
"source": [
|
||||
"print(ad)"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "Are you in need of a little indulgence? Then come to Purr-fect Pampering! Our professional cat cuddler will provide you with the ultimate relaxation experience. We guarantee that after a session with us, you'll be feeling more purr-fect than ever! Treat yourself to the luxurious indulgence of Purr-fect Pampering!\n"
|
||||
}
|
||||
],
|
||||
"execution_count": 3
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "1iT7gBMABZ"
|
||||
},
|
||||
"source": [
|
||||
"## Creating a custom chain\n",
|
||||
"\n",
|
||||
"Example: create a custom chain that concats output of 2 LLMChain\n",
|
||||
"\n",
|
||||
"Steps:\n",
|
||||
" 1. Subclass Chain class\n",
|
||||
" 2. Fill out `input_keys` and `output_keys`\n",
|
||||
" 3. add the `_call` method that shows how to execute chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "OUXv7kGtDH"
|
||||
},
|
||||
"source": [
|
||||
"from langchain.chains import LLMChain\n",
|
||||
"from langchain.chains.base import Chain\n",
|
||||
"\n",
|
||||
"from typing import Dict, List\n",
|
||||
"\n",
|
||||
"class ConcatenateChain(Chain):\n",
|
||||
" chain_1: LLMChain\n",
|
||||
" chain_2: LLMChain\n",
|
||||
"\n",
|
||||
" @property\n",
|
||||
" def input_keys(self) -> List[str]:\n",
|
||||
" # Union of the input keys of the two chains\n",
|
||||
" all_inputs_vars = set(self.chain_1.input_keys).union(\n",
|
||||
" set(self.chain_2.input_keys))\n",
|
||||
" return list(all_inputs_vars)\n",
|
||||
"\n",
|
||||
" @property\n",
|
||||
" def output_keys(self) -> List[str]:\n",
|
||||
" return ['concat_output']\n",
|
||||
"\n",
|
||||
" def _call(self, inputs: Dict[str, str]) -> Dict[str,str]:\n",
|
||||
" output_1 = self.chain_1.run(inputs)\n",
|
||||
" output_2 = self.chain_2.run(inputs)\n",
|
||||
" return {'concat_output': output_1 + output_2}"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "MUOMbKovF6"
|
||||
},
|
||||
"source": [
|
||||
"Running the custom chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "kBfPU3rB6L"
|
||||
},
|
||||
"source": [
|
||||
"prompt_1 = PromptTemplate(\n",
|
||||
" input_variables=['product'],\n",
|
||||
" template='what is a good name for a company that makes {product}?'\n",
|
||||
" )\n",
|
||||
"chain_1 = LLMChain(llm=llm, prompt=prompt_1)\n",
|
||||
"\n",
|
||||
"prompt_2 = PromptTemplate(\n",
|
||||
" input_variables=['product'],\n",
|
||||
" template='what is a good slogan for a company that makes {product} ?'\n",
|
||||
" )\n",
|
||||
"chain_2 = LLMChain(llm=llm, prompt=prompt_2)\n",
|
||||
"\n",
|
||||
"concat_chain = ConcatenateChain(chain_1=chain_1, chain_2=chain_2)\n",
|
||||
"\n",
|
||||
"concat_output = concat_chain.run('leather handbags')\n",
|
||||
"print(f'Concatenated output:\\n{concat_output}')"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "Concatenated output:\n\n\nLeather Luxury Boutique.\n\n\"Handcrafted Leather: The Perfect Accent for Any Look.\"\n"
|
||||
}
|
||||
],
|
||||
"execution_count": 4
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "9CdH3GtsmW"
|
||||
},
|
||||
"source": [],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"anaconda-cloud": {},
|
||||
"kernelspec": {
|
||||
"display_name": "python",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
@ -0,0 +1,193 @@
|
||||
r"""°°°
|
||||
# Chains
|
||||
|
||||
Chaining LLMs with each other or with other experts.
|
||||
|
||||
## Getting Started
|
||||
|
||||
- Using the simple LLM chain
|
||||
- Creating sequential chains
|
||||
- Creating a custom chain
|
||||
|
||||
### Why Use Chains ?
|
||||
|
||||
- combine multiple components together
|
||||
- ex: take user input, format with PromptTemplate, pass formatted text to LLM.
|
||||
|
||||
## Query an LLM with LLMChain
|
||||
|
||||
°°°"""
|
||||
#|%%--%%| <2XVP2VXIL1|DPRWRo3fl7>
|
||||
|
||||
from langchain.prompts import PromptTemplate
|
||||
from langchain.llms import OpenAI
|
||||
import pprint as pp
|
||||
|
||||
llm = OpenAI(temperature=0.9)
|
||||
prompt = PromptTemplate(
|
||||
input_variables=["product"],
|
||||
template="What is a good name for a company that makes {product}"
|
||||
)
|
||||
|
||||
#|%%--%%| <DPRWRo3fl7|tOpTb9idHh>
|
||||
r"""°°°
|
||||
We can now create a simple chain that takes user input, formats it, and passes it to the LLM
|
||||
°°°"""
|
||||
#|%%--%%| <tOpTb9idHh|QXu2N1dEEC>
|
||||
|
||||
from langchain.chains import LLMChain
|
||||
chain = LLMChain(llm=llm, prompt=prompt, output_key='company_name')
|
||||
|
||||
# run the chain only specifying input variables
|
||||
print(chain.run("hand crafted handbags"))
|
||||
|
||||
# NOTE: we pass data to the run of the entry chain (see sequence under)
|
||||
|
||||
#|%%--%%| <QXu2N1dEEC|Kv6bj1l9I3>
|
||||
r"""°°°
|
||||
## Combining chains with SequentialChain
|
||||
|
||||
Chains that execute their links in predefined order.
|
||||
|
||||
- SimpleSequentialChain: simplest form, each step has a single input/output.
|
||||
Output of one step is input to next.
|
||||
- SequentialChain: More advanced, multiple inputs/outputs.
|
||||
|
||||
|
||||
Following tutorial uses SimpleSequentialChain and SequentialChain, each chain's output is input to the next one.
|
||||
This sequential chain will:
|
||||
1. create company name for a product. We just use LLMChain for that
|
||||
 2. Create a catchphrase for the product. We will use a new LLMChain for the catchphrase, as shown below.
|
||||
°°°"""
|
||||
#|%%--%%| <Kv6bj1l9I3|BMZLsdY9VP>
|
||||
|
||||
second_prompt = PromptTemplate(
|
||||
input_variables=["company_name"],
|
||||
template="Write a catchphrase for the following company: {company_name}",
|
||||
)
|
||||
chain_two = LLMChain(llm=llm, prompt=second_prompt, output_key='catchphrase')
|
||||
|
||||
#|%%--%%| <BMZLsdY9VP|epQHxmeWCP>
|
||||
r"""°°°
|
||||
We now combine the two chains to create company name and catch phrase.
|
||||
°°°"""
|
||||
#|%%--%%| <epQHxmeWCP|SHwDHjVCxb>
|
||||
|
||||
from langchain.chains import SimpleSequentialChain, SequentialChain
|
||||
|
||||
#|%%--%%| <SHwDHjVCxb|lKgp9HR0VX>
|
||||
|
||||
full_chain = SimpleSequentialChain(
|
||||
chains=[chain, chain_two], verbose=True,
|
||||
)
|
||||
|
||||
print(full_chain.run("hand crafted handbags"))
|
||||
|
||||
#|%%--%%| <lKgp9HR0VX|RiYcYwJhdC>
|
||||
r"""°°°
|
||||
---
|
||||
|
||||
In the third prompt we create a small advertisement with the title and the product description
|
||||
°°°"""
|
||||
#|%%--%%| <RiYcYwJhdC|RhnqOumOtX>
|
||||
|
||||
ad_template = """Create a small advertisement destined for reddit.
|
||||
The advertisement is for a company with the following details:
|
||||
|
||||
name: {company_name}
|
||||
product: {product}
|
||||
catchphrase: {catchphrase}
|
||||
|
||||
advertisement:
|
||||
"""
|
||||
ad_prompt = PromptTemplate(
|
||||
input_variables=["product", "company_name", "catchphrase"],
|
||||
template=ad_template,
|
||||
)
|
||||
|
||||
#|%%--%%| <RhnqOumOtX|MsQnieyxgL>
|
||||
|
||||
# Connect the three chains together
|
||||
|
||||
ad_chain = LLMChain(llm=llm, prompt=ad_prompt, output_key='advertisement')
|
||||
|
||||
#|%%--%%| <MsQnieyxgL|4PYfwOxTlq>
|
||||
|
||||
final_chain = SequentialChain(
|
||||
chains=[chain, chain_two, ad_chain],
|
||||
input_variables=['product'],
|
||||
output_variables=['advertisement'],
|
||||
verbose=True
|
||||
)
|
||||
|
||||
ad = final_chain.run('Professional Cat Cuddler')
|
||||
#|%%--%%| <4PYfwOxTlq|2akm8eB1EV>
|
||||
|
||||
print(ad)
|
||||
|
||||
#|%%--%%| <2akm8eB1EV|1iT7gBMABZ>
|
||||
r"""°°°
|
||||
## Creating a custom chain
|
||||
|
||||
Example: create a custom chain that concats output of 2 LLMChain
|
||||
|
||||
Steps:
|
||||
1. Subclass Chain class
|
||||
2. Fill out `input_keys` and `output_keys`
|
||||
3. add the `_call` method that shows how to execute chain
|
||||
°°°"""
|
||||
#|%%--%%| <1iT7gBMABZ|OUXv7kGtDH>
|
||||
|
||||
from langchain.chains import LLMChain
|
||||
from langchain.chains.base import Chain
|
||||
|
||||
from typing import Dict, List
|
||||
|
||||
class ConcatenateChain(Chain):
|
||||
chain_1: LLMChain
|
||||
chain_2: LLMChain
|
||||
|
||||
@property
|
||||
def input_keys(self) -> List[str]:
|
||||
# Union of the input keys of the two chains
|
||||
all_inputs_vars = set(self.chain_1.input_keys).union(
|
||||
set(self.chain_2.input_keys))
|
||||
return list(all_inputs_vars)
|
||||
|
||||
@property
|
||||
def output_keys(self) -> List[str]:
|
||||
return ['concat_output']
|
||||
|
||||
def _call(self, inputs: Dict[str, str]) -> Dict[str,str]:
|
||||
output_1 = self.chain_1.run(inputs)
|
||||
output_2 = self.chain_2.run(inputs)
|
||||
return {'concat_output': output_1 + output_2}
|
||||
|
||||
#|%%--%%| <OUXv7kGtDH|MUOMbKovF6>
|
||||
r"""°°°
|
||||
Running the custom chain
|
||||
°°°"""
|
||||
#|%%--%%| <MUOMbKovF6|kBfPU3rB6L>
|
||||
prompt_1 = PromptTemplate(
|
||||
input_variables=['product'],
|
||||
template='what is a good name for a company that makes {product}?'
|
||||
)
|
||||
chain_1 = LLMChain(llm=llm, prompt=prompt_1)
|
||||
|
||||
prompt_2 = PromptTemplate(
|
||||
input_variables=['product'],
|
||||
template='what is a good slogan for a company that makes {product} ?'
|
||||
)
|
||||
chain_2 = LLMChain(llm=llm, prompt=prompt_2)
|
||||
|
||||
concat_chain = ConcatenateChain(chain_1=chain_1, chain_2=chain_2)
|
||||
|
||||
concat_output = concat_chain.run('leather handbags')
|
||||
print(f'Concatenated output:\n{concat_output}')
|
||||
|
||||
|
||||
#|%%--%%| <kBfPU3rB6L|9CdH3GtsmW>
|
||||
|
||||
|
||||
|
||||
|
File diff suppressed because one or more lines are too long
@ -0,0 +1,74 @@
|
||||
r"""°°°
|
||||
# Document Loaders
|
||||
|
||||
- loading text from local sources
|
||||
- main driver is `Unstructured` python package
|
||||
|
||||
## Key Concepts
|
||||
|
||||
### Document
|
||||
|
||||
container class for document information. contains:
|
||||
- page_content
|
||||
- metadata
|
||||
|
||||
### Loader
|
||||
|
||||
base class to load documents. exposes:
|
||||
- load() -> Document
|
||||
|
||||
|
||||
## Setup Unstructured
|
||||
- host dependencies
|
||||
- poppler: PDF rendering library
|
||||
- Python deps:
|
||||
- Pillow: imaging library
|
||||
°°°"""
|
||||
#|%%--%%| <4yTe29l2Ya|srwyN0cVES>
|
||||
|
||||
# %pip install pillow (already installed)
|
||||
%pip install -q unstructured[local-inference]
|
||||
|
||||
#|%%--%%| <srwyN0cVES|cbFv0eSeXq>
|
||||
|
||||
docs_dir="unstructured-examples"
|
||||
!mkdir -p $docs_dir
|
||||
!wget https://raw.githubusercontent.com/Unstructured-IO/unstructured/main/example-docs/example-10k.html -P $docs_dir
|
||||
!wget https://raw.githubusercontent.com/Unstructured-IO/unstructured/main/example-docs/layout-parser-paper.pdf -P $docs_dir
|
||||
|
||||
|
||||
#|%%--%%| <cbFv0eSeXq|U633RkWjYq>
|
||||
r"""°°°
|
||||
[repo link](https://github.com/Unstructured-IO/unstructured#coffee-getting-started)
|
||||
The easiest way to parse a document in unstructured is to use the partition brick. If you use partition brick, unstructured will detect the file type and route it to the appropriate file-specific partitioning brick. If you are using the partition brick, ensure you first install libmagic using the instructions outlined here partition will always apply the default arguments. If you need advanced features, use a document-specific brick. The partition brick currently works for .txt, .docx, .pptx, .jpg, .png, .eml, .html, and .pdf documents.
|
||||
|
||||
Requires detectron2 inference (CUDA?)
|
||||
°°°"""
|
||||
#|%%--%%| <U633RkWjYq|FJaYuFeL0U>
|
||||
|
||||
docs_dir="unstructured-examples"
|
||||
#|%%--%%| <FJaYuFeL0U|9MKaXz7Bi4>
|
||||
|
||||
#NOTE: needs inference with facebook's detectron2
|
||||
|
||||
# from unstructured.partition.auto import partition
|
||||
|
||||
# elements = partition(docs_dir + "/layout-parser-paper.pdf")
|
||||
|
||||
|
||||
#|%%--%%| <9MKaXz7Bi4|X4mTVZAzcD>
|
||||
r"""°°°
|
||||
## Unstructured Langchain FileLoader
|
||||
|
||||
Requires detectron2
|
||||
|
||||
°°°"""
|
||||
#|%%--%%| <X4mTVZAzcD|9k0eAtsfvh>
|
||||
|
||||
from langchain.document_loaders import UnstructuredFileLoader
|
||||
|
||||
loader = UnstructuredFileLoader("./unstructured-examples/layout-parser-paper.pdf")
|
||||
|
||||
docs = loader.load()
|
||||
#|%%--%%| <9k0eAtsfvh|1lKP9jNDd4>
|
||||
|
@ -0,0 +1,13 @@
|
||||
{
|
||||
"model_name": "text-ada-001",
|
||||
"temperature": 1.0,
|
||||
"max_tokens": 256,
|
||||
"top_p": 1.0,
|
||||
"frequency_penalty": 0.0,
|
||||
"presence_penalty": 0.0,
|
||||
"n": 2,
|
||||
"best_of": 2,
|
||||
"request_timeout": null,
|
||||
"logit_bias": {},
|
||||
"_type": "openai"
|
||||
}
|
@ -0,0 +1,126 @@
|
||||
r"""°°°
|
||||
# Getting Started
|
||||
°°°"""
|
||||
# |%%--%%| <gOPXQazOh0|yMwLn4kyVM>
|
||||
|
||||
from langchain.llms import OpenAI
|
||||
|
||||
# n: how many completions to generate for each prompt
|
||||
llm = OpenAI(model_name='text-ada-001', n=2, best_of=2, temperature=1)
|
||||
llm("tell me a joke !")
|
||||
|
||||
|
||||
#|%%--%%| <yMwLn4kyVM|5F2CisYISi>
|
||||
r"""°°°
|
||||
you can call it with a list of inputs, getting back a more complete response than just the text. This complete response includes things like multiple top responses, as well as LLM provider specific information.
|
||||
°°°"""
|
||||
#|%%--%%| <5F2CisYISi|4hSGpG9eHG>
|
||||
|
||||
llm_result = llm.generate(["Tell me a joke", "Tell me a poem"] * 15)
|
||||
|
||||
#|%%--%%| <4hSGpG9eHG|kZMW1cS1Qk>
|
||||
|
||||
len(llm_result.generations)
|
||||
|
||||
#llm_result.to_dict() # see result details
|
||||
|
||||
print(llm_result.generations[0])
|
||||
print("\n\n")
|
||||
print(llm_result.generations[-1])
|
||||
llm_result.llm_output
|
||||
|
||||
|
||||
#|%%--%%| <kZMW1cS1Qk|1Kg9Ct1muS>
|
||||
r"""°°°
|
||||
## estimate number of tokens in prompt
|
||||
°°°"""
|
||||
#|%%--%%| <1Kg9Ct1muS|qz7lnBufdW>
|
||||
|
||||
llm.get_num_tokens("what is a joke")
|
||||
|
||||
#|%%--%%| <qz7lnBufdW|Q9fgYuBKEK>
|
||||
r"""°°°
|
||||
# Key Concepts
|
||||
|
||||
- Core method exposed by llms is `generate`: takes list of str returns LLMResult
|
||||
- Can also be called directly with single string as input and returns a string
|
||||
- Main result is `LLMResult`, input list of strings -> list of LLMResult
|
||||
Each result is a list of generations (since you can request `n` generations per input str)
|
||||
- `llm_output` contains provider-specific output
|
||||
°°°"""
|
||||
#|%%--%%| <Q9fgYuBKEK|3k0hrhqP7F>
|
||||
r"""°°°
|
||||
## LLM Serialization
|
||||
|
||||
Writing and reading LLMs to disk
|
||||
°°°"""
|
||||
#|%%--%%| <3k0hrhqP7F|9V7QPvLLcT>
|
||||
|
||||
from langchain.llms.loading import load_llm
|
||||
|
||||
llm.save("llm.json")
|
||||
|
||||
#|%%--%%| <9V7QPvLLcT|rcdMSwUd3W>
|
||||
|
||||
llm = load_llm("llm.json")
|
||||
|
||||
#|%%--%%| <rcdMSwUd3W|JfkBS05EUP>
|
||||
r"""°°°
|
||||
## Token Usage Tracking
|
||||
|
||||
|
||||
°°°"""
|
||||
#|%%--%%| <JfkBS05EUP|FUnKToXsu6>
|
||||
|
||||
from langchain.callbacks import get_openai_callback
|
||||
|
||||
#|%%--%%| <FUnKToXsu6|9AoUdMfzg7>
|
||||
|
||||
llm = OpenAI(model_name="text-davinci-002", n=2, best_of=2)
|
||||
|
||||
#|%%--%%| <9AoUdMfzg7|aiTKa2iDUx>
|
||||
|
||||
with get_openai_callback() as cb:
|
||||
result = llm("tell me a joke")
|
||||
print(cb.total_tokens)
|
||||
|
||||
#|%%--%%| <aiTKa2iDUx|6G5rwEeItx>
|
||||
r"""°°°
|
||||
Anything inside the context manager will get tracked.
|
||||
|
||||
Example tracking multiple calls
|
||||
°°°"""
|
||||
#|%%--%%| <6G5rwEeItx|uDJtapBCby>
|
||||
|
||||
with get_openai_callback() as cb:
|
||||
result = llm("Tell me a joke")
|
||||
result2 = llm("Tell me a funny joke")
|
||||
print(cb.total_tokens)
|
||||
|
||||
#|%%--%%| <uDJtapBCby|sLuvD8dXnk>
|
||||
r"""°°°
|
||||
If a chain or agent with multiple steps in it is used, it will track all those steps.
|
||||
°°°"""
|
||||
#|%%--%%| <sLuvD8dXnk|uUQOyO8XIw>
|
||||
|
||||
from langchain.agents import load_tools
|
||||
from langchain.agents import initialize_agent
|
||||
from langchain.llms import OpenAI
|
||||
|
||||
llm = OpenAI(temperature=0)
|
||||
tools = load_tools(["searx-search", "llm-math"], llm=llm)
|
||||
agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True)
|
||||
|
||||
#|%%--%%| <uUQOyO8XIw|nv0tixnGfg>
|
||||
|
||||
with get_openai_callback() as cb:
|
||||
res = agent.run("What is the temperature in Paris Berlin and Granada ? \
|
||||
Print every city's temperature in Celcius and Fahrenheit. Think step by step")
|
||||
|
||||
#|%%--%%| <nv0tixnGfg|vpDGqKnagk>
|
||||
|
||||
print(cb.total_tokens)
|
||||
|
||||
# |%%--%%| <vpDGqKnagk|xUoMvs6XZ0>
|
||||
|
||||
|
@ -0,0 +1,104 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "ut22SE2PmJ"
|
||||
},
|
||||
"source": [
|
||||
"## Loading PDF"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "EQL3ZDG6Dt"
|
||||
},
|
||||
"source": [
|
||||
"from langchain.document_loaders import PagedPDFSplitter\n",
|
||||
"\n",
|
||||
"loader = PagedPDFSplitter(\"./documents/layout-parser-paper.pdf\")\n",
|
||||
"pages = loader.load_and_split()"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "6LWg1c7vN6"
|
||||
},
|
||||
"source": [
|
||||
"Documents can be retrieved with page numbers"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "0kFnbEI7yL"
|
||||
},
|
||||
"source": [
|
||||
"from langchain.vectorstores import FAISS\n",
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "KkXwCS4JHN"
|
||||
},
|
||||
"source": [
|
||||
"faiss_index = FAISS.from_documents(pages, OpenAIEmbeddings() )\n",
|
||||
"\n",
|
||||
"# Find docs (ie pages) most similar to query\n",
|
||||
"# k: number of docs similar to query\n",
|
||||
"docs = faiss_index.similarity_search(\"How will the community be engaged ?\", k=2)"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "RDajVoEdqh"
|
||||
},
|
||||
"source": [
|
||||
"# get page numbers + content, similar to query \n",
|
||||
"for doc in docs:\n",
|
||||
" print(\"\\n----\\n\")\n",
|
||||
" print(\"page: \" + str(doc.metadata[\"page\"] + 1))\n",
|
||||
" print(\"content:\")\n",
|
||||
" print(str(doc.page_content))"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "\n----\n\npage: 10\ncontent:\n10 Z. Shen et al.\nFig. 4: Illustration of (a) the original historical Japanese document with layout\ndetection results and (b) a recreated version of the document image that achieves\nmuch better character recognition recall. The reorganization algorithm rearranges\nthe tokens based on the their detected bounding boxes given a maximum allowed\nheight.\n4LayoutParser Community Platform\nAnother focus of LayoutParser is promoting the reusability of layout detection\nmodels and full digitization pipelines. Similar to many existing deep learning\nlibraries, LayoutParser comes with a community model hub for distributing\nlayout models. End-users can upload their self-trained models to the model hub,\nand these models can be loaded into a similar interface as the currently available\nLayoutParser pre-trained models. For example, the model trained on the News\nNavigator dataset [17] has been incorporated in the model hub.\nBeyond DL models, LayoutParser also promotes the sharing of entire doc-\nument digitization pipelines. For example, sometimes the pipeline requires the\ncombination of multiple DL models to achieve better accuracy. Currently, pipelines\nare mainly described in academic papers and implementations are often not pub-\nlicly available. To this end, the LayoutParser community platform also enables\nthe sharing of layout pipelines to promote the discussion and reuse of techniques.\nFor each shared pipeline, it has a dedicated project page, with links to the source\ncode, documentation, and an outline of the approaches. A discussion panel is\nprovided for exchanging ideas. Combined with the core LayoutParser library,\nusers can easily build reusable components based on the shared pipelines and\napply them to solve their unique problems.\n5 Use Cases\nThe core objective of LayoutParser is to make it easier to create both large-scale\nand light-weight document digitization pipelines. 
Large-scale document processing\n\n----\n\npage: 4\ncontent:\n4 Z. Shen et al.\nEfficient Data AnnotationC u s t o m i z e d M o d e l T r a i n i n gModel Cust omizationDI A Model HubDI A Pipeline SharingCommunity PlatformLa y out Detection ModelsDocument Images \nT h e C o r e L a y o u t P a r s e r L i b r a r yOCR ModuleSt or age & VisualizationLa y out Data Structur e\nFig. 1: The overall architecture of LayoutParser . For an input document image,\nthe core LayoutParser library provides a set of o\u000b-the-shelf tools for layout\ndetection, OCR, visualization, and storage, backed by a carefully designed layout\ndata structure. LayoutParser also supports high level customization via e\u000ecient\nlayout annotation and model training functions. These improve model accuracy\non the target samples. The community platform enables the easy sharing of DIA\nmodels and whole digitization pipelines to promote reusability and reproducibility.\nA collection of detailed documentation, tutorials and exemplar projects make\nLayoutParser easy to learn and use.\nAllenNLP [ 8] and transformers [ 34] have provided the community with complete\nDL-based support for developing and deploying models for general computer\nvision and natural language processing problems. LayoutParser , on the other\nhand, specializes speci\fcally in DIA tasks. LayoutParser is also equipped with a\ncommunity platform inspired by established model hubs such as Torch Hub [23]\nandTensorFlow Hub [1]. It enables the sharing of pretrained models as well as\nfull document processing pipelines that are unique to DIA tasks.\nThere have been a variety of document data collections to facilitate the\ndevelopment of DL models. Some examples include PRImA [ 3](magazine layouts),\nPubLayNet [ 38](academic paper layouts), Table Bank [ 18](tables in academic\npapers), Newspaper Navigator Dataset [ 16,17](newspaper \fgure layouts) and\nHJDataset [31](historical Japanese document layouts). 
A spectrum of models\ntrained on these datasets are currently available in the LayoutParser model zoo\nto support di\u000berent use cases.\n3 The Core LayoutParser Library\nAt the core of LayoutParser is an o\u000b-the-shelf toolkit that streamlines DL-\nbased document image analysis. Five components support a simple interface\nwith comprehensive functionalities: 1) The layout detection models enable using\npre-trained or self-trained DL models for layout detection with just four lines\nof code. 2) The detected layout information is stored in carefully engineered\n"
|
||||
}
|
||||
],
|
||||
"execution_count": 1
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "cqoPocvVBS"
|
||||
},
|
||||
"source": [],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"anaconda-cloud": {},
|
||||
"kernelspec": {
|
||||
"display_name": "python",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
@ -0,0 +1,37 @@
|
||||
r"""°°°
|
||||
## Loading PDF
|
||||
°°°"""
|
||||
#|%%--%%| <ut22SE2PmJ|EQL3ZDG6Dt>
|
||||
|
||||
from langchain.document_loaders import PagedPDFSplitter
|
||||
|
||||
loader = PagedPDFSplitter("./documents/layout-parser-paper.pdf")
|
||||
pages = loader.load_and_split()
|
||||
|
||||
#|%%--%%| <EQL3ZDG6Dt|6LWg1c7vN6>
|
||||
r"""°°°
|
||||
Documents can be retrieved with page numbers
|
||||
°°°"""
|
||||
#|%%--%%| <6LWg1c7vN6|0kFnbEI7yL>
|
||||
|
||||
from langchain.vectorstores import FAISS
|
||||
from langchain.embeddings.openai import OpenAIEmbeddings
|
||||
|
||||
#|%%--%%| <0kFnbEI7yL|KkXwCS4JHN>
|
||||
|
||||
faiss_index = FAISS.from_documents(pages, OpenAIEmbeddings() )
|
||||
|
||||
# Find docs (ie pages) most similar to query
|
||||
# k: number of docs similar to query
|
||||
docs = faiss_index.similarity_search("How will the community be engaged ?", k=2)
|
||||
|
||||
#|%%--%%| <KkXwCS4JHN|RDajVoEdqh>
|
||||
# get page numbers + content, similar to query
|
||||
for doc in docs:
|
||||
print("\n----\n")
|
||||
print("page: " + str(doc.metadata["page"] + 1))
|
||||
print("content:")
|
||||
print(str(doc.page_content))
|
||||
|
||||
#|%%--%%| <RDajVoEdqh|cqoPocvVBS>
|
||||
|
@ -0,0 +1,347 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "IagIXdFJ76"
|
||||
},
|
||||
"source": [
|
||||
"# Prompt templates\n",
|
||||
"[see](https://langchain.readthedocs.io/en/latest/modules/prompts/getting_started.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"jukit_cell_id": "5rNpKSE97a"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"('\\n'\n",
|
||||
" 'I want you to act as a naming consultant for new companies.\\n'\n",
|
||||
" '\\n'\n",
|
||||
" 'Here are some examples of good company names:\\n'\n",
|
||||
" '\\n'\n",
|
||||
" '- search engine, Google\\n'\n",
|
||||
" '- social media, Facebook\\n'\n",
|
||||
" '- video sharing, YouTube\\n'\n",
|
||||
" '\\n'\n",
|
||||
" 'The name should be short, catchy and easy to remember.\\n'\n",
|
||||
" '\\n'\n",
|
||||
" 'What is a good name for a company that makes cookie?\\n')\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain import PromptTemplate\n",
|
||||
"import pprint as pp\n",
|
||||
"\n",
|
||||
"template = \"\"\"\n",
|
||||
"I want you to act as a naming consultant for new companies.\n",
|
||||
"\n",
|
||||
"Here are some examples of good company names:\n",
|
||||
"\n",
|
||||
"- search engine, Google\n",
|
||||
"- social media, Facebook\n",
|
||||
"- video sharing, YouTube\n",
|
||||
"\n",
|
||||
"The name should be short, catchy and easy to remember.\n",
|
||||
"\n",
|
||||
"What is a good name for a company that makes {product}?\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"prompt = PromptTemplate(\n",
|
||||
" input_variables=[\"product\"],\n",
|
||||
" template=template,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"pp.pp(prompt.format(product='cookie'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"jukit_cell_id": "UH7UDNwwOT"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'tell me a funny joke about bats.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# without inputs\n",
|
||||
"no_input_prompt = PromptTemplate(input_variables=[],\n",
|
||||
" template=\"tell me a joke.\")\n",
|
||||
"no_input_prompt.format()\n",
|
||||
"\n",
|
||||
"# with inputs\n",
|
||||
"multi_input_prompt = PromptTemplate(\n",
|
||||
" input_variables=[\"adjective\", \"content\"],\n",
|
||||
" template=\"tell me a {adjective} joke about {content}.\" \n",
|
||||
" )\n",
|
||||
"multi_input_prompt.format(adjective=\"funny\", content=\"bats\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "m4HfAhbN4T"
|
||||
},
|
||||
"source": [
|
||||
"## Loading prompt templates from LangChainHub "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"jukit_cell_id": "6sAHvM0Vrt"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"\n",
|
||||
"Human: what is 1 + 1?\n",
|
||||
"AI:\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.prompts import load_prompt\n",
|
||||
"\n",
|
||||
"prompt=load_prompt(\"lc://prompts/conversation/prompt.json\")\n",
|
||||
"#NOTE: is there a helper to quickly build a history ? \n",
|
||||
"print(prompt.format(history=\"\", input=\"what is 1 + 1?\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "u2xOTHeA5E"
|
||||
},
|
||||
"source": [
|
||||
"## Pass few shot examples to prompt template"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"jukit_cell_id": "KO5IXCuzjw"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Give the antonym of every input\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Word: happy\n",
|
||||
"Antonym: sad\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Word: tall\n",
|
||||
"Antonym: short\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Word: fast\n",
|
||||
"Antonym:\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain import FewShotPromptTemplate\n",
|
||||
"\n",
|
||||
"# create a list of few shot examples\n",
|
||||
"examples = [\n",
|
||||
" {\"word\": \"happy\", \"antonym\": \"sad\"},\n",
|
||||
" {\"word\": \"tall\", \"antonym\": \"short\"},\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
"# next e specify a template for format the examples\n",
|
||||
"# we use PromptTemplate class\n",
|
||||
"example_formatter_template = \"\"\"\n",
|
||||
"Word: {word}\n",
|
||||
"Antonym: {antonym}\n",
|
||||
"\"\"\"\n",
|
||||
"example_pr = PromptTemplate(\n",
|
||||
" input_variables=[\"word\", \"antonym\"],\n",
|
||||
" template=example_formatter_template,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"# now we can use FewShotPromptTemplate\n",
|
||||
"few_shot_prompt = FewShotPromptTemplate(\n",
|
||||
" # examples we want to insert in prompt\n",
|
||||
" examples=examples,\n",
|
||||
" # how we want examples to be formatted in prompt\n",
|
||||
" example_prompt=example_pr,\n",
|
||||
" # The prefix is some text that goes before the examples in the prompt.\n",
|
||||
" # Usually, this consists of intructions.\n",
|
||||
" prefix=\"Give the antonym of every input\",\n",
|
||||
" #The suffix is some text that goes after the examples in the prompt.\n",
|
||||
" suffix=\"Word: {input}\\nAntonym:\",\n",
|
||||
" # The input variables are the variables that the overall prompt expects.\n",
|
||||
" input_variables=[\"input\"],\n",
|
||||
" # The example_separator is the string we will use to join the prefix, examples, and suffix together with.\n",
|
||||
" example_separator=\"\\n\\n\",\n",
|
||||
" )\n",
|
||||
"# generate few shot prompt using input\n",
|
||||
"print(few_shot_prompt.format(input=\"fast\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "fV1qxeDncc"
|
||||
},
|
||||
"source": [
|
||||
"## Select examples from prompt template\n",
|
||||
"\n",
|
||||
"- for a large number of exaamples use ExampleSelector to select a subset of\n",
|
||||
" most informative ones for language model.\n",
|
||||
"- LengthBasedExampleSelector selects examples based on length of input.\n",
|
||||
" practical to to construct prompt that do not extend over context window\n",
|
||||
" based on input length"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"jukit_cell_id": "uuIdYaJ4wD"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Give the antonym of every input\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Word: happy\n",
|
||||
"Antonym: sad\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Word: tall\n",
|
||||
"Antonym: short\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Word: energetic\n",
|
||||
"Antonym: lethargic\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Word: sunny\n",
|
||||
"Antonym: gloomy\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Word: big\n",
|
||||
"Antonym:\n",
|
||||
"----------\n",
|
||||
"Give the antonym of every input\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Word: happy\n",
|
||||
"Antonym: sad\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Word: big and huge and massive and large and gigantic and tall and much much much much much bigger than everything else\n",
|
||||
"Antonym:\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.prompts.example_selector import LengthBasedExampleSelector\n",
|
||||
"\n",
|
||||
"#These are a lot of examples of a pretend task of creating antonyms.\n",
|
||||
"examples = [\n",
|
||||
" {\"word\": \"happy\", \"antonym\": \"sad\"},\n",
|
||||
" {\"word\": \"tall\", \"antonym\": \"short\"},\n",
|
||||
" {\"word\": \"energetic\", \"antonym\": \"lethargic\"},\n",
|
||||
" {\"word\": \"sunny\", \"antonym\": \"gloomy\"},\n",
|
||||
" {\"word\": \"windy\", \"antonym\": \"calm\"},\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"example_selector = LengthBasedExampleSelector(\n",
|
||||
" examples=examples,\n",
|
||||
" # This is the PromptTemplate being used to format the examples.\n",
|
||||
" example_prompt=example_pr,\n",
|
||||
" # This is the maximum length that the formatted examples should be.\n",
|
||||
" # Length is measured by the get_text_length function below.\n",
|
||||
" max_length=30,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"# We can now use the `example_selector` to create a `FewShotPromptTemplate`.\n",
|
||||
"dynamic_prompt = FewShotPromptTemplate(\n",
|
||||
" # We provide an ExampleSelector instead of examples.\n",
|
||||
" example_selector=example_selector,\n",
|
||||
" example_prompt=example_pr,\n",
|
||||
" prefix=\"Give the antonym of every input\",\n",
|
||||
" suffix=\"Word: {input}\\nAntonym:\",\n",
|
||||
" input_variables=[\"input\"],\n",
|
||||
" example_separator=\"\\n\\n\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# We can now generate a prompt using the `format` method.\n",
|
||||
"print(dynamic_prompt.format(input=\"big\"))\n",
|
||||
"\n",
|
||||
"print(\"----------\")\n",
|
||||
"\n",
|
||||
"# In contrast, if we provide a very long input, the LengthBasedExampleSelector\n",
|
||||
"# will select fewer examples to include in the prompt.\n",
|
||||
"long_string = \"big and huge and massive and large and gigantic and tall and much much much much much bigger than everything else\"\n",
|
||||
"print(dynamic_prompt.format(input=long_string))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"anaconda-cloud": {},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
@ -0,0 +1,152 @@
|
||||
r"""°°°
|
||||
# Prompt templates
|
||||
[see](https://langchain.readthedocs.io/en/latest/modules/prompts/getting_started.html)
|
||||
°°°"""
|
||||
# |%%--%%| <IagIXdFJ76|5rNpKSE97a>
|
||||
|
||||
from langchain import PromptTemplate
|
||||
import pprint as pp
|
||||
|
||||
template = """
|
||||
I want you to act as a naming consultant for new companies.
|
||||
|
||||
Here are some examples of good company names:
|
||||
|
||||
- search engine, Google
|
||||
- social media, Facebook
|
||||
- video sharing, YouTube
|
||||
|
||||
The name should be short, catchy and easy to remember.
|
||||
|
||||
What is a good name for a company that makes {product}?
|
||||
"""
|
||||
|
||||
prompt = PromptTemplate(
|
||||
input_variables=["product"],
|
||||
template=template,
|
||||
)
|
||||
|
||||
pp.pp(prompt.format(product='cookie'))
|
||||
|
||||
# |%%--%%| <5rNpKSE97a|UH7UDNwwOT>
|
||||
|
||||
# without inputs
|
||||
no_input_prompt = PromptTemplate(input_variables=[],
|
||||
template="tell me a joke.")
|
||||
no_input_prompt.format()
|
||||
|
||||
# with inputs
|
||||
multi_input_prompt = PromptTemplate(
|
||||
input_variables=["adjective", "content"],
|
||||
template="tell me a {adjective} joke about {content}."
|
||||
)
|
||||
multi_input_prompt.format(adjective="funny", content="bats")
|
||||
|
||||
# |%%--%%| <UH7UDNwwOT|m4HfAhbN4T>
|
||||
r"""°°°
|
||||
## Loading prompt templates from LangChainHub
|
||||
°°°"""
|
||||
# |%%--%%| <m4HfAhbN4T|6sAHvM0Vrt>
|
||||
|
||||
from langchain.prompts import load_prompt
|
||||
|
||||
prompt=load_prompt("lc://prompts/conversation/prompt.json")
|
||||
#NOTE: is there a helper to quickly build a history ?
|
||||
print(prompt.format(history="", input="what is 1 + 1?"))
|
||||
|
||||
# |%%--%%| <6sAHvM0Vrt|u2xOTHeA5E>
|
||||
r"""°°°
|
||||
## Pass few shot examples to prompt template
|
||||
°°°"""
|
||||
# |%%--%%| <u2xOTHeA5E|KO5IXCuzjw>
|
||||
|
||||
from langchain import FewShotPromptTemplate
|
||||
|
||||
# create a list of few shot examples
|
||||
examples = [
|
||||
{"word": "happy", "antonym": "sad"},
|
||||
{"word": "tall", "antonym": "short"},
|
||||
]
|
||||
|
||||
# next e specify a template for format the examples
|
||||
# we use PromptTemplate class
|
||||
example_formatter_template = """
|
||||
Word: {word}
|
||||
Antonym: {antonym}
|
||||
"""
|
||||
example_pr = PromptTemplate(
|
||||
input_variables=["word", "antonym"],
|
||||
template=example_formatter_template,
|
||||
)
|
||||
|
||||
# now we can use FewShotPromptTemplate
|
||||
few_shot_prompt = FewShotPromptTemplate(
|
||||
# examples we want to insert in prompt
|
||||
examples=examples,
|
||||
# how we want examples to be formatted in prompt
|
||||
example_prompt=example_pr,
|
||||
# The prefix is some text that goes before the examples in the prompt.
|
||||
# Usually, this consists of intructions.
|
||||
prefix="Give the antonym of every input",
|
||||
#The suffix is some text that goes after the examples in the prompt.
|
||||
suffix="Word: {input}\nAntonym:",
|
||||
# The input variables are the variables that the overall prompt expects.
|
||||
input_variables=["input"],
|
||||
# The example_separator is the string we will use to join the prefix, examples, and suffix together with.
|
||||
example_separator="\n\n",
|
||||
)
|
||||
# generate few shot prompt using input
|
||||
print(few_shot_prompt.format(input="fast"))
|
||||
|
||||
# |%%--%%| <KO5IXCuzjw|fV1qxeDncc>
|
||||
r"""°°°
|
||||
## Select examples from prompt template
|
||||
|
||||
- for a large number of exaamples use ExampleSelector to select a subset of
|
||||
most informative ones for language model.
|
||||
- LengthBasedExampleSelector selects examples based on length of input.
|
||||
practical to to construct prompt that do not extend over context window
|
||||
based on input length
|
||||
°°°"""
|
||||
# |%%--%%| <fV1qxeDncc|uuIdYaJ4wD>
|
||||
|
||||
from langchain.prompts.example_selector import LengthBasedExampleSelector
|
||||
|
||||
#These are a lot of examples of a pretend task of creating antonyms.
|
||||
examples = [
|
||||
{"word": "happy", "antonym": "sad"},
|
||||
{"word": "tall", "antonym": "short"},
|
||||
{"word": "energetic", "antonym": "lethargic"},
|
||||
{"word": "sunny", "antonym": "gloomy"},
|
||||
{"word": "windy", "antonym": "calm"},
|
||||
]
|
||||
|
||||
example_selector = LengthBasedExampleSelector(
|
||||
examples=examples,
|
||||
# This is the PromptTemplate being used to format the examples.
|
||||
example_prompt=example_pr,
|
||||
# This is the maximum length that the formatted examples should be.
|
||||
# Length is measured by the get_text_length function below.
|
||||
max_length=30,
|
||||
)
|
||||
|
||||
# We can now use the `example_selector` to create a `FewShotPromptTemplate`.
|
||||
dynamic_prompt = FewShotPromptTemplate(
|
||||
# We provide an ExampleSelector instead of examples.
|
||||
example_selector=example_selector,
|
||||
example_prompt=example_pr,
|
||||
prefix="Give the antonym of every input",
|
||||
suffix="Word: {input}\nAntonym:",
|
||||
input_variables=["input"],
|
||||
example_separator="\n\n",
|
||||
)
|
||||
|
||||
# We can now generate a prompt using the `format` method.
|
||||
print(dynamic_prompt.format(input="big"))
|
||||
|
||||
print("----------")
|
||||
|
||||
# In contrast, if we provide a very long input, the LengthBasedExampleSelector
|
||||
# will select fewer examples to include in the prompt.
|
||||
long_string = "big and huge and massive and large and gigantic and tall and much much much much much bigger than everything else"
|
||||
print(dynamic_prompt.format(input=long_string))
|
@ -0,0 +1,23 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "NONE"
|
||||
},
|
||||
"source": [],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"anaconda-cloud": {},
|
||||
"kernelspec": {
|
||||
"display_name": "python",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
@ -0,0 +1,78 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "f0GEGDeLRe"
|
||||
},
|
||||
"source": [],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "bNvqfQkX7p"
|
||||
},
|
||||
"source": [
|
||||
"# Utils\n",
|
||||
"\n",
|
||||
"Connect LLMs to tools and other sources of knowledge\n",
|
||||
"\n",
|
||||
"- Text Splitter\n",
|
||||
"- Embeddigs:\n",
|
||||
" - Return embeddigns (list of floats)\n",
|
||||
"- Vectorstores\n",
|
||||
" - Datastores to store embeddings of documents in vector form.\n",
|
||||
" - Expose methods for passing in a string and retrieve similar document\n",
|
||||
"- Python Repl\n",
|
||||
"- Bash\n",
|
||||
"- Requests Wrapper (requests lib)\n",
|
||||
"- Search\n",
|
||||
"\n",
|
||||
"## Generic Utilities\n",
|
||||
"\n",
|
||||
"### Bash"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "ph5kvpzBNj"
|
||||
},
|
||||
"source": [
|
||||
"from langchain.utilities import BashProcess\n",
|
||||
"\n",
|
||||
"bash = BashProcess()\n",
|
||||
"print(bash.run(\"ls\"))"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": "document-loaders.ipynb\ndocument-loaders.py\ndocuments\nllm.json\nllms.ipynb\nllms.py\npdf-loader.ipynb\npdf-loader.py\nquickstart.ipynb\nquickstart.py\ntest.ipynb\ntest.py\nutils.py\n\n"
|
||||
}
|
||||
],
|
||||
"execution_count": 1
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"jukit_cell_id": "4qBrDqHb3v"
|
||||
},
|
||||
"source": [],
|
||||
"outputs": [],
|
||||
"execution_count": null
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"anaconda-cloud": {},
|
||||
"kernelspec": {
|
||||
"display_name": "python",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
@ -0,0 +1,32 @@
|
||||
|
||||
#|%%--%%| <f0GEGDeLRe|bNvqfQkX7p>
|
||||
r"""°°°
|
||||
# Utils
|
||||
|
||||
Connect LLMs to tools and other sources of knowledge
|
||||
|
||||
- Text Splitter
|
||||
- Embeddigs:
|
||||
- Return embeddigns (list of floats)
|
||||
- Vectorstores
|
||||
- Datastores to store embeddings of documents in vector form.
|
||||
- Expose methods for passing in a string and retrieve similar document
|
||||
- Python Repl
|
||||
- Bash
|
||||
- Requests Wrapper (requests lib)
|
||||
- Search
|
||||
|
||||
## Generic Utilities
|
||||
|
||||
### Bash
|
||||
°°°"""
|
||||
#|%%--%%| <bNvqfQkX7p|ph5kvpzBNj>
|
||||
|
||||
from langchain.utilities import BashProcess
|
||||
|
||||
bash = BashProcess()
|
||||
print(bash.run("ls"))
|
||||
|
||||
#|%%--%%| <ph5kvpzBNj|4qBrDqHb3v>
|
||||
|
||||
#TODO: rest of utils
|
Loading…
Reference in New Issue