forked from Archives/langchain
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
526 lines
12 KiB
Plaintext
526 lines
12 KiB
Plaintext
1 year ago
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "f36d938c",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# LLM Caching\n",
|
||
|
"This notebook covers how to cache results of individual LLM calls."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"id": "10ad9224",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from langchain.llms import OpenAI"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "b50f0598",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
1 year ago
|
"## In Memory Cache"
|
||
1 year ago
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 2,
|
||
|
"id": "426ff912",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import langchain\n",
|
||
|
"from langchain.cache import InMemoryCache\n",
|
||
|
"langchain.llm_cache = InMemoryCache()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"id": "f69f6283",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# To make the caching really obvious, let's use a slower model.\n",
|
||
|
"llm = OpenAI(model_name=\"text-davinci-002\", n=2, best_of=2)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 4,
|
||
|
"id": "64005d1f",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
1 year ago
|
"CPU times: user 30.7 ms, sys: 18.6 ms, total: 49.3 ms\n",
|
||
|
"Wall time: 791 ms\n"
|
||
1 year ago
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
1 year ago
|
"\"\\n\\nWhy couldn't the bicycle stand up by itself? Because it was...two tired!\""
|
||
1 year ago
|
]
|
||
|
},
|
||
|
"execution_count": 4,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"%%time\n",
|
||
|
"# The first time, it is not yet in cache, so it should take longer\n",
|
||
|
"llm(\"Tell me a joke\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 5,
|
||
|
"id": "c8a1cb2b",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
1 year ago
|
"CPU times: user 80 µs, sys: 0 ns, total: 80 µs\n",
|
||
|
"Wall time: 83.9 µs\n"
|
||
1 year ago
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
1 year ago
|
"\"\\n\\nWhy couldn't the bicycle stand up by itself? Because it was...two tired!\""
|
||
1 year ago
|
]
|
||
|
},
|
||
|
"execution_count": 5,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"%%time\n",
|
||
|
"# The second time it is, so it goes faster\n",
|
||
|
"llm(\"Tell me a joke\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "4bf59c12",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
1 year ago
|
"## SQLite Cache"
|
||
1 year ago
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
1 year ago
|
"execution_count": 9,
|
||
|
"id": "3ff65b00",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"!rm .langchain.db"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 10,
|
||
1 year ago
|
"id": "5f036236",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# We can do the same thing with a SQLite cache\n",
|
||
|
"from langchain.cache import SQLiteCache\n",
|
||
|
"langchain.llm_cache = SQLiteCache(database_path=\".langchain.db\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
1 year ago
|
"execution_count": 11,
|
||
1 year ago
|
"id": "fa18e3af",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
1 year ago
|
"CPU times: user 17 ms, sys: 9.76 ms, total: 26.7 ms\n",
|
||
|
"Wall time: 825 ms\n"
|
||
1 year ago
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'"
|
||
|
]
|
||
|
},
|
||
1 year ago
|
"execution_count": 11,
|
||
1 year ago
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"%%time\n",
|
||
|
"# The first time, it is not yet in cache, so it should take longer\n",
|
||
|
"llm(\"Tell me a joke\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
1 year ago
|
"execution_count": 12,
|
||
1 year ago
|
"id": "5bf2f6fd",
|
||
1 year ago
|
"metadata": {
|
||
|
"scrolled": true
|
||
|
},
|
||
1 year ago
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
1 year ago
|
"CPU times: user 2.46 ms, sys: 1.23 ms, total: 3.7 ms\n",
|
||
|
"Wall time: 2.67 ms\n"
|
||
1 year ago
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'"
|
||
|
]
|
||
|
},
|
||
1 year ago
|
"execution_count": 12,
|
||
1 year ago
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"%%time\n",
|
||
|
"# The second time it is, so it goes faster\n",
|
||
|
"llm(\"Tell me a joke\")"
|
||
|
]
|
||
|
},
|
||
1 year ago
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "278ad7ae",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
1 year ago
|
"## Redis Cache"
|
||
1 year ago
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
1 year ago
|
"execution_count": null,
|
||
1 year ago
|
"id": "39f6eb0b",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# We can do the same thing with a Redis cache\n",
|
||
|
"# (make sure your local Redis instance is running first before running this example)\n",
|
||
|
"from redis import Redis\n",
|
||
|
"from langchain.cache import RedisCache\n",
|
||
|
"langchain.llm_cache = RedisCache(redis_=Redis())"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
1 year ago
|
"execution_count": null,
|
||
1 year ago
|
"id": "28920749",
|
||
|
"metadata": {},
|
||
1 year ago
|
"outputs": [],
|
||
1 year ago
|
"source": [
|
||
|
"%%time\n",
|
||
|
"# The first time, it is not yet in cache, so it should take longer\n",
|
||
|
"llm(\"Tell me a joke\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
1 year ago
|
"execution_count": null,
|
||
1 year ago
|
"id": "94bf9415",
|
||
|
"metadata": {},
|
||
1 year ago
|
"outputs": [],
|
||
1 year ago
|
"source": [
|
||
|
"%%time\n",
|
||
|
"# The second time it is, so it goes faster\n",
|
||
|
"llm(\"Tell me a joke\")"
|
||
|
]
|
||
|
},
|
||
1 year ago
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "934943dc",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
1 year ago
|
"## SQLAlchemy Cache"
|
||
1 year ago
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
1 year ago
|
"execution_count": null,
|
||
1 year ago
|
"id": "acccff40",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# You can use SQLAlchemyCache to cache with any SQL database supported by SQLAlchemy.\n",
|
||
|
"\n",
|
||
|
"# from langchain.cache import SQLAlchemyCache\n",
|
||
|
"# from sqlalchemy import create_engine\n",
|
||
|
"\n",
|
||
|
"# engine = create_engine(\"postgresql://postgres:postgres@localhost:5432/postgres\")\n",
|
||
|
"# langchain.llm_cache = SQLAlchemyCache(engine)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "0c69d84d",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
1 year ago
|
"## Optional Caching\n",
|
||
1 year ago
|
"You can also turn off caching for specific LLMs if you choose. In the example below, even though global caching is enabled, we turn it off for a specific LLM by passing `cache=False`."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
1 year ago
|
"execution_count": 13,
|
||
1 year ago
|
"id": "6af46e2b",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"llm = OpenAI(model_name=\"text-davinci-002\", n=2, best_of=2, cache=False)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
1 year ago
|
"execution_count": 14,
|
||
1 year ago
|
"id": "26c4fd8f",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
1 year ago
|
"CPU times: user 5.8 ms, sys: 2.71 ms, total: 8.51 ms\n",
|
||
|
"Wall time: 745 ms\n"
|
||
1 year ago
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'"
|
||
|
]
|
||
|
},
|
||
1 year ago
|
"execution_count": 14,
|
||
1 year ago
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"%%time\n",
|
||
|
"llm(\"Tell me a joke\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
1 year ago
|
"execution_count": 15,
|
||
1 year ago
|
"id": "46846b20",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
1 year ago
|
"CPU times: user 4.91 ms, sys: 2.64 ms, total: 7.55 ms\n",
|
||
|
"Wall time: 623 ms\n"
|
||
1 year ago
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
1 year ago
|
"'\\n\\nTwo guys stole a calendar. They got six months each.'"
|
||
1 year ago
|
]
|
||
|
},
|
||
1 year ago
|
"execution_count": 15,
|
||
1 year ago
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"%%time\n",
|
||
|
"llm(\"Tell me a joke\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "5da41b77",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
1 year ago
|
"## Optional Caching in Chains\n",
|
||
1 year ago
|
"You can also turn off caching for particular nodes in chains. Note that because of certain interfaces, it's often easier to construct the chain first, and then edit the LLM afterwards.\n",
|
||
|
"\n",
|
||
|
"As an example, we will load a summarizer map-reduce chain. We will cache results for the map step, but not for the combine (reduce) step."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
1 year ago
|
"execution_count": 16,
|
||
1 year ago
|
"id": "9afa3f7a",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"llm = OpenAI(model_name=\"text-davinci-002\")\n",
|
||
|
"no_cache_llm = OpenAI(model_name=\"text-davinci-002\", cache=False)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
1 year ago
|
"execution_count": 17,
|
||
1 year ago
|
"id": "98a78e8e",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||
|
"from langchain.chains.mapreduce import MapReduceChain\n",
|
||
|
"\n",
|
||
|
"text_splitter = CharacterTextSplitter()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
1 year ago
|
"execution_count": 18,
|
||
1 year ago
|
"id": "2bfb099b",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
1 year ago
|
"with open('../../state_of_the_union.txt') as f:\n",
|
||
1 year ago
|
" state_of_the_union = f.read()\n",
|
||
|
"texts = text_splitter.split_text(state_of_the_union)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
1 year ago
|
"execution_count": 19,
|
||
1 year ago
|
"id": "f78b7f51",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from langchain.docstore.document import Document\n",
|
||
|
"docs = [Document(page_content=t) for t in texts[:3]]\n",
|
||
|
"from langchain.chains.summarize import load_summarize_chain"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
1 year ago
|
"execution_count": 20,
|
||
1 year ago
|
"id": "a2a30822",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"chain = load_summarize_chain(llm, chain_type=\"map_reduce\", reduce_llm=no_cache_llm)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
1 year ago
|
"execution_count": 21,
|
||
1 year ago
|
"id": "a545b743",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
1 year ago
|
"CPU times: user 452 ms, sys: 60.3 ms, total: 512 ms\n",
|
||
|
"Wall time: 5.09 s\n"
|
||
1 year ago
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
1 year ago
|
"'\\n\\nPresident Biden is discussing the American Rescue Plan and the Bipartisan Infrastructure Law, which will create jobs and help Americans. He also talks about his vision for America, which includes investing in education and infrastructure. In response to Russian aggression in Ukraine, the United States is joining with European allies to impose sanctions and isolate Russia. American forces are being mobilized to protect NATO countries in the event that Putin decides to keep moving west. The Ukrainians are bravely fighting back, but the next few weeks will be hard for them. Putin will pay a high price for his actions in the long run. Americans should not be alarmed, as the United States is taking action to protect its interests and allies.'"
|
||
1 year ago
|
]
|
||
|
},
|
||
1 year ago
|
"execution_count": 21,
|
||
1 year ago
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"%%time\n",
|
||
|
"chain.run(docs)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "3ed85e9d",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"When we run it again, we see that it runs substantially faster but the final answer is different. This is due to caching at the map steps, but not at the reduce step."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
1 year ago
|
"execution_count": 22,
|
||
1 year ago
|
"id": "39cbb282",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
1 year ago
|
"CPU times: user 11.5 ms, sys: 4.33 ms, total: 15.8 ms\n",
|
||
|
"Wall time: 1.04 s\n"
|
||
1 year ago
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
1 year ago
|
"'\\n\\nPresident Biden is discussing the American Rescue Plan and the Bipartisan Infrastructure Law, which will create jobs and help Americans. He also talks about his vision for America, which includes investing in education and infrastructure.'"
|
||
1 year ago
|
]
|
||
|
},
|
||
1 year ago
|
"execution_count": 22,
|
||
1 year ago
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"%%time\n",
|
||
|
"chain.run(docs)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "9df0dab8",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3 (ipykernel)",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
1 year ago
|
"version": "3.10.9"
|
||
1 year ago
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 5
|
||
|
}
|