diff --git a/docs/extras/integrations/llms/llm_caching.ipynb b/docs/extras/integrations/llms/llm_caching.ipynb index 0d0ba39744..9e1f7bebb2 100644 --- a/docs/extras/integrations/llms/llm_caching.ipynb +++ b/docs/extras/integrations/llms/llm_caching.ipynb @@ -95,7 +95,7 @@ { "data": { "text/plain": [ - "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'" + "\"\\n\\nWhy couldn't the bicycle stand up by itself? It was...two tired!\"" ] }, "execution_count": 7, @@ -811,6 +811,228 @@ "langchain.llm_cache = SQLAlchemyCache(engine, FulltextLLMCache)" ] }, + { + "cell_type": "markdown", + "id": "eeba7d60", + "metadata": {}, + "source": [ + "## `Cassandra` caches\n", + "\n", + "You can use Cassandra / Astra DB for caching LLM responses, choosing from the exact-match `CassandraCache` or the (vector-similarity-based) `CassandraSemanticCache`.\n", + "\n", + "Let's see both in action in the following cells." + ] + }, + { + "cell_type": "markdown", + "id": "a4a6725d", + "metadata": {}, + "source": [ + "#### Connect to the DB\n", + "\n", + "First you need to establish a `Session` to the DB and to specify a _keyspace_ for the cache table(s). The following gets you started with an Astra DB instance (see e.g. [here](https://cassio.org/start_here/#vector-database) for more backends and connection options)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "cc53ce1b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Keyspace name? my_keyspace\n", + "\n", + "Astra DB Token (\"AstraCS:...\") ········\n", + "Full path to your Secure Connect Bundle? /path/to/secure-connect-databasename.zip\n" + ] + } + ], + "source": [ + "import getpass\n", + "\n", + "keyspace = input(\"\\nKeyspace name? \")\n", + "ASTRA_DB_APPLICATION_TOKEN = getpass.getpass('\\nAstra DB Token (\"AstraCS:...\") ')\n", + "ASTRA_DB_SECURE_BUNDLE_PATH = input(\"Full path to your Secure Connect Bundle? 
\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4617f485", + "metadata": {}, + "outputs": [], + "source": [ + "from cassandra.cluster import Cluster\n", + "from cassandra.auth import PlainTextAuthProvider\n", + "\n", + "cluster = Cluster(\n", + " cloud={\n", + " \"secure_connect_bundle\": ASTRA_DB_SECURE_BUNDLE_PATH,\n", + " },\n", + " auth_provider=PlainTextAuthProvider(\"token\", ASTRA_DB_APPLICATION_TOKEN),\n", + ")\n", + "session = cluster.connect()" + ] + }, + { + "cell_type": "markdown", + "id": "8665664a", + "metadata": {}, + "source": [ + "### Exact cache\n", + "\n", + "This will avoid invoking the LLM when the supplied prompt is _exactly_ the same as one encountered already:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "00a5e66f", + "metadata": {}, + "outputs": [], + "source": [ + "import langchain\n", + "from langchain.cache import CassandraCache\n", + "\n", + "langchain.llm_cache = CassandraCache(session=session, keyspace=keyspace)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "956a5145", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "The Moon always shows the same side because it is tidally locked to Earth.\n", + "CPU times: user 41.7 ms, sys: 153 µs, total: 41.8 ms\n", + "Wall time: 1.96 s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "print(llm(\"Why is the Moon always showing the same side?\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "158f0151", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "The Moon always shows the same side because it is tidally locked to Earth.\n", + "CPU times: user 4.09 ms, sys: 0 ns, total: 4.09 ms\n", + "Wall time: 119 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "print(llm(\"Why is the Moon always showing the same side?\"))" + ] + }, + { + "cell_type": "markdown", + 
"id": "8fc4d017", + "metadata": {}, + "source": [ + "### Semantic cache\n", + "\n", + "This cache will do a semantic similarity search and return a hit if it finds a cached entry that is similar enough. For this, you need to provide an `Embeddings` instance of your choice." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "b9ad3f54", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.embeddings import OpenAIEmbeddings\n", + "\n", + "embedding=OpenAIEmbeddings()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "4623f95e", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.cache import CassandraSemanticCache\n", + "\n", + "langchain.llm_cache = CassandraSemanticCache(\n", + " session=session, keyspace=keyspace, embedding=embedding, table_name=\"cass_sem_cache\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "1a8e577b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "The Moon always shows the same side because it is tidally locked with Earth. This means that the same side of the Moon always faces Earth.\n", + "CPU times: user 21.3 ms, sys: 177 µs, total: 21.4 ms\n", + "Wall time: 3.09 s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "print(llm(\"Why is the Moon always showing the same side?\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "f7abddfd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "The Moon always shows the same side because it is tidally locked with Earth. This means that the same side of the Moon always faces Earth.\n", + "CPU times: user 10.9 ms, sys: 17 µs, total: 10.9 ms\n", + "Wall time: 461 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "print(llm(\"How come we always see one face of the moon?\"))" + ] + }, { "cell_type": "markdown", "id": "0c69d84d",