Update chroma notebook (#7978)

Fix up the Chroma notebook
- remove `.persist()` -- this is no longer in Chroma as of `0.4.0`
- update output to match `0.4.0`
- other cleanup work
This commit is contained in:
Jeff Huber 2023-07-20 06:25:31 -07:00 committed by GitHub
parent 4a5894db47
commit 5694e7b8cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -45,7 +45,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 1,
"id": "ae9fcf3e",
"metadata": {},
"outputs": [
@ -53,7 +53,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Using embedded DuckDB without persistence: data will be transient\n"
"/Users/jeff/.pyenv/versions/3.10.10/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
@ -108,26 +109,15 @@
"\n",
"Extending the previous example, if you want to save to disk, simply initialize the Chroma client and pass the directory where you want the data to be saved. \n",
"\n",
"`Caution`: Chroma makes a best-effort to automatically save data to disk, however multiple in-memory clients can stomp each other's work. As a best practice, only have one client per path running at any given time.\n",
"\n",
"`Protip`: Sometimes you can call `db.persist()` to force a save. "
"`Caution`: Chroma makes a best effort to automatically save data to disk; however, multiple in-memory clients can stomp on each other's work. As a best practice, only have one client per path running at any given time."
]
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 2,
"id": "49f9bd49",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using embedded DuckDB with persistence: data will be stored in: ./chroma_db\n",
"Using embedded DuckDB with persistence: data will be stored in: ./chroma_db\n",
"No embedding_function provided, using default embedding function: SentenceTransformerEmbeddingFunction\n"
]
},
{
"name": "stdout",
"output_type": "stream",
@ -145,7 +135,6 @@
"source": [
"# save to disk\n",
"db2 = Chroma.from_documents(docs, embedding_function, persist_directory=\"./chroma_db\")\n",
"db2.persist()\n",
"docs = db2.similarity_search(query)\n",
"\n",
"# load from disk\n",
@ -154,6 +143,66 @@
"print(docs[0].page_content)"
]
},
{
"cell_type": "markdown",
"id": "63318cc9",
"metadata": {},
"source": [
"## Passing a Chroma Client into LangChain\n",
"\n",
"You can also create a Chroma Client and pass it to LangChain. This is particularly useful if you want easier access to the underlying database.\n",
"\n",
"You can also specify the collection name that you want LangChain to use."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "22f4a0ce",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Add of existing embedding ID: 1\n",
"Add of existing embedding ID: 2\n",
"Add of existing embedding ID: 3\n",
"Add of existing embedding ID: 1\n",
"Add of existing embedding ID: 2\n",
"Add of existing embedding ID: 3\n",
"Add of existing embedding ID: 1\n",
"Insert of existing embedding ID: 1\n",
"Add of existing embedding ID: 2\n",
"Insert of existing embedding ID: 2\n",
"Add of existing embedding ID: 3\n",
"Insert of existing embedding ID: 3\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"There are 3 in the collection\n"
]
}
],
"source": [
"import chromadb\n",
"\n",
"persistent_client = chromadb.PersistentClient()\n",
"collection = persistent_client.get_or_create_collection(\"collection_name\")\n",
"collection.add(ids=[\"1\", \"2\", \"3\"], documents=[\"a\", \"b\", \"c\"])\n",
"\n",
"langchain_chroma = Chroma(\n",
" client=persistent_client,\n",
" collection_name=\"collection_name\",\n",
" embedding_function=embedding_function,\n",
")\n",
"\n",
"print(\"There are\", langchain_chroma._collection.count(), \"in the collection\")"
]
},
{
"cell_type": "markdown",
"id": "e9cf6d70",
@ -174,18 +223,10 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 4,
"id": "74aee70e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"No embedding_function provided, using default embedding function: SentenceTransformerEmbeddingFunction\n",
"No embedding_function provided, using default embedding function: SentenceTransformerEmbeddingFunction\n"
]
},
{
"name": "stdout",
"output_type": "stream",
@ -206,13 +247,7 @@
"import uuid\n",
"from chromadb.config import Settings\n",
"\n",
"client = chromadb.Client(\n",
" Settings(\n",
" chroma_api_impl=\"rest\",\n",
" chroma_server_host=\"localhost\",\n",
" chroma_server_http_port=\"8000\",\n",
" )\n",
")\n",
"client = chromadb.HttpClient(settings=Settings(allow_reset=True))\n",
"client.reset() # resets the database\n",
"collection = client.create_collection(\"my_collection\")\n",
"for doc in docs:\n",
@ -244,25 +279,18 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 5,
"id": "81a02810",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using embedded DuckDB without persistence: data will be transient\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'source': '../../../state_of_the_union.txt', 'new_value': 'hello world'}\n",
"{'ids': ['1'], 'embeddings': None, 'documents': ['Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.'], 'metadatas': [{'source': '../../../state_of_the_union.txt', 'new_value': 'hello world'}]}\n",
"count before 4\n",
"count after 3\n"
"{'source': '../../../state_of_the_union.txt'}\n",
"{'ids': ['1'], 'embeddings': None, 'metadatas': [{'new_value': 'hello world', 'source': '../../../state_of_the_union.txt'}], 'documents': ['Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.']}\n",
"count before 46\n",
"count after 45\n"
]
}
],
@ -301,7 +329,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 6,
"id": "42080f37-8fd1-4cec-acd9-15d2b03b2f4d",
"metadata": {
"tags": []
@ -318,7 +346,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 7,
"id": "c7a94d6c-b4d4-4498-9bdd-eb50c92b85c5",
"metadata": {
"tags": []
@ -332,19 +360,12 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 8,
"id": "5eabdb75",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using embedded DuckDB without persistence: data will be transient\n"
]
},
{
"name": "stdout",
"output_type": "stream",
@ -361,10 +382,13 @@
],
"source": [
"embeddings = OpenAIEmbeddings()\n",
"db5 = Chroma.from_documents(docs, embeddings)\n",
"new_client = chromadb.EphemeralClient()\n",
"openai_lc_client = Chroma.from_documents(\n",
" docs, embeddings, client=new_client, collection_name=\"openai_collection\"\n",
")\n",
"\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = db.similarity_search(query)\n",
"docs = openai_lc_client.similarity_search(query)\n",
"print(docs[0].page_content)"
]
},
@ -396,7 +420,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 9,
"id": "72aaa9c8",
"metadata": {
"tags": []
@ -408,7 +432,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 10,
"id": "d88e958e",
"metadata": {
"tags": []
@ -418,10 +442,10 @@
"data": {
"text/plain": [
"(Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'}),\n",
" 0.3949805498123169)"
" 1.1972057819366455)"
]
},
"execution_count": 11,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@ -446,7 +470,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 11,
"id": "96ff911a",
"metadata": {},
"outputs": [],
@ -456,7 +480,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 12,
"id": "f00be6d0",
"metadata": {},
"outputs": [
@ -466,7 +490,7 @@
"Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'})"
]
},
"execution_count": 8,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@ -489,50 +513,17 @@
},
{
"cell_type": "code",
"execution_count": 17,
"id": "a5119221",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'source': 'some_other_source'}\n",
"{'ids': ['1'], 'embeddings': None, 'documents': ['Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.'], 'metadatas': [{'source': 'some_other_source'}]}\n"
]
}
],
"source": [
"# create simple ids\n",
"ids = [str(i) for i in range(1, len(docs) + 1)]\n",
"\n",
"# add data\n",
"example_db = Chroma.from_documents(docs, embedding_function, ids=ids)\n",
"docs = example_db.similarity_search(query)\n",
"print(docs[0].metadata)\n",
"\n",
"# update the source for a document\n",
"docs[0].metadata = {\"source\": \"some_other_source\"}\n",
"example_db.update_document(ids[0], docs[0])\n",
"print(example_db._collection.get(ids=[ids[0]]))"
]
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 13,
"id": "81600dc1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'ids': ['1'],\n",
" 'embeddings': None,\n",
" 'documents': ['Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.'],\n",
" 'metadatas': [{'source': 'some_other_source'}]}"
"{'ids': [], 'embeddings': None, 'metadatas': [], 'documents': []}"
]
},
"execution_count": 18,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@ -559,7 +550,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.10.10"
}
},
"nbformat": 4,