DOCS updated `async-faiss` example (#13434)

The original notebook has the `faiss` title, which duplicates the title
of `faiss.ipynb`. As a result, we have two `faiss` items in the
vectorstore ToC, and the first item breaks the alphabetical order (it is
placed between the `A...` items).
- I updated the title to `Asynchronous Faiss`.
pull/13492/head
Leonid Ganeline 8 months ago committed by GitHub
parent 9dfad613c2
commit 1d2981114f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -38,8 +38,8 @@
},
{
"cell_type": "code",
"execution_count": 2,
"id": "47f9b495-88f1-4286-8d5d-1416103931a7",
"execution_count": null,
"id": "dc37144c-208d-4ab3-9f3a-0407a69fe052",
"metadata": {
"tags": []
},
@ -51,33 +51,13 @@
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
"\n",
"# Uncomment the following line if you need to initialize FAISS with no AVX2 optimization\n",
"# os.environ['FAISS_NO_AVX2'] = '1'"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "aac9563e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# os.environ['FAISS_NO_AVX2'] = '1'\n",
"\n",
"from langchain.document_loaders import TextLoader\n",
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.vectorstores import FAISS"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a3c3999a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.vectorstores import FAISS\n",
"\n",
"from langchain.document_loaders import TextLoader\n",
"\n",
"loader = TextLoader(\"../../../extras/modules/state_of_the_union.txt\")\n",
@ -200,31 +180,15 @@
},
{
"cell_type": "code",
"execution_count": 16,
"id": "428a6816",
"metadata": {},
"outputs": [],
"source": [
"db.save_local(\"faiss_index\")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "56d1841c",
"metadata": {},
"outputs": [],
"source": [
"new_db = FAISS.load_local(\"faiss_index\", embeddings)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "39055525",
"execution_count": null,
"id": "1b31fe27-e0b3-42c6-b17c-8270b517ee1f",
"metadata": {},
"outputs": [],
"source": [
"db.save_local(\"faiss_index\")\n",
"\n",
"new_db = FAISS.load_local(\"faiss_index\", embeddings)\n",
"\n",
"docs = new_db.similarity_search(query)"
]
},
@ -266,30 +230,11 @@
"metadata": {},
"outputs": [],
"source": [
"pkl = db.serialize_to_bytes() # serializes the faiss index"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eb083247",
"metadata": {
"vscode": {
"languageId": "r"
}
},
"outputs": [],
"source": [
"embeddings = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e36e220b",
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings.huggingface import HuggingFaceEmbeddings\n",
"\n",
"pkl = db.serialize_to_bytes() # serializes the faiss\n",
"embeddings = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n",
"\n",
"db = FAISS.deserialize_from_bytes(\n",
" embeddings=embeddings, serialized=pkl\n",
") # Load the index"
@ -306,33 +251,14 @@
},
{
"cell_type": "code",
"execution_count": 20,
"id": "6dfd2b78",
"execution_count": null,
"id": "9b8f5e31-3f40-4e94-8d97-5883125efba7",
"metadata": {},
"outputs": [],
"source": [
"db1 = FAISS.from_texts([\"foo\"], embeddings)\n",
"db2 = FAISS.from_texts([\"bar\"], embeddings)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "29960da7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'068c473b-d420-487a-806b-fb0ccea7f711': Document(page_content='foo', metadata={})}"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"db2 = FAISS.from_texts([\"bar\"], embeddings)\n",
"\n",
"db1.docstore._dict"
]
},

@ -5,15 +5,16 @@
"id": "683953b3",
"metadata": {},
"source": [
"# Faiss\n",
"# Faiss (Async)\n",
"\n",
">[Facebook AI Similarity Search (Faiss)](https://engineering.fb.com/2017/03/29/data-infrastructure/faiss-a-library-for-efficient-similarity-search/) is a library for efficient similarity search and clustering of dense vectors. It contains algorithms that search in sets of vectors of any size, up to ones that possibly do not fit in RAM. It also contains supporting code for evaluation and parameter tuning.\n",
"\n",
"[Faiss documentation](https://faiss.ai/).\n",
"\n",
"This notebook shows how to use functionality related to the `FAISS` vector database using asyncio.\n",
"This notebook shows how to use functionality related to the `FAISS` vector database using `asyncio`.\n",
"LangChain implemented the synchronous and asynchronous vector store functions.\n",
"\n",
"See synchronous version [here](https://python.langchain.com/docs/integrations/vectorstores/faiss)."
"See `synchronous` version [here](https://python.langchain.com/docs/integrations/vectorstores/faiss)."
]
},
{
@ -40,8 +41,8 @@
},
{
"cell_type": "code",
"execution_count": 1,
"id": "47f9b495-88f1-4286-8d5d-1416103931a7",
"execution_count": null,
"id": "971a172a-2d87-4eec-be92-87aa174fec30",
"metadata": {
"tags": []
},
@ -53,33 +54,13 @@
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
"\n",
"# Uncomment the following line if you need to initialize FAISS with no AVX2 optimization\n",
"# os.environ['FAISS_NO_AVX2'] = '1'"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "aac9563e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# os.environ['FAISS_NO_AVX2'] = '1'\n",
"\n",
"from langchain.document_loaders import TextLoader\n",
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.vectorstores import FAISS"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "a3c3999a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.vectorstores import FAISS\n",
"\n",
"from langchain.document_loaders import TextLoader\n",
"\n",
"loader = TextLoader(\"../../../extras/modules/state_of_the_union.txt\")\n",
@ -87,47 +68,13 @@
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n",
"\n",
"embeddings = OpenAIEmbeddings()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "5eabdb75",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"embeddings = OpenAIEmbeddings()\n",
"\n",
"db = await FAISS.afrom_documents(docs, embeddings)\n",
"\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = await db.asimilarity_search(query)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "4b172de8",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
"\n",
"Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
"\n",
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
"\n",
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n"
]
}
],
"source": [
"docs = await db.asimilarity_search(query)\n",
"\n",
"print(docs[0].page_content)"
]
},
@ -142,33 +89,13 @@
},
{
"cell_type": "code",
"execution_count": 8,
"id": "186ee1d8",
"execution_count": null,
"id": "30bf7c85-a273-45dc-ae9e-f138e330b42e",
"metadata": {},
"outputs": [],
"source": [
"docs_and_scores = await db.asimilarity_search_with_score(query)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "284e04b5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': './state_of_the_union.txt'}),\n",
" 0.36871302)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs_and_scores = await db.asimilarity_search_with_score(query)\n",
"\n",
"docs_and_scores[0]"
]
},
@ -202,52 +129,17 @@
},
{
"cell_type": "code",
"execution_count": 11,
"id": "428a6816",
"metadata": {},
"outputs": [],
"source": [
"db.save_local(\"faiss_index\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "56d1841c",
"metadata": {},
"outputs": [],
"source": [
"new_db = FAISS.load_local(\"faiss_index\", embeddings, asynchronous=True)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "39055525",
"execution_count": null,
"id": "88e11f08-1ac8-45aa-8bc0-56439ef87256",
"metadata": {},
"outputs": [],
"source": [
"docs = await new_db.asimilarity_search(query)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "98378c4e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': './state_of_the_union.txt'})"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"db.save_local(\"faiss_index\")\n",
"\n",
"new_db = FAISS.load_local(\"faiss_index\", embeddings, asynchronous=True)\n",
"\n",
"docs = await new_db.asimilarity_search(query)\n",
"\n",
"docs[0]"
]
},
@ -261,26 +153,6 @@
"You can pickle the FAISS index with these functions. If you use an embeddings model that is 90 MB in size (sentence-transformers/all-MiniLM-L6-v2 or any other model), the resulting pickle would be larger than 90 MB, because the size of the model is also included in the overall size. To overcome this, use the functions below. These functions serialize only the FAISS index, so the size is much smaller. This can be helpful if you wish to store the index in a database such as SQL."
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "d8faead5",
"metadata": {},
"outputs": [],
"source": [
"pkl = db.serialize_to_bytes() # serializes the faiss index"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eb083247",
"metadata": {},
"outputs": [],
"source": [
"embeddings = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")"
]
},
{
"cell_type": "code",
"execution_count": null,
@ -288,6 +160,10 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings.huggingface import HuggingFaceEmbeddings\n",
"\n",
"pkl = db.serialize_to_bytes() # serializes the faiss index\n",
"embeddings = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n",
"db = FAISS.deserialize_from_bytes(\n",
" embeddings=embeddings, serialized=pkl, asynchronous=True\n",
") # Load the index"
@ -596,7 +472,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
"version": "3.10.12"
}
},
"nbformat": 4,

@ -500,6 +500,10 @@
"source": "/docs/integrations/vectorstores/cassandra",
"destination": "/docs/integrations/vectorstores/astradb"
},
{
"source": "/docs/integrations/vectorstores/async_faiss",
"destination": "/docs/integrations/vectorstores/faiss_async"
},
{
"source": "/docs/integrations/cerebriumai",
"destination": "/docs/integrations/providers/cerebriumai"

Loading…
Cancel
Save