Minor change to use SearchIndexingBufferedSender to support optimized batch indexing (#712)

pull/725/head
Farzad Sunavala 8 months ago committed by GitHub
parent c777f1025a
commit 552262ea89
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -55,6 +55,7 @@
"from azure.search.documents import SearchClient \n",
"from azure.search.documents.indexes import SearchIndexClient \n",
"from azure.search.documents.models import Vector \n",
"from azure.search.documents import SearchIndexingBufferedSender\n",
"from azure.search.documents.indexes.models import ( \n",
" SearchIndex, \n",
" SearchField, \n",
@ -69,7 +70,7 @@
" SemanticSettings, \n",
" VectorSearch, \n",
" HnswVectorSearchAlgorithmConfiguration, \n",
") \n"
")"
]
},
{
@ -394,26 +395,19 @@
"# Convert the DataFrame to a list of dictionaries \n",
"documents = article_df.to_dict(orient='records') \n",
" \n",
"search_client = SearchClient(endpoint=search_service_endpoint, index_name=index_name, credential=credential) \n",
"# Use SearchIndexingBufferedSender to upload the documents in batches optimized for indexing \n",
"with SearchIndexingBufferedSender(search_service_endpoint, index_name, AzureKeyCredential(search_service_api_key)) as batch_client: \n",
" # Add upload actions for all documents \n",
" batch_client.upload_documents(documents=documents) \n",
" \n",
"# Define the batch upload size \n",
"batch_size = 250 \n",
" \n",
"# Split the documents into batches \n",
"batches = [documents[i:i + batch_size] for i in range(0, len(documents), batch_size)] \n",
" \n",
"# Upload each batch of documents \n",
"for batch in batches: \n",
" result = search_client.upload_documents(batch) \n",
" \n",
"print(f\"Uploaded {len(documents)} documents in total\") \n"
"print(f\"Uploaded {len(documents)} documents in total\") "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you dataset didn't already contain pre-computed embeddings, you can create embeddings by using the below function using the `openai` python library. You'll also notice the same function and model are being used to generate query embeddings for performing vector searches."
"If your dataset doesn't already contain pre-computed embeddings, you can create them with the function below, which uses the `openai` Python library. You'll also notice that the same function and model are used to generate query embeddings for performing vector searches."
]
},
{

Loading…
Cancel
Save