langchain/docs/extras/integrations/text_embedding/elasticsearch.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "1eZl1oaVUNeC"
   },
   "source": [
    "# Elasticsearch\n",
    "Walkthrough of how to generate embeddings using a hosted embedding model in Elasticsearch\n",
    "\n",
    "The easiest way to instantiate the `ElasticsearchEmbeddings` class it either\n",
    "- using the `from_credentials` constructor if you are using Elastic Cloud\n",
    "- or using the `from_es_connection` constructor with any Elasticsearch cluster"
   ],
   "id": "72644940"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "6dJxqebov4eU"
   },
   "outputs": [],
   "source": [
    "!pip -q install elasticsearch langchain"
   ],
   "id": "298759cb"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "RV7C3DUmv4aq"
   },
   "outputs": [],
   "source": [
    "import elasticsearch\n",
    "from langchain.embeddings.elasticsearch import ElasticsearchEmbeddings"
   ],
   "id": "76489aff"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "MrT3jplJvp09"
   },
   "outputs": [],
   "source": [
    "# Define the model ID\n",
    "model_id = \"your_model_id\""
   ],
   "id": "57bfdc82"
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "j5F-nwLVS_Zu"
   },
   "source": [
    "## Testing with `from_credentials`\n",
    "This required an Elastic Cloud `cloud_id`"
   ],
   "id": "0ffad1ec"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "svtdnC-dvpxR"
   },
   "outputs": [],
   "source": [
    "# Instantiate ElasticsearchEmbeddings using credentials\n",
    "embeddings = ElasticsearchEmbeddings.from_credentials(\n",
    "    model_id,\n",
    "    es_cloud_id=\"your_cloud_id\",\n",
    "    es_user=\"your_user\",\n",
    "    es_password=\"your_password\",\n",
    ")"
   ],
   "id": "fc2e9dcb"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "7DXZAK7Kvpth"
   },
   "outputs": [],
   "source": [
    "# Create embeddings for multiple documents\n",
    "documents = [\n",
    "    \"This is an example document.\",\n",
    "    \"Another example document to generate embeddings for.\",\n",
    "]\n",
    "document_embeddings = embeddings.embed_documents(documents)"
   ],
   "id": "8ee7f1fc"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "K8ra75W_vpqy"
   },
   "outputs": [],
   "source": [
    "# Print document embeddings\n",
    "for i, embedding in enumerate(document_embeddings):\n",
    "    print(f\"Embedding for document {i+1}: {embedding}\")"
   ],
   "id": "0b9d8471"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "V4Q5kQo9vpna"
   },
   "outputs": [],
   "source": [
    "# Create an embedding for a single query\n",
    "query = \"This is a single query.\"\n",
    "query_embedding = embeddings.embed_query(query)"
   ],
   "id": "3989ab23"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "O0oQDzGKvpkz"
   },
   "outputs": [],
   "source": [
    "# Print query embedding\n",
    "print(f\"Embedding for query: {query_embedding}\")"
   ],
   "id": "0da6d2bf"
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "rHN03yV6TJ5q"
   },
   "source": [
    "## Testing with Existing Elasticsearch client connection\n",
    "This can be used with any Elasticsearch deployment"
   ],
   "id": "32700096"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "GMQcJDwBTJFm"
   },
   "outputs": [],
   "source": [
    "# Create Elasticsearch connection\n",
    "es_connection = Elasticsearch(\n",
    "    hosts=[\"https://es_cluster_url:port\"], basic_auth=(\"user\", \"password\")\n",
    ")"
   ],
   "id": "0bc60465"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "WTYIU4u3TJO1"
   },
   "outputs": [],
   "source": [
    "# Instantiate ElasticsearchEmbeddings using es_connection\n",
    "embeddings = ElasticsearchEmbeddings.from_es_connection(\n",
    "    model_id,\n",
    "    es_connection,\n",
    ")"
   ],
   "id": "8085843b"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "4gdAUHwoTJO3"
   },
   "outputs": [],
   "source": [
    "# Create embeddings for multiple documents\n",
    "documents = [\n",
    "    \"This is an example document.\",\n",
    "    \"Another example document to generate embeddings for.\",\n",
    "]\n",
    "document_embeddings = embeddings.embed_documents(documents)"
   ],
   "id": "59a90bf3"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "RC_-tov6TJO3"
   },
   "outputs": [],
   "source": [
    "# Print document embeddings\n",
    "for i, embedding in enumerate(document_embeddings):\n",
    "    print(f\"Embedding for document {i+1}: {embedding}\")"
   ],
   "id": "54b18673"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "6GEnHBqETJO3"
   },
   "outputs": [],
   "source": [
    "# Create an embedding for a single query\n",
    "query = \"This is a single query.\"\n",
    "query_embedding = embeddings.embed_query(query)"
   ],
   "id": "a4812d5e"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "-kyUQAXDTJO4"
   },
   "outputs": [],
   "source": [
    "# Print query embedding\n",
    "print(f\"Embedding for query: {query_embedding}\")"
   ],
   "id": "c6c69916"
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}