From 8ee56b9a5b3751db122bd896daeb1e0b7766def3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Am=C3=A9lie?= Date: Sat, 29 Jul 2023 02:06:54 +0200 Subject: [PATCH] Feature: Add support for meilisearch vectorstore (#7649) **Description:** Add support for Meilisearch vector store. Resolve #7603 - No external dependencies added - A notebook has been added @rlancemartin https://twitter.com/meilisearch Co-authored-by: Bagatur --- .../vectorstores/meilisearch.ipynb | 306 +++++++++++++++++ .../langchain/vectorstores/__init__.py | 2 + .../langchain/vectorstores/meilisearch.py | 312 ++++++++++++++++++ .../docker-compose/meilisearch.yaml | 17 + .../vectorstores/test_meilisearch.py | 143 ++++++++ 5 files changed, 780 insertions(+) create mode 100644 docs/extras/integrations/vectorstores/meilisearch.ipynb create mode 100644 libs/langchain/langchain/vectorstores/meilisearch.py create mode 100644 libs/langchain/tests/integration_tests/vectorstores/docker-compose/meilisearch.yaml create mode 100644 libs/langchain/tests/integration_tests/vectorstores/test_meilisearch.py diff --git a/docs/extras/integrations/vectorstores/meilisearch.ipynb b/docs/extras/integrations/vectorstores/meilisearch.ipynb new file mode 100644 index 0000000000..7f640ea0e4 --- /dev/null +++ b/docs/extras/integrations/vectorstores/meilisearch.ipynb @@ -0,0 +1,306 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Meilisearch\n", + "\n", + "> [Meilisearch](https://meilisearch.com) is an open-source, lightning-fast, and hyper relevant search engine. It comes with great defaults to help developers build snappy search experiences. \n", + ">\n", + "> You can [self-host Meilisearch](https://www.meilisearch.com/docs/learn/getting_started/installation#local-installation) or run on [Meilisearch Cloud](https://www.meilisearch.com/pricing).\n", + "\n", + "Meilisearch v1.3 supports vector search. 
This page guides you through integrating Meilisearch as a vector store and using it to perform vector search." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "### Launching a Meilisearch instance\n", + "\n", + "You will need a running Meilisearch instance to use as your vector store. You can run [Meilisearch locally](https://www.meilisearch.com/docs/learn/getting_started/installation#local-installation) or create a [Meilisearch Cloud](https://cloud.meilisearch.com/) account.\n", + "\n", + "As of Meilisearch v1.3, vector storage is an experimental feature. After launching your Meilisearch instance, you need to **enable vector storage**. For self-hosted Meilisearch, read the docs on [enabling experimental features](https://www.meilisearch.com/docs/learn/experimental/vector-search). On **Meilisearch Cloud**, enable _Vector Store_ via your project _Settings_ page.\n", + "\n", + "You should now have a running Meilisearch instance with vector storage enabled. 🎉\n", + "\n", + "### Credentials\n", + "\n", + "To interact with your Meilisearch instance, the Meilisearch SDK needs a host (URL of your instance) and an API key.\n", + "\n", + "**Host**\n", + "\n", + "- **Locally**, the default host is `localhost:7700`\n", + "- On **Meilisearch Cloud**, find the host in your project _Settings_ page\n", + "\n", + "**API keys**\n", + "\n", + "A Meilisearch instance provides you with three API keys out of the box: \n", + "- A `MASTER KEY` — it should only be used to create your Meilisearch instance\n", + "- An `ADMIN KEY` — use it only server-side to update your database and its settings\n", + "- A `SEARCH KEY` — a key that you can safely share in front-end applications\n", + "\n", + "You can create [additional API keys](https://www.meilisearch.com/docs/learn/security/master_api_keys) as needed." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Installing dependencies\n", + "\n", + "This guide uses the [Meilisearch Python SDK](https://github.com/meilisearch/meilisearch-python). You can install it by running:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install meilisearch" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For more information, refer to the [Meilisearch Python SDK documentation](https://meilisearch.github.io/meilisearch-python/)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Examples\n", + "\n", + "There are multiple ways to initialize the Meilisearch vector store: providing a Meilisearch client or the _URL_ and _API key_ as needed. In our examples, the credentials will be loaded from the environment.\n", + "\n", + "You can make environment variables available in your Notebook environment by using `os` and `getpass`. You can use this technique for all the following examples." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import getpass\n", + "\n", + "os.environ[\"MEILI_HTTP_ADDR\"] = getpass.getpass(\"Meilisearch HTTP address and port:\")\n", + "os.environ[\"MEILI_MASTER_KEY\"] = getpass.getpass(\"Meilisearch API Key:\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We want to use OpenAIEmbeddings so we have to get the OpenAI API Key." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Adding text and embeddings\n", + "\n", + "This example adds text to the Meilisearch vector database without having to initialize a Meilisearch vector store." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.vectorstores import Meilisearch\n", + "from langchain.embeddings.openai import OpenAIEmbeddings\n", + "from langchain.text_splitter import CharacterTextSplitter\n", + "\n", + "embeddings = OpenAIEmbeddings()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"../../../state_of_the_union.txt\") as f:\n", + " state_of_the_union = f.read()\n", + "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", + "texts = text_splitter.split_text(state_of_the_union)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Use Meilisearch vector store to store texts & associated embeddings as vector\n", + "vector_store = Meilisearch.from_texts(texts=texts, embedding=embeddings)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Behind the scenes, Meilisearch will convert the text to multiple vectors. This will bring us to the same result as the following example." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Adding documents and embeddings\n", + "\n", + "In this example, we'll use Langchain TextSplitter to split the text in multiple documents. Then, we'll store these documents along with their embeddings." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.document_loaders import TextLoader\n", + "\n", + "# Load text\n", + "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "documents = loader.load()\n", + "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", + "\n", + "# Create documents\n", + "docs = text_splitter.split_documents(documents)\n", + "\n", + "# Import documents & embeddings in the vector store\n", + "vector_store = Meilisearch.from_documents(documents=documents, embedding=embeddings)\n", + "\n", + "# Search in our vector store\n", + "query = \"What did the president say about Ketanji Brown Jackson\"\n", + "docs = vector_store.similarity_search(query)\n", + "print(docs[0].page_content)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Add documents by creating a Meilisearch Vectorstore" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this approach, we create a vector store object and add documents to it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.vectorstores import Meilisearch\n", + "import meilisearch\n", + "\n", + "client = meilisearch.Client(url=\"http://127.0.0.1:7700\", api_key=\"***\")\n", + "vector_store = Meilisearch(\n", + " embedding=embeddings, client=client, index_name=\"langchain_demo\", text_key=\"text\"\n", + ")\n", + "vector_store.add_documents(documents)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Similarity Search with score\n", + "\n", + "This specific method allows you to return the documents and the distance score of the query to them." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "docs_and_scores = vector_store.similarity_search_with_score(query)\n", + "docs_and_scores[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Similarity Search by vector" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "embedding_vector = embeddings.embed_query(query)\n", + "docs_and_scores = vector_store.similarity_search_by_vector(embedding_vector)\n", + "docs_and_scores[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Additional resources\n", + "\n", + "Documentation\n", + "- [Meilisearch](https://www.meilisearch.com/docs/)\n", + "- [Meilisearch Python SDK](https://python-sdk.meilisearch.com)\n", + "\n", + "Open-source repositories\n", + "- [Meilisearch repository](https://github.com/meilisearch/meilisearch)\n", + "- [Meilisearch Python SDK](https://github.com/meilisearch/meilisearch-python)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/libs/langchain/langchain/vectorstores/__init__.py b/libs/langchain/langchain/vectorstores/__init__.py index dc524f5117..c4eeed02ed 100644 --- a/libs/langchain/langchain/vectorstores/__init__.py +++ b/libs/langchain/langchain/vectorstores/__init__.py @@ -24,6 +24,7 @@ from langchain.vectorstores.hologres import Hologres from langchain.vectorstores.lancedb import LanceDB from langchain.vectorstores.marqo import Marqo from langchain.vectorstores.matching_engine import MatchingEngine +from 
langchain.vectorstores.meilisearch import Meilisearch
 from langchain.vectorstores.milvus import Milvus
 from langchain.vectorstores.mongodb_atlas import MongoDBAtlasVectorSearch
 from langchain.vectorstores.myscale import MyScale, MyScaleSettings
@@ -68,6 +69,7 @@ __all__ = [
     "LanceDB",
     "MatchingEngine",
     "Marqo",
+    "Meilisearch",
     "Milvus",
     "Zilliz",
     "SingleStoreDB",
diff --git a/libs/langchain/langchain/vectorstores/meilisearch.py b/libs/langchain/langchain/vectorstores/meilisearch.py
new file mode 100644
index 0000000000..cb9f5d9841
--- /dev/null
+++ b/libs/langchain/langchain/vectorstores/meilisearch.py
@@ -0,0 +1,362 @@
+"""Wrapper around Meilisearch vector database."""
+from __future__ import annotations
+
+import uuid
+from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Type
+
+from langchain.docstore.document import Document
+from langchain.embeddings.base import Embeddings
+from langchain.utils import get_from_env
+from langchain.vectorstores.base import VectorStore
+
+if TYPE_CHECKING:
+    from meilisearch import Client
+
+
+def _create_client(
+    client: Optional[Client] = None,
+    url: Optional[str] = None,
+    api_key: Optional[str] = None,
+) -> Client:
+    """Return a ready-to-use Meilisearch client.
+
+    Args:
+        client: Existing ``meilisearch.Client`` to validate and reuse.
+        url: Instance URL, used only when ``client`` is not given. Falls back
+            to the ``MEILI_HTTP_ADDR`` environment variable.
+        api_key: API key, used only when ``client`` is not given. Falls back
+            to the ``MEILI_MASTER_KEY`` environment variable and is left
+            unset when neither is available.
+
+    Returns:
+        The client passed in, or a newly created one.
+
+    Raises:
+        ValueError: If the ``meilisearch`` package is not installed, if
+            ``client`` is not a ``meilisearch.Client``, or if the version
+            probe raises ``ValueError``.
+    """
+    try:
+        import meilisearch
+    except ImportError as e:
+        raise ValueError(
+            "Could not import meilisearch python package. "
+            "Please install it with `pip install meilisearch`."
+        ) from e
+    if client is None:
+        url = url or get_from_env("url", "MEILI_HTTP_ADDR")
+        try:
+            api_key = api_key or get_from_env("api_key", "MEILI_MASTER_KEY")
+        except ValueError:
+            # The API key is optional, so a missing environment variable is
+            # not an error; continue without a key.
+            pass
+        client = meilisearch.Client(url=url, api_key=api_key)
+    elif not isinstance(client, meilisearch.Client):
+        raise ValueError(
+            f"client should be an instance of meilisearch.Client, "
+            f"got {type(client)}"
+        )
+    try:
+        # Probe the server once so an unusable client is reported here
+        # instead of on the first indexing or search call.
+        # NOTE(review): only ValueError is translated; exceptions defined by
+        # the meilisearch SDK propagate unchanged -- confirm this is intended.
+        client.version()
+    except ValueError as e:
+        raise ValueError(f"Failed to connect to Meilisearch: {e}") from e
+    return client
+
+
+class Meilisearch(VectorStore):
+    """Wrapper around Meilisearch vector database.
+
+    To use this, you need to have `meilisearch` python package installed,
+    and a running Meilisearch instance.
+
+    To learn more about Meilisearch Python, refer to the in-depth
+    Meilisearch Python documentation: https://meilisearch.github.io/meilisearch-python/.
+
+    See the following documentation for how to run a Meilisearch instance:
+    https://www.meilisearch.com/docs/learn/getting_started/quick_start.
+
+    Example:
+        .. code-block:: python
+
+            from langchain.vectorstores import Meilisearch
+            from langchain.embeddings.openai import OpenAIEmbeddings
+            import meilisearch
+
+            # api_key is optional; provide it if your meilisearch instance requires it
+            client = meilisearch.Client(url='http://127.0.0.1:7700', api_key='***')
+            embeddings = OpenAIEmbeddings()
+            vectorstore = Meilisearch(
+                embedding=embeddings,
+                client=client,
+                index_name='langchain_demo',
+                text_key='text')
+    """
+
+    def __init__(
+        self,
+        embedding: Embeddings,
+        client: Optional[Client] = None,
+        url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        index_name: str = "langchain-demo",
+        text_key: str = "text",
+        metadata_key: str = "metadata",
+    ):
+        """Initialize with Meilisearch client.
+
+        Args:
+            embedding (Embeddings): Embedding model used for texts and queries.
+            client (Optional[Client]): Existing Meilisearch client to reuse.
+            url (Optional[str]): Instance URL, used when no client is given.
+            api_key (Optional[str]): API key, used when no client is given.
+            index_name (str): Name of the index documents are stored in.
+            text_key (str): Key, inside the metadata object, holding the text.
+            metadata_key (str): Document field holding the metadata object.
+        """
+        client = _create_client(client=client, url=url, api_key=api_key)
+
+        self._client = client
+        self._index_name = index_name
+        self._embedding = embedding
+        self._text_key = text_key
+        self._metadata_key = metadata_key
+
+    def add_texts(
+        self,
+        texts: Iterable[str],
+        metadatas: Optional[List[dict]] = None,
+        ids: Optional[List[str]] = None,
+        **kwargs: Any,
+    ) -> List[str]:
+        """Run more texts through the embedding and add them to the vector store.
+
+        Args:
+            texts (Iterable[str]): Iterable of strings/text to add to the vectorstore.
+            metadatas (Optional[List[dict]]): Optional list of metadata.
+                Defaults to None.
+            ids Optional[List[str]]: Optional list of IDs.
+                Defaults to None.
+
+        Returns:
+            List[str]: List of IDs of the texts added to the vectorstore.
+        """
+        texts = list(texts)
+        if ids is None:
+            ids = [uuid.uuid4().hex for _ in texts]
+        if metadatas is None:
+            metadatas = [{} for _ in texts]
+        if not texts:
+            # Nothing to embed or index; skip the embedding and HTTP calls.
+            return ids
+
+        embedding_vectors = self._embedding.embed_documents(texts)
+
+        docs = []
+        for i, text in enumerate(texts):
+            # Copy so the caller's metadata dicts are not modified when the
+            # text is stored alongside the metadata.
+            metadata = {**metadatas[i], self._text_key: text}
+            docs.append(
+                {
+                    "id": ids[i],
+                    "_vectors": embedding_vectors[i],
+                    self._metadata_key: metadata,
+                }
+            )
+
+        # Send to Meilisearch
+        self._client.index(str(self._index_name)).add_documents(docs)
+        return ids
+
+    def similarity_search(
+        self,
+        query: str,
+        k: int = 4,
+        filter: Optional[Dict[str, str]] = None,
+        **kwargs: Any,
+    ) -> List[Document]:
+        """Return meilisearch documents most similar to the query.
+
+        Args:
+            query (str): Query text for which to find similar documents.
+            k (int): Number of documents to return. Defaults to 4.
+            filter (Optional[Dict[str, str]]): Filter by metadata.
+                Defaults to None.
+
+        Returns:
+            List[Document]: List of Documents most similar to the query text.
+        """
+        docs_and_scores = self.similarity_search_with_score(
+            query=query,
+            k=k,
+            filter=filter,
+            **kwargs,
+        )
+        return [doc for doc, _ in docs_and_scores]
+
+    def similarity_search_with_score(
+        self,
+        query: str,
+        k: int = 4,
+        filter: Optional[Dict[str, str]] = None,
+        **kwargs: Any,
+    ) -> List[Tuple[Document, float]]:
+        """Return meilisearch documents most similar to the query, along with scores.
+
+        Args:
+            query (str): Query text for which to find similar documents.
+            k (int): Number of documents to return. Defaults to 4.
+            filter (Optional[Dict[str, str]]): Filter by metadata.
+                Defaults to None.
+
+        Returns:
+            List[Tuple[Document, float]]: Documents most similar to the query
+                text, each paired with its score.
+        """
+        _query = self._embedding.embed_query(query)
+
+        docs = self.similarity_search_by_vector_with_scores(
+            embedding=_query,
+            k=k,
+            filter=filter,
+            **kwargs,
+        )
+        return docs
+
+    def similarity_search_by_vector_with_scores(
+        self,
+        embedding: List[float],
+        k: int = 4,
+        filter: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> List[Tuple[Document, float]]:
+        """Return meilisearch documents most similar to embedding vector.
+
+        Args:
+            embedding (List[float]): Embedding to look up similar documents.
+            k (int): Number of documents to return. Defaults to 4.
+            filter (Optional[Dict[str, str]]): Filter by metadata.
+                Defaults to None.
+
+        Returns:
+            List[Tuple[Document, float]]: Documents most similar to the query
+                vector, each paired with its score.
+        """
+        docs = []
+        results = self._client.index(str(self._index_name)).search(
+            "", {"vector": embedding, "limit": k, "filter": filter}
+        )
+
+        for result in results["hits"]:
+            metadata = result[self._metadata_key]
+            if self._text_key not in metadata:
+                # Without the text there is nothing to build a Document
+                # from, so the hit is skipped.
+                continue
+            text = metadata.pop(self._text_key)
+            semantic_score = result["_semanticScore"]
+            docs.append(
+                (Document(page_content=text, metadata=metadata), semantic_score)
+            )
+
+        return docs
+
+    def similarity_search_by_vector(
+        self,
+        embedding: List[float],
+        k: int = 4,
+        filter: Optional[Dict[str, str]] = None,
+        **kwargs: Any,
+    ) -> List[Document]:
+        """Return meilisearch documents most similar to embedding vector.
+
+        Args:
+            embedding (List[float]): Embedding to look up similar documents.
+            k (int): Number of documents to return. Defaults to 4.
+            filter (Optional[Dict[str, str]]): Filter by metadata.
+                Defaults to None.
+
+        Returns:
+            List[Document]: List of Documents most similar to the query vector.
+        """
+        docs = self.similarity_search_by_vector_with_scores(
+            embedding=embedding,
+            k=k,
+            filter=filter,
+            **kwargs,
+        )
+        return [doc for doc, _ in docs]
+
+    @classmethod
+    def from_texts(
+        cls: Type[Meilisearch],
+        texts: List[str],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        client: Optional[Client] = None,
+        url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        index_name: str = "langchain-demo",
+        ids: Optional[List[str]] = None,
+        text_key: Optional[str] = "text",
+        metadata_key: Optional[str] = "metadata",
+        **kwargs: Any,
+    ) -> Meilisearch:
+        """Construct Meilisearch wrapper from raw documents.
+
+        This is a user-friendly interface that:
+            1. Embeds documents.
+            2. Adds the documents to a provided Meilisearch index.
+
+        This is intended to be a quick way to get started.
+
+        Args:
+            texts (List[str]): Texts to embed and store.
+            embedding (Embeddings): Embedding model used for texts and queries.
+            metadatas (Optional[List[dict]]): Optional list of metadata.
+            client (Optional[Client]): Existing Meilisearch client to reuse.
+            url (Optional[str]): Instance URL, used when no client is given.
+            api_key (Optional[str]): API key, used when no client is given.
+            index_name (str): Name of the index documents are stored in.
+            ids (Optional[List[str]]): Optional list of IDs.
+            text_key (Optional[str]): Key holding the text inside the metadata
+                object. ``None`` selects the default, "text".
+            metadata_key (Optional[str]): Document field holding the metadata
+                object. ``None`` selects the default, "metadata".
+
+        Returns:
+            Meilisearch: Vector store populated with the given texts.
+
+        Example:
+            .. code-block:: python
+
+                from langchain.vectorstores import Meilisearch
+                from langchain.embeddings import OpenAIEmbeddings
+                import meilisearch
+
+                # api_key is only needed if your instance requires one
+                client = meilisearch.Client(url='http://127.0.0.1:7700', api_key='***')
+                embeddings = OpenAIEmbeddings()
+                docsearch = Meilisearch.from_texts(
+                    texts=["foo", "bar"],
+                    embedding=embeddings,
+                    client=client,
+                )
+        """
+        vectorstore = cls(
+            embedding=embedding,
+            client=client,
+            url=url,
+            api_key=api_key,
+            index_name=index_name,
+            # The key names belong to the store, not to add_texts.
+            text_key=text_key or "text",
+            metadata_key=metadata_key or "metadata",
+        )
+        vectorstore.add_texts(texts=texts, metadatas=metadatas, ids=ids)
+        return vectorstore
diff --git a/libs/langchain/tests/integration_tests/vectorstores/docker-compose/meilisearch.yaml b/libs/langchain/tests/integration_tests/vectorstores/docker-compose/meilisearch.yaml
new file mode 100644
index 0000000000..dc1c4f7d98
--- /dev/null
+++ b/libs/langchain/tests/integration_tests/vectorstores/docker-compose/meilisearch.yaml
@@ -0,0 +1,17 @@
+version: "3.8"
+
+services:
+  meilisearch:
+    image:
getmeili/meilisearch:latest
+    environment:
+      - MEILI_MASTER_KEY=${MEILI_MASTER_KEY:-masterKey}
+      - MEILI_NO_ANALYTICS=${MEILI_NO_ANALYTICS:-true}
+      - MEILI_ENV=${MEILI_ENV:-development}
+    ports:
+      - ${MEILI_PORT:-7700}:7700
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:7700"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
diff --git a/libs/langchain/tests/integration_tests/vectorstores/test_meilisearch.py b/libs/langchain/tests/integration_tests/vectorstores/test_meilisearch.py
new file mode 100644
index 0000000000..630f9a0b75
--- /dev/null
+++ b/libs/langchain/tests/integration_tests/vectorstores/test_meilisearch.py
@@ -0,0 +1,157 @@
+"""Test Meilisearch functionality."""
+from typing import Generator
+
+import meilisearch
+import pytest
+import requests
+
+from langchain.docstore.document import Document
+from langchain.vectorstores import Meilisearch
+from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
+
+INDEX_NAME = "test-langchain-demo"
+TEST_MEILI_HTTP_ADDR = "http://localhost:7700"
+TEST_MEILI_MASTER_KEY = "masterKey"
+
+
+def _set_vector_store(enabled: bool) -> None:
+    """Turn the experimental vector store feature on or off.
+
+    Raises:
+        requests.HTTPError: If the instance rejects the request, so a
+            misconfigured server fails the fixture instead of every test.
+    """
+    response = requests.patch(
+        f"{TEST_MEILI_HTTP_ADDR}/experimental-features",
+        headers={"Authorization": f"Bearer {TEST_MEILI_MASTER_KEY}"},
+        json={"vectorStore": enabled},
+        timeout=10,
+    )
+    response.raise_for_status()
+
+
+class TestMeilisearchVectorSearch:
+    """End-to-end tests against a local Meilisearch instance."""
+
+    @pytest.fixture(scope="class", autouse=True)
+    def enable_vector_search(self) -> Generator[str, None, None]:
+        """Enable vector search for the whole class, then disable it again."""
+        _set_vector_store(True)
+        yield "done"
+        _set_vector_store(False)
+
+    @pytest.fixture(autouse=True)
+    def setup(self) -> None:
+        """Start every test from an instance without indexes."""
+        self.delete_all_indexes()
+
+    @pytest.fixture(scope="class", autouse=True)
+    def teardown_test(self) -> Generator[str, None, None]:
+        """Remove all indexes once the class has finished."""
+        # Yields back to the test function.
+        yield "done"
+        self.delete_all_indexes()
+
+    def delete_all_indexes(self) -> None:
+        """Delete every index of the instance and wait for each deletion."""
+        client = self.client()
+        # Deletes all the indexes in the Meilisearch instance.
+        indexes = client.get_indexes()
+        for index in indexes["results"]:
+            task = client.index(index.uid).delete()
+            client.wait_for_task(task.task_uid)
+
+    def client(self) -> meilisearch.Client:
+        """Return a client for the local test instance."""
+        return meilisearch.Client(TEST_MEILI_HTTP_ADDR, TEST_MEILI_MASTER_KEY)
+
+    def _wait_last_task(self) -> None:
+        """Block until the first task returned by the task listing is done."""
+        client = self.client()
+        # Get the last task
+        tasks = client.get_tasks()
+        # Wait for the last task to be completed
+        client.wait_for_task(tasks.results[0].uid)
+
+    def test_meilisearch(self) -> None:
+        """Test end to end construction and search."""
+        texts = ["foo", "bar", "baz"]
+        vectorstore = Meilisearch.from_texts(
+            texts=texts,
+            embedding=FakeEmbeddings(),
+            url=TEST_MEILI_HTTP_ADDR,
+            api_key=TEST_MEILI_MASTER_KEY,
+            index_name=INDEX_NAME,
+        )
+        self._wait_last_task()
+        output = vectorstore.similarity_search("foo", k=1)
+        assert output == [Document(page_content="foo")]
+
+    def test_meilisearch_with_client(self) -> None:
+        """Test end to end construction and search."""
+        texts = ["foo", "bar", "baz"]
+        vectorstore = Meilisearch.from_texts(
+            texts=texts,
+            embedding=FakeEmbeddings(),
+            client=self.client(),
+            index_name=INDEX_NAME,
+        )
+        self._wait_last_task()
+        output = vectorstore.similarity_search("foo", k=1)
+        assert output == [Document(page_content="foo")]
+
+    def test_meilisearch_with_metadatas(self) -> None:
+        """Test end to end construction and search."""
+        texts = ["foo", "bar", "baz"]
+        metadatas = [{"page": i} for i in range(len(texts))]
+        docsearch = Meilisearch.from_texts(
+            texts=texts,
+            embedding=FakeEmbeddings(),
+            url=TEST_MEILI_HTTP_ADDR,
+            api_key=TEST_MEILI_MASTER_KEY,
+            index_name=INDEX_NAME,
+            metadatas=metadatas,
+        )
+        self._wait_last_task()
+        output = docsearch.similarity_search("foo", k=1)
+        assert len(output) == 1
+        assert output[0].page_content == "foo"
+        assert output[0].metadata["page"] == 0
+        assert output == [Document(page_content="foo", metadata={"page": 0})]
+
+    def test_meilisearch_with_metadatas_with_scores(self) -> None:
+        """Test end to end construction and scored search."""
+        texts = ["foo", "bar", "baz"]
+        metadatas = [{"page": str(i)} for i in range(len(texts))]
+        docsearch = Meilisearch.from_texts(
+            texts=texts,
+            embedding=FakeEmbeddings(),
+            url=TEST_MEILI_HTTP_ADDR,
+            api_key=TEST_MEILI_MASTER_KEY,
+            index_name=INDEX_NAME,
+            metadatas=metadatas,
+        )
+        self._wait_last_task()
+        output = docsearch.similarity_search_with_score("foo", k=1)
+        assert output == [(Document(page_content="foo", metadata={"page": "0"}), 9.0)]
+
+    def test_meilisearch_with_metadatas_with_scores_using_vector(self) -> None:
+        """Test end to end construction and scored search, using embedding vector."""
+        texts = ["foo", "bar", "baz"]
+        metadatas = [{"page": str(i)} for i in range(len(texts))]
+        embeddings = FakeEmbeddings()
+
+        docsearch = Meilisearch.from_texts(
+            texts=texts,
+            embedding=embeddings,
+            url=TEST_MEILI_HTTP_ADDR,
+            api_key=TEST_MEILI_MASTER_KEY,
+            index_name=INDEX_NAME,
+            metadatas=metadatas,
+        )
+        embedded_query = embeddings.embed_query("foo")
+        self._wait_last_task()
+        output = docsearch.similarity_search_by_vector_with_scores(
+            embedding=embedded_query, k=1
+        )
+        assert output == [(Document(page_content="foo", metadata={"page": "0"}), 9.0)]