From 9907cb0485a577a2e041d227a9359f8102fcb3a2 Mon Sep 17 00:00:00 2001 From: drod Date: Fri, 14 Apr 2023 07:09:00 +0200 Subject: [PATCH] Refactor similarity_search function in elastic_vector_search.py (#2761) Optimization: limit search results when k < 10. Fix issue when k > 10: Elasticsearch will return only 10 docs. See [default-search-result](https://www.elastic.co/guide/en/elasticsearch/reference/current/paginate-search-results.html): by default, searches return the top 10 matching hits. Add size parameter to the search request to limit the number of returned results from Elasticsearch. Remove slicing of the hits list, since the response will already contain the desired number of results. --- langchain/vectorstores/elastic_vector_search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/langchain/vectorstores/elastic_vector_search.py b/langchain/vectorstores/elastic_vector_search.py index cb238563..17af42c6 100644 --- a/langchain/vectorstores/elastic_vector_search.py +++ b/langchain/vectorstores/elastic_vector_search.py @@ -200,8 +200,8 @@ class ElasticVectorSearch(VectorStore, ABC): """ embedding = self.embedding.embed_query(query) script_query = _default_script_query(embedding) - response = self.client.search(index=self.index_name, query=script_query) - hits = [hit["_source"] for hit in response["hits"]["hits"][:k]] + response = self.client.search(index=self.index_name, query=script_query, size=k) + hits = [hit["_source"] for hit in response["hits"]["hits"]] documents = [ Document(page_content=hit["text"], metadata=hit["metadata"]) for hit in hits ]