From 4b505060bd29e532641dbdc17691e83ea717fafc Mon Sep 17 00:00:00 2001 From: Hech <53417823+HeChangHaoGary@users.noreply.github.com> Date: Tue, 15 Aug 2023 16:06:06 +0800 Subject: [PATCH] fix: max_marginal_relevance_search and docs in Dingo (#9244) --- .../integrations/vectorstores/dingo.ipynb | 12 +++++---- .../langchain/langchain/vectorstores/dingo.py | 26 +++++++++++++------ 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/docs/extras/integrations/vectorstores/dingo.ipynb b/docs/extras/integrations/vectorstores/dingo.ipynb index fab91b3273..0369f1ecb0 100644 --- a/docs/extras/integrations/vectorstores/dingo.ipynb +++ b/docs/extras/integrations/vectorstores/dingo.ipynb @@ -23,7 +23,9 @@ }, "outputs": [], "source": [ - "!pip install dingodb" + "!pip install dingodb\n", + "or install latest:\n", + "!pip install git+https://git@github.com/dingodb/pydingo.git" ] }, { @@ -107,7 +109,7 @@ "dingo_client = DingoDB(user=\"\", password=\"\", host=[\"127.0.0.1:13000\"])\n", "# First, check if our index already exists. If it doesn't, we create it\n", "if index_name not in dingo_client.get_index():\n", - " # we create a new index\n", + " # we create a new index, modify to your own\n", " dingo_client.create_index(\n", " index_name=index_name,\n", " dimension=1536,\n", @@ -150,7 +152,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(docs[0][1])" + "print(docs[0].page_content)" ] }, { @@ -170,9 +172,9 @@ "metadata": {}, "outputs": [], "source": [ - "vectorstore = Dingo(client, embeddings.embed_query, \"text\")\n", + "vectorstore = Dingo(embeddings, \"text\", client=dingo_client, index_name=index_name)\n", "\n", - "vectorstore.add_texts(\"More text!\")" + "vectorstore.add_texts([\"More text!\"])" ] }, { diff --git a/libs/langchain/langchain/vectorstores/dingo.py b/libs/langchain/langchain/vectorstores/dingo.py index a1f31eeb40..44765cdd31 100644 --- a/libs/langchain/langchain/vectorstores/dingo.py +++ b/libs/langchain/langchain/vectorstores/dingo.py @@ -112,9 +112,11 @@ class Dingo(VectorStore): # upsert to Dingo for i in range(0, len(list(texts)), batch_size): j = i + batch_size - self._client.vector_add( + add_res = self._client.vector_add( self._index_name, metadatas_list[i:j], embeds[i:j], ids[i:j] ) + if not add_res: + raise Exception("vector add fail") return ids @@ -205,20 +207,26 @@ class Dingo(VectorStore): List of Documents selected by maximal marginal relevance. """ results = self._client.vector_search( - self._index_name, [embedding], search_params, k + self._index_name, [embedding], search_params=search_params, top_k=k ) mmr_selected = maximal_marginal_relevance( np.array([embedding], dtype=np.float32), - [item["floatValues"] for item in results[0]["vectorWithDistances"]], + [ + item["vector"]["floatValues"] + for item in results[0]["vectorWithDistances"] + ], k=k, lambda_mult=lambda_mult, ) - selected = [ - results[0]["vectorWithDistances"][i]["metaData"] for i in mmr_selected - ] + selected = [] + for i in mmr_selected: + meta_data = {} + for k, v in results[0]["vectorWithDistances"][i]["scalarData"].items(): + meta_data.update({str(k): v["fields"][0]["data"]}) + selected.append(meta_data) return [ - Document(page_content=metadata.pop((self._text_key)), metadata=metadata) + Document(page_content=metadata.pop(self._text_key), metadata=metadata) for metadata in selected ] @@ -328,9 +336,11 @@ class Dingo(VectorStore): # upsert to Dingo for i in range(0, len(list(texts)), batch_size): j = i + batch_size - dingo_client.vector_add( + add_res = dingo_client.vector_add( index_name, metadatas_list[i:j], embeds[i:j], ids[i:j] ) + if not add_res: + raise Exception("vector add fail") return cls(embedding, text_key, client=dingo_client, index_name=index_name) def delete(