WeaviateHybridSearchRetriever option to enable scores. (#7861)

Description: This PR adds the option to retrieve scores and explanations
in the WeaviateHybridSearchRetriever. This feature improves the
usability of the retriever by allowing users to understand the scoring
logic behind the search results and further refine their search queries.

Issue: This PR resolves issue #7855.
Dependencies: This PR does not introduce any new dependencies.

Tag maintainer: @rlancemartin, @eyurtsev

I have included a unit test for the added feature, ensuring that it
retrieves scores and explanations correctly. I have also included an
example notebook demonstrating its use.
pull/7893/head
Bill Zhang 1 year ago committed by GitHub
parent 527210972e
commit dda11d2a05
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -48,9 +48,9 @@
"import os\n",
"\n",
"WEAVIATE_URL = os.getenv(\"WEAVIATE_URL\")\n",
"auth_client_secret = (weaviate.AuthApiKey(api_key=os.getenv(\"WEAVIATE_API_KEY\")),)\n",
"client = weaviate.Client(\n",
" url=WEAVIATE_URL,\n",
" auth_client_secret=weaviate.AuthApiKey(api_key=os.getenv(\"WEAVIATE_API_KEY\")),\n",
" additional_headers={\n",
" \"X-Openai-Api-Key\": os.getenv(\"OPENAI_API_KEY\"),\n",
" },\n",
@ -68,10 +68,7 @@
{
"name": "stderr",
"output_type": "stream",
"text": [
"/workspaces/langchain/langchain/vectorstores/analyticdb.py:20: MovedIn20Warning: The ``declarative_base()`` function is now available as sqlalchemy.orm.declarative_base(). (deprecated since: 2.0) (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)\n",
" Base = declarative_base() # type: Any\n"
]
"text": []
}
],
"source": [
@ -87,7 +84,11 @@
"outputs": [],
"source": [
"retriever = WeaviateHybridSearchRetriever(\n",
" client, index_name=\"LangChain\", text_key=\"text\"\n",
" client=client,\n",
" index_name=\"LangChain\",\n",
" text_key=\"text\",\n",
" attributes=[],\n",
" create_schema_if_missing=True,\n",
")"
]
},
@ -152,11 +153,11 @@
{
"data": {
"text/plain": [
"['eda16d7d-437d-4613-84ae-c2e38705ec7a',\n",
" '04b501bf-192b-4e72-be77-2fbbe7e67ebf',\n",
" '18a1acdb-23b7-4482-ab04-a6c2ed51de77',\n",
" '88e82cc3-c020-4b5a-b3c6-ca7cf3fc6a04',\n",
" 'f6abd9d5-32ed-46c4-bd08-f8d0f7c9fc95']"
"['3a27b0a5-8dbb-4fee-9eba-8b6bc2c252be',\n",
" 'eeb9fd9b-a3ac-4d60-a55b-a63a25d3b907',\n",
" '7ebbdae7-1061-445f-a046-1989f2343d8f',\n",
" 'c2ab315b-3cab-467f-b23a-b26ed186318d',\n",
" 'b83765f2-e5d2-471f-8c02-c3350ade4c4f']"
]
},
"execution_count": 6,
@ -238,6 +239,41 @@
" },\n",
")"
]
},
{
"cell_type": "markdown",
"id": "5ae2899e",
"metadata": {},
"source": [
"Do a hybrid search with scores:"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "4fffd0af",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='Prof. Sterling explores the potential for harmonious coexistence between humans and artificial intelligence. The book discusses how AI can be integrated into society in a beneficial and non-disruptive manner.', metadata={'_additional': {'explainScore': '(bm25)\\n(hybrid) Document eeb9fd9b-a3ac-4d60-a55b-a63a25d3b907 contributed 0.00819672131147541 to the score\\n(hybrid) Document eeb9fd9b-a3ac-4d60-a55b-a63a25d3b907 contributed 0.00819672131147541 to the score', 'score': '0.016393442'}}),\n",
" Document(page_content=\"In his follow-up to 'Symbiosis', Prof. Sterling takes a look at the subtle, unnoticed presence and influence of AI in our everyday lives. It reveals how AI has become woven into our routines, often without our explicit realization.\", metadata={'_additional': {'explainScore': '(bm25)\\n(hybrid) Document b83765f2-e5d2-471f-8c02-c3350ade4c4f contributed 0.0078125 to the score\\n(hybrid) Document b83765f2-e5d2-471f-8c02-c3350ade4c4f contributed 0.008064516129032258 to the score', 'score': '0.015877016'}}),\n",
" Document(page_content='In her second book, Dr. Simmons delves deeper into the ethical considerations surrounding AI development and deployment. It is an eye-opening examination of the dilemmas faced by developers, policymakers, and society at large.', metadata={'_additional': {'explainScore': '(bm25)\\n(hybrid) Document 7ebbdae7-1061-445f-a046-1989f2343d8f contributed 0.008064516129032258 to the score\\n(hybrid) Document 7ebbdae7-1061-445f-a046-1989f2343d8f contributed 0.0078125 to the score', 'score': '0.015877016'}}),\n",
" Document(page_content='A comprehensive analysis of the evolution of artificial intelligence, from its inception to its future prospects. Dr. Simmons covers ethical considerations, potentials, and threats posed by AI.', metadata={'_additional': {'explainScore': '(vector) [-0.0071824766 -0.0006682752 0.001723625 -0.01897258 -0.0045127636 0.0024410256 -0.020503938 0.013768672 0.009520169 -0.037972264]... \\n(hybrid) Document 3a27b0a5-8dbb-4fee-9eba-8b6bc2c252be contributed 0.007936507936507936 to the score', 'score': '0.007936508'}})]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retriever.get_relevant_documents(\n",
" \"AI integration in society\",\n",
" score=True,\n",
")"
]
}
],
"metadata": {
@ -256,7 +292,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
"version": "3.9.17"
}
},
"nbformat": 4,

@ -98,12 +98,16 @@ class WeaviateHybridSearchRetriever(BaseRetriever):
*,
run_manager: CallbackManagerForRetrieverRun,
where_filter: Optional[Dict[str, object]] = None,
score: bool = False,
) -> List[Document]:
"""Look up similar documents in Weaviate."""
query_obj = self.client.query.get(self.index_name, self.attributes)
if where_filter:
query_obj = query_obj.with_where(where_filter)
if score:
query_obj = query_obj.with_additional(["score", "explainScore"])
result = query_obj.with_hybrid(query, alpha=self.alpha).with_limit(self.k).do()
if "errors" in result:
raise ValueError(f"Error during query: {result['errors']}")

@ -61,6 +61,29 @@ class TestWeaviateHybridSearchRetriever:
Document(page_content="bar", metadata={"page": 1}),
]
@pytest.mark.vcr(ignore_localhost=True)
def test_get_relevant_documents_with_score(self, weaviate_url: str) -> None:
    """Test that ``score=True`` attaches score details to each document.

    Indexes three short texts into a freshly named Weaviate index, runs a
    hybrid search with ``score=True``, and checks that every returned
    document carries an ``_additional`` metadata entry holding both the
    ``score`` and ``explainScore`` fields requested by the retriever.

    Args:
        weaviate_url: URL of the Weaviate instance used for the test.
    """
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": i} for i in range(len(texts))]
    client = Client(weaviate_url)
    retriever = WeaviateHybridSearchRetriever(
        client=client,
        index_name=f"LangChain_{uuid4().hex}",
        text_key="text",
        attributes=["page"],
    )
    # Keep one add_documents call per document so the sequence of requests
    # sent to Weaviate stays the same as before.
    for text, metadata in zip(texts, metadatas):
        retriever.add_documents([Document(page_content=text, metadata=metadata)])
    output = retriever.get_relevant_documents("foo", score=True)
    # Guard against a vacuous pass: with no results the loop below would
    # never execute a single assertion.
    assert output, "expected at least one document for query 'foo'"
    for doc in output:
        assert "_additional" in doc.metadata
        additional = doc.metadata["_additional"]
        assert "score" in additional
        assert "explainScore" in additional
@pytest.mark.vcr(ignore_localhost=True)
def test_get_relevant_documents_with_filter(self, weaviate_url: str) -> None:
"""Test end to end construction and MRR search."""

Loading…
Cancel
Save