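Better docs for Weaviate hybrid search (#5290)

# Better docs for Weaviate hybrid search

Expands the Weaviate hybrid search notebook with client setup from environment variables, richer sample documents, and a `where_filter` example, and adds a `create_schema_if_missing` option to `WeaviateHybridSearchRetriever`.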

<!--
Thank you for contributing to LangChain! Your PR will appear in our next
release under the title you set. Please make sure it highlights your
valuable contribution.

Replace this with a description of the change, the issue it fixes (if
applicable), and relevant context. List any dependencies required for
this change.

After you're done, someone will review your PR. They may suggest
improvements. If no one reviews your PR within a few days, feel free to
@-mention the same people again, as notifications can get lost.
-->

<!-- Remove if not applicable -->

Fixes: N/A

## Before submitting

<!-- If you're adding a new integration, include an integration test and
an example notebook showing its use! -->

## Who can review?

Community members can review the PR once tests pass. Tag
maintainers/contributors who might be interested:

<!-- For a quicker response, figure out the right person to tag with @

        @hwchase17 - project lead

        Tracing / Callbacks
        - @agola11

        Async
        - @agola11

        DataLoaders
        - @eyurtsev

        Models
        - @hwchase17
        - @agola11

        Agents / Tools / Toolkits
        - @vowelparrot
        
        VectorStores / Retrievers / Memory
        - @dev2049
        
 -->
@dev2049
This commit is contained in:
Shukri 2023-05-27 00:30:41 +08:00 committed by GitHub
parent 641303a361
commit 58e95cd11e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 151 additions and 23 deletions

View File

@ -16,9 +16,18 @@
"This notebook shows how to use `Weaviate hybrid search` as a LangChain retriever." "This notebook shows how to use `Weaviate hybrid search` as a LangChain retriever."
] ]
}, },
{
"attachments": {},
"cell_type": "markdown",
"id": "c307b082",
"metadata": {},
"source": [
"Set up the retriever:"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 1,
"id": "bba863a2-977c-4add-b5f4-bfc33a80eae5", "id": "bba863a2-977c-4add-b5f4-bfc33a80eae5",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -30,7 +39,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 2,
"id": "c10dd962", "id": "c10dd962",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -38,18 +47,33 @@
"import weaviate\n", "import weaviate\n",
"import os\n", "import os\n",
"\n", "\n",
"WEAVIATE_URL = \"...\"\n", "WEAVIATE_URL = os.getenv(\"WEAVIATE_URL\")\n",
"client = weaviate.Client(\n", "client = weaviate.Client(\n",
" url=WEAVIATE_URL,\n", " url=WEAVIATE_URL,\n",
")" " auth_client_secret=weaviate.AuthApiKey(api_key=os.getenv(\"WEAVIATE_API_KEY\")),\n",
" additional_headers={\n",
" \"X-Openai-Api-Key\": os.getenv(\"OPENAI_API_KEY\"),\n",
" },\n",
")\n",
"\n",
"# client.schema.delete_all()"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 3,
"id": "f47a2bfe", "id": "f47a2bfe",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/workspaces/langchain/langchain/vectorstores/analyticdb.py:20: MovedIn20Warning: The ``declarative_base()`` function is now available as sqlalchemy.orm.declarative_base(). (deprecated since: 2.0) (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)\n",
" Base = declarative_base() # type: Any\n"
]
}
],
"source": [ "source": [
"from langchain.retrievers.weaviate_hybrid_search import WeaviateHybridSearchRetriever\n", "from langchain.retrievers.weaviate_hybrid_search import WeaviateHybridSearchRetriever\n",
"from langchain.schema import Document" "from langchain.schema import Document"
@ -57,37 +81,85 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 4,
"id": "f2eff08e", "id": "f2eff08e",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"retriever = WeaviateHybridSearchRetriever(client, index_name=\"LangChain\", text_key=\"text\")" "retriever = WeaviateHybridSearchRetriever(\n",
" client, index_name=\"LangChain\", text_key=\"text\"\n",
")"
] ]
}, },
{ {
"cell_type": "code", "attachments": {},
"execution_count": 4, "cell_type": "markdown",
"id": "cd8a7b17", "id": "b68debff",
"metadata": {}, "metadata": {},
"outputs": [],
"source": [ "source": [
"docs = [Document(page_content=\"foo\")]" "Add some data:"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 5,
"id": "cd8a7b17",
"metadata": {},
"outputs": [],
"source": [
"docs = [\n",
" Document(\n",
" metadata={\n",
" \"title\": \"Embracing The Future: AI Unveiled\",\n",
" \"author\": \"Dr. Rebecca Simmons\",\n",
" },\n",
" page_content=\"A comprehensive analysis of the evolution of artificial intelligence, from its inception to its future prospects. Dr. Simmons covers ethical considerations, potentials, and threats posed by AI.\",\n",
" ),\n",
" Document(\n",
" metadata={\n",
" \"title\": \"Symbiosis: Harmonizing Humans and AI\",\n",
" \"author\": \"Prof. Jonathan K. Sterling\",\n",
" },\n",
" page_content=\"Prof. Sterling explores the potential for harmonious coexistence between humans and artificial intelligence. The book discusses how AI can be integrated into society in a beneficial and non-disruptive manner.\",\n",
" ),\n",
" Document(\n",
" metadata={\"title\": \"AI: The Ethical Quandary\", \"author\": \"Dr. Rebecca Simmons\"},\n",
" page_content=\"In her second book, Dr. Simmons delves deeper into the ethical considerations surrounding AI development and deployment. It is an eye-opening examination of the dilemmas faced by developers, policymakers, and society at large.\",\n",
" ),\n",
" Document(\n",
" metadata={\n",
" \"title\": \"Conscious Constructs: The Search for AI Sentience\",\n",
" \"author\": \"Dr. Samuel Cortez\",\n",
" },\n",
" page_content=\"Dr. Cortez takes readers on a journey exploring the controversial topic of AI consciousness. The book provides compelling arguments for and against the possibility of true AI sentience.\",\n",
" ),\n",
" Document(\n",
" metadata={\n",
" \"title\": \"Invisible Routines: Hidden AI in Everyday Life\",\n",
" \"author\": \"Prof. Jonathan K. Sterling\",\n",
" },\n",
" page_content=\"In his follow-up to 'Symbiosis', Prof. Sterling takes a look at the subtle, unnoticed presence and influence of AI in our everyday lives. It reveals how AI has become woven into our routines, often without our explicit realization.\",\n",
" ),\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "3c5970db", "id": "3c5970db",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"['3f79d151-fb84-44cf-85e0-8682bfe145e0']" "['eda16d7d-437d-4613-84ae-c2e38705ec7a',\n",
" '04b501bf-192b-4e72-be77-2fbbe7e67ebf',\n",
" '18a1acdb-23b7-4482-ab04-a6c2ed51de77',\n",
" '88e82cc3-c020-4b5a-b3c6-ca7cf3fc6a04',\n",
" 'f6abd9d5-32ed-46c4-bd08-f8d0f7c9fc95']"
] ]
}, },
"execution_count": 5, "execution_count": 6,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -96,34 +168,76 @@
"retriever.add_documents(docs)" "retriever.add_documents(docs)"
] ]
}, },
{
"attachments": {},
"cell_type": "markdown",
"id": "6e030694",
"metadata": {},
"source": [
"Do a hybrid search:"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 7,
"id": "bf7dbb98", "id": "bf7dbb98",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"[Document(page_content='foo', metadata={})]" "[Document(page_content='In her second book, Dr. Simmons delves deeper into the ethical considerations surrounding AI development and deployment. It is an eye-opening examination of the dilemmas faced by developers, policymakers, and society at large.', metadata={}),\n",
" Document(page_content='A comprehensive analysis of the evolution of artificial intelligence, from its inception to its future prospects. Dr. Simmons covers ethical considerations, potentials, and threats posed by AI.', metadata={}),\n",
" Document(page_content=\"In his follow-up to 'Symbiosis', Prof. Sterling takes a look at the subtle, unnoticed presence and influence of AI in our everyday lives. It reveals how AI has become woven into our routines, often without our explicit realization.\", metadata={}),\n",
" Document(page_content='Prof. Sterling explores the potential for harmonious coexistence between humans and artificial intelligence. The book discusses how AI can be integrated into society in a beneficial and non-disruptive manner.', metadata={})]"
] ]
}, },
"execution_count": 6, "execution_count": 7,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"retriever.get_relevant_documents(\"foo\")" "retriever.get_relevant_documents(\"the ethical implications of AI\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "d0c5bb4d",
"metadata": {},
"source": [
"Do a hybrid search with a where filter:"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 8,
"id": "b2bc87c1", "id": "b2bc87c1",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
"source": [] {
"data": {
"text/plain": [
"[Document(page_content='Prof. Sterling explores the potential for harmonious coexistence between humans and artificial intelligence. The book discusses how AI can be integrated into society in a beneficial and non-disruptive manner.', metadata={}),\n",
" Document(page_content=\"In his follow-up to 'Symbiosis', Prof. Sterling takes a look at the subtle, unnoticed presence and influence of AI in our everyday lives. It reveals how AI has become woven into our routines, often without our explicit realization.\", metadata={})]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retriever.get_relevant_documents(\n",
" \"AI integration in society\",\n",
" where_filter={\n",
" \"path\": [\"author\"],\n",
" \"operator\": \"Equal\",\n",
" \"valueString\": \"Prof. Jonathan K. Sterling\",\n",
" },\n",
")"
]
} }
], ],
"metadata": { "metadata": {
@ -142,7 +256,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.10.6" "version": "3.9.16"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@ -19,6 +19,7 @@ class WeaviateHybridSearchRetriever(BaseRetriever):
alpha: float = 0.5, alpha: float = 0.5,
k: int = 4, k: int = 4,
attributes: Optional[List[str]] = None, attributes: Optional[List[str]] = None,
create_schema_if_missing: bool = True,
): ):
try: try:
import weaviate import weaviate
@ -40,6 +41,19 @@ class WeaviateHybridSearchRetriever(BaseRetriever):
if attributes is not None: if attributes is not None:
self._query_attrs.extend(attributes) self._query_attrs.extend(attributes)
if create_schema_if_missing:
self._create_schema_if_missing()
# Ensure the Weaviate class backing this retriever exists.
#
# Builds a class definition named after self._index_name with a single
# text property (self._text_key) and the "text2vec-openai" vectorizer, and
# creates it only when the client's schema does not already contain a class
# of that name. Returns None.
def _create_schema_if_missing(self) -> None:
class_obj = {
"class": self._index_name,
"properties": [{"name": self._text_key, "dataType": ["text"]}],
# NOTE(review): the vectorizer is hard-coded; presumably this requires the
# OpenAI module/API key to be available to the Weaviate instance -- confirm.
"vectorizer": "text2vec-openai",
}
# Skip creation when the class is already present in the schema.
if not self._client.schema.exists(self._index_name):
self._client.schema.create_class(class_obj)
class Config: class Config:
"""Configuration for this pydantic object.""" """Configuration for this pydantic object."""