diff --git a/docs/snippets/modules/data_connection/retrievers/get_started.mdx b/docs/snippets/modules/data_connection/retrievers/get_started.mdx index e87b50966a..4cad675846 100644 --- a/docs/snippets/modules/data_connection/retrievers/get_started.mdx +++ b/docs/snippets/modules/data_connection/retrievers/get_started.mdx @@ -159,6 +159,22 @@ index.vectorstore.as_retriever() +It can also be convenient to filter the vectorstore by the metadata associated with documents, particularly when your vectorstore has multiple sources. This can be done by passing `retriever_kwargs` to the `query` method like so: + + +```python +index.query("Summarize the general content of this document.", retriever_kwargs={"search_kwargs": {"filter": {"source": "../state_of_the_union.txt"}}}) +``` + + + +``` + " The document is a speech given by President Trump to the nation on the occasion of his 245th birthday. The speech highlights the importance of American values and the challenges facing the country, including the ongoing conflict in Ukraine, the ongoing trade war with China, and the ongoing conflict in Syria. The speech also discusses the importance of investing in emerging technologies and American manufacturing, and calls on Congress to pass the Bipartisan Innovation Act and other important legislation." +``` + + + + ## Walkthrough Okay, so what's actually going on? How is this index getting created? 
diff --git a/libs/langchain/langchain/indexes/vectorstore.py b/libs/langchain/langchain/indexes/vectorstore.py index daa83092d0..c9229f9d7b 100644 --- a/libs/langchain/langchain/indexes/vectorstore.py +++ b/libs/langchain/langchain/indexes/vectorstore.py @@ -1,4 +1,4 @@ -from typing import Any, List, Optional, Type +from typing import Any, Dict, List, Optional, Type from pydantic import BaseModel, Extra, Field @@ -31,22 +31,32 @@ class VectorStoreIndexWrapper(BaseModel): arbitrary_types_allowed = True def query( - self, question: str, llm: Optional[BaseLanguageModel] = None, **kwargs: Any + self, + question: str, + llm: Optional[BaseLanguageModel] = None, + retriever_kwargs: Optional[Dict[str, Any]] = None, + **kwargs: Any ) -> str: """Query the vectorstore.""" llm = llm or OpenAI(temperature=0) + retriever_kwargs = retriever_kwargs or {} chain = RetrievalQA.from_chain_type( - llm, retriever=self.vectorstore.as_retriever(), **kwargs + llm, retriever=self.vectorstore.as_retriever(**retriever_kwargs), **kwargs ) return chain.run(question) def query_with_sources( - self, question: str, llm: Optional[BaseLanguageModel] = None, **kwargs: Any + self, + question: str, + llm: Optional[BaseLanguageModel] = None, + retriever_kwargs: Optional[Dict[str, Any]] = None, + **kwargs: Any ) -> dict: """Query the vectorstore and get back sources.""" llm = llm or OpenAI(temperature=0) + retriever_kwargs = retriever_kwargs or {} chain = RetrievalQAWithSourcesChain.from_chain_type( - llm, retriever=self.vectorstore.as_retriever(), **kwargs + llm, retriever=self.vectorstore.as_retriever(**retriever_kwargs), **kwargs ) return chain({chain.question_key: question})