mirror of
https://github.com/hwchase17/langchain
synced 2024-11-08 07:10:35 +00:00
66c41c0dbf
This PR adds a self-querying template using Qdrant as a vector store. The template uses an artificial dataset and was implemented in a way that simplifies passing different components and choosing LLM and embedding providers. --------- Co-authored-by: Erick Friis <erick@langchain.dev>
93 lines
3.4 KiB
Python
93 lines
3.4 KiB
Python
import os
|
|
from typing import List, Optional
|
|
|
|
from langchain.chains.query_constructor.schema import AttributeInfo
|
|
from langchain.embeddings import OpenAIEmbeddings
|
|
from langchain.llms import BaseLLM
|
|
from langchain.llms.openai import OpenAI
|
|
from langchain.pydantic_v1 import BaseModel
|
|
from langchain.retrievers import SelfQueryRetriever
|
|
from langchain.schema import Document, StrOutputParser
|
|
from langchain.schema.embeddings import Embeddings
|
|
from langchain.schema.runnable import RunnableParallel, RunnablePassthrough
|
|
from langchain.vectorstores.qdrant import Qdrant
|
|
from qdrant_client import QdrantClient
|
|
|
|
from self_query_qdrant import defaults, helper, prompts
|
|
|
|
|
|
# Input schema for the chain: the whole payload is one bare string rather than
# an object with named fields. create_chain() registers it as the pipeline's
# input type via with_types(input_type=Query).
class Query(BaseModel):
    __root__: str
|
|
|
|
|
|
def create_chain(
    llm: Optional[BaseLLM] = None,
    embeddings: Optional[Embeddings] = None,
    document_contents: str = defaults.DEFAULT_DOCUMENT_CONTENTS,
    metadata_field_info: List[AttributeInfo] = defaults.DEFAULT_METADATA_FIELD_INFO,
    collection_name: str = defaults.DEFAULT_COLLECTION_NAME,
):
    """
    Build a self-querying question-answering chain backed by a Qdrant collection.

    The Qdrant connection is taken from the environment: QDRANT_URL (falling back
    to "http://localhost:6333") and QDRANT_API_KEY (optional). Every argument has
    a default, so calling this function with no arguments yields a working chain
    for the template's default collection.

    :param llm: LLM used both by the self-query retriever and to produce the
        final answer; an OpenAI() instance is created when omitted
    :param embeddings: Embeddings handed to the Qdrant vector store; an
        OpenAIEmbeddings() instance is created when omitted
    :param document_contents: a description of the document set, passed to the
        self-query retriever
    :param metadata_field_info: list of metadata attributes, passed to the
        self-query retriever
    :param collection_name: name of the Qdrant collection to use
    :return: a runnable pipeline (retrieval -> prompt -> llm -> string output
        parser) whose input type is declared as Query
    """
    # Fall back to the OpenAI implementations when no component is supplied.
    if not llm:
        llm = OpenAI()
    if not embeddings:
        embeddings = OpenAIEmbeddings()

    # Connect to Qdrant using the settings found in the environment.
    qdrant_url = os.environ.get("QDRANT_URL", "http://localhost:6333")
    qdrant_api_key = os.environ.get("QDRANT_API_KEY")
    qdrant = QdrantClient(url=qdrant_url, api_key=qdrant_api_key)
    store = Qdrant(
        client=qdrant,
        collection_name=collection_name,
        embeddings=embeddings,
    )

    # Retriever with self-querying capabilities on top of the vector store.
    self_query_retriever = SelfQueryRetriever.from_llm(
        llm,
        store,
        document_contents,
        metadata_field_info,
        verbose=True,
    )

    # Feed the prompt two values: the retrieved documents merged by
    # helper.combine_documents ("context") and the untouched input ("query").
    prompt_inputs = RunnableParallel(
        context=self_query_retriever | helper.combine_documents,
        query=RunnablePassthrough(),
    )
    answer_chain = prompt_inputs | prompts.LLM_CONTEXT_PROMPT | llm | StrOutputParser()
    return answer_chain.with_types(input_type=Query)
|
|
|
|
|
|
def initialize(
    embeddings: Optional[Embeddings] = None,
    collection_name: str = defaults.DEFAULT_COLLECTION_NAME,
    documents: List[Document] = defaults.DEFAULT_DOCUMENTS,
):
    """
    Initialize a vector store with a set of documents. By default, the documents will be
    compatible with the default metadata field info. You can override these defaults by
    passing in your own values.

    The Qdrant connection is taken from the environment, exactly as create_chain()
    does: QDRANT_URL (falling back to "http://localhost:6333") and QDRANT_API_KEY
    (optional).

    :param embeddings: an Embeddings used to embed the documents being stored; an
        OpenAIEmbeddings() instance is created when omitted
    :param collection_name: name of the Qdrant collection to use
    :param documents: a list of documents to initialize the vector store with
    :return: None
    """
    embeddings = embeddings or OpenAIEmbeddings()

    # Set up a vector store to store your vectors and metadata. The url/api_key
    # must match the ones create_chain() connects with; otherwise the documents
    # could be written to a different Qdrant instance than the one queried.
    Qdrant.from_documents(
        documents,
        embedding=embeddings,
        collection_name=collection_name,
        url=os.environ.get("QDRANT_URL", "http://localhost:6333"),
        api_key=os.environ.get("QDRANT_API_KEY"),
    )
|
|
|
|
|
|
# Default chain, built once at import time with every create_chain() default
chain = create_chain()
|