mirror of
https://github.com/hwchase17/langchain
synced 2024-11-10 01:10:59 +00:00
4f4b020582
# Astra DB Vector store integration - **Description:** This PR adds a `VectorStore` implementation for DataStax Astra DB using its HTTP API - **Issue:** (no related issue) - **Dependencies:** A new required dependency is `astrapy` (`>=0.5.3`) which was added to pyproject.toml, optional, as per guidelines - **Tag maintainer:** I recently mentioned to @baskaryan this integration was coming - **Twitter handle:** `@rsprrs` if you want to mention me This PR introduces the `AstraDB` vector store class, extensive integration test coverage, a reworking of the documentation which conflates Cassandra and Astra DB on a single "provider" page and a new, completely reworked vector-store example notebook (common to the Cassandra store, since parts of the flow are shared by the two APIs). I also took care to ensure docs (and redirects therein) are behaving correctly. All style, linting, typechecks and tests pass as far as the `AstraDB` integration is concerned. I could build the documentation and check that it looks all right (but ran into trouble with the `api_docs_build` makefile target which I could not verify: `Error: Unable to import module 'plan_and_execute.agent_executor' with error: No module named 'langchain_experimental'` was the first of many similar errors) Thank you for a review! Stefano --------- Co-authored-by: Erick Friis <erick@langchain.dev>
54 lines
1.7 KiB
Python
54 lines
1.7 KiB
Python
import os
|
|
|
|
from langchain.chat_models import ChatOpenAI
|
|
from langchain.embeddings import OpenAIEmbeddings
|
|
from langchain.prompts import ChatPromptTemplate
|
|
from langchain.schema.output_parser import StrOutputParser
|
|
from langchain.schema.runnable import RunnablePassthrough
|
|
from langchain.vectorstores import AstraDB
|
|
|
|
from .populate_vector_store import populate
|
|
|
|
# Component initialisation.
# The chat model and the embedding function are both constructed without
# explicit arguments.
llm = ChatOpenAI()
embeddings = OpenAIEmbeddings()

# Astra DB connection settings are read from the environment. The token and
# the API endpoint are mandatory: a missing variable raises KeyError as soon
# as this module is imported. The keyspace is optional and is passed as None
# when ASTRA_DB_KEYSPACE is not set.
vector_store = AstraDB(
    embedding=embeddings,
    collection_name="langserve_rag_demo",
    token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
    api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"],
    namespace=os.getenv("ASTRA_DB_KEYSPACE"),
)

# Retriever view over the store; search_kwargs requests k=3
# (presumably the number of documents returned per query -- confirm).
retriever = vector_store.as_retriever(search_kwargs={"k": 3})
|
|
|
|
# Demo-only seeding: make sure the vector store holds some rows to query.
# Please remove this and/or adapt it to your use case!
inserted_lines = populate(vector_store)
if inserted_lines:
    # Only report when populate() returned a truthy value.
    print(f"Done ({inserted_lines} lines inserted).")
|
|
|
|
# Prompt text for the chain. It carries two placeholders, {context} and
# {question}, which are filled in when the prompt is rendered. The text
# deliberately starts with a newline and ends right after "YOUR ANSWER:".
entomology_template = """
You are an expert entomologist, tasked with answering enthusiast biologists' questions.
You must answer based only on the provided context, do not make up any fact.
Your answers must be concise and to the point, but strive to provide scientific details
(such as family, order, Latin names, and so on when appropriate).
You MUST refuse to answer questions on other topics than entomology,
as well as questions whose answer is not found in the provided context.

CONTEXT:
{context}

QUESTION: {question}

YOUR ANSWER:"""
|
|
|
|
# Chat prompt built from the entomology template and its
# {context} / {question} placeholders.
entomology_prompt = ChatPromptTemplate.from_template(entomology_template)

# Retrieval-augmented pipeline, composed with the `|` operator:
#   1. the leading mapping sends the chain input to the retriever (under
#      "context") and also forwards it via RunnablePassthrough (under
#      "question");
#   2. that mapping is fed to the prompt;
#   3. the prompt output goes to the chat model;
#   4. StrOutputParser post-processes the model output
#      (presumably into a plain string -- confirm against the library docs).
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | entomology_prompt
    | llm
    | StrOutputParser()
)
|