langchain/templates/rag-astradb/astradb_entomology_rag/__init__.py

import os

from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.vectorstores import AstraDB

from .populate_vector_store import populate

# Module-level initialisation: everything below runs once, when this package
# is imported.

# Chat model used as the answering step of the chain (constructed with defaults).
llm = ChatOpenAI()
# Embedding model handed to the vector store below.
embeddings = OpenAIEmbeddings()
# Astra DB vector store backing the retriever.
# ASTRA_DB_APPLICATION_TOKEN and ASTRA_DB_API_ENDPOINT are mandatory: if either
# is missing from the environment, the lookup raises KeyError at import time.
# ASTRA_DB_KEYSPACE is optional; `namespace` is passed as None when it is unset.
vector_store = AstraDB(
    embedding=embeddings,
    collection_name="langserve_rag_demo",
    token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
    api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"],
    namespace=os.environ.get("ASTRA_DB_KEYSPACE"),
)
# Retriever view over the store, configured with search_kwargs k=3
# (presumably the number of documents returned per query -- confirm against
# the vector store documentation).
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

# For demo reasons, let's ensure there are rows on the vector store.
# Please remove this and/or adapt to your use case!
# NOTE: this call is executed at import time, i.e. whenever the package is
# imported. `populate` comes from the sibling `populate_vector_store` module;
# its return value is presumably the number of lines it inserted (falsy when
# nothing was written) -- confirm against that module.

inserted_lines = populate(vector_store)
if inserted_lines:
    # Report only when `populate` returned a truthy value.
    print(f"Done ({inserted_lines} lines inserted).")

# Prompt text for the entomology assistant. It contains two placeholders,
# {context} and {question}, which the chain defined in this module supplies
# under the keys "context" and "question".
# The wording is runtime text used to build the model prompt: edit it
# deliberately, not cosmetically.
entomology_template = """
You are an expert entomologist, tasked with answering enthusiast biologists' questions.
You must answer based only on the provided context, do not make up any fact.
Your answers must be concise and to the point, but strive to provide scientific details
(such as family, order, Latin names, and so on when appropriate).
You MUST refuse to answer questions on other topics than entomology,
as well as questions whose answer is not found in the provided context.

CONTEXT:
{context}

QUESTION: {question}

YOUR ANSWER:"""

# Build a chat prompt template from the raw text above.
entomology_prompt = ChatPromptTemplate.from_template(entomology_template)

# Assemble the RAG pipeline exposed by this package as `chain`:
#   1. the input mapping supplies the retriever under "context" and a
#      pass-through of the incoming value under "question";
#   2. the prompt template receives that mapping;
#   3. the chat model comes next;
#   4. the string output parser is the last step.
_chain_inputs = {"context": retriever, "question": RunnablePassthrough()}
chain = _chain_inputs | entomology_prompt | llm | StrOutputParser()
Add "Astra DB" vector store integration (#12966) # Astra DB Vector store integration - Description: This PR adds a `VectorStore` implementation for DataStax Astra DB using its HTTP API - Issue: (no related issue) - Dependencies: A new required dependency is `astrapy` (`>=0.5.3`) which was added to pyproject.toml, optional, as per guidelines - Tag maintainer: I recently mentioned to @baskaryan this integration was coming - Twitter handle: `@rsprrs` if you want to mention me This PR introduces the `AstraDB` vector store class, extensive integration test coverage, a reworking of the documentation which conflates Cassandra and Astra DB on a single "provider" page and a new, completely reworked vector-store example notebook (common to the Cassandra store, since parts of the flow are shared by the two APIs). I also took care to ensure docs (and redirects therein) are behaving correctly. All style, linting, typechecks and tests pass as far as the `AstraDB` integration is concerned. I could build the documentation and check it all right (but ran into trouble with the `api_docs_build` makefile target which I could not verify: `Error: Unable to import module 'plan_and_execute.agent_executor' with error: No module named 'langchain_experimental'` was the first of many similar errors) Thank you for a review! Stefano --------- Co-authored-by: Erick Friis <erick@langchain.dev> 2023-11-07 22:45:33 +00:00			`import os`

			`from langchain.chat_models import ChatOpenAI`
			`from langchain.embeddings import OpenAIEmbeddings`
			`from langchain.prompts import ChatPromptTemplate`
			`from langchain.schema.output_parser import StrOutputParser`
			`from langchain.schema.runnable import RunnablePassthrough`
			`from langchain.vectorstores import AstraDB`

			`from .populate_vector_store import populate`

			`# inits`
			`llm = ChatOpenAI()`
			`embeddings = OpenAIEmbeddings()`
			`vector_store = AstraDB(`
			`embedding=embeddings,`
			`collection_name="langserve_rag_demo",`
			`token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],`
			`api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"],`
			`namespace=os.environ.get("ASTRA_DB_KEYSPACE"),`
			`)`
			`retriever = vector_store.as_retriever(search_kwargs={"k": 3})`

			`# For demo reasons, let's ensure there are rows on the vector store.`
			`# Please remove this and/or adapt to your use case!`

			`inserted_lines = populate(vector_store)`
			`if inserted_lines:`
			`print(f"Done ({inserted_lines} lines inserted).")`

			`entomology_template = """`
			`You are an expert entomologist, tasked with answering enthusiast biologists' questions.`
			`You must answer based only on the provided context, do not make up any fact.`
			`Your answers must be concise and to the point, but strive to provide scientific details`
			`(such as family, order, Latin names, and so on when appropriate).`
			`You MUST refuse to answer questions on other topics than entomology,`
			`as well as questions whose answer is not found in the provided context.`

			`CONTEXT:`
			`{context}`

			`QUESTION: {question}`

			`YOUR ANSWER:"""`

			`entomology_prompt = ChatPromptTemplate.from_template(entomology_template)`

			`chain = (`
			`{"context": retriever, "question": RunnablePassthrough()}`
			`\| entomology_prompt`
			`\| llm`
			`\| StrOutputParser()`
			`)`