langchain/templates/cassandra-entomology-rag/cassandra_entomology_rag/__init__.py

70 lines
2.0 KiB
Python
Raw Normal View History

import os
import cassio
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
2023-10-27 02:44:30 +00:00
from langchain.schema.runnable import RunnablePassthrough
from langchain.vectorstores import Cassandra
from .populate_vector_store import populate
use_cassandra = int(os.environ.get("USE_CASSANDRA_CLUSTER", "0"))
if use_cassandra:
from .cassandra_cluster_init import get_cassandra_connection
2023-10-29 22:50:09 +00:00
session, keyspace = get_cassandra_connection()
cassio.init(
session=session,
keyspace=keyspace,
)
else:
cassio.init(
token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
database_id=os.environ["ASTRA_DB_ID"],
keyspace=os.environ.get("ASTRA_DB_KEYSPACE"),
)
# inits
llm = ChatOpenAI()
embeddings = OpenAIEmbeddings()
vector_store = Cassandra(
session=None,
keyspace=None,
embedding=embeddings,
table_name="langserve_rag_demo",
)
2023-10-29 22:50:09 +00:00
retriever = vector_store.as_retriever(search_kwargs={"k": 3})
# For demo reasons, let's ensure there are rows on the vector store.
# Please remove this and/or adapt to your use case!
inserted_lines = populate(vector_store)
if inserted_lines:
print(f"Done ({inserted_lines} lines inserted).")
entomology_template = """
You are an expert entomologist, tasked with answering enthusiast biologists' questions.
You must answer based only on the provided context, do not make up any fact.
Your answers must be concise and to the point, but strive to provide scientific details
(such as family, order, Latin names, and so on when appropriate).
You MUST refuse to answer questions on other topics than entomology,
as well as questions whose answer is not found in the provided context.
CONTEXT:
{context}
QUESTION: {question}
YOUR ANSWER:"""
entomology_prompt = ChatPromptTemplate.from_template(entomology_template)
chain = (
2023-10-29 22:50:09 +00:00
{"context": retriever, "question": RunnablePassthrough()}
| entomology_prompt
| llm
| StrOutputParser()
2023-10-29 22:50:09 +00:00
)