mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
0175906437
**Description:** This template utilizes Chroma and TGI (Text Generation Inference) to execute RAG on the Intel Xeon Scalable Processors. It serves as a demonstration for users, illustrating the deployment of the RAG service on the Intel Xeon Scalable Processors and showcasing the resulting performance enhancements. **Issue:** None **Dependencies:** The template contains the poetry project requirements to run this template. CPU TGI batching is WIP. **Twitter handle:** None --------- Signed-off-by: lvliang-intel <liang1.lv@intel.com> Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: Bagatur <baskaryan@gmail.com>
73 lines
1.9 KiB
Python
73 lines
1.9 KiB
Python
from langchain.callbacks import streaming_stdout
|
|
from langchain_community.embeddings import HuggingFaceEmbeddings
|
|
from langchain_community.llms import HuggingFaceEndpoint
|
|
from langchain_community.vectorstores import Chroma
|
|
from langchain_core.output_parsers import StrOutputParser
|
|
from langchain_core.prompts import ChatPromptTemplate
|
|
from langchain_core.pydantic_v1 import BaseModel
|
|
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
|
|
from langchain_core.vectorstores import VectorStoreRetriever
|
|
|
|
|
|
# Typed wrapper for the chain's input so generated docs/schemas show a
# named "Question" type.
class Question(BaseModel):
    # Custom root field (pydantic v1 convention): the model's whole payload
    # is this single string rather than an object with named fields.
    __root__: str
|
|
|
|
|
|
# Init Embeddings
# Embedding model handed to Chroma below as its embedding function.
embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Vector store backed by the "xeon-rag" collection under /tmp/xeon_rag_db.
# NOTE(review): presumably populated by a separate ingestion step — confirm.
knowledge_base = Chroma(
    persist_directory="/tmp/xeon_rag_db",
    embedding_function=embedder,
    collection_name="xeon-rag",
)

# Import-time sanity check: run one sample query against the store and show
# the top hit.
query = "What was Nike's revenue in 2023?"
docs = knowledge_base.similarity_search(query)
# Guard the index: an empty result list (e.g. nothing in the collection yet)
# would otherwise raise IndexError here and make the whole module
# unimportable.
if docs:
    print(docs[0].page_content)

# Retriever used by the chain: MMR search with k=1 and fetch_k=5.
retriever = VectorStoreRetriever(
    vectorstore=knowledge_base, search_type="mmr", search_kwargs={"k": 1, "fetch_k": 5}
)
|
|
|
|
# Prompt definition.
# The text below has two placeholders, {context} and {question}, which the
# chain fills with the retriever output and the incoming question.
template = """
Use the following pieces of context from retrieved
dataset to answer the question. Do not make up an answer if there is no
context provided to help answer it.

Context:
---------
{context}

---------
Question: {question}
---------

Answer:
"""

# Build the prompt object from the raw template text.
prompt = ChatPromptTemplate.from_template(template)
|
|
|
|
|
|
# Address of the text-generation endpoint the LLM client talks to.
ENDPOINT_URL = "http://localhost:8080"

# Handler list for streaming output to stdout.
# NOTE(review): not passed to the model below — confirm whether that is
# intentional.
callbacks = [streaming_stdout.StreamingStdOutCallbackHandler()]

# Generation settings, gathered in one place and forwarded unchanged as
# keyword arguments.
_generation_settings = {
    "max_new_tokens": 512,
    "top_k": 10,
    "top_p": 0.95,
    "typical_p": 0.95,
    "temperature": 0.01,
    "repetition_penalty": 1.03,
    "streaming": True,
}

model = HuggingFaceEndpoint(endpoint_url=ENDPOINT_URL, **_generation_settings)
|
|
|
|
# RAG Chain
# Step 1: fan the input out — the retriever produces "context" while the raw
# input is passed through untouched as "question".
_prompt_inputs = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)

# Step 2: fill the prompt, call the model, and parse the output to a string.
_rag_pipeline = _prompt_inputs | prompt | model | StrOutputParser()

# Step 3: declare the chain's input type as Question.
chain = _rag_pipeline.with_types(input_type=Question)
|