Add RAG template for Timescale Vector (#12651)
<!-- Thank you for contributing to LangChain!
Replace this entire comment with:
- **Description:** a description of the change,
- **Issue:** the issue # it fixes (if applicable),
- **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!
Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.
See contribution guidelines for more information on how to write/run
tests, lint, etc:
https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md
If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.
If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
-->
---------
Co-authored-by: Matvey Arye <mat@timescale.com>
2023-10-31 16:56:29 +00:00
|
|
|
# ruff: noqa: E501
|
|
|
|
|
|
|
|
import os
|
|
|
|
from datetime import timedelta
|
|
|
|
|
|
|
|
from langchain.chains.query_constructor.base import AttributeInfo
|
|
|
|
from langchain.retrievers.self_query.base import SelfQueryRetriever
|
2024-01-02 20:32:16 +00:00
|
|
|
from langchain_community.chat_models import ChatOpenAI
|
|
|
|
from langchain_community.embeddings.openai import OpenAIEmbeddings
|
|
|
|
from langchain_community.llms import OpenAI
|
2024-01-02 21:47:11 +00:00
|
|
|
from langchain_community.vectorstores.timescalevector import TimescaleVector
|
docs[patch], templates[patch]: Import from core (#14575)
Update imports to use core for the low-hanging fruit changes. Ran
following
```bash
git grep -l 'langchain.schema.runnable' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.runnable/langchain_core.runnables/g'
git grep -l 'langchain.schema.output_parser' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.output_parser/langchain_core.output_parsers/g'
git grep -l 'langchain.schema.messages' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.messages/langchain_core.messages/g'
git grep -l 'langchain.schema.chat_histry' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.chat_history/langchain_core.chat_history/g'
git grep -l 'langchain.schema.prompt_template' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.prompt_template/langchain_core.prompts/g'
git grep -l 'from langchain.pydantic_v1' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.pydantic_v1/from langchain_core.pydantic_v1/g'
git grep -l 'from langchain.tools.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.tools\.base/from langchain_core.tools/g'
git grep -l 'from langchain.chat_models.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.chat_models.base/from langchain_core.language_models.chat_models/g'
git grep -l 'from langchain.llms.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.llms\.base\ /from langchain_core.language_models.llms\ /g'
git grep -l 'from langchain.embeddings.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.embeddings\.base/from langchain_core.embeddings/g'
git grep -l 'from langchain.vectorstores.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.vectorstores\.base/from langchain_core.vectorstores/g'
git grep -l 'from langchain.agents.tools' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.agents\.tools/from langchain_core.tools/g'
git grep -l 'from langchain.schema.output' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.output\ /from langchain_core.outputs\ /g'
git grep -l 'from langchain.schema.embeddings' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.embeddings/from langchain_core.embeddings/g'
git grep -l 'from langchain.schema.document' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.document/from langchain_core.documents/g'
git grep -l 'from langchain.schema.agent' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.agent/from langchain_core.agents/g'
git grep -l 'from langchain.schema.prompt ' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.prompt\ /from langchain_core.prompt_values /g'
git grep -l 'from langchain.schema.language_model' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.language_model/from langchain_core.language_models/g'
```
2023-12-12 00:49:10 +00:00
|
|
|
from langchain_core.output_parsers import StrOutputParser
|
2024-01-03 21:28:05 +00:00
|
|
|
from langchain_core.prompts import ChatPromptTemplate
|
2023-12-12 23:31:14 +00:00
|
|
|
from langchain_core.pydantic_v1 import BaseModel
|
docs[patch], templates[patch]: Import from core (#14575)
Update imports to use core for the low-hanging fruit changes. Ran
following
```bash
git grep -l 'langchain.schema.runnable' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.runnable/langchain_core.runnables/g'
git grep -l 'langchain.schema.output_parser' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.output_parser/langchain_core.output_parsers/g'
git grep -l 'langchain.schema.messages' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.messages/langchain_core.messages/g'
git grep -l 'langchain.schema.chat_histry' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.chat_history/langchain_core.chat_history/g'
git grep -l 'langchain.schema.prompt_template' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.prompt_template/langchain_core.prompts/g'
git grep -l 'from langchain.pydantic_v1' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.pydantic_v1/from langchain_core.pydantic_v1/g'
git grep -l 'from langchain.tools.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.tools\.base/from langchain_core.tools/g'
git grep -l 'from langchain.chat_models.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.chat_models.base/from langchain_core.language_models.chat_models/g'
git grep -l 'from langchain.llms.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.llms\.base\ /from langchain_core.language_models.llms\ /g'
git grep -l 'from langchain.embeddings.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.embeddings\.base/from langchain_core.embeddings/g'
git grep -l 'from langchain.vectorstores.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.vectorstores\.base/from langchain_core.vectorstores/g'
git grep -l 'from langchain.agents.tools' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.agents\.tools/from langchain_core.tools/g'
git grep -l 'from langchain.schema.output' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.output\ /from langchain_core.outputs\ /g'
git grep -l 'from langchain.schema.embeddings' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.embeddings/from langchain_core.embeddings/g'
git grep -l 'from langchain.schema.document' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.document/from langchain_core.documents/g'
git grep -l 'from langchain.schema.agent' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.agent/from langchain_core.agents/g'
git grep -l 'from langchain.schema.prompt ' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.prompt\ /from langchain_core.prompt_values /g'
git grep -l 'from langchain.schema.language_model' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.language_model/from langchain_core.language_models/g'
```
2023-12-12 00:49:10 +00:00
|
|
|
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
|
Add RAG template for Timescale Vector (#12651)
<!-- Thank you for contributing to LangChain!
Replace this entire comment with:
- **Description:** a description of the change,
- **Issue:** the issue # it fixes (if applicable),
- **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!
Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.
See contribution guidelines for more information on how to write/run
tests, lint, etc:
https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md
If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.
If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
-->
---------
Co-authored-by: Matvey Arye <mat@timescale.com>
2023-10-31 16:56:29 +00:00
|
|
|
|
|
|
|
from .load_sample_dataset import load_ts_git_dataset
|
|
|
|
|
|
|
|
# to enable debug uncomment the following lines:
|
|
|
|
# from langchain.globals import set_debug
|
|
|
|
# set_debug(True)
|
|
|
|
|
|
|
|
# from dotenv import find_dotenv, load_dotenv
|
|
|
|
# _ = load_dotenv(find_dotenv())
|
|
|
|
|
|
|
|
# Fail fast at import time if the Timescale connection string is absent,
# so the template errors immediately rather than at first query.
if os.environ.get("TIMESCALE_SERVICE_URL", None) is None:
    raise Exception("Missing `TIMESCALE_SERVICE_URL` environment variable.")

# Connection string for the Timescale Vector database.
SERVICE_URL = os.environ["TIMESCALE_SERVICE_URL"]

# Whether to ingest the bundled sample dataset on startup.
# Environment variables are strings, so parse the common truthy spellings
# explicitly: the previous `os.environ.get("LOAD_SAMPLE_DATA", False)` made
# ANY non-empty value truthy, so LOAD_SAMPLE_DATA="false" or "0" would still
# trigger the (slow, network-bound) sample-data load.
LOAD_SAMPLE_DATA = os.environ.get("LOAD_SAMPLE_DATA", "").strip().lower() in (
    "1",
    "true",
    "yes",
    "on",
)
|
|
|
|
|
|
|
|
|
|
|
|
# DATASET SPECIFIC CODE
# Swap this section out to index your own data instead of the sample set.
collection_name = "timescale_commits"
# One hypertable time-partition per week of commit history.
partition_interval = timedelta(weeks=1)

# Optionally seed the vector store with 500 commits from the TimescaleDB
# git history, using the same collection and partitioning configured above.
if LOAD_SAMPLE_DATA:
    load_ts_git_dataset(
        SERVICE_URL,
        collection_name=collection_name,
        num_records=500,
        partition_interval=partition_interval,
    )
|
|
|
|
|
|
|
|
# This will change depending on the metadata stored in your dataset.
document_content_description = "The git log commit summary containing the commit hash, author, date of commit, change summary and change details"

# Attribute schema handed to the self-query retriever. These descriptions are
# inserted verbatim into the query-constructor prompt, so the LLM uses them to
# translate natural-language filters into structured metadata queries.
metadata_field_info = [
    AttributeInfo(
        name="id",
        description="A UUID v1 generated from the date of the commit",
        type="uuid",
    ),
    AttributeInfo(
        # This is a special attribute representing the timestamp encoded in
        # the UUID v1 `id`, which Timescale Vector can filter on efficiently.
        name="__uuid_timestamp",
        # NOTE: fixed a malformed format hint here ("HH::MM" -> "HH:MM");
        # the double colon could steer the LLM toward invalid timestamps.
        description="The timestamp of the commit. Specify in YYYY-MM-DDTHH:MM:SSZ format",
        type="datetime.datetime",
    ),
    AttributeInfo(
        name="author_name",
        description="The name of the author of the commit",
        type="string",
    ),
    AttributeInfo(
        name="author_email",
        description="The email address of the author of the commit",
        type="string",
    ),
]
|
|
|
|
# END DATASET SPECIFIC CODE
|
|
|
|
|
|
|
|
# Embed documents with OpenAI and store them in Timescale Vector, reusing the
# collection name and time-partitioning settings configured for the loader.
embeddings = OpenAIEmbeddings()
vectorstore = TimescaleVector(
    service_url=SERVICE_URL,
    collection_name=collection_name,
    embedding=embeddings,
    time_partition_interval=partition_interval,
)

# A deterministic (temperature=0) completion model drives query construction;
# the self-query retriever turns natural-language filters into metadata
# queries against the attribute schema above.
llm = OpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=vectorstore,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    enable_limit=True,
    verbose=True,
)
|
|
|
|
|
|
|
|
# Prompt that grounds the answer strictly in the retrieved context.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

model = ChatOpenAI(temperature=0, model="gpt-4")

# RAG chain: fan the input question out into (retrieved context, raw
# question), then prompt -> chat model -> plain-string answer.
chain = (
    RunnableParallel(context=retriever, question=RunnablePassthrough())
    | prompt
    | model
    | StrOutputParser()
)
|
|
|
|
|
|
|
|
|
|
|
|
class Question(BaseModel):
    # Pydantic v1 "custom root type": the chain's input is a bare string
    # (the question itself) rather than an object with named fields.
    __root__: str


# Attach the input schema so LangServe can render and validate requests
# (e.g. in the playground) for this chain.
chain = chain.with_types(input_type=Question)
|