Add RAG template for Timescale Vector (#12651)
<!-- Thank you for contributing to LangChain!
Replace this entire comment with:
- **Description:** a description of the change,
- **Issue:** the issue # it fixes (if applicable),
- **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!
Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.
See contribution guidelines for more information on how to write/run
tests, lint, etc:
https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md
If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.
If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
-->
---------
Co-authored-by: Matvey Arye <mat@timescale.com>
2023-10-31 16:56:29 +00:00
|
|
|
# ruff: noqa: E501
|
|
|
|
|
|
|
|
import os
|
|
|
|
from datetime import timedelta
|
|
|
|
|
|
|
|
from langchain.chains.query_constructor.base import AttributeInfo
|
|
|
|
from langchain.retrievers.self_query.base import SelfQueryRetriever
|
2024-01-02 20:32:16 +00:00
|
|
|
from langchain_community.chat_models import ChatOpenAI
|
|
|
|
from langchain_community.embeddings.openai import OpenAIEmbeddings
|
|
|
|
from langchain_community.llms import OpenAI
|
2024-01-02 21:47:11 +00:00
|
|
|
from langchain_community.vectorstores.timescalevector import TimescaleVector
|
docs[patch], templates[patch]: Import from core (#14575)
Update imports to use core for the low-hanging fruit changes. Ran
following
```bash
git grep -l 'langchain.schema.runnable' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.runnable/langchain_core.runnables/g'
git grep -l 'langchain.schema.output_parser' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.output_parser/langchain_core.output_parsers/g'
git grep -l 'langchain.schema.messages' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.messages/langchain_core.messages/g'
git grep -l 'langchain.schema.chat_histry' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.chat_history/langchain_core.chat_history/g'
git grep -l 'langchain.schema.prompt_template' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.prompt_template/langchain_core.prompts/g'
git grep -l 'from langchain.pydantic_v1' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.pydantic_v1/from langchain_core.pydantic_v1/g'
git grep -l 'from langchain.tools.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.tools\.base/from langchain_core.tools/g'
git grep -l 'from langchain.chat_models.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.chat_models.base/from langchain_core.language_models.chat_models/g'
git grep -l 'from langchain.llms.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.llms\.base\ /from langchain_core.language_models.llms\ /g'
git grep -l 'from langchain.embeddings.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.embeddings\.base/from langchain_core.embeddings/g'
git grep -l 'from langchain.vectorstores.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.vectorstores\.base/from langchain_core.vectorstores/g'
git grep -l 'from langchain.agents.tools' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.agents\.tools/from langchain_core.tools/g'
git grep -l 'from langchain.schema.output' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.output\ /from langchain_core.outputs\ /g'
git grep -l 'from langchain.schema.embeddings' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.embeddings/from langchain_core.embeddings/g'
git grep -l 'from langchain.schema.document' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.document/from langchain_core.documents/g'
git grep -l 'from langchain.schema.agent' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.agent/from langchain_core.agents/g'
git grep -l 'from langchain.schema.prompt ' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.prompt\ /from langchain_core.prompt_values /g'
git grep -l 'from langchain.schema.language_model' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.language_model/from langchain_core.language_models/g'
```
2023-12-12 00:49:10 +00:00
|
|
|
from langchain_core.output_parsers import StrOutputParser
|
2024-01-03 21:28:05 +00:00
|
|
|
from langchain_core.prompts import ChatPromptTemplate
|
2023-12-12 23:31:14 +00:00
|
|
|
from langchain_core.pydantic_v1 import BaseModel
|
docs[patch], templates[patch]: Import from core (#14575)
Update imports to use core for the low-hanging fruit changes. Ran
following
```bash
git grep -l 'langchain.schema.runnable' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.runnable/langchain_core.runnables/g'
git grep -l 'langchain.schema.output_parser' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.output_parser/langchain_core.output_parsers/g'
git grep -l 'langchain.schema.messages' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.messages/langchain_core.messages/g'
git grep -l 'langchain.schema.chat_histry' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.chat_history/langchain_core.chat_history/g'
git grep -l 'langchain.schema.prompt_template' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.prompt_template/langchain_core.prompts/g'
git grep -l 'from langchain.pydantic_v1' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.pydantic_v1/from langchain_core.pydantic_v1/g'
git grep -l 'from langchain.tools.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.tools\.base/from langchain_core.tools/g'
git grep -l 'from langchain.chat_models.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.chat_models.base/from langchain_core.language_models.chat_models/g'
git grep -l 'from langchain.llms.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.llms\.base\ /from langchain_core.language_models.llms\ /g'
git grep -l 'from langchain.embeddings.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.embeddings\.base/from langchain_core.embeddings/g'
git grep -l 'from langchain.vectorstores.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.vectorstores\.base/from langchain_core.vectorstores/g'
git grep -l 'from langchain.agents.tools' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.agents\.tools/from langchain_core.tools/g'
git grep -l 'from langchain.schema.output' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.output\ /from langchain_core.outputs\ /g'
git grep -l 'from langchain.schema.embeddings' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.embeddings/from langchain_core.embeddings/g'
git grep -l 'from langchain.schema.document' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.document/from langchain_core.documents/g'
git grep -l 'from langchain.schema.agent' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.agent/from langchain_core.agents/g'
git grep -l 'from langchain.schema.prompt ' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.prompt\ /from langchain_core.prompt_values /g'
git grep -l 'from langchain.schema.language_model' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.language_model/from langchain_core.language_models/g'
```
2023-12-12 00:49:10 +00:00
|
|
|
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
|
Add RAG template for Timescale Vector (#12651)
<!-- Thank you for contributing to LangChain!
Replace this entire comment with:
- **Description:** a description of the change,
- **Issue:** the issue # it fixes (if applicable),
- **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!
Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.
See contribution guidelines for more information on how to write/run
tests, lint, etc:
https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md
If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in `docs/extras`
directory.
If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
-->
---------
Co-authored-by: Matvey Arye <mat@timescale.com>
2023-10-31 16:56:29 +00:00
|
|
|
|
|
|
|
from .load_sample_dataset import load_ts_git_dataset
|
|
|
|
|
|
|
|
# to enable debug uncomment the following lines:
|
|
|
|
# from langchain.globals import set_debug
|
|
|
|
# set_debug(True)
|
|
|
|
|
|
|
|
# from dotenv import find_dotenv, load_dotenv
|
|
|
|
# _ = load_dotenv(find_dotenv())
|
|
|
|
|
|
|
|
# Fail fast at import time if the Timescale connection string is absent,
# so the template errors immediately rather than at first query.
if os.environ.get("TIMESCALE_SERVICE_URL", None) is None:
    raise Exception("Missing `TIMESCALE_SERVICE_URL` environment variable.")

# Connection string for the Timescale Vector database.
SERVICE_URL = os.environ["TIMESCALE_SERVICE_URL"]

# Whether to ingest the bundled sample dataset on startup.
# Environment variables are strings, so parse the common truthy spellings
# explicitly: the previous `os.environ.get("LOAD_SAMPLE_DATA", False)` made
# ANY non-empty value truthy, so LOAD_SAMPLE_DATA="false" or "0" would still
# trigger the (slow, network-bound) sample-data load.
LOAD_SAMPLE_DATA = os.environ.get("LOAD_SAMPLE_DATA", "").strip().lower() in (
    "1",
    "true",
    "yes",
    "on",
)
|
|
|
|
|
|
|
|
|
|
|
|
# DATASET SPECIFIC CODE
# Swap this section out to index your own data instead of the sample set.
collection_name = "timescale_commits"
# One hypertable time-partition per week of commit history.
partition_interval = timedelta(weeks=1)

# Optionally seed the vector store with 500 commits from the TimescaleDB
# git history, using the same collection and partitioning configured above.
if LOAD_SAMPLE_DATA:
    load_ts_git_dataset(
        SERVICE_URL,
        collection_name=collection_name,
        num_records=500,
        partition_interval=partition_interval,
    )
|
|
|
|
|
|
|
|
# This will change depending on the metadata stored in your dataset.
document_content_description = "The git log commit summary containing the commit hash, author, date of commit, change summary and change details"

# Attribute schema handed to the self-query retriever. These descriptions are
# inserted verbatim into the query-constructor prompt, so the LLM uses them to
# translate natural-language filters into structured metadata queries.
metadata_field_info = [
    AttributeInfo(
        name="id",
        description="A UUID v1 generated from the date of the commit",
        type="uuid",
    ),
    AttributeInfo(
        # This is a special attribute representing the timestamp encoded in
        # the UUID v1 `id`, which Timescale Vector can filter on efficiently.
        name="__uuid_timestamp",
        # NOTE: fixed a malformed format hint here ("HH::MM" -> "HH:MM");
        # the double colon could steer the LLM toward invalid timestamps.
        description="The timestamp of the commit. Specify in YYYY-MM-DDTHH:MM:SSZ format",
        type="datetime.datetime",
    ),
    AttributeInfo(
        name="author_name",
        description="The name of the author of the commit",
        type="string",
    ),
    AttributeInfo(
        name="author_email",
        description="The email address of the author of the commit",
        type="string",
    ),
]
|
|
|
|
# END DATASET SPECIFIC CODE
|
|
|
|
|
|
|
|
# Embed documents with OpenAI and store them in Timescale Vector, reusing the
# collection name and time-partitioning settings configured for the loader.
embeddings = OpenAIEmbeddings()
vectorstore = TimescaleVector(
    service_url=SERVICE_URL,
    collection_name=collection_name,
    embedding=embeddings,
    time_partition_interval=partition_interval,
)

# A deterministic (temperature=0) completion model drives query construction;
# the self-query retriever turns natural-language filters into metadata
# queries against the attribute schema above.
llm = OpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=vectorstore,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    enable_limit=True,
    verbose=True,
)
|
|
|
|
|
|
|
|
# Prompt that grounds the answer strictly in the retrieved context.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

model = ChatOpenAI(temperature=0, model="gpt-4")

# RAG chain: fan the input question out into (retrieved context, raw
# question), then prompt -> chat model -> plain-string answer.
chain = (
    RunnableParallel(context=retriever, question=RunnablePassthrough())
    | prompt
    | model
    | StrOutputParser()
)
|
|
|
|
|
|
|
|
|
|
|
|
class Question(BaseModel):
    # Pydantic v1 "custom root type": the chain's input is a bare string
    # (the question itself) rather than an object with named fields.
    __root__: str


# Attach the input schema so LangServe can render and validate requests
# (e.g. in the playground) for this chain.
chain = chain.with_types(input_type=Question)
|