From 25de47878bd48606a755a98b22caad3d34cfc2eb Mon Sep 17 00:00:00 2001 From: ccurme Date: Fri, 5 Jul 2024 14:00:23 -0400 Subject: [PATCH] langchain[patch]: deprecate AnalyzeDocumentChain (#23769) --- docs/docs/tutorials/summarization.ipynb | 11 ++-- .../chains/combine_documents/base.py | 59 +++++++++++++++++++ 2 files changed, 64 insertions(+), 6 deletions(-) diff --git a/docs/docs/tutorials/summarization.ipynb b/docs/docs/tutorials/summarization.ipynb index 20f4087ddb..410bd17f5a 100644 --- a/docs/docs/tutorials/summarization.ipynb +++ b/docs/docs/tutorials/summarization.ipynb @@ -640,7 +640,7 @@ "metadata": {}, "source": [ "## Splitting and summarizing in a single chain\n", - "For convenience, we can wrap both the text splitting of our long document and summarizing in a single `AnalyzeDocumentsChain`." + "For convenience, we can wrap both the text splitting of our long document and summarizing in a single [chain](/docs/how_to/sequence):" ] }, { @@ -650,12 +650,11 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.chains import AnalyzeDocumentChain\n", + "def split_text(text: str):\n", + " return text_splitter.create_documents([text])\n", "\n", - "summarize_document_chain = AnalyzeDocumentChain(\n", - " combine_docs_chain=chain, text_splitter=text_splitter\n", - ")\n", - "summarize_document_chain.invoke(docs[0].page_content)" + "\n", + "summarize_document_chain = split_text | chain" ] }, { diff --git a/libs/langchain/langchain/chains/combine_documents/base.py b/libs/langchain/langchain/chains/combine_documents/base.py index 6746c9df8f..90e965996d 100644 --- a/libs/langchain/langchain/chains/combine_documents/base.py +++ b/libs/langchain/langchain/chains/combine_documents/base.py @@ -3,6 +3,7 @@ from abc import ABC, abstractmethod from typing import Any, Dict, List, Optional, Tuple, Type +from langchain_core._api import deprecated from langchain_core.callbacks import ( AsyncCallbackManagerForChainRun, CallbackManagerForChainRun, @@ -157,12 +158,70 @@ 
class BaseCombineDocumentsChain(Chain, ABC): return extra_return_dict +@deprecated( + since="0.2.7", + alternative=( + "example in API reference with more detail: " + "https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.base.AnalyzeDocumentChain.html" # noqa: E501 + ), + removal="1.0", +) class AnalyzeDocumentChain(Chain): """Chain that splits documents, then analyzes it in pieces. This chain is parameterized by a TextSplitter and a CombineDocumentsChain. This chain takes a single document as input, and then splits it up into chunks and then passes those chucks to the CombineDocumentsChain. + + This class is deprecated. See below for alternative implementations which + support async and streaming modes of operation. + + If the underlying combine documents chain takes one ``input_documents`` argument + (e.g., chains generated by ``load_summarize_chain``): + + .. code-block:: python + + split_text = lambda x: text_splitter.create_documents([x]) + + summarize_document_chain = split_text | chain + + If the underlying chain takes additional arguments (e.g., ``load_qa_chain``, which + takes an additional ``question`` argument), we can use the following: + + .. code-block:: python + + from operator import itemgetter + from langchain_core.runnables import RunnableLambda, RunnableParallel + + split_text = RunnableLambda( + lambda x: text_splitter.create_documents([x]) + ) + summarize_document_chain = RunnableParallel( + question=itemgetter("question"), + input_documents=itemgetter("input_document") | split_text, + ) | chain.pick("output_text") + + To additionally return the input parameters, as ``AnalyzeDocumentChain`` does, + we can wrap this construction with ``RunnablePassthrough``: + + ..
code-block:: python + + from operator import itemgetter + from langchain_core.runnables import ( + RunnableLambda, + RunnableParallel, + RunnablePassthrough, + ) + + split_text = RunnableLambda( + lambda x: text_splitter.create_documents([x]) + ) + summarize_document_chain = RunnablePassthrough.assign( + output_text=RunnableParallel( + question=itemgetter("question"), + input_documents=itemgetter("input_document") | split_text, + ) | chain.pick("output_text") + ) """ input_key: str = "input_document" #: :meta private: