langchain[patch]: deprecate AnalyzeDocumentChain (#23769)

ccurme 2024-07-05 14:00:23 -04:00 committed by GitHub
parent 42d049f618
commit 25de47878b
2 changed files with 64 additions and 6 deletions

View File

@@ -640,7 +640,7 @@
    "metadata": {},
    "source": [
     "## Splitting and summarizing in a single chain\n",
-    "For convenience, we can wrap both the text splitting of our long document and summarizing in a single `AnalyzeDocumentsChain`."
+    "For convenience, we can wrap both the text splitting of our long document and summarizing in a single [chain](/docs/how_to/sequence):"
    ]
   },
   {
@@ -650,12 +650,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from langchain.chains import AnalyzeDocumentChain\n",
-    "\n",
-    "summarize_document_chain = AnalyzeDocumentChain(\n",
-    "    combine_docs_chain=chain, text_splitter=text_splitter\n",
-    ")\n",
-    "summarize_document_chain.invoke(docs[0].page_content)"
+    "def split_text(text: str):\n",
+    "    return text_splitter.create_documents([text])\n",
+    "\n",
+    "\n",
+    "summarize_document_chain = split_text | chain"
    ]
   },
   {
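For readers migrating their own code, a minimal end-to-end sketch of the pattern the new notebook cell relies on. The model, splitter, and chain_type below are assumptions added for illustration (the notebook configures its own text_splitter and chain earlier); only the split_text | chain composition comes from this commit.

from langchain.chains.summarize import load_summarize_chain
from langchain_openai import ChatOpenAI
from langchain_text_splitters import CharacterTextSplitter

# Assumed setup -- the notebook defines its own equivalents of these.
llm = ChatOpenAI(temperature=0)
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
chain = load_summarize_chain(llm, chain_type="refine")


def split_text(text: str):
    # Wrap one long string into Document objects the summarize chain expects.
    return text_splitter.create_documents([text])


# Piping a plain function into a Runnable coerces it into a RunnableLambda.
summarize_document_chain = split_text | chain

# summary = summarize_document_chain.invoke(long_text)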

View File

@@ -3,6 +3,7 @@
 from abc import ABC, abstractmethod
 from typing import Any, Dict, List, Optional, Tuple, Type
 
+from langchain_core._api import deprecated
 from langchain_core.callbacks import (
     AsyncCallbackManagerForChainRun,
     CallbackManagerForChainRun,
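The import added in this hunk is the decorator applied to AnalyzeDocumentChain in the hunk below. As a hedged illustration of its runtime effect, a small sketch with a made-up class (only the decorator and its parameters come from this commit; OldChain is hypothetical):

from langchain_core._api import deprecated


@deprecated(since="0.2.7", removal="1.0", alternative="an LCEL composition")
class OldChain:
    """Hypothetical class used only to show the decorator's effect."""


# Instantiating the decorated class emits a LangChainDeprecationWarning that
# names the removal version and the suggested alternative.
old = OldChain()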
@@ -157,12 +158,70 @@ class BaseCombineDocumentsChain(Chain, ABC):
         return extra_return_dict
 
 
+@deprecated(
+    since="0.2.7",
+    alternative=(
+        "example in API reference with more detail: "
+        "https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.base.AnalyzeDocumentChain.html"  # noqa: E501
+    ),
+    removal="1.0",
+)
 class AnalyzeDocumentChain(Chain):
     """Chain that splits documents, then analyzes it in pieces.
 
     This chain is parameterized by a TextSplitter and a CombineDocumentsChain.
 
     This chain takes a single document as input, and then splits it up into chunks
     and then passes those chucks to the CombineDocumentsChain.
+
+    This class is deprecated. See below for alternative implementations which
+    supports async and streaming modes of operation.
+
+    If the underlying combine documents chain takes one ``input_documents`` argument
+    (e.g., chains generated by ``load_summarize_chain``):
+
+    .. code-block:: python
+
+        split_text = lambda x: text_splitter.create_documents([x])
+        summarize_document_chain = split_text | chain
+
+    If the underlying chain takes additional arguments (e.g., ``load_qa_chain``, which
+    takes an additional ``question`` argument), we can use the following:
+
+    .. code-block:: python
+
+        from operator import itemgetter
+        from langchain_core.runnables import RunnableLambda, RunnableParallel
+
+        split_text = RunnableLambda(
+            lambda x: text_splitter.create_documents([x])
+        )
+        summarize_document_chain = RunnableParallel(
+            question=itemgetter("question"),
+            input_documents=itemgetter("input_document") | split_text,
+        ) | chain.pick("output_text")
+
+    To additionally return the input parameters, as ``AnalyzeDocumentChain`` does,
+    we can wrap this construction with ``RunnablePassthrough``:
+
+    .. code-block:: python
+
+        from operator import itemgetter
+        from langchain_core.runnables import (
+            RunnableLambda,
+            RunnableParallel,
+            RunnablePassthrough,
+        )
+
+        split_text = RunnableLambda(
+            lambda x: text_splitter.create_documents([x])
+        )
+        summarize_document_chain = RunnablePassthrough.assign(
+            output_text=RunnableParallel(
+                question=itemgetter("question"),
+                input_documents=itemgetter("input_document") | split_text,
+            ) | chain.pick("output_text")
+        )
     """
 
     input_key: str = "input_document"  #: :meta private:
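To see how the last docstring example is meant to be wired up and invoked, here is a hedged end-to-end sketch. The llm, splitter, and load_qa_chain setup are assumptions added for illustration; the RunnablePassthrough / RunnableParallel composition itself is the one from the docstring above.

from operator import itemgetter

from langchain.chains.question_answering import load_qa_chain
from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain_text_splitters import CharacterTextSplitter

# Assumed setup (not part of this commit).
llm = ChatOpenAI(temperature=0)
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
chain = load_qa_chain(llm, chain_type="stuff")  # takes input_documents and question

split_text = RunnableLambda(lambda x: text_splitter.create_documents([x]))

summarize_document_chain = RunnablePassthrough.assign(
    output_text=RunnableParallel(
        question=itemgetter("question"),
        input_documents=itemgetter("input_document") | split_text,
    )
    | chain.pick("output_text")
)

# Returns the original inputs plus "output_text", mirroring AnalyzeDocumentChain:
# result = summarize_document_chain.invoke(
#     {"input_document": some_long_text, "question": "What does the author say about X?"}
# )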