langchain[patch]: runnable agent streaming param (#18761)

Usage:

```python
agent = RunnableAgent(runnable=runnable, ..., stream_runnable=False)
```
or, for convenience:
```python
agent_executor = AgentExecutor(agent=agent, ..., stream_runnable=False)
```
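
For context, a fuller end-to-end setup might look like this (hypothetical sketch: the tool, prompt, and model below are placeholders, not part of this change; the new bit is `stream_runnable=False`):

```python
# Hypothetical sketch only: word_count, the hub prompt, and the model are
# stand-ins; the change in this PR is the stream_runnable kwarg.
from langchain import hub
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI

@tool
def word_count(text: str) -> int:
    """Count the words in a piece of text."""
    return len(text.split())

tools = [word_count]
prompt = hub.pull("hwchase17/openai-tools-agent")
llm = ChatOpenAI(model="gpt-3.5-turbo")
agent = create_openai_tools_agent(llm, tools, prompt)

# stream_runnable is popped by AgentExecutor's validator and forwarded to
# the RunnableAgent / RunnableMultiActionAgent wrapper (see diff below).
agent_executor = AgentExecutor(agent=agent, tools=tools, stream_runnable=False)
agent_executor.invoke({"input": "How many words are in this sentence?"})
```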
Bagatur committed bc6249c889 7 months ago (parent c8c592d3f1)

```diff
@@ -345,6 +345,14 @@ class RunnableAgent(BaseSingleActionAgent):
     """Runnable to call to get agent action."""
     input_keys_arg: List[str] = []
     return_keys_arg: List[str] = []
+    stream_runnable: bool = True
+    """Whether to stream from the runnable or not.
+
+    If True then underlying LLM is invoked in a streaming fashion to make it possible
+    to get access to the individual LLM tokens when using stream_log with the Agent
+    Executor. If False then LLM is invoked in a non-streaming fashion and
+    individual LLM tokens will not be available in stream_log.
+    """

     class Config:
         """Configuration for this pydantic object."""
```
```diff
@@ -378,17 +386,21 @@ class RunnableAgent(BaseSingleActionAgent):
             Action specifying what tool to use.
         """
         inputs = {**kwargs, **{"intermediate_steps": intermediate_steps}}
-        # Use streaming to make sure that the underlying LLM is invoked in a streaming
-        # fashion to make it possible to get access to the individual LLM tokens
-        # when using stream_log with the Agent Executor.
-        # Because the response from the plan is not a generator, we need to
-        # accumulate the output into final output and return that.
         final_output: Any = None
-        for chunk in self.runnable.stream(inputs, config={"callbacks": callbacks}):
-            if final_output is None:
-                final_output = chunk
-            else:
-                final_output += chunk
+        if self.stream_runnable:
+            # Use streaming to make sure that the underlying LLM is invoked in a
+            # streaming fashion to make it possible to get access to the individual
+            # LLM tokens when using stream_log with the Agent Executor.
+            # Because the response from the plan is not a generator, we need to
+            # accumulate the output into final output and return that.
+            for chunk in self.runnable.stream(inputs, config={"callbacks": callbacks}):
+                if final_output is None:
+                    final_output = chunk
+                else:
+                    final_output += chunk
+        else:
+            final_output = self.runnable.invoke(inputs, config={"callbacks": callbacks})
         return final_output
```
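The loop above folds streamed chunks with `+`, which works because agent output chunks implement `__add__`; here is a minimal standalone sketch of the same pattern, with plain strings standing in for the real chunk objects:

```python
from typing import Any, Iterable

def accumulate(chunks: Iterable[Any]) -> Any:
    """Fold a stream of addable chunks into one final output."""
    final_output: Any = None
    for chunk in chunks:
        # First chunk seeds the result; later chunks merge via __add__,
        # mirroring the loop in plan() above.
        final_output = chunk if final_output is None else final_output + chunk
    return final_output

assert accumulate(["Thought: ", "use ", "the ", "tool"]) == "Thought: use the tool"
```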
```diff
@@ -414,18 +426,24 @@ class RunnableAgent(BaseSingleActionAgent):
         """
         inputs = {**kwargs, **{"intermediate_steps": intermediate_steps}}
         final_output: Any = None
-        # Use streaming to make sure that the underlying LLM is invoked in a streaming
-        # fashion to make it possible to get access to the individual LLM tokens
-        # when using stream_log with the Agent Executor.
-        # Because the response from the plan is not a generator, we need to
-        # accumulate the output into final output and return that.
-        async for chunk in self.runnable.astream(
-            inputs, config={"callbacks": callbacks}
-        ):
-            if final_output is None:
-                final_output = chunk
-            else:
-                final_output += chunk
+        if self.stream_runnable:
+            # Use streaming to make sure that the underlying LLM is invoked in a
+            # streaming fashion to make it possible to get access to the individual
+            # LLM tokens when using stream_log with the Agent Executor.
+            # Because the response from the plan is not a generator, we need to
+            # accumulate the output into final output and return that.
+            async for chunk in self.runnable.astream(
+                inputs, config={"callbacks": callbacks}
+            ):
+                if final_output is None:
+                    final_output = chunk
+                else:
+                    final_output += chunk
+        else:
+            final_output = await self.runnable.ainvoke(
+                inputs, config={"callbacks": callbacks}
+            )
         return final_output
```
```diff
@@ -436,6 +454,14 @@ class RunnableMultiActionAgent(BaseMultiActionAgent):
     """Runnable to call to get agent actions."""
     input_keys_arg: List[str] = []
     return_keys_arg: List[str] = []
+    stream_runnable: bool = True
+    """Whether to stream from the runnable or not.
+
+    If True then underlying LLM is invoked in a streaming fashion to make it possible
+    to get access to the individual LLM tokens when using stream_log with the Agent
+    Executor. If False then LLM is invoked in a non-streaming fashion and
+    individual LLM tokens will not be available in stream_log.
+    """

     class Config:
         """Configuration for this pydantic object."""
```
```diff
@@ -477,17 +503,21 @@ class RunnableMultiActionAgent(BaseMultiActionAgent):
             Action specifying what tool to use.
         """
         inputs = {**kwargs, **{"intermediate_steps": intermediate_steps}}
-        # Use streaming to make sure that the underlying LLM is invoked in a streaming
-        # fashion to make it possible to get access to the individual LLM tokens
-        # when using stream_log with the Agent Executor.
-        # Because the response from the plan is not a generator, we need to
-        # accumulate the output into final output and return that.
         final_output: Any = None
-        for chunk in self.runnable.stream(inputs, config={"callbacks": callbacks}):
-            if final_output is None:
-                final_output = chunk
-            else:
-                final_output += chunk
+        if self.stream_runnable:
+            # Use streaming to make sure that the underlying LLM is invoked in a
+            # streaming fashion to make it possible to get access to the individual
+            # LLM tokens when using stream_log with the Agent Executor.
+            # Because the response from the plan is not a generator, we need to
+            # accumulate the output into final output and return that.
+            for chunk in self.runnable.stream(inputs, config={"callbacks": callbacks}):
+                if final_output is None:
+                    final_output = chunk
+                else:
+                    final_output += chunk
+        else:
+            final_output = self.runnable.invoke(inputs, config={"callbacks": callbacks})
         return final_output
```
```diff
@@ -512,19 +542,25 @@ class RunnableMultiActionAgent(BaseMultiActionAgent):
             Action specifying what tool to use.
         """
         inputs = {**kwargs, **{"intermediate_steps": intermediate_steps}}
-        # Use streaming to make sure that the underlying LLM is invoked in a streaming
-        # fashion to make it possible to get access to the individual LLM tokens
-        # when using stream_log with the Agent Executor.
-        # Because the response from the plan is not a generator, we need to
-        # accumulate the output into final output and return that.
         final_output: Any = None
-        async for chunk in self.runnable.astream(
-            inputs, config={"callbacks": callbacks}
-        ):
-            if final_output is None:
-                final_output = chunk
-            else:
-                final_output += chunk
+        if self.stream_runnable:
+            # Use streaming to make sure that the underlying LLM is invoked in a
+            # streaming fashion to make it possible to get access to the individual
+            # LLM tokens when using stream_log with the Agent Executor.
+            # Because the response from the plan is not a generator, we need to
+            # accumulate the output into final output and return that.
+            async for chunk in self.runnable.astream(
+                inputs, config={"callbacks": callbacks}
+            ):
+                if final_output is None:
+                    final_output = chunk
+                else:
+                    final_output += chunk
+        else:
+            final_output = await self.runnable.ainvoke(
+                inputs, config={"callbacks": callbacks}
+            )
         return final_output
```
```diff
@@ -977,10 +1013,15 @@ class AgentExecutor(Chain):
         else:
             multi_action = output_type == Union[List[AgentAction], AgentFinish]
+        stream_runnable = values.pop("stream_runnable", True)
         if multi_action:
-            values["agent"] = RunnableMultiActionAgent(runnable=agent)
+            values["agent"] = RunnableMultiActionAgent(
+                runnable=agent, stream_runnable=stream_runnable
+            )
         else:
-            values["agent"] = RunnableAgent(runnable=agent)
+            values["agent"] = RunnableAgent(
+                runnable=agent, stream_runnable=stream_runnable
+            )
         return values

     def save(self, file_path: Union[Path, str]) -> None:
```
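For illustration, the validator change above makes these two spellings equivalent (sketch only; `runnable` and `tools` are assumed to be defined as in the usage snippet):

```python
# Sketch of the equivalence established by the validator above
# (`runnable` and `tools` are assumed to be defined elsewhere).
from langchain.agents import AgentExecutor
from langchain.agents.agent import RunnableAgent

# Explicit: wrap the runnable yourself and set the flag on the wrapper.
executor_a = AgentExecutor(
    agent=RunnableAgent(runnable=runnable, stream_runnable=False),
    tools=tools,
)

# Convenience: pass the raw runnable; the validator pops stream_runnable
# from the AgentExecutor kwargs and forwards it to the wrapper it creates.
executor_b = AgentExecutor(agent=runnable, tools=tools, stream_runnable=False)
```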
