openai[patch]: wrap stream code in context manager blocks (#18013)

**Description:**
Use the `Stream` context managers in the `ChatOpenAI` `stream` and
`astream` methods.

Using the context manager returned by the OpenAI client makes it
possible to terminate the stream early, since the response connection
is closed when the context manager exits.
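
For example, a caller can now stop consuming the stream early and the
HTTP connection is released (a minimal sketch; the model name and
prompt are illustrative, and `OPENAI_API_KEY` must be set):

```python
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo")

# Breaking out of the loop closes the generator; that exits the `with`
# block inside `_stream`, which closes the response connection instead
# of leaving it open until the server finishes sending.
for i, chunk in enumerate(llm.stream("Write a very long story.")):
    print(chunk.content, end="", flush=True)
    if i >= 5:
        break
```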

**Issue:** #5340
**Twitter handle:** @snopoke

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Commit a682f0d12b (parent 6c11c8dac6) by Simon Kelly, committed via GitHub.

@@ -457,7 +457,8 @@ class ChatOpenAI(BaseChatModel):
         params = {**params, **kwargs, "stream": True}
         default_chunk_class = AIMessageChunk
-        for chunk in self.client.create(messages=message_dicts, **params):
+        with self.client.create(messages=message_dicts, **params) as response:
+            for chunk in response:
                 if not isinstance(chunk, dict):
                     chunk = chunk.model_dump()
                 if len(chunk["choices"]) == 0:
@@ -479,7 +480,9 @@
                     message=chunk, generation_info=generation_info or None
                 )
                 if run_manager:
-                    run_manager.on_llm_new_token(chunk.text, chunk=chunk, logprobs=logprobs)
+                    run_manager.on_llm_new_token(
+                        chunk.text, chunk=chunk, logprobs=logprobs
+                    )
                 yield chunk

     def _generate(
@@ -553,9 +556,9 @@
         params = {**params, **kwargs, "stream": True}
         default_chunk_class = AIMessageChunk
-        async for chunk in await self.async_client.create(
-            messages=message_dicts, **params
-        ):
+        response = await self.async_client.create(messages=message_dicts, **params)
+        async with response:
+            async for chunk in response:
                 if not isinstance(chunk, dict):
                     chunk = chunk.model_dump()
                 if len(chunk["choices"]) == 0:
