openai[patch]: wrap stream code in context manager blocks (#18013)

**Description:**
Use the `Stream` context managers in the `ChatOpenAI` `stream` and
`astream` methods.

Using the context manager returned by the OpenAI client makes it
possible to terminate the stream early, since the response connection
is closed when the context manager exits.
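
For example, a caller can now stop consuming the stream early and the
HTTP connection is released (a minimal sketch; the model name and
prompt are illustrative, and `OPENAI_API_KEY` must be set):

```python
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo")

# Breaking out of the loop closes the generator; that exits the `with`
# block inside `_stream`, which closes the response connection instead
# of leaving it open until the server finishes sending.
for i, chunk in enumerate(llm.stream("Write a very long story.")):
    print(chunk.content, end="", flush=True)
    if i >= 5:
        break
```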

**Issue:** #5340
**Twitter handle:** @snopoke

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Commit a682f0d12b (parent 6c11c8dac6) by Simon Kelly, committed via GitHub.

@@ -457,7 +457,8 @@ class ChatOpenAI(BaseChatModel):
         params = {**params, **kwargs, "stream": True}
         default_chunk_class = AIMessageChunk
-        for chunk in self.client.create(messages=message_dicts, **params):
+        with self.client.create(messages=message_dicts, **params) as response:
+            for chunk in response:
                 if not isinstance(chunk, dict):
                     chunk = chunk.model_dump()
                 if len(chunk["choices"]) == 0:
@@ -479,7 +480,9 @@
                     message=chunk, generation_info=generation_info or None
                 )
                 if run_manager:
-                    run_manager.on_llm_new_token(chunk.text, chunk=chunk, logprobs=logprobs)
+                    run_manager.on_llm_new_token(
+                        chunk.text, chunk=chunk, logprobs=logprobs
+                    )
                 yield chunk

     def _generate(
@@ -553,9 +556,9 @@
         params = {**params, **kwargs, "stream": True}
         default_chunk_class = AIMessageChunk
-        async for chunk in await self.async_client.create(
-            messages=message_dicts, **params
-        ):
+        response = await self.async_client.create(messages=message_dicts, **params)
+        async with response:
+            async for chunk in response:
                 if not isinstance(chunk, dict):
                     chunk = chunk.model_dump()
                 if len(chunk["choices"]) == 0:
