@@ -367,6 +367,8 @@ class BaseChatOpenAI(BaseChatModel):
     extra_body: Optional[Mapping[str, Any]] = None
     """Optional additional JSON properties to include in the request parameters when
     making requests to OpenAI compatible APIs, such as vLLM."""
+    include_response_headers: bool = False
+    """Whether to include response headers in the output message response_metadata."""
 
     class Config:
         """Configuration for this pydantic object."""
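Note: the new flag is opt-in, and per the docstring above the headers land in the result message's response_metadata. A minimal usage sketch, assuming an environment with OPENAI_API_KEY set (the model name is illustrative):

    from langchain_openai import ChatOpenAI

    llm = ChatOpenAI(model="gpt-3.5-turbo", include_response_headers=True)
    msg = llm.invoke("Hello")
    # HTTP headers from the underlying response, e.g. OpenAI's request id
    print(msg.response_metadata["headers"].get("x-request-id"))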
@@ -510,7 +512,15 @@ class BaseChatOpenAI(BaseChatModel):
         kwargs["stream"] = True
         payload = self._get_request_payload(messages, stop=stop, **kwargs)
         default_chunk_class: Type[BaseMessageChunk] = AIMessageChunk
-        with self.client.create(**payload) as response:
+        if self.include_response_headers:
+            raw_response = self.client.with_raw_response.create(**payload)
+            response = raw_response.parse()
+            base_generation_info = {"headers": dict(raw_response.headers)}
+        else:
+            response = self.client.create(**payload)
+            base_generation_info = {}
+        with response:
             is_first_chunk = True
             for chunk in response:
                 if not isinstance(chunk, dict):
                     chunk = chunk.model_dump()
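Note: the new branch relies on openai-python's raw-response wrapper rather than any new API surface. Roughly, `with_raw_response.create(...)` returns a wrapper whose `.parse()` yields the usual typed response and whose `.headers` exposes the HTTP headers; a standalone sketch of the same pattern (client defaults assumed):

    import openai

    client = openai.OpenAI()
    raw = client.chat.completions.with_raw_response.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello"}],
    )
    completion = raw.parse()     # the usual ChatCompletion object
    headers = dict(raw.headers)  # HTTP response headers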
@@ -536,7 +546,7 @@ class BaseChatOpenAI(BaseChatModel):
                 message_chunk = _convert_delta_to_message_chunk(
                     choice["delta"], default_chunk_class
                 )
-                generation_info = {}
+                generation_info = {**base_generation_info} if is_first_chunk else {}
                 if finish_reason := choice.get("finish_reason"):
                     generation_info["finish_reason"] = finish_reason
                     if model_name := chunk.get("model"):
@@ -555,6 +565,7 @@ class BaseChatOpenAI(BaseChatModel):
                     run_manager.on_llm_new_token(
                         generation_chunk.text, chunk=generation_chunk, logprobs=logprobs
                     )
+                is_first_chunk = False
                 yield generation_chunk
 
     def _generate(
@@ -570,8 +581,14 @@ class BaseChatOpenAI(BaseChatModel):
             )
             return generate_from_stream(stream_iter)
         payload = self._get_request_payload(messages, stop=stop, **kwargs)
-        response = self.client.create(**payload)
-        return self._create_chat_result(response)
+        if self.include_response_headers:
+            raw_response = self.client.with_raw_response.create(**payload)
+            response = raw_response.parse()
+            generation_info = {"headers": dict(raw_response.headers)}
+        else:
+            response = self.client.create(**payload)
+            generation_info = None
+        return self._create_chat_result(response, generation_info)
 
     def _get_request_payload(
         self,
@@ -590,7 +607,9 @@ class BaseChatOpenAI(BaseChatModel):
         }
 
     def _create_chat_result(
-        self, response: Union[dict, openai.BaseModel]
+        self,
+        response: Union[dict, openai.BaseModel],
+        generation_info: Optional[Dict] = None,
     ) -> ChatResult:
         generations = []
         if not isinstance(response, dict):
@@ -612,7 +631,9 @@ class BaseChatOpenAI(BaseChatModel):
                 "output_tokens": token_usage.get("completion_tokens", 0),
                 "total_tokens": token_usage.get("total_tokens", 0),
             }
-            generation_info = dict(finish_reason=res.get("finish_reason"))
+            generation_info = dict(
+                finish_reason=res.get("finish_reason"), **(generation_info or {})
+            )
             if "logprobs" in res:
                 generation_info["logprobs"] = res["logprobs"]
             gen = ChatGeneration(message=message, generation_info=generation_info)
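Note: the keyword splat keeps both the per-choice finish_reason and any caller-supplied entries, and the `or {}` guards the None default. A toy illustration of the merge (values hypothetical):

    base = {"headers": {"x-request-id": "abc123"}}  # what _generate passes in
    generation_info = dict(finish_reason="stop", **(base or {}))
    # -> {"finish_reason": "stop", "headers": {"x-request-id": "abc123"}}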
@@ -634,8 +655,15 @@ class BaseChatOpenAI(BaseChatModel):
         kwargs["stream"] = True
         payload = self._get_request_payload(messages, stop=stop, **kwargs)
         default_chunk_class: Type[BaseMessageChunk] = AIMessageChunk
-        response = await self.async_client.create(**payload)
+        if self.include_response_headers:
+            raw_response = await self.async_client.with_raw_response.create(**payload)
+            response = raw_response.parse()
+            base_generation_info = {"headers": dict(raw_response.headers)}
+        else:
+            response = await self.async_client.create(**payload)
+            base_generation_info = {}
+        async with response:
             is_first_chunk = True
             async for chunk in response:
                 if not isinstance(chunk, dict):
                     chunk = chunk.model_dump()
@@ -664,7 +692,7 @@ class BaseChatOpenAI(BaseChatModel):
                     choice["delta"],
                     default_chunk_class,
                 )
-                generation_info = {}
+                generation_info = {**base_generation_info} if is_first_chunk else {}
                 if finish_reason := choice.get("finish_reason"):
                     generation_info["finish_reason"] = finish_reason
                     if model_name := chunk.get("model"):
@@ -685,6 +713,7 @@ class BaseChatOpenAI(BaseChatModel):
                         chunk=generation_chunk,
                         logprobs=logprobs,
                     )
+                is_first_chunk = False
                 yield generation_chunk
 
     async def _agenerate(
@@ -700,8 +729,16 @@ class BaseChatOpenAI(BaseChatModel):
             )
             return await agenerate_from_stream(stream_iter)
         payload = self._get_request_payload(messages, stop=stop, **kwargs)
-        response = await self.async_client.create(**payload)
-        return await run_in_executor(None, self._create_chat_result, response)
+        if self.include_response_headers:
+            raw_response = await self.async_client.with_raw_response.create(**payload)
+            response = raw_response.parse()
+            generation_info = {"headers": dict(raw_response.headers)}
+        else:
+            response = await self.async_client.create(**payload)
+            generation_info = None
+        return await run_in_executor(
+            None, self._create_chat_result, response, generation_info
+        )
 
     @property
     def _identifying_params(self) -> Dict[str, Any]:
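Note: in both streaming paths the headers are merged only into the first chunk's generation_info (`{**base_generation_info} if is_first_chunk else {}`), so consumers should look at the first emitted chunk. A sketch, assuming generation_info is surfaced on chunks the same way finish_reason is:

    llm = ChatOpenAI(model="gpt-3.5-turbo", include_response_headers=True)
    first = next(iter(llm.stream("Hello")))
    print(first.response_metadata.get("headers", {}).get("x-request-id"))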