mirror of https://github.com/hwchase17/langchain
Feature/fix azureopenai model mappings (#8621)
This pull request aims to ensure that the `OpenAICallbackHandler` can properly calculate the total cost for Azure OpenAI chat models. The following changes have resolved this issue: - The `model_name` has been added to the ChatResult llm_output. Without this, the default values of `gpt-35-turbo` were applied. This was causing the total cost for Azure OpenAI's GPT-4 to be significantly inaccurate. - A new parameter `model_version` has been added to `AzureChatOpenAI`. Azure does not include the model version in the response. With the addition of `model_name`, this is not a significant issue for GPT-4 models, but it is an issue for GPT-3.5-Turbo. Version 0301 (default) of GPT-3.5-Turbo on Azure has a flat rate of 0.002 per 1k tokens for both prompt and completion. However, version 0613 introduced a split in pricing for prompt and completion tokens. - The `OpenAICallbackHandler` implementation has been updated with the proper model names, versions, and cost per 1k tokens. Unit tests have been added to ensure the functionality works as expected; the Azure ChatOpenAI notebook has been updated with examples. Maintainers: @hwchase17, @baskaryan Twitter handle: @jjczopek --------- Co-authored-by: Jerzy Czopek <jerzy.czopek@avanade.com> Co-authored-by: Bagatur <baskaryan@gmail.com> (branch: pull/8995/head)
parent
269f85b7b7
commit
539672a7fd
@ -0,0 +1,52 @@
|
||||
import json
|
||||
import os
|
||||
from typing import Any, Mapping, cast
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain.chat_models.azure_openai import AzureChatOpenAI
|
||||
|
||||
# Dummy Azure OpenAI credentials/endpoint so AzureChatOpenAI can be
# constructed in tests without contacting a real service.
os.environ.update(
    {
        "OPENAI_API_KEY": "test",
        "OPENAI_API_BASE": "https://oai.azure.com/",
        "OPENAI_API_VERSION": "2023-05-01",
    }
)
|
||||
|
||||
|
||||
@pytest.mark.requires("openai")
@pytest.mark.parametrize(
    "model_name", ["gpt-4", "gpt-4-32k", "gpt-35-turbo", "gpt-35-turbo-16k"]
)
def test_model_name_set_on_chat_result_when_present_in_response(
    model_name: str,
) -> None:
    """Check that the model name in the API response is propagated to
    ``ChatResult.llm_output["model_name"]``.
    """
    # Mocked Azure OpenAI chat-completion payload, built directly as a
    # mapping (equivalent to parsing the corresponding JSON document).
    payload: dict = {
        "id": "chatcmpl-7ryweq7yc8463fas879t9hdkkdf",
        "object": "chat.completion",
        "created": 1690381189,
        "model": model_name,
        "choices": [
            {
                "index": 0,
                "finish_reason": "stop",
                "message": {
                    "role": "assistant",
                    "content": "I'm an AI assistant that can help you.",
                },
            }
        ],
        "usage": {
            "completion_tokens": 28,
            "prompt_tokens": 15,
            "total_tokens": 43,
        },
    }
    mock_response = cast(Mapping[str, Any], payload)
    mock_chat = AzureChatOpenAI()
    chat_result = mock_chat._create_chat_result(mock_response)
    assert (
        chat_result.llm_output is not None
        and chat_result.llm_output["model_name"] == model_name
    )
|
Loading…
Reference in New Issue