From a08baa97c557fd37e61d561672d68d5503dca1b1 Mon Sep 17 00:00:00 2001 From: Jacob Ajit Date: Thu, 13 Jul 2023 01:23:17 -0400 Subject: [PATCH] Use modern OpenAI endpoints for embeddings (#6573) - Description: LangChain passes [`engine`](https://github.com/hwchase17/langchain/blob/master/langchain/embeddings/openai.py#L256) rather than `model` as a field when making OpenAI requests. Within the `openai` Python library, for OpenAI requests, this [makes a call](https://github.com/openai/openai-python/blob/main/openai/api_resources/abstract/engine_api_resource.py#L58) to an endpoint of the form `https://api.openai.com/v1/engines/{engine_id}/embeddings`. These endpoints are [deprecated](https://help.openai.com/en/articles/6283125-what-happened-to-engines) in favor of endpoints of the form `https://api.openai.com/v1/embeddings`, where `model` is passed as a parameter in the request body. While these deprecated endpoints continue to function for now, they may not be supported indefinitely and should be avoided in favor of the newer API format. It appears that `engine` was passed in instead of `model` to make both Azure OpenAI and OpenAI calls work similarly. However, including `engine` [causes](https://github.com/openai/openai-python/blob/main/openai/api_resources/abstract/engine_api_resource.py#L58) the `openai` library to use the deprecated endpoint for OpenAI requests. This change therefore passes `model` by default and adds a diverging code path for Azure OpenAI calls, where `engine` is passed in additionally (Azure OpenAI requires `engine` to specify a deployment, and can optionally take in `model`). In the long term, it may be worth considering spinning off Azure OpenAI embeddings into a separate class for ease of use and maintenance, similar to the [implementation for chat models](https://github.com/hwchase17/langchain/blob/master/langchain/chat_models/azure_openai.py). 
--- langchain/embeddings/openai.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/langchain/embeddings/openai.py b/langchain/embeddings/openai.py index 8e028e52bd..c7ebf277e8 100644 --- a/langchain/embeddings/openai.py +++ b/langchain/embeddings/openai.py @@ -253,7 +253,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings): @property def _invocation_params(self) -> Dict: openai_args = { - "engine": self.deployment, + "model": self.model, "request_timeout": self.request_timeout, "headers": self.headers, "api_key": self.openai_api_key, @@ -262,6 +262,8 @@ class OpenAIEmbeddings(BaseModel, Embeddings): "api_type": self.openai_api_type, "api_version": self.openai_api_version, } + if self.openai_api_type in ("azure", "azure_ad", "azuread"): + openai_args["engine"] = self.deployment if self.openai_proxy: import openai