import json
import warnings
from typing import (
    Any,
    AsyncIterator,
    Dict,
    Iterator,
    List,
    Mapping,
    Optional,
    Type,
    cast,
)

from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import (
    AIMessage,
    AIMessageChunk,
    BaseMessage,
    BaseMessageChunk,
    ChatMessage,
    ChatMessageChunk,
    FunctionMessageChunk,
    HumanMessage,
    HumanMessageChunk,
    SystemMessage,
    SystemMessageChunk,
    ToolMessageChunk,
)
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult

from langchain_community.llms.azureml_endpoint import (
    AzureMLBaseEndpoint,
    AzureMLEndpointApiType,
    ContentFormatterBase,
)


class LlamaContentFormatter(ContentFormatterBase):
    """Content formatter for `LLaMA`."""

    def __init__(self) -> None:
        raise TypeError(
            "`LlamaContentFormatter` is deprecated for chat models. Use "
            "`CustomOpenAIChatContentFormatter` instead."
        )


class CustomOpenAIChatContentFormatter(ContentFormatterBase):
    """Chat content formatter for models with an OpenAI-like API scheme."""

    SUPPORTED_ROLES: List[str] = ["user", "assistant", "system"]

    @staticmethod
    def _convert_message_to_dict(message: BaseMessage) -> Dict:
        """Convert a message to a role/content dict according to its role."""
        content = cast(str, message.content)
        if isinstance(message, HumanMessage):
            return {
                "role": "user",
                "content": ContentFormatterBase.escape_special_characters(content),
            }
        elif isinstance(message, AIMessage):
            return {
                "role": "assistant",
                "content": ContentFormatterBase.escape_special_characters(content),
            }
        elif isinstance(message, SystemMessage):
            return {
                "role": "system",
                "content": ContentFormatterBase.escape_special_characters(content),
            }
        elif (
            isinstance(message, ChatMessage)
            and message.role in CustomOpenAIChatContentFormatter.SUPPORTED_ROLES
        ):
            return {
                "role": message.role,
                "content": ContentFormatterBase.escape_special_characters(content),
            }
        else:
            supported = ",".join(CustomOpenAIChatContentFormatter.SUPPORTED_ROLES)
            raise ValueError(
                f"Received unsupported role. Supported roles: {supported}"
            )
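
    # Usage sketch (illustrative only): how `_convert_message_to_dict` maps
    # LangChain messages onto OpenAI-style role/content dicts, assuming the
    # contents contain no characters that need escaping. Message texts are
    # placeholders.
    #
    #     CustomOpenAIChatContentFormatter._convert_message_to_dict(
    #         HumanMessage(content="What is the capital of France?")
    #     )
    #     # -> {"role": "user", "content": "What is the capital of France?"}
    #     CustomOpenAIChatContentFormatter._convert_message_to_dict(
    #         ChatMessage(role="tool", content="...")
    #     )
    #     # -> ValueError: "tool" is not in SUPPORTED_ROLES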

    @property
    def supported_api_types(self) -> List[AzureMLEndpointApiType]:
        return [AzureMLEndpointApiType.dedicated, AzureMLEndpointApiType.serverless]

    def format_messages_request_payload(
        self,
        messages: List[BaseMessage],
        model_kwargs: Dict,
        api_type: AzureMLEndpointApiType,
    ) -> bytes:
        """Format the request payload according to the chosen API type."""
        chat_messages = [
            CustomOpenAIChatContentFormatter._convert_message_to_dict(message)
            for message in messages
        ]
        if api_type in [
            AzureMLEndpointApiType.dedicated,
            AzureMLEndpointApiType.realtime,
        ]:
            request_payload = json.dumps(
                {
                    "input_data": {
                        "input_string": chat_messages,
                        "parameters": model_kwargs,
                    }
                }
            )
        elif api_type == AzureMLEndpointApiType.serverless:
            request_payload = json.dumps({"messages": chat_messages, **model_kwargs})
        else:
            raise ValueError(
                f"`api_type` {api_type} is not supported by this formatter"
            )
        return str.encode(request_payload)
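
    # Payload sketch (illustrative only): the two request shapes this method
    # produces; parameter values are placeholders.
    #
    #     fmt = CustomOpenAIChatContentFormatter()
    #     msgs = [HumanMessage(content="Hi")]
    #     fmt.format_messages_request_payload(
    #         msgs, {"max_tokens": 16}, AzureMLEndpointApiType.serverless
    #     )
    #     # -> b'{"messages": [{"role": "user", "content": "Hi"}], "max_tokens": 16}'
    #     fmt.format_messages_request_payload(
    #         msgs, {"max_tokens": 16}, AzureMLEndpointApiType.dedicated
    #     )
    #     # -> b'{"input_data": {"input_string": [...], "parameters": {...}}}'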

    def format_response_payload(
        self,
        output: bytes,
        api_type: AzureMLEndpointApiType = AzureMLEndpointApiType.dedicated,
    ) -> ChatGeneration:
        """Format the endpoint response into a `ChatGeneration`."""
        if api_type in [
            AzureMLEndpointApiType.dedicated,
            AzureMLEndpointApiType.realtime,
        ]:
            try:
                choice = json.loads(output)["output"]
            except (KeyError, IndexError, TypeError) as e:
                raise ValueError(
                    self.format_error_msg.format(api_type=api_type)
                ) from e
            return ChatGeneration(
                message=BaseMessage(
                    content=choice.strip(),
                    type="assistant",
                ),
                generation_info=None,
            )
        if api_type == AzureMLEndpointApiType.serverless:
            try:
                choice = json.loads(output)["choices"][0]
                if not isinstance(choice, dict):
                    raise TypeError(
                        "Endpoint response is not well formed for a chat "
                        f"model. Expected `dict` but `{type(choice)}` was received."
                    )
            except (KeyError, IndexError, TypeError) as e:
                raise ValueError(
                    self.format_error_msg.format(api_type=api_type)
                ) from e
            return ChatGeneration(
                message=BaseMessage(
                    content=choice["message"]["content"].strip(),
                    type=choice["message"]["role"],
                ),
                generation_info=dict(
                    finish_reason=choice.get("finish_reason"),
                    logprobs=choice.get("logprobs"),
                ),
            )
        raise ValueError(f"`api_type` {api_type} is not supported by this formatter")
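
    # Response sketch (illustrative only): a minimal serverless response body
    # and the `ChatGeneration` it yields; field values are placeholders.
    #
    #     raw = (
    #         b'{"choices": [{"message": {"role": "assistant", "content": "Hi!"},'
    #         b' "finish_reason": "stop", "logprobs": null}]}'
    #     )
    #     gen = CustomOpenAIChatContentFormatter().format_response_payload(
    #         raw, AzureMLEndpointApiType.serverless
    #     )
    #     # gen.message.content == "Hi!"
    #     # gen.generation_info == {"finish_reason": "stop", "logprobs": None}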


class LlamaChatContentFormatter(CustomOpenAIChatContentFormatter):
    """Deprecated: kept for backwards compatibility.

    Chat content formatter for Llama."""

    def __init__(self) -> None:
        super().__init__()
        warnings.warn(
            "`LlamaChatContentFormatter` will be deprecated in the future. "
            "Please use `CustomOpenAIChatContentFormatter` instead."
        )


class MistralChatContentFormatter(LlamaChatContentFormatter):
    """Content formatter for `Mistral`."""

    def format_messages_request_payload(
        self,
        messages: List[BaseMessage],
        model_kwargs: Dict,
        api_type: AzureMLEndpointApiType,
    ) -> bytes:
        """Format the request payload according to the chosen API type."""
        chat_messages = [
            self._convert_message_to_dict(message) for message in messages
        ]

        if chat_messages and chat_messages[0]["role"] == "system":
            # Mistral OSS models do not explicitly support system prompts, so
            # the system prompt is folded into the first user prompt.
            chat_messages[1]["content"] = (
                chat_messages[0]["content"] + "\n\n" + chat_messages[1]["content"]
            )
            del chat_messages[0]

        if api_type == AzureMLEndpointApiType.realtime:
            request_payload = json.dumps(
                {
                    "input_data": {
                        "input_string": chat_messages,
                        "parameters": model_kwargs,
                    }
                }
            )
        elif api_type == AzureMLEndpointApiType.serverless:
            request_payload = json.dumps({"messages": chat_messages, **model_kwargs})
        else:
            raise ValueError(
                f"`api_type` {api_type} is not supported by this formatter"
            )
        return str.encode(request_payload)
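
    # Folding sketch (illustrative only): a leading system message is merged
    # into the first user turn before the payload is built; texts are
    # placeholders.
    #
    #     msgs = [
    #         SystemMessage(content="Answer briefly."),
    #         HumanMessage(content="Capital of France?"),
    #     ]
    #     MistralChatContentFormatter().format_messages_request_payload(
    #         msgs, {}, AzureMLEndpointApiType.serverless
    #     )
    #     # -> b'{"messages": [{"role": "user",
    #     #      "content": "Answer briefly.\n\nCapital of France?"}]}'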


class AzureMLChatOnlineEndpoint(BaseChatModel, AzureMLBaseEndpoint):
    """Azure ML Online Endpoint chat models.

    Example:
        .. code-block:: python

            azure_llm = AzureMLChatOnlineEndpoint(
                endpoint_url="https://<your-endpoint>.<your_region>.inference.ml.azure.com/v1/chat/completions",
                endpoint_api_type=AzureMLEndpointApiType.serverless,
                endpoint_api_key="my-api-key",
                content_formatter=chat_content_formatter,
            )
    """  # noqa: E501

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Get the identifying parameters."""
        _model_kwargs = self.model_kwargs or {}
        return {"model_kwargs": _model_kwargs}

    @property
    def _llm_type(self) -> str:
        """Return the type of llm."""
        return "azureml_chat_endpoint"

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Call out to an AzureML Managed Online endpoint.

        Args:
            messages: The messages in the conversation with the chat model.
            stop: Optional list of stop words to use when generating.

        Returns:
            The `ChatResult` containing the message generated by the model.

        Example:
            .. code-block:: python

                response = azureml_model.invoke("Tell me a joke.")
        """
        _model_kwargs = self.model_kwargs or {}
        _model_kwargs.update(kwargs)
        if stop:
            _model_kwargs["stop"] = stop

        request_payload = self.content_formatter.format_messages_request_payload(
            messages, _model_kwargs, self.endpoint_api_type
        )
        response_payload = self.http_client.call(
            body=request_payload, run_manager=run_manager
        )
        generations = self.content_formatter.format_response_payload(
            response_payload, self.endpoint_api_type
        )
        return ChatResult(generations=[generations])
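
    # End-to-end sketch (illustrative only): constructing the endpoint and
    # invoking it; the URL, key, and formatter below are placeholders.
    #
    #     chat = AzureMLChatOnlineEndpoint(
    #         endpoint_url="https://<endpoint>.<region>.inference.ml.azure.com"
    #         "/v1/chat/completions",
    #         endpoint_api_type=AzureMLEndpointApiType.serverless,
    #         endpoint_api_key="<api-key>",
    #         content_formatter=CustomOpenAIChatContentFormatter(),
    #     )
    #     result = chat.invoke([HumanMessage(content="Tell me a joke.")])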

    def _stream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[ChatGenerationChunk]:
        self.endpoint_url = self.endpoint_url.replace("/chat/completions", "")
        timeout = kwargs.get("timeout")

        try:
            import openai
        except ImportError as e:
            raise ImportError(
                "Could not import openai python package. "
                "Please install it with `pip install openai`."
            ) from e

        client_params = {
            "api_key": self.endpoint_api_key.get_secret_value(),
            "base_url": self.endpoint_url,
            "timeout": timeout,
            "default_headers": None,
            "default_query": None,
            "http_client": None,
        }

        client = openai.OpenAI(**client_params)
        message_dicts = [
            CustomOpenAIChatContentFormatter._convert_message_to_dict(m)
            for m in messages
        ]
        params = {"stream": True, "stop": stop, "model": None, **kwargs}

        default_chunk_class = AIMessageChunk
        for chunk in client.chat.completions.create(messages=message_dicts, **params):
            if not isinstance(chunk, dict):
                chunk = chunk.dict()
            if len(chunk["choices"]) == 0:
                continue
            choice = chunk["choices"][0]
            chunk = _convert_delta_to_message_chunk(
                choice["delta"], default_chunk_class
            )
            generation_info = {}
            if finish_reason := choice.get("finish_reason"):
                generation_info["finish_reason"] = finish_reason
            logprobs = choice.get("logprobs")
            if logprobs:
                generation_info["logprobs"] = logprobs
            default_chunk_class = chunk.__class__
            chunk = ChatGenerationChunk(
                message=chunk, generation_info=generation_info or None
            )
            if run_manager:
                run_manager.on_llm_new_token(
                    chunk.text, chunk=chunk, logprobs=logprobs
                )
            yield chunk
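
    # Streaming sketch (illustrative only): iterating over chunks from
    # `stream`, assuming `chat` is configured as in the sketch above.
    #
    #     for chunk in chat.stream([HumanMessage(content="Count to five.")]):
    #         print(chunk.content, end="", flush=True)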

    async def _astream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> AsyncIterator[ChatGenerationChunk]:
        self.endpoint_url = self.endpoint_url.replace("/chat/completions", "")
        timeout = kwargs.get("timeout")

        try:
            import openai
        except ImportError as e:
            raise ImportError(
                "Could not import openai python package. "
                "Please install it with `pip install openai`."
            ) from e

        client_params = {
            "api_key": self.endpoint_api_key.get_secret_value(),
            "base_url": self.endpoint_url,
            "timeout": timeout,
            "default_headers": None,
            "default_query": None,
            "http_client": None,
        }

        async_client = openai.AsyncOpenAI(**client_params)
        message_dicts = [
            CustomOpenAIChatContentFormatter._convert_message_to_dict(m)
            for m in messages
        ]
        params = {"stream": True, "stop": stop, "model": None, **kwargs}

        default_chunk_class = AIMessageChunk
        async for chunk in await async_client.chat.completions.create(
            messages=message_dicts, **params
        ):
            if not isinstance(chunk, dict):
                chunk = chunk.dict()
            if len(chunk["choices"]) == 0:
                continue
            choice = chunk["choices"][0]
            chunk = _convert_delta_to_message_chunk(
                choice["delta"], default_chunk_class
            )
            generation_info = {}
            if finish_reason := choice.get("finish_reason"):
                generation_info["finish_reason"] = finish_reason
            logprobs = choice.get("logprobs")
            if logprobs:
                generation_info["logprobs"] = logprobs
            default_chunk_class = chunk.__class__
            chunk = ChatGenerationChunk(
                message=chunk, generation_info=generation_info or None
            )
            if run_manager:
                await run_manager.on_llm_new_token(
                    token=chunk.text, chunk=chunk, logprobs=logprobs
                )
            yield chunk
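
    # Async sketch (illustrative only): consuming `astream` under asyncio,
    # again assuming the placeholder `chat` configuration from above.
    #
    #     import asyncio
    #
    #     async def main() -> None:
    #         async for chunk in chat.astream([HumanMessage(content="Hi")]):
    #             print(chunk.content, end="", flush=True)
    #
    #     asyncio.run(main())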


def _convert_delta_to_message_chunk(
    _dict: Mapping[str, Any], default_class: Type[BaseMessageChunk]
) -> BaseMessageChunk:
    """Convert a streamed delta dict into the matching message chunk class."""
    role = cast(str, _dict.get("role"))
    content = cast(str, _dict.get("content") or "")
    additional_kwargs: Dict = {}
    if _dict.get("function_call"):
        function_call = dict(_dict["function_call"])
        if "name" in function_call and function_call["name"] is None:
            function_call["name"] = ""
        additional_kwargs["function_call"] = function_call
    if _dict.get("tool_calls"):
        additional_kwargs["tool_calls"] = _dict["tool_calls"]

    if role == "user" or default_class == HumanMessageChunk:
        return HumanMessageChunk(content=content)
    elif role == "assistant" or default_class == AIMessageChunk:
        return AIMessageChunk(content=content, additional_kwargs=additional_kwargs)
    elif role == "system" or default_class == SystemMessageChunk:
        return SystemMessageChunk(content=content)
    elif role == "function" or default_class == FunctionMessageChunk:
        return FunctionMessageChunk(content=content, name=_dict["name"])
    elif role == "tool" or default_class == ToolMessageChunk:
        return ToolMessageChunk(content=content, tool_call_id=_dict["tool_call_id"])
    elif role or default_class == ChatMessageChunk:
        return ChatMessageChunk(content=content, role=role)
    else:
        return default_class(content=content)
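

# Delta-conversion sketch (illustrative only): a streamed delta dict becomes a
# typed message chunk; the delta below is a placeholder.
#
#     _convert_delta_to_message_chunk(
#         {"role": "assistant", "content": "Hel"}, AIMessageChunk
#     )
#     # -> AIMessageChunk(content="Hel")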