import json
from operator import itemgetter
from pathlib import Path
from typing import (
    Any,
    Callable,
    Dict,
    Iterator,
    List,
    Mapping,
    Optional,
    Sequence,
    Type,
    Union,
    cast,
)

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import LanguageModelInput
from langchain_core.language_models.chat_models import (
    BaseChatModel,
    generate_from_stream,
)
from langchain_core.messages import (
    AIMessage,
    AIMessageChunk,
    BaseMessage,
    BaseMessageChunk,
    ChatMessage,
    ChatMessageChunk,
    FunctionMessage,
    FunctionMessageChunk,
    HumanMessage,
    HumanMessageChunk,
    SystemMessage,
    SystemMessageChunk,
    ToolMessage,
    ToolMessageChunk,
)
from langchain_core.messages.tool import InvalidToolCall, ToolCall, ToolCallChunk
from langchain_core.output_parsers.base import OutputParserLike
from langchain_core.output_parsers.openai_tools import (
    JsonOutputKeyToolsParser,
    PydanticToolsParser,
    make_invalid_tool_call,
    parse_tool_call,
)
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from langchain_core.pydantic_v1 import BaseModel, Field, root_validator
from langchain_core.runnables import Runnable, RunnableMap, RunnablePassthrough
from langchain_core.tools import BaseTool
from langchain_core.utils.function_calling import convert_to_openai_tool


class ChatLlamaCpp(BaseChatModel):
    """llama.cpp model.

    To use, you should have the llama-cpp-python library installed and provide the
    path to the Llama model as a named parameter to the constructor.
    Check out: https://github.com/abetlen/llama-cpp-python
    """

    client: Any  #: :meta private:

    model_path: str
    """The path to the Llama model file."""

    lora_base: Optional[str] = None
    """The path to the Llama LoRA base model."""

    lora_path: Optional[str] = None
    """The path to the Llama LoRA. If None, no LoRA is loaded."""

    n_ctx: int = 512
    """Token context window."""

    n_parts: int = -1
    """Number of parts to split the model into.
    If -1, the number of parts is automatically determined."""

    seed: int = -1
    """Seed. If -1, a random seed is used."""

    f16_kv: bool = True
    """Use half-precision for the key/value cache."""

    logits_all: bool = False
    """Return logits for all tokens, not just the last token."""

    vocab_only: bool = False
    """Only load the vocabulary, no weights."""

    use_mlock: bool = False
    """Force the system to keep the model in RAM."""

    n_threads: Optional[int] = None
    """Number of threads to use.
    If None, the number of threads is automatically determined."""

    n_batch: int = 8
    """Number of tokens to process in parallel.
    Should be a number between 1 and n_ctx."""

    n_gpu_layers: Optional[int] = None
    """Number of layers to be loaded into GPU memory. Default None."""

    suffix: Optional[str] = None
    """A suffix to append to the generated text. If None, no suffix is appended."""

    max_tokens: int = 256
    """The maximum number of tokens to generate."""

    temperature: float = 0.8
    """The temperature to use for sampling."""

    top_p: float = 0.95
    """The top-p value to use for sampling."""

    logprobs: Optional[int] = None
    """The number of logprobs to return. If None, no logprobs are returned."""

    echo: bool = False
    """Whether to echo the prompt."""

    stop: Optional[List[str]] = None
    """A list of strings to stop generation when encountered."""

    repeat_penalty: float = 1.1
    """The penalty to apply to repeated tokens."""

    top_k: int = 40
    """The top-k value to use for sampling."""

    last_n_tokens_size: int = 64
    """The number of tokens to look back when applying the repeat_penalty."""

    use_mmap: bool = True
    """Whether to memory-map the model file instead of loading it fully into RAM."""

    rope_freq_scale: float = 1.0
    """Scale factor for rope sampling."""

    rope_freq_base: float = 10000.0
    """Base frequency for rope sampling."""

    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Any additional parameters to pass to llama_cpp.Llama."""

    streaming: bool = True
    """Whether to stream the results, token by token."""

    grammar_path: Optional[Union[str, Path]] = None
    """Path to the .gbnf file that defines formal grammars for constraining
    model outputs. For instance, the grammar can be used to force the model to
    generate valid JSON or to speak exclusively in emojis. At most one of
    grammar_path and grammar should be passed in."""

    grammar: Any = None
    """Formal grammar for constraining model outputs. For instance, the grammar
    can be used to force the model to generate valid JSON or to speak
    exclusively in emojis. At most one of grammar_path and grammar should be
    passed in."""

    verbose: bool = True
    """Print verbose output to stderr."""
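
    # A minimal usage sketch (illustrative comment only, not executed; the
    # model path below is a hypothetical placeholder):
    #
    #     llm = ChatLlamaCpp(
    #         model_path="./path/to/model.gguf",
    #         n_ctx=4096,
    #         temperature=0.5,
    #         max_tokens=512,
    #     )
    #     llm.invoke("Name one planet.")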

    @root_validator(pre=False, skip_on_failure=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the llama-cpp-python library is installed."""
        try:
            from llama_cpp import Llama, LlamaGrammar
        except ImportError:
            raise ImportError(
                "Could not import llama-cpp-python library. "
                "Please install the llama-cpp-python library to "
                "use this chat model: pip install llama-cpp-python"
            )

        model_path = values["model_path"]
        model_param_names = [
            "rope_freq_scale",
            "rope_freq_base",
            "lora_path",
            "lora_base",
            "n_ctx",
            "n_parts",
            "seed",
            "f16_kv",
            "logits_all",
            "vocab_only",
            "use_mlock",
            "n_threads",
            "n_batch",
            "use_mmap",
            "last_n_tokens_size",
            "verbose",
        ]
        model_params = {k: values[k] for k in model_param_names}
        # For backwards compatibility, only include if non-null.
        if values["n_gpu_layers"] is not None:
            model_params["n_gpu_layers"] = values["n_gpu_layers"]

        model_params.update(values["model_kwargs"])

        try:
            values["client"] = Llama(model_path, **model_params)
        except Exception as e:
            raise ValueError(
                f"Could not load Llama model from path: {model_path}. "
                f"Received error {e}"
            )

        if values["grammar"] and values["grammar_path"]:
            grammar = values["grammar"]
            grammar_path = values["grammar_path"]
            raise ValueError(
                "Can only pass in one of grammar and grammar_path. Received "
                f"{grammar=} and {grammar_path=}."
            )
        elif isinstance(values["grammar"], str):
            values["grammar"] = LlamaGrammar.from_string(values["grammar"])
        elif values["grammar_path"]:
            values["grammar"] = LlamaGrammar.from_file(values["grammar_path"])
        return values
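
    # Illustrative sketch of the grammar option validated above: a plain
    # string is converted with LlamaGrammar.from_string, so a GBNF grammar
    # such as the one below would constrain generation to "yes"/"no"
    # (assumes GBNF syntax; the model path is a hypothetical placeholder):
    #
    #     llm = ChatLlamaCpp(
    #         model_path="./path/to/model.gguf",
    #         grammar='root ::= "yes" | "no"',
    #     )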

    def _get_parameters(self, stop: Optional[List[str]]) -> Dict[str, Any]:
        """Perform a sanity check, preparing parameters in the format expected
        by llama_cpp.

        Returns:
            Dictionary containing the combined parameters.
        """
        params = self._default_params

        # llama_cpp expects a "stop" key rather than "stop_sequences", so
        # remove the latter:
        stop_sequences = params.pop("stop_sequences")

        # then set "stop" from the call, the configured default, or an empty list:
        params["stop"] = stop or stop_sequences or self.stop or []

        return params
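
    # Worked example (follows directly from the code above): with
    # self.stop == ["</s>"], _get_parameters(None)["stop"] == ["</s>"],
    # while a call-time value wins: _get_parameters(["\n\n"])["stop"] == ["\n\n"].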

    def _create_message_dicts(
        self, messages: List[BaseMessage]
    ) -> List[Dict[str, Any]]:
        message_dicts = [_convert_message_to_dict(m) for m in messages]

        return message_dicts

    def _create_chat_result(self, response: dict) -> ChatResult:
        generations = []
        for res in response["choices"]:
            message = _convert_dict_to_message(res["message"])
            generation_info = dict(finish_reason=res.get("finish_reason"))
            if "logprobs" in res:
                generation_info["logprobs"] = res["logprobs"]
            gen = ChatGeneration(message=message, generation_info=generation_info)
            generations.append(gen)
        token_usage = response.get("usage", {})
        llm_output = {
            "token_usage": token_usage,
            # "system_fingerprint": response.get("system_fingerprint", ""),
        }
        return ChatResult(generations=generations, llm_output=llm_output)

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        params = {**self._get_parameters(stop), **kwargs}

        # If tool_choice is set, tool calling is requested, which requires a
        # single non-streaming call; otherwise honor the streaming setting.
        if self.streaming and not params.get("tool_choice"):
            stream_iter = self._stream(
                messages, stop=stop, run_manager=run_manager, **kwargs
            )
            return generate_from_stream(stream_iter)

        message_dicts = self._create_message_dicts(messages)

        response = self.client.create_chat_completion(messages=message_dicts, **params)

        return self._create_chat_result(response)

    def _stream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[ChatGenerationChunk]:
        params = {**self._get_parameters(stop), **kwargs}
        message_dicts = self._create_message_dicts(messages)

        result = self.client.create_chat_completion(
            messages=message_dicts, stream=True, **params
        )

        default_chunk_class = AIMessageChunk
        for chunk in result:
            if not isinstance(chunk, dict):
                chunk = chunk.model_dump()
            if len(chunk["choices"]) == 0:
                continue
            choice = chunk["choices"][0]
            if choice["delta"] is None:
                continue
            chunk = _convert_delta_to_message_chunk(
                choice["delta"], default_chunk_class
            )
            generation_info = {}
            if finish_reason := choice.get("finish_reason"):
                generation_info["finish_reason"] = finish_reason
            logprobs = choice.get("logprobs")
            if logprobs:
                generation_info["logprobs"] = logprobs
            default_chunk_class = chunk.__class__
            chunk = ChatGenerationChunk(
                message=chunk, generation_info=generation_info or None
            )
            if run_manager:
                run_manager.on_llm_new_token(chunk.text, chunk=chunk, logprobs=logprobs)
            yield chunk
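
    # Illustrative streaming sketch (comment only; the model path is a
    # hypothetical placeholder):
    #
    #     llm = ChatLlamaCpp(model_path="./path/to/model.gguf")
    #     for chunk in llm.stream("Count to three."):
    #         print(chunk.content, end="", flush=True)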

    def bind_tools(
        self,
        tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
        *,
        tool_choice: Optional[Union[Dict[str, Dict], bool, str]] = None,
        **kwargs: Any,
    ) -> Runnable[LanguageModelInput, BaseMessage]:
        """Bind tool-like objects to this chat model.

        tool_choice: does not currently support the "any" and "auto" choices of
            the OpenAI tool-calling API. To force a tool, it should be a dict of
            the form {"type": "function", "function": {"name": <<tool_name>>}}.
        """
        formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
        tool_names = [ft["function"]["name"] for ft in formatted_tools]
        if tool_choice:
            if isinstance(tool_choice, dict):
                if not any(
                    tool_choice["function"]["name"] == name for name in tool_names
                ):
                    raise ValueError(
                        f"Tool choice {tool_choice=} was specified, but the only "
                        f"provided tools were {tool_names}."
                    )
            elif isinstance(tool_choice, str):
                chosen = [
                    f for f in formatted_tools if f["function"]["name"] == tool_choice
                ]
                if not chosen:
                    raise ValueError(
                        f"Tool choice {tool_choice=} was specified, but the only "
                        f"provided tools were {tool_names}."
                    )
            elif isinstance(tool_choice, bool):
                if len(formatted_tools) > 1:
                    raise ValueError(
                        "tool_choice=True can only be specified when a single tool is "
                        f"passed in. Received {len(tools)} tools."
                    )
                tool_choice = formatted_tools[0]
            else:
                raise ValueError(
                    "Unrecognized tool_choice type. Expected a dict of the form "
                    '{"type": "function", "function": {"name": <<tool_name>>}}. '
                    f"Received: {tool_choice}"
                )

            kwargs["tool_choice"] = tool_choice
        return super().bind(tools=formatted_tools, **kwargs)
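
    # Illustrative sketch of forcing a specific tool (the tool and model path
    # are hypothetical placeholders):
    #
    #     from langchain_core.tools import tool
    #
    #     @tool
    #     def get_weather(city: str) -> str:
    #         """Get the weather for a city."""
    #         return f"Sunny in {city}"
    #
    #     llm = ChatLlamaCpp(model_path="./path/to/model.gguf")
    #     llm_with_tool = llm.bind_tools(
    #         [get_weather],
    #         tool_choice={"type": "function", "function": {"name": "get_weather"}},
    #     )
    #     msg = llm_with_tool.invoke("Weather in Paris?")
    #     # msg.tool_calls -> [{"name": "get_weather", "args": {"city": "Paris"}, ...}]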

    def with_structured_output(
        self,
        schema: Optional[Union[Dict, Type[BaseModel]]] = None,
        *,
        include_raw: bool = False,
        **kwargs: Any,
    ) -> Runnable[LanguageModelInput, Union[Dict, BaseModel]]:
        """Model wrapper that returns outputs formatted to match the given schema.

        Args:
            schema: The output schema as a dict or a Pydantic class. If a Pydantic class
                then the model output will be an object of that class. If a dict then
                the model output will be a dict. With a Pydantic class the returned
                attributes will be validated, whereas with a dict they will not be. If
                `method` is "function_calling" and `schema` is a dict, then the dict
                must match the OpenAI function-calling spec or be a valid JSON schema
                with top level 'title' and 'description' keys specified.
            include_raw: If False then only the parsed structured output is returned. If
                an error occurs during model output parsing it will be raised. If True
                then both the raw model response (a BaseMessage) and the parsed model
                response will be returned. If an error occurs during output parsing it
                will be caught and returned as well. The final output is always a dict
                with keys "raw", "parsed", and "parsing_error".
            kwargs: Any other args to bind to model, ``self.bind(..., **kwargs)``.

        Returns:
            A Runnable that takes any ChatModel input and returns as output:

                If include_raw is True then a dict with keys:
                    raw: BaseMessage
                    parsed: Optional[_DictOrPydantic]
                    parsing_error: Optional[BaseException]

                If include_raw is False then just _DictOrPydantic is returned,
                where _DictOrPydantic depends on the schema:

                If schema is a Pydantic class then _DictOrPydantic is the Pydantic
                class.

                If schema is a dict then _DictOrPydantic is a dict.

        Example: Pydantic schema (include_raw=False):
            .. code-block:: python

                import multiprocessing

                from langchain_community.chat_models import ChatLlamaCpp
                from langchain_core.pydantic_v1 import BaseModel

                class AnswerWithJustification(BaseModel):
                    '''An answer to the user question along with justification for the answer.'''
                    answer: str
                    justification: str

                llm = ChatLlamaCpp(
                    temperature=0.,
                    model_path="./SanctumAI-meta-llama-3-8b-instruct.Q8_0.gguf",
                    n_ctx=10000,
                    n_gpu_layers=4,
                    n_batch=200,
                    max_tokens=512,
                    n_threads=multiprocessing.cpu_count() - 1,
                    repeat_penalty=1.5,
                    top_p=0.5,
                    stop=["<|end_of_text|>", "<|eot_id|>"],
                )
                structured_llm = llm.with_structured_output(AnswerWithJustification)

                structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")

                # -> AnswerWithJustification(
                #     answer='They weigh the same',
                #     justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
                # )

        Example: Pydantic schema (include_raw=True):
            .. code-block:: python

                import multiprocessing

                from langchain_community.chat_models import ChatLlamaCpp
                from langchain_core.pydantic_v1 import BaseModel

                class AnswerWithJustification(BaseModel):
                    '''An answer to the user question along with justification for the answer.'''
                    answer: str
                    justification: str

                llm = ChatLlamaCpp(
                    temperature=0.,
                    model_path="./SanctumAI-meta-llama-3-8b-instruct.Q8_0.gguf",
                    n_ctx=10000,
                    n_gpu_layers=4,
                    n_batch=200,
                    max_tokens=512,
                    n_threads=multiprocessing.cpu_count() - 1,
                    repeat_penalty=1.5,
                    top_p=0.5,
                    stop=["<|end_of_text|>", "<|eot_id|>"],
                )
                structured_llm = llm.with_structured_output(AnswerWithJustification, include_raw=True)

                structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
                # -> {
                #     'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}),
                #     'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'),
                #     'parsing_error': None
                # }

        Example: dict schema (include_raw=False):
            .. code-block:: python

                import multiprocessing

                from langchain_community.chat_models import ChatLlamaCpp
                from langchain_core.pydantic_v1 import BaseModel
                from langchain_core.utils.function_calling import convert_to_openai_tool

                class AnswerWithJustification(BaseModel):
                    '''An answer to the user question along with justification for the answer.'''
                    answer: str
                    justification: str

                dict_schema = convert_to_openai_tool(AnswerWithJustification)
                llm = ChatLlamaCpp(
                    temperature=0.,
                    model_path="./SanctumAI-meta-llama-3-8b-instruct.Q8_0.gguf",
                    n_ctx=10000,
                    n_gpu_layers=4,
                    n_batch=200,
                    max_tokens=512,
                    n_threads=multiprocessing.cpu_count() - 1,
                    repeat_penalty=1.5,
                    top_p=0.5,
                    stop=["<|end_of_text|>", "<|eot_id|>"],
                )
                structured_llm = llm.with_structured_output(dict_schema)

                structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
                # -> {
                #     'answer': 'They weigh the same',
                #     'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
                # }

        """  # noqa: E501
        if kwargs:
            raise ValueError(f"Received unsupported arguments {kwargs}")
        is_pydantic_schema = isinstance(schema, type) and issubclass(schema, BaseModel)
        if schema is None:
            raise ValueError(
                "schema must be specified when method is 'function_calling'. "
                "Received None."
            )
        llm = self.bind_tools([schema], tool_choice=True)
        if is_pydantic_schema:
            output_parser: OutputParserLike = PydanticToolsParser(
                tools=[cast(Type, schema)], first_tool_only=True
            )
        else:
            key_name = convert_to_openai_tool(schema)["function"]["name"]
            output_parser = JsonOutputKeyToolsParser(
                key_name=key_name, first_tool_only=True
            )

        if include_raw:
            parser_assign = RunnablePassthrough.assign(
                parsed=itemgetter("raw") | output_parser, parsing_error=lambda _: None
            )
            parser_none = RunnablePassthrough.assign(parsed=lambda _: None)
            parser_with_fallback = parser_assign.with_fallbacks(
                [parser_none], exception_key="parsing_error"
            )
            return RunnableMap(raw=llm) | parser_with_fallback
        else:
            return llm | output_parser

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Return a dictionary of identifying parameters.

        This information is used by the LangChain callback system, which is
        used for tracing purposes and makes it possible to monitor LLMs.
        """
        return {
            # The model name allows users to specify custom token counting
            # rules in LLM monitoring applications (e.g., in LangSmith users
            # can provide per-token pricing for their model and monitor
            # costs for the given LLM.)
            **{"model_path": self.model_path},
            **self._default_params,
        }

    @property
    def _llm_type(self) -> str:
        """Get the type of language model used by this chat model."""
        return "llama-cpp-python"

    @property
    def _default_params(self) -> Dict[str, Any]:
        """Get the default parameters for calling create_chat_completion."""
        params: Dict = {
            "max_tokens": self.max_tokens,
            "temperature": self.temperature,
            "top_p": self.top_p,
            "top_k": self.top_k,
            "logprobs": self.logprobs,
            "stop_sequences": self.stop,  # key here is convention among LLM classes
            "repeat_penalty": self.repeat_penalty,
        }
        if self.grammar:
            params["grammar"] = self.grammar
        return params


def _lc_tool_call_to_openai_tool_call(tool_call: ToolCall) -> dict:
    return {
        "type": "function",
        "id": tool_call["id"],
        "function": {
            "name": tool_call["name"],
            "arguments": json.dumps(tool_call["args"]),
        },
    }
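
# For example (illustrative values): a LangChain tool call
#     {"name": "get_weather", "args": {"city": "Paris"}, "id": "call_1"}
# maps to the OpenAI-style dict
#     {"type": "function", "id": "call_1",
#      "function": {"name": "get_weather", "arguments": '{"city": "Paris"}'}}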


def _lc_invalid_tool_call_to_openai_tool_call(
    invalid_tool_call: InvalidToolCall,
) -> dict:
    return {
        "type": "function",
        "id": invalid_tool_call["id"],
        "function": {
            "name": invalid_tool_call["name"],
            "arguments": invalid_tool_call["args"],
        },
    }


def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
    """Convert a dictionary to a LangChain message.

    Args:
        _dict: The dictionary.

    Returns:
        The LangChain message.
    """
    role = _dict.get("role")
    name = _dict.get("name")
    id_ = _dict.get("id")
    if role == "user":
        return HumanMessage(content=_dict.get("content", ""), id=id_, name=name)
    elif role == "assistant":
        # Fix for azure
        # Also OpenAI returns None for tool invocations
        content = _dict.get("content", "") or ""
        additional_kwargs: Dict = {}
        if function_call := _dict.get("function_call"):
            additional_kwargs["function_call"] = dict(function_call)
        tool_calls = []
        invalid_tool_calls = []
        if raw_tool_calls := _dict.get("tool_calls"):
            additional_kwargs["tool_calls"] = raw_tool_calls
            for raw_tool_call in raw_tool_calls:
                try:
                    tc = parse_tool_call(raw_tool_call, return_id=True)
                except Exception as e:
                    invalid_tc = make_invalid_tool_call(raw_tool_call, str(e))
                    invalid_tool_calls.append(invalid_tc)
                else:
                    if tc:
                        tool_calls.append(tc)
        return AIMessage(
            content=content,
            additional_kwargs=additional_kwargs,
            name=name,
            id=id_,
            tool_calls=tool_calls,  # type: ignore[arg-type]
            invalid_tool_calls=invalid_tool_calls,
        )
    elif role == "system":
        return SystemMessage(content=_dict.get("content", ""), name=name, id=id_)
    elif role == "function":
        return FunctionMessage(
            content=_dict.get("content", ""), name=cast(str, _dict.get("name")), id=id_
        )
    elif role == "tool":
        additional_kwargs = {}
        if "name" in _dict:
            additional_kwargs["name"] = _dict["name"]
        return ToolMessage(
            content=_dict.get("content", ""),
            tool_call_id=cast(str, _dict.get("tool_call_id")),
            additional_kwargs=additional_kwargs,
            name=name,
            id=id_,
        )
    else:
        return ChatMessage(
            content=_dict.get("content", ""), role=cast(str, role), id=id_
        )
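
# For example (illustrative): {"role": "user", "content": "hi"} becomes
# HumanMessage(content="hi"), while a dict with an unrecognized role falls
# through to the generic ChatMessage branch above.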


def _format_message_content(content: Any) -> Any:
    """Format message content."""
    if content and isinstance(content, list):
        # Remove unexpected block types
        formatted_content = []
        for block in content:
            if (
                isinstance(block, dict)
                and "type" in block
                and block["type"] == "tool_use"
            ):
                continue
            formatted_content.append(block)
    else:
        formatted_content = content

    return formatted_content


def _convert_message_to_dict(message: BaseMessage) -> dict:
    """Convert a LangChain message to a dictionary.

    Args:
        message: The LangChain message.

    Returns:
        The dictionary.
    """
    message_dict: Dict[str, Any] = {
        "content": _format_message_content(message.content),
    }
    if (name := message.name or message.additional_kwargs.get("name")) is not None:
        message_dict["name"] = name

    # populate role and additional message data
    if isinstance(message, ChatMessage):
        message_dict["role"] = message.role
    elif isinstance(message, HumanMessage):
        message_dict["role"] = "user"
    elif isinstance(message, AIMessage):
        message_dict["role"] = "assistant"
        if "function_call" in message.additional_kwargs:
            message_dict["function_call"] = message.additional_kwargs["function_call"]
        if message.tool_calls or message.invalid_tool_calls:
            message_dict["tool_calls"] = [
                _lc_tool_call_to_openai_tool_call(tc) for tc in message.tool_calls
            ] + [
                _lc_invalid_tool_call_to_openai_tool_call(tc)
                for tc in message.invalid_tool_calls
            ]
        elif "tool_calls" in message.additional_kwargs:
            message_dict["tool_calls"] = message.additional_kwargs["tool_calls"]
            tool_call_supported_props = {"id", "type", "function"}
            message_dict["tool_calls"] = [
                {k: v for k, v in tool_call.items() if k in tool_call_supported_props}
                for tool_call in message_dict["tool_calls"]
            ]
        # If tool calls are present, a null content should be None, not an
        # empty string.
        if "function_call" in message_dict or "tool_calls" in message_dict:
            message_dict["content"] = message_dict["content"] or None
    elif isinstance(message, SystemMessage):
        message_dict["role"] = "system"
    elif isinstance(message, FunctionMessage):
        message_dict["role"] = "function"
    elif isinstance(message, ToolMessage):
        message_dict["role"] = "tool"
        message_dict["tool_call_id"] = message.tool_call_id

        supported_props = {"content", "role", "tool_call_id"}
        message_dict = {k: v for k, v in message_dict.items() if k in supported_props}
    else:
        raise TypeError(f"Got unknown type {message}")
    return message_dict
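
# For example (illustrative): HumanMessage(content="hi") becomes
# {"content": "hi", "role": "user"}; this is the inverse of
# _convert_dict_to_message above.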


def _convert_delta_to_message_chunk(
    _dict: Mapping[str, Any], default_class: Type[BaseMessageChunk]
) -> BaseMessageChunk:
    id_ = _dict.get("id")
    role = cast(str, _dict.get("role"))
    content = cast(str, _dict.get("content") or "")
    additional_kwargs: Dict = {}
    if _dict.get("function_call"):
        function_call = dict(_dict["function_call"])
        if "name" in function_call and function_call["name"] is None:
            function_call["name"] = ""
        additional_kwargs["function_call"] = function_call
    tool_call_chunks = []
    if raw_tool_calls := _dict.get("tool_calls"):
        additional_kwargs["tool_calls"] = raw_tool_calls
        for rtc in raw_tool_calls:
            try:
                tool_call = ToolCallChunk(
                    name=rtc["function"].get("name"),
                    args=rtc["function"].get("arguments"),
                    id=rtc.get("id"),
                    index=rtc["index"],
                )
                tool_call_chunks.append(tool_call)
            except KeyError:
                pass

    if role == "user" or default_class == HumanMessageChunk:
        return HumanMessageChunk(content=content, id=id_)
    elif role == "assistant" or default_class == AIMessageChunk:
        return AIMessageChunk(
            content=content,
            additional_kwargs=additional_kwargs,
            id=id_,
            tool_call_chunks=tool_call_chunks,
        )
    elif role == "system" or default_class == SystemMessageChunk:
        return SystemMessageChunk(content=content, id=id_)
    elif role == "function" or default_class == FunctionMessageChunk:
        return FunctionMessageChunk(content=content, name=_dict["name"], id=id_)
    elif role == "tool" or default_class == ToolMessageChunk:
        return ToolMessageChunk(
            content=content, tool_call_id=_dict["tool_call_id"], id=id_
        )
    elif role or default_class == ChatMessageChunk:
        return ChatMessageChunk(content=content, role=role, id=id_)
    else:
        return default_class(content=content, id=id_)  # type: ignore
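
# For example (illustrative): a streamed delta {"role": "assistant",
# "content": "Hel"} becomes AIMessageChunk(content="Hel"); _stream above
# wraps each such chunk in a ChatGenerationChunk before yielding it.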