forked from Archives/langchain
b588446bf9
- Added unit tests for schema.py covering utility functions and token counting. - Fixed a nit: based on the Hugging Face docs, the tokenizer model is GPT-2. [link](https://huggingface.co/transformers/v4.8.2/_modules/transformers/models/gpt2/tokenization_gpt2_fast.html) - Ran `make lint && make format`; both passed locally. - Screenshot of the new tests' run result: <img width="1283" alt="Screenshot 2023-04-27 at 9 51 55 PM" src="https://user-images.githubusercontent.com/62768671/235057441-c0ac3406-9541-453f-ba14-3ebb08656114.png">
78 lines
2.2 KiB
Python
78 lines
2.2 KiB
Python
"""Test schema utilities: buffer-string formatting and message/dict conversion."""
|
|
|
|
import unittest
|
|
|
|
from langchain.schema import (
|
|
AIMessage,
|
|
HumanMessage,
|
|
SystemMessage,
|
|
get_buffer_string,
|
|
messages_from_dict,
|
|
messages_to_dict,
|
|
)
|
|
|
|
|
|
class TestGetBufferString(unittest.TestCase):
    """Checks for the text that ``get_buffer_string`` builds from messages.

    Every test spells out the expected ``"<prefix>: <content>"`` line(s) and
    compares them with the value actually returned.
    """

    # Shared fixtures, one per message type; the tests only read them.
    human_msg: HumanMessage = HumanMessage(content="human")
    ai_msg: AIMessage = AIMessage(content="ai")
    sys_msg: SystemMessage = SystemMessage(content="sys")

    def test_empty_input(self) -> None:
        """An empty message list is expected to produce an empty string."""
        self.assertEqual(get_buffer_string([]), "")

    def test_valid_single_message(self) -> None:
        """A lone human message is expected to render with the "Human" prefix."""
        expected_output = f"Human: {self.human_msg.content}"
        self.assertEqual(
            get_buffer_string([self.human_msg]),
            expected_output,
        )

    def test_custom_human_prefix(self) -> None:
        """``human_prefix`` is expected to replace the default human label."""
        prefix = "H"
        expected_output = f"{prefix}: {self.human_msg.content}"
        # Feed the same ``prefix`` that built the expectation into the call so
        # the input and the expected output cannot drift apart.
        self.assertEqual(
            get_buffer_string([self.human_msg], human_prefix=prefix),
            expected_output,
        )

    def test_custom_ai_prefix(self) -> None:
        """``ai_prefix`` is expected to replace the default AI label."""
        prefix = "A"
        expected_output = f"{prefix}: {self.ai_msg.content}"
        # Same reasoning as the human-prefix test: one source for the prefix.
        self.assertEqual(
            get_buffer_string([self.ai_msg], ai_prefix=prefix),
            expected_output,
        )

    def test_multiple_msg(self) -> None:
        """Several messages are expected to render one per line, in order."""
        msgs = [self.human_msg, self.ai_msg, self.sys_msg]
        expected_output = "\n".join(
            [
                f"Human: {self.human_msg.content}",
                f"AI: {self.ai_msg.content}",
                f"System: {self.sys_msg.content}",
            ]
        )
        self.assertEqual(
            get_buffer_string(msgs),
            expected_output,
        )
|
|
|
|
|
|
class TestMessageDictConversion(unittest.TestCase):
    """Round-trip check for ``messages_to_dict`` and ``messages_from_dict``."""

    # The human fixture carries ``additional_kwargs`` so the round trip is
    # exercised with extra data attached, not just plain content.
    human_msg: HumanMessage = HumanMessage(
        content="human",
        additional_kwargs={"key": "value"},
    )
    ai_msg: AIMessage = AIMessage(content="ai")
    sys_msg: SystemMessage = SystemMessage(content="sys")

    def test_multiple_msg(self) -> None:
        """Messages converted to dicts and back should equal the originals."""
        original = [self.human_msg, self.ai_msg, self.sys_msg]
        serialized = messages_to_dict(original)
        restored = messages_from_dict(serialized)
        self.assertEqual(restored, original)
|