core[minor]: Prevent PydanticOutputParser from encoding schema as ASCII (#25386)

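This allows users to write parameter descriptions in their pydantic
models in any language: non-ASCII characters now appear verbatim in the
format instructions instead of being escaped as \uXXXX sequences.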

Continuing this PR: https://github.com/langchain-ai/langchain/pull/24809
This commit is contained in:
Eugene Yurtsev 2024-08-14 09:54:31 -04:00 committed by GitHub
parent 27690506d0
commit dc51cc5690
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 22 additions and 1 deletions

View File

@ -92,7 +92,7 @@ class PydanticOutputParser(JsonOutputParser, Generic[TBaseModel]):
if "type" in reduced_schema:
del reduced_schema["type"]
# Ensure json in context is well-formed with double quotes.
schema_str = json.dumps(reduced_schema)
schema_str = json.dumps(reduced_schema, ensure_ascii=False)
return _PYDANTIC_FORMAT_INSTRUCTIONS.format(schema=schema_str)

View File

@ -100,3 +100,24 @@ def test_pydantic_output_parser_type_inference() -> None:
"title": "SampleModel",
"type": "object",
}
def test_format_instructions_preserves_language() -> None:
    """Check that non-ASCII field descriptions reach the format instructions.

    A model field is described with greetings in several scripts; the
    description must appear verbatim in the parser's format instructions
    rather than as ASCII escape sequences.
    """
    from langchain_core.pydantic_v1 import BaseModel, Field

    greetings = (
        "你好, こんにちは, नमस्ते, Bonjour, Hola, "
        "Olá, 안녕하세요, Jambo, Merhaba, Γειά σου"
    )

    class Foo(BaseModel):
        # Reuse the same text so the assertion and the schema cannot drift.
        hello: str = Field(description=greetings)

    output_parser = PydanticOutputParser(pydantic_object=Foo)  # type: ignore
    instructions = output_parser.get_format_instructions()
    assert greetings in instructions