core[patch]: fix xml output parser transform (#19530)

The previous PR passed the private `_parser` argument to `XMLPullParser`,
which apparently is not meant to be used by user code and causes
non-deterministic failures on CI when testing the `transform` and
`atransform` methods. Reverting this change temporarily.
pull/19532/head^2
Eugene Yurtsev 3 months ago committed by GitHub
parent e6952b04d5
commit 56f4c5459b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -1,7 +1,6 @@
import re
from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Union
from xml.etree import ElementTree as ET
from xml.etree.ElementTree import TreeBuilder
from langchain_core.exceptions import OutputParserException
from langchain_core.messages import BaseMessage
@ -61,13 +60,7 @@ class XMLOutputParser(BaseTransformOutputParser):
def _transform(
self, input: Iterator[Union[str, BaseMessage]]
) -> Iterator[AddableDict]:
# Imports are temporarily placed here to avoid issue with caching on CI
# likely if you're reading this you can move them to the top of the file
from defusedxml.ElementTree import DefusedXMLParser # type: ignore[import]
parser = ET.XMLPullParser(
["start", "end"], _parser=DefusedXMLParser(target=TreeBuilder())
)
parser = ET.XMLPullParser(["start", "end"])
xml_start_re = re.compile(r"<[a-zA-Z:_]")
xml_started = False
current_path: List[str] = []
@ -117,12 +110,7 @@ class XMLOutputParser(BaseTransformOutputParser):
async def _atransform(
self, input: AsyncIterator[Union[str, BaseMessage]]
) -> AsyncIterator[AddableDict]:
# Imports are temporarily placed here to avoid issue with caching on CI
# likely if you're reading this you can move them to the top of the file
from defusedxml.ElementTree import DefusedXMLParser # type: ignore[import]
_parser = DefusedXMLParser(target=TreeBuilder())
parser = ET.XMLPullParser(["start", "end"], _parser=_parser)
parser = ET.XMLPullParser(["start", "end"])
xml_start_re = re.compile(r"<[a-zA-Z:_]")
xml_started = False
current_path: List[str] = []

@ -1,6 +1,5 @@
"""Test XMLOutputParser"""
from typing import AsyncIterator
from xml.etree.ElementTree import ParseError
import pytest
@ -100,17 +99,3 @@ async def tests_billion_laughs_attack() -> None:
with pytest.raises(OutputParserException):
await parser.aparse(MALICIOUS_XML)
with pytest.raises(ParseError):
# Right now raises undefined entity error
assert list(parser.transform(iter(MALICIOUS_XML))) == [
{"foo": [{"bar": [{"baz": None}]}]}
]
async def _as_iter(string: str) -> AsyncIterator[str]:
for c in string:
yield c
with pytest.raises(ParseError):
chunks = [chunk async for chunk in parser.atransform(_as_iter(MALICIOUS_XML))]
assert chunks == [{"foo": [{"bar": [{"baz": None}]}]}]

Loading…
Cancel
Save