LLM Graph transformer dealing with empty strings (#23368)

Pydantic allows empty strings:

```
from langchain.pydantic_v1 import Field, BaseModel

class Property(BaseModel):
  """A single property consisting of key and value"""
  # Both fields are declared as `str` with no minimum-length constraint.
  key: str = Field(..., description="key")
  value: str = Field(..., description="value")

# Validation passes here even though both strings are empty.
x = Property(key="", value="")
```

Such empty values can produce errors downstream, so we simply ignore those records.
This commit is contained in:
Tomaz Bratanic 2024-06-25 10:01:53 -07:00 committed by GitHub
parent d3520a784f
commit 22fa32e164
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -525,7 +525,7 @@ def _format_nodes(nodes: List[Node]) -> List[Node]:
return [
Node(
id=el.id.title() if isinstance(el.id, str) else el.id,
type=el.type.capitalize(),
type=el.type.capitalize() if el.type else None, # handle empty strings
properties=el.properties,
)
for el in nodes
@ -576,13 +576,17 @@ def _convert_to_graph_document(
else: # If there are no validation errors use parsed pydantic object
parsed_schema: _Graph = raw_schema["parsed"]
nodes = (
[map_to_base_node(node) for node in parsed_schema.nodes]
[map_to_base_node(node) for node in parsed_schema.nodes if node.id]
if parsed_schema.nodes
else []
)
relationships = (
[map_to_base_relationship(rel) for rel in parsed_schema.relationships]
[
map_to_base_relationship(rel)
for rel in parsed_schema.relationships
if rel.type and rel.source_node_id and rel.target_node_id
]
if parsed_schema.relationships
else []
)