mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
experimental: LLMGraphTransformer - added relationship properties. (#21856)
- **Description:** The generated relationships in the graph had no properties, but the Relationship class was properly defined with properties. This made it very difficult to transform conditional sentences into a graph. Adding properties to relationships can solve this issue elegantly. The changes expand on the existing LLMGraphTransformer implementation but add the possibility to define allowed relationship properties like this: LLMGraphTransformer(llm=llm, relationship_properties=["Condition", "Time"],)
- **Issue:** no issue found
- **Dependencies:** n/a
- **Twitter handle:** @IstvanSpace

-Quick Test
=================================================================
from dotenv import load_dotenv
import os
from langchain_community.graphs import Neo4jGraph
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.documents import Document

load_dotenv()
os.environ["NEO4J_URI"] = os.getenv("NEO4J_URI")
os.environ["NEO4J_USERNAME"] = os.getenv("NEO4J_USERNAME")
os.environ["NEO4J_PASSWORD"] = os.getenv("NEO4J_PASSWORD")
graph = Neo4jGraph()
llm = ChatOpenAI(temperature=0, model_name="gpt-4o")
llm_transformer = LLMGraphTransformer(llm=llm)
#text = "Harry potter likes pies, but only if it rains outside"
text = "Jack has a dog named Max. Jack only walks Max if it is sunny outside."
documents = [Document(page_content=text)]
llm_transformer_props = LLMGraphTransformer(
    llm=llm,
    relationship_properties=["Condition"],
)
graph_documents_props = llm_transformer_props.convert_to_graph_documents(documents)
print(f"Nodes:{graph_documents_props[0].nodes}")
print(f"Relationships:{graph_documents_props[0].relationships}")
graph.add_graph_documents(graph_documents_props)

---------

Co-authored-by: Istvan Lorincz <istvan.lorincz@pm.me>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
This commit is contained in:
parent
694ae87748
commit
513e491ce9
@ -72,8 +72,8 @@ system_prompt = (
|
||||
"You are a top-tier algorithm designed for extracting information in structured "
|
||||
"formats to build a knowledge graph.\n"
|
||||
"Try to capture as much information from the text as possible without "
|
||||
"sacrifing accuracy. Do not add any information that is not explicitly "
|
||||
"mentioned in the text\n"
|
||||
"sacrificing accuracy. Do not add any information that is not explicitly "
|
||||
"mentioned in the text.\n"
|
||||
"- **Nodes** represent entities and concepts.\n"
|
||||
"- The aim is to achieve simplicity and clarity in the knowledge graph, making it\n"
|
||||
"accessible for a vast audience.\n"
|
||||
@ -82,8 +82,8 @@ system_prompt = (
|
||||
"Ensure you use basic or elementary types for node labels.\n"
|
||||
"- For example, when you identify an entity representing a person, "
|
||||
"always label it as **'person'**. Avoid using more specific terms "
|
||||
"like 'mathematician' or 'scientist'"
|
||||
" - **Node IDs**: Never utilize integers as node IDs. Node IDs should be "
|
||||
"like 'mathematician' or 'scientist'."
|
||||
"- **Node IDs**: Never utilize integers as node IDs. Node IDs should be "
|
||||
"names or human-readable identifiers found in the text.\n"
|
||||
"- **Relationships** represent connections between entities or concepts.\n"
|
||||
"Ensure consistency and generality in relationship types when constructing "
|
||||
@ -138,8 +138,8 @@ def _get_additional_info(input_type: str) -> str:
|
||||
elif input_type == "relationship":
|
||||
return (
|
||||
"Instead of using specific and momentary types such as "
|
||||
"'BECAME_PROFESSOR', use more general and timeless relationship types like "
|
||||
"'PROFESSOR'. However, do not sacrifice any accuracy for generality"
|
||||
"'BECAME_PROFESSOR', use more general and timeless relationship types "
|
||||
"like 'PROFESSOR'. However, do not sacrifice any accuracy for generality"
|
||||
)
|
||||
elif input_type == "property":
|
||||
return ""
|
||||
@ -280,10 +280,32 @@ def create_simple_model(
|
||||
rel_types: Optional[List[str]] = None,
|
||||
node_properties: Union[bool, List[str]] = False,
|
||||
llm_type: Optional[str] = None,
|
||||
relationship_properties: Union[bool, List[str]] = False,
|
||||
) -> Type[_Graph]:
|
||||
"""
|
||||
Simple model allows to limit node and/or relationship types.
|
||||
Doesn't have any node or relationship properties.
|
||||
Create a simple graph model with optional constraints on node
|
||||
and relationship types.
|
||||
|
||||
Args:
|
||||
node_labels (Optional[List[str]]): Specifies the allowed node types.
|
||||
Defaults to None, allowing all node types.
|
||||
rel_types (Optional[List[str]]): Specifies the allowed relationship types.
|
||||
Defaults to None, allowing all relationship types.
|
||||
node_properties (Union[bool, List[str]]): Specifies if node properties should
|
||||
be included. If a list is provided, only properties with keys in the list
|
||||
will be included. If True, all properties are included. Defaults to False.
|
||||
relationship_properties (Union[bool, List[str]]): Specifies if relationship
|
||||
properties should be included. If a list is provided, only properties with
|
||||
keys in the list will be included. If True, all properties are included.
|
||||
Defaults to False.
|
||||
llm_type (Optional[str]): The type of the language model. Defaults to None.
|
||||
Only openai supports enum param: openai-chat.
|
||||
|
||||
Returns:
|
||||
Type[_Graph]: A graph model with the specified constraints.
|
||||
|
||||
Raises:
|
||||
ValueError: If 'id' is included in the node or relationship properties list.
|
||||
"""
|
||||
|
||||
node_fields: Dict[str, Tuple[Any, Any]] = {
|
||||
@ -325,39 +347,80 @@ def create_simple_model(
|
||||
)
|
||||
SimpleNode = create_model("SimpleNode", **node_fields) # type: ignore
|
||||
|
||||
class SimpleRelationship(BaseModel):
|
||||
"""Represents a directed relationship between two nodes in a graph."""
|
||||
relationship_fields: Dict[str, Tuple[Any, Any]] = {
|
||||
"source_node_id": (
|
||||
str,
|
||||
Field(
|
||||
...,
|
||||
description="Name or human-readable unique identifier of source node",
|
||||
),
|
||||
),
|
||||
"source_node_type": (
|
||||
str,
|
||||
optional_enum_field(
|
||||
node_labels,
|
||||
description="The type or label of the source node.",
|
||||
input_type="node",
|
||||
),
|
||||
),
|
||||
"target_node_id": (
|
||||
str,
|
||||
Field(
|
||||
...,
|
||||
description="Name or human-readable unique identifier of target node",
|
||||
),
|
||||
),
|
||||
"target_node_type": (
|
||||
str,
|
||||
optional_enum_field(
|
||||
node_labels,
|
||||
description="The type or label of the target node.",
|
||||
input_type="node",
|
||||
),
|
||||
),
|
||||
"type": (
|
||||
str,
|
||||
optional_enum_field(
|
||||
rel_types,
|
||||
description="The type of the relationship.",
|
||||
input_type="relationship",
|
||||
),
|
||||
),
|
||||
}
|
||||
if relationship_properties:
|
||||
if (
|
||||
isinstance(relationship_properties, list)
|
||||
and "id" in relationship_properties
|
||||
):
|
||||
raise ValueError(
|
||||
"The relationship property 'id' is reserved and cannot be used."
|
||||
)
|
||||
# Map True to empty array
|
||||
relationship_properties_mapped: List[str] = (
|
||||
[] if relationship_properties is True else relationship_properties
|
||||
)
|
||||
|
||||
source_node_id: str = Field(
|
||||
description="Name or human-readable unique identifier of source node"
|
||||
)
|
||||
source_node_type: str = optional_enum_field(
|
||||
node_labels,
|
||||
description="The type or label of the source node.",
|
||||
input_type="node",
|
||||
llm_type=llm_type,
|
||||
)
|
||||
target_node_id: str = Field(
|
||||
description="Name or human-readable unique identifier of target node"
|
||||
)
|
||||
target_node_type: str = optional_enum_field(
|
||||
node_labels,
|
||||
description="The type or label of the target node.",
|
||||
input_type="node",
|
||||
llm_type=llm_type,
|
||||
)
|
||||
type: str = optional_enum_field(
|
||||
rel_types,
|
||||
description="The type of the relationship.",
|
||||
input_type="relationship",
|
||||
llm_type=llm_type,
|
||||
class RelationshipProperty(BaseModel):
|
||||
"""A single property consisting of key and value"""
|
||||
|
||||
key: str = optional_enum_field(
|
||||
relationship_properties_mapped,
|
||||
description="Property key.",
|
||||
input_type="property",
|
||||
)
|
||||
value: str = Field(..., description="value")
|
||||
|
||||
relationship_fields["properties"] = (
|
||||
Optional[List[RelationshipProperty]],
|
||||
Field(None, description="List of relationship properties"),
|
||||
)
|
||||
SimpleRelationship = create_model("SimpleRelationship", **relationship_fields) # type: ignore
|
||||
|
||||
class DynamicGraph(_Graph):
|
||||
"""Represents a graph document consisting of nodes and relationships."""
|
||||
|
||||
nodes: Optional[List[SimpleNode]] = Field(description="List of nodes") # type: ignore
|
||||
relationships: Optional[List[SimpleRelationship]] = Field(
|
||||
relationships: Optional[List[SimpleRelationship]] = Field( # type: ignore
|
||||
description="List of relationships"
|
||||
)
|
||||
|
||||
@ -377,7 +440,13 @@ def map_to_base_relationship(rel: Any) -> Relationship:
|
||||
"""Map the SimpleRelationship to the base Relationship."""
|
||||
source = Node(id=rel.source_node_id, type=rel.source_node_type)
|
||||
target = Node(id=rel.target_node_id, type=rel.target_node_type)
|
||||
return Relationship(source=source, target=target, type=rel.type)
|
||||
properties = {}
|
||||
if hasattr(rel, "properties") and rel.properties:
|
||||
for p in rel.properties:
|
||||
properties[format_property_key(p.key)] = p.value
|
||||
return Relationship(
|
||||
source=source, target=target, type=rel.type, properties=properties
|
||||
)
|
||||
|
||||
|
||||
def _parse_and_clean_json(
|
||||
@ -387,10 +456,15 @@ def _parse_and_clean_json(
|
||||
for node in argument_json["nodes"]:
|
||||
if not node.get("id"): # Id is mandatory, skip this node
|
||||
continue
|
||||
node_properties = {}
|
||||
if "properties" in node and node["properties"]:
|
||||
for p in node["properties"]:
|
||||
node_properties[format_property_key(p["key"])] = p["value"]
|
||||
nodes.append(
|
||||
Node(
|
||||
id=node["id"],
|
||||
type=node.get("type"),
|
||||
properties=node_properties,
|
||||
)
|
||||
)
|
||||
relationships = []
|
||||
@ -423,6 +497,11 @@ def _parse_and_clean_json(
|
||||
except IndexError:
|
||||
rel["target_node_type"] = None
|
||||
|
||||
rel_properties = {}
|
||||
if "properties" in rel and rel["properties"]:
|
||||
for p in rel["properties"]:
|
||||
rel_properties[format_property_key(p["key"])] = p["value"]
|
||||
|
||||
source_node = Node(
|
||||
id=rel["source_node_id"],
|
||||
type=rel["source_node_type"],
|
||||
@ -436,6 +515,7 @@ def _parse_and_clean_json(
|
||||
source=source_node,
|
||||
target=target_node,
|
||||
type=rel["type"],
|
||||
properties=rel_properties,
|
||||
)
|
||||
)
|
||||
return nodes, relationships
|
||||
@ -458,6 +538,7 @@ def _format_relationships(rels: List[Relationship]) -> List[Relationship]:
|
||||
source=_format_nodes([el.source])[0],
|
||||
target=_format_nodes([el.target])[0],
|
||||
type=el.type.replace(" ", "_").upper(),
|
||||
properties=el.properties,
|
||||
)
|
||||
for el in rels
|
||||
]
|
||||
@ -513,8 +594,8 @@ class LLMGraphTransformer:
|
||||
"""Transform documents into graph-based documents using a LLM.
|
||||
|
||||
It allows specifying constraints on the types of nodes and relationships to include
|
||||
in the output graph. The class doesn't support neither extract and node or
|
||||
relationship properties
|
||||
in the output graph. The class supports extracting properties for both nodes and
|
||||
relationships.
|
||||
|
||||
Args:
|
||||
llm (BaseLanguageModel): An instance of a language model supporting structured
|
||||
@ -553,6 +634,7 @@ class LLMGraphTransformer:
|
||||
prompt: Optional[ChatPromptTemplate] = None,
|
||||
strict_mode: bool = True,
|
||||
node_properties: Union[bool, List[str]] = False,
|
||||
relationship_properties: Union[bool, List[str]] = False,
|
||||
) -> None:
|
||||
self.allowed_nodes = allowed_nodes
|
||||
self.allowed_relationships = allowed_relationships
|
||||
@ -564,14 +646,14 @@ class LLMGraphTransformer:
|
||||
except NotImplementedError:
|
||||
self._function_call = False
|
||||
if not self._function_call:
|
||||
if node_properties:
|
||||
if node_properties or relationship_properties:
|
||||
raise ValueError(
|
||||
"The 'node_properties' parameter cannot be used "
|
||||
"in combination with a LLM that doesn't support "
|
||||
"The 'node_properties' and 'relationship_properties' parameters "
|
||||
"cannot be used in combination with a LLM that doesn't support "
|
||||
"native function calling."
|
||||
)
|
||||
try:
|
||||
import json_repair
|
||||
import json_repair # type: ignore
|
||||
|
||||
self.json_repair = json_repair
|
||||
except ImportError:
|
||||
@ -590,7 +672,11 @@ class LLMGraphTransformer:
|
||||
except AttributeError:
|
||||
llm_type = None
|
||||
schema = create_simple_model(
|
||||
allowed_nodes, allowed_relationships, node_properties, llm_type
|
||||
allowed_nodes,
|
||||
allowed_relationships,
|
||||
node_properties,
|
||||
llm_type,
|
||||
relationship_properties,
|
||||
)
|
||||
structured_llm = llm.with_structured_output(schema, include_raw=True)
|
||||
prompt = prompt or default_prompt
|
||||
|
Loading…
Reference in New Issue
Block a user