From 513e491ce96efccf2fd5d200864fc279d8254bf5 Mon Sep 17 00:00:00 2001 From: Istvan/Nebulinq <148502186+istvan-nebulinq@users.noreply.github.com> Date: Fri, 14 Jun 2024 11:41:04 -0700 Subject: [PATCH] experimental: LLMGraphTransformer - added relationship properties. (#21856) - **Description:** The generated relationships in the graph had no properties, but the Relationship class was properly defined with properties. This made it very difficult to transform conditional sentences into a graph. Adding properties to relationships can solve this issue elegantly. The changes expand on the existing LLMGraphTransformer implementation but add the possibility to define allowed relationship properties like this: LLMGraphTransformer(llm=llm, relationship_properties=["Condition", "Time"],) - **Issue:** no issue found - **Dependencies:** n/a - **Twitter handle:** @IstvanSpace -Quick Test ================================================================= from dotenv import load_dotenv import os from langchain_community.graphs import Neo4jGraph from langchain_experimental.graph_transformers import LLMGraphTransformer from langchain_openai import ChatOpenAI from langchain_core.prompts import ChatPromptTemplate from langchain_core.documents import Document load_dotenv() os.environ["NEO4J_URI"] = os.getenv("NEO4J_URI") os.environ["NEO4J_USERNAME"] = os.getenv("NEO4J_USERNAME") os.environ["NEO4J_PASSWORD"] = os.getenv("NEO4J_PASSWORD") graph = Neo4jGraph() llm = ChatOpenAI(temperature=0, model_name="gpt-4o") llm_transformer = LLMGraphTransformer(llm=llm) #text = "Harry potter likes pies, but only if it rains outside" text = "Jack has a dog named Max. Jack only walks Max if it is sunny outside." documents = [Document(page_content=text)] llm_transformer_props = LLMGraphTransformer( llm=llm, relationship_properties=["Condition"], ) graph_documents_props = llm_transformer_props.convert_to_graph_documents(documents) print(f"Nodes:{graph_documents_props[0].nodes}") print(f"Relationships:{graph_documents_props[0].relationships}") graph.add_graph_documents(graph_documents_props) --------- Co-authored-by: Istvan Lorincz Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> --- .../graph_transformers/llm.py | 170 +++++++++++++----- 1 file changed, 128 insertions(+), 42 deletions(-) diff --git a/libs/experimental/langchain_experimental/graph_transformers/llm.py b/libs/experimental/langchain_experimental/graph_transformers/llm.py index e29cfb01b0..10c65ef7b9 100644 --- a/libs/experimental/langchain_experimental/graph_transformers/llm.py +++ b/libs/experimental/langchain_experimental/graph_transformers/llm.py @@ -72,8 +72,8 @@ system_prompt = ( "You are a top-tier algorithm designed for extracting information in structured " "formats to build a knowledge graph.\n" "Try to capture as much information from the text as possible without " - "sacrifing accuracy. Do not add any information that is not explicitly " - "mentioned in the text\n" + "sacrificing accuracy. Do not add any information that is not explicitly " + "mentioned in the text.\n" "- **Nodes** represent entities and concepts.\n" "- The aim is to achieve simplicity and clarity in the knowledge graph, making it\n" "accessible for a vast audience.\n" @@ -82,8 +82,8 @@ system_prompt = ( "Ensure you use basic or elementary types for node labels.\n" "- For example, when you identify an entity representing a person, " "always label it as **'person'**. Avoid using more specific terms " - "like 'mathematician' or 'scientist'" - " - **Node IDs**: Never utilize integers as node IDs. Node IDs should be " + "like 'mathematician' or 'scientist'." + "- **Node IDs**: Never utilize integers as node IDs. Node IDs should be " "names or human-readable identifiers found in the text.\n" "- **Relationships** represent connections between entities or concepts.\n" "Ensure consistency and generality in relationship types when constructing " @@ -138,8 +138,8 @@ def _get_additional_info(input_type: str) -> str: elif input_type == "relationship": return ( "Instead of using specific and momentary types such as " - "'BECAME_PROFESSOR', use more general and timeless relationship types like " - "'PROFESSOR'. However, do not sacrifice any accuracy for generality" + "'BECAME_PROFESSOR', use more general and timeless relationship types " + "like 'PROFESSOR'. However, do not sacrifice any accuracy for generality" ) elif input_type == "property": return "" @@ -280,10 +280,32 @@ def create_simple_model( rel_types: Optional[List[str]] = None, node_properties: Union[bool, List[str]] = False, llm_type: Optional[str] = None, + relationship_properties: Union[bool, List[str]] = False, ) -> Type[_Graph]: """ - Simple model allows to limit node and/or relationship types. - Doesn't have any node or relationship properties. + Create a simple graph model with optional constraints on node + and relationship types. + + Args: + node_labels (Optional[List[str]]): Specifies the allowed node types. + Defaults to None, allowing all node types. + rel_types (Optional[List[str]]): Specifies the allowed relationship types. + Defaults to None, allowing all relationship types. + node_properties (Union[bool, List[str]]): Specifies if node properties should + be included. If a list is provided, only properties with keys in the list + will be included. If True, all properties are included. Defaults to False. + relationship_properties (Union[bool, List[str]]): Specifies if relationship + properties should be included. If a list is provided, only properties with + keys in the list will be included. If True, all properties are included. + Defaults to False. + llm_type (Optional[str]): The type of the language model. Defaults to None. + Only openai supports enum param: openai-chat. + + Returns: + Type[_Graph]: A graph model with the specified constraints. + + Raises: + ValueError: If 'id' is included in the node or relationship properties list. """ node_fields: Dict[str, Tuple[Any, Any]] = { @@ -325,39 +347,80 @@ def create_simple_model( ) SimpleNode = create_model("SimpleNode", **node_fields) # type: ignore - class SimpleRelationship(BaseModel): - """Represents a directed relationship between two nodes in a graph.""" + relationship_fields: Dict[str, Tuple[Any, Any]] = { + "source_node_id": ( + str, + Field( + ..., + description="Name or human-readable unique identifier of source node", + ), + ), + "source_node_type": ( + str, + optional_enum_field( + node_labels, + description="The type or label of the source node.", + input_type="node", + ), + ), + "target_node_id": ( + str, + Field( + ..., + description="Name or human-readable unique identifier of target node", + ), + ), + "target_node_type": ( + str, + optional_enum_field( + node_labels, + description="The type or label of the target node.", + input_type="node", + ), + ), + "type": ( + str, + optional_enum_field( + rel_types, + description="The type of the relationship.", + input_type="relationship", + ), + ), + } + if relationship_properties: + if ( + isinstance(relationship_properties, list) + and "id" in relationship_properties + ): + raise ValueError( + "The relationship property 'id' is reserved and cannot be used." + ) + # Map True to empty array + relationship_properties_mapped: List[str] = ( + [] if relationship_properties is True else relationship_properties + ) - source_node_id: str = Field( - description="Name or human-readable unique identifier of source node" - ) - source_node_type: str = optional_enum_field( - node_labels, - description="The type or label of the source node.", - input_type="node", - llm_type=llm_type, - ) - target_node_id: str = Field( - description="Name or human-readable unique identifier of target node" - ) - target_node_type: str = optional_enum_field( - node_labels, - description="The type or label of the target node.", - input_type="node", - llm_type=llm_type, - ) - type: str = optional_enum_field( - rel_types, - description="The type of the relationship.", - input_type="relationship", - llm_type=llm_type, + class RelationshipProperty(BaseModel): + """A single property consisting of key and value""" + + key: str = optional_enum_field( + relationship_properties_mapped, + description="Property key.", + input_type="property", + ) + value: str = Field(..., description="value") + + relationship_fields["properties"] = ( + Optional[List[RelationshipProperty]], + Field(None, description="List of relationship properties"), ) + SimpleRelationship = create_model("SimpleRelationship", **relationship_fields) # type: ignore class DynamicGraph(_Graph): """Represents a graph document consisting of nodes and relationships.""" nodes: Optional[List[SimpleNode]] = Field(description="List of nodes") # type: ignore - relationships: Optional[List[SimpleRelationship]] = Field( + relationships: Optional[List[SimpleRelationship]] = Field( # type: ignore description="List of relationships" ) @@ -377,7 +440,13 @@ def map_to_base_relationship(rel: Any) -> Relationship: """Map the SimpleRelationship to the base Relationship.""" source = Node(id=rel.source_node_id, type=rel.source_node_type) target = Node(id=rel.target_node_id, type=rel.target_node_type) - return Relationship(source=source, target=target, type=rel.type) + properties = {} + if hasattr(rel, "properties") and rel.properties: + for p in rel.properties: + properties[format_property_key(p.key)] = p.value + return Relationship( + source=source, target=target, type=rel.type, properties=properties + ) def _parse_and_clean_json( @@ -387,10 +456,15 @@ def _parse_and_clean_json( for node in argument_json["nodes"]: if not node.get("id"): # Id is mandatory, skip this node continue + node_properties = {} + if "properties" in node and node["properties"]: + for p in node["properties"]: + node_properties[format_property_key(p["key"])] = p["value"] nodes.append( Node( id=node["id"], type=node.get("type"), + properties=node_properties, ) ) relationships = [] @@ -423,6 +497,11 @@ def _parse_and_clean_json( except IndexError: rel["target_node_type"] = None + rel_properties = {} + if "properties" in rel and rel["properties"]: + for p in rel["properties"]: + rel_properties[format_property_key(p["key"])] = p["value"] + source_node = Node( id=rel["source_node_id"], type=rel["source_node_type"], @@ -436,6 +515,7 @@ def _parse_and_clean_json( source=source_node, target=target_node, type=rel["type"], + properties=rel_properties, ) ) return nodes, relationships @@ -458,6 +538,7 @@ def _format_relationships(rels: List[Relationship]) -> List[Relationship]: source=_format_nodes([el.source])[0], target=_format_nodes([el.target])[0], type=el.type.replace(" ", "_").upper(), + properties=el.properties, ) for el in rels ] @@ -513,8 +594,8 @@ class LLMGraphTransformer: """Transform documents into graph-based documents using a LLM. It allows specifying constraints on the types of nodes and relationships to include - in the output graph. The class doesn't support neither extract and node or - relationship properties + in the output graph. The class supports extracting properties for both nodes and + relationships. Args: llm (BaseLanguageModel): An instance of a language model supporting structured @@ -553,6 +634,7 @@ class LLMGraphTransformer: prompt: Optional[ChatPromptTemplate] = None, strict_mode: bool = True, node_properties: Union[bool, List[str]] = False, + relationship_properties: Union[bool, List[str]] = False, ) -> None: self.allowed_nodes = allowed_nodes self.allowed_relationships = allowed_relationships @@ -564,14 +646,14 @@ class LLMGraphTransformer: except NotImplementedError: self._function_call = False if not self._function_call: - if node_properties: + if node_properties or relationship_properties: raise ValueError( - "The 'node_properties' parameter cannot be used " - "in combination with a LLM that doesn't support " + "The 'node_properties' and 'relationship_properties' parameters " + "cannot be used in combination with a LLM that doesn't support " "native function calling." ) try: - import json_repair + import json_repair # type: ignore self.json_repair = json_repair except ImportError: @@ -590,7 +672,11 @@ class LLMGraphTransformer: except AttributeError: llm_type = None schema = create_simple_model( - allowed_nodes, allowed_relationships, node_properties, llm_type + allowed_nodes, + allowed_relationships, + node_properties, + llm_type, + relationship_properties, ) structured_llm = llm.with_structured_output(schema, include_raw=True) prompt = prompt or default_prompt