From 916b195c5e18a929c2fb4de19d9e762a0d3b94a6 Mon Sep 17 00:00:00 2001 From: Prashanth Rao <35005448+prrao87@users.noreply.github.com> Date: Mon, 11 Nov 2024 21:57:31 -0500 Subject: [PATCH] =?UTF-8?q?K=C3=B9zu:=20Update=20relationship=20table=20la?= =?UTF-8?q?bel=20nomenclature=20(#16886)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update relationship table label nomenclature * Update pyproject.toml --- .../graph_stores/kuzu/kuzu_property_graph.py | 13 +-- .../llama_index/graph_stores/kuzu/utils.py | 93 +++++++++++-------- .../pyproject.toml | 2 +- 3 files changed, 60 insertions(+), 48 deletions(-) diff --git a/llama-index-integrations/graph_stores/llama-index-graph-stores-kuzu/llama_index/graph_stores/kuzu/kuzu_property_graph.py b/llama-index-integrations/graph_stores/llama-index-graph-stores-kuzu/llama_index/graph_stores/kuzu/kuzu_property_graph.py index 079fa6b6c8..27515e5f53 100644 --- a/llama-index-integrations/graph_stores/llama-index-graph-stores-kuzu/llama_index/graph_stores/kuzu/kuzu_property_graph.py +++ b/llama-index-integrations/graph_stores/llama-index-graph-stores-kuzu/llama_index/graph_stores/kuzu/kuzu_property_graph.py @@ -170,16 +170,17 @@ class KuzuPropertyGraphStore(PropertyGraphStore): def upsert_relations(self, relations: List[Relation]) -> None: for rel in relations: if self.has_structured_schema: - src, _, dst = utils.lookup_relation(rel.label, self.relationship_schema) + src, rel_tbl_name, dst = utils.lookup_relation( + rel.label, self.relationship_schema + ) else: - src, dst = "Entity", "Entity" + src, rel_tbl_name, dst = "Entity", "LINKS", "Entity" - rel_tbl_name = f"LINKS_{src}_{dst}" # Connect entities to each other self.connection.execute( f""" MATCH (a:{src} {{id: $source_id}}), - (b:{dst} {{id: $target_id}}) + (b:{dst} {{id: $target_id}}) MERGE (a)-[r:{rel_tbl_name} {{label: $label}}]->(b) SET r.triplet_source_id = $triplet_source_id """, @@ -196,8 +197,8 @@ class KuzuPropertyGraphStore(PropertyGraphStore): MATCH (a:{src} {{id: $source_id}}), (b:{dst} {{id: $target_id}}), (c:Chunk {{id: $triplet_source_id}}) - MERGE (c)-[:LINKS_Chunk_{src} {{label: "MENTIONS"}}]->(a) - MERGE (c)-[:LINKS_Chunk_{dst} {{label: "MENTIONS"}}]->(b) + MERGE (c)-[:MENTIONS]->(a) + MERGE (c)-[:MENTIONS]->(b) """, parameters={ "source_id": rel.source_id, diff --git a/llama-index-integrations/graph_stores/llama-index-graph-stores-kuzu/llama_index/graph_stores/kuzu/utils.py b/llama-index-integrations/graph_stores/llama-index-graph-stores-kuzu/llama_index/graph_stores/kuzu/utils.py index e3e8317b35..9e8a0f0f06 100644 --- a/llama-index-integrations/graph_stores/llama-index-graph-stores-kuzu/llama_index/graph_stores/kuzu/utils.py +++ b/llama-index-integrations/graph_stores/llama-index-graph-stores-kuzu/llama_index/graph_stores/kuzu/utils.py @@ -62,12 +62,35 @@ def lookup_relation(relation: str, triples: List[Triple]) -> Triple: def create_chunk_node_table(connection: kuzu.Connection) -> None: # For now, the additional `properties` dict from LlamaIndex is stored as a string # TODO: See if it makes sense to add better support for property metadata as columns - if "Chunk" not in connection._get_node_table_names(): + connection.execute( + f""" + CREATE NODE TABLE IF NOT EXISTS Chunk ( + id STRING, + text STRING, + label STRING, + embedding DOUBLE[], + creation_date DATE, + last_modified_date DATE, + file_name STRING, + file_path STRING, + file_size INT64, + file_type STRING, + ref_doc_id STRING, + PRIMARY KEY(id) + ) + """ + ) + + +def create_entity_node_tables(connection: kuzu.Connection, entities: List[str]) -> None: + for tbl_name in entities: + # For now, the additional `properties` dict from LlamaIndex is stored as a string + # TODO: See if it makes sense to add better support for property metadata as columns connection.execute( f""" - CREATE NODE TABLE Chunk ( + CREATE NODE TABLE IF NOT EXISTS {tbl_name} ( id STRING, - text STRING, + name STRING, label STRING, embedding DOUBLE[], creation_date DATE, @@ -76,54 +99,42 @@ def create_chunk_node_table(connection: kuzu.Connection) -> None: file_path STRING, file_size INT64, file_type STRING, - ref_doc_id STRING, + triplet_source_id STRING, PRIMARY KEY(id) ) """ ) -def create_entity_node_tables(connection: kuzu.Connection, entities: List[str]) -> None: - for tbl_name in entities: - # For now, the additional `properties` dict from LlamaIndex is stored as a string - # TODO: See if it makes sense to add better support for property metadata as columns - if tbl_name not in connection._get_node_table_names(): - connection.execute( - f""" - CREATE NODE TABLE {tbl_name} ( - id STRING, - name STRING, - label STRING, - embedding DOUBLE[], - creation_date DATE, - last_modified_date DATE, - file_name STRING, - file_path STRING, - file_size INT64, - file_type STRING, - triplet_source_id STRING, - PRIMARY KEY(id) - ) - """ - ) +def create_entity_relationship_table( + connection: kuzu.Connection, label: str, src_id: str, dst_id: str +) -> None: + connection.execute( + f""" + CREATE REL TABLE IF NOT EXISTS {label} ( + FROM {src_id} TO {dst_id}, + label STRING, + triplet_source_id STRING + ); + """ + ) def create_relation_tables( connection: kuzu.Connection, entities: List[str], relationship_schema: List[Triple] ) -> None: - rel_tables = [tbl["name"] for tbl in connection._get_rel_table_names()] - # We use Kùzu relationship table group creation DDL commands to create relationship tables - ddl = "" - if not any("LINKS" in table for table in rel_tables): - ddl = "CREATE REL TABLE GROUP LINKS (" - table_names = [] - for src, _, dst in relationship_schema: - table_names.append(f"FROM {src} TO {dst}") - for entity in entities: - table_names.append(f"FROM Chunk TO {entity}") - table_names = list(set(table_names)) - ddl += ", ".join(table_names) - # Add common properties for all the tables here - ddl += ", label STRING, triplet_source_id STRING)" + # Create relationship tables for each entity + for src, rel_label, dst in relationship_schema: + create_entity_relationship_table(connection, rel_label, src, dst) + + ddl = "CREATE REL TABLE GROUP IF NOT EXISTS MENTIONS (" + table_names = [] + for entity in entities: + table_names.append(f"FROM Chunk TO {entity}") + table_names = list(set(table_names)) + ddl += ", ".join(table_names) + # Add common properties for all the tables here + ddl += ", label STRING, triplet_source_id STRING)" + if ddl: connection.execute(ddl) diff --git a/llama-index-integrations/graph_stores/llama-index-graph-stores-kuzu/pyproject.toml b/llama-index-integrations/graph_stores/llama-index-graph-stores-kuzu/pyproject.toml index d01490cf16..56122a4589 100644 --- a/llama-index-integrations/graph_stores/llama-index-graph-stores-kuzu/pyproject.toml +++ b/llama-index-integrations/graph_stores/llama-index-graph-stores-kuzu/pyproject.toml @@ -28,7 +28,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-graph-stores-kuzu" readme = "README.md" -version = "0.3.2" +version = "0.4.0" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -- GitLab