diff --git a/llama-index-integrations/graph_stores/llama-index-graph-stores-kuzu/llama_index/graph_stores/kuzu/kuzu_property_graph.py b/llama-index-integrations/graph_stores/llama-index-graph-stores-kuzu/llama_index/graph_stores/kuzu/kuzu_property_graph.py index 079fa6b6c81aa38714ba570feae42d0ae8601984..27515e5f5377626959a0202e8072772669b6f7ad 100644 --- a/llama-index-integrations/graph_stores/llama-index-graph-stores-kuzu/llama_index/graph_stores/kuzu/kuzu_property_graph.py +++ b/llama-index-integrations/graph_stores/llama-index-graph-stores-kuzu/llama_index/graph_stores/kuzu/kuzu_property_graph.py @@ -170,16 +170,17 @@ class KuzuPropertyGraphStore(PropertyGraphStore): def upsert_relations(self, relations: List[Relation]) -> None: for rel in relations: if self.has_structured_schema: - src, _, dst = utils.lookup_relation(rel.label, self.relationship_schema) + src, rel_tbl_name, dst = utils.lookup_relation( + rel.label, self.relationship_schema + ) else: - src, dst = "Entity", "Entity" + src, rel_tbl_name, dst = "Entity", "LINKS", "Entity" - rel_tbl_name = f"LINKS_{src}_{dst}" # Connect entities to each other self.connection.execute( f""" MATCH (a:{src} {{id: $source_id}}), - (b:{dst} {{id: $target_id}}) + (b:{dst} {{id: $target_id}}) MERGE (a)-[r:{rel_tbl_name} {{label: $label}}]->(b) SET r.triplet_source_id = $triplet_source_id """, @@ -196,8 +197,8 @@ class KuzuPropertyGraphStore(PropertyGraphStore): MATCH (a:{src} {{id: $source_id}}), (b:{dst} {{id: $target_id}}), (c:Chunk {{id: $triplet_source_id}}) - MERGE (c)-[:LINKS_Chunk_{src} {{label: "MENTIONS"}}]->(a) - MERGE (c)-[:LINKS_Chunk_{dst} {{label: "MENTIONS"}}]->(b) + MERGE (c)-[:MENTIONS]->(a) + MERGE (c)-[:MENTIONS]->(b) """, parameters={ "source_id": rel.source_id, diff --git a/llama-index-integrations/graph_stores/llama-index-graph-stores-kuzu/llama_index/graph_stores/kuzu/utils.py b/llama-index-integrations/graph_stores/llama-index-graph-stores-kuzu/llama_index/graph_stores/kuzu/utils.py index 
def create_chunk_node_table(connection: kuzu.Connection) -> None:
    """
    Create the ``Chunk`` node table unless it already exists.

    The statement uses ``CREATE NODE TABLE IF NOT EXISTS``, so calling this
    function repeatedly on the same database is safe.

    Args:
        connection: Open Kùzu connection the DDL statement is executed on.

    """
    # For now, the additional `properties` dict from LlamaIndex is stored as a string
    # TODO: See if it makes sense to add better support for property metadata as columns
    connection.execute(
        """
        CREATE NODE TABLE IF NOT EXISTS Chunk (
            id STRING,
            text STRING,
            label STRING,
            embedding DOUBLE[],
            creation_date DATE,
            last_modified_date DATE,
            file_name STRING,
            file_path STRING,
            file_size INT64,
            file_type STRING,
            ref_doc_id STRING,
            PRIMARY KEY(id)
        )
        """
    )


def create_entity_node_tables(connection: kuzu.Connection, entities: List[str]) -> None:
    """
    Create one node table per entity name unless it already exists.

    Every entity table gets the same fixed set of columns, keyed on ``id``.

    Args:
        connection: Open Kùzu connection the DDL statements are executed on.
        entities: Node table names; each one is interpolated verbatim into
            the DDL statement.

    """
    for tbl_name in entities:
        # For now, the additional `properties` dict from LlamaIndex is stored as a string
        # TODO: See if it makes sense to add better support for property metadata as columns
        connection.execute(
            f"""
            CREATE NODE TABLE IF NOT EXISTS {tbl_name} (
                id STRING,
                name STRING,
                label STRING,
                embedding DOUBLE[],
                creation_date DATE,
                last_modified_date DATE,
                file_name STRING,
                file_path STRING,
                file_size INT64,
                file_type STRING,
                triplet_source_id STRING,
                PRIMARY KEY(id)
            )
            """
        )


def create_entity_relationship_table(
    connection: kuzu.Connection, label: str, src_id: str, dst_id: str
) -> None:
    """
    Create a relationship table named ``label`` unless it already exists.

    Args:
        connection: Open Kùzu connection the DDL statement is executed on.
        label: Name of the relationship table to create.
        src_id: Name of the node table used in the ``FROM`` clause.
        dst_id: Name of the node table used in the ``TO`` clause.

    """
    connection.execute(
        f"""
        CREATE REL TABLE IF NOT EXISTS {label} (
            FROM {src_id} TO {dst_id},
            label STRING,
            triplet_source_id STRING
        );
        """
    )


def create_relation_tables(
    connection: kuzu.Connection, entities: List[str], relationship_schema: List[Triple]
) -> None:
    """
    Create the relationship tables for a schema plus the ``MENTIONS`` group.

    One relationship table is created per ``(src, label, dst)`` triple in
    ``relationship_schema``. Afterwards a ``MENTIONS`` relationship table
    group is created with one ``FROM Chunk TO <entity>`` pair per distinct
    entity name.

    Args:
        connection: Open Kùzu connection the DDL statements are executed on.
        entities: Entity node table names that ``Chunk`` nodes may point to.
        relationship_schema: ``(source table, relationship label, destination
            table)`` triples.

    """
    # Create relationship tables for each entity
    for src, rel_label, dst in relationship_schema:
        create_entity_relationship_table(connection, rel_label, src, dst)

    # De-duplicate while keeping the caller's order so the emitted DDL is
    # deterministic (a plain set would reorder the clauses between runs).
    mention_pairs = list(dict.fromkeys(f"FROM Chunk TO {entity}" for entity in entities))
    if not mention_pairs:
        # Without any FROM/TO pair the statement would read
        # "... MENTIONS (, label STRING, ...)", which is not valid DDL.
        return

    # Add common properties for all the tables here
    ddl = (
        "CREATE REL TABLE GROUP IF NOT EXISTS MENTIONS ("
        + ", ".join(mention_pairs)
        + ", label STRING, triplet_source_id STRING)"
    )
    connection.execute(ddl)
a/llama-index-integrations/graph_stores/llama-index-graph-stores-kuzu/pyproject.toml b/llama-index-integrations/graph_stores/llama-index-graph-stores-kuzu/pyproject.toml index d01490cf16b7ae25667a87a10971d910702809c8..56122a45890739e7a8ad0f043588681ea9303141 100644 --- a/llama-index-integrations/graph_stores/llama-index-graph-stores-kuzu/pyproject.toml +++ b/llama-index-integrations/graph_stores/llama-index-graph-stores-kuzu/pyproject.toml @@ -28,7 +28,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-graph-stores-kuzu" readme = "README.md" -version = "0.3.2" +version = "0.4.0" [tool.poetry.dependencies] python = ">=3.8.1,<4.0"