From 5f6782038a538b9fad3ea156c4523d4adebc00c3 Mon Sep 17 00:00:00 2001
From: Marcus Schiesser <mail@marcusschiesser.de>
Date: Wed, 9 Oct 2024 11:56:02 +0700
Subject: [PATCH] Fix node parsers so that generated nodes get UUIDs (#1311)

---
 .changeset/healthy-tables-hug.md              |  6 +++
 packages/core/src/schema/node.ts              |  4 +-
 packages/core/src/utils/index.ts              |  1 -
 packages/core/src/utils/uuid.ts               | 22 -----------
 packages/core/tests/utils/uuid.test.ts        | 37 -------------------
 .../src/vector-store/QdrantVectorStore.ts     |  3 +-
 .../tests/indices/VectorStoreIndex.test.ts    |  4 +-
 7 files changed, 11 insertions(+), 66 deletions(-)
 create mode 100644 .changeset/healthy-tables-hug.md
 delete mode 100644 packages/core/src/utils/uuid.ts
 delete mode 100644 packages/core/tests/utils/uuid.test.ts

diff --git a/.changeset/healthy-tables-hug.md b/.changeset/healthy-tables-hug.md
new file mode 100644
index 000000000..d44406786
--- /dev/null
+++ b/.changeset/healthy-tables-hug.md
@@ -0,0 +1,6 @@
+---
+"llamaindex": patch
+"@llamaindex/core": patch
+---
+
+Fix node parsers so that generated nodes get UUIDs
diff --git a/packages/core/src/schema/node.ts b/packages/core/src/schema/node.ts
index a79226db1..459dbfb66 100644
--- a/packages/core/src/schema/node.ts
+++ b/packages/core/src/schema/node.ts
@@ -479,7 +479,7 @@ export function buildNodeFromSplits(
     ) {
       const imageDoc = doc as ImageNode;
       const imageNode = new ImageNode({
-        id_: imageDoc.id_ ?? idGenerator(i, imageDoc),
+        id_: idGenerator(i, imageDoc),
         text: textChunk,
         image: imageDoc.image,
         embedding: imageDoc.embedding,
@@ -496,7 +496,7 @@ export function buildNodeFromSplits(
     ) {
       const textDoc = doc as TextNode;
       const node = new TextNode({
-        id_: textDoc.id_ ?? idGenerator(i, textDoc),
+        id_: idGenerator(i, textDoc),
         text: textChunk,
         embedding: textDoc.embedding,
         excludedEmbedMetadataKeys: [...textDoc.excludedEmbedMetadataKeys],
diff --git a/packages/core/src/utils/index.ts b/packages/core/src/utils/index.ts
index 4c29807ce..d040f010a 100644
--- a/packages/core/src/utils/index.ts
+++ b/packages/core/src/utils/index.ts
@@ -80,4 +80,3 @@ export {
 } from "./llms";
 
 export { objectEntries } from "./object-entries";
-export { UUIDFromString } from "./uuid";
diff --git a/packages/core/src/utils/uuid.ts b/packages/core/src/utils/uuid.ts
deleted file mode 100644
index a01fb7a60..000000000
--- a/packages/core/src/utils/uuid.ts
+++ /dev/null
@@ -1,22 +0,0 @@
-import { createSHA256 } from "@llamaindex/env";
-
-export function UUIDFromString(input: string) {
-  const hashFunction = createSHA256();
-  hashFunction.update(input);
-  const base64Hash = hashFunction.digest();
-
-  // Convert base64 to hex
-  const hexHash = Buffer.from(base64Hash, "base64").toString("hex");
-
-  // Format the hash to resemble a UUID (version 5 style)
-  const uuid = [
-    hexHash.substring(0, 8),
-    hexHash.substring(8, 12),
-    "5" + hexHash.substring(12, 15), // Set the version to 5 (name-based)
-    ((parseInt(hexHash.substring(15, 17), 16) & 0x3f) | 0x80).toString(16) +
-      hexHash.substring(17, 19), // Set the variant
-    hexHash.substring(19, 31),
-  ].join("-");
-
-  return uuid;
-}
diff --git a/packages/core/tests/utils/uuid.test.ts b/packages/core/tests/utils/uuid.test.ts
deleted file mode 100644
index dc1d6055f..000000000
--- a/packages/core/tests/utils/uuid.test.ts
+++ /dev/null
@@ -1,37 +0,0 @@
-import { UUIDFromString } from "@llamaindex/core/utils";
-import { describe, expect, it } from "vitest";
-
-const UUID_REGEX =
-  /^[0-9a-f]{8}-[0-9a-f]{4}-5[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i;
-
-describe("UUIDFromString", () => {
-  it("should convert string to UUID", () => {
-    const string = "document_id_1";
-    const result = UUIDFromString(string);
-    expect(result).toBeDefined();
-    expect(result).toMatch(UUID_REGEX);
-  });
-
-  it("should return the same UUID for the same input string", () => {
-    const string = "document_id_1";
-    const result1 = UUIDFromString(string);
-    const result2 = UUIDFromString(string);
-    expect(result1).toEqual(result2);
-  });
-
-  it("should return the different UUID for different input strings", () => {
-    const string1 = "document_id_1";
-    const string2 = "document_id_2";
-    const result1 = UUIDFromString(string1);
-    const result2 = UUIDFromString(string2);
-    expect(result1).not.toEqual(result2);
-  });
-
-  it("should handle case-sensitive input strings", () => {
-    const string1 = "document_id_1";
-    const string2 = "Document_Id_1";
-    const result1 = UUIDFromString(string1);
-    const result2 = UUIDFromString(string2);
-    expect(result1).not.toEqual(result2);
-  });
-});
diff --git a/packages/llamaindex/src/vector-store/QdrantVectorStore.ts b/packages/llamaindex/src/vector-store/QdrantVectorStore.ts
index 2fbb05064..4204ae619 100644
--- a/packages/llamaindex/src/vector-store/QdrantVectorStore.ts
+++ b/packages/llamaindex/src/vector-store/QdrantVectorStore.ts
@@ -10,7 +10,6 @@ import {
   type VectorStoreQueryResult,
 } from "./types.js";
 
-import { UUIDFromString } from "@llamaindex/core/utils";
 import type { QdrantClientParams, Schemas } from "@qdrant/js-client-rest";
 import { QdrantClient } from "@qdrant/js-client-rest";
 import { metadataDictToNode, nodeToMetadata } from "./utils.js";
@@ -171,7 +170,7 @@ export class QdrantVectorStore
 
       for (let k = 0; k < nodeIds.length; k++) {
         const point: PointStruct = {
-          id: UUIDFromString(nodeIds[k]!.id_),
+          id: nodeIds[k]!.id_,
           payload: payloads[k]!,
           vector: vectors[k]!,
         };
diff --git a/packages/llamaindex/tests/indices/VectorStoreIndex.test.ts b/packages/llamaindex/tests/indices/VectorStoreIndex.test.ts
index a807f8bed..9bca51af7 100644
--- a/packages/llamaindex/tests/indices/VectorStoreIndex.test.ts
+++ b/packages/llamaindex/tests/indices/VectorStoreIndex.test.ts
@@ -27,7 +27,7 @@ describe("VectorStoreIndex", () => {
       runs: number = 2,
     ): Promise<Array<number>> => {
       const documents = [new Document({ text: "lorem ipsem", id_: "1" })];
-      const entries: number[] = [];
+      const entries = [];
       for (let i = 0; i < runs; i++) {
         await VectorStoreIndex.fromDocuments(documents, {
           serviceContext,
@@ -43,7 +43,7 @@ describe("VectorStoreIndex", () => {
 
   test("fromDocuments stores duplicates without a doc store strategy", async () => {
     const entries = await testStrategy(DocStoreStrategy.NONE);
-    expect(entries[0]).toBe(entries[1]);
+    expect(entries[0]! + 1).toBe(entries[1]);
   });
 
   test("fromDocuments ignores duplicates with upserts doc store strategy", async () => {
-- 
GitLab