From ee697fb1b3b551b7356ffffa882016c02bf6e90e Mon Sep 17 00:00:00 2001
From: Thuc Pham <51660321+thucpn@users.noreply.github.com>
Date: Mon, 7 Oct 2024 14:17:04 +0700
Subject: [PATCH] fix: generate uuid when inserting to Qdrant (#1301)

---
 .changeset/tame-carrots-drop.md               |  6 +++
 packages/core/src/utils/index.ts              |  1 +
 packages/core/src/utils/uuid.ts               | 22 +++++++++++
 packages/core/tests/utils/uuid.test.ts        | 37 +++++++++++++++++++
 .../src/vector-store/QdrantVectorStore.ts     |  3 +-
 5 files changed, 68 insertions(+), 1 deletion(-)
 create mode 100644 .changeset/tame-carrots-drop.md
 create mode 100644 packages/core/src/utils/uuid.ts
 create mode 100644 packages/core/tests/utils/uuid.test.ts

diff --git a/.changeset/tame-carrots-drop.md b/.changeset/tame-carrots-drop.md
new file mode 100644
index 000000000..f99e7ce8e
--- /dev/null
+++ b/.changeset/tame-carrots-drop.md
@@ -0,0 +1,6 @@
+---
+"@llamaindex/core": patch
+"llamaindex": patch
+---
+
+fix: generate uuid when inserting to Qdrant
diff --git a/packages/core/src/utils/index.ts b/packages/core/src/utils/index.ts
index d040f010a..4c29807ce 100644
--- a/packages/core/src/utils/index.ts
+++ b/packages/core/src/utils/index.ts
@@ -80,3 +80,4 @@ export {
 } from "./llms";
 
 export { objectEntries } from "./object-entries";
+export { UUIDFromString } from "./uuid";
diff --git a/packages/core/src/utils/uuid.ts b/packages/core/src/utils/uuid.ts
new file mode 100644
index 000000000..a01fb7a60
--- /dev/null
+++ b/packages/core/src/utils/uuid.ts
@@ -0,0 +1,22 @@
+import { createSHA256 } from "@llamaindex/env";
+
+/**
+ * Derives a deterministic, UUID-shaped identifier from an arbitrary string.
+ * The input is hashed via createSHA256(); the first 31 hex digits are laid out
+ * as 8-4-4-4-12, with the version digit forced to 5 and the variant bits to 10.
+ * NOTE(review): uses the global `Buffer` — confirm every target runtime has it.
+ */
+export function UUIDFromString(input: string) {
+  const sha = createSHA256();
+  sha.update(input);
+  // The digest is decoded as base64 and re-encoded as a hex string.
+  const hex = Buffer.from(sha.digest(), "base64").toString("hex");
+  const at = (from: number, to: number) => hex.substring(from, to);
+
+  // Mask to the low six bits, then set the top bit: result is 0x80..0xbf.
+  const variantByte = (parseInt(at(15, 17), 16) & 0x3f) | 0x80;
+  const version = `5${at(12, 15)}`; // "5" marks a name-based UUID
+  const variant = variantByte.toString(16) + at(17, 19);
+
+  return [at(0, 8), at(8, 12), version, variant, at(19, 31)].join("-");
+}
diff --git a/packages/core/tests/utils/uuid.test.ts b/packages/core/tests/utils/uuid.test.ts
new file mode 100644
index 000000000..dc1d6055f
--- /dev/null
+++ b/packages/core/tests/utils/uuid.test.ts
@@ -0,0 +1,37 @@
+import { UUIDFromString } from "@llamaindex/core/utils";
+import { describe, expect, it } from "vitest";
+
+// Canonical 8-4-4-4-12 hex layout: version digit 5, variant digit 8/9/a/b.
+const UUID_REGEX =
+  /^[0-9a-f]{8}-[0-9a-f]{4}-5[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i;
+
+describe("UUIDFromString", () => {
+  const baseId = "document_id_1";
+
+  it("should convert string to UUID", () => {
+    const uuid = UUIDFromString(baseId);
+    expect(uuid).toBeDefined();
+    expect(uuid).toMatch(UUID_REGEX);
+  });
+
+  // Repeated calls with one input must agree.
+  it("should return the same UUID for the same input string", () => {
+    const first = UUIDFromString(baseId);
+    const second = UUIDFromString(baseId);
+    expect(first).toEqual(second);
+  });
+
+  // Inputs that differ in a single character must map to distinct ids.
+  it("should return the different UUID for different input strings", () => {
+    const first = UUIDFromString(baseId);
+    const second = UUIDFromString("document_id_2");
+    expect(first).not.toEqual(second);
+  });
+
+  // Letter case is significant: only the casing differs between the inputs.
+  it("should handle case-sensitive input strings", () => {
+    const lower = UUIDFromString(baseId);
+    const mixed = UUIDFromString("Document_Id_1");
+    expect(lower).not.toEqual(mixed);
+  });
+});
diff --git a/packages/llamaindex/src/vector-store/QdrantVectorStore.ts b/packages/llamaindex/src/vector-store/QdrantVectorStore.ts
index 4204ae619..2fbb05064 100644
--- a/packages/llamaindex/src/vector-store/QdrantVectorStore.ts
+++ b/packages/llamaindex/src/vector-store/QdrantVectorStore.ts
@@ -10,6 +10,7 @@ import {
   type VectorStoreQueryResult,
 } from "./types.js";
 
+import { UUIDFromString } from "@llamaindex/core/utils";
 import type { QdrantClientParams, Schemas } from "@qdrant/js-client-rest";
 import { QdrantClient } from "@qdrant/js-client-rest";
 import { metadataDictToNode, nodeToMetadata } from "./utils.js";
@@ -170,7 +171,7 @@ export class QdrantVectorStore
 
       for (let k = 0; k < nodeIds.length; k++) {
         const point: PointStruct = {
-          id: nodeIds[k]!.id_,
+          id: UUIDFromString(nodeIds[k]!.id_),
           payload: payloads[k]!,
           vector: vectors[k]!,
         };
-- 
GitLab