From ee697fb1b3b551b7356ffffa882016c02bf6e90e Mon Sep 17 00:00:00 2001
From: Thuc Pham <51660321+thucpn@users.noreply.github.com>
Date: Mon, 7 Oct 2024 14:17:04 +0700
Subject: [PATCH] fix: generate uuid when inserting to Qdrant (#1301)

---
 .changeset/tame-carrots-drop.md               |  6 +++
 packages/core/src/utils/index.ts              |  1 +
 packages/core/src/utils/uuid.ts               | 37 +++++++++++++++++++
 packages/core/tests/utils/uuid.test.ts        | 37 +++++++++++++++++++
 .../src/vector-store/QdrantVectorStore.ts     |  3 +-
 5 files changed, 83 insertions(+), 1 deletion(-)
 create mode 100644 .changeset/tame-carrots-drop.md
 create mode 100644 packages/core/src/utils/uuid.ts
 create mode 100644 packages/core/tests/utils/uuid.test.ts

diff --git a/.changeset/tame-carrots-drop.md b/.changeset/tame-carrots-drop.md
new file mode 100644
index 000000000..f99e7ce8e
--- /dev/null
+++ b/.changeset/tame-carrots-drop.md
@@ -0,0 +1,6 @@
+---
+"@llamaindex/core": patch
+"llamaindex": patch
+---
+
+fix: generate uuid when inserting to Qdrant
diff --git a/packages/core/src/utils/index.ts b/packages/core/src/utils/index.ts
index d040f010a..4c29807ce 100644
--- a/packages/core/src/utils/index.ts
+++ b/packages/core/src/utils/index.ts
@@ -80,3 +80,4 @@ export {
 } from "./llms";
 
 export { objectEntries } from "./object-entries";
+export { UUIDFromString } from "./uuid";
diff --git a/packages/core/src/utils/uuid.ts b/packages/core/src/utils/uuid.ts
new file mode 100644
index 000000000..a01fb7a60
--- /dev/null
+++ b/packages/core/src/utils/uuid.ts
@@ -0,0 +1,37 @@
+import { createSHA256 } from "@llamaindex/env";
+
+/**
+ * Derives a deterministic, UUID-formatted identifier from an arbitrary string.
+ *
+ * The input is hashed via `createSHA256` and the leading hex digits of the
+ * digest are laid out as `8-4-4-4-12`, with the version nibble forced to `5`
+ * and the two variant bits forced to `10`. The result only resembles a
+ * version 5 UUID: RFC-compliant v5 values are derived from a namespace plus
+ * SHA-1, so a standard UUIDv5 generator will not reproduce these values.
+ *
+ * @param input - String to derive the identifier from.
+ * @returns A UUID-shaped string; equal inputs always produce equal output.
+ */
+export function UUIDFromString(input: string): string {
+  const hashFunction = createSHA256();
+  hashFunction.update(input);
+  const base64Hash = hashFunction.digest();
+
+  // Convert base64 to hex.
+  // NOTE(review): uses the global `Buffer` — confirm it is available in every
+  // runtime this package targets.
+  const hexHash = Buffer.from(base64Hash, "base64").toString("hex");
+
+  // Lay out the first 31 hex digits in the 8-4-4-4-12 UUID shape.
+  const uuid = [
+    hexHash.substring(0, 8),
+    hexHash.substring(8, 12),
+    "5" + hexHash.substring(12, 15), // Version nibble fixed to 5 (name-based)
+    // Mask one byte to 10xxxxxx so the variant nibble is always 8, 9, a or b.
+    ((parseInt(hexHash.substring(15, 17), 16) & 0x3f) | 0x80).toString(16) +
+      hexHash.substring(17, 19),
+    hexHash.substring(19, 31),
+  ].join("-");
+
+  return uuid;
+}
diff --git a/packages/core/tests/utils/uuid.test.ts b/packages/core/tests/utils/uuid.test.ts
new file mode 100644
index 000000000..dc1d6055f
--- /dev/null
+++ b/packages/core/tests/utils/uuid.test.ts
@@ -0,0 +1,37 @@
+import { UUIDFromString } from "@llamaindex/core/utils";
+import { describe, expect, it } from "vitest";
+
+const UUID_REGEX =
+  /^[0-9a-f]{8}-[0-9a-f]{4}-5[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i;
+
+describe("UUIDFromString", () => {
+  it("should convert string to UUID", () => {
+    const string = "document_id_1";
+    const result = UUIDFromString(string);
+    expect(result).toBeDefined();
+    expect(result).toMatch(UUID_REGEX);
+  });
+
+  it("should return the same UUID for the same input string", () => {
+    const string = "document_id_1";
+    const result1 = UUIDFromString(string);
+    const result2 = UUIDFromString(string);
+    expect(result1).toEqual(result2);
+  });
+
+  it("should return the different UUID for different input strings", () => {
+    const string1 = "document_id_1";
+    const string2 = "document_id_2";
+    const result1 = UUIDFromString(string1);
+    const result2 = UUIDFromString(string2);
+    expect(result1).not.toEqual(result2);
+  });
+
+  it("should handle case-sensitive input strings", () => {
+    const string1 = "document_id_1";
+    const string2 = "Document_Id_1";
+    const result1 = UUIDFromString(string1);
+    const result2 = UUIDFromString(string2);
+    expect(result1).not.toEqual(result2);
+  });
+});
diff --git a/packages/llamaindex/src/vector-store/QdrantVectorStore.ts b/packages/llamaindex/src/vector-store/QdrantVectorStore.ts
index 4204ae619..2fbb05064 100644
--- a/packages/llamaindex/src/vector-store/QdrantVectorStore.ts
+++ b/packages/llamaindex/src/vector-store/QdrantVectorStore.ts
@@ -10,6 +10,7 @@ import {
   type VectorStoreQueryResult,
 } from "./types.js";
 
+import { UUIDFromString } from "@llamaindex/core/utils";
 import type { QdrantClientParams, Schemas } from "@qdrant/js-client-rest";
 import { QdrantClient } from "@qdrant/js-client-rest";
 import { metadataDictToNode, nodeToMetadata } from "./utils.js";
@@ -170,7 +171,7 @@ export class QdrantVectorStore
 
       for (let k = 0; k < nodeIds.length; k++) {
         const point: PointStruct = {
-          id: nodeIds[k]!.id_,
+          id: UUIDFromString(nodeIds[k]!.id_),
           payload: payloads[k]!,
           vector: vectors[k]!,
         };
-- 
GitLab