/**
 * Derives a deterministic, UUID-shaped identifier from an arbitrary string.
 *
 * The input is hashed with SHA-256 and the leading hex digits of the digest
 * are laid out as `8-4-4-4-12`, with the version nibble forced to `5` and the
 * two variant bits forced to `10`. The same input always yields the same
 * identifier; inputs are compared byte-for-byte, so casing matters.
 *
 * Note: the result only has the *shape* of a version 5 UUID. It is built from
 * a SHA-256 digest without a namespace, so it will not match the output of a
 * standards-compliant UUIDv5 generator for the same input.
 *
 * @param input - The string to derive the identifier from.
 * @returns A lowercase, hyphenated, 36-character UUID-shaped string.
 */
export function UUIDFromString(input: string): string {
  const hashFunction = createSHA256();
  hashFunction.update(input);
  // The digest is consumed as a base64-encoded string.
  const base64Hash = hashFunction.digest();

  // Convert base64 to hex using only the `atob` global, so this utility does
  // not depend on the Node.js-only `Buffer` global.
  // Tolerate the URL-safe base64 alphabet as well as the standard one.
  const normalizedBase64 = base64Hash.replace(/-/g, "+").replace(/_/g, "/");
  const binary = atob(normalizedBase64);
  let hexHash = "";
  for (let i = 0; i < binary.length; i++) {
    // Each char of `binary` holds one byte (0-255); emit it as two hex digits.
    hexHash += binary.charCodeAt(i).toString(16).padStart(2, "0");
  }

  // Masking with 0x3f and OR-ing 0x80 pins the top two bits to `10`, so the
  // variant byte always lands in 0x80-0xbf and renders as two hex digits.
  const variantByte =
    (parseInt(hexHash.substring(15, 17), 16) & 0x3f) | 0x80;

  // Format the hash to resemble a UUID (version 5 style)
  return [
    hexHash.substring(0, 8),
    hexHash.substring(8, 12),
    "5" + hexHash.substring(12, 15), // Set the version to 5 (name-based)
    variantByte.toString(16) + hexHash.substring(17, 19), // Set the variant
    hexHash.substring(19, 31),
  ].join("-");
}
// Matches the 8-4-4-4-12 layout with version digit 5 and a variant digit of
// 8, 9, a or b (case-insensitive).
const UUID_REGEX =
  /^[0-9a-f]{8}-[0-9a-f]{4}-5[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i;

describe("UUIDFromString", () => {
  // Shared fixtures: a base id, a different id, and a case variant of the base.
  const baseId = "document_id_1";
  const otherId = "document_id_2";
  const recasedId = "Document_Id_1";

  it("should convert string to UUID", () => {
    const uuid = UUIDFromString(baseId);

    expect(uuid).toBeDefined();
    expect(uuid).toMatch(UUID_REGEX);
  });

  it("should return the same UUID for the same input string", () => {
    const first = UUIDFromString(baseId);
    const second = UUIDFromString(baseId);

    expect(first).toEqual(second);
  });

  it("should return the different UUID for different input strings", () => {
    const fromBase = UUIDFromString(baseId);
    const fromOther = UUIDFromString(otherId);

    expect(fromBase).not.toEqual(fromOther);
  });

  it("should handle case-sensitive input strings", () => {
    const fromLower = UUIDFromString(baseId);
    const fromRecased = UUIDFromString(recasedId);

    expect(fromLower).not.toEqual(fromRecased);
  });
});
point: PointStruct = { - id: nodeIds[k]!.id_, + id: UUIDFromString(nodeIds[k]!.id_), payload: payloads[k]!, vector: vectors[k]!, };