Skip to content
Snippets Groups Projects
Unverified Commit 5f678203 authored by Marcus Schiesser's avatar Marcus Schiesser Committed by GitHub
Browse files

Fix that node parsers generate nodes with UUIDs (#1311)

parent fe08d045
No related branches found
No related tags found
No related merge requests found
---
"llamaindex": patch
"@llamaindex/core": patch
---
Fix node parsers so that they generate nodes with UUIDs
......@@ -479,7 +479,7 @@ export function buildNodeFromSplits(
) {
const imageDoc = doc as ImageNode;
const imageNode = new ImageNode({
id_: imageDoc.id_ ?? idGenerator(i, imageDoc),
id_: idGenerator(i, imageDoc),
text: textChunk,
image: imageDoc.image,
embedding: imageDoc.embedding,
......@@ -496,7 +496,7 @@ export function buildNodeFromSplits(
) {
const textDoc = doc as TextNode;
const node = new TextNode({
id_: textDoc.id_ ?? idGenerator(i, textDoc),
id_: idGenerator(i, textDoc),
text: textChunk,
embedding: textDoc.embedding,
excludedEmbedMetadataKeys: [...textDoc.excludedEmbedMetadataKeys],
......
......@@ -80,4 +80,3 @@ export {
} from "./llms";
export { objectEntries } from "./object-entries";
export { UUIDFromString } from "./uuid";
import { createSHA256 } from "@llamaindex/env";
/**
 * Derives a UUID-shaped string from an arbitrary input string.
 *
 * The input is fed to the hasher returned by `createSHA256()`; the digest is
 * decoded as base64 (presumably the encoding `digest()` returns — confirm
 * against `@llamaindex/env`) and re-encoded as hex. The first 31 hex
 * characters are then laid out as 8-4-4-4-12 groups, with the version digit
 * forced to `5` and the top two bits of the fourth group forced to `10`.
 *
 * @param input - The string to derive the identifier from.
 * @returns A string formatted like a version-5 UUID.
 */
export function UUIDFromString(input: string) {
  const hasher = createSHA256();
  hasher.update(input);

  // Re-encode the digest from base64 to hex so it can be sliced by nibble.
  const hex = Buffer.from(hasher.digest(), "base64").toString("hex");

  // Clear the two high bits and set the top one: result is always 0x80..0xbf,
  // so it always renders as exactly two hex digits.
  const variantByte = (parseInt(hex.slice(15, 17), 16) & 0x3f) | 0x80;

  const timeLow = hex.slice(0, 8);
  const timeMid = hex.slice(8, 12);
  const versioned = `5${hex.slice(12, 15)}`; // version digit fixed to 5
  const variantGroup = `${variantByte.toString(16)}${hex.slice(17, 19)}`;
  const tail = hex.slice(19, 31);

  return `${timeLow}-${timeMid}-${versioned}-${variantGroup}-${tail}`;
}
import { UUIDFromString } from "@llamaindex/core/utils";
import { describe, expect, it } from "vitest";
// Matches the canonical 8-4-4-4-12 hex layout with version digit 5 and a
// variant digit of 8, 9, a or b (case-insensitive).
const UUID_REGEX =
  /^[0-9a-f]{8}-[0-9a-f]{4}-5[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i;

// Checks the output format, determinism and input sensitivity of
// UUIDFromString.
describe("UUIDFromString", () => {
  const baseId = "document_id_1";

  it("should convert string to UUID", () => {
    const uuid = UUIDFromString(baseId);
    expect(uuid).toBeDefined();
    expect(uuid).toMatch(UUID_REGEX);
  });

  it("should return the same UUID for the same input string", () => {
    const first = UUIDFromString(baseId);
    const second = UUIDFromString(baseId);
    expect(first).toEqual(second);
  });

  it("should return the different UUID for different input strings", () => {
    const first = UUIDFromString(baseId);
    const second = UUIDFromString("document_id_2");
    expect(first).not.toEqual(second);
  });

  it("should handle case-sensitive input strings", () => {
    // Same characters, different casing: the results must still differ.
    const lower = UUIDFromString(baseId);
    const mixed = UUIDFromString("Document_Id_1");
    expect(lower).not.toEqual(mixed);
  });
});
......@@ -10,7 +10,6 @@ import {
type VectorStoreQueryResult,
} from "./types.js";
import { UUIDFromString } from "@llamaindex/core/utils";
import type { QdrantClientParams, Schemas } from "@qdrant/js-client-rest";
import { QdrantClient } from "@qdrant/js-client-rest";
import { metadataDictToNode, nodeToMetadata } from "./utils.js";
......@@ -171,7 +170,7 @@ export class QdrantVectorStore
for (let k = 0; k < nodeIds.length; k++) {
const point: PointStruct = {
id: UUIDFromString(nodeIds[k]!.id_),
id: nodeIds[k]!.id_,
payload: payloads[k]!,
vector: vectors[k]!,
};
......
......@@ -27,7 +27,7 @@ describe("VectorStoreIndex", () => {
runs: number = 2,
): Promise<Array<number>> => {
const documents = [new Document({ text: "lorem ipsem", id_: "1" })];
const entries: number[] = [];
const entries = [];
for (let i = 0; i < runs; i++) {
await VectorStoreIndex.fromDocuments(documents, {
serviceContext,
......@@ -43,7 +43,7 @@ describe("VectorStoreIndex", () => {
test("fromDocuments stores duplicates without a doc store strategy", async () => {
const entries = await testStrategy(DocStoreStrategy.NONE);
expect(entries[0]).toBe(entries[1]);
expect(entries[0]! + 1).toBe(entries[1]);
});
test("fromDocuments ignores duplicates with upserts doc store strategy", async () => {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment