From b17d439d6d7fc5868cb91ab59fe92d3d642c4513 Mon Sep 17 00:00:00 2001 From: LAWG <101877402+LawrenceGB@users.noreply.github.com> Date: Thu, 3 Oct 2024 19:52:26 +0100 Subject: [PATCH] fix: ensure `id_` is correctly passed during creation (#1282) Co-authored-by: lawrencegb <lawrence@3api.com> Co-authored-by: Alex Yang <himself65@outlook.com> --- .changeset/forty-swans-tan.md | 5 +++++ packages/core/src/schema/node.ts | 4 ++-- packages/llamaindex/tests/indices/VectorStoreIndex.test.ts | 4 ++-- 3 files changed, 9 insertions(+), 4 deletions(-) create mode 100644 .changeset/forty-swans-tan.md diff --git a/.changeset/forty-swans-tan.md b/.changeset/forty-swans-tan.md new file mode 100644 index 000000000..4b8449a40 --- /dev/null +++ b/.changeset/forty-swans-tan.md @@ -0,0 +1,5 @@ +--- +"@llamaindex/core": patch +--- + +Fix #1278: resolved an issue where the provided id\_ was not passed through as the node ID when creating a TextNode or ImageNode. As a result, the upsert operation to the vector database used a generated ID instead of the provided document ID when one was available. diff --git a/packages/core/src/schema/node.ts b/packages/core/src/schema/node.ts index 459dbfb66..a79226db1 100644 --- a/packages/core/src/schema/node.ts +++ b/packages/core/src/schema/node.ts @@ -479,7 +479,7 @@ export function buildNodeFromSplits( ) { const imageDoc = doc as ImageNode; const imageNode = new ImageNode({ - id_: idGenerator(i, imageDoc), + id_: imageDoc.id_ ?? idGenerator(i, imageDoc), text: textChunk, image: imageDoc.image, embedding: imageDoc.embedding, @@ -496,7 +496,7 @@ export function buildNodeFromSplits( ) { const textDoc = doc as TextNode; const node = new TextNode({ - id_: idGenerator(i, textDoc), + id_: textDoc.id_ ?? 
idGenerator(i, textDoc), text: textChunk, embedding: textDoc.embedding, excludedEmbedMetadataKeys: [...textDoc.excludedEmbedMetadataKeys], diff --git a/packages/llamaindex/tests/indices/VectorStoreIndex.test.ts b/packages/llamaindex/tests/indices/VectorStoreIndex.test.ts index 9bca51af7..a807f8bed 100644 --- a/packages/llamaindex/tests/indices/VectorStoreIndex.test.ts +++ b/packages/llamaindex/tests/indices/VectorStoreIndex.test.ts @@ -27,7 +27,7 @@ describe("VectorStoreIndex", () => { runs: number = 2, ): Promise<Array<number>> => { const documents = [new Document({ text: "lorem ipsem", id_: "1" })]; - const entries = []; + const entries: number[] = []; for (let i = 0; i < runs; i++) { await VectorStoreIndex.fromDocuments(documents, { serviceContext, @@ -43,7 +43,7 @@ describe("VectorStoreIndex", () => { test("fromDocuments stores duplicates without a doc store strategy", async () => { const entries = await testStrategy(DocStoreStrategy.NONE); - expect(entries[0]! + 1).toBe(entries[1]); + expect(entries[0]).toBe(entries[1]); }); test("fromDocuments ignores duplicates with upserts doc store strategy", async () => { -- GitLab