diff --git a/.changeset/forty-swans-tan.md b/.changeset/forty-swans-tan.md new file mode 100644 index 0000000000000000000000000000000000000000..4b8449a40dff61c586417b89bef88705000c703b --- /dev/null +++ b/.changeset/forty-swans-tan.md @@ -0,0 +1,5 @@ +--- +"@llamaindex/core": patch +--- + +Fix #1278: a document's id\_ was not passed through as the node id when creating a TextNode or ImageNode from its splits. As a result, the upsert operation to the vector database used a generated ID instead of the provided document ID when one was available. diff --git a/packages/core/src/schema/node.ts b/packages/core/src/schema/node.ts index 459dbfb6680f5e1119764182aa3b3efb79271fe3..a79226db115ffd8869e578aed75e498745186a6d 100644 --- a/packages/core/src/schema/node.ts +++ b/packages/core/src/schema/node.ts @@ -479,7 +479,7 @@ export function buildNodeFromSplits( ) { const imageDoc = doc as ImageNode; const imageNode = new ImageNode({ - id_: idGenerator(i, imageDoc), + id_: imageDoc.id_ ?? idGenerator(i, imageDoc), text: textChunk, image: imageDoc.image, embedding: imageDoc.embedding, @@ -496,7 +496,7 @@ export function buildNodeFromSplits( ) { const textDoc = doc as TextNode; const node = new TextNode({ - id_: idGenerator(i, textDoc), + id_: textDoc.id_ ?? 
idGenerator(i, textDoc), text: textChunk, embedding: textDoc.embedding, excludedEmbedMetadataKeys: [...textDoc.excludedEmbedMetadataKeys], diff --git a/packages/llamaindex/tests/indices/VectorStoreIndex.test.ts b/packages/llamaindex/tests/indices/VectorStoreIndex.test.ts index 9bca51af7e9dfaae84789d8bee9618072bfec600..a807f8bed63cc2d6b596cdb1f2f3532976cbccac 100644 --- a/packages/llamaindex/tests/indices/VectorStoreIndex.test.ts +++ b/packages/llamaindex/tests/indices/VectorStoreIndex.test.ts @@ -27,7 +27,7 @@ describe("VectorStoreIndex", () => { runs: number = 2, ): Promise<Array<number>> => { const documents = [new Document({ text: "lorem ipsem", id_: "1" })]; - const entries = []; + const entries: number[] = []; for (let i = 0; i < runs; i++) { await VectorStoreIndex.fromDocuments(documents, { serviceContext, @@ -43,7 +43,7 @@ describe("VectorStoreIndex", () => { test("fromDocuments stores duplicates without a doc store strategy", async () => { const entries = await testStrategy(DocStoreStrategy.NONE); - expect(entries[0]! + 1).toBe(entries[1]); + expect(entries[0]).toBe(entries[1]); }); test("fromDocuments ignores duplicates with upserts doc store strategy", async () => {