From b68d8697dfa9ee33d8734a7df9a05f5d38aa38f9 Mon Sep 17 00:00:00 2001 From: Marcus Schiesser <mail@marcusschiesser.de> Date: Mon, 8 Jan 2024 16:48:44 +0700 Subject: [PATCH] fix: add missing metadata after node parsing (#317) --- packages/core/src/nodeParsers/utils.ts | 4 ++ .../nodeParsers/SimpleNodeParser.test.ts | 39 +++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 packages/core/src/tests/nodeParsers/SimpleNodeParser.test.ts diff --git a/packages/core/src/nodeParsers/utils.ts b/packages/core/src/nodeParsers/utils.ts index 96765288a..c6e325ba7 100644 --- a/packages/core/src/nodeParsers/utils.ts +++ b/packages/core/src/nodeParsers/utils.ts @@ -53,6 +53,10 @@ export function getNodesFromDocument( const node = new TextNode({ text: textSplit, metadata: includeMetadata ? _.cloneDeep(document.metadata) : {}, + excludedEmbedMetadataKeys: _.cloneDeep( + document.excludedEmbedMetadataKeys, + ), + excludedLlmMetadataKeys: _.cloneDeep(document.excludedLlmMetadataKeys), }); node.relationships[NodeRelationship.SOURCE] = document.asRelatedNodeInfo(); nodes.push(node); diff --git a/packages/core/src/tests/nodeParsers/SimpleNodeParser.test.ts b/packages/core/src/tests/nodeParsers/SimpleNodeParser.test.ts new file mode 100644 index 000000000..59eaad92f --- /dev/null +++ b/packages/core/src/tests/nodeParsers/SimpleNodeParser.test.ts @@ -0,0 +1,39 @@ +import { Document } from "../../Node"; +import { SimpleNodeParser } from "../../nodeParsers"; + +describe("SimpleNodeParser", () => { + let simpleNodeParser: SimpleNodeParser; + + beforeEach(() => { + simpleNodeParser = new SimpleNodeParser({ + chunkSize: 1024, + chunkOverlap: 20, + }); + }); + + test("getNodesFromDocuments should return child nodes with equal but not the same metadata", () => { + const doc = new Document({ + text: "Hello. Cat Mouse. Dog.", + metadata: { animals: true }, + excludedLlmMetadataKeys: ["animals"], + excludedEmbedMetadataKeys: ["animals"], + }); + const result = simpleNodeParser.getNodesFromDocuments([doc]); + expect(result.length).toEqual(1); + const node = result[0]; + // check not the same object + expect(node.metadata).not.toBe(doc.metadata); + expect(node.excludedLlmMetadataKeys).not.toBe(doc.excludedLlmMetadataKeys); + expect(node.excludedEmbedMetadataKeys).not.toBe( + doc.excludedEmbedMetadataKeys, + ); + // but the same content + expect(node.metadata).toEqual(doc.metadata); + expect(node.excludedLlmMetadataKeys).toEqual(doc.excludedLlmMetadataKeys); + expect(node.excludedEmbedMetadataKeys).toEqual( + doc.excludedEmbedMetadataKeys, + ); + // check relationship + expect(node.sourceNode?.nodeId).toBe(doc.id_); + }); +}); -- GitLab