diff --git a/packages/core/src/nodeParsers/utils.ts b/packages/core/src/nodeParsers/utils.ts index 96765288aef81fabb6900b77fa5196e52f0bf6b7..c6e325ba7ef7ef74b1ddc7b2eab0f3af45191694 100644 --- a/packages/core/src/nodeParsers/utils.ts +++ b/packages/core/src/nodeParsers/utils.ts @@ -53,6 +53,10 @@ export function getNodesFromDocument( const node = new TextNode({ text: textSplit, metadata: includeMetadata ? _.cloneDeep(document.metadata) : {}, + excludedEmbedMetadataKeys: _.cloneDeep( + document.excludedEmbedMetadataKeys, + ), + excludedLlmMetadataKeys: _.cloneDeep(document.excludedLlmMetadataKeys), }); node.relationships[NodeRelationship.SOURCE] = document.asRelatedNodeInfo(); nodes.push(node); diff --git a/packages/core/src/tests/nodeParsers/SimpleNodeParser.test.ts b/packages/core/src/tests/nodeParsers/SimpleNodeParser.test.ts new file mode 100644 index 0000000000000000000000000000000000000000..59eaad92fd6e16418140c60a911861a147eb280c --- /dev/null +++ b/packages/core/src/tests/nodeParsers/SimpleNodeParser.test.ts @@ -0,0 +1,39 @@ +import { Document } from "../../Node"; +import { SimpleNodeParser } from "../../nodeParsers"; + +describe("SimpleNodeParser", () => { + let simpleNodeParser: SimpleNodeParser; + + beforeEach(() => { + simpleNodeParser = new SimpleNodeParser({ + chunkSize: 1024, + chunkOverlap: 20, + }); + }); + + test("getNodesFromDocuments should return child nodes with equal but not the same metadata", () => { + const doc = new Document({ + text: "Hello. Cat Mouse. Dog.", + metadata: { animals: true }, + excludedLlmMetadataKeys: ["animals"], + excludedEmbedMetadataKeys: ["animals"], + }); + const result = simpleNodeParser.getNodesFromDocuments([doc]); + expect(result.length).toEqual(1); + const node = result[0]; + // check not the same object + expect(node.metadata).not.toBe(doc.metadata); + expect(node.excludedLlmMetadataKeys).not.toBe(doc.excludedLlmMetadataKeys); + expect(node.excludedEmbedMetadataKeys).not.toBe( + doc.excludedEmbedMetadataKeys, + ); + // but the same content + expect(node.metadata).toEqual(doc.metadata); + expect(node.excludedLlmMetadataKeys).toEqual(doc.excludedLlmMetadataKeys); + expect(node.excludedEmbedMetadataKeys).toEqual( + doc.excludedEmbedMetadataKeys, + ); + // check relationship + expect(node.sourceNode?.nodeId).toBe(doc.id_); + }); +});