From b68d8697dfa9ee33d8734a7df9a05f5d38aa38f9 Mon Sep 17 00:00:00 2001
From: Marcus Schiesser <mail@marcusschiesser.de>
Date: Mon, 8 Jan 2024 16:48:44 +0700
Subject: [PATCH] fix: add missing metadata after node parsing (#317)

---
 packages/core/src/nodeParsers/utils.ts        |  4 ++
 .../nodeParsers/SimpleNodeParser.test.ts      | 39 +++++++++++++++++++
 2 files changed, 43 insertions(+)
 create mode 100644 packages/core/src/tests/nodeParsers/SimpleNodeParser.test.ts

diff --git a/packages/core/src/nodeParsers/utils.ts b/packages/core/src/nodeParsers/utils.ts
index 96765288a..c6e325ba7 100644
--- a/packages/core/src/nodeParsers/utils.ts
+++ b/packages/core/src/nodeParsers/utils.ts
@@ -53,6 +53,10 @@ export function getNodesFromDocument(
     const node = new TextNode({
       text: textSplit,
       metadata: includeMetadata ? _.cloneDeep(document.metadata) : {},
+      excludedEmbedMetadataKeys: _.cloneDeep(
+        document.excludedEmbedMetadataKeys,
+      ),
+      excludedLlmMetadataKeys: _.cloneDeep(document.excludedLlmMetadataKeys),
     });
     node.relationships[NodeRelationship.SOURCE] = document.asRelatedNodeInfo();
     nodes.push(node);
diff --git a/packages/core/src/tests/nodeParsers/SimpleNodeParser.test.ts b/packages/core/src/tests/nodeParsers/SimpleNodeParser.test.ts
new file mode 100644
index 000000000..59eaad92f
--- /dev/null
+++ b/packages/core/src/tests/nodeParsers/SimpleNodeParser.test.ts
@@ -0,0 +1,39 @@
+import { Document } from "../../Node";
+import { SimpleNodeParser } from "../../nodeParsers";
+
+describe("SimpleNodeParser", () => {
+  let simpleNodeParser: SimpleNodeParser;
+
+  beforeEach(() => {
+    simpleNodeParser = new SimpleNodeParser({
+      chunkSize: 1024,
+      chunkOverlap: 20,
+    });
+  });
+
+  test("getNodesFromDocuments should return child nodes with equal but not the same metadata", () => {
+    const doc = new Document({
+      text: "Hello. Cat Mouse. Dog.",
+      metadata: { animals: true },
+      excludedLlmMetadataKeys: ["animals"],
+      excludedEmbedMetadataKeys: ["animals"],
+    });
+    const result = simpleNodeParser.getNodesFromDocuments([doc]);
+    expect(result.length).toEqual(1);
+    const node = result[0];
+    // node fields must be distinct objects, not references to the document's own
+    expect(node.metadata).not.toBe(doc.metadata);
+    expect(node.excludedLlmMetadataKeys).not.toBe(doc.excludedLlmMetadataKeys);
+    expect(node.excludedEmbedMetadataKeys).not.toBe(
+      doc.excludedEmbedMetadataKeys,
+    );
+    // ...while their contents stay deeply equal to the document's values
+    expect(node.metadata).toEqual(doc.metadata);
+    expect(node.excludedLlmMetadataKeys).toEqual(doc.excludedLlmMetadataKeys);
+    expect(node.excludedEmbedMetadataKeys).toEqual(
+      doc.excludedEmbedMetadataKeys,
+    );
+    // the node's sourceNode must reference the originating document's id
+    expect(node.sourceNode?.nodeId).toBe(doc.id_);
+  });
+});
-- 
GitLab