From 83f2848d479befe4090aa980bef3e5fc50feb55a Mon Sep 17 00:00:00 2001
From: Thuc Pham <51660321+thucpn@users.noreply.github.com>
Date: Wed, 9 Oct 2024 12:34:46 +0700
Subject: [PATCH] feat: add test split nodes with UUID (#1315)

---
 .../tests/node-parser/text-splitter.test.ts   | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/packages/core/tests/node-parser/text-splitter.test.ts b/packages/core/tests/node-parser/text-splitter.test.ts
index 1ccf9a93a..531896492 100644
--- a/packages/core/tests/node-parser/text-splitter.test.ts
+++ b/packages/core/tests/node-parser/text-splitter.test.ts
@@ -2,6 +2,7 @@ import {
   SentenceSplitter,
   splitBySentenceTokenizer,
 } from "@llamaindex/core/node-parser";
+import { Document } from "@llamaindex/core/schema";
 import { describe, expect, test } from "vitest";
 
 describe("sentence splitter", () => {
@@ -115,4 +116,26 @@ describe("sentence splitter", () => {
     const split = splitBySentenceTokenizer();
     expect(split(text)).toEqual([text]);
   });
+
+  test("split nodes with UUID IDs and correct relationships", () => {
+    const UUID_REGEX =
+      /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i;
+    const sentenceSplitter = new SentenceSplitter();
+    const docId = "test-doc-id";
+    const doc = new Document({
+      id_: docId,
+      text: "This is a test sentence. This is another test sentence.",
+    });
+    const nodes = sentenceSplitter.getNodesFromDocuments([doc]);
+    nodes.forEach((node) => {
+      // Every node id must match the UUID v4 pattern (version nibble 4, variant 8/9/a/b).
+      expect(node.id_).toMatch(UUID_REGEX);
+
+      // The node's SOURCE relationship must exist and its nodeId must equal the document id.
+      const source = node.relationships?.SOURCE;
+      expect(source).toBeDefined();
+      expect(source).toHaveProperty("nodeId");
+      expect((source as { nodeId: string }).nodeId).toEqual(docId);
+    });
+  });
 });
-- 
GitLab