From 88d42ccd8d476bad0d87e1e8a77bb80a9d16ee7d Mon Sep 17 00:00:00 2001 From: Marcus Schiesser <mail@marcusschiesser.de> Date: Tue, 2 Jan 2024 16:30:35 +0700 Subject: [PATCH] refactor: move node parsers to own package --- packages/core/src/ServiceContext.ts | 4 +-- packages/core/src/index.ts | 2 +- .../SimpleNodeParser.ts} | 29 +++++-------------- packages/core/src/nodeParsers/index.ts | 2 ++ packages/core/src/nodeParsers/types.ts | 13 +++++++++ 5 files changed, 26 insertions(+), 24 deletions(-) rename packages/core/src/{NodeParser.ts => nodeParsers/SimpleNodeParser.ts} (82%) create mode 100644 packages/core/src/nodeParsers/index.ts create mode 100644 packages/core/src/nodeParsers/types.ts diff --git a/packages/core/src/ServiceContext.ts b/packages/core/src/ServiceContext.ts index e1490a199..7bdb2c664 100644 --- a/packages/core/src/ServiceContext.ts +++ b/packages/core/src/ServiceContext.ts @@ -1,7 +1,7 @@ import { CallbackManager } from "./callbacks/CallbackManager"; import { BaseEmbedding, OpenAIEmbedding } from "./embeddings"; -import { LLM, OpenAI } from "./llm/LLM"; -import { NodeParser, SimpleNodeParser } from "./NodeParser"; +import { LLM, OpenAI } from "./llm"; +import { NodeParser, SimpleNodeParser } from "./nodeParsers"; import { PromptHelper } from "./PromptHelper"; /** diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index e170e99d8..e995ca015 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -2,7 +2,6 @@ export * from "./ChatEngine"; export * from "./ChatHistory"; export * from "./GlobalsHelper"; export * from "./Node"; -export * from "./NodeParser"; export * from "./OutputParser"; export * from "./Prompt"; export * from "./PromptHelper"; @@ -18,6 +17,7 @@ export * from "./constants"; export * from "./embeddings"; export * from "./indices"; export * from "./llm"; +export * from "./nodeParsers"; export * from "./readers/AssemblyAI"; export * from "./readers/CSVReader"; export * from "./readers/HTMLReader"; 
diff --git a/packages/core/src/NodeParser.ts b/packages/core/src/nodeParsers/SimpleNodeParser.ts similarity index 82% rename from packages/core/src/NodeParser.ts rename to packages/core/src/nodeParsers/SimpleNodeParser.ts index d39aae5ae..8a1738f57 100644 --- a/packages/core/src/NodeParser.ts +++ b/packages/core/src/nodeParsers/SimpleNodeParser.ts @@ -4,9 +4,10 @@ import { ImageDocument, NodeRelationship, TextNode, -} from "./Node"; -import { SentenceSplitter } from "./TextSplitter"; -import { DEFAULT_CHUNK_OVERLAP, DEFAULT_CHUNK_SIZE } from "./constants"; +} from "../Node"; +import { SentenceSplitter } from "../TextSplitter"; +import { DEFAULT_CHUNK_OVERLAP, DEFAULT_CHUNK_SIZE } from "../constants"; +import { NodeParser } from "./types"; /** * Splits the text of a document into smaller parts. @@ -14,25 +15,23 @@ import { DEFAULT_CHUNK_OVERLAP, DEFAULT_CHUNK_SIZE } from "./constants"; * @param textSplitter - The text splitter to use. * @returns An array of text splits. */ -export function getTextSplitsFromDocument( +function getTextSplitsFromDocument( document: Document, textSplitter: SentenceSplitter, ) { const text = document.getText(); - const splits = textSplitter.splitText(text); - - return splits; + return textSplitter.splitText(text); } /** * Generates an array of nodes from a document. - * @param document - The document to generate nodes from. + * @param doc - The document to generate nodes from. * @param textSplitter - The text splitter to use. * @param includeMetadata - Whether to include metadata in the nodes. * @param includePrevNextRel - Whether to include previous and next relationships in the nodes. * @returns An array of nodes. 
*/ -export function getNodesFromDocument( +function getNodesFromDocument( doc: BaseNode, textSplitter: SentenceSplitter, includeMetadata: boolean = true, @@ -74,18 +73,6 @@ export function getNodesFromDocument( return nodes; } -/** - * A NodeParser generates Nodes from Documents - */ -export interface NodeParser { - /** - * Generates an array of nodes from an array of documents. - * @param documents - The documents to generate nodes from. - * @returns An array of nodes. - */ - getNodesFromDocuments(documents: BaseNode[]): BaseNode[]; -} - /** * SimpleNodeParser is the default NodeParser. It splits documents into TextNodes using a splitter, by default SentenceSplitter */ diff --git a/packages/core/src/nodeParsers/index.ts b/packages/core/src/nodeParsers/index.ts new file mode 100644 index 000000000..4094a9767 --- /dev/null +++ b/packages/core/src/nodeParsers/index.ts @@ -0,0 +1,2 @@ +export * from "./SimpleNodeParser"; +export * from "./types"; diff --git a/packages/core/src/nodeParsers/types.ts b/packages/core/src/nodeParsers/types.ts new file mode 100644 index 000000000..23a4a6f48 --- /dev/null +++ b/packages/core/src/nodeParsers/types.ts @@ -0,0 +1,13 @@ +import { BaseNode } from "../Node"; + +/** + * A NodeParser generates Nodes from Documents + */ +export interface NodeParser { + /** + * Generates an array of nodes from an array of documents. + * @param documents - The documents to generate nodes from. + * @returns An array of nodes. + */ + getNodesFromDocuments(documents: BaseNode[]): BaseNode[]; +} -- GitLab