From 965cfd291e885891fa8a5cbf33c1c52ba53285a7 Mon Sep 17 00:00:00 2001
From: Marcus Schiesser <mail@marcusschiesser.de>
Date: Mon, 29 Jan 2024 17:31:09 +0700
Subject: [PATCH] fix: use pipeline instead of nodeparser (#471)

---
 packages/core/src/indices/BaseIndex.ts                    | 8 +++++---
 packages/core/src/indices/vectorStore/VectorStoreIndex.ts | 6 ++++--
 packages/core/src/ingestion/IngestionPipeline.ts          | 2 +-
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/packages/core/src/indices/BaseIndex.ts b/packages/core/src/indices/BaseIndex.ts
index c63b38919..db04a1285 100644
--- a/packages/core/src/indices/BaseIndex.ts
+++ b/packages/core/src/indices/BaseIndex.ts
@@ -2,6 +2,7 @@ import { BaseNode, Document, jsonToNode } from "../Node";
 import { BaseRetriever } from "../Retriever";
 import { ServiceContext } from "../ServiceContext";
 import { randomUUID } from "../env";
+import { runTransformations } from "../ingestion";
 import { StorageContext } from "../storage/StorageContext";
 import { BaseDocumentStore } from "../storage/docStore/types";
 import { BaseIndexStore } from "../storage/indexStore/types";
@@ -188,9 +189,10 @@ export abstract class BaseIndex<T> {
    * @param document
    */
   async insert(document: Document) {
-    const nodes = this.serviceContext.nodeParser.getNodesFromDocuments([
-      document,
-    ]);
+    const nodes = await runTransformations(
+      [document],
+      [this.serviceContext.nodeParser],
+    );
     await this.insertNodes(nodes);
     this.docStore.setDocumentHash(document.id_, document.hash);
   }
diff --git a/packages/core/src/indices/vectorStore/VectorStoreIndex.ts b/packages/core/src/indices/vectorStore/VectorStoreIndex.ts
index 832d46216..2403e1b54 100644
--- a/packages/core/src/indices/vectorStore/VectorStoreIndex.ts
+++ b/packages/core/src/indices/vectorStore/VectorStoreIndex.ts
@@ -17,6 +17,7 @@ import {
   ClipEmbedding,
   MultiModalEmbedding,
 } from "../../embeddings";
+import { runTransformations } from "../../ingestion";
 import { BaseNodePostprocessor } from "../../postprocessors";
 import {
   BaseIndexStore,
@@ -225,8 +226,9 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
     if (args.logProgress) {
       console.log("Using node parser on documents...");
     }
-    args.nodes =
-      args.serviceContext.nodeParser.getNodesFromDocuments(documents);
+    args.nodes = await runTransformations(documents, [
+      args.serviceContext.nodeParser,
+    ]);
     if (args.logProgress) {
       console.log("Finished parsing documents.");
     }
diff --git a/packages/core/src/ingestion/IngestionPipeline.ts b/packages/core/src/ingestion/IngestionPipeline.ts
index 377b7b670..64e569023 100644
--- a/packages/core/src/ingestion/IngestionPipeline.ts
+++ b/packages/core/src/ingestion/IngestionPipeline.ts
@@ -19,7 +19,7 @@ export async function runTransformations(
   nodesToRun: BaseNode[],
   transformations: TransformComponent[],
   transformOptions: any = {},
-  { inPlace = true, cache }: TransformRunArgs,
+  { inPlace = true, cache }: TransformRunArgs = {},
 ): Promise<BaseNode[]> {
   let nodes = nodesToRun;
   if (!inPlace) {
-- 
GitLab