From 965cfd291e885891fa8a5cbf33c1c52ba53285a7 Mon Sep 17 00:00:00 2001
From: Marcus Schiesser <mail@marcusschiesser.de>
Date: Mon, 29 Jan 2024 17:31:09 +0700
Subject: [PATCH] fix: use pipeline instead of nodeparser (#471)

---
 packages/core/src/indices/BaseIndex.ts                    | 8 +++++---
 packages/core/src/indices/vectorStore/VectorStoreIndex.ts | 6 ++++--
 packages/core/src/ingestion/IngestionPipeline.ts          | 2 +-
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/packages/core/src/indices/BaseIndex.ts b/packages/core/src/indices/BaseIndex.ts
index c63b38919..db04a1285 100644
--- a/packages/core/src/indices/BaseIndex.ts
+++ b/packages/core/src/indices/BaseIndex.ts
@@ -2,6 +2,7 @@ import { BaseNode, Document, jsonToNode } from "../Node";
 import { BaseRetriever } from "../Retriever";
 import { ServiceContext } from "../ServiceContext";
 import { randomUUID } from "../env";
+import { runTransformations } from "../ingestion";
 import { StorageContext } from "../storage/StorageContext";
 import { BaseDocumentStore } from "../storage/docStore/types";
 import { BaseIndexStore } from "../storage/indexStore/types";
@@ -188,9 +189,10 @@ export abstract class BaseIndex<T> {
    * @param document
    */
   async insert(document: Document) {
-    const nodes = this.serviceContext.nodeParser.getNodesFromDocuments([
-      document,
-    ]);
+    const nodes = await runTransformations(
+      [document],
+      [this.serviceContext.nodeParser],
+    );
     await this.insertNodes(nodes);
     this.docStore.setDocumentHash(document.id_, document.hash);
   }
diff --git a/packages/core/src/indices/vectorStore/VectorStoreIndex.ts b/packages/core/src/indices/vectorStore/VectorStoreIndex.ts
index 832d46216..2403e1b54 100644
--- a/packages/core/src/indices/vectorStore/VectorStoreIndex.ts
+++ b/packages/core/src/indices/vectorStore/VectorStoreIndex.ts
@@ -17,6 +17,7 @@ import {
   ClipEmbedding,
   MultiModalEmbedding,
 } from "../../embeddings";
+import { runTransformations } from "../../ingestion";
 import { BaseNodePostprocessor } from "../../postprocessors";
 import {
   BaseIndexStore,
@@ -225,8 +226,9 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
     if (args.logProgress) {
       console.log("Using node parser on documents...");
     }
-    args.nodes =
-      args.serviceContext.nodeParser.getNodesFromDocuments(documents);
+    args.nodes = await runTransformations(documents, [
+      args.serviceContext.nodeParser,
+    ]);
     if (args.logProgress) {
       console.log("Finished parsing documents.");
     }
diff --git a/packages/core/src/ingestion/IngestionPipeline.ts b/packages/core/src/ingestion/IngestionPipeline.ts
index 377b7b670..64e569023 100644
--- a/packages/core/src/ingestion/IngestionPipeline.ts
+++ b/packages/core/src/ingestion/IngestionPipeline.ts
@@ -19,7 +19,7 @@ export async function runTransformations(
   nodesToRun: BaseNode[],
   transformations: TransformComponent[],
   transformOptions: any = {},
-  { inPlace = true, cache }: TransformRunArgs,
+  { inPlace = true, cache }: TransformRunArgs = {},
 ): Promise<BaseNode[]> {
   let nodes = nodesToRun;
   if (!inPlace) {
-- 
GitLab