From e938a4d154edc1b63d86c87cefe2bb8d5456b4d8 Mon Sep 17 00:00:00 2001 From: yisding <yi.s.ding@gmail.com> Date: Mon, 30 Oct 2023 13:51:03 -0700 Subject: [PATCH] minor changes --- apps/simple/directory.ts | 11 ++- .../core/src/readers/SimpleDirectoryReader.ts | 73 +++++++++---------- 2 files changed, 43 insertions(+), 41 deletions(-) diff --git a/apps/simple/directory.ts b/apps/simple/directory.ts index cf71bc1c4..bc4dd592b 100644 --- a/apps/simple/directory.ts +++ b/apps/simple/directory.ts @@ -1,8 +1,13 @@ import { SimpleDirectoryReader } from "llamaindex"; -function callback(category: string, name: string, status: any, message?: string): boolean { +function callback( + category: string, + name: string, + status: any, + message?: string, +): boolean { console.log(category, name, status, message); - if (name.endsWith('.pdf')) { + if (name.endsWith(".pdf")) { console.log("I DON'T WANT PDF FILES!"); return false; } @@ -12,7 +17,7 @@ function callback(category: string, name: string, status: any, message?: string) async function main() { // Load page const reader = new SimpleDirectoryReader(callback); - const params = { directoryPath: "./data"}; + const params = { directoryPath: "./data" }; await reader.loadData(params); } diff --git a/packages/core/src/readers/SimpleDirectoryReader.ts b/packages/core/src/readers/SimpleDirectoryReader.ts index 1abb1e125..9a2e30aa1 100644 --- a/packages/core/src/readers/SimpleDirectoryReader.ts +++ b/packages/core/src/readers/SimpleDirectoryReader.ts @@ -1,19 +1,24 @@ import _ from "lodash"; import { Document } from "../Node"; -import { DEFAULT_FS } from "../storage/constants"; import { CompleteFileSystem, walk } from "../storage/FileSystem"; -import { BaseReader } from "./base"; +import { DEFAULT_FS } from "../storage/constants"; import { PapaCSVReader } from "./CSVReader"; import { DocxReader } from "./DocxReader"; import { HTMLReader } from "./HTMLReader"; import { MarkdownReader } from "./MarkdownReader"; import { PDFReader } from "./PDFReader"; +import { BaseReader } from "./base"; -type ReaderCallback = (category: string, name: string, status: ReaderStatus, message?: string) => boolean; +type ReaderCallback = ( + category: "file" | "directory", + name: string, + status: ReaderStatus, + message?: string, +) => boolean; enum ReaderStatus { - Started = 0, - Completed, - Error + STARTED = 0, + COMPLETE, + ERROR, } /** @@ -47,7 +52,7 @@ export type SimpleDirectoryReaderLoadDataProps = { }; /** - * Read all of the documents in a directory. + * Read all of the documents in a directory. * By default, supports the list of file types * in the FILE_EXIT_TO_READER map. */ @@ -60,12 +65,11 @@ export class SimpleDirectoryReader implements BaseReader { defaultReader = new TextFileReader(), fileExtToReader = FILE_EXT_TO_READER, }: SimpleDirectoryReaderLoadDataProps): Promise<Document[]> { - // Observer can decide to skip the directory - if (this.doObserverCheck( - 'Directory', directoryPath, ReaderStatus.Started - ) == false) { - return Promise.reject('Cancelled'); + if ( + !this.doObserverCheck("directory", directoryPath, ReaderStatus.STARTED) + ) { + return []; } let docs: Document[] = []; @@ -74,13 +78,11 @@ export class SimpleDirectoryReader implements BaseReader { const fileExt = _.last(filePath.split(".")) || ""; // Observer can decide to skip each file - if (this.doObserverCheck( - 'File', filePath, ReaderStatus.Started - ) == false) { + if (!this.doObserverCheck("file", filePath, ReaderStatus.STARTED)) { // Skip this file continue; - } - + } + let reader = null; if (fileExt in fileExtToReader) { @@ -92,50 +94,45 @@ export class SimpleDirectoryReader implements BaseReader { console.warn(msg); // In an error condition, observer's false cancels the whole process. - if (this.doObserverCheck( - 'File', filePath, ReaderStatus.Error, msg - ) == false) { - return this.getCancelled(); + if ( + !this.doObserverCheck("file", filePath, ReaderStatus.ERROR, msg) + ) { + return []; } - + continue; } const fileDocs = await reader.loadData(filePath, fs); // Observer can still cancel addition of the resulting docs from this file - if (this.doObserverCheck( - 'File', filePath, ReaderStatus.Completed - )) { + if (this.doObserverCheck("file", filePath, ReaderStatus.COMPLETE)) { docs.push(...fileDocs); - } + } } catch (e) { const msg = `Error reading file ${filePath}: ${e}`; console.error(msg); // In an error condition, observer's false cancels the whole process. - if (this.doObserverCheck( - 'File', filePath, ReaderStatus.Error, msg - ) == false) { - return this.getCancelled(); + if (!this.doObserverCheck("file", filePath, ReaderStatus.ERROR, msg)) { + return []; } } } // After successful import of all files, directory completion // is only a notification for observer, cannot be cancelled. - this.doObserverCheck( - 'Directory', directoryPath, ReaderStatus.Completed - ); + this.doObserverCheck("directory", directoryPath, ReaderStatus.COMPLETE); return docs; } - private getCancelled() { - return Promise.reject('Cancelled'); - } - - private doObserverCheck(category: string, name: string, status: ReaderStatus, message?: string): boolean { + private doObserverCheck( + category: "file" | "directory", + name: string, + status: ReaderStatus, + message?: string, + ): boolean { if (this.observer) { return this.observer(category, name, status, message); } -- GitLab