From d10cca28fc2312ffa96a76888a0bd7b5469bccce Mon Sep 17 00:00:00 2001 From: Marcus Schiesser <mail@marcusschiesser.de> Date: Thu, 6 Jun 2024 10:37:20 +0200 Subject: [PATCH] chore: use FileReader interface when possible (#912) --- packages/core/src/readers/PDFReader.ts | 4 ++-- .../core/src/readers/SimpleDirectoryReader.edge.ts | 14 +++++++------- packages/core/src/readers/SimpleDirectoryReader.ts | 4 ++-- packages/core/src/readers/TextFileReader.ts | 4 ++-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/packages/core/src/readers/PDFReader.ts b/packages/core/src/readers/PDFReader.ts index caa9580b9..bda8ab753 100644 --- a/packages/core/src/readers/PDFReader.ts +++ b/packages/core/src/readers/PDFReader.ts @@ -1,11 +1,11 @@ import { fs } from "@llamaindex/env"; import { Document } from "../Node.js"; -import type { BaseReader } from "./type.js"; +import type { FileReader } from "./type.js"; /** * Read the text of a PDF */ -export class PDFReader implements BaseReader { +export class PDFReader implements FileReader { async loadData(file: string): Promise<Document[]> { const content = await fs.readFile(file); const pages = await readPDF(content); diff --git a/packages/core/src/readers/SimpleDirectoryReader.edge.ts b/packages/core/src/readers/SimpleDirectoryReader.edge.ts index 5362391e4..589f45585 100644 --- a/packages/core/src/readers/SimpleDirectoryReader.edge.ts +++ b/packages/core/src/readers/SimpleDirectoryReader.edge.ts @@ -1,8 +1,8 @@ -import { fs, path } from "@llamaindex/env"; +import { path } from "@llamaindex/env"; import { Document, type Metadata } from "../Node.js"; import { walk } from "../storage/FileSystem.js"; import { TextFileReader } from "./TextFileReader.js"; -import type { BaseReader } from "./type.js"; +import type { BaseReader, FileReader } from "./type.js"; import pLimit from "./utils.js"; type ReaderCallback = ( @@ -20,13 +20,13 @@ enum ReaderStatus { export type SimpleDirectoryReaderLoadDataParams = { directoryPath: string; // Fallback Reader, defaults to TextFileReader - defaultReader?: BaseReader | null; + defaultReader?: FileReader | null; // Map file extensions individually to readers - fileExtToReader?: Record<string, BaseReader>; + fileExtToReader?: Record<string, FileReader>; // Number of workers, defaults to 1. Must be between 1 and 9. numWorkers?: number; // Overrides reader for all file extensions - overrideReader?: BaseReader; + overrideReader?: FileReader; }; type ProcessFileParams = Omit< @@ -115,7 +115,7 @@ export class SimpleDirectoryReader implements BaseReader { return []; } - let reader: BaseReader; + let reader: FileReader; if (params.overrideReader) { reader = params.overrideReader; @@ -135,7 +135,7 @@ export class SimpleDirectoryReader implements BaseReader { return []; } - const fileDocs = await reader.loadData(filePath, fs); + const fileDocs = await reader.loadData(filePath); fileDocs.forEach(addMetaData(filePath)); // Observer can still cancel addition of the resulting docs from this file diff --git a/packages/core/src/readers/SimpleDirectoryReader.ts b/packages/core/src/readers/SimpleDirectoryReader.ts index 70d49ef3b..10b3b9bdb 100644 --- a/packages/core/src/readers/SimpleDirectoryReader.ts +++ b/packages/core/src/readers/SimpleDirectoryReader.ts @@ -10,9 +10,9 @@ import { type SimpleDirectoryReaderLoadDataParams, } from "./SimpleDirectoryReader.edge.js"; import { TextFileReader } from "./TextFileReader.js"; -import type { BaseReader } from "./type.js"; +import type { FileReader } from "./type.js"; -export const FILE_EXT_TO_READER: Record<string, BaseReader> = { +export const FILE_EXT_TO_READER: Record<string, FileReader> = { txt: new TextFileReader(), pdf: new PDFReader(), csv: new PapaCSVReader(), diff --git a/packages/core/src/readers/TextFileReader.ts b/packages/core/src/readers/TextFileReader.ts index 7d51ac53b..0b91e69bf 100644 --- a/packages/core/src/readers/TextFileReader.ts +++ b/packages/core/src/readers/TextFileReader.ts @@ -1,12 +1,12 @@ import { fs } from "@llamaindex/env"; import { Document } from "../Node.js"; -import type { BaseReader } from "./type.js"; +import type { FileReader } from "./type.js"; /** * Read a .txt file */ -export class TextFileReader implements BaseReader { +export class TextFileReader implements FileReader { async loadData(file: string): Promise<Document[]> { const dataBuffer = await fs.readFile(file, "utf-8"); return [new Document({ text: dataBuffer, id_: file })]; -- GitLab