From a68053ca4e24a0f7c547aed2dc9b60bbe18c6acf Mon Sep 17 00:00:00 2001 From: Alex Yang <himself65@outlook.com> Date: Wed, 31 Jan 2024 21:10:41 -0600 Subject: [PATCH] fix: remove as any type (#494) --- packages/core/src/env/index.ts | 16 ++++++++- packages/core/src/readers/CSVReader.ts | 2 +- packages/core/src/readers/DocxReader.ts | 2 +- packages/core/src/readers/HTMLReader.ts | 2 +- packages/core/src/readers/MarkdownReader.ts | 2 +- packages/core/src/readers/PDFReader.ts | 7 +--- .../core/src/readers/SimpleDirectoryReader.ts | 2 +- packages/core/src/storage/FileSystem.ts | 35 +++++++++++-------- .../core/src/tests/GenericFileSystem.test.ts | 17 +++++---- 9 files changed, 52 insertions(+), 33 deletions(-) diff --git a/packages/core/src/env/index.ts b/packages/core/src/env/index.ts index a88c1b3af..286efe683 100644 --- a/packages/core/src/env/index.ts +++ b/packages/core/src/env/index.ts @@ -18,6 +18,20 @@ export function createSHA256(): SHA256 { }; } -export const defaultFS: CompleteFileSystem = fs; +export const defaultFS: CompleteFileSystem = { + writeFile: function (path: string, content: string) { + return fs.writeFile(path, content, "utf-8"); + }, + readRawFile(path: string): Promise<Buffer> { + return fs.readFile(path); + }, + readFile: function (path: string) { + return fs.readFile(path, "utf-8"); + }, + access: fs.access, + mkdir: fs.mkdir, + readdir: fs.readdir, + stat: fs.stat, +}; export { EOL, ok, path, randomUUID }; diff --git a/packages/core/src/readers/CSVReader.ts b/packages/core/src/readers/CSVReader.ts index e92eddaf3..ca4e196b6 100644 --- a/packages/core/src/readers/CSVReader.ts +++ b/packages/core/src/readers/CSVReader.ts @@ -43,7 +43,7 @@ export class PapaCSVReader implements BaseReader { file: string, fs: GenericFileSystem = defaultFS, ): Promise<Document[]> { - const fileContent: string = await fs.readFile(file, "utf-8"); + const fileContent = await fs.readFile(file); const result = Papa.parse(fileContent, this.papaConfig); const textList = 
result.data.map((row: any) => { // Compatible with header row mode diff --git a/packages/core/src/readers/DocxReader.ts b/packages/core/src/readers/DocxReader.ts index e394b712d..2fb5bd644 100644 --- a/packages/core/src/readers/DocxReader.ts +++ b/packages/core/src/readers/DocxReader.ts @@ -10,7 +10,7 @@ export class DocxReader implements BaseReader { file: string, fs: GenericFileSystem = defaultFS, ): Promise<Document[]> { - const dataBuffer = (await fs.readFile(file)) as any; + const dataBuffer = await fs.readRawFile(file); const { value } = await mammoth.extractRawText({ buffer: dataBuffer }); return [new Document({ text: value, id_: file })]; } diff --git a/packages/core/src/readers/HTMLReader.ts b/packages/core/src/readers/HTMLReader.ts index f2fa2be34..806c18193 100644 --- a/packages/core/src/readers/HTMLReader.ts +++ b/packages/core/src/readers/HTMLReader.ts @@ -22,7 +22,7 @@ export class HTMLReader implements BaseReader { file: string, fs: GenericFileSystem = defaultFS, ): Promise<Document[]> { - const dataBuffer = await fs.readFile(file, "utf-8"); + const dataBuffer = await fs.readFile(file); const htmlOptions = this.getOptions(); const content = await this.parseContent(dataBuffer, htmlOptions); return [new Document({ text: content, id_: file })]; diff --git a/packages/core/src/readers/MarkdownReader.ts b/packages/core/src/readers/MarkdownReader.ts index fef040d42..44fc70eba 100644 --- a/packages/core/src/readers/MarkdownReader.ts +++ b/packages/core/src/readers/MarkdownReader.ts @@ -92,7 +92,7 @@ export class MarkdownReader implements BaseReader { file: string, fs: GenericFileSystem = defaultFS, ): Promise<Document[]> { - const content = await fs.readFile(file, { encoding: "utf-8" }); + const content = await fs.readFile(file); const tups = this.parseTups(content); const results: Document[] = []; for (const [header, value] of tups) { diff --git a/packages/core/src/readers/PDFReader.ts b/packages/core/src/readers/PDFReader.ts index 63067d006..fd478bd89 
100644 --- a/packages/core/src/readers/PDFReader.ts +++ b/packages/core/src/readers/PDFReader.ts @@ -11,12 +11,7 @@ export class PDFReader implements BaseReader { file: string, fs: GenericFileSystem = defaultFS, ): Promise<Document[]> { - // todo: fix fs type - const content = (await fs.readFile(file)) as unknown; - if (!(content instanceof Buffer)) { - console.warn(`PDF File ${file} can only be loaded using the Node FS`); - return []; - } + const content = await fs.readRawFile(file); const text = await readPDF(content); return text.map((text) => { const sha256 = createSHA256(); diff --git a/packages/core/src/readers/SimpleDirectoryReader.ts b/packages/core/src/readers/SimpleDirectoryReader.ts index 2dc053f34..6dcb4d688 100644 --- a/packages/core/src/readers/SimpleDirectoryReader.ts +++ b/packages/core/src/readers/SimpleDirectoryReader.ts @@ -30,7 +30,7 @@ export class TextFileReader implements BaseReader { file: string, fs: CompleteFileSystem = defaultFS, ): Promise<Document[]> { - const dataBuffer = await fs.readFile(file, "utf-8"); + const dataBuffer = await fs.readFile(file); return [new Document({ text: dataBuffer, id_: file })]; } } diff --git a/packages/core/src/storage/FileSystem.ts b/packages/core/src/storage/FileSystem.ts index e32335283..cf58573da 100644 --- a/packages/core/src/storage/FileSystem.ts +++ b/packages/core/src/storage/FileSystem.ts @@ -1,5 +1,4 @@ import _ from "lodash"; -import type nodeFS from "node:fs/promises"; /** * A filesystem interface that is meant to be compatible with @@ -8,20 +7,23 @@ import type nodeFS from "node:fs/promises"; * browsers. */ export type GenericFileSystem = { - writeFile( - path: string, - content: string, - options?: Parameters<typeof nodeFS.writeFile>[2], - ): Promise<void>; - readFile( - path: string, - options?: Parameters<typeof nodeFS.readFile>[1], - ): Promise<string>; + writeFile(path: string, content: string): Promise<void>; + /** + * Reads a file and returns its content as a raw buffer. 
+ */ + readRawFile(path: string): Promise<Buffer>; + /** + * Reads a file and returns its content as a UTF-8 string. + */ + readFile(path: string): Promise<string>; access(path: string): Promise<void>; mkdir( path: string, - options?: Parameters<typeof nodeFS.mkdir>[1], - ): Promise<void>; + options: { + recursive: boolean; + }, + ): Promise<string | undefined>; + mkdir(path: string): Promise<void>; }; export type WalkableFileSystem = { @@ -45,7 +47,7 @@ export class InMemoryFileSystem implements CompleteFileSystem { this.files[path] = _.cloneDeep(content); } - async readFile(path: string, options?: unknown): Promise<string> { + async readFile(path: string): Promise<string> { if (!(path in this.files)) { throw new Error(`File ${path} does not exist`); } @@ -58,8 +60,9 @@ export class InMemoryFileSystem implements CompleteFileSystem { } } - async mkdir(path: string, options?: unknown): Promise<void> { + async mkdir(path: string) { this.files[path] = _.get(this.files, path, null); + return undefined; } async readdir(path: string): Promise<string[]> { @@ -69,6 +72,10 @@ export class InMemoryFileSystem implements CompleteFileSystem { async stat(path: string): Promise<any> { throw new Error("Not implemented"); } + + async readRawFile(path: string): Promise<Buffer> { + throw new Error("Not implemented"); + } } // FS utility functions diff --git a/packages/core/src/tests/GenericFileSystem.test.ts b/packages/core/src/tests/GenericFileSystem.test.ts index 7b7b576e8..45be8f568 100644 --- a/packages/core/src/tests/GenericFileSystem.test.ts +++ b/packages/core/src/tests/GenericFileSystem.test.ts @@ -1,6 +1,7 @@ import nodeFS from "node:fs/promises"; import os from "os"; import path from "path"; +import { defaultFS } from "../env"; import { GenericFileSystem, InMemoryFileSystem, @@ -16,7 +17,7 @@ type FileSystemUnderTest = { tempDir: string; }; -describe.each<FileSystemUnderTest>([ +const cases: FileSystemUnderTest[] = [ { name: "InMemoryFileSystem", prepare: async () => {},
@@ -27,17 +28,19 @@ describe.each<FileSystemUnderTest>([ tempDir: "./", }, { - name: "Node.js fs", + name: "Default fs", prepare: async function () { this.tempDir = await nodeFS.mkdtemp(path.join(os.tmpdir(), "jest-")); }, cleanup: async function () { await nodeFS.rm(this.tempDir, { recursive: true }); }, - implementation: nodeFS, + implementation: defaultFS, tempDir: "./", }, -])("Test %s", (testParams) => { +]; + +describe.each<FileSystemUnderTest>(cases)("Test %s", (testParams) => { let testFS: GenericFileSystem; let tempDir: string; @@ -58,7 +61,7 @@ describe.each<FileSystemUnderTest>([ describe("writeFile", () => { it("writes file to memory", async () => { await testFS.writeFile(`${tempDir}/test.txt`, "Hello, world!"); - expect(await testFS.readFile(`${tempDir}/test.txt`, "utf-8")).toBe( + expect(await testFS.readFile(`${tempDir}/test.txt`)).toBe( "Hello, world!", ); }); @@ -66,7 +69,7 @@ describe.each<FileSystemUnderTest>([ it("overwrites existing file", async () => { await testFS.writeFile(`${tempDir}/test.txt`, "Hello, world!"); await testFS.writeFile(`${tempDir}/test.txt`, "Hello, again!"); - expect(await testFS.readFile(`${tempDir}/test.txt`, "utf-8")).toBe( + expect(await testFS.readFile(`${tempDir}/test.txt`)).toBe( "Hello, again!", ); }); @@ -75,7 +78,7 @@ describe.each<FileSystemUnderTest>([ describe("readFile", () => { it("throws error for non-existing file", async () => { await expect( - testFS.readFile(`${tempDir}/not_exist.txt`, "utf-8"), + testFS.readFile(`${tempDir}/not_exist.txt`), ).rejects.toThrow(); }); }); -- GitLab