From a68053ca4e24a0f7c547aed2dc9b60bbe18c6acf Mon Sep 17 00:00:00 2001
From: Alex Yang <himself65@outlook.com>
Date: Wed, 31 Jan 2024 21:10:41 -0600
Subject: [PATCH] fix: remove `as any` type assertions (#494)

---
 packages/core/src/env/index.ts                | 16 ++++++++-
 packages/core/src/readers/CSVReader.ts        |  2 +-
 packages/core/src/readers/DocxReader.ts       |  2 +-
 packages/core/src/readers/HTMLReader.ts       |  2 +-
 packages/core/src/readers/MarkdownReader.ts   |  2 +-
 packages/core/src/readers/PDFReader.ts        |  7 +---
 .../core/src/readers/SimpleDirectoryReader.ts |  2 +-
 packages/core/src/storage/FileSystem.ts       | 35 +++++++++++--------
 .../core/src/tests/GenericFileSystem.test.ts  | 17 +++++----
 9 files changed, 52 insertions(+), 33 deletions(-)

diff --git a/packages/core/src/env/index.ts b/packages/core/src/env/index.ts
index a88c1b3af..286efe683 100644
--- a/packages/core/src/env/index.ts
+++ b/packages/core/src/env/index.ts
@@ -18,6 +18,20 @@ export function createSHA256(): SHA256 {
   };
 }
 
-export const defaultFS: CompleteFileSystem = fs;
+export const defaultFS: CompleteFileSystem = {
+  writeFile: function (path: string, content: string) {
+    return fs.writeFile(path, content, "utf-8");
+  },
+  readRawFile(path: string): Promise<Buffer> {
+    return fs.readFile(path);
+  },
+  readFile: function (path: string) {
+    return fs.readFile(path, "utf-8");
+  },
+  access: fs.access,
+  mkdir: fs.mkdir,
+  readdir: fs.readdir,
+  stat: fs.stat,
+};
 
 export { EOL, ok, path, randomUUID };
diff --git a/packages/core/src/readers/CSVReader.ts b/packages/core/src/readers/CSVReader.ts
index e92eddaf3..ca4e196b6 100644
--- a/packages/core/src/readers/CSVReader.ts
+++ b/packages/core/src/readers/CSVReader.ts
@@ -43,7 +43,7 @@ export class PapaCSVReader implements BaseReader {
     file: string,
     fs: GenericFileSystem = defaultFS,
   ): Promise<Document[]> {
-    const fileContent: string = await fs.readFile(file, "utf-8");
+    const fileContent = await fs.readFile(file);
     const result = Papa.parse(fileContent, this.papaConfig);
     const textList = result.data.map((row: any) => {
       // Compatible with header row mode
diff --git a/packages/core/src/readers/DocxReader.ts b/packages/core/src/readers/DocxReader.ts
index e394b712d..2fb5bd644 100644
--- a/packages/core/src/readers/DocxReader.ts
+++ b/packages/core/src/readers/DocxReader.ts
@@ -10,7 +10,7 @@ export class DocxReader implements BaseReader {
     file: string,
     fs: GenericFileSystem = defaultFS,
   ): Promise<Document[]> {
-    const dataBuffer = (await fs.readFile(file)) as any;
+    const dataBuffer = await fs.readRawFile(file);
     const { value } = await mammoth.extractRawText({ buffer: dataBuffer });
     return [new Document({ text: value, id_: file })];
   }
diff --git a/packages/core/src/readers/HTMLReader.ts b/packages/core/src/readers/HTMLReader.ts
index f2fa2be34..806c18193 100644
--- a/packages/core/src/readers/HTMLReader.ts
+++ b/packages/core/src/readers/HTMLReader.ts
@@ -22,7 +22,7 @@ export class HTMLReader implements BaseReader {
     file: string,
     fs: GenericFileSystem = defaultFS,
   ): Promise<Document[]> {
-    const dataBuffer = await fs.readFile(file, "utf-8");
+    const dataBuffer = await fs.readFile(file);
     const htmlOptions = this.getOptions();
     const content = await this.parseContent(dataBuffer, htmlOptions);
     return [new Document({ text: content, id_: file })];
diff --git a/packages/core/src/readers/MarkdownReader.ts b/packages/core/src/readers/MarkdownReader.ts
index fef040d42..44fc70eba 100644
--- a/packages/core/src/readers/MarkdownReader.ts
+++ b/packages/core/src/readers/MarkdownReader.ts
@@ -92,7 +92,7 @@ export class MarkdownReader implements BaseReader {
     file: string,
     fs: GenericFileSystem = defaultFS,
   ): Promise<Document[]> {
-    const content = await fs.readFile(file, { encoding: "utf-8" });
+    const content = await fs.readFile(file);
     const tups = this.parseTups(content);
     const results: Document[] = [];
     for (const [header, value] of tups) {
diff --git a/packages/core/src/readers/PDFReader.ts b/packages/core/src/readers/PDFReader.ts
index 63067d006..fd478bd89 100644
--- a/packages/core/src/readers/PDFReader.ts
+++ b/packages/core/src/readers/PDFReader.ts
@@ -11,12 +11,7 @@ export class PDFReader implements BaseReader {
     file: string,
     fs: GenericFileSystem = defaultFS,
   ): Promise<Document[]> {
-    // todo: fix fs type
-    const content = (await fs.readFile(file)) as unknown;
-    if (!(content instanceof Buffer)) {
-      console.warn(`PDF File ${file} can only be loaded using the Node FS`);
-      return [];
-    }
+    const content = await fs.readRawFile(file);
     const text = await readPDF(content);
     return text.map((text) => {
       const sha256 = createSHA256();
diff --git a/packages/core/src/readers/SimpleDirectoryReader.ts b/packages/core/src/readers/SimpleDirectoryReader.ts
index 2dc053f34..6dcb4d688 100644
--- a/packages/core/src/readers/SimpleDirectoryReader.ts
+++ b/packages/core/src/readers/SimpleDirectoryReader.ts
@@ -30,7 +30,7 @@ export class TextFileReader implements BaseReader {
     file: string,
     fs: CompleteFileSystem = defaultFS,
   ): Promise<Document[]> {
-    const dataBuffer = await fs.readFile(file, "utf-8");
+    const dataBuffer = await fs.readFile(file);
     return [new Document({ text: dataBuffer, id_: file })];
   }
 }
diff --git a/packages/core/src/storage/FileSystem.ts b/packages/core/src/storage/FileSystem.ts
index e32335283..cf58573da 100644
--- a/packages/core/src/storage/FileSystem.ts
+++ b/packages/core/src/storage/FileSystem.ts
@@ -1,5 +1,4 @@
 import _ from "lodash";
-import type nodeFS from "node:fs/promises";
 
 /**
  * A filesystem interface that is meant to be compatible with
@@ -8,20 +7,23 @@ import type nodeFS from "node:fs/promises";
  * browsers.
  */
 export type GenericFileSystem = {
-  writeFile(
-    path: string,
-    content: string,
-    options?: Parameters<typeof nodeFS.writeFile>[2],
-  ): Promise<void>;
-  readFile(
-    path: string,
-    options?: Parameters<typeof nodeFS.readFile>[1],
-  ): Promise<string>;
+  writeFile(path: string, content: string): Promise<void>;
+  /**
+   * Reads a file and returns its content as a raw buffer.
+   */
+  readRawFile(path: string): Promise<Buffer>;
+  /**
+   * Reads a file and returns its content as a UTF-8 string.
+   */
+  readFile(path: string): Promise<string>;
   access(path: string): Promise<void>;
   mkdir(
     path: string,
-    options?: Parameters<typeof nodeFS.mkdir>[1],
-  ): Promise<void>;
+    options: {
+      recursive: boolean;
+    },
+  ): Promise<string | undefined>;
+  mkdir(path: string): Promise<void>;
 };
 
 export type WalkableFileSystem = {
@@ -45,7 +47,7 @@ export class InMemoryFileSystem implements CompleteFileSystem {
     this.files[path] = _.cloneDeep(content);
   }
 
-  async readFile(path: string, options?: unknown): Promise<string> {
+  async readFile(path: string): Promise<string> {
     if (!(path in this.files)) {
       throw new Error(`File ${path} does not exist`);
     }
@@ -58,8 +60,9 @@ export class InMemoryFileSystem implements CompleteFileSystem {
     }
   }
 
-  async mkdir(path: string, options?: unknown): Promise<void> {
+  async mkdir(path: string) {
     this.files[path] = _.get(this.files, path, null);
+    return undefined;
   }
 
   async readdir(path: string): Promise<string[]> {
@@ -69,6 +72,10 @@ export class InMemoryFileSystem implements CompleteFileSystem {
   async stat(path: string): Promise<any> {
     throw new Error("Not implemented");
   }
+
+  async readRawFile(path: string): Promise<Buffer> {
+    throw new Error("Not implemented");
+  }
 }
 
 // FS utility functions
diff --git a/packages/core/src/tests/GenericFileSystem.test.ts b/packages/core/src/tests/GenericFileSystem.test.ts
index 7b7b576e8..45be8f568 100644
--- a/packages/core/src/tests/GenericFileSystem.test.ts
+++ b/packages/core/src/tests/GenericFileSystem.test.ts
@@ -1,6 +1,7 @@
 import nodeFS from "node:fs/promises";
 import os from "os";
 import path from "path";
+import { defaultFS } from "../env";
 import {
   GenericFileSystem,
   InMemoryFileSystem,
@@ -16,7 +17,7 @@ type FileSystemUnderTest = {
   tempDir: string;
 };
 
-describe.each<FileSystemUnderTest>([
+const cases: FileSystemUnderTest[] = [
   {
     name: "InMemoryFileSystem",
     prepare: async () => {},
@@ -27,17 +28,19 @@ describe.each<FileSystemUnderTest>([
     tempDir: "./",
   },
   {
-    name: "Node.js fs",
+    name: "Default fs",
     prepare: async function () {
       this.tempDir = await nodeFS.mkdtemp(path.join(os.tmpdir(), "jest-"));
     },
     cleanup: async function () {
       await nodeFS.rm(this.tempDir, { recursive: true });
     },
-    implementation: nodeFS,
+    implementation: defaultFS,
     tempDir: "./",
   },
-])("Test %s", (testParams) => {
+];
+
+describe.each<FileSystemUnderTest>(cases)("Test %s", (testParams) => {
   let testFS: GenericFileSystem;
   let tempDir: string;
 
@@ -58,7 +61,7 @@ describe.each<FileSystemUnderTest>([
   describe("writeFile", () => {
     it("writes file to memory", async () => {
       await testFS.writeFile(`${tempDir}/test.txt`, "Hello, world!");
-      expect(await testFS.readFile(`${tempDir}/test.txt`, "utf-8")).toBe(
+      expect(await testFS.readFile(`${tempDir}/test.txt`)).toBe(
         "Hello, world!",
       );
     });
@@ -66,7 +69,7 @@ describe.each<FileSystemUnderTest>([
     it("overwrites existing file", async () => {
       await testFS.writeFile(`${tempDir}/test.txt`, "Hello, world!");
       await testFS.writeFile(`${tempDir}/test.txt`, "Hello, again!");
-      expect(await testFS.readFile(`${tempDir}/test.txt`, "utf-8")).toBe(
+      expect(await testFS.readFile(`${tempDir}/test.txt`)).toBe(
         "Hello, again!",
       );
     });
@@ -75,7 +78,7 @@ describe.each<FileSystemUnderTest>([
   describe("readFile", () => {
     it("throws error for non-existing file", async () => {
       await expect(
-        testFS.readFile(`${tempDir}/not_exist.txt`, "utf-8"),
+        testFS.readFile(`${tempDir}/not_exist.txt`),
       ).rejects.toThrow();
     });
   });
-- 
GitLab