Skip to content
Snippets Groups Projects
Commit a7edc4d2 authored by Sourabh Desai
Browse files

update method signature

parent 1bcf7604
No related branches found
No related tags found
No related merge requests found
...@@ -5,13 +5,13 @@ import { DEFAULT_FS } from "../storage/constants"; ...@@ -5,13 +5,13 @@ import { DEFAULT_FS } from "../storage/constants";
import { default as pdfParse } from "pdf-parse"; import { default as pdfParse } from "pdf-parse";
import _ from "lodash"; import _ from "lodash";
export class PDFReader implements BaseReader { export default class PDFReader implements BaseReader {
async loadData( async loadData(
file: string, file: string,
fs: GenericFileSystem = DEFAULT_FS fs: GenericFileSystem = DEFAULT_FS
): Promise<Document> { ): Promise<Document[]> {
let dataBuffer = (await fs.readFile(file)) as any; let dataBuffer = (await fs.readFile(file)) as any;
const data = await pdfParse(dataBuffer); const data = await pdfParse(dataBuffer);
return new Document(data.text, file); return [new Document(data.text, file)];
} }
} }
import _ from "lodash";
import { Document } from "../Document"; import { Document } from "../Document";
import { BaseReader } from "./base"; import { BaseReader } from "./base";
import { CompleteFileSystem, walk } from "../storage/FileSystem"; import { CompleteFileSystem, walk } from "../storage/FileSystem";
import { DEFAULT_FS } from "../storage/constants"; import { DEFAULT_FS } from "../storage/constants";
import PDFReader from "./PDFReader";
export default class SimpleDirectoryReader implements BaseReader { export class TextFileReader implements BaseReader {
async loadData( async loadData(
directoryPath: string, file: string,
fs: CompleteFileSystem = DEFAULT_FS as CompleteFileSystem fs: CompleteFileSystem = DEFAULT_FS as CompleteFileSystem
): Promise<Document[]> { ): Promise<Document[]> {
const docs: Document[] = []; const dataBuffer = await fs.readFile(file, "utf-8");
return [new Document(dataBuffer, file)];
}
}
/**
 * Built-in lookup table from a file extension (the text after the last "."
 * in the path, without the dot) to the reader instance that loads such files.
 * Used as the default value of `fileExtToReader` in
 * `SimpleDirectoryReader.loadData`.
 */
const FILE_EXT_TO_READER: Record<string, BaseReader> = {
  txt: new TextFileReader(),
  pdf: new PDFReader(),
};
/**
 * Options object accepted by `SimpleDirectoryReader.loadData`.
 */
export type SimpleDirectoryReaderLoadDataProps = {
/** Root directory that is walked to discover the files to load. */
directoryPath: string;
/** File system used for walking and reading; `loadData` falls back to `DEFAULT_FS` when omitted. */
fs?: CompleteFileSystem;
/**
 * Reader used for files whose extension has no entry in `fileExtToReader`.
 * `loadData` defaults this to a `TextFileReader`; when it is `null`, such
 * files are skipped with a console warning.
 */
defaultReader?: BaseReader | null;
/**
 * Map from file extension (text after the last "." in the path) to the
 * reader for that extension; `loadData` defaults this to `FILE_EXT_TO_READER`.
 */
fileExtToReader?: { [key: string]: BaseReader };
};
export default class SimpleDirectoryReader implements BaseReader {
async loadData({
directoryPath,
fs = DEFAULT_FS as CompleteFileSystem,
defaultReader = new TextFileReader(),
fileExtToReader = FILE_EXT_TO_READER,
}: SimpleDirectoryReaderLoadDataProps): Promise<Document[]> {
let docs: Document[] = [];
for await (const filePath of walk(fs, directoryPath)) { for await (const filePath of walk(fs, directoryPath)) {
try { try {
const fileData = await fs.readFile(filePath); const fileExt = _.last(filePath.split(".")) || "";
docs.push(new Document(fileData, directoryPath));
let reader = null;
if (fileExt in fileExtToReader) {
reader = fileExtToReader[fileExt];
} else if (!_.isNil(defaultReader)) {
reader = defaultReader;
} else {
console.warn(`No reader for file extension of ${filePath}`);
continue;
}
const fileDocs = await reader.loadData(filePath, fs);
docs.push(...fileDocs);
} catch (e) { } catch (e) {
console.error(`Error reading file ${filePath}: ${e}`); console.error(`Error reading file ${filePath}: ${e}`);
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment