Skip to content
Snippets Groups Projects
Commit a5e3e10e authored by yisding
Browse files

dynamic import of string-strip-html

parent 99afbdd6
Branches
Tags
No related merge requests found
...@@ -3,33 +3,33 @@ ...@@ -3,33 +3,33 @@
"version": "0.0.31", "version": "0.0.31",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"@anthropic-ai/sdk": "^0.8.0", "@anthropic-ai/sdk": "^0.8.1",
"@notionhq/client": "^2.2.13", "@notionhq/client": "^2.2.13",
"lodash": "^4.17.21", "lodash": "^4.17.21",
"mammoth": "^1.6.0", "mammoth": "^1.6.0",
"md-utils-ts": "^2.0.0", "md-utils-ts": "^2.0.0",
"mongodb": "^6.2.0", "mongodb": "^6.2.0",
"notion-md-crawler": "^0.0.2", "notion-md-crawler": "^0.0.2",
"openai": "^4.13.0", "openai": "^4.14.0",
"papaparse": "^5.4.1", "papaparse": "^5.4.1",
"pdf-parse": "^1.1.1", "pdf-parse": "^1.1.1",
"portkey-ai": "^0.1.13", "portkey-ai": "^0.1.13",
"rake-modified": "^1.0.8", "rake-modified": "^1.0.8",
"replicate": "^0.20.1", "replicate": "^0.20.1",
"string-strip-html": "^8.5.0", "string-strip-html": "^13.4.3",
"tiktoken": "^1.0.10", "tiktoken": "^1.0.10",
"uuid": "^9.0.1", "uuid": "^9.0.1",
"wink-nlp": "^1.14.3" "wink-nlp": "^1.14.3"
}, },
"devDependencies": { "devDependencies": {
"@types/lodash": "^4.14.200", "@types/lodash": "^4.14.200",
"@types/node": "^18.18.6", "@types/node": "^18.18.7",
"@types/papaparse": "^5.3.10", "@types/papaparse": "^5.3.10",
"@types/pdf-parse": "^1.1.3", "@types/pdf-parse": "^1.1.3",
"@types/uuid": "^9.0.6", "@types/uuid": "^9.0.6",
"node-stdlib-browser": "^1.2.0", "node-stdlib-browser": "^1.2.0",
"tsup": "^7.2.0", "tsup": "^7.2.0",
"typescript": "^4.9.5" "typescript": "^5.2.2"
}, },
"engines": { "engines": {
"node": ">=18.0.0" "node": ">=18.0.0"
......
import { stripHtml } from "string-strip-html";
import { Document } from "../Node"; import { Document } from "../Node";
import { DEFAULT_FS } from "../storage/constants"; import { DEFAULT_FS } from "../storage/constants";
import { GenericFileSystem } from "../storage/FileSystem"; import { GenericFileSystem } from "../storage/FileSystem";
...@@ -25,7 +24,7 @@ export class HTMLReader implements BaseReader { ...@@ -25,7 +24,7 @@ export class HTMLReader implements BaseReader {
): Promise<Document[]> { ): Promise<Document[]> {
const dataBuffer = await fs.readFile(file, "utf-8"); const dataBuffer = await fs.readFile(file, "utf-8");
const htmlOptions = this.getOptions(); const htmlOptions = this.getOptions();
const content = this.parseContent(dataBuffer, htmlOptions); const content = await this.parseContent(dataBuffer, htmlOptions);
return [new Document({ text: content, id_: file })]; return [new Document({ text: content, id_: file })];
} }
...@@ -36,7 +35,8 @@ export class HTMLReader implements BaseReader { ...@@ -36,7 +35,8 @@ export class HTMLReader implements BaseReader {
* @see getOptions * @see getOptions
* @returns The HTML content, stripped of unwanted tags and attributes * @returns The HTML content, stripped of unwanted tags and attributes
*/ */
parseContent(html: string, options: any = {}): string { async parseContent(html: string, options: any = {}): Promise<string> {
const { stripHtml } = await import("string-strip-html"); // ESM only
return stripHtml(html).result; return stripHtml(html).result;
} }
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
"esModuleInterop": true, "esModuleInterop": true,
"forceConsistentCasingInFileNames": true, "forceConsistentCasingInFileNames": true,
"isolatedModules": true, "isolatedModules": true,
"module": "esnext",
"moduleResolution": "node", "moduleResolution": "node",
"preserveWatchOutput": true, "preserveWatchOutput": true,
"skipLibCheck": true, "skipLibCheck": true,
......
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment