From 6bc5bddb5900bb56dd01f8c54363e3d3401ad139 Mon Sep 17 00:00:00 2001 From: Fabian Wimmer <github@insightby.ai> Date: Tue, 11 Jun 2024 11:31:01 +0200 Subject: [PATCH] feat: add new options to LlamaParseReader (#915) --- .changeset/brown-numbers-tease.md | 5 ++ packages/core/src/readers/LlamaParseReader.ts | 56 +++++++++++++------ 2 files changed, 43 insertions(+), 18 deletions(-) create mode 100644 .changeset/brown-numbers-tease.md diff --git a/.changeset/brown-numbers-tease.md b/.changeset/brown-numbers-tease.md new file mode 100644 index 000000000..1f7338538 --- /dev/null +++ b/.changeset/brown-numbers-tease.md @@ -0,0 +1,5 @@ +--- +"llamaindex": patch +--- + +feat: add cache disabling, fast mode, do not unroll columns mode and custom page separator to LlamaParseReader diff --git a/packages/core/src/readers/LlamaParseReader.ts b/packages/core/src/readers/LlamaParseReader.ts index dc85b6457..25b4b51d6 100644 --- a/packages/core/src/readers/LlamaParseReader.ts +++ b/packages/core/src/readers/LlamaParseReader.ts @@ -110,22 +110,30 @@ export class LlamaParseReader extends FileReader { apiKey: string; // The base URL of the Llama Parsing API. baseUrl: string = "https://api.cloud.llamaindex.ai/api/parsing"; - // The maximum timeout in seconds to wait for the parsing to finish. - maxTimeout = 2000; + // The result type for the parser. + resultType: ResultType = "text"; // The interval in seconds to check if the parsing is done. checkInterval = 1; + // The maximum timeout in seconds to wait for the parsing to finish. + maxTimeout = 2000; // Whether to print the progress of the parsing. verbose = true; - // The result type for the parser. - resultType: ResultType = "text"; // The language of the text to parse. language: Language = "en"; - // The parsing instruction for the parser. - parsingInstruction: string = ""; - // If set to true, the parser will ignore diagonal text (when the text rotation in degrees modulo 90 is not 0). 
- skipDiagonalText: boolean = false; - // If set to true, the cache will be ignored and the document re-processes. All document are kept in cache for 48hours after the job was completed to avoid processing the same document twice. - invalidateCache: boolean = false; + // The parsing instruction for the parser. Backend default is an empty string. + parsingInstruction?: string; + // Whether to ignore diagonal text (when the text rotation in degrees is not 0, 90, 180 or 270, so not a horizontal or vertical text). Backend default is false. + skipDiagonalText?: boolean; + // Whether to ignore the cache and re-process the document. All documents are kept in cache for 48 hours after the job was completed to avoid processing the same document twice. Backend default is false. + invalidateCache?: boolean; + // Whether the document should not be cached in the first place. Backend default is false. + doNotCache?: boolean; + // Whether to use a faster mode to extract text from documents. This mode will skip OCR of images, and table/heading reconstruction. Note: Not compatible with gpt4oMode. Backend default is false. + fastMode?: boolean; + // Whether to keep columns in the text according to document layout. Reduces reconstruction accuracy and LLM/embedding performance in most cases. + doNotUnrollColumns?: boolean; + // The page separator to use to split the text. Default is undefined, which means the parser will use the default separator '\\n---\\n'. + pageSeperator?: string; // Whether to use gpt-4o to extract text from documents. gpt4oMode: boolean = false; // The API key for the GPT-4o API. Optional, lowers the cost of parsing. Can be set as an env variable: LLAMA_CLOUD_GPT4O_API_KEY. 
@@ -162,14 +170,26 @@ export class LlamaParseReader extends FileReader { const body = new FormData(); body.set("file", new Blob([data], { type: mimeType })); - body.append("language", this.language); - body.append("parsing_instruction", this.parsingInstruction); - body.append("skip_diagonal_text", this.skipDiagonalText.toString()); - body.append("invalidate_cache", this.invalidateCache.toString()); - body.append("gpt4o_mode", this.gpt4oMode.toString()); - if (this.gpt4oMode && this.gpt4oApiKey) { - body.append("gpt4o_api_key", this.gpt4oApiKey); - } + + const LlamaParseBodyParams = { + language: this.language, + parsing_instruction: this.parsingInstruction, + skip_diagonal_text: this.skipDiagonalText?.toString(), + invalidate_cache: this.invalidateCache?.toString(), + do_not_cache: this.doNotCache?.toString(), + fast_mode: this.fastMode?.toString(), + do_not_unroll_columns: this.doNotUnrollColumns?.toString(), + page_seperator: this.pageSeperator, + gpt4o_mode: this.gpt4oMode?.toString(), + gpt4o_api_key: this.gpt4oApiKey, + }; + + // Appends body with any defined LlamaParseBodyParams + Object.entries(LlamaParseBodyParams).forEach(([key, value]) => { + if (value !== undefined) { + body.append(key, value); + } + }); const headers = { Authorization: `Bearer ${this.apiKey}`, -- GitLab