From e8f8bea969a23e30c356c61616d0d53e0e392ade Mon Sep 17 00:00:00 2001 From: Fabian Wimmer <github@insightby.ai> Date: Fri, 5 Jul 2024 09:32:26 +0200 Subject: [PATCH] feat: add boundingBox and targetPages to LlamaParseReader (#1017) --- .changeset/slimy-bees-yell.md | 5 +++++ apps/docs/docs/modules/data_loaders/llama_parse/index.mdx | 2 ++ .../docs/modules/data_loaders/llama_parse/json_mode.mdx | 2 +- packages/llamaindex/src/readers/LlamaParseReader.ts | 6 ++++++ 4 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 .changeset/slimy-bees-yell.md diff --git a/.changeset/slimy-bees-yell.md b/.changeset/slimy-bees-yell.md new file mode 100644 index 000000000..eda01b6bc --- /dev/null +++ b/.changeset/slimy-bees-yell.md @@ -0,0 +1,5 @@ +--- +"llamaindex": patch +--- + +feat: add boundingBox and targetPages to LlamaParseReader diff --git a/apps/docs/docs/modules/data_loaders/llama_parse/index.mdx b/apps/docs/docs/modules/data_loaders/llama_parse/index.mdx index fefe8793a..830232561 100644 --- a/apps/docs/docs/modules/data_loaders/llama_parse/index.mdx +++ b/apps/docs/docs/modules/data_loaders/llama_parse/index.mdx @@ -44,6 +44,8 @@ They can be divided into two groups. - `pageSeperator?` Optional. The page seperator to use. Defaults is `\\n---\\n`. - `gpt4oMode` set to true to use GPT-4o to extract content. Default is `false`. - `gpt4oApiKey?` Optional. Set the GPT-4o API key. Lowers the cost of parsing by using your own API key. Your OpenAI account will be charged. Can also be set in the environment variable `LLAMA_CLOUD_GPT4O_API_KEY`. +- `boundingBox?` Optional. Specify an area of the document to parse. Expects the bounding box margins as a string in clockwise order, e.g. `boundingBox = "0.1,0,0,0"` to not parse the top 10% of the document. +- `targetPages?` Optional. Specify which pages to parse by specifying them as a comma-separated list. First page is `0`. - `numWorkers` as in the python version, is set in `SimpleDirectoryReader`. Default is 1. 
### LlamaParse with SimpleDirectoryReader diff --git a/apps/docs/docs/modules/data_loaders/llama_parse/json_mode.mdx b/apps/docs/docs/modules/data_loaders/llama_parse/json_mode.mdx index 838354e7b..b11afafd9 100644 --- a/apps/docs/docs/modules/data_loaders/llama_parse/json_mode.mdx +++ b/apps/docs/docs/modules/data_loaders/llama_parse/json_mode.mdx @@ -8,7 +8,7 @@ In JSON mode, LlamaParse will return a data structure representing the parsed ob ## Usage -For Json mode, you need to use `loadJson`. The `resultType` is automatically set with this method. Currently it can't be used with `SimpleDirectoryReader`. +For Json mode, you need to use `loadJson`. The `resultType` is automatically set with this method. More information about indexing the results on the next page. ```ts diff --git a/packages/llamaindex/src/readers/LlamaParseReader.ts b/packages/llamaindex/src/readers/LlamaParseReader.ts index ef63fa02a..dd2333785 100644 --- a/packages/llamaindex/src/readers/LlamaParseReader.ts +++ b/packages/llamaindex/src/readers/LlamaParseReader.ts @@ -133,6 +133,10 @@ export class LlamaParseReader extends FileReader { gpt4oMode: boolean = false; // The API key for the GPT-4o API. Optional, lowers the cost of parsing. Can be set as an env variable: LLAMA_CLOUD_GPT4O_API_KEY. gpt4oApiKey?: string; + // The bounding box to use to extract text from documents. Describe as a string containing the bounding box margins. + boundingBox?: string; + // The target pages to extract text from documents. Describe as a comma separated list of page numbers. The first page of the document is page 0 + targetPages?: string; // Whether or not to ignore and skip errors raised during parsing. 
ignoreErrors: boolean = true; // numWorkers is implemented in SimpleDirectoryReader @@ -183,6 +187,8 @@ export class LlamaParseReader extends FileReader { page_seperator: this.pageSeperator, gpt4o_mode: this.gpt4oMode?.toString(), gpt4o_api_key: this.gpt4oApiKey, + bounding_box: this.boundingBox, + target_pages: this.targetPages, }; // Appends body with any defined LlamaParseBodyParams -- GitLab