From 85c2e198a4a76b81e19754bd683d5d6db1725ea6 Mon Sep 17 00:00:00 2001 From: Alex Yang <himself65@outlook.com> Date: Mon, 16 Sep 2024 09:29:33 -0700 Subject: [PATCH] feat: llama cloud sdk update (#1206) --- .changeset/witty-rockets-fry.md | 9 + packages/cloud/openapi.json | 2889 +++++++++++++++-- packages/cloud/package.json | 23 + .../src/reader.ts} | 378 +-- packages/cloud/src/utils.ts | 3 + packages/cloud/tsconfig.json | 13 +- packages/llamaindex/src/readers/index.ts | 6 +- packages/llamaindex/tsconfig.json | 3 + pnpm-lock.yaml | 10 + 9 files changed, 2779 insertions(+), 555 deletions(-) create mode 100644 .changeset/witty-rockets-fry.md rename packages/{llamaindex/src/readers/LlamaParseReader.ts => cloud/src/reader.ts} (71%) create mode 100644 packages/cloud/src/utils.ts diff --git a/.changeset/witty-rockets-fry.md b/.changeset/witty-rockets-fry.md new file mode 100644 index 000000000..033c8c574 --- /dev/null +++ b/.changeset/witty-rockets-fry.md @@ -0,0 +1,9 @@ +--- +"@llamaindex/cloud": patch +"llamaindex": patch +--- + +feat: `@llamaindex/cloud` package update + +- Bump to latest openapi schema +- Move LlamaParse class from llamaindex, this will allow you use llamaparse in more non-node.js environment diff --git a/packages/cloud/openapi.json b/packages/cloud/openapi.json index c7d02b420..e7baf161b 100644 --- a/packages/cloud/openapi.json +++ b/packages/cloud/openapi.json @@ -1,16 +1,9 @@ { "openapi": "3.1.0", "info": { - "title": "LlamaCloud", - "termsOfService": "https://www.llamaindex.ai/files/terms-of-service.pdf", + "title": "Llama Platform", "version": "0.1.0" }, - "servers": [ - { - "url": "https://api.cloud.llamaindex.ai/", - "description": "LlamaCloud Production Server" - } - ], "paths": { "/api/v1/api-keys": { "get": { @@ -56,6 +49,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -110,6 +106,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -176,6 +175,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -223,6 +225,206 @@ } }, "security": [ + { + "HTTPBearer": [] + }, + { + "HTTPBearer": [] + } + ] + } + }, + "/api/v1/validate-integrations/validate-embedding-connection": { + "post": { + "summary": "Validate Embedding Connection", + "description": "Validate an embedding connection.", + "operationId": "validate_embedding_connection_api_v1_validate_integrations_validate_embedding_connection_post", + "parameters": [ + { + "required": false, + "schema": { + "type": "string", + "title": "Session" + }, + "name": "session", + "in": "cookie" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/AzureOpenAIEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/CohereEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/GeminiEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/HuggingFaceInferenceAPIEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/OpenAIEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/VertexAIEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/BedrockEmbeddingConfig" + } + ], + "title": "Embedding Config" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BaseConnectionValidation" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + }, + "security": [ + { + "HTTPBearer": [] + }, + { + "HTTPBearer": [] + } + ] + } + }, + "/api/v1/validate-integrations/validate-data-source-connection": { + "post": { + "summary": "Validate Data Source Connection", + "description": "Validate a data source connection.", + "operationId": "validate_data_source_connection_api_v1_validate_integrations_validate_data_source_connection_post", + "parameters": [ + { + "required": false, + "schema": { + "type": "string", + "title": "Session" + }, + "name": "session", + "in": "cookie" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DataSourceCreate" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BaseConnectionValidation" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + }, + "security": [ + { + "HTTPBearer": [] + }, + { + "HTTPBearer": [] + } + ] + } + }, + "/api/v1/validate-integrations/validate-data-sink-connection": { + "post": { + "summary": "Validate Data Sink Connection", + "description": "Validate a data sink connection.", + "operationId": "validate_data_sink_connection_api_v1_validate_integrations_validate_data_sink_connection_post", + "parameters": [ + { + "required": false, + "schema": { + "type": "string", + "title": "Session" + }, + "name": "session", + "in": "cookie" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DataSinkCreate" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BaseConnectionValidation" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + }, + "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -283,6 +485,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -357,6 +562,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -431,6 +639,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -487,6 +698,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -551,6 +765,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -598,6 +815,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -668,6 +888,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -742,6 +965,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -816,6 +1042,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -872,6 +1101,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -936,6 +1168,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -983,6 +1218,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -1033,6 +1271,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -1087,6 +1328,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -1141,6 +1385,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -1187,6 +1434,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -1241,6 +1491,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -1297,6 +1550,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -1361,6 +1617,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -1408,6 +1667,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -1468,6 +1730,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -1540,6 +1805,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -1598,6 +1866,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -1663,7 +1934,10 @@ "security": [ { "HTTPBearer": [] - } + }, + { + "HTTPBearer": [] + } ] } }, @@ -1730,6 +2004,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -1794,6 +2071,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -1858,6 +2138,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -1914,6 +2197,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -1978,6 +2264,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -2025,6 +2314,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -2085,6 +2377,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -2149,6 +2444,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -2219,6 +2517,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -2279,6 +2580,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -2339,6 +2643,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -2403,6 +2710,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -2463,6 +2773,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -2527,6 +2840,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -2603,6 +2919,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -2665,6 +2984,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -2731,6 +3053,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -2788,6 +3113,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -2848,6 +3176,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -2912,6 +3243,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -2976,6 +3310,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -3036,6 +3373,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -3102,6 +3442,158 @@ } }, "security": [ + { + "HTTPBearer": [] + }, + { + "HTTPBearer": [] + } + ] + } + }, + "/api/v1/files/{id}/page_screenshots": { + "get": { + "tags": ["Files"], + "summary": "List File Page Screenshots", + "description": "List metadata for all screenshots of pages from a file.", + "operationId": "list_file_page_screenshots_api_v1_files__id__page_screenshots_get", + "parameters": [ + { + "required": true, + "schema": { + "type": "string", + "format": "uuid", + "title": "Id" + }, + "name": "id", + "in": "path" + }, + { + "required": false, + "schema": { + "type": "string", + "format": "uuid", + "title": "Project Id" + }, + "name": "project_id", + "in": "query" + }, + { + "required": false, + "schema": { + "type": "string", + "title": "Session" + }, + "name": "session", + "in": "cookie" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "items": { + "$ref": "#/components/schemas/PageScreenshotMetadata" + }, + "type": "array", + "title": "Response List File Page Screenshots Api V1 Files Id Page Screenshots Get" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + }, + "security": [ + { + "HTTPBearer": [] + }, + { + "HTTPBearer": [] + } + ] + } + }, + "/api/v1/files/{id}/page_screenshots/{page_index}": { + "get": { + "tags": ["Files"], + "summary": "Get File Page Screenshot", + "description": "Get screenshot of a page from a file.", + "operationId": "get_file_page_screenshot_api_v1_files__id__page_screenshots__page_index__get", + "parameters": [ + { + "required": true, + "schema": { + "type": "string", + "format": "uuid", + "title": "Id" + }, + "name": "id", + "in": "path" + }, + { + "required": true, + "schema": { + "type": "integer", + "title": "Page Index" + }, + "name": "page_index", + "in": "path" + }, + { + "required": false, + "schema": { + "type": "string", + "format": "uuid", + "title": "Project Id" + }, + "name": "project_id", + "in": "query" + }, + { + "required": false, + "schema": { + "type": "string", + "title": "Session" + }, + "name": "session", + "in": "cookie" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + }, + "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -3188,6 +3680,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -3252,6 +3747,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -3316,6 +3814,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -3372,6 +3873,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -3436,6 +3940,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -3483,6 +3990,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -3539,6 +4049,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -3595,6 +4108,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -3651,6 +4167,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -3721,6 +4240,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -3795,6 +4317,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -3865,6 +4390,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -3941,6 +4469,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -4021,6 +4552,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -4093,6 +4627,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -4161,7 +4698,10 @@ "security": [ { "HTTPBearer": [] - } + }, + { + "HTTPBearer": [] + } ] } }, @@ -4235,6 +4775,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -4292,6 +4835,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -4362,6 +4908,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -4409,6 +4958,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -4469,6 +5021,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -4541,6 +5096,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -4548,6 +5106,83 @@ } }, "/api/v1/pipelines/{pipeline_id}/data-sources/{data_source_id}": { + "put": { + "tags": ["Pipelines"], + "summary": "Update Pipeline Data Source", + "description": "Update the configuration of a data source in a pipeline.", + "operationId": "update_pipeline_data_source_api_v1_pipelines__pipeline_id__data_sources__data_source_id__put", + "parameters": [ + { + "required": true, + "schema": { + "type": "string", + "format": "uuid", + "title": "Pipeline Id" + }, + "name": "pipeline_id", + "in": "path" + }, + { + "required": true, + "schema": { + "type": "string", + "format": "uuid", + "title": "Data Source Id" + }, + "name": "data_source_id", + "in": "path" + }, + { + "required": false, + "schema": { + "type": "string", + "title": "Session" + }, + "name": "session", + "in": "cookie" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PipelineDataSourceUpdate" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PipelineDataSource" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + }, + "security": [ + { + "HTTPBearer": [] + }, + { + "HTTPBearer": [] + } + ] + }, "delete": { "tags": ["Pipelines"], "summary": "Delete Pipeline Data Source", @@ -4600,6 +5235,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -4666,6 +5304,78 @@ } }, "security": [ + { + "HTTPBearer": [] + }, + { + "HTTPBearer": [] + } + ] + } + }, + "/api/v1/pipelines/{pipeline_id}/data-sources/{data_source_id}/status": { + "get": { + "tags": ["Pipelines"], + "summary": "Get Pipeline Data Source Status", + "description": "Get the status of a data source for a pipeline.", + "operationId": "get_pipeline_data_source_status_api_v1_pipelines__pipeline_id__data_sources__data_source_id__status_get", + "parameters": [ + { + "required": true, + "schema": { + "type": "string", + "format": "uuid", + "title": "Pipeline Id" + }, + "name": "pipeline_id", + "in": "path" + }, + { + "required": true, + "schema": { + "type": "string", + "format": "uuid", + "title": "Data Source Id" + }, + "name": "data_source_id", + "in": "path" + }, + { + "required": false, + "schema": { + "type": "string", + "title": "Session" + }, + "name": "session", + "in": "cookie" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ManagedIngestionStatusResponse" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + }, + "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -4732,6 +5442,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -4792,6 +5505,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -4858,6 +5574,68 @@ } }, "security": [ + { + "HTTPBearer": [] + }, + { + "HTTPBearer": [] + } + ] + } + }, + "/api/v1/pipelines/{pipeline_id}/playground-session": { + "get": { + "tags": ["Pipelines"], + "summary": "Get Playground Session", + "description": "Get a playground session for a user and pipeline.", + "operationId": "get_playground_session_api_v1_pipelines__pipeline_id__playground_session_get", + "parameters": [ + { + "required": true, + "schema": { + "type": "string", + "format": "uuid", + "title": "Pipeline Id" + }, + "name": "pipeline_id", + "in": "path" + }, + { + "required": false, + "schema": { + "type": "string", + "title": "Session" + }, + "name": "session", + "in": "cookie" + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PlaygroundSession" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + }, + "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -4868,6 +5646,7 @@ "post": { "tags": ["Pipelines"], "summary": "Chat", + "description": "Make a retrieval query + chat completion for a managed pipeline.", "operationId": "chat_api_v1_pipelines__pipeline_id__chat_post", "parameters": [ { @@ -4894,7 +5673,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ChatParams" + "$ref": "#/components/schemas/ChatInputParams" } } }, @@ -4921,6 +5700,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -5023,6 +5805,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -5095,6 +5880,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -5167,6 +5955,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -5232,6 +6023,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -5288,6 +6082,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -5353,6 +6150,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -5422,6 +6222,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -5478,6 +6281,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -5544,7 +6350,10 @@ "security": [ { "HTTPBearer": [] - } + }, + { + "HTTPBearer": [] + } ] }, "delete": { @@ -5589,6 +6398,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -5649,6 +6461,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -5713,6 +6528,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -5787,6 +6605,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -5843,6 +6664,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -5907,6 +6731,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -5954,6 +6781,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -5984,7 +6814,7 @@ "application/json": { "schema": { "items": { - "$ref": "#/components/schemas/SupportedEvalLLMModel" + "$ref": "#/components/schemas/SupportedLLMModel" }, "type": "array", "title": "Response List Supported Models Api V1 Evals Models Get" @@ -6004,6 +6834,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -6065,6 +6898,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -6118,8 +6954,7 @@ "$ref": "#/components/schemas/Body_upload_file_api_v1_parsing_upload_post" } } - }, - "required": true + } }, "responses": { "200": { @@ -6144,6 +6979,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -6190,6 +7028,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -6246,6 +7087,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -6302,6 +7146,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -6358,6 +7205,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -6412,6 +7262,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -6468,6 +7321,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -6522,6 +7378,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -6578,6 +7437,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -6632,6 +7494,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -6682,6 +7547,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -6747,6 +7615,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -6876,6 +7747,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -6933,6 +7807,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -7031,6 +7908,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -7084,6 +7964,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -7139,6 +8022,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -7194,6 +8080,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -7257,6 +8146,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -7316,6 +8208,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -7369,6 +8264,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -7424,6 +8322,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -7483,6 +8384,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -7538,6 +8442,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -7599,6 +8506,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -7652,8 +8562,7 @@ "$ref": "#/components/schemas/Body_upload_file_api_parsing_upload_post" } } - }, - "required": true + } }, "responses": { "200": { @@ -7678,6 +8587,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -7724,6 +8636,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -7780,6 +8695,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -7836,6 +8754,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -7892,6 +8813,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -7946,6 +8870,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -8002,6 +8929,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -8056,6 +8986,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -8112,6 +9045,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -8166,6 +9102,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -8216,6 +9155,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -8281,6 +9223,9 @@ } }, "security": [ + { + "HTTPBearer": [] + }, { "HTTPBearer": [] } @@ -8356,10 +9301,65 @@ "title": "APIKeyUpdate", "description": "Schema for updating an API key." }, + "AdvancedModeTransformConfig": { + "properties": { + "mode": { + "type": "string", + "enum": ["advanced"], + "title": "Mode", + "default": "advanced" + }, + "segmentation_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/NoneSegmentationConfig" + }, + { + "$ref": "#/components/schemas/PageSegmentationConfig" + }, + { + "$ref": "#/components/schemas/ElementSegmentationConfig" + } + ], + "title": "Segmentation Config", + "description": "Configuration for the segmentation." + }, + "chunking_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/NoneChunkingConfig" + }, + { + "$ref": "#/components/schemas/CharacterChunkingConfig" + }, + { + "$ref": "#/components/schemas/TokenChunkingConfig" + }, + { + "$ref": "#/components/schemas/SentenceChunkingConfig" + }, + { + "$ref": "#/components/schemas/SemanticChunkingConfig" + } + ], + "title": "Chunking Config", + "description": "Configuration for the chunking." + } + }, + "type": "object", + "title": "AdvancedModeTransformConfig" + }, "AutoTransformConfig": { "properties": { + "mode": { + "type": "string", + "enum": ["auto"], + "title": "Mode", + "default": "auto" + }, "chunk_size": { "type": "integer", + "exclusiveMinimum": 0.0, "title": "Chunk Size", "description": "Chunk size for the transformation.", "default": 1024 @@ -8368,7 +9368,8 @@ "type": "integer", "title": "Chunk Overlap", "description": "Chunk overlap for the transformation.", - "default": 20 + "default": 200, + "gte": 0 } }, "type": "object", @@ -8481,11 +9482,49 @@ "title": "AzureOpenAIEmbedding", "description": "OpenAI class for embeddings.\n\nArgs:\n mode (str): Mode for embedding.\n Defaults to OpenAIEmbeddingMode.TEXT_SEARCH_MODE.\n Options are:\n\n - OpenAIEmbeddingMode.SIMILARITY_MODE\n - OpenAIEmbeddingMode.TEXT_SEARCH_MODE\n\n model (str): Model for embedding.\n Defaults to OpenAIEmbeddingModelType.TEXT_EMBED_ADA_002.\n Options are:\n\n - OpenAIEmbeddingModelType.DAVINCI\n - OpenAIEmbeddingModelType.CURIE\n - OpenAIEmbeddingModelType.BABBAGE\n - OpenAIEmbeddingModelType.ADA\n - OpenAIEmbeddingModelType.TEXT_EMBED_ADA_002" }, + "AzureOpenAIEmbeddingConfig": { + "properties": { + "type": { + "type": "string", + "enum": ["AZURE_EMBEDDING"], + "title": "Type", + "description": "Type of the embedding model.", + "default": "AZURE_EMBEDDING" + }, + "component": { + "allOf": [ + { + "$ref": "#/components/schemas/AzureOpenAIEmbedding" + } + ], + "title": "Component", + "description": "Configuration for the Azure OpenAI embedding model." + } + }, + "type": "object", + "title": "AzureOpenAIEmbeddingConfig" + }, "Base": { "properties": {}, "type": "object", "title": "Base" }, + "BaseConnectionValidation": { + "properties": { + "success": { + "type": "boolean", + "title": "Success" + }, + "message": { + "type": "string", + "title": "Message" + } + }, + "type": "object", + "required": ["success", "message"], + "title": "BaseConnectionValidation", + "description": "Base response model for connection validation." + }, "BasePromptTemplate": { "properties": { "metadata": { @@ -8609,6 +9648,28 @@ "title": "BedrockEmbedding", "description": "Base class for embeddings." }, + "BedrockEmbeddingConfig": { + "properties": { + "type": { + "type": "string", + "enum": ["BEDROCK_EMBEDDING"], + "title": "Type", + "description": "Type of the embedding model.", + "default": "BEDROCK_EMBEDDING" + }, + "component": { + "allOf": [ + { + "$ref": "#/components/schemas/BedrockEmbedding" + } + ], + "title": "Component", + "description": "Configuration for the Bedrock embedding model." + } + }, + "type": "object", + "title": "BedrockEmbeddingConfig" + }, "Body_import_pipeline_metadata_api_v1_pipelines__pipeline_id__metadata_put": { "properties": { "upload_file": { @@ -8708,6 +9769,36 @@ "title": "Page Suffix", "default": "" }, + "webhook_url": { + "type": "string", + "title": "Webhook Url", + "default": "" + }, + "take_screenshot": { + "type": "boolean", + "title": "Take Screenshot", + "default": false + }, + "disable_ocr": { + "type": "boolean", + "title": "Disable Ocr", + "default": false + }, + "disable_reconstruction": { + "type": "boolean", + "title": "Disable Reconstruction", + "default": false + }, + "input_s3_path": { + "type": "string", + "title": "Input S3 Path", + "default": "" + }, + "output_s3_path_prefix": { + "type": "string", + "title": "Output S3 Path Prefix", + "default": "" + }, "file": { "type": "string", "format": "binary", @@ -8715,7 +9806,6 @@ } }, "type": "object", - "required": ["file"], "title": "Body_upload_file_api_parsing_upload_post" }, "Body_upload_file_api_v1_files_post": { @@ -8817,60 +9907,159 @@ "title": "Page Suffix", "default": "" }, - "file": { + "webhook_url": { "type": "string", - "format": "binary", - "title": "File" - } - }, - "type": "object", - "required": ["file"], - "title": "Body_upload_file_api_v1_parsing_upload_post" - }, - "ChatData": { - "properties": { - "retrieval_parameters": { - "$ref": "#/components/schemas/PresetRetrievalParams" + "title": "Webhook Url", + "default": "" + }, + "take_screenshot": { + "type": "boolean", + "title": "Take Screenshot", + "default": false + }, + "disable_ocr": { + "type": "boolean", + "title": "Disable Ocr", + "default": false + }, + "disable_reconstruction": { + "type": "boolean", + "title": "Disable Reconstruction", + "default": false + }, + "input_s3_path": { + "type": "string", + "title": "Input S3 Path", + "default": "" + }, + "output_s3_path_prefix": { + "type": "string", + "title": "Output S3 Path Prefix", + "default": "" + }, + "file": { + "type": "string", + "format": "binary", + "title": "File" + } + }, + "type": "object", + "title": "Body_upload_file_api_v1_parsing_upload_post" + }, + "BoxAuthMechanism": { + "type": "string", + "enum": ["developer_token", "ccg"], + "title": "BoxAuthMechanism", + "description": "An enumeration." + }, + "CharacterChunkingConfig": { + "properties": { + "chunk_size": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Chunk Size", + "default": 1024 + }, + "chunk_overlap": { + "type": "integer", + "title": "Chunk Overlap", + "default": 200, + "gte": 0 + }, + "mode": { + "type": "string", + "enum": ["character"], + "title": "Mode", + "default": "character" + } + }, + "type": "object", + "title": "CharacterChunkingConfig" + }, + "CharacterSplitter": { + "properties": { + "include_metadata": { + "type": "boolean", + "title": "Include Metadata", + "description": "Whether or not to consider metadata when splitting.", + "default": true + }, + "include_prev_next_rel": { + "type": "boolean", + "title": "Include Prev Next Rel", + "description": "Include prev/next node relationships.", + "default": true + }, + "callback_manager": { + "type": "object", + "title": "Callback Manager", + "default": {} + }, + "chunk_size": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Chunk Size", + "description": "The token chunk size for each chunk.", + "default": 1024 + }, + "chunk_overlap": { + "type": "integer", + "title": "Chunk Overlap", + "description": "The token overlap of each chunk when splitting.", + "default": 200, + "gte": 0 + }, + "separator": { + "type": "string", + "title": "Separator", + "description": "Default separator for splitting into words", + "default": " " + }, + "paragraph_separator": { + "type": "string", + "title": "Paragraph Separator", + "description": "Separator between paragraphs.", + "default": "\n\n\n" + }, + "secondary_chunking_regex": { + "type": "string", + "title": "Secondary Chunking Regex", + "description": "Backup regex for splitting into sentences.", + "default": "[^,.;。?ï¼]+[,.;。?ï¼]?" }, "class_name": { "type": "string", "title": "Class Name", - "default": "base_component" + "default": "SentenceSplitter" } }, "type": "object", - "required": ["retrieval_parameters"], - "title": "ChatData", - "description": "Base schema model for BaseComponent classes used in the platform.\nComes with special serialization logic for types used commonly in platform codebase." + "title": "CharacterSplitter", + "description": "A splitter that splits text into characters." }, - "ChatMessage": { + "ChatData": { "properties": { - "role": { - "allOf": [ - { - "$ref": "#/components/schemas/MessageRole" - } - ], - "default": "user" + "retrieval_parameters": { + "$ref": "#/components/schemas/PresetRetrievalParams" }, - "content": { - "title": "Content", - "default": "" + "llm_parameters": { + "$ref": "#/components/schemas/LLMParameters" }, - "additional_kwargs": { - "type": "object", - "title": "Additional Kwargs" + "class_name": { + "type": "string", + "title": "Class Name", + "default": "base_component" } }, "type": "object", - "title": "ChatMessage", - "description": "Chat message." + "title": "ChatData", + "description": "Base schema model for BaseComponent classes used in the platform.\nComes with special serialization logic for types used commonly in platform codebase." }, - "ChatParams": { + "ChatInputParams": { "properties": { "messages": { "items": { - "$ref": "#/components/schemas/ChatMessage" + "$ref": "#/components/schemas/InputMessage" }, "type": "array", "title": "Messages" @@ -8885,8 +10074,54 @@ } }, "type": "object", - "required": ["messages", "data"], - "title": "ChatParams", + "title": "ChatInputParams", + "description": "Base schema model for BaseComponent classes used in the platform.\nComes with special serialization logic for types used commonly in platform codebase." + }, + "ChatMessage": { + "properties": { + "id": { + "type": "string", + "format": "uuid", + "title": "Id" + }, + "index": { + "type": "integer", + "title": "Index", + "description": "The index of the message in the chat." + }, + "annotations": { + "items": { + "$ref": "#/components/schemas/MessageAnnotation" + }, + "type": "array", + "title": "Annotations", + "description": "Retrieval annotations for the message." + }, + "role": { + "$ref": "#/components/schemas/MessageRole" + }, + "content": { + "type": "string", + "title": "Content", + "description": "Text content of the generation" + }, + "additional_kwargs": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "Additional Kwargs", + "description": "Additional arguments passed to the model" + }, + "class_name": { + "type": "string", + "title": "Class Name", + "default": "base_component" + } + }, + "type": "object", + "required": ["id", "index", "role"], + "title": "ChatMessage", "description": "Base schema model for BaseComponent classes used in the platform.\nComes with special serialization logic for types used commonly in platform codebase." }, "CheckoutSessionCreatePayload": { @@ -8930,8 +10165,7 @@ "prefix": { "type": "string", "title": "Prefix", - "description": "The prefix of the Azure Storage Blob objects to read from.", - "default": "" + "description": "The prefix of the Azure Storage Blob objects to read from." }, "account_name": { "type": "string", @@ -8943,6 +10177,21 @@ "title": "Account Key", "description": "The Azure Storage Blob account key to use for authentication." }, + "tenant_id": { + "type": "string", + "title": "Tenant Id", + "description": "The Azure AD tenant ID to use for authentication." + }, + "client_id": { + "type": "string", + "title": "Client Id", + "description": "The Azure AD client ID to use for authentication." + }, + "client_secret": { + "type": "string", + "title": "Client Secret", + "description": "The Azure AD client secret to use for authentication." + }, "class_name": { "type": "string", "title": "Class Name", @@ -8950,12 +10199,7 @@ } }, "type": "object", - "required": [ - "container_name", - "account_url", - "account_name", - "account_key" - ], + "required": ["container_name", "account_url"], "title": "CloudAzStorageBlobDataSource", "description": "Base component object to capture class names." }, @@ -8991,6 +10235,18 @@ "type": "integer", "title": "Embedding Dimension" }, + "client_id": { + "type": "string", + "title": "Client Id" + }, + "client_secret": { + "type": "string", + "title": "Client Secret" + }, + "tenant_id": { + "type": "string", + "title": "Tenant Id" + }, "class_name": { "type": "string", "title": "Class Name", @@ -9002,6 +10258,57 @@ "title": "CloudAzureAISearchVectorStore", "description": "Cloud Azure AI Search Vector Store." }, + "CloudBoxDataSource": { + "properties": { + "folder_id": { + "type": "string", + "title": "Folder Id", + "description": "The ID of the Box folder to read from." + }, + "authentication_mechanism": { + "allOf": [ + { + "$ref": "#/components/schemas/BoxAuthMechanism" + } + ], + "description": "The type of authentication to use (Developer Token or CCG)" + }, + "developer_token": { + "type": "string", + "title": "Developer Token", + "description": "Developer token for authentication if authentication_mechanism is 'developer_token'." + }, + "client_id": { + "type": "string", + "title": "Client Id", + "description": "Box API key used for identifying the application the user is authenticating with" + }, + "client_secret": { + "type": "string", + "title": "Client Secret", + "description": "Box API secret used for making auth requests." + }, + "user_id": { + "type": "string", + "title": "User Id", + "description": "Box User ID, if provided authenticates as user." + }, + "enterprise_id": { + "type": "string", + "title": "Enterprise Id", + "description": "Box Enterprise ID, if provided authenticates as service." + }, + "class_name": { + "type": "string", + "title": "Class Name", + "default": "CloudBoxDataSource" + } + }, + "type": "object", + "required": ["authentication_mechanism"], + "title": "CloudBoxDataSource", + "description": "Base component object to capture class names." + }, "CloudChromaVectorStore": { "properties": { "supports_nested_metadata_filters": { @@ -9131,6 +10438,14 @@ "title": "Excluded Llm Metadata Keys", "default": [] }, + "page_positions": { + "items": { + "type": "integer" + }, + "type": "array", + "title": "Page Positions", + "description": "indices in the CloudDocument.text where a new page begins. e.g. Second page starts at index specified by page_positions[1]." + }, "id": { "type": "string", "title": "Id" @@ -9167,6 +10482,14 @@ "title": "Excluded Llm Metadata Keys", "default": [] }, + "page_positions": { + "items": { + "type": "integer" + }, + "type": "array", + "title": "Page Positions", + "description": "indices in the CloudDocument.text where a new page begins. e.g. Second page starts at index specified by page_positions[1]." + }, "id": { "type": "string", "title": "Id" @@ -9177,6 +10500,29 @@ "title": "CloudDocumentCreate", "description": "Create a new cloud document." }, + "CloudGoogleDriveDataSource": { + "properties": { + "folder_id": { + "type": "string", + "title": "Folder Id", + "description": "The ID of the Google Drive folder to read from." + }, + "service_account_key": { + "type": "object", + "title": "Service Account Key", + "description": "The service account key JSON to use for authentication." + }, + "class_name": { + "type": "string", + "title": "Class Name", + "default": "CloudGoogleDriveDataSource" + } + }, + "type": "object", + "required": ["folder_id", "service_account_key"], + "title": "CloudGoogleDriveDataSource", + "description": "Base component object to capture class names." + }, "CloudJiraDataSource": { "properties": { "email": { @@ -9220,6 +10566,80 @@ "title": "CloudJiraDataSource", "description": "Cloud Jira Data Source integrating JiraReader." }, + "CloudMilvusVectorStore": { + "properties": { + "supports_nested_metadata_filters": { + "type": "boolean", + "const": false, + "title": "Supports Nested Metadata Filters", + "default": false + }, + "uri": { + "type": "string", + "title": "Uri" + }, + "collection_name": { + "type": "string", + "title": "Collection Name" + }, + "token": { + "type": "string", + "title": "Token" + }, + "embedding_dimension": { + "type": "integer", + "title": "Embedding Dimension" + }, + "class_name": { + "type": "string", + "title": "Class Name", + "default": "CloudMilvusVectorStore" + } + }, + "type": "object", + "required": ["uri"], + "title": "CloudMilvusVectorStore", + "description": "Cloud Milvus Vector Store." + }, + "CloudMongoDBAtlasVectorSearch": { + "properties": { + "supports_nested_metadata_filters": { + "type": "boolean", + "const": false, + "title": "Supports Nested Metadata Filters", + "default": false + }, + "mongodb_uri": { + "type": "string", + "title": "Mongodb Uri" + }, + "db_name": { + "type": "string", + "title": "Db Name" + }, + "collection_name": { + "type": "string", + "title": "Collection Name" + }, + "vector_index_name": { + "type": "string", + "title": "Vector Index Name" + }, + "fulltext_index_name": { + "type": "string", + "title": "Fulltext Index Name" + }, + "class_name": { + "type": "string", + "title": "Class Name", + "default": "CloudMongoDBAtlasVectorSearch" + } + }, + "type": "object", + "required": ["mongodb_uri", "db_name", "collection_name"], + "title": "CloudMongoDBAtlasVectorSearch", + "description": "Cloud MongoDB Atlas Vector Store.\n\nThis class is used to store the configuration for a MongoDB Atlas vector store,\nso that it can be created and used in LlamaCloud.\n\nArgs:\n mongodb_uri (str): URI for connecting to MongoDB Atlas\n db_name (str): name of the MongoDB database\n collection_name (str): name of the MongoDB collection\n vector_index_name (str): name of the MongoDB Atlas vector index\n fulltext_index_name (str): name of the MongoDB Atlas full-text index" + }, "CloudNotionPageDataSource": { "properties": { "integration_token": { @@ -9339,13 +10759,25 @@ "title": "Supports Nested Metadata Filters", "default": false }, - "connection_string": { + "database": { "type": "string", - "title": "Connection String" + "title": "Database" }, - "async_connection_string": { + "host": { + "type": "string", + "title": "Host" + }, + "password": { "type": "string", - "title": "Async Connection String" + "title": "Password" + }, + "port": { + "type": "string", + "title": "Port" + }, + "user": { + "type": "string", + "title": "User" }, "table_name": { "type": "string", @@ -9361,27 +10793,8 @@ }, "hybrid_search": { "type": "boolean", - "title": "Hybrid Search" - }, - "text_search_config": { - "type": "string", - "title": "Text Search Config" - }, - "cache_ok": { - "type": "boolean", - "title": "Cache Ok" - }, - "perform_setup": { - "type": "boolean", - "title": "Perform Setup" - }, - "debug": { - "type": "boolean", - "title": "Debug" - }, - "use_jsonb": { - "type": "boolean", - "title": "Use Jsonb" + "title": "Hybrid Search", + "default": true }, "class_name": { "type": "string", @@ -9391,17 +10804,14 @@ }, "type": "object", "required": [ - "connection_string", - "async_connection_string", + "database", + "host", + "password", + "port", + "user", "table_name", "schema_name", - "embed_dim", - "hybrid_search", - "text_search_config", - "cache_ok", - "perform_setup", - "debug", - "use_jsonb" + "embed_dim" ], "title": "CloudPostgresVectorStore", "description": "Base class for cloud vector stores." @@ -9491,6 +10901,11 @@ "title": "Site Name", "description": "The name of the SharePoint site to download from." }, + "site_id": { + "type": "string", + "title": "Site Id", + "description": "The ID of the SharePoint site to download from." + }, "folder_path": { "type": "string", "title": "Folder Path", @@ -9528,7 +10943,7 @@ } }, "type": "object", - "required": ["site_name", "client_id", "client_secret", "tenant_id"], + "required": ["client_id", "client_secret", "tenant_id"], "title": "CloudSharepointDataSource", "description": "Base component object to capture class names." }, @@ -9732,6 +11147,28 @@ "title": "CohereEmbedding", "description": "CohereEmbedding uses the Cohere API to generate embeddings for text." }, + "CohereEmbeddingConfig": { + "properties": { + "type": { + "type": "string", + "enum": ["COHERE_EMBEDDING"], + "title": "Type", + "description": "Type of the embedding model.", + "default": "COHERE_EMBEDDING" + }, + "component": { + "allOf": [ + { + "$ref": "#/components/schemas/CohereEmbedding" + } + ], + "title": "Component", + "description": "Configuration for the Cohere embedding model." + } + }, + "type": "object", + "title": "CohereEmbeddingConfig" + }, "ConfigurableDataSinkNames": { "type": "string", "enum": [ @@ -9740,7 +11177,9 @@ "POSTGRES", "QDRANT", "WEAVIATE", - "AZUREAI_SEARCH" + "AZUREAI_SEARCH", + "MONGODB_ATLAS", + "MILVUS" ], "title": "ConfigurableDataSinkNames", "description": "An enumeration." @@ -9750,12 +11189,14 @@ "enum": [ "S3", "AZURE_STORAGE_BLOB", + "GOOGLE_DRIVE", "MICROSOFT_ONEDRIVE", "MICROSOFT_SHAREPOINT", "SLACK", "NOTION_PAGE", "CONFLUENCE", - "JIRA" + "JIRA", + "BOX" ], "title": "ConfigurableDataSourceNames", "description": "An enumeration." @@ -9802,20 +11243,20 @@ "ConfigurableTransformationNames": { "type": "string", "enum": [ + "CHARACTER_SPLITTER", + "PAGE_SPLITTER_NODE_PARSER", "CODE_NODE_PARSER", "SENTENCE_AWARE_NODE_PARSER", "TOKEN_AWARE_NODE_PARSER", - "HTML_NODE_PARSER", "MARKDOWN_NODE_PARSER", - "JSON_NODE_PARSER", - "SIMPLE_FILE_NODE_PARSER", "MARKDOWN_ELEMENT_NODE_PARSER", "OPENAI_EMBEDDING", "AZURE_EMBEDDING", "COHERE_EMBEDDING", "BEDROCK_EMBEDDING", "HUGGINGFACE_API_EMBEDDING", - "GEMINI_EMBEDDING" + "GEMINI_EMBEDDING", + "VERTEXAI_EMBEDDING" ], "title": "ConfigurableTransformationNames", "description": "An enumeration." @@ -9843,25 +11284,22 @@ { "anyOf": [ { - "$ref": "#/components/schemas/CodeSplitter" + "$ref": "#/components/schemas/CharacterSplitter" }, { - "$ref": "#/components/schemas/SentenceSplitter" + "$ref": "#/components/schemas/PageSplitterNodeParser" }, { - "$ref": "#/components/schemas/TokenTextSplitter" - }, - { - "$ref": "#/components/schemas/HTMLNodeParser" + "$ref": "#/components/schemas/CodeSplitter" }, { - "$ref": "#/components/schemas/MarkdownNodeParser" + "$ref": "#/components/schemas/SentenceSplitter" }, { - "$ref": "#/components/schemas/JSONNodeParser" + "$ref": "#/components/schemas/TokenTextSplitter" }, { - "$ref": "#/components/schemas/SimpleFileNodeParser" + "$ref": "#/components/schemas/MarkdownNodeParser" }, { "$ref": "#/components/schemas/MarkdownElementNodeParser" @@ -9883,6 +11321,9 @@ }, { "$ref": "#/components/schemas/GeminiEmbedding" + }, + { + "$ref": "#/components/schemas/ExtendVertexTextEmbedding" } ] } @@ -9962,6 +11403,12 @@ }, { "$ref": "#/components/schemas/CloudAzureAISearchVectorStore" + }, + { + "$ref": "#/components/schemas/CloudMongoDBAtlasVectorSearch" + }, + { + "$ref": "#/components/schemas/CloudMilvusVectorStore" } ] } @@ -10013,6 +11460,12 @@ }, { "$ref": "#/components/schemas/CloudAzureAISearchVectorStore" + }, + { + "$ref": "#/components/schemas/CloudMongoDBAtlasVectorSearch" + }, + { + "$ref": "#/components/schemas/CloudMilvusVectorStore" } ] } @@ -10081,10 +11534,16 @@ "$ref": "#/components/schemas/CloudQdrantVectorStore" }, { - "$ref": "#/components/schemas/CloudWeaviateVectorStore" + "$ref": "#/components/schemas/CloudWeaviateVectorStore" + }, + { + "$ref": "#/components/schemas/CloudAzureAISearchVectorStore" + }, + { + "$ref": "#/components/schemas/CloudMongoDBAtlasVectorSearch" }, { - "$ref": "#/components/schemas/CloudAzureAISearchVectorStore" + "$ref": "#/components/schemas/CloudMilvusVectorStore" } ] } @@ -10166,6 +11625,9 @@ { "$ref": "#/components/schemas/CloudAzStorageBlobDataSource" }, + { + "$ref": "#/components/schemas/CloudGoogleDriveDataSource" + }, { "$ref": "#/components/schemas/CloudOneDriveDataSource" }, @@ -10183,6 +11645,9 @@ }, { "$ref": "#/components/schemas/CloudJiraDataSource" + }, + { + "$ref": "#/components/schemas/CloudBoxDataSource" } ] } @@ -10251,6 +11716,9 @@ { "$ref": "#/components/schemas/CloudAzStorageBlobDataSource" }, + { + "$ref": "#/components/schemas/CloudGoogleDriveDataSource" + }, { "$ref": "#/components/schemas/CloudOneDriveDataSource" }, @@ -10268,6 +11736,9 @@ }, { "$ref": "#/components/schemas/CloudJiraDataSource" + }, + { + "$ref": "#/components/schemas/CloudBoxDataSource" } ] } @@ -10357,6 +11828,9 @@ { "$ref": "#/components/schemas/CloudAzStorageBlobDataSource" }, + { + "$ref": "#/components/schemas/CloudGoogleDriveDataSource" + }, { "$ref": "#/components/schemas/CloudOneDriveDataSource" }, @@ -10374,6 +11848,9 @@ }, { "$ref": "#/components/schemas/CloudJiraDataSource" + }, + { + "$ref": "#/components/schemas/CloudBoxDataSource" } ] } @@ -10400,57 +11877,17 @@ "title": "DefaultOrganizationUpdate", "description": "Schema for updating the default organization for a user." }, - "EmbeddingConfig": { + "ElementSegmentationConfig": { "properties": { - "type": { - "allOf": [ - { - "$ref": "#/components/schemas/EmbeddingConfigType" - } - ], - "description": "Type of the embedding model.", - "default": "OPENAI_EMBEDDING" - }, - "component": { - "anyOf": [ - { - "$ref": "#/components/schemas/OpenAIEmbedding" - }, - { - "$ref": "#/components/schemas/AzureOpenAIEmbedding" - }, - { - "$ref": "#/components/schemas/BedrockEmbedding" - }, - { - "$ref": "#/components/schemas/CohereEmbedding" - }, - { - "$ref": "#/components/schemas/GeminiEmbedding" - }, - { - "$ref": "#/components/schemas/HuggingFaceInferenceAPIEmbedding" - } - ], - "title": "Component", - "description": "Configuration for the transformation." + "mode": { + "type": "string", + "enum": ["element"], + "title": "Mode", + "default": "element" } }, "type": "object", - "title": "EmbeddingConfig" - }, - "EmbeddingConfigType": { - "type": "string", - "enum": [ - "OPENAI_EMBEDDING", - "AZURE_EMBEDDING", - "BEDROCK_EMBEDDING", - "COHERE_EMBEDDING", - "GEMINI_EMBEDDING", - "HUGGINGFACE_API_EMBEDDING" - ], - "title": "EmbeddingConfigType", - "description": "An enumeration." + "title": "ElementSegmentationConfig" }, "EvalDataset": { "properties": { @@ -10597,6 +12034,10 @@ "status": { "$ref": "#/components/schemas/StatusEnum" }, + "error_code": { + "type": "string", + "title": "Error Code" + }, "error_message": { "type": "string", "title": "Error Message" @@ -10681,11 +12122,11 @@ "llm_model": { "allOf": [ { - "$ref": "#/components/schemas/SupportedEvalLLMModelNames" + "$ref": "#/components/schemas/SupportedLLMModelNames" } ], "description": "The LLM model to use within eval execution.", - "default": "GPT_3_5_TURBO" + "default": "GPT_4O" }, "qa_prompt_tmpl": { "type": "string", @@ -10703,7 +12144,7 @@ "llm_model": { "allOf": [ { - "$ref": "#/components/schemas/SupportedEvalLLMModelNames" + "$ref": "#/components/schemas/SupportedLLMModelNames" } ], "description": "The LLM model to use within eval execution." @@ -10718,24 +12159,6 @@ "title": "EvalExecutionParamsOverride", "description": "Schema for the params override for an eval execution." }, - "EvalLLMModelData": { - "properties": { - "name": { - "type": "string", - "title": "Name", - "description": "The name of the LLM model." - }, - "description": { - "type": "string", - "title": "Description", - "description": "The description of the LLM model." - } - }, - "type": "object", - "required": ["name", "description"], - "title": "EvalLLMModelData", - "description": "Schema for an eval LLM model." - }, "EvalQuestion": { "properties": { "id": { @@ -10870,6 +12293,86 @@ "title": "EvalQuestionResult", "description": "Schema for the result of an eval question job." }, + "ExtendVertexTextEmbedding": { + "properties": { + "model_name": { + "type": "string", + "title": "Model Name", + "description": "The name of the embedding model.", + "default": "unknown" + }, + "embed_batch_size": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Embed Batch Size", + "description": "The batch size for embedding calls.", + "default": 10, + "lte": 2048 + }, + "callback_manager": { + "type": "object", + "title": "Callback Manager", + "default": {} + }, + "num_workers": { + "type": "integer", + "title": "Num Workers", + "description": "The number of workers to use for async embedding calls." + }, + "embed_mode": { + "allOf": [ + { + "$ref": "#/components/schemas/VertexEmbeddingMode" + } + ], + "description": "The embedding mode to use." + }, + "additional_kwargs": { + "type": "object", + "title": "Additional Kwargs", + "description": "Additional kwargs for the Vertex." + }, + "client_email": { + "type": "string", + "title": "Client Email", + "description": "The client email to use when making Vertex API calls." + }, + "token_uri": { + "type": "string", + "title": "Token Uri", + "description": "The token uri to use when making Vertex API calls." + }, + "private_key_id": { + "type": "string", + "title": "Private Key Id", + "description": "The private key id to use when making Vertex API calls." + }, + "private_key": { + "type": "string", + "title": "Private Key", + "description": "The private key to use when making Vertex API calls." + }, + "project": { + "type": "string", + "title": "Project", + "description": "The default GCP project to use when making Vertex API calls." + }, + "location": { + "type": "string", + "title": "Location", + "description": "The default location to use when making API calls." + }, + "class_name": { + "type": "string", + "title": "Class Name", + "default": "ExtendVertexTextEmbedding" + } + }, + "type": "object", + "required": ["embed_mode", "project", "location"], + "title": "ExtendVertexTextEmbedding", + "description": "Base class for embeddings." + }, "ExtractionJob": { "properties": { "id": { @@ -11375,7 +12878,8 @@ "any", "all", "text_match", - "contains" + "contains", + "is_empty" ], "title": "FilterOperator", "description": "Vector store filter operator." @@ -11433,56 +12937,27 @@ "title": "GeminiEmbedding", "description": "Google Gemini embeddings.\n\nArgs:\n model_name (str): Model for embedding.\n Defaults to \"models/embedding-001\".\n\n api_key (Optional[str]): API key to access the model. Defaults to None.\n api_base (Optional[str]): API base to access the model. Defaults to Official Base.\n transport (Optional[str]): Transport to access the model." }, - "HTMLNodeParser": { + "GeminiEmbeddingConfig": { "properties": { - "include_metadata": { - "type": "boolean", - "title": "Include Metadata", - "description": "Whether or not to consider metadata when splitting.", - "default": true - }, - "include_prev_next_rel": { - "type": "boolean", - "title": "Include Prev Next Rel", - "description": "Include prev/next node relationships.", - "default": true - }, - "callback_manager": { - "type": "object", - "title": "Callback Manager", - "default": {} - }, - "tags": { - "items": { - "type": "string" - }, - "type": "array", - "title": "Tags", - "description": "HTML tags to extract text from.", - "default": [ - "p", - "h1", - "h2", - "h3", - "h4", - "h5", - "h6", - "li", - "b", - "i", - "u", - "section" - ] - }, - "class_name": { + "type": { "type": "string", - "title": "Class Name", - "default": "HTMLNodeParser" + "enum": ["GEMINI_EMBEDDING"], + "title": "Type", + "description": "Type of the embedding model.", + "default": "GEMINI_EMBEDDING" + }, + "component": { + "allOf": [ + { + "$ref": "#/components/schemas/GeminiEmbedding" + } + ], + "title": "Component", + "description": "Configuration for the Gemini embedding model." } }, "type": "object", - "title": "HTMLNodeParser", - "description": "HTML node parser.\n\nSplits a document into Nodes using custom HTML splitting logic.\n\nArgs:\n include_metadata (bool): whether to include metadata in nodes\n include_prev_next_rel (bool): whether to include prev/next relationships" + "title": "GeminiEmbeddingConfig" }, "HTTPValidationError": { "properties": { @@ -11589,34 +13064,97 @@ "title": "HuggingFaceInferenceAPIEmbedding", "description": "Wrapper on the Hugging Face's Inference API for embeddings.\n\nOverview of the design:\n- Uses the feature extraction task: https://huggingface.co/tasks/feature-extraction" }, - "JSONNodeParser": { + "HuggingFaceInferenceAPIEmbeddingConfig": { "properties": { - "include_metadata": { - "type": "boolean", - "title": "Include Metadata", - "description": "Whether or not to consider metadata when splitting.", - "default": true + "type": { + "type": "string", + "enum": ["HUGGINGFACE_API_EMBEDDING"], + "title": "Type", + "description": "Type of the embedding model.", + "default": "HUGGINGFACE_API_EMBEDDING" }, - "include_prev_next_rel": { - "type": "boolean", - "title": "Include Prev Next Rel", - "description": "Include prev/next node relationships.", - "default": true + "component": { + "allOf": [ + { + "$ref": "#/components/schemas/HuggingFaceInferenceAPIEmbedding" + } + ], + "title": "Component", + "description": "Configuration for the HuggingFace Inference API embedding model." + } + }, + "type": "object", + "title": "HuggingFaceInferenceAPIEmbeddingConfig" + }, + "IngestionErrorResponse": { + "properties": { + "job_id": { + "type": "string", + "format": "uuid", + "title": "Job Id", + "description": "ID of the job that failed." }, - "callback_manager": { + "message": { + "type": "string", + "title": "Message", + "description": "List of errors that occurred during ingestion." + }, + "step": { + "allOf": [ + { + "$ref": "#/components/schemas/JobNameMapping" + } + ], + "description": "Name of the job that failed." + } + }, + "type": "object", + "required": ["job_id", "message", "step"], + "title": "IngestionErrorResponse" + }, + "InputMessage": { + "properties": { + "id": { + "type": "string", + "title": "Id", + "description": "ID of the message, if any. Not necessarily a UUID." + }, + "role": { + "$ref": "#/components/schemas/MessageRole" + }, + "content": { + "type": "string", + "title": "Content" + }, + "data": { "type": "object", - "title": "Callback Manager", - "default": {} + "title": "Data", + "description": "Additional data to be stored with the message." }, "class_name": { "type": "string", "title": "Class Name", - "default": "JSONNodeParser" + "default": "base_component" } }, "type": "object", - "title": "JSONNodeParser", - "description": "JSON node parser.\n\nSplits a document into Nodes using custom JSON splitting logic.\n\nArgs:\n include_metadata (bool): whether to include metadata in nodes\n include_prev_next_rel (bool): whether to include prev/next relationships" + "required": ["role", "content"], + "title": "InputMessage", + "description": "This is distinct from a ChatMessage because this schema is enforced by the AI Chat library used in the frontend" + }, + "JobNameMapping": { + "type": "string", + "enum": [ + "MANAGED_INGESTION", + "DATA_SOURCE", + "FILES_UPDATE", + "FILE_UPDATER", + "PARSE", + "TRANSFORM", + "INGESTION" + ], + "title": "JobNameMapping", + "description": "Enum for mapping original job names to readable names." }, "LLM": { "properties": { @@ -11660,8 +13198,63 @@ } }, "type": "object", - "title": "LLM", - "description": "The LLM class is the main class for interacting with language models.\n\nAttributes:\n system_prompt (Optional[str]):\n System prompt for LLM calls.\n messages_to_prompt (Callable):\n Function to convert a list of messages to an LLM prompt.\n completion_to_prompt (Callable):\n Function to convert a completion to an LLM prompt.\n output_parser (Optional[BaseOutputParser]):\n Output parser to parse, validate, and correct errors programmatically.\n pydantic_program_mode (PydanticProgramMode):\n Pydantic program mode to use for structured prediction." + "title": "LLM", + "description": "The LLM class is the main class for interacting with language models.\n\nAttributes:\n system_prompt (Optional[str]):\n System prompt for LLM calls.\n messages_to_prompt (Callable):\n Function to convert a list of messages to an LLM prompt.\n completion_to_prompt (Callable):\n Function to convert a completion to an LLM prompt.\n output_parser (Optional[BaseOutputParser]):\n Output parser to parse, validate, and correct errors programmatically.\n pydantic_program_mode (PydanticProgramMode):\n Pydantic program mode to use for structured prediction." + }, + "LLMModelData": { + "properties": { + "name": { + "type": "string", + "title": "Name", + "description": "The name of the LLM model." + }, + "description": { + "type": "string", + "title": "Description", + "description": "The description of the LLM model." + }, + "multi_modal": { + "type": "boolean", + "title": "Multi Modal", + "description": "Whether the model supports multi-modal image input" + } + }, + "type": "object", + "required": ["name", "description", "multi_modal"], + "title": "LLMModelData", + "description": "Schema for an eval LLM model." + }, + "LLMParameters": { + "properties": { + "model_name": { + "allOf": [ + { + "$ref": "#/components/schemas/SupportedLLMModelNames" + } + ], + "description": "The name of the model to use for LLM completions.", + "default": "GPT_3_5_TURBO" + }, + "system_prompt": { + "type": "string", + "title": "System Prompt", + "description": "The system prompt to use for the completion." + }, + "temperature": { + "type": "number", + "title": "Temperature", + "description": "The temperature value for the model.", + "default": 0.1 + }, + "class_name": { + "type": "string", + "title": "Class Name", + "default": "base_component" + } + }, + "type": "object", + "title": "LLMParameters", + "description": "Base schema model for BaseComponent classes used in the platform.\nComes with special serialization logic for types used commonly in platform codebase." }, "LlamaParseParameters": { "properties": { @@ -11682,6 +13275,11 @@ "title": "Disable Ocr", "default": false }, + "disable_reconstruction": { + "type": "boolean", + "title": "Disable Reconstruction", + "default": false + }, "invalidate_cache": { "type": "boolean", "title": "Invalidate Cache", @@ -11732,7 +13330,7 @@ "default": "" }, "use_vendor_multimodal_model": { - "type": "string", + "type": "boolean", "title": "Use Vendor Multimodal Model", "default": false }, @@ -11755,11 +13353,31 @@ "type": "string", "title": "Page Suffix", "default": "" + }, + "webhook_url": { + "type": "string", + "title": "Webhook Url", + "default": "" + }, + "take_screenshot": { + "type": "boolean", + "title": "Take Screenshot", + "default": false + }, + "s3_input_path": { + "type": "string", + "title": "S3 Input Path", + "default": "" + }, + "s3_output_path_prefix": { + "type": "string", + "title": "S3 Output Path Prefix", + "default": "" } }, "type": "object", "title": "LlamaParseParameters", - "description": "Settings that can be configured for how to use LlamaParse to parse files witin a LlamaCloud pipeline." + "description": "Settings that can be configured for how to use LlamaParse to parse files within a LlamaCloud pipeline." }, "LlamaParseSupportedFileExtensions": { "type": "string", @@ -11798,6 +13416,14 @@ ".sxi", ".sti", ".epub", + ".jpg", + ".jpeg", + ".png", + ".gif", + ".bmp", + ".svg", + ".tiff", + ".webp", ".html", ".htm", ".xls", @@ -11992,6 +13618,18 @@ }, "ManagedIngestionStatusResponse": { "properties": { + "job_id": { + "type": "string", + "format": "uuid", + "title": "Job Id", + "description": "ID of the latest job." + }, + "deployment_date": { + "type": "string", + "format": "date-time", + "title": "Deployment Date", + "description": "Date of the deployment." + }, "status": { "allOf": [ { @@ -11999,6 +13637,14 @@ } ], "description": "Status of the ingestion." + }, + "error": { + "items": { + "$ref": "#/components/schemas/IngestionErrorResponse" + }, + "type": "array", + "title": "Error", + "description": "List of errors that occurred during ingestion." } }, "type": "object", @@ -12099,6 +13745,28 @@ "title": "MarkdownNodeParser", "description": "Markdown node parser.\n\nSplits a document into Nodes using custom Markdown splitting logic.\n\nArgs:\n include_metadata (bool): whether to include metadata in nodes\n include_prev_next_rel (bool): whether to include prev/next relationships" }, + "MessageAnnotation": { + "properties": { + "type": { + "type": "string", + "title": "Type" + }, + "data": { + "type": "string", + "format": "json-string", + "title": "Data" + }, + "class_name": { + "type": "string", + "title": "Class Name", + "default": "base_component" + } + }, + "type": "object", + "required": ["type"], + "title": "MessageAnnotation", + "description": "Base schema model for BaseComponent classes used in the platform.\nComes with special serialization logic for types used commonly in platform codebase." + }, "MessageRole": { "type": "string", "enum": [ @@ -12161,7 +13829,7 @@ } }, "type": "object", - "required": ["key", "value"], + "required": ["key"], "title": "MetadataFilter", "description": "Comprehensive metadata filter for vector stores to support more operators.\n\nValue uses Strict* types, as int, float and str are compatible types and were all\nconverted to string before.\n\nSee: https://docs.pydantic.dev/latest/usage/types/#strict-types" }, @@ -12245,6 +13913,30 @@ "title": "NodeParser", "description": "Base interface for node parser." }, + "NoneChunkingConfig": { + "properties": { + "mode": { + "type": "string", + "enum": ["none"], + "title": "Mode", + "default": "none" + } + }, + "type": "object", + "title": "NoneChunkingConfig" + }, + "NoneSegmentationConfig": { + "properties": { + "mode": { + "type": "string", + "enum": ["none"], + "title": "Mode", + "default": "none" + } + }, + "type": "object", + "title": "NoneSegmentationConfig" + }, "ObjectType": { "type": "string", "enum": ["1", "2", "3", "4"], @@ -12343,6 +14035,28 @@ "title": "OpenAIEmbedding", "description": "OpenAI class for embeddings.\n\nArgs:\n mode (str): Mode for embedding.\n Defaults to OpenAIEmbeddingMode.TEXT_SEARCH_MODE.\n Options are:\n\n - OpenAIEmbeddingMode.SIMILARITY_MODE\n - OpenAIEmbeddingMode.TEXT_SEARCH_MODE\n\n model (str): Model for embedding.\n Defaults to OpenAIEmbeddingModelType.TEXT_EMBED_ADA_002.\n Options are:\n\n - OpenAIEmbeddingModelType.DAVINCI\n - OpenAIEmbeddingModelType.CURIE\n - OpenAIEmbeddingModelType.BABBAGE\n - OpenAIEmbeddingModelType.ADA\n - OpenAIEmbeddingModelType.TEXT_EMBED_ADA_002" }, + "OpenAIEmbeddingConfig": { + "properties": { + "type": { + "type": "string", + "enum": ["OPENAI_EMBEDDING"], + "title": "Type", + "description": "Type of the embedding model.", + "default": "OPENAI_EMBEDDING" + }, + "component": { + "allOf": [ + { + "$ref": "#/components/schemas/OpenAIEmbedding" + } + ], + "title": "Component", + "description": "Configuration for the OpenAI embedding model." + } + }, + "type": "object", + "title": "OpenAIEmbeddingConfig" + }, "Organization": { "properties": { "id": { @@ -12405,6 +14119,104 @@ "title": "OrganizationUpdate", "description": "Schema for updating an organization." }, + "PageScreenshotMetadata": { + "properties": { + "page_index": { + "type": "integer", + "minimum": 0.0, + "title": "Page Index", + "description": "The index of the page for which the screenshot is taken (0-indexed)" + }, + "file_id": { + "type": "string", + "format": "uuid", + "title": "File Id", + "description": "The ID of the file that the page screenshot was taken from" + }, + "image_size": { + "type": "integer", + "minimum": 0.0, + "title": "Image Size", + "description": "The size of the image in bytes" + } + }, + "type": "object", + "required": ["page_index", "file_id", "image_size"], + "title": "PageScreenshotMetadata" + }, + "PageScreenshotNodeWithScore": { + "properties": { + "node": { + "$ref": "#/components/schemas/PageScreenshotMetadata" + }, + "score": { + "type": "number", + "title": "Score", + "description": "The score of the screenshot node" + }, + "class_name": { + "type": "string", + "title": "Class Name", + "default": "NodeWithScore" + } + }, + "type": "object", + "required": ["node", "score"], + "title": "PageScreenshotNodeWithScore", + "description": "Page screenshot metadata with score" + }, + "PageSegmentationConfig": { + "properties": { + "mode": { + "type": "string", + "enum": ["page"], + "title": "Mode", + "default": "page" + }, + "page_separator": { + "type": "string", + "title": "Page Separator", + "default": "\n---\n" + } + }, + "type": "object", + "title": "PageSegmentationConfig" + }, + "PageSplitterNodeParser": { + "properties": { + "include_metadata": { + "type": "boolean", + "title": "Include Metadata", + "description": "Whether or not to consider metadata when splitting.", + "default": true + }, + "include_prev_next_rel": { + "type": "boolean", + "title": "Include Prev Next Rel", + "description": "Include prev/next node relationships.", + "default": true + }, + "callback_manager": { + "type": "object", + "title": "Callback Manager", + "default": {} + }, + "page_separator": { + "type": "string", + "title": "Page Separator", + "description": "Separator to split text into pages.", + "default": "\n---\n" + }, + "class_name": { + "type": "string", + "title": "Class Name", + "default": "base_component" + } + }, + "type": "object", + "title": "PageSplitterNodeParser", + "description": "Split text into pages." + }, "ParserLanguages": { "type": "string", "enum": [ @@ -12557,6 +14369,14 @@ }, "status": { "$ref": "#/components/schemas/StatusEnum" + }, + "error_code": { + "type": "string", + "title": "Error Code" + }, + "error_message": { + "type": "string", + "title": "Error Message" } }, "type": "object", @@ -12621,18 +14441,11 @@ } }, "type": "object", - "required": ["usage_pdf_pages", "max_pdf_pages"], + "required": ["usage_pdf_pages"], "title": "ParsingUsage" }, "Pipeline": { "properties": { - "configured_transformations": { - "items": { - "$ref": "#/components/schemas/ConfiguredTransformationItem" - }, - "type": "array", - "title": "Configured Transformations" - }, "id": { "type": "string", "format": "uuid", @@ -12675,6 +14488,74 @@ "title": "Managed Pipeline Id", "description": "The ID of the ManagedPipeline this playground pipeline is linked to." }, + "embedding_config": { + "oneOf": [ + { + "$ref": "#/components/schemas/AzureOpenAIEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/CohereEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/GeminiEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/HuggingFaceInferenceAPIEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/OpenAIEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/VertexAIEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/BedrockEmbeddingConfig" + } + ], + "title": "Embedding Config", + "discriminator": { + "propertyName": "type", + "mapping": { + "AZURE_EMBEDDING": "#/components/schemas/AzureOpenAIEmbeddingConfig", + "COHERE_EMBEDDING": "#/components/schemas/CohereEmbeddingConfig", + "GEMINI_EMBEDDING": "#/components/schemas/GeminiEmbeddingConfig", + "HUGGINGFACE_API_EMBEDDING": "#/components/schemas/HuggingFaceInferenceAPIEmbeddingConfig", + "OPENAI_EMBEDDING": "#/components/schemas/OpenAIEmbeddingConfig", + "VERTEXAI_EMBEDDING": "#/components/schemas/VertexAIEmbeddingConfig", + "BEDROCK_EMBEDDING": "#/components/schemas/BedrockEmbeddingConfig" + } + } + }, + "configured_transformations": { + "items": { + "$ref": "#/components/schemas/ConfiguredTransformationItem" + }, + "type": "array", + "title": "Configured Transformations", + "description": "Deprecated don't use it, List of configured transformations.", + "default": [] + }, + "config_hash": { + "allOf": [ + { + "$ref": "#/components/schemas/PipelineConfigurationHashes" + } + ], + "title": "Config Hash", + "description": "Hashes for the configuration of the pipeline." + }, + "transform_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/AutoTransformConfig" + }, + { + "$ref": "#/components/schemas/AdvancedModeTransformConfig" + } + ], + "title": "Transform Config", + "description": "Configuration for the transformation." + }, "preset_retrieval_parameters": { "allOf": [ { @@ -12713,25 +14594,70 @@ } }, "type": "object", - "required": ["configured_transformations", "id", "name", "project_id"], + "required": ["id", "name", "project_id", "embedding_config"], "title": "Pipeline", "description": "Schema for a pipeline." }, + "PipelineConfigurationHashes": { + "properties": { + "embedding_config_hash": { + "type": "string", + "title": "Embedding Config Hash", + "description": "Hash of the embedding config.", + "default": "" + }, + "parsing_config_hash": { + "type": "string", + "title": "Parsing Config Hash", + "description": "Hash of the llama parse parameters.", + "default": "" + }, + "transform_config_hash": { + "type": "string", + "title": "Transform Config Hash", + "description": "Hash of the transform config.", + "default": "" + } + }, + "type": "object", + "title": "PipelineConfigurationHashes", + "description": "Hashes for the configuration of a pipeline." + }, "PipelineCreate": { "properties": { "embedding_config": { - "allOf": [ + "anyOf": [ { - "$ref": "#/components/schemas/EmbeddingConfig" + "$ref": "#/components/schemas/AzureOpenAIEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/CohereEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/GeminiEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/HuggingFaceInferenceAPIEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/OpenAIEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/VertexAIEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/BedrockEmbeddingConfig" } ], - "title": "Embedding Config", - "description": "Configuration for the embedding model." + "title": "Embedding Config" }, "transform_config": { - "allOf": [ + "anyOf": [ + { + "$ref": "#/components/schemas/AutoTransformConfig" + }, { - "$ref": "#/components/schemas/TransformConfig" + "$ref": "#/components/schemas/AdvancedModeTransformConfig" } ], "title": "Transform Config", @@ -12743,7 +14669,8 @@ }, "type": "array", "title": "Configured Transformations", - "description": "List of configured transformations." + "description": "Deprecated, use embedding_config or transform_config instead. configured transformations for the pipeline.", + "default": [] }, "data_sink_id": { "type": "string", @@ -12883,6 +14810,9 @@ { "$ref": "#/components/schemas/CloudAzStorageBlobDataSource" }, + { + "$ref": "#/components/schemas/CloudGoogleDriveDataSource" + }, { "$ref": "#/components/schemas/CloudOneDriveDataSource" }, @@ -12900,6 +14830,9 @@ }, { "$ref": "#/components/schemas/CloudJiraDataSource" + }, + { + "$ref": "#/components/schemas/CloudBoxDataSource" } ] } @@ -12922,6 +14855,23 @@ "format": "uuid", "title": "Pipeline Id", "description": "The ID of the pipeline." + }, + "last_synced_at": { + "type": "string", + "format": "date-time", + "title": "Last Synced At", + "description": "The last time the data source was automatically synced." + }, + "sync_interval": { + "type": "number", + "format": "time-delta", + "title": "Sync Interval", + "description": "The interval at which the data source should be synced." + }, + "sync_schedule_set_by": { + "type": "string", + "title": "Sync Schedule Set By", + "description": "The id of the user who set the sync schedule." } }, "type": "object", @@ -12932,7 +14882,8 @@ "component", "project_id", "data_source_id", - "pipeline_id" + "pipeline_id", + "last_synced_at" ], "title": "PipelineDataSource", "description": "Schema for a data source in a pipeline." @@ -12944,6 +14895,12 @@ "format": "uuid", "title": "Data Source Id", "description": "The ID of the data source." + }, + "sync_interval": { + "type": "number", + "format": "time-delta", + "title": "Sync Interval", + "description": "The interval at which the data source should be synced." } }, "type": "object", @@ -12951,6 +14908,19 @@ "title": "PipelineDataSourceCreate", "description": "Schema for creating an association between a data source and a pipeline." }, + "PipelineDataSourceUpdate": { + "properties": { + "sync_interval": { + "type": "number", + "format": "time-delta", + "title": "Sync Interval", + "description": "The interval at which the data source should be synced." + } + }, + "type": "object", + "title": "PipelineDataSourceUpdate", + "description": "Schema for updating an association between a data source and a pipeline." + }, "PipelineDeployment": { "properties": { "id": { @@ -13121,6 +15091,34 @@ "type": "object", "title": "Custom Metadata", "description": "Custom metadata for the file" + }, + "config_hash": { + "additionalProperties": { + "anyOf": [ + { + "type": "object" + }, + { + "items": {}, + "type": "array" + }, + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + } + ] + }, + "type": "object", + "title": "Config Hash", + "description": "Hashes for the configuration of the pipeline." } }, "type": "object", @@ -13214,18 +15212,38 @@ "PipelineUpdate": { "properties": { "embedding_config": { - "allOf": [ + "anyOf": [ + { + "$ref": "#/components/schemas/AzureOpenAIEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/CohereEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/GeminiEmbeddingConfig" + }, { - "$ref": "#/components/schemas/EmbeddingConfig" + "$ref": "#/components/schemas/HuggingFaceInferenceAPIEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/OpenAIEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/VertexAIEmbeddingConfig" + }, + { + "$ref": "#/components/schemas/BedrockEmbeddingConfig" } ], - "title": "Embedding Config", - "description": "Configuration for the embedding model." + "title": "Embedding Config" }, "transform_config": { - "allOf": [ + "anyOf": [ + { + "$ref": "#/components/schemas/AutoTransformConfig" + }, { - "$ref": "#/components/schemas/TransformConfig" + "$ref": "#/components/schemas/AdvancedModeTransformConfig" } ], "title": "Transform Config", @@ -13237,7 +15255,8 @@ }, "type": "array", "title": "Configured Transformations", - "description": "List of configured transformations." + "description": "Deprecated, use embedding_config or transform_config instead. configured transformations for the pipeline.", + "default": [] }, "data_sink_id": { "type": "string", @@ -13296,6 +15315,83 @@ "title": "PipelineUpdate", "description": "Schema for updating a pipeline." }, + "PlaygroundSession": { + "properties": { + "id": { + "type": "string", + "format": "uuid", + "title": "Id", + "description": "Unique identifier" + }, + "created_at": { + "type": "string", + "format": "date-time", + "title": "Created At", + "description": "Creation datetime" + }, + "updated_at": { + "type": "string", + "format": "date-time", + "title": "Updated At", + "description": "Update datetime" + }, + "pipeline_id": { + "type": "string", + "format": "uuid", + "title": "Pipeline Id" + }, + "user_id": { + "type": "string", + "title": "User Id" + }, + "llm_params_id": { + "type": "string", + "format": "uuid", + "title": "Llm Params Id" + }, + "llm_params": { + "allOf": [ + { + "$ref": "#/components/schemas/LLMParameters" + } + ], + "title": "Llm Params", + "description": "LLM parameters last used in this session." + }, + "retrieval_params_id": { + "type": "string", + "format": "uuid", + "title": "Retrieval Params Id" + }, + "retrieval_params": { + "allOf": [ + { + "$ref": "#/components/schemas/PresetRetrievalParams" + } + ], + "title": "Retrieval Params", + "description": "Preset retrieval parameters last used in this session." + }, + "chat_messages": { + "items": { + "$ref": "#/components/schemas/ChatMessage" + }, + "type": "array", + "title": "Chat Messages", + "description": "Chat message history for this session." + } + }, + "type": "object", + "required": [ + "id", + "pipeline_id", + "user_id", + "llm_params_id", + "retrieval_params_id" + ], + "title": "PlaygroundSession", + "description": "A playground session for a user." + }, "Pooling": { "type": "string", "enum": ["cls", "mean", "last"], @@ -13323,8 +15419,7 @@ "enable_reranking": { "type": "boolean", "title": "Enable Reranking", - "description": "Enable reranking for retrieval", - "default": true + "description": "Enable reranking for retrieval" }, "rerank_top_n": { "type": "integer", @@ -13339,8 +15434,7 @@ "maximum": 1.0, "minimum": 0.0, "title": "Alpha", - "description": "Alpha value for hybrid retrieval to determine the weights between dense and sparse retrieval. 0 is sparse retrieval and 1 is dense retrieval.", - "default": 0.5 + "description": "Alpha value for hybrid retrieval to determine the weights between dense and sparse retrieval. 0 is sparse retrieval and 1 is dense retrieval." }, "search_filters": { "allOf": [ @@ -13367,6 +15461,12 @@ ], "description": "The retrieval mode for the query.", "default": "chunks" + }, + "retrieve_image_nodes": { + "type": "boolean", + "title": "Retrieve Image Nodes", + "description": "Whether to retrieve image nodes.", + "default": false } }, "type": "object", @@ -13616,8 +15716,7 @@ "enable_reranking": { "type": "boolean", "title": "Enable Reranking", - "description": "Enable reranking for retrieval", - "default": true + "description": "Enable reranking for retrieval" }, "rerank_top_n": { "type": "integer", @@ -13632,8 +15731,7 @@ "maximum": 1.0, "minimum": 0.0, "title": "Alpha", - "description": "Alpha value for hybrid retrieval to determine the weights between dense and sparse retrieval. 0 is sparse retrieval and 1 is dense retrieval.", - "default": 0.5 + "description": "Alpha value for hybrid retrieval to determine the weights between dense and sparse retrieval. 0 is sparse retrieval and 1 is dense retrieval." }, "search_filters": { "allOf": [ @@ -13661,6 +15759,12 @@ "description": "The retrieval mode for the query.", "default": "chunks" }, + "retrieve_image_nodes": { + "type": "boolean", + "title": "Retrieve Image Nodes", + "description": "Whether to retrieve image nodes.", + "default": false + }, "query": { "type": "string", "title": "Query", @@ -13688,6 +15792,14 @@ "title": "Retrieval Nodes", "description": "The nodes retrieved by the pipeline for the given query." }, + "image_nodes": { + "items": { + "$ref": "#/components/schemas/PageScreenshotNodeWithScore" + }, + "type": "array", + "title": "Image Nodes", + "description": "The image nodes retrieved by the pipeline for the given query." + }, "retrieval_latency": { "additionalProperties": { "type": "number" @@ -13707,6 +15819,62 @@ "title": "RetrieveResults", "description": "Schema for the result of an retrieval execution." }, + "SemanticChunkingConfig": { + "properties": { + "mode": { + "type": "string", + "enum": ["semantic"], + "title": "Mode", + "default": "semantic" + }, + "buffer_size": { + "type": "integer", + "title": "Buffer Size", + "default": 1 + }, + "breakpoint_percentile_threshold": { + "type": "integer", + "title": "Breakpoint Percentile Threshold", + "default": 95 + } + }, + "type": "object", + "title": "SemanticChunkingConfig" + }, + "SentenceChunkingConfig": { + "properties": { + "chunk_size": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Chunk Size", + "default": 1024 + }, + "chunk_overlap": { + "type": "integer", + "title": "Chunk Overlap", + "default": 200, + "gte": 0 + }, + "mode": { + "type": "string", + "enum": ["sentence"], + "title": "Mode", + "default": "sentence" + }, + "separator": { + "type": "string", + "title": "Separator", + "default": " " + }, + "paragraph_separator": { + "type": "string", + "title": "Paragraph Separator", + "default": "\n\n\n" + } + }, + "type": "object", + "title": "SentenceChunkingConfig" + }, "SentenceSplitter": { "properties": { "include_metadata": { @@ -13768,70 +15936,54 @@ "title": "SentenceSplitter", "description": "Parse text with a preference for complete sentences.\n\nIn general, this class tries to keep sentences and paragraphs together. Therefore\ncompared to the original TokenTextSplitter, there are less likely to be\nhanging sentences or parts of sentences at the end of the node chunk." }, - "SimpleFileNodeParser": { - "properties": { - "include_metadata": { - "type": "boolean", - "title": "Include Metadata", - "description": "Whether or not to consider metadata when splitting.", - "default": true - }, - "include_prev_next_rel": { - "type": "boolean", - "title": "Include Prev Next Rel", - "description": "Include prev/next node relationships.", - "default": true - }, - "callback_manager": { - "type": "object", - "title": "Callback Manager", - "default": {} - }, - "class_name": { - "type": "string", - "title": "Class Name", - "default": "SimpleFileNodeParser" - } - }, - "type": "object", - "title": "SimpleFileNodeParser", - "description": "Simple file node parser.\n\nSplits a document loaded from a file into Nodes using logic based on the file type\nautomatically detects the NodeParser to use based on file type\n\nArgs:\n include_metadata (bool): whether to include metadata in nodes\n include_prev_next_rel (bool): whether to include prev/next relationships" - }, "StatusEnum": { "type": "string", "enum": ["PENDING", "SUCCESS", "ERROR", "PARTIAL_SUCCESS"], "title": "StatusEnum", "description": "Enum for representing the status of a job" }, - "SupportedEvalLLMModel": { + "SupportedLLMModel": { "properties": { "name": { "allOf": [ { - "$ref": "#/components/schemas/SupportedEvalLLMModelNames" + "$ref": "#/components/schemas/SupportedLLMModelNames" } ], - "description": "The name of the supported eval LLM model." + "description": "The name of the supported LLM model." + }, + "enabled": { + "type": "boolean", + "title": "Enabled", + "description": "Whether the LLM model is enabled for use in LlamaCloud.", + "default": true }, "details": { "allOf": [ { - "$ref": "#/components/schemas/EvalLLMModelData" + "$ref": "#/components/schemas/LLMModelData" } ], "title": "Details", - "description": "The details of the supported eval LLM model." + "description": "The details of the supported LLM model." } }, "type": "object", "required": ["name", "details"], - "title": "SupportedEvalLLMModel", + "title": "SupportedLLMModel", "description": "Response Schema for a supported eval LLM model." }, - "SupportedEvalLLMModelNames": { + "SupportedLLMModelNames": { "type": "string", - "enum": ["GPT_3_5_TURBO", "GPT_4", "GPT_4_TURBO"], - "title": "SupportedEvalLLMModelNames", + "enum": [ + "GPT_3_5_TURBO", + "GPT_4", + "GPT_4_TURBO", + "GPT_4O", + "GPT_4O_MINI", + "AZURE_OPENAI" + ], + "title": "SupportedLLMModelNames", "description": "An enumeration." }, "TextNode": { @@ -13958,6 +16110,35 @@ "title": "TextNodeWithScore", "description": "Same as NodeWithScore but type for node is a TextNode instead of BaseNode.\nFastAPI doesn't accept abstract classes like BaseNode." }, + "TokenChunkingConfig": { + "properties": { + "chunk_size": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Chunk Size", + "default": 1024 + }, + "chunk_overlap": { + "type": "integer", + "title": "Chunk Overlap", + "default": 200, + "gte": 0 + }, + "mode": { + "type": "string", + "enum": ["token"], + "title": "Mode", + "default": "token" + }, + "separator": { + "type": "string", + "title": "Separator", + "default": " " + } + }, + "type": "object", + "title": "TokenChunkingConfig" + }, "TokenTextSplitter": { "properties": { "include_metadata": { @@ -14013,36 +16194,6 @@ "title": "TokenTextSplitter", "description": "Implementation of splitting text that looks at word tokens." }, - "TransformConfig": { - "properties": { - "mode": { - "allOf": [ - { - "$ref": "#/components/schemas/TransformConfigMode" - } - ], - "description": "Mode for the transformation configuration.", - "default": "AUTO" - }, - "config": { - "allOf": [ - { - "$ref": "#/components/schemas/AutoTransformConfig" - } - ], - "title": "Config", - "description": "Configuration for the transformation." - } - }, - "type": "object", - "title": "TransformConfig" - }, - "TransformConfigMode": { - "type": "string", - "enum": ["AUTO", "ADVANCED"], - "title": "TransformConfigMode", - "description": "An enumeration." - }, "TransformationCategoryNames": { "type": "string", "enum": ["NODE_PARSER", "EMBEDDING"], @@ -14173,6 +16324,40 @@ "type": "object", "required": ["loc", "msg", "type"], "title": "ValidationError" + }, + "VertexAIEmbeddingConfig": { + "properties": { + "type": { + "type": "string", + "enum": ["VERTEXAI_EMBEDDING"], + "title": "Type", + "description": "Type of the embedding model.", + "default": "VERTEXAI_EMBEDDING" + }, + "component": { + "allOf": [ + { + "$ref": "#/components/schemas/ExtendVertexTextEmbedding" + } + ], + "title": "Component", + "description": "Configuration for the VertexAI embedding model." + } + }, + "type": "object", + "title": "VertexAIEmbeddingConfig" + }, + "VertexEmbeddingMode": { + "type": "string", + "enum": [ + "default", + "classification", + "clustering", + "similarity", + "retrieval" + ], + "title": "VertexEmbeddingMode", + "description": "VertexAI embedding mode.\n\nAttributes:\n DEFAULT_MODE (str): The default embedding mode, for older models before August 2023,\n that does not support task_type\n CLASSIFICATION_MODE (str): Optimizes embeddings for classification tasks.\n CLUSTERING_MODE (str): Optimizes embeddings for clustering tasks.\n SEMANTIC_SIMILARITY_MODE (str): Optimizes embeddings for tasks that require assessments of semantic similarity.\n RETRIEVAL_MODE (str): Optimizes embeddings for retrieval tasks, including search and document retrieval." } }, "securitySchemes": { diff --git a/packages/cloud/package.json b/packages/cloud/package.json index 94cfeea8f..dd60fa9ee 100644 --- a/packages/cloud/package.json +++ b/packages/cloud/package.json @@ -26,6 +26,20 @@ "types": "./dist/api.d.ts", "default": "./dist/api.js" } + }, + "./reader": { + "require": { + "types": "./dist/reader.d.cts", + "default": "./dist/reader.cjs" + }, + "import": { + "types": "./dist/reader.d.ts", + "default": "./dist/reader.js" + }, + "default": { + "types": "./dist/reader.d.ts", + "default": "./dist/reader.js" + } } }, "repository": { @@ -36,6 +50,15 @@ "devDependencies": { "@hey-api/client-fetch": "^0.2.4", "@hey-api/openapi-ts": "^0.53.0", + "@llamaindex/core": "workspace:^0.2.0", + "@llamaindex/env": "workspace:^0.1.10", "bunchee": "5.3.2" + }, + "peerDependencies": { + "@llamaindex/core": "workspace:^0.2.0", + "@llamaindex/env": "workspace:^0.1.10" + }, + "dependencies": { + "magic-bytes.js": "^1.10.0" } } diff --git a/packages/llamaindex/src/readers/LlamaParseReader.ts b/packages/cloud/src/reader.ts similarity index 71% rename from packages/llamaindex/src/readers/LlamaParseReader.ts rename to packages/cloud/src/reader.ts index fe081c33f..76ac2457f 100644 --- a/packages/llamaindex/src/readers/LlamaParseReader.ts +++ b/packages/cloud/src/reader.ts @@ -1,92 +1,17 @@ +import { createClient, createConfig, type Client } from "@hey-api/client-fetch"; import { Document, FileReader } from "@llamaindex/core/schema"; import { fs, getEnv } from "@llamaindex/env"; import { filetypeinfo } from "magic-bytes.js"; +import { + ParsingService, + type Body_upload_file_api_v1_parsing_upload_post, + type ParserLanguages, +} from "./api"; +import { sleep } from "./utils"; + +export type Language = ParserLanguages; export type ResultType = "text" | "markdown" | "json"; -export type Language = - | "abq" - | "ady" - | "af" - | "ang" - | "ar" - | "as" - | "ava" - | "az" - | "be" - | "bg" - | "bh" - | "bho" - | "bn" - | "bs" - | "ch_sim" - | "ch_tra" - | "che" - | "cs" - | "cy" - | "da" - | "dar" - | "de" - | "en" - | "es" - | "et" - | "fa" - | "fr" - | "ga" - | "gom" - | "hi" - | "hr" - | "hu" - | "id" - | "inh" - | "is" - | "it" - | "ja" - | "kbd" - | "kn" - | "ko" - | "ku" - | "la" - | "lbe" - | "lez" - | "lt" - | "lv" - | "mah" - | "mai" - | "mi" - | "mn" - | "mr" - | "ms" - | "mt" - | "ne" - | "new" - | "nl" - | "no" - | "oc" - | "pi" - | "pl" - | "pt" - | "ro" - | "ru" - | "rs_cyrillic" - | "rs_latin" - | "sck" - | "sk" - | "sl" - | "sq" - | "sv" - | "sw" - | "ta" - | "tab" - | "te" - | "th" - | "tjk" - | "tl" - | "tr" - | "ug" - | "uk" - | "ur" - | "uz" - | "vi"; const SUPPORT_FILE_EXT: string[] = [ ".pdf", @@ -181,6 +106,15 @@ const SUPPORT_FILE_EXT: string[] = [ ".tsv", ]; +//todo: should move into @llamaindex/env +type WriteStream = { + write: (text: string) => void; +}; + +// Do not modify this variable or cause type errors +// eslint-disable-next-line no-var +var process: any; + /** * Represents a reader for parsing files using the LlamaParse API. * See https://github.com/run-llama/llama_parse @@ -188,8 +122,8 @@ const SUPPORT_FILE_EXT: string[] = [ export class LlamaParseReader extends FileReader { // The API key for the LlamaParse API. Can be set as an environment variable: LLAMA_CLOUD_API_KEY apiKey: string; - // The base URL of the Llama Parsing API. - baseUrl: string = "https://api.cloud.llamaindex.ai/api/parsing"; + // The base URL of the Llama Cloud Platform. + baseUrl: string = "https://api.cloud.llamaindex.ai"; // The result type for the parser. resultType: ResultType = "text"; // The interval in seconds to check if the parsing is done. @@ -199,7 +133,7 @@ export class LlamaParseReader extends FileReader { // Whether to print the progress of the parsing. verbose = true; // The language of the text to parse. - language: Language = "en"; + language: ParserLanguages[] = ["en"]; // The parsing instruction for the parser. Backend default is an empty string. parsingInstruction?: string | undefined; // Wether to ignore diagonal text (when the text rotation in degrees is not 0, 90, 180 or 270, so not a horizontal or vertical text). Backend default is false. @@ -237,14 +171,25 @@ export class LlamaParseReader extends FileReader { // The API key for the multimodal API. Can also be set as an env variable: LLAMA_CLOUD_VENDOR_MULTIMODAL_API_KEY vendorMultimodalApiKey?: string | undefined; // numWorkers is implemented in SimpleDirectoryReader + stdout?: WriteStream | undefined; + + readonly #client: Client; constructor( - params: Partial<LlamaParseReader> & { + params: Partial<Omit<LlamaParseReader, "language" | "apiKey">> & { + language?: ParserLanguages | ParserLanguages[] | undefined; apiKey?: string | undefined; } = {}, ) { super(); Object.assign(this, params); + this.language = Array.isArray(this.language) + ? this.language + : [this.language]; + this.stdout = + (params.stdout ?? typeof process !== "undefined") + ? process!.stdout + : undefined; const apiKey = params.apiKey ?? getEnv("LLAMA_CLOUD_API_KEY"); if (!apiKey) { throw new Error( @@ -252,6 +197,12 @@ export class LlamaParseReader extends FileReader { ); } this.apiKey = apiKey; + if (this.baseUrl.endsWith("/")) { + this.baseUrl = this.baseUrl.slice(0, -"/".length); + } + if (this.baseUrl.endsWith("/api/parsing")) { + this.baseUrl = this.baseUrl.slice(0, -"/api/parsing".length); + } if (params.gpt4oMode) { params.gpt4oApiKey = @@ -266,12 +217,21 @@ export class LlamaParseReader extends FileReader { this.vendorMultimodalApiKey = params.vendorMultimodalApiKey; } + + this.#client = createClient( + createConfig({ + headers: { + Authorization: `Bearer ${this.apiKey}`, + }, + baseUrl: this.baseUrl, + }), + ); } // Create a job for the LlamaParse API private async createJob( data: Uint8Array, - fileName?: string, + fileName: string = "unknown", ): Promise<string> { // Load data, set the mime type const { mime, extension } = await LlamaParseReader.getMimeType(data); @@ -281,111 +241,126 @@ export class LlamaParseReader extends FileReader { console.log(`Starting load for ${name} file`); } - const body = new FormData(); - body.set("file", new Blob([data], { type: mime }), fileName); - - const LlamaParseBodyParams = { + const body = { + file: new File([data], fileName, { type: mime }), language: this.language, parsing_instruction: this.parsingInstruction, - skip_diagonal_text: this.skipDiagonalText?.toString(), - invalidate_cache: this.invalidateCache?.toString(), - do_not_cache: this.doNotCache?.toString(), - fast_mode: this.fastMode?.toString(), - do_not_unroll_columns: this.doNotUnrollColumns?.toString(), + skip_diagonal_text: this.skipDiagonalText, + invalidate_cache: this.invalidateCache, + do_not_cache: this.doNotCache, + fast_mode: this.fastMode, + do_not_unroll_columns: this.doNotUnrollColumns, page_separator: this.pageSeparator, page_prefix: this.pagePrefix, page_suffix: this.pageSuffix, - gpt4o_mode: this.gpt4oMode?.toString(), + gpt4o_mode: this.gpt4oMode, gpt4o_api_key: this.gpt4oApiKey, bounding_box: this.boundingBox, target_pages: this.targetPages, - use_vendor_multimodal_model: this.useVendorMultimodalModel?.toString(), + use_vendor_multimodal_model: this.useVendorMultimodalModel, vendor_multimodal_model_name: this.vendorMultimodalModelName, vendor_multimodal_api_key: this.vendorMultimodalApiKey, - }; - - // Filter out params with invalid values that would cause issues on the backend. - const filteredParams = this.filterSpecificParams(LlamaParseBodyParams, [ - "page_separator", - "page_prefix", - "page_suffix", - "bounding_box", - "target_pages", - ]); - - // Appends body with any defined LlamaParseBodyParams - Object.entries(filteredParams).forEach(([key, value]) => { - if (value !== undefined) { - body.append(key, value); - } - }); - - const headers = { - Authorization: `Bearer ${this.apiKey}`, - }; - - // Send the request, start job - const url = `${this.baseUrl}/upload`; - const response = await fetch(url, { + // fixme: does these fields need to be set? + webhook_url: undefined, + take_screenshot: undefined, + disable_ocr: undefined, + disable_reconstruction: undefined, + input_s3_path: undefined, + output_s3_path_prefix: undefined, + } satisfies { + [Key in keyof Body_upload_file_api_v1_parsing_upload_post]-?: + | Body_upload_file_api_v1_parsing_upload_post[Key] + | undefined; + } as unknown as Body_upload_file_api_v1_parsing_upload_post; + + const response = await ParsingService.uploadFileApiV1ParsingUploadPost({ + client: this.#client, + throwOnError: true, signal: AbortSignal.timeout(this.maxTimeout * 1000), - method: "POST", body, - headers, }); - if (!response.ok) { - throw new Error(`Failed to parse the file: ${await response.text()}`); - } - const jsonResponse = await response.json(); - return jsonResponse.id; + + return response.data.id; } // Get the result of the job - private async getJobResult(jobId: string, resultType: string): Promise<any> { - const resultUrl = `${this.baseUrl}/job/${jobId}/result/${resultType}`; - const statusUrl = `${this.baseUrl}/job/${jobId}`; - const headers = { Authorization: `Bearer ${this.apiKey}` }; - + private async getJobResult( + jobId: string, + resultType: "text" | "json" | "markdown", + ): Promise<any> { const signal = AbortSignal.timeout(this.maxTimeout * 1000); let tries = 0; while (true) { - await new Promise((resolve) => - setTimeout(resolve, this.checkInterval * 1000), - ); + await sleep(this.checkInterval * 1000); // Check the job status. If unsuccessful response, checks if maximum timeout has been reached. If reached, throws an error - const statusResponse = await fetch(statusUrl, { - headers, - signal, - }); - if (!statusResponse.ok) { - signal.throwIfAborted(); - if (this.verbose && tries % 10 === 0) { - process.stdout.write("."); - } - tries++; - continue; - } + const result = + await ParsingService.getParsingJobDetailsApiV1ParsingJobJobIdDetailsGet( + { + client: this.#client, + throwOnError: true, + path: { + job_id: jobId, + }, + signal, + }, + ); + const { data } = result; - // If response is succesful, check status of job. Allowed values "PENDING", "SUCCESS", "ERROR", "CANCELED" - const statusJson = await statusResponse.json(); - const status = statusJson.status; + const status = (data as Record<string, unknown>)["status"]; // If job has completed, return the result if (status === "SUCCESS") { - const resultResponse = await fetch(resultUrl, { - headers, - signal, - }); - if (!resultResponse.ok) { - throw new Error( - `Failed to fetch result: ${await resultResponse.text()}`, - ); + let result; + switch (resultType) { + case "json": { + result = + await ParsingService.getJobJsonResultApiV1ParsingJobJobIdResultJsonGet( + { + client: this.#client, + throwOnError: true, + path: { + job_id: jobId, + }, + signal, + }, + ); + break; + } + case "markdown": { + result = + await ParsingService.getJobResultApiV1ParsingJobJobIdResultMarkdownGet( + { + client: this.#client, + throwOnError: true, + path: { + job_id: jobId, + }, + signal, + }, + ); + break; + } + case "text": { + result = + await ParsingService.getJobTextResultApiV1ParsingJobJobIdResultTextGet( + { + client: this.#client, + throwOnError: true, + path: { + job_id: jobId, + }, + signal, + }, + ); + break; + } } - return resultResponse.json(); + return result.data; // If job is still pending, check if maximum timeout has been reached. If reached, throws an error } else if (status === "PENDING") { signal.throwIfAborted(); if (this.verbose && tries % 10 === 0) { - process.stdout.write("."); + this.stdout?.write("."); } tries++; } else { @@ -408,36 +383,34 @@ export class LlamaParseReader extends FileReader { fileContent: Uint8Array, fileName?: string, ): Promise<Document[]> { - let jobId; - try { - // Creates a job for the file - jobId = await this.createJob(fileContent, fileName); - if (this.verbose) { - console.log(`Started parsing the file under job id ${jobId}`); - } + return this.createJob(fileContent, fileName) + .then(async (jobId) => { + if (this.verbose) { + console.log(`Started parsing the file under job id ${jobId}`); + } - // Return results as Document objects - const jobResults = await this.getJobResult(jobId, this.resultType); - const resultText = jobResults[this.resultType]; + // Return results as Document objects + const jobResults = await this.getJobResult(jobId, this.resultType); + const resultText = jobResults[this.resultType]; - // Split the text by separator if splitByPage is true - if (this.splitByPage) { - return this.splitTextBySeparator(resultText); - } + // Split the text by separator if splitByPage is true + if (this.splitByPage) { + return this.splitTextBySeparator(resultText); + } - return [ - new Document({ - text: resultText, - }), - ]; - } catch (e) { - console.error(`Error while parsing file under job id ${jobId}`, e); - if (this.ignoreErrors) { - return []; - } else { - throw e; - } - } + return [ + new Document({ + text: resultText, + }), + ]; + }) + .catch((error) => { + if (this.ignoreErrors) { + return []; + } else { + throw error; + } + }); } /** * Loads data from a file and returns an array of JSON objects. @@ -551,15 +524,20 @@ export class LlamaParseReader extends FileReader { imagePath: string, jobId: string, ): Promise<void> { - const headers = { Authorization: `Bearer ${this.apiKey}` }; - // Construct the image URL - const imageUrl = `${this.baseUrl}/job/${jobId}/result/image/${imageName}`; - const response = await fetch(imageUrl, { headers }); - if (!response.ok) { - throw new Error(`Failed to download image: ${await response.text()}`); + const response = + await ParsingService.getJobImageResultApiV1ParsingJobJobIdResultImageNameGet( + { + client: this.#client, + path: { + job_id: jobId, + name: imageName, + }, + }, + ); + if (response.error) { + throw new Error(`Failed to download image: ${response.error.detail}`); } - // Convert the response to an ArrayBuffer and then to a Buffer - const arrayBuffer = await response.arrayBuffer(); + const arrayBuffer = (await response.data) as ArrayBuffer; const buffer = new Uint8Array(arrayBuffer); // Write the image buffer to the specified imagePath await fs.writeFile(imagePath, buffer); diff --git a/packages/cloud/src/utils.ts b/packages/cloud/src/utils.ts new file mode 100644 index 000000000..f9a5c4a74 --- /dev/null +++ b/packages/cloud/src/utils.ts @@ -0,0 +1,3 @@ +export async function sleep(ms: number): Promise<void> { + return new Promise((resolve) => setTimeout(resolve, ms)); +} diff --git a/packages/cloud/tsconfig.json b/packages/cloud/tsconfig.json index f8a54fc7b..c82970826 100644 --- a/packages/cloud/tsconfig.json +++ b/packages/cloud/tsconfig.json @@ -8,8 +8,17 @@ "moduleResolution": "Bundler", "skipLibCheck": true, "strict": true, - "lib": ["DOM", "ESNext"] + "lib": ["DOM", "ESNext"], + "types": [] }, "include": ["./src"], - "exclude": ["node_modules"] + "exclude": ["node_modules"], + "references": [ + { + "path": "../core/tsconfig.json" + }, + { + "path": "../env/tsconfig.json" + } + ] } diff --git a/packages/llamaindex/src/readers/index.ts b/packages/llamaindex/src/readers/index.ts index cc83dfb9f..58fcec2c7 100644 --- a/packages/llamaindex/src/readers/index.ts +++ b/packages/llamaindex/src/readers/index.ts @@ -1,3 +1,8 @@ +export { + LlamaParseReader, + type Language, + type ResultType, +} from "@llamaindex/cloud/reader"; export * from "./AssemblyAIReader.js"; export * from "./CSVReader.js"; export * from "./DiscordReader.js"; @@ -5,7 +10,6 @@ export * from "./DocxReader.js"; export * from "./HTMLReader.js"; export * from "./ImageReader.js"; export * from "./JSONReader.js"; -export * from "./LlamaParseReader.js"; export * from "./MarkdownReader.js"; export * from "./NotionReader.js"; export * from "./PDFReader.js"; diff --git a/packages/llamaindex/tsconfig.json b/packages/llamaindex/tsconfig.json index 067420783..072ea6123 100644 --- a/packages/llamaindex/tsconfig.json +++ b/packages/llamaindex/tsconfig.json @@ -18,6 +18,9 @@ }, { "path": "../env/tsconfig.json" + }, + { + "path": "../cloud/tsconfig.json" } ] } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 266fde8e2..7753eecf5 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -340,6 +340,10 @@ importers: version: 5.6.2 packages/cloud: + dependencies: + magic-bytes.js: + specifier: ^1.10.0 + version: 1.10.0 devDependencies: '@hey-api/client-fetch': specifier: ^0.2.4 @@ -347,6 +351,12 @@ importers: '@hey-api/openapi-ts': specifier: ^0.53.0 version: 0.53.0(typescript@5.6.2) + '@llamaindex/core': + specifier: workspace:^0.2.0 + version: link:../core + '@llamaindex/env': + specifier: workspace:^0.1.10 + version: link:../env bunchee: specifier: 5.3.2 version: 5.3.2(typescript@5.6.2) -- GitLab