From cd70d3b47807fb5478bd9b7e57617737fcd512f7 Mon Sep 17 00:00:00 2001
From: George He <georgewho96@gmail.com>
Date: Sun, 16 Feb 2025 13:11:56 -0800
Subject: [PATCH] Improve error logs for llamacloud indices (#17827)

---
 .../indices/managed/llama_cloud/base.py      | 23 ++++++++++++++-----
 .../pyproject.toml                           |  2 +-
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/llama_index/indices/managed/llama_cloud/base.py b/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/llama_index/indices/managed/llama_cloud/base.py
index c3cac40b3..9da112316 100644
--- a/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/llama_index/indices/managed/llama_cloud/base.py
+++ b/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/llama_index/indices/managed/llama_cloud/base.py
@@ -12,6 +12,7 @@ from typing import Any, List, Optional, Sequence, Type
 from urllib.parse import quote_plus
 
 from llama_cloud import (
+    ManagedIngestionStatusResponse,
     PipelineCreate,
     PipelineCreateEmbeddingConfig,
     PipelineCreateTransformConfig,
@@ -165,31 +166,41 @@ class LlamaCloudIndex(BaseManagedIndex):
         self,
         verbose: bool = False,
         raise_on_partial_success: bool = False,
-    ) -> None:
+        sleep_interval: float = 0.5,
+    ) -> Optional[ManagedIngestionStatusResponse]:
+        if sleep_interval < 0.5:
+            # minimum sleep interval at 0.5 seconds to prevent rate-limiting
+            sleep_interval = 0.5
         if verbose:
-            print("Syncing pipeline: ", end="")
+            print(f"Syncing pipeline {self.pipeline.id}: ", end="")
 
         is_done = False
+        status_response: Optional[ManagedIngestionStatusResponse] = None
         while not is_done:
-            status = self._client.pipelines.get_pipeline_status(
+            status_response = self._client.pipelines.get_pipeline_status(
                 pipeline_id=self.pipeline.id
-            ).status
+            )
+            status = status_response.status
             if status == ManagedIngestionStatus.ERROR or (
                 raise_on_partial_success
                 and status == ManagedIngestionStatus.PARTIAL_SUCCESS
             ):
-                raise ValueError(f"Pipeline ingestion failed for {self.pipeline.id}")
+                error_details = status_response.json()
+                raise ValueError(
+                    f"Pipeline ingestion failed for {self.pipeline.id}. Error details: {error_details}"
+                )
             elif status in [
                 ManagedIngestionStatus.NOT_STARTED,
                 ManagedIngestionStatus.IN_PROGRESS,
             ]:
                 if verbose:
                     print(".", end="")
-                time.sleep(0.5)
+                time.sleep(sleep_interval)
             else:
                 is_done = True
         if verbose:
             print("Done!")
+        return status_response
 
     def _wait_for_file_ingestion(
         self,
diff --git a/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/pyproject.toml b/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/pyproject.toml
index b0a344c59..f967eca32 100644
--- a/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/pyproject.toml
+++ b/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/pyproject.toml
@@ -34,7 +34,7 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-indices-managed-llama-cloud"
 readme = "README.md"
-version = "0.6.4"
+version = "0.6.5"
 
 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
--
GitLab
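
The sketch below illustrates how the patched behavior might surface to a caller: the new sleep_interval argument, the ManagedIngestionStatusResponse return value, and the richer ValueError message that now embeds the serialized status response. It is a minimal sketch, not part of the patch: the helper's name sits outside the hunk context and is assumed here to be _wait_for_pipeline_ingestion, and the index name, project name, and API key are placeholders.

# Usage sketch (not part of the patch). Assumes the patched helper is the
# pipeline-ingestion wait method (_wait_for_pipeline_ingestion is assumed)
# and that the index is built with typical name/project_name/api_key
# arguments; all values are placeholders.
from llama_index.indices.managed.llama_cloud import LlamaCloudIndex

index = LlamaCloudIndex(
    name="my-index",            # placeholder index name
    project_name="my-project",  # placeholder project name
    api_key="llx-...",          # placeholder API key
)

try:
    # sleep_interval values below 0.5 are clamped to 0.5 by the patch to
    # avoid rate-limiting; the call now returns the final status response
    # instead of None.
    status_response = index._wait_for_pipeline_ingestion(
        verbose=True,
        raise_on_partial_success=True,
        sleep_interval=1.0,
    )
    if status_response is not None:
        print("Final ingestion status:", status_response.status)
except ValueError as err:
    # The error message now includes the serialized status response, e.g.
    # "Pipeline ingestion failed for <pipeline_id>. Error details: {...}"
    print(err)

Existing callers keep the old polling cadence, since sleep_interval defaults to 0.5 seconds and smaller values are clamped back up to 0.5.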