From cd70d3b47807fb5478bd9b7e57617737fcd512f7 Mon Sep 17 00:00:00 2001
From: George He <georgewho96@gmail.com>
Date: Sun, 16 Feb 2025 13:11:56 -0800
Subject: [PATCH] Improve error logs for llamacloud indices (#17827)

---
 .../indices/managed/llama_cloud/base.py      | 23 ++++++++++++++-----
 .../pyproject.toml                           |  2 +-
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/llama_index/indices/managed/llama_cloud/base.py b/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/llama_index/indices/managed/llama_cloud/base.py
index c3cac40b3..9da112316 100644
--- a/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/llama_index/indices/managed/llama_cloud/base.py
+++ b/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/llama_index/indices/managed/llama_cloud/base.py
@@ -12,6 +12,7 @@ from typing import Any, List, Optional, Sequence, Type
 from urllib.parse import quote_plus
 
 from llama_cloud import (
+    ManagedIngestionStatusResponse,
     PipelineCreate,
     PipelineCreateEmbeddingConfig,
     PipelineCreateTransformConfig,
@@ -165,31 +166,41 @@ class LlamaCloudIndex(BaseManagedIndex):
         self,
         verbose: bool = False,
         raise_on_partial_success: bool = False,
-    ) -> None:
+        sleep_interval: float = 0.5,
+    ) -> Optional[ManagedIngestionStatusResponse]:
+        if sleep_interval < 0.5:
+            # minimum sleep interval at 0.5 seconds to prevent rate-limiting
+            sleep_interval = 0.5
         if verbose:
-            print("Syncing pipeline: ", end="")
+            print(f"Syncing pipeline {self.pipeline.id}: ", end="")
 
         is_done = False
+        status_response: Optional[ManagedIngestionStatusResponse] = None
         while not is_done:
-            status = self._client.pipelines.get_pipeline_status(
+            status_response = self._client.pipelines.get_pipeline_status(
                 pipeline_id=self.pipeline.id
-            ).status
+            )
+            status = status_response.status
             if status == ManagedIngestionStatus.ERROR or (
                 raise_on_partial_success
                 and status == ManagedIngestionStatus.PARTIAL_SUCCESS
             ):
-                raise ValueError(f"Pipeline ingestion failed for {self.pipeline.id}")
+                error_details = status_response.json()
+                raise ValueError(
+                    f"Pipeline ingestion failed for {self.pipeline.id}. Error details: {error_details}"
+                )
             elif status in [
                 ManagedIngestionStatus.NOT_STARTED,
                 ManagedIngestionStatus.IN_PROGRESS,
             ]:
                 if verbose:
                     print(".", end="")
-                time.sleep(0.5)
+                time.sleep(sleep_interval)
             else:
                 is_done = True
         if verbose:
             print("Done!")
+        return status_response
 
     def _wait_for_file_ingestion(
         self,
diff --git a/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/pyproject.toml b/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/pyproject.toml
index b0a344c59..f967eca32 100644
--- a/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/pyproject.toml
+++ b/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/pyproject.toml
@@ -34,7 +34,7 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-indices-managed-llama-cloud"
 readme = "README.md"
-version = "0.6.4"
+version = "0.6.5"
 
 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
--
GitLab
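
The sketch below illustrates how the patched behavior might surface to a caller: the new sleep_interval argument, the ManagedIngestionStatusResponse return value, and the richer ValueError message that now embeds the serialized status response. It is a minimal sketch, not part of the patch: the helper's name sits outside the hunk context and is assumed here to be _wait_for_pipeline_ingestion, and the index name, project name, and API key are placeholders.

# Usage sketch (not part of the patch). Assumes the patched helper is the
# pipeline-ingestion wait method (_wait_for_pipeline_ingestion is assumed)
# and that the index is built with typical name/project_name/api_key
# arguments; all values are placeholders.
from llama_index.indices.managed.llama_cloud import LlamaCloudIndex

index = LlamaCloudIndex(
    name="my-index",            # placeholder index name
    project_name="my-project",  # placeholder project name
    api_key="llx-...",          # placeholder API key
)

try:
    # sleep_interval values below 0.5 are clamped to 0.5 by the patch to
    # avoid rate-limiting; the call now returns the final status response
    # instead of None.
    status_response = index._wait_for_pipeline_ingestion(
        verbose=True,
        raise_on_partial_success=True,
        sleep_interval=1.0,
    )
    if status_response is not None:
        print("Final ingestion status:", status_response.status)
except ValueError as err:
    # The error message now includes the serialized status response, e.g.
    # "Pipeline ingestion failed for <pipeline_id>. Error details: {...}"
    print(err)

Existing callers keep the old polling cadence, since sleep_interval defaults to 0.5 seconds and smaller values are clamped back up to 0.5.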