diff --git a/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/llama_index/indices/managed/llama_cloud/base.py b/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/llama_index/indices/managed/llama_cloud/base.py
index 5ce75df307596e6bd11bb0f1c62d158db623b854..49497582d9f782b5452235bf46765cb62ce9ab91 100644
--- a/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/llama_index/indices/managed/llama_cloud/base.py
+++ b/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/llama_index/indices/managed/llama_cloud/base.py
@@ -130,6 +130,40 @@ class LlamaCloudIndex(BaseManagedIndex):
         if verbose:
             print("Done!")
 
+    def _wait_for_file_ingestion(
+        self,
+        file_id: str,
+        verbose: bool = False,
+        raise_on_error: bool = False,
+    ) -> None:
+        pipeline_id = self._get_pipeline_id()
+        client = self._client
+        if verbose:
+            print("Loading file: ", end="")
+
+        # wait until the file is loaded
+        is_done = False
+        while not is_done:
+            status = client.pipelines.get_pipeline_file_status(
+                pipeline_id=pipeline_id, file_id=file_id
+            ).status
+            if status == ManagedIngestionStatus.ERROR:
+                if verbose:
+                    print(f"File ingestion failed for {file_id}")
+                if raise_on_error:
+                    raise ValueError(f"File ingestion failed for {file_id}")
+            elif status in [
+                ManagedIngestionStatus.NOT_STARTED,
+                ManagedIngestionStatus.IN_PROGRESS,
+            ]:
+                if verbose:
+                    print(".", end="")
+                time.sleep(0.5)
+            else:
+                is_done = True
+        if verbose:
+            print("Done!")
+
     def _wait_for_documents_ingestion(
         self,
         doc_ids: List[str],
@@ -461,6 +495,8 @@ class LlamaCloudIndex(BaseManagedIndex):
         file_path: str,
         resource_info: Optional[Dict[str, Any]] = None,
         verbose: bool = False,
+        wait_for_ingestion: bool = True,
+        raise_on_error: bool = False,
     ) -> str:
         """Upload a file to the index."""
         with open(file_path, "rb") as f:
@@ -478,9 +514,10 @@
             pipeline_id=pipeline_id, request=[pipeline_file_create]
         )
 
-        self._wait_for_pipeline_ingestion(
-            verbose=verbose, raise_on_partial_success=False
-        )
+        if wait_for_ingestion:
+            self._wait_for_file_ingestion(
+                file.id, verbose=verbose, raise_on_error=raise_on_error
+            )
         return file.id
 
     def upload_file_from_url(
@@ -492,6 +529,8 @@
         self,
         url: str,
         proxy_url: Optional[str] = None,
         request_headers: Optional[Dict[str, str]] = None,
         verify_ssl: bool = True,
         follow_redirects: bool = True,
         verbose: bool = False,
+        wait_for_ingestion: bool = True,
+        raise_on_error: bool = False,
     ) -> str:
         """Upload a file from a URL to the index."""
         file = self._client.files.upload_file_from_url(
@@ -512,9 +551,10 @@
             pipeline_id=pipeline_id, request=[pipeline_file_create]
         )
 
-        self._wait_for_pipeline_ingestion(
-            verbose=verbose, raise_on_partial_success=False
-        )
+        if wait_for_ingestion:
+            self._wait_for_file_ingestion(
+                file.id, verbose=verbose, raise_on_error=raise_on_error
+            )
         return file.id
 
     # Nodes related methods (not implemented for LlamaCloudIndex)
diff --git a/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/pyproject.toml b/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/pyproject.toml
index 9810d623c0c122c7f4d173a3bdb99a2ca64ea08b..18f6bc81df5881ccf21054e4ca46ca82d34632c1 100644
--- a/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/pyproject.toml
+++ b/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/pyproject.toml
@@ -34,7 +34,7 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-indices-managed-llama-cloud"
 readme = "README.md"
-version = "0.4.1"
+version = "0.4.2"
 
 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0"