diff --git a/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/llama_index/indices/managed/llama_cloud/base.py b/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/llama_index/indices/managed/llama_cloud/base.py
index b67fe17b787dd8af3fbe1514c51a07139efd4b47..5ce75df307596e6bd11bb0f1c62d158db623b854 100644
--- a/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/llama_index/indices/managed/llama_cloud/base.py
+++ b/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/llama_index/indices/managed/llama_cloud/base.py
@@ -17,6 +17,7 @@ from llama_cloud import (
     ManagedIngestionStatus,
     CloudDocumentCreate,
     CloudDocument,
+    PipelineFileCreate,
 )
 
 from llama_index.core.base.base_query_engine import BaseQueryEngine
@@ -455,6 +456,67 @@ class LlamaCloudIndex(BaseManagedIndex):
             verbose=verbose, raise_on_partial_success=False
         )
 
+    def upload_file(
+        self,
+        file_path: str,
+        resource_info: Optional[Dict[str, Any]] = None,
+        verbose: bool = False,
+    ) -> str:
+        """Upload a file to the index."""
+        with open(file_path, "rb") as f:
+            file = self._client.files.upload_file(
+                project_id=self._get_project_id(), upload_file=f
+            )
+            if verbose:
+                print(f"Uploaded file {file.id} with name {file.name}")
+            if resource_info:
+                self._client.files.update(file_id=file.id, request=resource_info)
+        # Add file to pipeline
+        pipeline_id = self._get_pipeline_id()
+        pipeline_file_create = PipelineFileCreate(file_id=file.id)
+        self._client.pipelines.add_files_to_pipeline(
+            pipeline_id=pipeline_id, request=[pipeline_file_create]
+        )
+
+        self._wait_for_pipeline_ingestion(
+            verbose=verbose, raise_on_partial_success=False
+        )
+        return file.id
+
+    def upload_file_from_url(
+        self,
+        file_name: str,
+        url: str,
+        proxy_url: Optional[str] = None,
+        request_headers: Optional[Dict[str, str]] = None,
+        verify_ssl: bool = True,
+        follow_redirects: bool = True,
+        verbose: bool = False,
+    ) -> str:
+        """Upload a file from a URL to the index."""
+        file = self._client.files.upload_file_from_url(
+            project_id=self._get_project_id(),
+            name=file_name,
+            url=url,
+            proxy_url=proxy_url,
+            request_headers=request_headers,
+            verify_ssl=verify_ssl,
+            follow_redirects=follow_redirects,
+        )
+        if verbose:
+            print(f"Uploaded file {file.id} with name {file.name}")
+        # Add file to pipeline
+        pipeline_id = self._get_pipeline_id()
+        pipeline_file_create = PipelineFileCreate(file_id=file.id)
+        self._client.pipelines.add_files_to_pipeline(
+            pipeline_id=pipeline_id, request=[pipeline_file_create]
+        )
+
+        self._wait_for_pipeline_ingestion(
+            verbose=verbose, raise_on_partial_success=False
+        )
+        return file.id
+
     # Nodes related methods (not implemented for LlamaCloudIndex)
 
     def _insert(self, nodes: Sequence[BaseNode], **insert_kwargs: Any) -> None:
diff --git a/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/pyproject.toml b/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/pyproject.toml
index 91df56a18b8a8353c272ef635b881dba4efa4f88..9810d623c0c122c7f4d173a3bdb99a2ca64ea08b 100644
--- a/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/pyproject.toml
+++ b/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/pyproject.toml
@@ -34,11 +34,11 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-indices-managed-llama-cloud"
 readme = "README.md"
-version = "0.4.0"
+version = "0.4.1"
 
 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0"
-llama-cloud = ">=0.0.11"
+llama-cloud = ">=0.1.5"
 llama-index-core = "^0.11.13.post1"
 
 [tool.poetry.group.dev.dependencies]
diff --git a/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/tests/test_indices_managed_llama_cloud.py b/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/tests/test_indices_managed_llama_cloud.py
index ad8f19d747c52fcd65c3cb539a0298170a5b9d6b..d1f3887e59e1a13c05db5983c3bcff2dce75b1a8 100644
--- a/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/tests/test_indices_managed_llama_cloud.py
+++ b/llama-index-integrations/indices/llama-index-indices-managed-llama-cloud/tests/test_indices_managed_llama_cloud.py
@@ -1,4 +1,5 @@
 from typing import Optional
+import tempfile
 from llama_index.core.indices.managed.base import BaseManagedIndex
 from llama_index.indices.managed.llama_cloud import LlamaCloudIndex
 from llama_index.core.schema import Document
@@ -18,7 +19,7 @@ def test_class():
 
 
 @pytest.mark.skipif(
-    not base_url or not api_key, reason="No platform base url or api keyset"
+    not base_url or not api_key, reason="No platform base url or api key set"
 )
 @pytest.mark.skipif(not openai_api_key, reason="No openai api key set")
 @pytest.mark.integration()
@@ -40,7 +41,7 @@ def test_retrieve():
 
 @pytest.mark.parametrize("organization_id", [None, organization_id])
 @pytest.mark.skipif(
-    not base_url or not api_key, reason="No platform base url or api keyset"
+    not base_url or not api_key, reason="No platform base url or api key set"
 )
 @pytest.mark.skipif(not openai_api_key, reason="No openai api key set")
 @pytest.mark.integration()
@@ -101,3 +102,68 @@ def test_documents_crud(organization_id: Optional[str]):
     docs = index.ref_doc_info
     assert len(docs) == 2
     assert "3" not in docs
+
+
+@pytest.mark.skipif(
+    not base_url or not api_key, reason="No platform base url or api key set"
+)
+@pytest.mark.skipif(not openai_api_key, reason="No openai api key set")
+@pytest.mark.integration()
+def test_upload_file():
+    os.environ["OPENAI_API_KEY"] = openai_api_key
+    index = LlamaCloudIndex(
+        name="test",  # assumes this pipeline exists
+        project_name="Default",
+        api_key=api_key,
+        base_url=base_url,
+    )
+
+    # Create a temporary file to upload
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
+        temp_file.write(b"Sample content for testing upload.")
+        temp_file_path = temp_file.name
+
+    try:
+        # Upload the file
+        file_id = index.upload_file(temp_file_path, verbose=True)
+        assert file_id is not None
+
+        # Verify the file is part of the index
+        docs = index.ref_doc_info
+        temp_file_name = os.path.basename(temp_file_path)
+        assert any(
+            temp_file_name == doc.metadata.get("file_name") for doc in docs.values()
+        )
+
+    finally:
+        # Clean up the temporary file
+        os.remove(temp_file_path)
+
+
+@pytest.mark.skipif(
+    not base_url or not api_key, reason="No platform base url or api key set"
+)
+@pytest.mark.skipif(not openai_api_key, reason="No openai api key set")
+@pytest.mark.integration()
+def test_upload_file_from_url():
+    os.environ["OPENAI_API_KEY"] = openai_api_key
+    index = LlamaCloudIndex(
+        name="test",  # assumes this pipeline exists
+        project_name="Default",
+        api_key=api_key,
+        base_url=base_url,
+    )
+
+    # Define a URL to a file for testing
+    test_file_url = "https://www.google.com/robots.txt"
+    test_file_name = "google_robots.txt"
+
+    # Upload the file from the URL
+    file_id = index.upload_file_from_url(
+        file_name=test_file_name, url=test_file_url, verbose=True
+    )
+    assert file_id is not None
+
+    # Verify the file is part of the index
+    docs = index.ref_doc_info
+    assert any(test_file_name == doc.metadata.get("file_name") for doc in docs.values())
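A minimal usage sketch of the two new methods. The pipeline name "test", project "Default", and local path "data/example.txt" are illustrative assumptions, and the snippet assumes a valid LLAMA_CLOUD_API_KEY is available in the environment.

from llama_index.indices.managed.llama_cloud import LlamaCloudIndex

# Connect to an existing managed pipeline (names are assumptions, not
# values mandated by this change).
index = LlamaCloudIndex(name="test", project_name="Default")

# Upload a local file; the call blocks until pipeline ingestion finishes
# and returns the LlamaCloud file ID.
file_id = index.upload_file("data/example.txt", verbose=True)

# Upload a remote file by URL, stored under the given display name.
url_file_id = index.upload_file_from_url(
    file_name="google_robots.txt",
    url="https://www.google.com/robots.txt",
    verbose=True,
)

Both methods register the uploaded file with the index's pipeline via add_files_to_pipeline and wait for ingestion, so the file is queryable as soon as the call returns.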