diff --git a/llama-index-core/llama_index/core/multi_modal_llms/generic_utils.py b/llama-index-core/llama_index/core/multi_modal_llms/generic_utils.py
index 8cba775c0ed80266e892b123536eaed289e5fb54..d274138996b76dec58fa7fdad1328b4bd9d5ddd5 100644
--- a/llama-index-core/llama_index/core/multi_modal_llms/generic_utils.py
+++ b/llama-index-core/llama_index/core/multi_modal_llms/generic_utils.py
@@ -1,7 +1,7 @@
 import base64
 import filetype
 import logging
-from typing import List, Sequence, Optional
+from typing import List, Optional, Sequence
 
 import requests
 
@@ -91,6 +91,7 @@ def infer_image_mimetype_from_file_path(image_file_path: str) -> str:
 
     Returns:
         str: MIME type of the image: image/jpeg, image/png, image/gif, or image/webp.
+            Defaults to `image/jpeg`.
     """
     # Get the file extension
     file_extension = image_file_path.split(".")[-1].lower()
@@ -117,6 +118,7 @@ def infer_image_mimetype_from_base64(base64_string: str) -> Optional[str]:
 
     Returns:
         Optional[str]: MIME type of the image: image/jpeg, image/png, image/gif, or image/webp.
+            `None` if the MIME type cannot be inferred.
     """
     # Decode the base64 string
     decoded_data = base64.b64decode(base64_string)
@@ -126,3 +128,28 @@
 
     # Return the MIME type if detected, otherwise return None
     return kind.mime if kind is not None else None
+
+
+def set_base64_and_mimetype_for_image_docs(
+    image_documents: Sequence[ImageDocument],
+) -> Sequence[ImageDocument]:
+    """Set the base64 and mimetype fields for the image documents.
+
+    Args:
+        image_documents (Sequence[ImageDocument]): Sequence of ImageDocument objects.
+
+    Returns:
+        Sequence[ImageDocument]: ImageDocuments with base64 and detected mimetypes set.
+    """
+    base64_strings = image_documents_to_base64(image_documents)
+    for image_doc, base64_str in zip(image_documents, base64_strings):
+        image_doc.image = base64_str
+        image_doc.image_mimetype = infer_image_mimetype_from_base64(image_doc.image)
+        if not image_doc.image_mimetype and image_doc.image_path:
+            image_doc.image_mimetype = infer_image_mimetype_from_file_path(
+                image_doc.image_path
+            )
+        elif not image_doc.image_mimetype:
+            # Defaults to `image/jpeg` if the mimetype cannot be inferred
+            image_doc.image_mimetype = "image/jpeg"
+    return image_documents
diff --git a/llama-index-core/tests/multi_modal_llms/test_generic_utils.py b/llama-index-core/tests/multi_modal_llms/test_generic_utils.py
index 20759c90497dfdfaf8cf94544d6f320242001bb7..dd47d0ef9f9e3b81e8f72a0d8bb90b35fe730a9a 100644
--- a/llama-index-core/tests/multi_modal_llms/test_generic_utils.py
+++ b/llama-index-core/tests/multi_modal_llms/test_generic_utils.py
@@ -12,6 +12,7 @@ from llama_index.core.multi_modal_llms.generic_utils import (
     image_documents_to_base64,
     infer_image_mimetype_from_base64,
     infer_image_mimetype_from_file_path,
+    set_base64_and_mimetype_for_image_docs,
 )
 
 # Expected values
@@ -102,6 +103,7 @@ def test_complete_workflow():
 
 
 def test_infer_image_mimetype_from_base64():
+    """Test inferring image mimetype from base64-encoded data."""
     # Create a minimal valid PNG in base64
     base64_png = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAACklEQVR4nGMAAQAABQABDQottAAAAABJRU5ErkJggg=="
 
@@ -109,11 +111,12 @@
     assert result == "image/png"
 
     # Valid, meaningless base64
-    result = infer_image_mimetype_from_base64("lEQVR4nGMAAQAABQABDQ")
+    result = infer_image_mimetype_from_base64(EXP_BASE64)
    assert result is None
 
 
 def test_infer_image_mimetype_from_file_path():
+    """Test inferring image mimetype from file extensions."""
     # JPG/JPEG
     assert infer_image_mimetype_from_file_path("image.jpg") == "image/jpeg"
     assert infer_image_mimetype_from_file_path("image.jpeg") == "image/jpeg"
@@ -127,5 +130,24 @@
     # WEBP
     assert infer_image_mimetype_from_file_path("image.webp") == "image/webp"
 
-    # Catch-all default
+    # Catch-all defaults
     assert infer_image_mimetype_from_file_path("image.asf32") == "image/jpeg"
+    assert infer_image_mimetype_from_file_path("") == "image/jpeg"
+
+
+def test_set_base64_and_mimetype_for_image_docs():
+    """Test setting base64 and mimetype fields for ImageDocument objects."""
+    image_docs = [
+        ImageDocument(image=EXP_BASE64),
+        ImageDocument(image_path="test.asdf"),
+    ]
+
+    with patch("requests.get") as mock_get:
+        mock_get.return_value.content = EXP_BINARY
+        with patch("builtins.open", mock_open(read_data=EXP_BINARY)):
+            results = set_base64_and_mimetype_for_image_docs(image_docs)
+
+    assert len(results) == 2
+    assert results[0].image == EXP_BASE64
+    assert results[0].image_mimetype == "image/jpeg"
+    assert results[1].image_mimetype == "image/jpeg"
diff --git a/llama-index-integrations/llms/llama-index-llms-anthropic/llama_index/llms/anthropic/utils.py b/llama-index-integrations/llms/llama-index-llms-anthropic/llama_index/llms/anthropic/utils.py
index d8cd988a4a95ddeaed26fa769a5179eafd322f6c..ab8f0ea3542e7b1923e22e5ed733c199cfc43fc0 100644
--- a/llama-index-integrations/llms/llama-index-llms-anthropic/llama_index/llms/anthropic/utils.py
+++ b/llama-index-integrations/llms/llama-index-llms-anthropic/llama_index/llms/anthropic/utils.py
@@ -1,3 +1,7 @@
+"""
+Utility functions for the Anthropic SDK LLM integration.
+"""
+
 from typing import Dict, Sequence, Tuple
 
 from llama_index.core.base.llms.types import ChatMessage, ChatResponse, MessageRole
@@ -13,33 +17,56 @@ from anthropic.types.beta.prompt_caching import (
 HUMAN_PREFIX = "\n\nHuman:"
 ASSISTANT_PREFIX = "\n\nAssistant:"
 
-CLAUDE_MODELS: Dict[str, int] = {
+# AWS Bedrock Anthropic identifiers
+BEDROCK_INFERENCE_PROFILE_CLAUDE_MODELS: Dict[str, int] = {
+    "anthropic.claude-3-haiku-20240307-v1:0": 200000,
+    "anthropic.claude-3-sonnet-20240229-v1:0": 200000,
+    "anthropic.claude-3-opus-20240229-v1:0": 200000,
+    "anthropic.claude-3-5-sonnet-20240620-v1:0": 200000,
+    "anthropic.claude-3-5-sonnet-20241022-v2:0": 200000,
+    "anthropic.claude-3-5-haiku-20241022-v1:0": 200000,
+}
+BEDROCK_CLAUDE_MODELS: Dict[str, int] = {
+    "anthropic.claude-instant-v1": 100000,
+    "anthropic.claude-v2": 100000,
+    "anthropic.claude-v2:1": 200000,
+}
+
+# GCP Vertex AI Anthropic identifiers
+VERTEX_CLAUDE_MODELS: Dict[str, int] = {
+    "claude-3-opus@20240229": 200000,
+    "claude-3-sonnet@20240229": 200000,
+    "claude-3-haiku@20240307": 200000,
+    "claude-3-5-sonnet@20240620": 200000,
+    "claude-3-5-sonnet-v2@20241022": 200000,
+    "claude-3-5-haiku@20241022": 200000,
+}
+
+# Anthropic API/SDK identifiers
+ANTHROPIC_MODELS: Dict[str, int] = {
     "claude-instant-1": 100000,
     "claude-instant-1.2": 100000,
     "claude-2": 100000,
     "claude-2.0": 100000,
     "claude-2.1": 200000,
-    "claude-3-opus-latest": 180000,
-    "claude-3-opus-20240229": 180000,
-    "claude-3-opus@20240229": 180000,  # Alternate name for Vertex AI
-    "anthropic.claude-3-opus-20240229-v1:0": 180000,  # Alternate name for Bedrock
-    "claude-3-sonnet-latest": 180000,
-    "claude-3-sonnet-20240229": 180000,
-    "claude-3-sonnet@20240229": 180000,  # Alternate name for Vertex AI
-    "anthropic.claude-3-sonnet-20240229-v1:0": 180000,  # Alternate name for Bedrock
"claude-3-haiku-latest": 180000, - "claude-3-haiku-20240307": 180000, - "claude-3-haiku@20240307": 180000, # Alternate name for Vertex AI - "anthropic.claude-3-haiku-20240307-v1:0": 180000, # Alternate name for Bedrock - "claude-3-5-sonnet-latest": 180000, - "claude-3-5-sonnet-20240620": 180000, - "claude-3-5-sonnet-20241022": 180000, - "claude-3-5-sonnet-v2@20241022": 180000, # Alternate name for Vertex AI - "anthropic.claude-3-5-sonnet-20241022-v2:0": 180000, # Alternate name for Bedrock - "claude-3-5-sonnet@20240620": 180000, # Alternate name for Vertex AI - "claude-3-5-haiku-20241022": 180000, - "claude-3-5-haiku@20241022": 180000, # Alternate name for Vertex AI - "anthropic.claude-3-5-haiku-20241022-v1:0": 180000, # Alternate name for Bedrock + "claude-3-opus-latest": 200000, + "claude-3-opus-20240229": 200000, + "claude-3-sonnet-latest": 200000, + "claude-3-sonnet-20240229": 200000, + "claude-3-haiku-latest": 200000, + "claude-3-haiku-20240307": 200000, + "claude-3-5-sonnet-latest": 200000, + "claude-3-5-sonnet-20240620": 200000, + "claude-3-5-sonnet-20241022": 200000, + "claude-3-5-haiku-20241022": 200000, +} + +# All provider Anthropic identifiers +CLAUDE_MODELS: Dict[str, int] = { + **BEDROCK_INFERENCE_PROFILE_CLAUDE_MODELS, + **BEDROCK_CLAUDE_MODELS, + **VERTEX_CLAUDE_MODELS, + **ANTHROPIC_MODELS, } @@ -48,6 +75,19 @@ def is_function_calling_model(modelname: str) -> bool: def anthropic_modelname_to_contextsize(modelname: str) -> int: + """Get the context size for an Anthropic model. + + Args: + modelname (str): Anthropic model name. + + Returns: + int: Context size for the specific model. + """ + for model, context_size in BEDROCK_INFERENCE_PROFILE_CLAUDE_MODELS.items(): + # Only US & EU inference profiles are currently supported by AWS + CLAUDE_MODELS[f"us.{model}"] = context_size + CLAUDE_MODELS[f"eu.{model}"] = context_size + if modelname not in CLAUDE_MODELS: raise ValueError( f"Unknown model: {modelname}. Please provide a valid Anthropic model name."