diff --git a/docs/source/conf.py b/docs/source/conf.py index a16c1325337ff130dc52e8600f424db4e04a86b9..7f3485122f1992618712a76ccfb24ee1e9a4bdb9 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -15,7 +15,7 @@ sys.path.insert(0, os.path.abspath("../..")) # Source code dir relative to this project = "Semantic Router" copyright = "2025, Aurelio AI" author = "Aurelio AI" -release = "0.1.0.dev10" +release = "0.1.1" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/pyproject.toml b/pyproject.toml index 3375e9d4bb839ea050d25e3c37f94407c7188176..2a10684582c4384ff743a0229e0dbfd36ab3d9a9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "semantic-router" -version = "0.1.0" +version = "0.1.1" description = "Super fast semantic router for AI decision making" authors = [{ name = "Aurelio AI", email = "hello@aurelio.ai" }] requires-python = ">=3.9,<3.14" diff --git a/semantic_router/__init__.py b/semantic_router/__init__.py index d9d8cba5af2fe6fdb0da5080315fc59915e9744f..3d7a4d7a867bfc2e120ac91dcaf5f4f70e99547b 100644 --- a/semantic_router/__init__.py +++ b/semantic_router/__init__.py @@ -3,4 +3,4 @@ from semantic_router.routers import HybridRouter, RouterConfig, SemanticRouter __all__ = ["SemanticRouter", "HybridRouter", "Route", "RouterConfig"] -__version__ = "0.1.0.dev10" +__version__ = "0.1.1" diff --git a/semantic_router/encoders/__init__.py b/semantic_router/encoders/__init__.py index 5fdeb789098aa009996df1c6b264f1db1bbeaf82..764e7cafa30c0171d38955034684527e0921ba77 100644 --- a/semantic_router/encoders/__init__.py +++ b/semantic_router/encoders/__init__.py @@ -1,7 +1,8 @@ from typing import List, Optional +from semantic_router.encoders.base import DenseEncoder, SparseEncoder # isort: skip from semantic_router.encoders.aurelio import AurelioSparseEncoder -from semantic_router.encoders.base import DenseEncoder, SparseEncoder +from semantic_router.encoders.azure_openai import AzureOpenAIEncoder from semantic_router.encoders.bedrock import BedrockEncoder from semantic_router.encoders.bm25 import BM25Encoder from semantic_router.encoders.clip import CLIPEncoder @@ -13,7 +14,6 @@ from semantic_router.encoders.mistral import MistralEncoder from semantic_router.encoders.openai import OpenAIEncoder from semantic_router.encoders.tfidf import TfidfEncoder from semantic_router.encoders.vit import VitEncoder -from semantic_router.encoders.zure import AzureOpenAIEncoder from semantic_router.schema import EncoderType, SparseEmbedding __all__ = [ @@ -45,8 +45,7 @@ class AutoEncoder: self.type = EncoderType(type) self.name = name if self.type == EncoderType.AZURE: - # TODO should change `model` to `name` JB - self.model = AzureOpenAIEncoder(model=name) + self.model = AzureOpenAIEncoder(name=name) elif self.type == EncoderType.COHERE: self.model = CohereEncoder(name=name) elif self.type == EncoderType.OPENAI: diff --git a/semantic_router/encoders/zure.py b/semantic_router/encoders/azure_openai.py similarity index 51% rename from semantic_router/encoders/zure.py rename to semantic_router/encoders/azure_openai.py index faab1c90d59336980fa9509620dd9ed2d70502ff..2ca45ada9a2559e657853b8f2f6af434379431fc 100644 --- a/semantic_router/encoders/zure.py +++ b/semantic_router/encoders/azure_openai.py @@ -1,8 +1,9 @@ import os from asyncio import sleep as asleep from time import sleep -from typing import List, Optional, Union +from typing import Any, Callable, Dict, List, Optional, Union +import httpx import openai from openai import OpenAIError from openai._types import NotGiven @@ -24,100 +25,135 @@ class AzureOpenAIEncoder(DenseEncoder): async_client: Optional[openai.AsyncAzureOpenAI] = None dimensions: Union[int, NotGiven] = NotGiven() type: str = "azure" - api_key: Optional[str] = None - deployment_name: Optional[str] = None - azure_endpoint: Optional[str] = None - api_version: Optional[str] = None - model: Optional[str] = None + deployment_name: str | None = None max_retries: int = 3 def __init__( self, - api_key: Optional[str] = None, - deployment_name: Optional[str] = None, - azure_endpoint: Optional[str] = None, - api_version: Optional[str] = None, - model: Optional[str] = None, # TODO we should change to `name` JB + name: Optional[str] = None, + azure_endpoint: str | None = None, + api_version: str | None = None, + api_key: str | None = None, + azure_ad_token: str | None = None, + azure_ad_token_provider: Callable[[], str] | None = None, + http_client_options: Optional[Dict[str, Any]] = None, + deployment_name: str = EncoderDefault.AZURE.value["deployment_name"], score_threshold: float = 0.82, dimensions: Union[int, NotGiven] = NotGiven(), max_retries: int = 3, ): """Initialize the AzureOpenAIEncoder. - :param api_key: The API key for the Azure OpenAI API. - :type api_key: str - :param deployment_name: The name of the deployment to use. - :type deployment_name: str :param azure_endpoint: The endpoint for the Azure OpenAI API. - :type azure_endpoint: str + Example: ``https://accountname.openai.azure.com`` + :type azure_endpoint: str, optional + :param api_version: The version of the API to use. - :type api_version: str - :param model: The model to use. - :type model: str - :param score_threshold: The score threshold for the embeddings. - :type score_threshold: float - :param dimensions: The dimensions of the embeddings. - :type dimensions: int - :param max_retries: The maximum number of retries for the API call. - :type max_retries: int + Example: ``"2025-02-01-preview"`` + :type api_version: str, optional + + :param api_key: The API key for the Azure OpenAI API. + :type api_key: str, optional + + :param azure_ad_token: The Azure AD/Entra ID token for authentication. + https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id + :type azure_ad_token: str, optional + + :param azure_ad_token_provider: A callable function that returns an Azure AD/Entra ID token. + :type azure_ad_token_provider: Callable[[], str], optional + + :param http_client_options: Dictionary of options to configure httpx client + Example: + { + "proxies": "http://proxy.server:8080", + "timeout": 20.0, + "headers": {"Authorization": "Bearer xyz"} + } + :type http_client_options: Dict[str, Any], optional + + :param deployment_name: The name of the model deployment to use. + :type deployment_name: str, optional + + :param score_threshold: The score threshold for filtering embeddings. + Default is ``0.82``. + :type score_threshold: float, optional + + :param dimensions: The number of dimensions for the embeddings. If not given, it defaults to the model's default setting. + :type dimensions: int, optional + + :param max_retries: The maximum number of retries for API calls in case of failures. + Default is ``3``. + :type max_retries: int, optional """ - name = deployment_name if name is None: - name = EncoderDefault.AZURE.value["embedding_model"] + name = deployment_name + if name is None: + name = EncoderDefault.AZURE.value["embedding_model"] super().__init__(name=name, score_threshold=score_threshold) - self.api_key = api_key + + azure_endpoint = azure_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT") + if not azure_endpoint: + raise ValueError("No Azure OpenAI endpoint provided.") + + api_version = api_version or os.getenv("AZURE_OPENAI_API_VERSION") + if not api_version: + raise ValueError("No Azure OpenAI API version provided.") + + if not ( + azure_ad_token + or azure_ad_token_provider + or api_key + or os.getenv("AZURE_OPENAI_API_KEY") + ): + raise ValueError( + "No authentication method provided. Please provide either `azure_ad_token`, " + "`azure_ad_token_provider`, or `api_key`." + ) + + # Only check API Key if no AD token or provider is used + if not azure_ad_token and not azure_ad_token_provider: + api_key = api_key or os.getenv("AZURE_OPENAI_API_KEY") + if not api_key: + raise ValueError("No Azure OpenAI API key provided.") + self.deployment_name = deployment_name - self.azure_endpoint = azure_endpoint - self.api_version = api_version - self.model = model + # set dimensions to support openai embed 3 dimensions param self.dimensions = dimensions - if self.api_key is None: - self.api_key = os.getenv("AZURE_OPENAI_API_KEY") - if self.api_key is None: - raise ValueError("No Azure OpenAI API key provided.") + if max_retries is not None: self.max_retries = max_retries - if self.deployment_name is None: - self.deployment_name = EncoderDefault.AZURE.value["deployment_name"] - # deployment_name may still be None, but it is optional in the API - if self.azure_endpoint is None: - self.azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") - if self.azure_endpoint is None: - raise ValueError("No Azure OpenAI endpoint provided.") - if self.api_version is None: - self.api_version = os.getenv("AZURE_OPENAI_API_VERSION") - if self.api_version is None: - raise ValueError("No Azure OpenAI API version provided.") - if self.model is None: - self.model = os.getenv("AZURE_OPENAI_MODEL") - if self.model is None: - raise ValueError("No Azure OpenAI model provided.") - assert ( - self.api_key is not None - and self.azure_endpoint is not None - and self.api_version is not None - and self.model is not None + + # Only create HTTP clients if options are provided + sync_http_client = ( + httpx.Client(**http_client_options) if http_client_options else None + ) + async_http_client = ( + httpx.AsyncClient(**http_client_options) if http_client_options else None ) + assert azure_endpoint is not None and self.deployment_name is not None + try: self.client = openai.AzureOpenAI( - azure_deployment=( - str(self.deployment_name) if self.deployment_name else None - ), - api_key=str(self.api_key), - azure_endpoint=str(self.azure_endpoint), - api_version=str(self.api_version), + azure_endpoint=azure_endpoint, + api_version=api_version, + api_key=api_key, + azure_ad_token=azure_ad_token, + azure_ad_token_provider=azure_ad_token_provider, + http_client=sync_http_client, ) self.async_client = openai.AsyncAzureOpenAI( - azure_deployment=( - str(self.deployment_name) if self.deployment_name else None - ), - api_key=str(self.api_key), - azure_endpoint=str(self.azure_endpoint), - api_version=str(self.api_version), + azure_endpoint=azure_endpoint, + api_version=api_version, + api_key=api_key, + azure_ad_token=azure_ad_token, + azure_ad_token_provider=azure_ad_token_provider, + http_client=async_http_client, ) + except Exception as e: + logger.error("OpenAI API client failed to initialize. Error: %s", e) raise ValueError( f"OpenAI API client failed to initialize. Error: {e}" ) from e @@ -139,7 +175,7 @@ class AzureOpenAIEncoder(DenseEncoder): try: embeds = self.client.embeddings.create( input=docs, - model=str(self.model), + model=str(self.deployment_name), dimensions=self.dimensions, ) if embeds.data: @@ -149,12 +185,12 @@ class AzureOpenAIEncoder(DenseEncoder): if self.max_retries != 0 and j < self.max_retries: sleep(2**j) logger.warning( - f"Retrying in {2**j} seconds due to OpenAIError: {e}" + "Retrying in %d seconds due to OpenAIError: %s", 2**j, e ) else: raise except Exception as e: - logger.error(f"Azure OpenAI API call failed. Error: {e}") + logger.error("Azure OpenAI API call failed. Error: %s", e) raise ValueError(f"Azure OpenAI API call failed. Error: {e}") from e if ( @@ -183,23 +219,22 @@ class AzureOpenAIEncoder(DenseEncoder): try: embeds = await self.async_client.embeddings.create( input=docs, - model=str(self.model), + model=str(self.deployment_name), dimensions=self.dimensions, ) if embeds.data: break - except OpenAIError as e: logger.error("Exception occurred", exc_info=True) if self.max_retries != 0 and j < self.max_retries: await asleep(2**j) logger.warning( - f"Retrying in {2**j} seconds due to OpenAIError: {e}" + "Retrying in %d seconds due to OpenAIError: %s", 2**j, e ) else: raise except Exception as e: - logger.error(f"Azure OpenAI API call failed. Error: {e}") + logger.error("Azure OpenAI API call failed. Error: %s", e) raise ValueError(f"Azure OpenAI API call failed. Error: {e}") from e if ( diff --git a/tests/unit/encoders/test_azure.py b/tests/unit/encoders/test_azure.py index 01a495a44ec1f4e58f9ee0a0c45f211d62226d76..15554805e5b2891d72a15fce39cecd0c8f2f9299 100644 --- a/tests/unit/encoders/test_azure.py +++ b/tests/unit/encoders/test_azure.py @@ -23,11 +23,12 @@ def mock_openai_async_client(): @pytest.fixture def openai_encoder(mock_openai_client, mock_openai_async_client): return AzureOpenAIEncoder( + azure_endpoint="https://test-endpoint.openai.azure.com", + api_version="test-version", api_key="test_api_key", + http_client_options={"timeout": 10}, deployment_name="test-deployment", - azure_endpoint="test_endpoint", - api_version="test_version", - model="test_model", + dimensions=1536, max_retries=2, ) @@ -84,7 +85,7 @@ class TestAzureOpenAIEncoder: mocker.patch.object( openai_encoder.client.embeddings, "create", side_effect=responses ) - with patch("semantic_router.encoders.zure.sleep", return_value=None): + with patch("semantic_router.encoders.azure_openai.sleep", return_value=None): embeddings = openai_encoder(["test document"]) assert embeddings == [[0.1, 0.2]] @@ -96,7 +97,7 @@ class TestAzureOpenAIEncoder: "create", side_effect=Exception("Non-OpenAIError"), ) - with patch("semantic_router.encoders.zure.sleep", return_value=None): + with patch("semantic_router.encoders.azure_openai.sleep", return_value=None): with pytest.raises(ValueError) as e: openai_encoder(["test document"]) @@ -124,7 +125,7 @@ class TestAzureOpenAIEncoder: mocker.patch.object( openai_encoder.client.embeddings, "create", side_effect=responses ) - with patch("semantic_router.encoders.zure.sleep", return_value=None): + with patch("semantic_router.encoders.azure_openai.sleep", return_value=None): embeddings = openai_encoder(["test document"]) assert embeddings == [[0.1, 0.2]] @@ -150,7 +151,7 @@ class TestAzureOpenAIEncoder: mocker.patch("time.sleep", return_value=None) # To speed up the test # Patch the sleep function in the encoder module to avoid actual sleep - with patch("semantic_router.encoders.zure.sleep", return_value=None): + with patch("semantic_router.encoders.azure_openai.sleep", return_value=None): result = openai_encoder(["test document"]) assert result == [[0.1, 0.2, 0.3]] @@ -176,7 +177,7 @@ class TestAzureOpenAIEncoder: mocker.patch("time.sleep", return_value=None) # To speed up the test # Patch the sleep function in the encoder module to avoid actual sleep - with patch("semantic_router.encoders.zure.sleep", return_value=None): + with patch("semantic_router.encoders.azure_openai.sleep", return_value=None): with pytest.raises(OpenAIError): openai_encoder(["test document"]) @@ -207,7 +208,7 @@ class TestAzureOpenAIEncoder: mocker.patch("asyncio.sleep", return_value=None) # To speed up the test # Patch the asleep function in the encoder module to avoid actual sleep - with patch("semantic_router.encoders.zure.asleep", return_value=None): + with patch("semantic_router.encoders.azure_openai.asleep", return_value=None): result = await openai_encoder.acall(["test document"]) assert result == [[0.1, 0.2, 0.3]] @@ -226,7 +227,7 @@ class TestAzureOpenAIEncoder: mocker.patch("asyncio.sleep", return_value=None) # To speed up the test # Patch the asleep function in the encoder module to avoid actual sleep - with patch("semantic_router.encoders.zure.asleep", return_value=None): + with patch("semantic_router.encoders.azure_openai.asleep", return_value=None): with pytest.raises(OpenAIError): await openai_encoder.acall(["test document"]) diff --git a/uv.lock b/uv.lock index f859bf7600b00bff08297b00f81ee13955672999..855adaa29460cd9fc2e582c47b21f447937db1dd 100644 --- a/uv.lock +++ b/uv.lock @@ -3326,7 +3326,7 @@ wheels = [ [[package]] name = "semantic-router" -version = "0.1.0" +version = "0.1.1" source = { editable = "." } dependencies = [ { name = "aiohttp" },