Unverified commit 24c3e7bf authored by Siraj R Aizlewood

Merge remote-tracking branch 'origin/main' into optimal_route_level_score_thresholds

parents 6494666b ae18c732
@@ -25,5 +25,5 @@ output
node_modules
package-lock.json
package.json
test.ipynb
```
README.md
@@ -142,3 +142,5 @@ Julian Horsey, [Semantic Router superfast decision layer for LLMs and AI agents]
azhar, [Beyond Basic Chatbots: How Semantic Router is Changing the Game](https://medium.com/ai-insights-cobet/beyond-basic-chatbots-how-semantic-router-is-changing-the-game-783dd959a32d), AI Insights @ Medium
Daniel Avila, [Semantic Router: Enhancing Control in LLM Conversations](https://blog.codegpt.co/semantic-router-enhancing-control-in-llm-conversations-68ce905c8d33), CodeGPT @ Medium
Yogendra Sisodia, [Stop Chat-GPT From Going Rogue In Production With Semantic Router](https://medium.com/@scholarly360/stop-chat-gpt-from-going-rogue-in-production-with-semantic-router-937a4768ae19), Medium
\ No newline at end of file
This diff is collapsed.
pyproject.toml
[tool.poetry]
name = "semantic-router"
-version = "0.0.17"
+version = "0.0.18"
description = "Super fast semantic router for AI decision making"
authors = [
"James Briggs <james@aurelio.ai>",
@@ -27,6 +27,7 @@ fastembed = {version = "^0.1.3", optional = true, python = "<3.12"}
torch = {version = "^2.1.2", optional = true}
transformers = {version = "^4.36.2", optional = true}
llama-cpp-python = {version = "^0.2.28", optional = true}
+black = "^23.12.1"
[tool.poetry.extras]
hybrid = ["pinecone-text"]
@@ -36,7 +37,6 @@ local = ["torch", "transformers", "llama-cpp-python"]
[tool.poetry.group.dev.dependencies]
ipykernel = "^6.25.0"
ruff = "^0.1.5"
-black = {extras = ["jupyter"], version = "^23.12.0"}
pytest = "^7.4.3"
pytest-mock = "^3.12.0"
pytest-cov = "^4.1.0"
......
semantic_router/__init__.py
@@ -4,4 +4,4 @@ from semantic_router.route import Route
__all__ = ["RouteLayer", "HybridRouteLayer", "Route", "LayerConfig"]
-__version__ = "0.0.17"
+__version__ = "0.0.18"
semantic_router/layer.py
@@ -199,10 +199,12 @@ class RouteLayer:
"default. Ensure API key is set in OPENAI_API_KEY environment "
"variable."
)
self.llm = OpenAILLM()
route.llm = self.llm
else:
route.llm = self.llm
logger.info(f"LLM `{route.llm}` is chosen")
return route(text)
else:
# if no route passes threshold, return empty route choice
......
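The practical effect of the fallback above, sketched with an illustrative dynamic route (the schema dict is a placeholder, and the query is invented; assumes OPENAI_API_KEY is set):

```python
from semantic_router import Route, RouteLayer
from semantic_router.encoders import OpenAIEncoder

# Hypothetical dynamic route; the schema dict is a placeholder for
# whatever function schema the route should fill.
route = Route(
    name="get_time",
    utterances=["what time is it?", "tell me the time"],
    function_schema={"name": "get_time", "signature": "(timezone: str) -> str"},
)

# No `llm=` is passed here, so when the dynamic route matches, the layer
# logs the warning above and falls back to a default OpenAILLM.
layer = RouteLayer(encoder=OpenAIEncoder(), routes=[route])
choice = layer("what's the time in Rome?")
```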
semantic_router/llms/__init__.py
@@ -2,5 +2,6 @@ from semantic_router.llms.base import BaseLLM
from semantic_router.llms.cohere import CohereLLM
from semantic_router.llms.openai import OpenAILLM
from semantic_router.llms.openrouter import OpenRouterLLM
+from semantic_router.llms.zure import AzureOpenAILLM

-__all__ = ["BaseLLM", "OpenAILLM", "OpenRouterLLM", "CohereLLM"]
+__all__ = ["BaseLLM", "OpenAILLM", "OpenRouterLLM", "CohereLLM", "AzureOpenAILLM"]
semantic_router/llms/base.py
@@ -31,7 +31,6 @@ class BaseLLM(BaseModel):
        param_types = [
            info.split(":")[1].strip().split("=")[0].strip() for info in param_info
        ]
        for name, type_str in zip(param_names, param_types):
            if name not in inputs:
                logger.error(f"Input {name} missing from query")
@@ -76,12 +75,14 @@ class BaseLLM(BaseModel):
"""
llm_input = [Message(role="user", content=prompt)]
output = self(llm_input)
if not output:
raise Exception("No output generated for extract function input")
output = output.replace("'", '"').strip().rstrip(",")
logger.info(f"LLM output: {output}")
function_inputs = json.loads(output)
logger.info(f"Function inputs: {function_inputs}")
if not self._is_valid_inputs(function_inputs, function_schema):
raise ValueError("Invalid inputs")
return function_inputs
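The added normalization line matters because LLMs often emit quasi-JSON; a minimal illustration of that cleanup step on a hypothetical raw output:

```python
import json

# Hypothetical raw LLM reply: single quotes plus a trailing comma,
# which json.loads would reject without the normalization above.
raw = "{'timezone': 'America/New_York'},"
cleaned = raw.replace("'", '"').strip().rstrip(",")
print(json.loads(cleaned))  # {'timezone': 'America/New_York'}
```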
semantic_router/llms/zure.py (new file)
import os
from typing import List, Optional

import openai

from semantic_router.llms import BaseLLM
from semantic_router.schema import Message
from semantic_router.utils.logger import logger
class AzureOpenAILLM(BaseLLM):
    client: Optional[openai.AzureOpenAI]
    temperature: Optional[float]
    max_tokens: Optional[int]

    def __init__(
        self,
        name: Optional[str] = None,
        openai_api_key: Optional[str] = None,
        azure_endpoint: Optional[str] = None,
        temperature: float = 0.01,
        max_tokens: int = 200,
        api_version="2023-07-01-preview",
    ):
        if name is None:
            name = os.getenv("OPENAI_CHAT_MODEL_NAME", "gpt-3.5-turbo")
        super().__init__(name=name)
        api_key = openai_api_key or os.getenv("AZURE_OPENAI_API_KEY")
        if api_key is None:
            raise ValueError("AzureOpenAI API key cannot be 'None'.")
        azure_endpoint = azure_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT")
        if azure_endpoint is None:
            raise ValueError("Azure endpoint cannot be 'None'.")
        try:
            self.client = openai.AzureOpenAI(
                api_key=api_key, azure_endpoint=azure_endpoint, api_version=api_version
            )
        except Exception as e:
            raise ValueError(f"AzureOpenAI API client failed to initialize. Error: {e}")
        self.temperature = temperature
        self.max_tokens = max_tokens
    def __call__(self, messages: List[Message]) -> str:
        if self.client is None:
            raise ValueError("AzureOpenAI client is not initialized.")
        try:
            completion = self.client.chat.completions.create(
                model=self.name,
                messages=[m.to_openai() for m in messages],
                temperature=self.temperature,
                max_tokens=self.max_tokens,
            )
            output = completion.choices[0].message.content
            if not output:
                raise Exception("No output generated")
            return output
        except Exception as e:
            logger.error(f"LLM error: {e}")
            raise Exception(f"LLM error: {e}")
semantic_router/route.py
@@ -47,6 +47,7 @@ class Route(BaseModel):
    score_threshold: Optional[float] = None

    def __call__(self, query: str) -> RouteChoice:
+        logger.info(f"this is the llm passed to route object {self.llm}")
        if self.function_schema:
            if not self.llm:
                raise ValueError(
@@ -97,29 +98,29 @@
logger.info("Generating dynamic route...")
prompt = f"""
You are tasked to generate a JSON configuration based on the provided
function schema. Please follow the template below, no other tokens allowed:
<config>
{{
"name": "<function_name>",
"utterances": [
"<example_utterance_1>",
"<example_utterance_2>",
"<example_utterance_3>",
"<example_utterance_4>",
"<example_utterance_5>"]
}}
</config>
Only include the "name" and "utterances" keys in your answer.
The "name" should match the function name and the "utterances"
should comprise a list of 5 example phrases that could be used to invoke
the function. Use real values instead of placeholders.
Input schema:
{function_schema}
"""
You are tasked to generate a JSON configuration based on the provided
function schema. Please follow the template below, no other tokens allowed:
<config>
{{
"name": "<function_name>",
"utterances": [
"<example_utterance_1>",
"<example_utterance_2>",
"<example_utterance_3>",
"<example_utterance_4>",
"<example_utterance_5>"]
}}
</config>
Only include the "name" and "utterances" keys in your answer.
The "name" should match the function name and the "utterances"
should comprise a list of 5 example phrases that could be used to invoke
the function. Use real values instead of placeholders.
Input schema:
{function_schema}
"""
llm_input = [Message(role="user", content=prompt)]
output = llm(llm_input)
......
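For illustration, given a hypothetical `get_time(timezone: str)` schema, a well-behaved model should reply with something like the following (all values invented):

```python
# Hypothetical LLM reply that the surrounding code would then parse
# into a route name plus five example utterances.
expected = """
<config>
{
    "name": "get_time",
    "utterances": [
        "what time is it in New York?",
        "tell me the time in London",
        "current time in Tokyo please",
        "do you know the time in Sydney?",
        "give me the time in Paris"]
}
</config>
"""
```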
semantic_router/schema.py
from enum import Enum
-from typing import Dict, List, Literal, Optional
+from typing import List, Literal, Optional

from pydantic.v1 import BaseModel
from pydantic.v1.dataclasses import dataclass
@@ -10,7 +10,7 @@ from semantic_router.encoders import (
    FastEmbedEncoder,
    OpenAIEncoder,
)
-from semantic_router.utils.splitters import semantic_splitter
+from semantic_router.utils.splitters import semantic_splitter, DocumentSplit
class EncoderType(Enum):
@@ -77,7 +77,7 @@ class Conversation(BaseModel):
        split_method: Literal[
            "consecutive_similarity_drop", "cumulative_similarity_drop"
        ] = "consecutive_similarity_drop",
-    ) -> Dict[str, List[str]]:
+    ) -> list[DocumentSplit]:
        docs = [f"{m.role}: {m.content}" for m in self.messages]
        return semantic_splitter(
            encoder=encoder, docs=docs, threshold=threshold, split_method=split_method
......
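Callers of `Conversation.split_by_topic` therefore now receive `DocumentSplit` objects rather than a dict; a sketch (messages and encoder are placeholders):

```python
from semantic_router.encoders import OpenAIEncoder
from semantic_router.schema import Conversation, Message

convo = Conversation(
    messages=[
        Message(role="user", content="What is the latest news?"),
        Message(role="bot", content="How is the weather today?"),
    ]
)
splits = convo.split_by_topic(encoder=OpenAIEncoder(), threshold=0.5)
for split in splits:
    print(split.docs, split.triggered_score)  # DocumentSplit fields, not dict keys
```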
semantic_router/utils/splitters.py
-from typing import Dict, List, Literal
+from typing import List, Literal, Optional

import numpy as np
from pydantic.v1 import BaseModel

from semantic_router.encoders import BaseEncoder


class DocumentSplit(BaseModel):
    docs: List[str]
    is_triggered: bool = False
    triggered_score: Optional[float] = None


def semantic_splitter(
    encoder: BaseEncoder,
    docs: List[str],
@@ -12,7 +19,7 @@ def semantic_splitter(
    split_method: Literal[
        "consecutive_similarity_drop", "cumulative_similarity_drop"
    ] = "consecutive_similarity_drop",
-) -> Dict[str, List[str]]:
+) -> List[DocumentSplit]:
    """
    Splits a list of documents based on semantic similarity changes.
@@ -33,7 +40,7 @@
        List[DocumentSplit]: Splits with corresponding documents.
    """
    total_docs = len(docs)
-    splits = {}
+    splits = []
    curr_split_start_idx = 0
    curr_split_num = 1
@@ -43,8 +50,15 @@
        sim_matrix = np.matmul(norm_embeds, norm_embeds.T)

        for idx in range(1, total_docs):
-            if idx < len(sim_matrix) and sim_matrix[idx - 1][idx] < threshold:
-                splits[f"split {curr_split_num}"] = docs[curr_split_start_idx:idx]
+            curr_sim_score = sim_matrix[idx - 1][idx]
+            if idx < len(sim_matrix) and curr_sim_score < threshold:
+                splits.append(
+                    DocumentSplit(
+                        docs=docs[curr_split_start_idx:idx],
+                        is_triggered=True,
+                        triggered_score=curr_sim_score,
+                    )
+                )
                curr_split_start_idx = idx
                curr_split_num += 1
@@ -57,15 +71,19 @@
                curr_split_docs_embed = encoder([curr_split_docs])[0]
                next_doc_embed = encoder([next_doc])[0]

-                similarity = np.dot(curr_split_docs_embed, next_doc_embed) / (
+                curr_sim_score = np.dot(curr_split_docs_embed, next_doc_embed) / (
                    np.linalg.norm(curr_split_docs_embed)
                    * np.linalg.norm(next_doc_embed)
                )

-                if similarity < threshold:
-                    splits[f"split {curr_split_num}"] = docs[
-                        curr_split_start_idx : idx + 1
-                    ]
+                if curr_sim_score < threshold:
+                    splits.append(
+                        DocumentSplit(
+                            docs=docs[curr_split_start_idx : idx + 1],
+                            is_triggered=True,
+                            triggered_score=curr_sim_score,
+                        )
+                    )
                    curr_split_start_idx = idx + 1
                    curr_split_num += 1
@@ -75,5 +93,5 @@ def semantic_splitter(
" 'cumulative_similarity_drop'."
)
splits[f"split {curr_split_num}"] = docs[curr_split_start_idx:]
splits.append(DocumentSplit(docs=docs[curr_split_start_idx:]))
return splits
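And a sketch of consuming the new return type directly (encoder and docs are placeholders):

```python
from semantic_router.encoders import OpenAIEncoder
from semantic_router.utils.splitters import semantic_splitter

encoder = OpenAIEncoder()  # any BaseEncoder subclass works here
docs = ["doc1", "doc2", "doc3", "doc4", "doc5"]

splits = semantic_splitter(encoder=encoder, docs=docs, threshold=0.5)
for split in splits:
    # Previously splits was a dict keyed "split 1", "split 2", ...; each
    # item now carries its docs plus metadata about what triggered the split.
    print(split.docs, split.is_triggered, split.triggered_score)
```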
import pytest

from semantic_router.llms import AzureOpenAILLM
from semantic_router.schema import Message


@pytest.fixture
def azure_openai_llm(mocker):
    mocker.patch("openai.Client")
    return AzureOpenAILLM(openai_api_key="test_api_key", azure_endpoint="test_endpoint")
class TestAzureOpenAILLM:
    def test_azure_openai_llm_init_with_api_key(self, azure_openai_llm):
        assert azure_openai_llm.client is not None, "Client should be initialized"
        assert (
            azure_openai_llm.name == "gpt-3.5-turbo"
        ), "Default name not set correctly"

    def test_azure_openai_llm_init_success(self, mocker):
        mocker.patch("os.getenv", return_value="fake-api-key")
        llm = AzureOpenAILLM()
        assert llm.client is not None

    def test_azure_openai_llm_init_without_api_key(self, mocker):
        mocker.patch("os.getenv", return_value=None)
        with pytest.raises(ValueError) as _:
            AzureOpenAILLM()

    # def test_azure_openai_llm_init_without_azure_endpoint(self, mocker):
    #     mocker.patch("os.getenv", side_effect=[None, "fake-api-key"])
    #     with pytest.raises(ValueError) as e:
    #         AzureOpenAILLM(openai_api_key="test_api_key")
    #     assert "Azure endpoint cannot be 'None'." in str(e.value)
    def test_azure_openai_llm_init_without_azure_endpoint(self, mocker):
        mocker.patch(
            "os.getenv",
            side_effect=lambda key, default=None: {
                "OPENAI_CHAT_MODEL_NAME": "test-model-name"
            }.get(key, default),
        )
        with pytest.raises(ValueError) as e:
            AzureOpenAILLM(openai_api_key="test_api_key")
        assert "Azure endpoint cannot be 'None'" in str(e.value)

    def test_azure_openai_llm_call_uninitialized_client(self, azure_openai_llm):
        # Set the client to None to simulate an uninitialized client
        azure_openai_llm.client = None
        with pytest.raises(ValueError) as e:
            llm_input = [Message(role="user", content="test")]
            azure_openai_llm(llm_input)
        assert "AzureOpenAI client is not initialized." in str(e.value)
    def test_azure_openai_llm_init_exception(self, mocker):
        mocker.patch("os.getenv", return_value="fake-api-key")
        mocker.patch(
            "openai.AzureOpenAI", side_effect=Exception("Initialization error")
        )
        with pytest.raises(ValueError) as e:
            AzureOpenAILLM()
        assert (
            "AzureOpenAI API client failed to initialize. Error: Initialization error"
            in str(e.value)
        )

    def test_azure_openai_llm_temperature_max_tokens_initialization(self):
        test_temperature = 0.5
        test_max_tokens = 100
        azure_llm = AzureOpenAILLM(
            openai_api_key="test_api_key",
            azure_endpoint="test_endpoint",
            temperature=test_temperature,
            max_tokens=test_max_tokens,
        )
        assert (
            azure_llm.temperature == test_temperature
        ), "Temperature not set correctly"
        assert azure_llm.max_tokens == test_max_tokens, "Max tokens not set correctly"
    def test_azure_openai_llm_call_success(self, azure_openai_llm, mocker):
        mock_completion = mocker.MagicMock()
        mock_completion.choices[0].message.content = "test"

        mocker.patch("os.getenv", return_value="fake-api-key")
        mocker.patch.object(
            azure_openai_llm.client.chat.completions,
            "create",
            return_value=mock_completion,
        )
        llm_input = [Message(role="user", content="test")]
        output = azure_openai_llm(llm_input)
        assert output == "test"
@@ -17,7 +17,8 @@ def test_semantic_splitter_consecutive_similarity_drop():
    result = semantic_splitter(mock_encoder, docs, threshold, split_method)

-    assert result == {"split 1": ["doc1", "doc2", "doc3"], "split 2": ["doc4", "doc5"]}
+    assert result[0].docs == ["doc1", "doc2", "doc3"]
+    assert result[1].docs == ["doc4", "doc5"]
def test_semantic_splitter_cumulative_similarity_drop():
@@ -33,7 +34,8 @@ def test_semantic_splitter_cumulative_similarity_drop():
    result = semantic_splitter(mock_encoder, docs, threshold, split_method)

-    assert result == {"split 1": ["doc1", "doc2"], "split 2": ["doc3", "doc4", "doc5"]}
+    assert result[0].docs == ["doc1", "doc2"]
+    assert result[1].docs == ["doc3", "doc4", "doc5"]
def test_semantic_splitter_invalid_method():
@@ -62,7 +64,5 @@ def test_split_by_topic():
        encoder=mock_encoder, threshold=0.5, split_method="consecutive_similarity_drop"
    )

-    assert result == {
-        "split 1": ["User: What is the latest news?"],
-        "split 2": ["Bot: How is the weather today?"],
-    }
+    assert result[0].docs == ["User: What is the latest news?"]
+    assert result[1].docs == ["Bot: How is the weather today?"]