Unverified commit 24c3e7bf authored by Siraj R Aizlewood

Merge remote-tracking branch 'origin/main' into optimal_route_level_score_thresholds

parents 6494666b ae18c732
@@ -25,5 +25,5 @@ output
node_modules
package-lock.json
package.json
test.ipynb
```
README.md
@@ -142,3 +142,5 @@ Julian Horsey, [Semantic Router superfast decision layer for LLMs and AI agents]
azhar, [Beyond Basic Chatbots: How Semantic Router is Changing the Game](https://medium.com/ai-insights-cobet/beyond-basic-chatbots-how-semantic-router-is-changing-the-game-783dd959a32d), AI Insights @ Medium
Daniel Avila, [Semantic Router: Enhancing Control in LLM Conversations](https://blog.codegpt.co/semantic-router-enhancing-control-in-llm-conversations-68ce905c8d33), CodeGPT @ Medium
Yogendra Sisodia, [Stop Chat-GPT From Going Rogue In Production With Semantic Router](https://medium.com/@scholarly360/stop-chat-gpt-from-going-rogue-in-production-with-semantic-router-937a4768ae19), Medium
\ No newline at end of file
This diff is collapsed.
pyproject.toml
[tool.poetry]
name = "semantic-router"
-version = "0.0.17"
+version = "0.0.18"
description = "Super fast semantic router for AI decision making"
authors = [
"James Briggs <james@aurelio.ai>",
@@ -27,6 +27,7 @@ fastembed = {version = "^0.1.3", optional = true, python = "<3.12"}
torch = {version = "^2.1.2", optional = true}
transformers = {version = "^4.36.2", optional = true}
llama-cpp-python = {version = "^0.2.28", optional = true}
+black = "^23.12.1"
[tool.poetry.extras]
hybrid = ["pinecone-text"]
@@ -36,7 +37,6 @@ local = ["torch", "transformers", "llama-cpp-python"]
[tool.poetry.group.dev.dependencies]
ipykernel = "^6.25.0"
ruff = "^0.1.5"
-black = {extras = ["jupyter"], version = "^23.12.0"}
pytest = "^7.4.3"
pytest-mock = "^3.12.0"
pytest-cov = "^4.1.0"
......
semantic_router/__init__.py
@@ -4,4 +4,4 @@ from semantic_router.route import Route
__all__ = ["RouteLayer", "HybridRouteLayer", "Route", "LayerConfig"]
-__version__ = "0.0.17"
+__version__ = "0.0.18"
semantic_router/layer.py
@@ -199,10 +199,12 @@ class RouteLayer:
"default. Ensure API key is set in OPENAI_API_KEY environment "
"variable."
)
self.llm = OpenAILLM()
route.llm = self.llm
else:
route.llm = self.llm
logger.info(f"LLM `{route.llm}` is chosen")
return route(text)
else:
# if no route passes threshold, return empty route choice
......
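The practical effect of the fallback above, sketched with an illustrative dynamic route (the schema dict is a placeholder, and the query is invented; assumes OPENAI_API_KEY is set):

```python
from semantic_router import Route, RouteLayer
from semantic_router.encoders import OpenAIEncoder

# Hypothetical dynamic route; the schema dict is a placeholder for
# whatever function schema the route should fill.
route = Route(
    name="get_time",
    utterances=["what time is it?", "tell me the time"],
    function_schema={"name": "get_time", "signature": "(timezone: str) -> str"},
)

# No `llm=` is passed here, so when the dynamic route matches, the layer
# logs the warning above and falls back to a default OpenAILLM.
layer = RouteLayer(encoder=OpenAIEncoder(), routes=[route])
choice = layer("what's the time in Rome?")
```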
semantic_router/llms/__init__.py
@@ -2,5 +2,6 @@ from semantic_router.llms.base import BaseLLM
from semantic_router.llms.cohere import CohereLLM
from semantic_router.llms.openai import OpenAILLM
from semantic_router.llms.openrouter import OpenRouterLLM
+from semantic_router.llms.zure import AzureOpenAILLM

-__all__ = ["BaseLLM", "OpenAILLM", "OpenRouterLLM", "CohereLLM"]
+__all__ = ["BaseLLM", "OpenAILLM", "OpenRouterLLM", "CohereLLM", "AzureOpenAILLM"]
semantic_router/llms/base.py
@@ -31,7 +31,6 @@ class BaseLLM(BaseModel):
        param_types = [
            info.split(":")[1].strip().split("=")[0].strip() for info in param_info
        ]
        for name, type_str in zip(param_names, param_types):
            if name not in inputs:
                logger.error(f"Input {name} missing from query")
@@ -76,12 +75,14 @@ class BaseLLM(BaseModel):
"""
llm_input = [Message(role="user", content=prompt)]
output = self(llm_input)
if not output:
raise Exception("No output generated for extract function input")
output = output.replace("'", '"').strip().rstrip(",")
logger.info(f"LLM output: {output}")
function_inputs = json.loads(output)
logger.info(f"Function inputs: {function_inputs}")
if not self._is_valid_inputs(function_inputs, function_schema):
raise ValueError("Invalid inputs")
return function_inputs
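The added normalization line matters because LLMs often emit quasi-JSON; a minimal illustration of that cleanup step on a hypothetical raw output:

```python
import json

# Hypothetical raw LLM reply: single quotes plus a trailing comma,
# which json.loads would reject without the normalization above.
raw = "{'timezone': 'America/New_York'},"
cleaned = raw.replace("'", '"').strip().rstrip(",")
print(json.loads(cleaned))  # {'timezone': 'America/New_York'}
```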
semantic_router/llms/zure.py (new file)
import os
from typing import List, Optional

import openai

from semantic_router.llms import BaseLLM
from semantic_router.schema import Message
from semantic_router.utils.logger import logger
class AzureOpenAILLM(BaseLLM):
    client: Optional[openai.AzureOpenAI]
    temperature: Optional[float]
    max_tokens: Optional[int]

    def __init__(
        self,
        name: Optional[str] = None,
        openai_api_key: Optional[str] = None,
        azure_endpoint: Optional[str] = None,
        temperature: float = 0.01,
        max_tokens: int = 200,
        api_version="2023-07-01-preview",
    ):
        if name is None:
            name = os.getenv("OPENAI_CHAT_MODEL_NAME", "gpt-3.5-turbo")
        super().__init__(name=name)
        api_key = openai_api_key or os.getenv("AZURE_OPENAI_API_KEY")
        if api_key is None:
            raise ValueError("AzureOpenAI API key cannot be 'None'.")
        azure_endpoint = azure_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT")
        if azure_endpoint is None:
            raise ValueError("Azure endpoint cannot be 'None'.")
        try:
            self.client = openai.AzureOpenAI(
                api_key=api_key, azure_endpoint=azure_endpoint, api_version=api_version
            )
        except Exception as e:
            raise ValueError(f"AzureOpenAI API client failed to initialize. Error: {e}")
        self.temperature = temperature
        self.max_tokens = max_tokens
    def __call__(self, messages: List[Message]) -> str:
        if self.client is None:
            raise ValueError("AzureOpenAI client is not initialized.")
        try:
            completion = self.client.chat.completions.create(
                model=self.name,
                messages=[m.to_openai() for m in messages],
                temperature=self.temperature,
                max_tokens=self.max_tokens,
            )
            output = completion.choices[0].message.content
            if not output:
                raise Exception("No output generated")
            return output
        except Exception as e:
            logger.error(f"LLM error: {e}")
            raise Exception(f"LLM error: {e}")
semantic_router/route.py
@@ -47,6 +47,7 @@ class Route(BaseModel):
    score_threshold: Optional[float] = None

    def __call__(self, query: str) -> RouteChoice:
+        logger.info(f"this is the llm passed to route object {self.llm}")
        if self.function_schema:
            if not self.llm:
                raise ValueError(
@@ -97,29 +98,29 @@
logger.info("Generating dynamic route...")
prompt = f"""
You are tasked to generate a JSON configuration based on the provided
function schema. Please follow the template below, no other tokens allowed:
<config>
{{
"name": "<function_name>",
"utterances": [
"<example_utterance_1>",
"<example_utterance_2>",
"<example_utterance_3>",
"<example_utterance_4>",
"<example_utterance_5>"]
}}
</config>
Only include the "name" and "utterances" keys in your answer.
The "name" should match the function name and the "utterances"
should comprise a list of 5 example phrases that could be used to invoke
the function. Use real values instead of placeholders.
Input schema:
{function_schema}
"""
You are tasked to generate a JSON configuration based on the provided
function schema. Please follow the template below, no other tokens allowed:
<config>
{{
"name": "<function_name>",
"utterances": [
"<example_utterance_1>",
"<example_utterance_2>",
"<example_utterance_3>",
"<example_utterance_4>",
"<example_utterance_5>"]
}}
</config>
Only include the "name" and "utterances" keys in your answer.
The "name" should match the function name and the "utterances"
should comprise a list of 5 example phrases that could be used to invoke
the function. Use real values instead of placeholders.
Input schema:
{function_schema}
"""
llm_input = [Message(role="user", content=prompt)]
output = llm(llm_input)
......
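For illustration, given a hypothetical `get_time(timezone: str)` schema, a well-behaved model should reply with something like the following (all values invented):

```python
# Hypothetical LLM reply that the surrounding code would then parse
# into a route name plus five example utterances.
expected = """
<config>
{
    "name": "get_time",
    "utterances": [
        "what time is it in New York?",
        "tell me the time in London",
        "current time in Tokyo please",
        "do you know the time in Sydney?",
        "give me the time in Paris"]
}
</config>
"""
```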
semantic_router/schema.py
from enum import Enum
-from typing import Dict, List, Literal, Optional
+from typing import List, Literal, Optional

from pydantic.v1 import BaseModel
from pydantic.v1.dataclasses import dataclass
@@ -10,7 +10,7 @@ from semantic_router.encoders import (
    FastEmbedEncoder,
    OpenAIEncoder,
)
-from semantic_router.utils.splitters import semantic_splitter
+from semantic_router.utils.splitters import semantic_splitter, DocumentSplit
class EncoderType(Enum):
@@ -77,7 +77,7 @@ class Conversation(BaseModel):
        split_method: Literal[
            "consecutive_similarity_drop", "cumulative_similarity_drop"
        ] = "consecutive_similarity_drop",
-    ) -> Dict[str, List[str]]:
+    ) -> list[DocumentSplit]:
        docs = [f"{m.role}: {m.content}" for m in self.messages]
        return semantic_splitter(
            encoder=encoder, docs=docs, threshold=threshold, split_method=split_method
......
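Callers of `Conversation.split_by_topic` therefore now receive `DocumentSplit` objects rather than a dict; a sketch (messages and encoder are placeholders):

```python
from semantic_router.encoders import OpenAIEncoder
from semantic_router.schema import Conversation, Message

convo = Conversation(
    messages=[
        Message(role="user", content="What is the latest news?"),
        Message(role="bot", content="How is the weather today?"),
    ]
)
splits = convo.split_by_topic(encoder=OpenAIEncoder(), threshold=0.5)
for split in splits:
    print(split.docs, split.triggered_score)  # DocumentSplit fields, not dict keys
```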
semantic_router/utils/splitters.py
-from typing import Dict, List, Literal
+from typing import List, Literal, Optional

import numpy as np
from pydantic.v1 import BaseModel

from semantic_router.encoders import BaseEncoder


class DocumentSplit(BaseModel):
    docs: List[str]
    is_triggered: bool = False
    triggered_score: Optional[float] = None


def semantic_splitter(
    encoder: BaseEncoder,
    docs: List[str],
@@ -12,7 +19,7 @@ def semantic_splitter(
    split_method: Literal[
        "consecutive_similarity_drop", "cumulative_similarity_drop"
    ] = "consecutive_similarity_drop",
-) -> Dict[str, List[str]]:
+) -> List[DocumentSplit]:
    """
    Splits a list of documents based on semantic similarity changes.
@@ -33,7 +40,7 @@
        List[DocumentSplit]: Splits with corresponding documents.
    """
    total_docs = len(docs)
-    splits = {}
+    splits = []
    curr_split_start_idx = 0
    curr_split_num = 1
@@ -43,8 +50,15 @@
        sim_matrix = np.matmul(norm_embeds, norm_embeds.T)

        for idx in range(1, total_docs):
-            if idx < len(sim_matrix) and sim_matrix[idx - 1][idx] < threshold:
-                splits[f"split {curr_split_num}"] = docs[curr_split_start_idx:idx]
+            curr_sim_score = sim_matrix[idx - 1][idx]
+            if idx < len(sim_matrix) and curr_sim_score < threshold:
+                splits.append(
+                    DocumentSplit(
+                        docs=docs[curr_split_start_idx:idx],
+                        is_triggered=True,
+                        triggered_score=curr_sim_score,
+                    )
+                )
                curr_split_start_idx = idx
                curr_split_num += 1
@@ -57,15 +71,19 @@
                curr_split_docs_embed = encoder([curr_split_docs])[0]
                next_doc_embed = encoder([next_doc])[0]

-                similarity = np.dot(curr_split_docs_embed, next_doc_embed) / (
+                curr_sim_score = np.dot(curr_split_docs_embed, next_doc_embed) / (
                    np.linalg.norm(curr_split_docs_embed)
                    * np.linalg.norm(next_doc_embed)
                )

-                if similarity < threshold:
-                    splits[f"split {curr_split_num}"] = docs[
-                        curr_split_start_idx : idx + 1
-                    ]
+                if curr_sim_score < threshold:
+                    splits.append(
+                        DocumentSplit(
+                            docs=docs[curr_split_start_idx : idx + 1],
+                            is_triggered=True,
+                            triggered_score=curr_sim_score,
+                        )
+                    )
                    curr_split_start_idx = idx + 1
                    curr_split_num += 1
@@ -75,5 +93,5 @@ def semantic_splitter(
" 'cumulative_similarity_drop'."
)
splits[f"split {curr_split_num}"] = docs[curr_split_start_idx:]
splits.append(DocumentSplit(docs=docs[curr_split_start_idx:]))
return splits
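And a sketch of consuming the new return type directly (encoder and docs are placeholders):

```python
from semantic_router.encoders import OpenAIEncoder
from semantic_router.utils.splitters import semantic_splitter

encoder = OpenAIEncoder()  # any BaseEncoder subclass works here
docs = ["doc1", "doc2", "doc3", "doc4", "doc5"]

splits = semantic_splitter(encoder=encoder, docs=docs, threshold=0.5)
for split in splits:
    # Previously splits was a dict keyed "split 1", "split 2", ...; each
    # item now carries its docs plus metadata about what triggered the split.
    print(split.docs, split.is_triggered, split.triggered_score)
```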
import pytest

from semantic_router.llms import AzureOpenAILLM
from semantic_router.schema import Message


@pytest.fixture
def azure_openai_llm(mocker):
    mocker.patch("openai.Client")
    return AzureOpenAILLM(openai_api_key="test_api_key", azure_endpoint="test_endpoint")
class TestAzureOpenAILLM:
    def test_azure_openai_llm_init_with_api_key(self, azure_openai_llm):
        assert azure_openai_llm.client is not None, "Client should be initialized"
        assert (
            azure_openai_llm.name == "gpt-3.5-turbo"
        ), "Default name not set correctly"

    def test_azure_openai_llm_init_success(self, mocker):
        mocker.patch("os.getenv", return_value="fake-api-key")
        llm = AzureOpenAILLM()
        assert llm.client is not None

    def test_azure_openai_llm_init_without_api_key(self, mocker):
        mocker.patch("os.getenv", return_value=None)
        with pytest.raises(ValueError) as _:
            AzureOpenAILLM()

    # def test_azure_openai_llm_init_without_azure_endpoint(self, mocker):
    #     mocker.patch("os.getenv", side_effect=[None, "fake-api-key"])
    #     with pytest.raises(ValueError) as e:
    #         AzureOpenAILLM(openai_api_key="test_api_key")
    #     assert "Azure endpoint cannot be 'None'." in str(e.value)
    def test_azure_openai_llm_init_without_azure_endpoint(self, mocker):
        mocker.patch(
            "os.getenv",
            side_effect=lambda key, default=None: {
                "OPENAI_CHAT_MODEL_NAME": "test-model-name"
            }.get(key, default),
        )
        with pytest.raises(ValueError) as e:
            AzureOpenAILLM(openai_api_key="test_api_key")
        assert "Azure endpoint cannot be 'None'" in str(e.value)

    def test_azure_openai_llm_call_uninitialized_client(self, azure_openai_llm):
        # Set the client to None to simulate an uninitialized client
        azure_openai_llm.client = None
        with pytest.raises(ValueError) as e:
            llm_input = [Message(role="user", content="test")]
            azure_openai_llm(llm_input)
        assert "AzureOpenAI client is not initialized." in str(e.value)
    def test_azure_openai_llm_init_exception(self, mocker):
        mocker.patch("os.getenv", return_value="fake-api-key")
        mocker.patch(
            "openai.AzureOpenAI", side_effect=Exception("Initialization error")
        )
        with pytest.raises(ValueError) as e:
            AzureOpenAILLM()
        assert (
            "AzureOpenAI API client failed to initialize. Error: Initialization error"
            in str(e.value)
        )

    def test_azure_openai_llm_temperature_max_tokens_initialization(self):
        test_temperature = 0.5
        test_max_tokens = 100
        azure_llm = AzureOpenAILLM(
            openai_api_key="test_api_key",
            azure_endpoint="test_endpoint",
            temperature=test_temperature,
            max_tokens=test_max_tokens,
        )
        assert (
            azure_llm.temperature == test_temperature
        ), "Temperature not set correctly"
        assert azure_llm.max_tokens == test_max_tokens, "Max tokens not set correctly"
    def test_azure_openai_llm_call_success(self, azure_openai_llm, mocker):
        mock_completion = mocker.MagicMock()
        mock_completion.choices[0].message.content = "test"

        mocker.patch("os.getenv", return_value="fake-api-key")
        mocker.patch.object(
            azure_openai_llm.client.chat.completions,
            "create",
            return_value=mock_completion,
        )
        llm_input = [Message(role="user", content="test")]
        output = azure_openai_llm(llm_input)
        assert output == "test"
@@ -17,7 +17,8 @@ def test_semantic_splitter_consecutive_similarity_drop():
    result = semantic_splitter(mock_encoder, docs, threshold, split_method)

-    assert result == {"split 1": ["doc1", "doc2", "doc3"], "split 2": ["doc4", "doc5"]}
+    assert result[0].docs == ["doc1", "doc2", "doc3"]
+    assert result[1].docs == ["doc4", "doc5"]
def test_semantic_splitter_cumulative_similarity_drop():
@@ -33,7 +34,8 @@ def test_semantic_splitter_cumulative_similarity_drop():
    result = semantic_splitter(mock_encoder, docs, threshold, split_method)

-    assert result == {"split 1": ["doc1", "doc2"], "split 2": ["doc3", "doc4", "doc5"]}
+    assert result[0].docs == ["doc1", "doc2"]
+    assert result[1].docs == ["doc3", "doc4", "doc5"]
def test_semantic_splitter_invalid_method():
@@ -62,7 +64,5 @@ def test_split_by_topic():
        encoder=mock_encoder, threshold=0.5, split_method="consecutive_similarity_drop"
    )

-    assert result == {
-        "split 1": ["User: What is the latest news?"],
-        "split 2": ["Bot: How is the weather today?"],
-    }
+    assert result[0].docs == ["User: What is the latest news?"]
+    assert result[1].docs == ["Bot: How is the weather today?"]