Skip to content
Snippets Groups Projects
Unverified Commit 3530c854 authored by James Briggs's avatar James Briggs
Browse files

restructure for how we use config object and fix circular imports

parent 550c64e8
No related branches found
No related tags found
No related merge requests found
from .hybrid_layer import HybridRouteLayer from semantic_router.hybrid_layer import HybridRouteLayer
from .layer import RouteLayer from semantic_router.layer import RouteLayer
from .route import Route, RouteConfig from semantic_router.route import Route, RouteConfig
__all__ = ["RouteLayer", "HybridRouteLayer", "Route", "RouteConfig"] __all__ = ["RouteLayer", "HybridRouteLayer", "Route", "RouteConfig"]
from .base import BaseEncoder from semantic_router.encoders.base import BaseEncoder
from .bm25 import BM25Encoder from semantic_router.encoders.bm25 import BM25Encoder
from .cohere import CohereEncoder from semantic_router.encoders.cohere import CohereEncoder
from .openai import OpenAIEncoder from semantic_router.encoders.openai import OpenAIEncoder
__all__ = ["BaseEncoder", "CohereEncoder", "OpenAIEncoder", "BM25Encoder"] __all__ = ["BaseEncoder", "CohereEncoder", "OpenAIEncoder", "BM25Encoder"]
...@@ -9,7 +9,7 @@ from semantic_router.encoders import ( ...@@ -9,7 +9,7 @@ from semantic_router.encoders import (
) )
from semantic_router.utils.logger import logger from semantic_router.utils.logger import logger
from .route import Route from semantic_router.route import Route
class HybridRouteLayer: class HybridRouteLayer:
......
import json import json
import os
import numpy as np import numpy as np
import yaml import yaml
...@@ -11,7 +12,121 @@ from semantic_router.encoders import ( ...@@ -11,7 +12,121 @@ from semantic_router.encoders import (
from semantic_router.linear import similarity_matrix, top_scores from semantic_router.linear import similarity_matrix, top_scores
from semantic_router.utils.logger import logger from semantic_router.utils.logger import logger
from .route import Route from semantic_router.route import Route
from semantic_router.schema import Encoder, EncoderType, RouteChoice
def is_valid(route_config: str) -> bool:
    """Check that a JSON route config carries every required route key.

    The config may be a single route object or a list of route objects;
    each route must define ``name`` and ``utterances``.

    Args:
        route_config: JSON-encoded route config.

    Returns:
        True when every route has all required keys (an empty list counts
        as valid). False when the string is not valid JSON, an entry is not
        a JSON object, or a required key is missing.
    """
    required_keys = ["name", "utterances"]
    try:
        output_json = json.loads(route_config)
    except json.JSONDecodeError as e:
        logger.error(e)
        return False

    # Treat a lone route object the same way as a one-element list.
    items = output_json if isinstance(output_json, list) else [output_json]
    for item in items:
        if not isinstance(item, dict):
            # `key not in item` raises TypeError on numbers/null and performs
            # a substring match on strings, so reject non-objects explicitly.
            logger.warning("Route config entries must be JSON objects")
            return False
        missing_keys = [key for key in required_keys if key not in item]
        if missing_keys:
            logger.warning(
                f"Missing keys in route config: {', '.join(missing_keys)}"
            )
            return False
    return True
class LayerConfig:
    """
    Generates a LayerConfig object that can be used for initializing a
    RouteLayer.

    Holds the list of routes together with the encoder type and encoder
    name that the layer should use.
    """

    routes: list[Route]

    def __init__(
        self,
        routes: list[Route] | None = None,
        encoder_type: EncoderType | str = "openai",
        encoder_name: str | None = None,
    ):
        """Store the routes and resolve the encoder name.

        Args:
            routes: Routes for the layer. ``None`` (the default) starts with
                a fresh empty list, so instances never share one default
                list object.
            encoder_type: Encoder family, either an ``EncoderType`` member
                or its string value.
            encoder_name: Model name. When ``None`` the default model for
                ``encoder_type`` is used.

        Raises:
            NotImplementedError: If no name is given for the HuggingFace
                encoder type.
        """
        self.encoder_type = encoder_type

        def matches(member: EncoderType) -> bool:
            # The default is the plain string "openai", which never compares
            # equal to an Enum member, so accept the member or its value.
            return encoder_type == member or encoder_type == member.value

        if encoder_name is None:
            # if encoder_name is not provided, use the default encoder for type
            if matches(EncoderType.OPENAI):
                encoder_name = "text-embedding-ada-002"
            elif matches(EncoderType.COHERE):
                encoder_name = "embed-english-v3.0"
            elif matches(EncoderType.HUGGINGFACE):
                raise NotImplementedError
            logger.info(f"Using default {encoder_type} encoder: {encoder_name}")
        self.encoder_name = encoder_name
        self.routes = routes if routes is not None else []

    @classmethod
    def from_file(cls, path: str):
        """Load the routes from a file in JSON or YAML format

        Raises:
            ValueError: If the file extension is not .json, .yaml or .yml.
            Exception: If the content is not a valid route config.
        """
        logger.info(f"Loading route config from {path}")
        _, ext = os.path.splitext(path)
        with open(path, "r") as f:
            if ext == ".json":
                routes = json.load(f)
            elif ext in [".yaml", ".yml"]:
                routes = yaml.safe_load(f)
            else:
                raise ValueError(
                    "Unsupported file type. Only .json and .yaml are supported"
                )

        # A file holding one route object is accepted by is_valid, but
        # iterating a dict would yield its keys rather than routes.
        if isinstance(routes, dict):
            routes = [routes]
        # Empty files (None) or non-object entries cannot describe routes.
        if not isinstance(routes, list) or not all(
            isinstance(route, dict) for route in routes
        ):
            raise Exception("Invalid config JSON or YAML")

        route_config_str = json.dumps(routes)
        if is_valid(route_config_str):
            routes = [Route.from_dict(route) for route in routes]
            return cls(routes=routes)
        else:
            raise Exception("Invalid config JSON or YAML")

    def to_dict(self):
        """Return the routes as a list of dictionaries."""
        return [route.to_dict() for route in self.routes]

    def to_file(self, path: str):
        """Save the routes to a file in JSON or YAML format

        Raises:
            ValueError: If the file extension is not .json, .yaml or .yml.
        """
        logger.info(f"Saving route config to {path}")
        _, ext = os.path.splitext(path)
        # Validate and serialise before opening: opening in "w" mode creates
        # or truncates the target, which must not happen on a failed save.
        if ext not in [".json", ".yaml", ".yml"]:
            raise ValueError(
                "Unsupported file type. Only .json and .yaml are supported"
            )
        data = self.to_dict()
        with open(path, "w") as f:
            if ext == ".json":
                json.dump(data, f)
            else:
                yaml.safe_dump(data, f)

    def add(self, route: Route):
        """Append a route to the config."""
        self.routes.append(route)
        logger.info(f"Added route `{route.name}`")

    def get(self, name: str) -> Route | None:
        """Return the first route called `name`, or None if there is none."""
        for route in self.routes:
            if route.name == name:
                return route
        logger.error(f"Route `{name}` not found")
        return None

    def remove(self, name: str):
        """Drop every route called `name`; log an error if none exists."""
        if name not in [route.name for route in self.routes]:
            logger.error(f"Route `{name}` not found")
        else:
            self.routes = [route for route in self.routes if route.name != name]
            logger.info(f"Removed route `{name}`")
class RouteLayer: class RouteLayer:
...@@ -34,28 +149,52 @@ class RouteLayer: ...@@ -34,28 +149,52 @@ class RouteLayer:
# initialize index now # initialize index now
self._add_routes(routes=routes) self._add_routes(routes=routes)
def __call__(self, text: str) -> str | None: def __call__(self, text: str) -> RouteChoice:
results = self._query(text) results = self._query(text)
top_class, top_class_scores = self._semantic_classify(results) top_class, top_class_scores = self._semantic_classify(results)
passed = self._pass_threshold(top_class_scores, self.score_threshold) passed = self._pass_threshold(top_class_scores, self.score_threshold)
if passed: if passed:
return top_class # get chosen route object
route = [route for route in self.routes if route.name == top_class][0]
return route(text)
else: else:
return None # if no route passes threshold, return empty route choice
return RouteChoice()
@classmethod @classmethod
def from_json(cls, file_path: str): def from_json(cls, file_path: str):
with open(file_path, "r") as f: config = LayerConfig.from_file(file_path)
routes_data = json.load(f) encoder = Encoder(
routes = [Route.from_dict(route_data) for route_data in routes_data] encoder_type=config.encoder_type,
return cls(routes=routes) encoder_name=config.encoder_name
)
return cls(
encoder=encoder,
routes=config.routes
)
@classmethod @classmethod
def from_yaml(cls, file_path: str): def from_yaml(cls, file_path: str):
with open(file_path, "r") as f: config = LayerConfig.from_file(file_path)
routes_data = yaml.load(f, Loader=yaml.FullLoader) encoder = Encoder(
routes = [Route.from_dict(route_data) for route_data in routes_data] encoder_type=config.encoder_type,
return cls(routes=routes) encoder_name=config.encoder_name
)
return cls(
encoder=encoder,
routes=config.routes
)
@classmethod
def from_config(cls, config: LayerConfig):
    """Build a layer from a LayerConfig.

    Args:
        config: Supplies the encoder type, encoder name and routes.

    Returns:
        A new instance created with the configured encoder and routes.
    """
    # Encoder's fields are `type` and `name`; the previous
    # `encoder_type=` / `encoder_name=` keywords do not match them.
    encoder = Encoder(type=config.encoder_type, name=config.encoder_name)
    return cls(encoder=encoder, routes=config.routes)
def add(self, route: Route): def add(self, route: Route):
# create embeddings # create embeddings
...@@ -73,6 +212,8 @@ class RouteLayer: ...@@ -73,6 +212,8 @@ class RouteLayer:
else: else:
embed_arr = np.array(embeds) embed_arr = np.array(embeds)
self.index = np.concatenate([self.index, embed_arr]) self.index = np.concatenate([self.index, embed_arr])
# add route to routes list
self.routes.append(route)
def _add_routes(self, routes: list[Route]): def _add_routes(self, routes: list[Route]):
# create embeddings for all routes # create embeddings for all routes
......
import json import json
import os
import re import re
from typing import Any, Callable, Union from typing import Any, Callable, Union
import yaml
from pydantic import BaseModel from pydantic import BaseModel
from semantic_router.utils import function_call from semantic_router.utils import function_call
from semantic_router.utils.llm import llm from semantic_router.utils.llm import llm
from semantic_router.utils.logger import logger from semantic_router.utils.logger import logger
from semantic_router.schema import RouteChoice
def is_valid(route_config: str) -> bool:
    """Check that a JSON route config carries every required route key.

    The config may be a single route object or a list of route objects;
    each route must define ``name`` and ``utterances``.

    Args:
        route_config: JSON-encoded route config.

    Returns:
        True when every route has all required keys (an empty list counts
        as valid). False when the string is not valid JSON, an entry is not
        a JSON object, or a required key is missing.
    """
    required_keys = ["name", "utterances"]
    try:
        output_json = json.loads(route_config)
    except json.JSONDecodeError as e:
        logger.error(e)
        return False

    # Treat a lone route object the same way as a one-element list.
    items = output_json if isinstance(output_json, list) else [output_json]
    for item in items:
        if not isinstance(item, dict):
            # `key not in item` raises TypeError on numbers/null and performs
            # a substring match on strings, so reject non-objects explicitly.
            logger.warning("Route config entries must be JSON objects")
            return False
        missing_keys = [key for key in required_keys if key not in item]
        if missing_keys:
            logger.warning(
                f"Missing keys in route config: {', '.join(missing_keys)}"
            )
            return False
    return True
class Route(BaseModel): class Route(BaseModel):
name: str name: str
utterances: list[str] utterances: list[str]
description: str | None = None description: str | None = None
function_schema: dict[str, Any] | None = None
def __call__(self, query: str) -> RouteChoice:
    """Turn a query that matched this route into a RouteChoice.

    Args:
        query: The text that was routed here.

    Returns:
        A RouteChoice named after this route. Its `function_call` holds the
        inputs extracted from `query` when a function schema is set,
        otherwise None.
    """
    if self.function_schema:
        # if a function schema is provided we generate the inputs.
        # The result is bound to `func_call`, not `function_call`: assigning
        # to `function_call` anywhere in this method would make that name
        # local and the module lookup below would raise UnboundLocalError.
        func_call = function_call.extract_function_inputs(
            query=query, function_schema=self.function_schema
        )
    else:
        # otherwise we just pass None for the call
        func_call = None
    return RouteChoice(name=self.name, function_call=func_call)
def to_dict(self): def to_dict(self):
return self.dict() return self.dict()
...@@ -114,69 +101,3 @@ class Route(BaseModel): ...@@ -114,69 +101,3 @@ class Route(BaseModel):
raise Exception("No config generated") raise Exception("No config generated")
class RouteConfig:
    """
    Generates a RouteConfig object from a list of Route objects
    """

    routes: list[Route]

    def __init__(self, routes: list[Route] | None = None):
        """Store the routes.

        Args:
            routes: Initial routes. ``None`` (the default) starts with a
                fresh empty list, so instances never share one default list
                object.
        """
        self.routes = routes if routes is not None else []

    @classmethod
    def from_file(cls, path: str):
        """Load the routes from a file in JSON or YAML format

        Raises:
            ValueError: If the file extension is not .json, .yaml or .yml.
            Exception: If the content is not a valid route config.
        """
        logger.info(f"Loading route config from {path}")
        _, ext = os.path.splitext(path)
        with open(path, "r") as f:
            if ext == ".json":
                routes = json.load(f)
            elif ext in [".yaml", ".yml"]:
                routes = yaml.safe_load(f)
            else:
                raise ValueError(
                    "Unsupported file type. Only .json and .yaml are supported"
                )

        # A file holding one route object is accepted by is_valid, but
        # iterating a dict would yield its keys rather than routes.
        if isinstance(routes, dict):
            routes = [routes]
        # Empty files (None) or non-object entries cannot describe routes.
        if not isinstance(routes, list) or not all(
            isinstance(route, dict) for route in routes
        ):
            raise Exception("Invalid config JSON or YAML")

        route_config_str = json.dumps(routes)
        if is_valid(route_config_str):
            routes = [Route.from_dict(route) for route in routes]
            return cls(routes=routes)
        else:
            raise Exception("Invalid config JSON or YAML")

    def to_dict(self):
        """Return the routes as a list of dictionaries."""
        return [route.to_dict() for route in self.routes]

    def to_file(self, path: str):
        """Save the routes to a file in JSON or YAML format

        Raises:
            ValueError: If the file extension is not .json, .yaml or .yml.
        """
        logger.info(f"Saving route config to {path}")
        _, ext = os.path.splitext(path)
        # Validate and serialise before opening: opening in "w" mode creates
        # or truncates the target, which must not happen on a failed save.
        if ext not in [".json", ".yaml", ".yml"]:
            raise ValueError(
                "Unsupported file type. Only .json and .yaml are supported"
            )
        data = self.to_dict()
        with open(path, "w") as f:
            if ext == ".json":
                json.dump(data, f)
            else:
                yaml.safe_dump(data, f)

    def add(self, route: Route):
        """Append a route to the config."""
        self.routes.append(route)
        logger.info(f"Added route `{route.name}`")

    def get(self, name: str) -> Route | None:
        """Return the first route called `name`, or None if there is none."""
        for route in self.routes:
            if route.name == name:
                return route
        logger.error(f"Route `{name}` not found")
        return None

    def remove(self, name: str):
        """Drop every route called `name`; log an error if none exists."""
        if name not in [route.name for route in self.routes]:
            logger.error(f"Route `{name}` not found")
        else:
            self.routes = [route for route in self.routes if route.name != name]
            logger.info(f"Removed route `{name}`")
from enum import Enum from enum import Enum
from pydantic.dataclasses import dataclass from pydantic.dataclasses import dataclass
from pydantic import BaseModel
from semantic_router import Route
from semantic_router.encoders import ( from semantic_router.encoders import (
BaseEncoder, BaseEncoder,
CohereEncoder, CohereEncoder,
...@@ -16,6 +16,11 @@ class EncoderType(Enum): ...@@ -16,6 +16,11 @@ class EncoderType(Enum):
COHERE = "cohere" COHERE = "cohere"
class RouteChoice(BaseModel):
    """Outcome of a routing decision.

    Both fields default to None, so a bare ``RouteChoice()`` carries no
    route name and no function call.
    """

    # Name of the chosen route, if any.
    name: str | None = None
    # Function inputs attached to the choice, if any.
    function_call: dict | None = None
@dataclass @dataclass
class Encoder: class Encoder:
type: EncoderType type: EncoderType
...@@ -34,17 +39,3 @@ class Encoder: ...@@ -34,17 +39,3 @@ class Encoder:
def __call__(self, texts: list[str]) -> list[list[float]]: def __call__(self, texts: list[str]) -> list[list[float]]:
return self.model(texts) return self.model(texts)
@dataclass
class SemanticSpace:
    """A collection of routes with an id and an encoder name."""

    id: str
    routes: list[Route]
    encoder: str = ""

    def __init__(self, routes: list[Route] | None = None):
        """Start with an empty id and the given routes.

        Args:
            routes: Initial routes. ``None`` (the default) creates a new
                empty list per instance; a shared ``[]`` default would let
                `add` on one instance leak routes into every other instance
                built without arguments.
        """
        self.id = ""
        self.routes = routes if routes is not None else []

    def add(self, route: Route):
        """Append a route to this space."""
        self.routes.append(route)
...@@ -40,7 +40,7 @@ def get_schema(item: Union[BaseModel, Callable]) -> dict[str, Any]: ...@@ -40,7 +40,7 @@ def get_schema(item: Union[BaseModel, Callable]) -> dict[str, Any]:
return schema return schema
async def extract_function_inputs(query: str, function_schema: dict[str, Any]) -> dict: def extract_function_inputs(query: str, function_schema: dict[str, Any]) -> dict:
logger.info("Extracting function input...") logger.info("Extracting function input...")
prompt = f""" prompt = f"""
...@@ -72,7 +72,7 @@ async def extract_function_inputs(query: str, function_schema: dict[str, Any]) - ...@@ -72,7 +72,7 @@ async def extract_function_inputs(query: str, function_schema: dict[str, Any]) -
Result: Result:
""" """
output = await llm(prompt) output = llm(prompt)
if not output: if not output:
raise Exception("No output generated for extract function input") raise Exception("No output generated for extract function input")
......
...@@ -5,7 +5,36 @@ import openai ...@@ -5,7 +5,36 @@ import openai
from semantic_router.utils.logger import logger from semantic_router.utils.logger import logger
async def llm(prompt: str) -> str | None: def llm(prompt: str) -> str | None:
try:
client = openai.OpenAI(
base_url="https://openrouter.ai/api/v1",
api_key=os.getenv("OPENROUTER_API_KEY"),
)
completion = client.chat.completions.create(
model="mistralai/mistral-7b-instruct",
messages=[
{
"role": "user",
"content": prompt,
},
],
temperature=0.01,
max_tokens=200,
)
output = completion.choices[0].message.content
if not output:
raise Exception("No output generated")
return output
except Exception as e:
logger.error(f"LLM error: {e}")
raise Exception(f"LLM error: {e}")
async def allm(prompt: str) -> str | None:
try: try:
client = openai.AsyncOpenAI( client = openai.AsyncOpenAI(
base_url="https://openrouter.ai/api/v1", base_url="https://openrouter.ai/api/v1",
......
...@@ -6,7 +6,6 @@ from semantic_router.schema import ( ...@@ -6,7 +6,6 @@ from semantic_router.schema import (
Encoder, Encoder,
EncoderType, EncoderType,
OpenAIEncoder, OpenAIEncoder,
SemanticSpace,
) )
...@@ -40,20 +39,3 @@ class TestEncoderDataclass: ...@@ -40,20 +39,3 @@ class TestEncoderDataclass:
encoder = Encoder(type="openai", name="test-engine") encoder = Encoder(type="openai", name="test-engine")
result = encoder(["test"]) result = encoder(["test"])
assert result == [0.1, 0.2, 0.3] assert result == [0.1, 0.2, 0.3]
class TestSemanticSpaceDataclass:
    """Checks SemanticSpace construction and route registration."""

    def test_semanticspace_initialization(self):
        # A space built without arguments has an empty id and no routes.
        space = SemanticSpace()
        assert space.id == ""
        assert space.routes == []

    def test_semanticspace_add_route(self):
        # Adding one route stores it unchanged as the only entry.
        greeting = Route(
            name="test", utterances=["hello", "hi"], description="greeting"
        )
        space = SemanticSpace()
        space.add(greeting)
        assert len(space.routes) == 1
        stored = space.routes[0]
        assert stored.name == "test"
        assert stored.utterances == ["hello", "hi"]
        assert stored.description == "greeting"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment