diff --git a/docs/source/conf.py b/docs/source/conf.py
index 49131e8d5c0b329b2cd76a768755d594e3791a21..7442f48048719f225117198522b748facbe6d98e 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -15,7 +15,7 @@ sys.path.insert(0, os.path.abspath("../..")) # Source code dir relative to this
 project = "Semantic Router"
 copyright = "2024, Aurelio AI"
 author = "Aurelio AI"
-release = "0.1.0.dev3"
+release = "0.1.0.dev4"
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
diff --git a/pyproject.toml b/pyproject.toml
index 141056306e9a921cb4f5a007f8778da4494714b3..e246a21d93e1765038ddb8a661bb6af061b4520e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "semantic-router"
-version = "0.1.0.dev3"
+version = "0.1.0.dev4"
 description = "Super fast semantic router for AI decision making"
 authors = ["Aurelio AI <hello@aurelio.ai>"]
 readme = "README.md"
diff --git a/semantic_router/__init__.py b/semantic_router/__init__.py
index 93f2fc442e353fd32dac317fdc8b5d5db4cf4663..5cac23dcb572d29331937ab8baa657d41e3bbed1 100644
--- a/semantic_router/__init__.py
+++ b/semantic_router/__init__.py
@@ -3,4 +3,4 @@ from semantic_router.route import Route
 
 __all__ = ["SemanticRouter", "HybridRouter", "Route", "RouterConfig"]
 
-__version__ = "0.1.0.dev3"
+__version__ = "0.1.0.dev4"
diff --git a/semantic_router/routers/base.py b/semantic_router/routers/base.py
index 328cf2b77c4ff45bed8c861dda72407568841451..0bfc4eea94b9940b5dc476ed167c92062d31b2ff 100644
--- a/semantic_router/routers/base.py
+++ b/semantic_router/routers/base.py
@@ -4,6 +4,7 @@ import os
 import random
 import hashlib
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing_extensions import deprecated
 
 from pydantic import BaseModel, Field
 import numpy as np
@@ -280,6 +281,20 @@ class RouterConfig:
         )
 
 
+def xq_reshape(xq: List[float] | np.ndarray) -> np.ndarray:
+    # convert to numpy array if not already
+    if not isinstance(xq, np.ndarray):
+        xq = np.array(xq)
+    # check if vector is 1D and expand to 2D if necessary
+    if len(xq.shape) == 1:
+        xq = np.expand_dims(xq, axis=0)
+    if xq.shape[0] != 1:
+        raise ValueError(
+            f"Expected (1, x) dimensional input for query, got {xq.shape}."
+        )
+    return xq
+
+
 class BaseRouter(BaseModel):
     encoder: DenseEncoder = Field(default_factory=OpenAIEncoder)
     index: BaseIndex = Field(default_factory=BaseIndex)
@@ -402,7 +417,7 @@ class BaseRouter(BaseModel):
     def __call__(
         self,
         text: Optional[str] = None,
-        vector: Optional[List[float]] = None,
+        vector: Optional[List[float] | np.ndarray] = None,
        simulate_static: bool = False,
         route_filter: Optional[List[str]] = None,
     ) -> RouteChoice:
@@ -411,6 +426,9 @@ class BaseRouter(BaseModel):
             if text is None:
                 raise ValueError("Either text or vector must be provided")
             vector = self._encode(text=[text])
+        # convert to numpy array if not already
+        vector = xq_reshape(vector)
+        # calculate semantics
         route, top_class_scores = self._retrieve_top_route(vector, route_filter)
         passed = self._check_threshold(top_class_scores, route)
         if passed and route is not None and not simulate_static:
@@ -444,7 +462,7 @@ class BaseRouter(BaseModel):
     async def acall(
         self,
         text: Optional[str] = None,
-        vector: Optional[List[float]] = None,
+        vector: Optional[List[float] | np.ndarray] = None,
         simulate_static: bool = False,
         route_filter: Optional[List[str]] = None,
     ) -> RouteChoice:
@@ -453,7 +471,9 @@ class BaseRouter(BaseModel):
             if text is None:
                 raise ValueError("Either text or vector must be provided")
             vector = await self._async_encode(text=[text])
-
+        # convert to numpy array if not already
+        vector = xq_reshape(vector)
+        # calculate semantics
         route, top_class_scores = await self._async_retrieve_top_route(
             vector, route_filter
         )
@@ -483,19 +503,21 @@ class BaseRouter(BaseModel):
         # if no route passes threshold, return empty route choice
         return RouteChoice()
 
+    # TODO: add multiple routes return to __call__ and acall
+    @deprecated("This method is deprecated. Use `__call__` instead.")
     def retrieve_multiple_routes(
         self,
         text: Optional[str] = None,
-        vector: Optional[List[float]] = None,
+        vector: Optional[List[float] | np.ndarray] = None,
     ) -> List[RouteChoice]:
         if vector is None:
             if text is None:
                 raise ValueError("Either text or vector must be provided")
-            vector_arr = self._encode(text=[text])
-        else:
-            vector_arr = np.array(vector)
+            vector = self._encode(text=[text])
+        # convert to numpy array if not already
+        vector = xq_reshape(vector)
         # get relevant utterances
-        results = self._retrieve(xq=vector_arr)
+        results = self._retrieve(xq=vector)
         # decide most relevant routes
         categories_with_scores = self._semantic_classify_multiple_routes(results)
         return [
@@ -514,16 +536,14 @@ class BaseRouter(BaseModel):
     #     return route_choices
 
     def _retrieve_top_route(
-        self, vector: List[float], route_filter: Optional[List[str]] = None
+        self, vector: np.ndarray, route_filter: Optional[List[str]] = None
     ) -> Tuple[Optional[Route], List[float]]:
         """
         Retrieve the top matching route based on the given vector.
         Returns a tuple of the route (if any) and the scores of the top class.
         """
         # get relevant results (scores and routes)
-        results = self._retrieve(
-            xq=np.array(vector), top_k=self.top_k, route_filter=route_filter
-        )
+        results = self._retrieve(xq=vector, top_k=self.top_k, route_filter=route_filter)
         # decide most relevant routes
         top_class, top_class_scores = self._semantic_classify(results)
         # TODO do we need this check?
@@ -531,11 +551,11 @@ class BaseRouter(BaseModel):
         return route, top_class_scores
 
     async def _async_retrieve_top_route(
-        self, vector: List[float], route_filter: Optional[List[str]] = None
+        self, vector: np.ndarray, route_filter: Optional[List[str]] = None
     ) -> Tuple[Optional[Route], List[float]]:
         # get relevant results (scores and routes)
         results = await self._async_retrieve(
-            xq=np.array(vector), top_k=self.top_k, route_filter=route_filter
+            xq=vector, top_k=self.top_k, route_filter=route_filter
         )
         # decide most relevant routes
         top_class, top_class_scores = await self._async_semantic_classify(results)
@@ -939,7 +959,7 @@ class BaseRouter(BaseModel):
         """Given a query vector, retrieve the top_k most similar records."""
         # get scores and routes
         scores, routes = self.index.query(
-            vector=xq, top_k=top_k, route_filter=route_filter
+            vector=xq[0], top_k=top_k, route_filter=route_filter
         )
         return [{"route": d, "score": s.item()} for d, s in zip(routes, scores)]
 
@@ -949,7 +969,7 @@ class BaseRouter(BaseModel):
         """Given a query vector, retrieve the top_k most similar records."""
         # get scores and routes
         scores, routes = await self.index.aquery(
-            vector=xq, top_k=top_k, route_filter=route_filter
+            vector=xq[0], top_k=top_k, route_filter=route_filter
        )
         return [{"route": d, "score": s.item()} for d, s in zip(routes, scores)]
 
diff --git a/semantic_router/routers/hybrid.py b/semantic_router/routers/hybrid.py
index 994fcb2dfc8c5e4d85589ca5cf777c2ab7a26d26..54901d5e50a0116be8a88496869b17ec00aa06e8 100644
--- a/semantic_router/routers/hybrid.py
+++ b/semantic_router/routers/hybrid.py
@@ -14,7 +14,7 @@ from semantic_router.route import Route
 from semantic_router.index import BaseIndex, HybridLocalIndex
 from semantic_router.schema import RouteChoice, SparseEmbedding, Utterance
 from semantic_router.utils.logger import logger
-from semantic_router.routers.base import BaseRouter
+from semantic_router.routers.base import BaseRouter, xq_reshape
 from semantic_router.llms import BaseLLM
 
 
@@ -197,18 +197,19 @@ class HybridRouter(BaseRouter):
     def __call__(
         self,
         text: Optional[str] = None,
-        vector: Optional[List[float]] = None,
+        vector: Optional[List[float] | np.ndarray] = None,
         simulate_static: bool = False,
         route_filter: Optional[List[str]] = None,
         sparse_vector: dict[int, float] | SparseEmbedding | None = None,
     ) -> RouteChoice:
-        vector_arr: np.ndarray | None = None
         potential_sparse_vector: List[SparseEmbedding] | None = None
         # if no vector provided, encode text to get vector
         if vector is None:
             if text is None:
                 raise ValueError("Either text or vector must be provided")
-            vector_arr, potential_sparse_vector = self._encode(text=[text])
+            vector, potential_sparse_vector = self._encode(text=[text])
+        # convert to numpy array if not already
+        vector = xq_reshape(vector)
         if sparse_vector is None:
             if text is None:
                 raise ValueError("Either text or sparse_vector must be provided")
@@ -217,10 +218,9 @@ class HybridRouter(BaseRouter):
             )
             if sparse_vector is None:
                 raise ValueError("Sparse vector is required for HybridLocalIndex.")
-        vector_arr = vector_arr if vector_arr is not None else np.array(vector)
         # TODO: add alpha as a parameter
         scores, route_names = self.index.query(
-            vector=vector_arr,
+            vector=vector,
             top_k=self.top_k,
             route_filter=route_filter,
             sparse_vector=sparse_vector,
diff --git a/semantic_router/routers/semantic.py b/semantic_router/routers/semantic.py
index 33af2a32da0bff9942512cc83ee04e6ef9342037..41c92d538828409f6d36cab74086096fba52e844 100644
--- a/semantic_router/routers/semantic.py
+++ b/semantic_router/routers/semantic.py
@@ -40,14 +40,12 @@ class SemanticRouter(BaseRouter):
         """Given some text, encode it."""
         # create query vector
         xq = np.array(self.encoder(text))
-        xq = np.squeeze(xq)  # Reduce to 1d array.
         return xq
 
     async def _async_encode(self, text: list[str]) -> Any:
         """Given some text, encode it."""
         # create query vector
         xq = np.array(await self.encoder.acall(docs=text))
-        xq = np.squeeze(xq)  # Reduce to 1d array.
         return xq
 
     def add(self, routes: List[Route] | Route):
diff --git a/tests/unit/test_router.py b/tests/unit/test_router.py
index ef36e0ab4b4180b3ed5e5746d984691efd2b5150..1f743f1c8581fedc6232e57742b90c06d6671fe3 100644
--- a/tests/unit/test_router.py
+++ b/tests/unit/test_router.py
@@ -149,6 +149,13 @@ def routes_4():
     ]
 
 
+@pytest.fixture
+def route_single_utterance():
+    return [
+        Route(name="Route 3", utterances=["Hello"]),
+    ]
+
+
 @pytest.fixture
 def dynamic_routes():
     return [
@@ -251,6 +258,39 @@ class TestSemanticRouter:
         )
         assert route_layer.score_threshold == openai_encoder.score_threshold
 
+    def test_add_single_utterance(
+        self, routes, route_single_utterance, openai_encoder, index_cls
+    ):
+        index = init_index(index_cls)
+        route_layer = SemanticRouter(
+            encoder=openai_encoder,
+            routes=routes,
+            index=index,
+            auto_sync="local",
+        )
+        route_layer.add(routes=route_single_utterance)
+        assert route_layer.score_threshold == openai_encoder.score_threshold
+        if index_cls is PineconeIndex:
+            time.sleep(PINECONE_SLEEP)  # allow for index to be updated
+        _ = route_layer("Hello")
+        assert len(route_layer.index.get_utterances()) == 6
+
+    def test_init_and_add_single_utterance(
+        self, route_single_utterance, openai_encoder, index_cls
+    ):
+        index = init_index(index_cls)
+        route_layer = SemanticRouter(
+            encoder=openai_encoder,
+            index=index,
+            auto_sync="local",
+        )
+        if index_cls is PineconeIndex:
+            time.sleep(PINECONE_SLEEP)  # allow for index to be updated
+        route_layer.add(routes=route_single_utterance)
+        assert route_layer.score_threshold == openai_encoder.score_threshold
+        _ = route_layer("Hello")
+        assert len(route_layer.index.get_utterances()) == 1
+
     def test_delete_index(self, openai_encoder, routes, index_cls):
         # TODO merge .delete_index() and .delete_all() and get working
         index = init_index(index_cls)
@@ -786,6 +826,8 @@ class TestSemanticRouter:
             auto_sync="local",
         )
         vector = [0.1, 0.2, 0.3]
+        if index_cls is PineconeIndex:
+            time.sleep(PINECONE_SLEEP)  # allow for index to be populated
         results = route_layer.retrieve_multiple_routes(vector=vector)
         assert len(results) >= 1, "Expected at least one result"
         assert any(
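For reference, a minimal usage sketch (not part of the diff) of how the new xq_reshape helper added in semantic_router/routers/base.py normalizes query vectors before they reach the index; the example vectors below are illustrative only and assume numpy is installed.

# illustrative sketch of xq_reshape behaviour; example vectors are made up
import numpy as np

from semantic_router.routers.base import xq_reshape

# a plain Python list is converted to a (1, d) numpy array
assert xq_reshape([0.1, 0.2, 0.3]).shape == (1, 3)

# a 1D numpy array is expanded to 2D
assert xq_reshape(np.array([0.1, 0.2, 0.3])).shape == (1, 3)

# an already (1, d) array passes through unchanged
assert xq_reshape(np.array([[0.1, 0.2, 0.3]])).shape == (1, 3)

# more than one query row raises ValueError
try:
    xq_reshape(np.zeros((2, 3)))
except ValueError as e:
    print(e)  # Expected (1, x) dimensional input for query, got (2, 3).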