diff --git a/semantic_router/encoders/base.py b/semantic_router/encoders/base.py index dcfe9aa322addd8e6f5cdae77c958d97fce403b9..ed0eb523fe7973a80b3a9669be2ec6af7cd4ae5a 100644 --- a/semantic_router/encoders/base.py +++ b/semantic_router/encoders/base.py @@ -35,12 +35,15 @@ class SparseEncoder(BaseModel): def __call__(self, docs: List[str]) -> List[SparseEmbedding]: raise NotImplementedError("Subclasses must implement this method") - async def acall(self, docs: List[str]) -> Coroutine[Any, Any, List[SparseEmbedding]]: + async def acall( + self, docs: List[str] + ) -> Coroutine[Any, Any, List[SparseEmbedding]]: raise NotImplementedError("Subclasses must implement this method") - - def _array_to_sparse_embeddings(self, sparse_arrays: np.ndarray) -> List[SparseEmbedding]: - """Consumes several sparse vectors containing zero-values and returns a compact array. - """ + + def _array_to_sparse_embeddings( + self, sparse_arrays: np.ndarray + ) -> List[SparseEmbedding]: + """Consumes several sparse vectors containing zero-values and returns a compact array.""" if sparse_arrays.ndim != 2: raise ValueError(f"Expected a 2D array, got a {sparse_arrays.ndim}D array.") # get coordinates of non-zero values @@ -50,4 +53,3 @@ class SparseEncoder(BaseModel): arr_range = range(compact_array[:, 0].max().astype(int) + 1) arrs = [compact_array[compact_array[:, 0] == i, :][:, 1:3] for i in arr_range] return [SparseEmbedding.from_compact_array(arr) for arr in arrs] - diff --git a/semantic_router/encoders/bm25.py b/semantic_router/encoders/bm25.py index 0ec36499dabee336a3857efffbf9f67bb9a58248..e2bb24c1f12795a829390beb8fd6b0f13656eb76 100644 --- a/semantic_router/encoders/bm25.py +++ b/semantic_router/encoders/bm25.py @@ -60,4 +60,3 @@ class BM25Encoder(TfidfEncoder): position = self.idx_mapping[idx] embeds[i][position] = val return embeds - diff --git a/semantic_router/routers/base.py b/semantic_router/routers/base.py index b092a297fa416ffe04f184575ae6d9d079532cda..21ff9a323cc1ac966ca974ef14e28a372c6e20be 100644 --- a/semantic_router/routers/base.py +++ b/semantic_router/routers/base.py @@ -317,7 +317,7 @@ class BaseRouter(BaseModel): self.llm = llm self.routes = routes.copy() if routes else [] # initialize index - self.index =self._get_index(index=index) + self.index = self._get_index(index=index) # set score threshold using default method self._set_score_threshold() self.top_k = top_k @@ -346,7 +346,7 @@ class BaseRouter(BaseModel): else: index = index return index - + def _get_encoder(self, encoder: Optional[DenseEncoder]) -> DenseEncoder: if encoder is None: logger.warning("No encoder provided. Using default OpenAIEncoder.") @@ -506,19 +506,20 @@ class BaseRouter(BaseModel): categories_with_scores = self._semantic_classify_multiple_routes(results) print(f"{categories_with_scores=}") return [ - RouteChoice(name=category, similarity_score=score) for category, score in categories_with_scores + RouteChoice(name=category, similarity_score=score) + for category, score in categories_with_scores ] - #route_choices = [] + # route_choices = [] # TODO JB: do we need this check? Maybe we should be returning directly - #for category, score in categories_with_scores: + # for category, score in categories_with_scores: # route = self.check_for_matching_routes(category) # if route: # route_choice = RouteChoice(name=route.name, similarity_score=score) # route_choices.append(route_choice) - #return route_choices - + # return route_choices + def _retrieve_top_route( self, vector: List[float], route_filter: Optional[List[str]] = None ) -> Tuple[Optional[Route], List[float]]: diff --git a/semantic_router/routers/hybrid.py b/semantic_router/routers/hybrid.py index 64418f983d410a47b9f05538bb710918ce672a3e..be3880abbbe9afd7e8f368695eaa6d34e7219ca8 100644 --- a/semantic_router/routers/hybrid.py +++ b/semantic_router/routers/hybrid.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, List, Optional import asyncio from pydantic.v1 import Field diff --git a/semantic_router/schema.py b/semantic_router/schema.py index b54a452f5f86e58a4a8212d1db95f7bf6605ce6d..7fcc8371853f7cf05cf55513168d90df6b05e85e 100644 --- a/semantic_router/schema.py +++ b/semantic_router/schema.py @@ -426,11 +426,10 @@ class SparseEmbedding(BaseModel): "Column 0 should contain index positions, and column 1 should contain respective values." ) return cls(embedding=array) - + @classmethod def from_vector(cls, vector: np.ndarray): - """Consumes an array of sparse vectors containing zero-values. - """ + """Consumes an array of sparse vectors containing zero-values.""" if vector.ndim != 1: raise ValueError(f"Expected a 1D array, got a {vector.ndim}D array.") return cls.from_compact_array(np.array([np.arange(len(vector)), vector]).T)