diff --git a/semantic_router/encoders/aurelio.py b/semantic_router/encoders/aurelio.py index 8824b2f1df866b9f3479b89b77f49586bef4203d..779fe6b14475625725119d453d133d7c7f929821 100644 --- a/semantic_router/encoders/aurelio.py +++ b/semantic_router/encoders/aurelio.py @@ -19,9 +19,11 @@ class AurelioSparseEncoder(SparseEncoder): def __init__( self, - name: str = "bm25", + name: str | None = None, api_key: Optional[str] = None, ): + if name is None: + name = "bm25" super().__init__(name=name) if api_key is None: api_key = os.getenv("AURELIO_API_KEY") diff --git a/semantic_router/encoders/base.py b/semantic_router/encoders/base.py index ed0eb523fe7973a80b3a9669be2ec6af7cd4ae5a..0a25f21050e2e5da1ea610fb5194324cdeef9a5f 100644 --- a/semantic_router/encoders/base.py +++ b/semantic_router/encoders/base.py @@ -35,9 +35,7 @@ class SparseEncoder(BaseModel): def __call__(self, docs: List[str]) -> List[SparseEmbedding]: raise NotImplementedError("Subclasses must implement this method") - async def acall( - self, docs: List[str] - ) -> Coroutine[Any, Any, List[SparseEmbedding]]: + async def acall(self, docs: List[str]) -> list[SparseEmbedding]: raise NotImplementedError("Subclasses must implement this method") def _array_to_sparse_embeddings( diff --git a/semantic_router/encoders/bm25.py b/semantic_router/encoders/bm25.py index e2bb24c1f12795a829390beb8fd6b0f13656eb76..3357ded8371be116c4ffba154c983304db5d5f42 100644 --- a/semantic_router/encoders/bm25.py +++ b/semantic_router/encoders/bm25.py @@ -12,9 +12,11 @@ class BM25Encoder(TfidfEncoder): def __init__( self, - name: str = "bm25", + name: str | None = None, use_default_params: bool = True, ): + if name is None: + name = "bm25" super().__init__(name=name) try: from pinecone_text.sparse import BM25Encoder as encoder diff --git a/semantic_router/encoders/tfidf.py b/semantic_router/encoders/tfidf.py index d9d97a473eadb06e12e0f34fa8c82cfefa6caf57..a7ac9136c08fb1717dfd1615f5b30c3ad263b767 100644 --- a/semantic_router/encoders/tfidf.py +++ b/semantic_router/encoders/tfidf.py @@ -15,7 +15,9 @@ class TfidfEncoder(SparseEncoder): idf: ndarray = np.array([]) word_index: Dict = {} - def __init__(self, name: str = "tfidf"): + def __init__(self, name: str | None = None): + if name is None: + name = "tfidf" super().__init__(name=name) self.word_index = {} self.idf = np.array([]) diff --git a/semantic_router/index/pinecone.py b/semantic_router/index/pinecone.py index bb6ed3ef1fcca3079363097b9847ecfc16386cf9..6086de1c5a3de8d18023c3488064399220a9f8a5 100644 --- a/semantic_router/index/pinecone.py +++ b/semantic_router/index/pinecone.py @@ -223,7 +223,7 @@ class PineconeIndex(BaseIndex): # if the index doesn't exist and we don't have the dimensions # we raise warning logger.warning("Index could not be initialized.") - self.host = index_stats["host"] if index_stats else None + self.host = index_stats["host"] if index_stats else "" def _batch_upsert(self, batch: List[Dict]): """Helper method for upserting a single batch of records.""" @@ -466,7 +466,7 @@ class PineconeIndex(BaseIndex): :rtype: Tuple[np.ndarray, List[str]] :raises ValueError: If the index is not populated. """ - if self.async_client is None or self.host is None: + if self.async_client is None or self.host == "": raise ValueError("Async client or host are not initialized.") query_vector_list = vector.tolist() if route_filter is not None: @@ -492,7 +492,7 @@ class PineconeIndex(BaseIndex): :return: A list of (route_name, utterance) objects. :rtype: List[Tuple] """ - if self.async_client is None or self.host is None: + if self.async_client is None or self.host == "": raise ValueError("Async client or host are not initialized.") return await self._async_get_routes() @@ -519,6 +519,8 @@ class PineconeIndex(BaseIndex): "top_k": top_k, "include_metadata": include_metadata, } + if self.host == "": + raise ValueError("self.host is not initialized.") async with self.async_client.post( f"https://{self.host}/query", json=params, @@ -569,6 +571,8 @@ class PineconeIndex(BaseIndex): """ if self.index is None: raise ValueError("Index is None, could not retrieve vector IDs.") + if self.host == "": + raise ValueError("self.host is not initialized.") all_vector_ids = [] next_page_token = None @@ -623,6 +627,8 @@ class PineconeIndex(BaseIndex): :return: A dictionary containing the metadata for the vector. :rtype: dict """ + if self.host == "": + raise ValueError("self.host is not initialized.") url = f"https://{self.host}/vectors/fetch" params = { diff --git a/semantic_router/routers/base.py b/semantic_router/routers/base.py index 21ff9a323cc1ac966ca974ef14e28a372c6e20be..0172e5d803c6a3e9f73aa4c44a41381f1b5de4d2 100644 --- a/semantic_router/routers/base.py +++ b/semantic_router/routers/base.py @@ -700,18 +700,27 @@ class BaseRouter(BaseModel): def from_json(cls, file_path: str): config = RouterConfig.from_file(file_path) encoder = AutoEncoder(type=config.encoder_type, name=config.encoder_name).model - return cls(encoder=encoder, routes=config.routes) + if isinstance(encoder, DenseEncoder): + return cls(encoder=encoder, routes=config.routes) + else: + raise ValueError(f"{type(encoder)} not supported for loading from JSON.") @classmethod def from_yaml(cls, file_path: str): config = RouterConfig.from_file(file_path) encoder = AutoEncoder(type=config.encoder_type, name=config.encoder_name).model - return cls(encoder=encoder, routes=config.routes) + if isinstance(encoder, DenseEncoder): + return cls(encoder=encoder, routes=config.routes) + else: + raise ValueError(f"{type(encoder)} not supported for loading from YAML.") @classmethod def from_config(cls, config: RouterConfig, index: Optional[BaseIndex] = None): encoder = AutoEncoder(type=config.encoder_type, name=config.encoder_name).model - return cls(encoder=encoder, routes=config.routes, index=index) + if isinstance(encoder, DenseEncoder): + return cls(encoder=encoder, routes=config.routes, index=index) + else: + raise ValueError(f"{type(encoder)} not supported for loading from config.") def add(self, route: Route): """Add a route to the local SemanticRouter and index.