diff --git a/semantic_router/encoders/tfidf.py b/semantic_router/encoders/tfidf.py
index fe285e5ce9bd9db85f31c3899b89b02a69699f84..656ee117e54b60a293c0b425452065258bb5db96 100644
--- a/semantic_router/encoders/tfidf.py
+++ b/semantic_router/encoders/tfidf.py
@@ -53,13 +53,11 @@ class TfidfEncoder(SparseEncoder):
             raise TypeError("`routes` parameter must be a list of Route objects.")
 
     def _build_word_index(self, docs: List[str]) -> Dict:
-        print(docs)
         words = set()
         for doc in docs:
             for word in doc.split():
                 words.add(word)
         word_index = {word: i for i, word in enumerate(words)}
-        print(word_index)
         return word_index
 
     def _compute_tf(self, docs: List[str]) -> np.ndarray:
diff --git a/semantic_router/routers/hybrid.py b/semantic_router/routers/hybrid.py
index e603add396ef602beaf3ea673d3875d619840471..36ccd8f94c5e4aa1360462a34ad10382f0c92171 100644
--- a/semantic_router/routers/hybrid.py
+++ b/semantic_router/routers/hybrid.py
@@ -37,13 +37,10 @@ class HybridRouter(BaseRouter):
         auto_sync: Optional[str] = None,
         alpha: float = 0.3,
     ):
-        print("...2.1")
         if index is None:
             logger.warning("No index provided. Using default HybridLocalIndex.")
             index = HybridLocalIndex()
-        print("...2.2")
         encoder = self._get_encoder(encoder=encoder)
-        print("...2.3")
         super().__init__(
             encoder=encoder,
             llm=llm,
@@ -53,22 +50,17 @@ class HybridRouter(BaseRouter):
             aggregation=aggregation,
             auto_sync=auto_sync,
         )
-        print("...0")
         # initialize sparse encoder
         self.sparse_encoder = self._get_sparse_encoder(sparse_encoder=sparse_encoder)
-        print("...5")
         # set alpha
         self.alpha = alpha
-        print("...6")
         # fit sparse encoder if needed
         if (
             isinstance(self.sparse_encoder, TfidfEncoder)
             and hasattr(self.sparse_encoder, "fit")
             and self.routes
         ):
-            print("...3")
             self.sparse_encoder.fit(self.routes)
-            print("...4")
         # run initialize index now if auto sync is active
         if self.auto_sync:
             self._init_index_state()
@@ -94,7 +86,6 @@ class HybridRouter(BaseRouter):
         # TODO: to merge, self._encode should probably output a special
         # TODO Embedding type that can be either dense or hybrid
         dense_emb, sparse_emb = self._encode(all_utterances)
-        print(f"{sparse_emb=}")
         self.index.add(
             embeddings=dense_emb.tolist(),
             routes=route_names,
@@ -180,8 +171,6 @@ class HybridRouter(BaseRouter):
         xq_s = self.sparse_encoder(text)
         # xq_s = np.squeeze(xq_s)
         # convex scaling
-        print(f"{self.sparse_encoder.__class__.__name__=}")
-        print(f"_encode: {xq_d.shape=}, {xq_s=}")
         xq_d, xq_s = self._convex_scaling(dense=xq_d, sparse=xq_s)
         return xq_d, xq_s
 
@@ -202,7 +191,6 @@ class HybridRouter(BaseRouter):
         # create dense query vector
         xq_d = np.array(dense_vec)
         # convex scaling
-        print(f"_async_encode: {xq_d.shape=}, {xq_s=}")
         xq_d, xq_s = self._convex_scaling(dense=xq_d, sparse=xq_s)
         return xq_d, xq_s
 
diff --git a/tests/unit/test_hybrid_layer.py b/tests/unit/test_hybrid_layer.py
index f9f8ff6d4bd85d8d4ffd8e938df02a3a88030f62..b12ea2f5b12d199f809664a8b56630b0da9ae2a1 100644
--- a/tests/unit/test_hybrid_layer.py
+++ b/tests/unit/test_hybrid_layer.py
@@ -146,14 +146,12 @@ class TestHybridRouter:
         assert len(route_layer.routes) == 2, "route_layer.routes is not 2"
 
     def test_query_and_classification(self, openai_encoder, routes):
-        print("...1")
         route_layer = HybridRouter(
             encoder=openai_encoder,
             sparse_encoder=sparse_encoder,
             routes=routes,
             auto_sync="local",
         )
-        print("...2")
         route_layer.set_threshold(0.0)
         query_result = route_layer(UTTERANCES[0])
         assert query_result.name in ["Route 1", "Route 2"]