Skip to content
Snippets Groups Projects
Commit 4d3ba4d3 authored by “Daniel Griffiths”'s avatar “Daniel Griffiths”
Browse files

fixed tests

parent 4394759e
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id: tags:
# Semantic Router: Hybrid Layer
%% Cell type:markdown id: tags:
The Hybrid Layer in the Semantic Router library can improve decision-making performance, particularly for niche use-cases that contain specific terminology, such as finance or medical. It helps us give more importance to decision making based on the keywords contained in our utterances and user queries.
%% Cell type:markdown id: tags:
## Getting Started
%% Cell type:markdown id: tags:
We start by installing the library:
%% Cell type:code id: tags:
``` python
# !pip install -qU semantic-router==0.0.6
```
%% Cell type:markdown id: tags:
We start by defining `Route` objects, which map routes to example phrases (utterances) that should trigger those routes.
%% Cell type:code id: tags:
``` python
# A Route is built from a name plus the example utterances that should trigger it.
from semantic_router.schema import Route
# "politics" route: sample phrases expressing opinions about politics,
# the president and the country.
politics = Route(
name="politics",
utterances=[
"isn't politics the best thing ever",
"why don't you tell me about your political opinions",
"don't you just love the president",
"don't you just hate the president",
"they're going to destroy this country!",
"they will save the country!",
],
)
```
%% Output
/Users/danielgriffiths/Coding_files/Aurelio_local/semantic-router/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
from .autonotebook import tqdm as notebook_tqdm
%% Cell type:markdown id: tags:
Let's define another route for good measure:
%% Cell type:code id: tags:
``` python
# "chitchat" route: small-talk phrases, mostly about the weather.
chitchat = Route(
name="chitchat",
utterances=[
"how's the weather today?",
"how are things going?",
"lovely weather today",
"the weather is horrendous",
"let's go to the chippy",
],
)
# Collect both routes in one list; it is handed to the route layer as `routes`.
routes = [politics, chitchat]
```
%% Cell type:markdown id: tags:
Now we initialize our embedding models — a dense encoder (Cohere) and a sparse encoder (TF-IDF):
%% Cell type:code id: tags:
``` python
import os
from semantic_router.encoders import CohereEncoder, BM25Encoder, TfidfEncoder
from getpass import getpass

# Reuse the API key from the environment when it is already set; otherwise
# prompt for it. os.environ.get() returns None for a missing variable, whereas
# indexing os.environ[...] raises KeyError before the getpass() fallback runs.
os.environ["COHERE_API_KEY"] = os.environ.get("COHERE_API_KEY") or getpass(
    "Enter Cohere API Key: "
)

# Dense encoder; created only after COHERE_API_KEY has been populated above
# (presumably CohereEncoder reads the key from the environment — confirm).
dense_encoder = CohereEncoder()
# Sparse encoder: TF-IDF is used here; the BM25 line is kept as an alternative.
# sparse_encoder = BM25Encoder()
sparse_encoder = TfidfEncoder()
```
%% Cell type:markdown id: tags:
Now we define the `HybridRouteLayer`. When called, the route layer will consume text (a query) and output the category (`Route`) it belongs to — to initialize a `HybridRouteLayer` we need our `dense_encoder` and `sparse_encoder` models and a list of `routes`.
%% Cell type:code id: tags:
``` python
from semantic_router.hybrid_layer import HybridRouteLayer

# Build the hybrid layer exactly once. Constructing it twice with identical
# arguments only discards the first instance and repeats the setup work.
dl = HybridRouteLayer(
    dense_encoder=dense_encoder, sparse_encoder=sparse_encoder, routes=routes
)
```
%% Output
100%|██████████| 2/2 [00:00<00:00, 2.58it/s]
100%|██████████| 2/2 [00:00<00:00, 4.22it/s]
%% Cell type:code id: tags:
``` python
# Calling the layer with a query returns the matched route name;
# the recorded output for this query is 'politics'.
dl("don't you love politics?")
```
%% Output
'politics'
%% Cell type:code id: tags:
``` python
# This query is identical to one of the chitchat utterances;
# the recorded output is 'chitchat'.
dl("how's the weather today?")
```
%% Output
'chitchat'
%% Cell type:code id: tags:
``` python
# "religion" route: questions about five major religions. It is defined after
# the layer was built so that it can be added to the existing layer.
religion = Route(
name="religion",
utterances=[
"what do you know about Buddhism?",
"tell me about Christianity",
"explain the principles of Hinduism",
"describe the teachings of Islam",
"what are the main beliefs of Judaism?",
],
)
```
%% Cell type:code id: tags:
``` python
# Register the new route on the already-constructed layer.
dl.add(religion)
```
%% Cell type:code id: tags:
``` python
# Query the layer after adding the religion route; the recorded output is 'religion'.
dl("what do you think of Hinduism?")
```
%% Output
'religion'
%% Cell type:markdown id: tags:
---
......
......@@ -2302,5 +2302,5 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
python-versions = "^3.9"
content-hash = "7e705f5c5f2a8bba630031c0ff6752972e7cddc8ec95f3fb05b5be2ad7962268"
......@@ -4,4 +4,10 @@ from .cohere import CohereEncoder
from .openai import OpenAIEncoder
from .tfidf import TfidfEncoder
__all__ = ["BaseEncoder", "CohereEncoder", "OpenAIEncoder", "BM25Encoder", "TfidfEncoder"]
__all__ = [
"BaseEncoder",
"CohereEncoder",
"OpenAIEncoder",
"BM25Encoder",
"TfidfEncoder",
]
......@@ -3,6 +3,7 @@ from sklearn.feature_extraction.text import TfidfVectorizer
from semantic_router.encoders import BaseEncoder
from semantic_router.schema import Route
class TfidfEncoder(BaseEncoder):
vectorizer: TfidfVectorizer | None = None
......@@ -28,6 +29,6 @@ class TfidfEncoder(BaseEncoder):
def _get_all_utterances(self, routes: list[Route]) -> list[str]:
utterances = []
for route in routes:
for utterance in route.utterances:
utterances.append(utterance)
return utterances
\ No newline at end of file
for utterance in route.utterances:
utterances.append(utterance)
return utterances
......@@ -7,7 +7,7 @@ from semantic_router.encoders import (
BM25Encoder,
CohereEncoder,
OpenAIEncoder,
TfidfEncoder
TfidfEncoder,
)
from semantic_router.schema import Route
from semantic_router.utils.logger import logger
......@@ -20,7 +20,11 @@ class HybridRouteLayer:
score_threshold = 0.82
def __init__(
self, dense_encoder: BaseEncoder, sparse_encoder: BaseEncoder, routes: list[Route] = [], alpha: float = 0.3
self,
dense_encoder: BaseEncoder,
sparse_encoder: BaseEncoder,
routes: list[Route] = [],
alpha: float = 0.3,
):
self.dense_encoder = dense_encoder
self.sparse_encoder = sparse_encoder
......
import pytest
from semantic_router.encoders import BaseEncoder, CohereEncoder, OpenAIEncoder
from semantic_router.encoders import (
BaseEncoder,
CohereEncoder,
OpenAIEncoder,
TfidfEncoder,
BM25Encoder,
)
from semantic_router.hybrid_layer import HybridRouteLayer
from semantic_router.schema import Route
......@@ -34,6 +40,12 @@ def openai_encoder(mocker):
return OpenAIEncoder(name="test-openai-encoder", openai_api_key="test_api_key")
@pytest.fixture
def bm25_encoder(mocker):
mocker.patch.object(BM25Encoder, "__call__", side_effect=mock_encoder_call)
return BM25Encoder(name="test-bm25-encoder")
@pytest.fixture
def routes():
return [
......@@ -73,8 +85,10 @@ class TestHybridRouteLayer:
assert len(route_layer.index) == 5
assert len(set(route_layer.categories)) == 2
def test_query_and_classification(self, openai_encoder, routes):
route_layer = HybridRouteLayer(encoder=openai_encoder, routes=routes)
def test_query_and_classification(self, openai_encoder, bm25_encoder, routes):
route_layer = HybridRouteLayer(
dense_encoder=openai_encoder, sparse_encoder=bm25_encoder, routes=routes
)
query_result = route_layer("Hello")
assert query_result in ["Route 1", "Route 2"]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment