From 8c0ba48000dda79be7d8c48da18ff997752902d1 Mon Sep 17 00:00:00 2001 From: tolgadevAI <164843802+tolgadevAI@users.noreply.github.com> Date: Mon, 19 Aug 2024 23:41:41 +0300 Subject: [PATCH] develop the process of storing the function_schemas field for the route_layer add function on Pinecone --- semantic_router/index/base.py | 1 + semantic_router/index/pinecone.py | 19 ++++++++++++++++--- semantic_router/layer.py | 7 ++++++- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/semantic_router/index/base.py b/semantic_router/index/base.py index d0f12ac6..d25d41dc 100644 --- a/semantic_router/index/base.py +++ b/semantic_router/index/base.py @@ -26,6 +26,7 @@ class BaseIndex(BaseModel): embeddings: List[List[float]], routes: List[str], utterances: List[Any], + function_schemas: List[Dict[str, Any]], ): """ Add embeddings to the index. diff --git a/semantic_router/index/pinecone.py b/semantic_router/index/pinecone.py index b4f6033f..0b84ad4c 100644 --- a/semantic_router/index/pinecone.py +++ b/semantic_router/index/pinecone.py @@ -22,6 +22,7 @@ class PineconeRecord(BaseModel): values: List[float] route: str utterance: str + function_schema: str def __init__(self, **data): super().__init__(**data) @@ -34,7 +35,11 @@ class PineconeRecord(BaseModel): return { "id": self.id, "values": self.values, - "metadata": {"sr_route": self.route, "sr_utterance": self.utterance}, + "metadata": { + "sr_route": self.route, + "sr_utterance": self.utterance, + "sr_function_schemas": self.function_schema, + }, } @@ -305,6 +310,7 @@ class PineconeIndex(BaseIndex): embeddings: List[List[float]], routes: List[str], utterances: List[str], + function_schemas: List[Dict[str, Any]] = "", batch_size: int = 100, ): """Add vectors to Pinecone in batches.""" @@ -313,8 +319,15 @@ class PineconeIndex(BaseIndex): self.index = self._init_index(force_create=True) vectors_to_upsert = [ - PineconeRecord(values=vector, route=route, utterance=utterance).to_dict() - for vector, route, utterance in zip(embeddings, routes, utterances) + PineconeRecord( + values=vector, + route=route, + utterance=utterance, + function_schema=str(function_schema), + ).to_dict() + for vector, route, utterance, function_schema in zip( + embeddings, routes, utterances, function_schemas + ) ] for i in range(0, len(vectors_to_upsert), batch_size): diff --git a/semantic_router/layer.py b/semantic_router/layer.py index 6b548fc0..2285a7d9 100644 --- a/semantic_router/layer.py +++ b/semantic_router/layer.py @@ -430,12 +430,17 @@ class RouteLayer: if route.score_threshold is None: route.score_threshold = self.score_threshold - # add routes to the index self.index.add( embeddings=embeds, routes=[route.name] * len(route.utterances), utterances=route.utterances, + function_schemas=( + route.function_schemas * len(route.utterances) + if route.function_schemas + else [""] * len(route.utterances) + ), ) + self.routes.append(route) def list_route_names(self) -> List[str]: -- GitLab