From 3cee0236335e31ce8adb8a50e5599b88a001951e Mon Sep 17 00:00:00 2001 From: Siraj R Aizlewood <siraj@aurelio.ai> Date: Thu, 8 Feb 2024 01:35:16 +0400 Subject: [PATCH] More bug fixes. --- semantic_router/indices/pinecone.py | 18 ++++++++++++++---- semantic_router/layer.py | 4 ++-- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/semantic_router/indices/pinecone.py b/semantic_router/indices/pinecone.py index 443565b6..8b7de905 100644 --- a/semantic_router/indices/pinecone.py +++ b/semantic_router/indices/pinecone.py @@ -12,9 +12,10 @@ class PineconeIndex(BaseIndex): cloud: str = "aws" region: str = "us-west-2" pinecone: Any = Field(default=None, exclude=True) + vector_id_counter: int = 0 def __init__(self, **data): - super().__init__(**data) + super().__init__(**data) # Initialize Pinecone environment with the new API self.pinecone = pinecone.Pinecone(api_key=os.getenv("PINECONE_API_KEY")) @@ -35,9 +36,18 @@ class PineconeIndex(BaseIndex): # Store the index name for potential deletion self.index_name = self.index_name - def add(self, embeds: List[np.ndarray]): - # Assuming embeds is a list of tuples (id, vector) - self.index.upsert(vectors=embeds) + def add(self, embeds: List[List[float]]): + # Format embeds as a list of dictionaries for Pinecone's upsert method + vectors_to_upsert = [] + for i, vector in enumerate(embeds): + # Generate a unique ID for each vector + vector_id = f"vec{i+1}" + + # Prepare for upsert + vectors_to_upsert.append({"id": vector_id, "values": vector}) + + # Perform the upsert operation + self.index.upsert(vectors=vectors_to_upsert) def remove(self, ids_to_remove: List[str]): self.index.delete(ids=ids_to_remove) diff --git a/semantic_router/layer.py b/semantic_router/layer.py index cff166c3..78195e6a 100644 --- a/semantic_router/layer.py +++ b/semantic_router/layer.py @@ -313,7 +313,7 @@ class RouteLayer: all_utterances = [ utterance for route in routes for utterance in route.utterances ] - embedded_utterance = self.encoder(all_utterances) + embedded_utterances = self.encoder(all_utterances) # create route array route_names = [route.name for route in routes for _ in route.utterances] @@ -323,7 +323,7 @@ class RouteLayer: if self.categories is not None else route_array ) - self.index.add(embedded_utterance) + self.index.add(embedded_utterances) def _encode(self, text: str) -> Any: """Given some text, encode it.""" -- GitLab