Skip to content
Snippets Groups Projects
Unverified Commit 396a881b authored by Simonas Jakubonis, committed by GitHub
Browse files

Merge branch 'main' into simonas/unstructured-splitter

parents ad2a7a8f 2f7d92c6
No related branches found
No related tags found
No related merge requests found
...@@ -105,21 +105,33 @@ class PineconeIndex(BaseIndex): ...@@ -105,21 +105,33 @@ class PineconeIndex(BaseIndex):
self.host = self.client.describe_index(self.index_name)["host"] self.host = self.client.describe_index(self.index_name)["host"]
return index return index
def _batch_upsert(self, batch: List[dict]):
    """Upsert one batch of record dicts into the attached index.

    :param batch: Records to send in a single ``upsert`` call; passed
        through unchanged as the ``vectors`` keyword argument.
    :raises ValueError: If ``self.index`` is ``None``.
    """
    target_index = self.index
    # Guard first: without an index there is nowhere to send the batch.
    if target_index is None:
        raise ValueError("Index is None, could not upsert.")
    target_index.upsert(vectors=batch)
def add(
    self,
    embeddings: List[List[float]],
    routes: List[str],
    utterances: List[str],
    batch_size: int = 100,
):
    """Add vectors to Pinecone in batches.

    Each ``(embedding, route, utterance)`` triple is converted to a
    ``PineconeRecord`` dict and the resulting records are upserted in
    consecutive slices of at most ``batch_size`` items.

    :param embeddings: Embedding vectors, one per utterance.
    :param routes: Route name for each embedding.
    :param utterances: Utterance text for each embedding.
    :param batch_size: Maximum number of records sent per upsert call.
        Must be at least 1.
    :raises ValueError: If ``batch_size`` is smaller than 1.

    The three input lists are combined with ``zip``, so if their lengths
    differ the extra items of the longer lists are ignored.
    """
    # A negative step would make ``range`` below empty and silently drop
    # every record; zero would fail with an unrelated-looking range error.
    # Reject both up front, before any index is created.
    if batch_size < 1:
        raise ValueError(
            f"batch_size must be a positive integer, got {batch_size}."
        )

    if self.index is None:
        # No index yet: fall back to the length of the first embedding
        # when no dimensionality has been set.
        self.dimensions = self.dimensions or len(embeddings[0])
        # we set force_create to True as we MUST have an index to add data
        self.index = self._init_index(force_create=True)

    vectors_to_upsert = [
        PineconeRecord(values=vector, route=route, utterance=utterance).to_dict()
        for vector, route, utterance in zip(embeddings, routes, utterances)
    ]

    for i in range(0, len(vectors_to_upsert), batch_size):
        batch = vectors_to_upsert[i : i + batch_size]
        self._batch_upsert(batch)
def _get_route_ids(self, route_name: str): def _get_route_ids(self, route_name: str):
clean_route = clean_route_name(route_name) clean_route = clean_route_name(route_name)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment