From 669d7a396dfab15816ff162a11be55a047bce8c8 Mon Sep 17 00:00:00 2001
From: Sean Hatfield <seanhatfield5@gmail.com>
Date: Mon, 30 Oct 2023 12:46:38 -0700
Subject: [PATCH] 282 return relevancy score with similarityresponse (#304)

* include score value in similarityResponse for weaviate

* include score value in similarityResponse for qdrant

* include score value in similarityResponse for pinecone

* include score value in similarityResponse for chroma

* include score value in similarityResponse for lancedb

* distance to similarity

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
---
 server/utils/vectorDbProviders/chroma/index.js   | 8 ++++++++
 server/utils/vectorDbProviders/lance/index.js    | 8 ++++++++
 server/utils/vectorDbProviders/pinecone/index.js | 2 ++
 server/utils/vectorDbProviders/qdrant/index.js   | 3 +++
 server/utils/vectorDbProviders/weaviate/index.js | 6 ++++--
 5 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/server/utils/vectorDbProviders/chroma/index.js b/server/utils/vectorDbProviders/chroma/index.js
index aeaab47ae..8e33b35e4 100644
--- a/server/utils/vectorDbProviders/chroma/index.js
+++ b/server/utils/vectorDbProviders/chroma/index.js
@@ -49,6 +49,12 @@ const Chroma = {
     }
     return totalVectors;
   },
+  distanceToSimilarity: function (distance = null) {
+    if (distance === null || typeof distance !== "number") return 0.0; // missing or non-numeric distance: report no similarity
+    if (distance >= 1.0) return 0; // 1 - distance would be <= 0 here, so clamp to the minimum score
+    if (distance <= 0) return 1; // 1 - distance would be >= 1 here, so clamp to the maximum score
+    return 1 - distance; // smaller distance means higher similarity, strictly inside (0, 1)
+  },
   namespaceCount: async function (_namespace = null) {
     const { client } = await this.connect();
     const namespace = await this.namespace(client, _namespace);
@@ -59,6 +65,7 @@ const Chroma = {
     const result = {
       contextTexts: [],
       sourceDocuments: [],
+      scores: [],
     };
 
     const response = await collection.query({
@@ -68,6 +75,7 @@ const Chroma = {
     response.ids[0].forEach((_, i) => {
       result.contextTexts.push(response.documents[0][i]);
       result.sourceDocuments.push(response.metadatas[0][i]);
+      result.scores.push(this.distanceToSimilarity(response.distances[0][i]));
     });
 
     return result;
diff --git a/server/utils/vectorDbProviders/lance/index.js b/server/utils/vectorDbProviders/lance/index.js
index 22d5730d0..9d4460586 100644
--- a/server/utils/vectorDbProviders/lance/index.js
+++ b/server/utils/vectorDbProviders/lance/index.js
@@ -18,6 +18,12 @@ const LanceDb = {
     const client = await lancedb.connect(this.uri);
     return { client };
   },
+  distanceToSimilarity: function (distance = null) {
+    if (distance === null || typeof distance !== "number") return 0.0; // missing or non-numeric distance: report no similarity
+    if (distance >= 1.0) return 0; // 1 - distance would be <= 0 here, so clamp to the minimum score
+    if (distance <= 0) return 1; // 1 - distance would be >= 1 here, so clamp to the maximum score
+    return 1 - distance; // smaller distance means higher similarity, strictly inside (0, 1)
+  },
   heartbeat: async function () {
     await this.connect();
     return { heartbeat: Number(new Date()) };
@@ -54,6 +60,7 @@ const LanceDb = {
     const result = {
       contextTexts: [],
       sourceDocuments: [],
+      scores: [],
     };
 
     const response = await collection
@@ -66,6 +73,7 @@ const LanceDb = {
       const { vector: _, ...rest } = item;
       result.contextTexts.push(rest.text);
       result.sourceDocuments.push(rest);
+      result.scores.push(this.distanceToSimilarity(item.score));
     });
 
     return result;
diff --git a/server/utils/vectorDbProviders/pinecone/index.js b/server/utils/vectorDbProviders/pinecone/index.js
index bcdf07b5f..79b0d40b0 100644
--- a/server/utils/vectorDbProviders/pinecone/index.js
+++ b/server/utils/vectorDbProviders/pinecone/index.js
@@ -41,6 +41,7 @@ const Pinecone = {
     const result = {
       contextTexts: [],
       sourceDocuments: [],
+      scores: [],
     };
     const response = await index.query({
       queryRequest: {
@@ -54,6 +55,7 @@ const Pinecone = {
     response.matches.forEach((match) => {
       result.contextTexts.push(match.metadata.text);
       result.sourceDocuments.push(match);
+      result.scores.push(match.score);
     });
 
     return result;
diff --git a/server/utils/vectorDbProviders/qdrant/index.js b/server/utils/vectorDbProviders/qdrant/index.js
index 376c7d8bc..2ee8e6ed1 100644
--- a/server/utils/vectorDbProviders/qdrant/index.js
+++ b/server/utils/vectorDbProviders/qdrant/index.js
@@ -51,11 +51,13 @@ const QDrant = {
     const result = {
       contextTexts: [],
       sourceDocuments: [],
+      scores: [],
     };
 
     const responses = await client.search(namespace, {
       vector: queryVector,
       limit: 4,
+      with_payload: true,
     });
 
     responses.forEach((response) => {
@@ -64,6 +66,7 @@ const QDrant = {
         ...(response?.payload || {}),
         id: response.id,
       });
+      result.scores.push(response.score);
     });
 
     return result;
diff --git a/server/utils/vectorDbProviders/weaviate/index.js b/server/utils/vectorDbProviders/weaviate/index.js
index cdc40acd7..8543db7d7 100644
--- a/server/utils/vectorDbProviders/weaviate/index.js
+++ b/server/utils/vectorDbProviders/weaviate/index.js
@@ -77,6 +77,7 @@ const Weaviate = {
     const result = {
       contextTexts: [],
       sourceDocuments: [],
+      scores: [],
     };
 
     const weaviateClass = await this.namespace(client, namespace);
@@ -84,7 +85,7 @@ const Weaviate = {
     const queryResponse = await client.graphql
       .get()
       .withClassName(camelCase(namespace))
-      .withFields(`${fields} _additional { id }`)
+      .withFields(`${fields} _additional { id certainty }`)
       .withNearVector({ vector: queryVector })
       .withLimit(4)
       .do();
@@ -94,11 +95,12 @@ const Weaviate = {
       // In Weaviate we have to pluck id from _additional and spread it into the rest
       // of the properties.
       const {
-        _additional: { id },
+        _additional: { id, certainty },
         ...rest
       } = response;
       result.contextTexts.push(rest.text);
       result.sourceDocuments.push({ ...rest, id });
+      result.scores.push(certainty);
     });
 
     return result;
-- 
GitLab