From 19f3c857d5cb1675248e523b612f18412542fe9c Mon Sep 17 00:00:00 2001
From: Michael Tutty <mtutty@gforgegroup.com>
Date: Sat, 11 Nov 2023 18:13:41 +0000
Subject: [PATCH] Add comment blocks and support for collection filtering

---
 .../src/storage/vectorStore/PGVectorStore.ts  | 71 +++++++++++++++++--
 1 file changed, 66 insertions(+), 5 deletions(-)

diff --git a/packages/core/src/storage/vectorStore/PGVectorStore.ts b/packages/core/src/storage/vectorStore/PGVectorStore.ts
index 383d3a023..59cf8d461 100644
--- a/packages/core/src/storage/vectorStore/PGVectorStore.ts
+++ b/packages/core/src/storage/vectorStore/PGVectorStore.ts
@@ -13,6 +13,9 @@ import { GenericFileSystem } from '../FileSystem';
 export const PGVECTOR_SCHEMA = 'public';
 export const PGVECTOR_TABLE = 'llamaindex_embedding';
 
+/**
+ * Provides support for writing and querying vector data in Postgres.
+ */
 export class PGVectorStore implements VectorStore {
   storesText: boolean = true;
 
@@ -40,15 +43,29 @@ export class PGVectorStore implements VectorStore {
 
   constructor() {}
 
+  /**
+   * Setter for the collection property.
+   * Using a collection allows for simple segregation of vector data,
+   * e.g. by user, source, or access-level.
+   * Leave/set blank to ignore the collection value when querying.
+   * @param coll Name for the collection.
+   */
   setCollection(coll: string) {
     this.collection = coll;
   }
 
+  /**
+   * Getter for the collection property.
+   * Using a collection allows for simple segregation of vector data,
+   * e.g. by user, source, or access-level.
+   * Leave/set blank to ignore the collection value when querying.
+   * @returns The currently-set collection value.  Default is empty string.
+   */
   getCollection(): string {
     return this.collection;
   }
 
-  async getDb(): Promise<pg.Client> {
+  private async getDb(): Promise<pg.Client> {
     if (! this.db) {
 
       try {
@@ -76,7 +93,7 @@ export class PGVectorStore implements VectorStore {
     return Promise.resolve(this.db);
   }
 
-  async checkSchema(db: pg.Client) {
+  private async checkSchema(db: pg.Client) {
     await db.query(`CREATE SCHEMA IF NOT EXISTS ${PGVECTOR_SCHEMA}`);
     
     const tbl = `CREATE TABLE IF NOT EXISTS ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE}(
@@ -99,9 +116,21 @@ export class PGVectorStore implements VectorStore {
 
   // isEmbeddingQuery?: boolean | undefined;
 
+  /**
+   * Connects to the database specified in environment vars.
+   * This method also checks and creates the vector extension,
+   * the destination table and indexes if not found.
+   * @returns Promise resolving to the database connection; errors encountered while connecting/setting up are presumably surfaced as a rejection (confirm against getDb).
+   */
   client() {
     return this.getDb();
   }
+
+  /**
+   * Delete all vector records for the specified collection.
+   * NOTE: Uses the collection property controlled by setCollection/getCollection.
+   * @returns The result of the delete query.
+   */
   async clearCollection() {
     const sql: string = `DELETE FROM ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE} 
       WHERE collection = $1`;
@@ -112,6 +141,12 @@ export class PGVectorStore implements VectorStore {
     return ret;
   }
 
+  /**
+   * Adds vector record(s) to the table.
+   * NOTE: Uses the collection property controlled by setCollection/getCollection.
+   * @param embeddingResults The Nodes to be inserted, optionally including metadata tuples.
+   * @returns A list of zero or more id values for the created records.
+   */
   async add(embeddingResults: BaseNode<Metadata>[]): Promise<string[]> {
 
     const sql: string = `INSERT INTO ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE} 
@@ -153,15 +188,30 @@ export class PGVectorStore implements VectorStore {
     return Promise.resolve(ret);
   }
 
+  /**
+   * Deletes a single record from the database by id.
+   * NOTE: When a collection is set via setCollection (non-empty), the delete is also restricted to that collection.
+   * @param refDocId Unique identifier for the record to delete.
+   * @param deleteKwargs Required by VectorStore interface.  Currently ignored.
+   * @returns Promise that resolves if the delete query did not throw an error.
+   */
   async delete(refDocId: string, deleteKwargs?: any): Promise<void> {
+    const collectionCriteria = this.collection.length ? "AND collection = $2": "";
     const sql: string = `DELETE FROM ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE} 
-      WHERE id = $1`;
+      WHERE id = $1 ${ collectionCriteria }`;
 
     const db = await this.getDb() as pg.Client;
-    await db.query(sql, [refDocId]);
+    const params = this.collection.length ? [refDocId, this.collection] : [refDocId];
+    await db.query(sql, params);
     return Promise.resolve();
   }
 
+  /**
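+   * Queries the vector store for the records closest to the query embedding; when a collection is set, only that collection is searched.
+   * @param query The VectorStoreQuery to run; queryEmbedding is the search vector and similarityTopK (default 2) caps the number of rows.
+   * @param options Required by VectorStore interface.  Currently ignored.
+   * @returns A VectorStoreQueryResult built from zero or more matching rows.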
+   */
   async query(query: VectorStoreQuery, options?: any): Promise<VectorStoreQueryResult> {
     // TODO QUERY TYPES:
     //    Distance:       SELECT embedding <-> $1 AS distance FROM items;
@@ -170,13 +220,17 @@ export class PGVectorStore implements VectorStore {
 
     const embedding = '[' + query.queryEmbedding?.join(',') + ']';
     const max = query.similarityTopK ?? 2;
+    const where = this.collection.length ? "WHERE collection = $2": "";
     // TODO Add collection filter if set
     const sql = `SELECT * FROM ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE}
+      ${ where }
       ORDER BY embeddings <-> $1 LIMIT ${ max }
     `;
 
     const db = await this.getDb() as pg.Client;
-    const results = await db.query(sql, [embedding]);
+    const params = this.collection.length ? 
+      [embedding, this.collection] : [ embedding]
+    const results = await db.query(sql, params);
 
     const nodes = results.rows.map(
       (row) => {
@@ -201,6 +255,13 @@ export class PGVectorStore implements VectorStore {
 
     return Promise.resolve(ret);
   }
+
+  /**
+   * Required by VectorStore interface.  Currently a no-op: nothing is written.
+   * @param persistPath Ignored.
+   * @param fs Ignored.
+   * @returns Resolved Promise.
+   */
   persist(persistPath: string, fs?: GenericFileSystem | undefined): Promise<void> {
     return Promise.resolve();
   }
-- 
GitLab