diff --git a/packages/core/package.json b/packages/core/package.json
index a08d79817a54b088a71b8bc248b91a6c4352ff4c..be8e1a8d2f176a313543a23615691ab7ea6e530f 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -13,6 +13,7 @@
   },
   "devDependencies": {
     "@types/lodash": "^4.14.195",
+    "@types/node": "^20.3.1",
     "node-stdlib-browser": "^1.2.0"
   }
 }
diff --git a/packages/core/src/dataStructs.ts b/packages/core/src/dataStructs.ts
new file mode 100644
index 0000000000000000000000000000000000000000..f561fbe4fcc7380afde6cc0db2560b834c5611d8
--- /dev/null
+++ b/packages/core/src/dataStructs.ts
@@ -0,0 +1,29 @@
+/** Discriminator naming the concrete kind of an index struct. */
+export enum IndexStructType {
+  SIMPLE_DICT = "simple_dict"
+}
+
+/** Identifying fields of an index struct: id, optional summary, and type. */
+export interface IndexStruct {
+  readonly indexId: string;
+  readonly summary?: string;
+  readonly type: IndexStructType;
+}
+
+/**
+ * Copies the three IndexStruct fields into a plain object.
+ * `summary` is copied as-is, so it is `undefined` when absent.
+ */
+export function indexStructToJson(indexStruct: IndexStruct): {[key: string]: any} {
+  const { indexId, summary, type } = indexStruct;
+  return { indexId, summary, type };
+}
+
+/**
+ * Rebuilds an IndexStruct from a plain object.
+ * No validation is performed: the fields are copied verbatim from `json`.
+ */
+export function jsonToIndexStruct(json: any): IndexStruct {
+  const { indexId, summary, type } = json;
+  return { indexId, summary, type };
+}
diff --git a/packages/core/src/storage/FileSystem.ts b/packages/core/src/storage/FileSystem.ts
index e984d44fd6eac69bc0fb26e53d92dd83742b3d65..d08684243f8986cea7e90cb9de27b89bf5a65aba 100644
--- a/packages/core/src/storage/FileSystem.ts
+++ b/packages/core/src/storage/FileSystem.ts
@@ -1,3 +1,4 @@
+import _ from "lodash";
 /**
  * A filesystem interface that is meant to be compatible with
  * the 'fs' module from Node.js.
@@ -16,17 +17,17 @@ export interface GenericFileSystem {
  * A filesystem implementation that stores files in memory.
*/
 export class InMemoryFileSystem implements GenericFileSystem {
-  private files: {[filepath: string]: string} = {};
+  private files: {[filepath: string]: any} = {};
 
   async writeFile(path: string, content: string, options?: any): Promise<void> {
-    this.files[path] = content;
+    this.files[path] = _.cloneDeep(content);
   }
 
   async readFile(path: string, options?: any): Promise<string> {
     if (!(path in this.files)) {
       throw new Error(`File ${path} does not exist`);
     }
-    return this.files[path];
+    return _.cloneDeep(this.files[path]);
   }
 
   async exists(path: string): Promise<boolean> {
@@ -34,6 +35,52 @@ export class InMemoryFileSystem implements GenericFileSystem {
   }
 
   async mkdir(path: string, options?: any): Promise<void> {
-    // noop
+    // Record the directory as an entry. Index by the literal key: _.get()
+    // would parse a path such as "a.b" as a nested property lookup.
+    const existing = this.files[path];
+    this.files[path] = existing === undefined ? null : existing;
   }
 }
+
+/**
+ * Adapts Node's promise-based `fs.promises` API to the GenericFileSystem shape.
+ * The callback-style functions on `fs` itself (`exists`, `readFile`, ...) do not
+ * return promises, so the raw module cannot serve as a GenericFileSystem.
+ * NOTE(review): assumes GenericFileSystem declares exactly the four methods
+ * that InMemoryFileSystem implements -- confirm against the interface.
+ */
+function nodeFileSystem(nodeFs: any): GenericFileSystem {
+  const promises = nodeFs && nodeFs.promises;
+  if (!promises) {
+    throw new Error("fs.promises is not available");
+  }
+  return {
+    async writeFile(path: string, content: string, options?: any): Promise<void> {
+      await promises.writeFile(path, content, options);
+    },
+    async readFile(path: string, options?: any): Promise<string> {
+      // Default to utf-8 so text, not a Buffer, comes back.
+      return String(await promises.readFile(path, options || "utf-8"));
+    },
+    async exists(path: string): Promise<boolean> {
+      try {
+        await promises.access(path);
+        return true;
+      } catch (e) {
+        return false;
+      }
+    },
+    async mkdir(path: string, options?: any): Promise<void> {
+      await promises.mkdir(path, options);
+    }
+  };
+}
+
+let fs: GenericFileSystem;
+try {
+  fs = nodeFileSystem(require("fs"));
+} catch (e) {
+  // Node's fs is unavailable or incomplete (e.g. a browser bundle).
+  fs = new InMemoryFileSystem();
+}
+export const DEFAULT_FS = fs as GenericFileSystem;
diff --git a/packages/core/src/storage/StorageContext.ts b/packages/core/src/storage/StorageContext.ts
index cb54cd2051b40db8d59de330d2df845f3b9dd15e..c3e88ccbd1444bcf18e6fef4298ff068f1d731d0 100644
--- a/packages/core/src/storage/StorageContext.ts
+++ b/packages/core/src/storage/StorageContext.ts
@@ -1,11 +1,47 @@
 import { BaseDocumentStore } from "./docStore/types";
 import { BaseIndexStore } from "./indexStore/types";
 import { VectorStore } from "./vectorStore/types";
-import { GraphStore } from "./graphStore/types";
+import { SimpleDocumentStore } from "./docStore/SimpleDocumentStore";
+import { SimpleIndexStore } from "./indexStore/SimpleIndexStore";
+import { SimpleVectorStore } from "./vectorStore/SimpleVectorStore";
+import { GenericFileSystem } from "./FileSystem";
+import { DEFAULT_PERSIST_DIR, DEFAULT_FS } from "./constants";
 
 export interface StorageContext {
   docStore?: BaseDocumentStore;
   indexStore?: BaseIndexStore;
   vectorStore?: VectorStore;
-  graphStore?: GraphStore;
+}
+
+type BuilderParams = {
+  docStore?: BaseDocumentStore,
+  indexStore?: BaseIndexStore,
+  vectorStore?: VectorStore,
+  persistDir?: string,
+  fs?: GenericFileSystem,
+};
+
+/**
+ * Builds a StorageContext. Any store not supplied is loaded from
+ * `persistDir` (default DEFAULT_PERSIST_DIR) through `fs` (default DEFAULT_FS).
+ * Async because the `fromPersistDir` factories resolve asynchronously.
+ */
+export async function storageContextFromDefaults({
+  docStore, indexStore, vectorStore, persistDir, fs
+}: BuilderParams): Promise<StorageContext> {
+  persistDir = persistDir || DEFAULT_PERSIST_DIR;
+
+  fs = fs || DEFAULT_FS;
+
+  // Arguments are positional (TypeScript has no keyword arguments), and
+  // SimpleDocumentStore.fromPersistDir takes `namespace` before the file system.
+  docStore = docStore || (await SimpleDocumentStore.fromPersistDir(persistDir, undefined, fs));
+  indexStore = indexStore || (await SimpleIndexStore.fromPersistDir(persistDir, fs));
+  vectorStore = vectorStore || (await SimpleVectorStore.fromPersistDir(persistDir, fs));
+
+  return {
+    docStore,
+    indexStore,
+    vectorStore,
+  };
 }
diff --git a/packages/core/src/storage/constants.ts b/packages/core/src/storage/constants.ts
index 5e44bf926e21d82816e70cedb28d7e9f45f9cf4d..e00fa7bc128296c53ce2b9e22b7129f3234fdc05 100644
--- a/packages/core/src/storage/constants.ts
+++ b/packages/core/src/storage/constants.ts
@@ -1,5 +1,3 @@
-import fs from "fs";
-
 export const DEFAULT_COLLECTION = "data";
 export const DEFAULT_PERSIST_DIR = "./storage";
 export const DEFAULT_INDEX_STORE_PERSIST_FILENAME = "index_store.json";
@@ -7,6 +5,6 @@ export const DEFAULT_DOC_STORE_PERSIST_FILENAME = "docstore.json";
 export const DEFAULT_VECTOR_STORE_PERSIST_FILENAME = "vector_store.json";
 export const DEFAULT_GRAPH_STORE_PERSIST_FILENAME = "graph_store.json";
 export const DEFAULT_NAMESPACE = "docstore";
-export const DEFAULT_FS = fs;
 export const TYPE_KEY = "__type__";
 export const DATA_KEY = "__data__";
+export { DEFAULT_FS } from "./FileSystem";
\ No newline at end of file
diff --git a/packages/core/src/storage/docStore/KeyValDocumentStore.ts b/packages/core/src/storage/docStore/KVDocumentStore.ts
similarity index 100%
rename from packages/core/src/storage/docStore/KeyValDocumentStore.ts
rename to packages/core/src/storage/docStore/KVDocumentStore.ts
diff --git a/packages/core/src/storage/docStore/SimpleDocumentStore.ts b/packages/core/src/storage/docStore/SimpleDocumentStore.ts
index
ac11061c800123320a140677f9f4f4fb426f6863..3d664b4e817b46cb4ec7c13e3339e63f122290d1 100644
--- a/packages/core/src/storage/docStore/SimpleDocumentStore.ts
+++ b/packages/core/src/storage/docStore/SimpleDocumentStore.ts
@@ -1,49 +1,60 @@
-import * as fs from 'fs';
 import * as path from 'path';
 import _ from 'lodash';
-import { KVDocumentStore } from './keyvalDocStore';
-import { SimpleKVStore } from '../kvStore/simpleKVStore';
+import { KVDocumentStore } from './KVDocumentStore';
+import { SimpleKVStore } from '../kvStore/SimpleKVStore';
 import { BaseInMemoryKVStore } from '../kvStore/types';
+import { GenericFileSystem } from '../FileSystem';
 import {
-  DEFAULT_PERSIST_DIR,
-  DEFAULT_DOC_STORE_PERSIST_FILENAME
-} from './constants';
+  DEFAULT_PERSIST_DIR,
+  DEFAULT_NAMESPACE,
+  DEFAULT_DOC_STORE_PERSIST_FILENAME,
+  DEFAULT_FS
+} from '../constants';
 
 type SaveDict = {[key: string]: any}; // Replace `any` with the appropriate type if possible.
 
-class SimpleDocumentStore extends KVDocumentStore {
+export class SimpleDocumentStore extends KVDocumentStore {
   private kvStore: SimpleKVStore;
-  private namespace?: string;
 
-  constructor({simpleKVStore?: SimpleKVStore , namespace?: string]) {
-    simpleKVStore = simpleKVStore || new SimpleKVStore();
-    super(simpleKVStore, namespace);
+  constructor(kvStore?: SimpleKVStore , namespace?: string) {
+    kvStore = kvStore || new SimpleKVStore();
+    namespace = namespace || DEFAULT_NAMESPACE;
+    super(kvStore, namespace);
+    this.kvStore = kvStore;
   }
 
-  static fromPersistDir(
+  /**
+   * Loads a store from DEFAULT_DOC_STORE_PERSIST_FILENAME inside `persistDir`.
+   * Async: it returns the promise produced by `fromPersistPath`.
+   */
+  static async fromPersistDir(
     persistDir: string = DEFAULT_PERSIST_DIR,
     namespace?: string,
-    fsModule?: typeof fs
-  ): SimpleDocumentStore {
+    fsModule?: GenericFileSystem
+  ): Promise<SimpleDocumentStore> {
     const persistPath = path.join(persistDir, DEFAULT_DOC_STORE_PERSIST_FILENAME);
-    return this.fromPersistPath(persistPath, namespace, fsModule);
+    return SimpleDocumentStore.fromPersistPath(persistPath, namespace, fsModule);
   }
 
-  static fromPersistPath(
+  /** Loads the backing SimpleKVStore from `persistPath` (via `fs`, default DEFAULT_FS) and wraps it. */
+  static async fromPersistPath(
     persistPath: string,
     namespace?: string,
-    fsModule?: typeof fs
-  ): SimpleDocumentStore {
-    const simpleKVStore = SimpleKVStore.fromPersistPath(persistPath, fsModule);
+    fs?: GenericFileSystem
+  ): Promise<SimpleDocumentStore> {
+    fs = fs || DEFAULT_FS;
+    const simpleKVStore = await SimpleKVStore.fromPersistPath(persistPath, fs);
     return new SimpleDocumentStore(simpleKVStore, namespace);
   }
 
-  persist(
+  /** Persists the KV store to `persistPath` when it is a BaseInMemoryKVStore; otherwise does nothing. */
+  async persist(
     persistPath: string = path.join(DEFAULT_PERSIST_DIR, DEFAULT_DOC_STORE_PERSIST_FILENAME),
-    fsModule?: typeof fs
-  ): void {
+    fs?: GenericFileSystem
+  ): Promise<void> {
+    fs = fs || DEFAULT_FS;
     if (_.isObject(this.kvStore) && this.kvStore instanceof BaseInMemoryKVStore) {
-      this.kvStore.persist(persistPath, fsModule);
+      await this.kvStore.persist(persistPath, fs);
     }
   }
 
diff --git a/packages/core/src/storage/graphStore/SimpleGraphStore.ts b/packages/core/src/storage/graphStore/SimpleGraphStore.ts
deleted file mode 100644
index f1897ecc2aafaa8242f8d4c326283a8b12c172c2..0000000000000000000000000000000000000000
--- a/packages/core/src/storage/graphStore/SimpleGraphStore.ts
+++ /dev/null
@@ -1,145 +0,0 @@
-import * as path from 'path';
-import _ from 'lodash';
-import {
-  DEFAULT_PERSIST_DIR,
-  DEFAULT_GRAPH_STORE_PERSIST_FILENAME,
-  DEFAULT_FS
-} from '../constants';
-import { GenericFileSystem } from '../FileSystem';
-import { GraphStore, GraphStoreData } from './types';
-
-class SimpleGraphStoreData {
-  graphDict: GraphStoreData;
-
-  constructor(graphDict?: GraphStoreData) {
-    this.graphDict = graphDict || {};
-  }
-
-  /**
-   * Get subjects' rel map in max depth.
-   */
-  getRelMap(subjs?: string[], depth: number = 2): GraphStoreData {
-    if (!subjs) {
-      subjs = _.keys(this.graphDict);
-    }
-    let relMap: GraphStoreData = {};
-    for (let subj of subjs) {
-      relMap[subj] = this._getRelMap(subj, depth);
-    }
-    return relMap;
-  }
-
-  /**
-   * Get one subect's rel map in max depth.
- */ - _getRelMap(subj: string, depth: number = 2): string[][] { - if (depth === 0) { - return []; - } - let relMap: string[][] = []; - if (subj in this.graphDict) { - for (let [rel, obj] of this.graphDict[subj] || []) { - relMap.push([rel, obj]); - relMap = relMap.concat(this._getRelMap(obj, depth - 1)); - } - } - return relMap; - } -} - -class SimpleGraphStore implements GraphStore { - private data: SimpleGraphStoreData; - private fs: GenericFileSystem; - - constructor( - data?: SimpleGraphStoreData, - fs: GenericFileSystem = DEFAULT_FS - ) { - this.data = data || new SimpleGraphStoreData(); - this.fs = fs; - } - - static async fromPersistDir( - persistDir: string = DEFAULT_PERSIST_DIR, - fs: GenericFileSystem = DEFAULT_FS - ): Promise<SimpleGraphStore> { - const persistPath = path.join(persistDir, DEFAULT_GRAPH_STORE_PERSIST_FILENAME); - return await this.fromPersistPath(persistPath, fs); - } - - get client(): null { - return null; - } - - get(subj: string): string[][] { - return _.get(this.data.graphDict, subj, []); - } - - getRelMap(subjs?: string[], depth: number = 2): GraphStoreData { - return this.data.getRelMap(subjs, depth); - } - - upsertTriplet(subj: string, rel: string, obj: string): void { - if (!(subj in this.data.graphDict)) { - this.data.graphDict[subj] = []; - } - const existingTriplet = _.find(this.data.graphDict[subj], (tuple) => { - return tuple[0] === rel && tuple[1] == obj; - }); - if (_.isNil(existingTriplet)) { - this.data.graphDict[subj].push([rel, obj]); - } - } - - delete(subj: string, rel: string, obj: string): void { - if (subj in this.data.graphDict) { - _.remove(this.data.graphDict[subj], (tuple) => { - return tuple[0] === rel && tuple[1] == obj; - }); - if (this.data.graphDict[subj].length === 0) { - delete this.data.graphDict[subj]; - } - } - } - - async persist( - persistPath: string = path.join(DEFAULT_PERSIST_DIR, DEFAULT_GRAPH_STORE_PERSIST_FILENAME), - fs?: GenericFileSystem - ): Promise<void> { - fs = fs || this.fs; - const 
dirpath = path.dirname(persistPath); - if (!(await fs.exists(dirpath))) { - await fs.mkdir(dirpath, { recursive: true }); - } - - await fs.writeFile(persistPath, JSON.stringify(this.data.graphDict)); - } - - static async fromPersistPath( - persistPath: string, - fs: GenericFileSystem = DEFAULT_FS - ): Promise<SimpleGraphStore> { - if (!(await fs.exists(persistPath))) { - console.warn( - `No existing SimpleGraphStore found at ${persistPath}. ` + - "Initializing a new graph store from scratch." - ); - return new SimpleGraphStore(); - } - - console.debug(`Loading SimpleGraphStore from ${persistPath}.`); - const fileContent = await fs.readFile(persistPath, { encoding: 'utf-8' }); - const dataDict = JSON.parse(fileContent) as GraphStoreData; - const data = new SimpleGraphStoreData(dataDict); - return new SimpleGraphStore(data); - } - - static fromDict(saveDict: GraphStoreData): SimpleGraphStore { - const data = new SimpleGraphStoreData(saveDict); - return new SimpleGraphStore(data); - } - - toDict(): GraphStoreData { - return this.data.graphDict; - } -} \ No newline at end of file diff --git a/packages/core/src/storage/graphStore/types.ts b/packages/core/src/storage/graphStore/types.ts deleted file mode 100644 index 1d78a0b900df935d5bbed7d1610a6754e877e222..0000000000000000000000000000000000000000 --- a/packages/core/src/storage/graphStore/types.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { GenericFileSystem } from "../FileSystem"; - -export type GraphStoreData = {[key: string]: string[][]}; - -export interface GraphStore { - client: any; // Replace with actual type depending on your usage - get(subj: string): string[][]; - getRelMap(subjs?: string[], depth?: number): GraphStoreData; - upsertTriplet(subj: string, rel: string, obj: string): void; - delete(subj: string, rel: string, obj: string): void; - persist(persistPath: string, fs?: GenericFileSystem): void; -} diff --git a/packages/core/src/storage/indexStore/KVIndexStore.ts 
b/packages/core/src/storage/indexStore/KVIndexStore.ts
new file mode 100644
index 0000000000000000000000000000000000000000..8e31f7760299e11037dcfa3061254a792c6ab9c8
--- /dev/null
+++ b/packages/core/src/storage/indexStore/KVIndexStore.ts
@@ -0,0 +1,64 @@
+import { BaseKVStore } from '../kvStore/types';
+import { IndexStruct, indexStructToJson, jsonToIndexStruct } from '../../dataStructs';
+import _ from 'lodash';
+import { DEFAULT_NAMESPACE } from '../constants';
+import { BaseIndexStore } from './types';
+
+/**
+ * Index store backed by a key-value store. Index structs are stored as JSON
+ * in the `${namespace}/data` collection, keyed by their `indexId`.
+ */
+export class KVIndexStore extends BaseIndexStore {
+  private _kvStore: BaseKVStore;
+  private _collection: string;
+
+  constructor(kvStore: BaseKVStore, namespace: string = DEFAULT_NAMESPACE) {
+    super();
+    this._kvStore = kvStore;
+    this._collection = `${namespace}/data`;
+  }
+
+  /** Serializes `indexStruct` and puts it in the KV store under its `indexId`. */
+  async addIndexStruct(indexStruct: IndexStruct): Promise<void> {
+    let key = indexStruct.indexId;
+    let data = indexStructToJson(indexStruct);
+    await this._kvStore.put(key, data, this._collection);
+  }
+
+  /** Deletes the entry stored under `key`. */
+  async deleteIndexStruct(key: string): Promise<void> {
+    await this._kvStore.delete(key, this._collection);
+  }
+
+  /**
+   * Fetches one index struct.
+   *
+   * @param structId id to look up; when omitted, the store must contain exactly one struct.
+   * @returns the struct, or `undefined` when `structId` is given but nothing is stored under it.
+   * @throws Error when `structId` is omitted and the store holds no struct or more than one.
+   */
+  async getIndexStruct(structId?: string): Promise<IndexStruct | undefined> {
+    if (_.isNil(structId)) {
+      let structs = await this.getIndexStructs();
+      if (structs.length === 0) {
+        throw new Error('No index struct found');
+      }
+      if (structs.length > 1) {
+        throw new Error('More than one index struct found');
+      }
+      return structs[0];
+    } else {
+      let json = await this._kvStore.get(structId, this._collection);
+      if (_.isNil(json)) {
+        return;
+      }
+      return jsonToIndexStruct(json);
+    }
+  }
+
+  /** Returns every index struct in the collection. */
+  async getIndexStructs(): Promise<IndexStruct[]> {
+    let jsons = await this._kvStore.getAll(this._collection) as {[key: string]: any};
+    return _.values(jsons).map(json => jsonToIndexStruct(json));
+  }
+}
diff --git a/packages/core/src/storage/indexStore/SimpleIndexStore.ts b/packages/core/src/storage/indexStore/SimpleIndexStore.ts
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..82a006913d4f28adefd332827304682f067121c8 100644
---
a/packages/core/src/storage/indexStore/SimpleIndexStore.ts
+++ b/packages/core/src/storage/indexStore/SimpleIndexStore.ts
@@ -0,0 +1,59 @@
+import * as path from 'path';
+import * as _ from 'lodash';
+import { BaseInMemoryKVStore } from "../kvStore/types";
+import { SimpleKVStore, DataType } from "../kvStore/SimpleKVStore";
+import { KVIndexStore } from "./KVIndexStore";
+import { DEFAULT_PERSIST_DIR, DEFAULT_INDEX_STORE_PERSIST_FILENAME, DEFAULT_FS } from '../constants';
+import { GenericFileSystem } from '../FileSystem';
+
+/** KVIndexStore over an in-memory KV store that can be persisted to and loaded from a file. */
+export class SimpleIndexStore extends KVIndexStore {
+  private kvStore: BaseInMemoryKVStore;
+
+  constructor(kvStore?: BaseInMemoryKVStore) {
+    kvStore = kvStore || new SimpleKVStore();
+    super(kvStore);
+    this.kvStore = kvStore;
+  }
+
+  /** Loads a store from DEFAULT_INDEX_STORE_PERSIST_FILENAME inside `persistDir`. */
+  static async fromPersistDir(persistDir: string = DEFAULT_PERSIST_DIR, fs: GenericFileSystem = DEFAULT_FS): Promise<SimpleIndexStore> {
+    const persistPath = path.join(persistDir, DEFAULT_INDEX_STORE_PERSIST_FILENAME);
+    return this.fromPersistPath(persistPath, fs);
+  }
+
+  /** Loads a store from the file at `persistPath`. */
+  static async fromPersistPath(persistPath: string, fs: GenericFileSystem = DEFAULT_FS): Promise<SimpleIndexStore> {
+    let simpleKVStore = await SimpleKVStore.fromPersistPath(persistPath, fs);
+    return new SimpleIndexStore(simpleKVStore);
+  }
+
+  /**
+   * Writes the KV store to `persistPath`.
+   * The default is the index-store file inside DEFAULT_PERSIST_DIR (the same
+   * path `fromPersistDir` reads by default), not the directory itself.
+   */
+  async persist(
+    persistPath: string = path.join(DEFAULT_PERSIST_DIR, DEFAULT_INDEX_STORE_PERSIST_FILENAME),
+    fs: GenericFileSystem = DEFAULT_FS
+  ): Promise<void> {
+    await this.kvStore.persist(persistPath, fs);
+  }
+
+  /** Builds a store around a SimpleKVStore created from `saveDict`. */
+  static fromDict(saveDict: DataType): SimpleIndexStore {
+    let simpleKVStore = SimpleKVStore.fromDict(saveDict);
+    return new SimpleIndexStore(simpleKVStore);
+  }
+
+  /**
+   * Returns the KV store's dictionary form.
+   * @throws Error when the backing store is not a SimpleKVStore.
+   */
+  toDict(): Record<string, unknown> {
+    if (!(this.kvStore instanceof SimpleKVStore)) {
+      throw new Error("KVStore is not a SimpleKVStore");
+    }
+    return this.kvStore.toDict();
+  }
+}
diff --git a/packages/core/src/storage/indexStore/types.ts b/packages/core/src/storage/indexStore/types.ts
index 2747baf505e51e987c61d0ede62ab199bdf7ec69..b2809863ba5470ab13591bc69d4eaa2a9030e503
100644
--- a/packages/core/src/storage/indexStore/types.ts
+++ b/packages/core/src/storage/indexStore/types.ts
@@ -5,15 +5,15 @@ import { DEFAULT_PERSIST_DIR, DEFAULT_INDEX_STORE_PERSIST_FILENAME } from "../co
 const defaultPersistPath = `${DEFAULT_PERSIST_DIR}/${DEFAULT_INDEX_STORE_PERSIST_FILENAME}`;
 
 export abstract class BaseIndexStore {
-  abstract getIndexStructs(): IndexStruct[];
+  abstract getIndexStructs(): Promise<IndexStruct[]>;
 
-  abstract addIndexStruct(indexStruct: IndexStruct): void;
+  abstract addIndexStruct(indexStruct: IndexStruct): Promise<void>;
 
-  abstract deleteIndexStruct(key: string): void;
+  abstract deleteIndexStruct(key: string): Promise<void>;
 
-  abstract getIndexStruct(structId?: string): IndexStruct | null;
+  abstract getIndexStruct(structId?: string): Promise<IndexStruct | undefined>;
 
-  persist(persistPath: string = defaultPersistPath, fs?: GenericFileSystem): void {
-    // Persist the index store to disk.
+  async persist(persistPath: string = defaultPersistPath, fs?: GenericFileSystem): Promise<void> {
+    // No-op in the base class; subclasses such as SimpleIndexStore override it.
   }
 }
diff --git a/packages/core/src/storage/kvStore/SimpleKVStore.ts b/packages/core/src/storage/kvStore/SimpleKVStore.ts
index 4c5125822b4461ded26852df6f1ef35ef45bd5c7..3cc2f4df4e3d266c1b0fb7bd7bc76026d131d904 100644
--- a/packages/core/src/storage/kvStore/SimpleKVStore.ts
+++ b/packages/core/src/storage/kvStore/SimpleKVStore.ts
@@ -1,10 +1,10 @@
 import * as path from 'path';
 import { GenericFileSystem } from '../FileSystem';
-import { DEFAULT_COLLECTION } from '../constants';
+import { DEFAULT_COLLECTION, DEFAULT_FS } from '../constants';
 import * as _ from "lodash";
 import { BaseKVStore } from "./types";
 
-interface DataType {
+export interface DataType {
   [key: string]: { [key: string]: any };
 }
 
@@ -17,14 +17,14 @@ export class SimpleKVStore extends BaseKVStore {
     this.data = data || {};
   }
 
-  put(key: string, val: any, collection: string = DEFAULT_COLLECTION): void {
+  async put(key: string, val: any, collection: string = DEFAULT_COLLECTION): Promise<void> {
     if (!(collection in this.data)) {
       this.data[collection] = {};
     }
     this.data[collection][key] = _.clone(val); // Creating a shallow copy of the object
   }
 
-  get(key: string, collection: string = DEFAULT_COLLECTION): any {
+  async get(key: string, collection: string = DEFAULT_COLLECTION): Promise<any> {
     let collectionData = this.data[collection];
     if (_.isNil(collectionData)) {
       return null;
@@ -35,11 +35,17 @@ export class SimpleKVStore extends BaseKVStore {
     return _.clone(collectionData[key]); // Creating a shallow copy of the object
   }
 
-  getAll(collection: string = DEFAULT_COLLECTION): DataType {
+  async getAll(collection: string = DEFAULT_COLLECTION): Promise<DataType> {
     return _.clone(this.data[collection]); // Creating a shallow copy of the object
   }
 
-  delete(key: string, collection: string = DEFAULT_COLLECTION): boolean {
-    if (key in this.data[collection]) {
+  /**
+   * Removes `key` from `collection`.
+   * @returns true when the key existed and was removed; false otherwise,
+   *          including when the collection itself does not exist.
+   */
+  async delete(key: string, collection: string = DEFAULT_COLLECTION): Promise<boolean> {
+    // Guard the lookup: `key in undefined` throws a TypeError for a missing collection.
+    if (collection in this.data && key in this.data[collection]) {
       delete this.data[collection][key];
       return true;
@@ -47,17 +53,19 @@
export class SimpleKVStore extends BaseKVStore {
     return false;
   }
 
-  async persist(persistPath: string, fsSystem: GenericFileSystem): Promise<void> {
+  async persist(persistPath: string, fs?: GenericFileSystem): Promise<void> {
+    fs = fs || DEFAULT_FS;
     // TODO: decide on a way to polyfill path
     let dirPath = path.dirname(persistPath);
-    if (!(await fsSystem.exists(dirPath))) {
-      await fsSystem.mkdir(dirPath);
+    if (!(await fs.exists(dirPath))) {
+      await fs.mkdir(dirPath);
     }
-    await fsSystem.writeFile(persistPath, JSON.stringify(this.data));
+    await fs.writeFile(persistPath, JSON.stringify(this.data));
   }
 
-  static async fromPersistPath(persistPath: string, fsSystem: GenericFileSystem ): Promise<SimpleKVStore> {
-    let data = JSON.parse(await fsSystem.readFile(persistPath, { encoding: 'utf-8' }));
+  static async fromPersistPath(persistPath: string, fs?: GenericFileSystem ): Promise<SimpleKVStore> {
+    fs = fs || DEFAULT_FS;
+    let data = JSON.parse(await fs.readFile(persistPath, { encoding: 'utf-8' }));
     return new SimpleKVStore(data);
   }
 
diff --git a/packages/core/src/storage/vectorStore/SimpleVectorStore.ts b/packages/core/src/storage/vectorStore/SimpleVectorStore.ts
new file mode 100644
index 0000000000000000000000000000000000000000..20d4a15faa97cce2a99652532a8f6607302bfdd1
--- /dev/null
+++ b/packages/core/src/storage/vectorStore/SimpleVectorStore.ts
@@ -0,0 +1,162 @@
+import _ from "lodash";
+import * as path from "path";
+import { GenericFileSystem } from "../FileSystem";
+import { NodeWithEmbedding, VectorStore, VectorStoreQuery, VectorStoreQueryMode, VectorStoreQueryResult } from "./types";
+// NOTE(review): './your-utils-file' looks like a placeholder path -- confirm the real module.
+import { getTopKEmbeddings, getTopKEmbeddingsLearner, getTopKMMREmbeddings } from './your-utils-file';
+import { DEFAULT_PERSIST_DIR, DEFAULT_FS } from '../constants';
+
+const LEARNER_MODES = new Set<VectorStoreQueryMode>([
+  VectorStoreQueryMode.SVM,
+  VectorStoreQueryMode.LINEAR_REGRESSION,
+  VectorStoreQueryMode.LOGISTIC_REGRESSION
+]);
+
+const MMR_MODE = VectorStoreQueryMode.MMR;
+
+/** Persisted state: embeddings and ref-doc ids, both keyed by text id. */
+class SimpleVectorStoreData {
+  embeddingDict: {[key: string]: number[]} = {};
+  textIdToRefDocId: {[key: string]: string} = {};
+}
+
+/**
+ * In-memory vector store. Ranking is delegated to the getTopK* helpers, and the
+ * state can be written to / read from a JSON file through a GenericFileSystem.
+ */
+export class SimpleVectorStore implements VectorStore {
+  private data: SimpleVectorStoreData = new SimpleVectorStoreData();
+  private fs: GenericFileSystem = DEFAULT_FS;
+
+  constructor(data?: SimpleVectorStoreData, fs?: GenericFileSystem) {
+    this.data = data || new SimpleVectorStoreData();
+    this.fs = fs || DEFAULT_FS;
+  }
+
+  /** Loads the store from `vector_store.json` inside `persistDir`. */
+  static async fromPersistDir(persistDir: string = DEFAULT_PERSIST_DIR, fs: GenericFileSystem = DEFAULT_FS): Promise<SimpleVectorStore> {
+    let persistPath = `${persistDir}/vector_store.json`;
+    return await SimpleVectorStore.fromPersistPath(persistPath, fs);
+  }
+
+  get client(): any {
+    return null;
+  }
+
+  /** Returns the embedding stored for `textId` (`undefined` when absent). */
+  get(textId: string): number[] {
+    return this.data.embeddingDict[textId];
+  }
+
+  /** Stores each result's embedding and ref-doc id under its id; returns the ids in input order. */
+  add(embeddingResults: NodeWithEmbedding[]): string[] {
+    for (let result of embeddingResults) {
+      this.data.embeddingDict[result.id()] = result.embedding;
+      this.data.textIdToRefDocId[result.id()] = result.refDocId();
+    }
+    return embeddingResults.map(result => result.id());
+  }
+
+  /** Removes every entry whose text id maps to `refDocId`. */
+  delete(refDocId: string): void {
+    let textIdsToDelete = Object.keys(this.data.textIdToRefDocId).filter(textId => this.data.textIdToRefDocId[textId] === refDocId);
+    for (let textId of textIdsToDelete) {
+      delete this.data.embeddingDict[textId];
+      delete this.data.textIdToRefDocId[textId];
+    }
+  }
+
+  /**
+   * Ranks stored embeddings against `query.queryEmbedding`, restricted to
+   * `query.docIds` when given, using the helper selected by `query.mode`.
+   * @throws Error when `query.filters` is set or `query.mode` is not handled.
+   */
+  query(query: VectorStoreQuery): VectorStoreQueryResult {
+    if (!_.isNil(query.filters)) {
+      throw new Error("Metadata filters not implemented for SimpleVectorStore yet.");
+    }
+
+    let items = Object.entries(this.data.embeddingDict);
+
+    let nodeIds: string[], embeddings: number[][];
+    if (query.docIds) {
+      let availableIds = new Set(query.docIds);
+      const queriedItems = items.filter(item => availableIds.has(item[0]));
+      nodeIds = queriedItems.map(item => item[0]);
+      embeddings = queriedItems.map(item => item[1]);
+    } else {
+      // No docIds specified, so use all available items
+      nodeIds = items.map(item => item[0]);
+      embeddings = items.map(item => item[1]);
+    }
+
+    let queryEmbedding = query.queryEmbedding!;
+
+    let topSimilarities: number[], topIds: string[];
+    if (LEARNER_MODES.has(query.mode)) {
+      [topSimilarities, topIds] = getTopKEmbeddingsLearner(queryEmbedding, embeddings, query.similarityTopK, nodeIds);
+    } else if (query.mode === MMR_MODE) {
+      let mmrThreshold = query.mmrThreshold;
+      [topSimilarities, topIds] = getTopKMMREmbeddings(queryEmbedding, embeddings, query.similarityTopK, nodeIds, mmrThreshold);
+    } else if (query.mode === VectorStoreQueryMode.DEFAULT) {
+      [topSimilarities, topIds] = getTopKEmbeddings(queryEmbedding, embeddings, query.similarityTopK, nodeIds);
+    } else {
+      throw new Error(`Invalid query mode: ${query.mode}`);
+    }
+
+    return {
+      similarities: topSimilarities,
+      ids: topIds
+    }
+  }
+
+  /**
+   * Writes the store's data as JSON to `persistPath`, creating the parent directory when it does not exist.
+   * Uses `fs` when given, otherwise the file system this store was constructed with.
+   */
+  async persist(persistPath: string = `${DEFAULT_PERSIST_DIR}/vector_store.json`, fs?: GenericFileSystem): Promise<void> {
+    fs = fs || this.fs;
+    // mkdir must target the parent directory, not the file path itself.
+    let dirPath = path.dirname(persistPath);
+    if (!(await fs.exists(dirPath))) {
+      await fs.mkdir(dirPath);
+    }
+
+    await fs.writeFile(persistPath, JSON.stringify(this.data));
+  }
+
+  /**
+   * Loads a store from the JSON file at `persistPath`.
+   * @throws Error when no file exists at `persistPath`.
+   */
+  static async fromPersistPath(persistPath: string, fs?: GenericFileSystem): Promise<SimpleVectorStore> {
+    fs = fs || DEFAULT_FS;
+    if (!(await fs.exists(persistPath))) {
+      throw new Error(`No existing SimpleVectorStore found at ${persistPath}, skipping load.`);
+    }
+
+    console.debug(`Loading SimpleVectorStore from ${persistPath}.`);
+    let dataDict = JSON.parse(await fs.readFile(persistPath, 'utf-8'));
+    let data = new SimpleVectorStoreData();
+    data.embeddingDict = dataDict.embeddingDict;
+    data.textIdToRefDocId = dataDict.textIdToRefDocId;
+    // Keep the file system used for loading so a later persist() goes to the same place.
+    return new SimpleVectorStore(data, fs);
+  }
+
+  /** Builds a store that shares the two dictionaries of `saveDict`. */
+  static fromDict(saveDict: SimpleVectorStoreData): SimpleVectorStore {
+    let data = new SimpleVectorStoreData();
+    data.embeddingDict = saveDict.embeddingDict;
+    data.textIdToRefDocId = saveDict.textIdToRefDocId;
+    return new SimpleVectorStore(data);
+  }
+
+  /** Returns the store's two dictionaries by reference (no copy is made). */
+  toDict(): SimpleVectorStoreData {
+    return {
+      embeddingDict: this.data.embeddingDict,
+      textIdToRefDocId: this.data.textIdToRefDocId
+    };
+  }
+}
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index b9af11bdda82dc2e8aa961d6ca0b5e8cfd290b82..b34e20b6b74adea7ec53001992f5c179456684fd 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -132,6 +132,9 @@ importers:
       '@types/lodash':
         specifier: ^4.14.195
         version: 4.14.195
+      '@types/node':
+        specifier: ^20.3.1
+        version: 20.3.1
       node-stdlib-browser:
         specifier: ^1.2.0
         version: 1.2.0
@@ -1110,6 +1113,10 @@ packages:
     resolution: {integrity: sha512-WZ/6I1GL0DNAo4bb01lGGKTHH8BHJyECepf11kWONg3OJoHq2WYOm16Es1V54Er7NTUXsbDCpKRKdmBc4X2xhA==}
     dev: true
 
+  /@types/node@20.3.1:
+    resolution: {integrity: sha512-EhcH/wvidPy1WeML3TtYFGR83UzjxeWRen9V402T8aUGYsCHOmfoisV3ZSg03gAFIbLq8TnWOJ0f4cALtnSEUg==}
+    dev: true
+
   /@types/prettier@2.7.3:
     resolution: {integrity: sha512-+68kP9yzs4LMp7VNh8gdzMSPZFL44MLGqiHWvttYJe+6qnuVr4Ek9wSBQoveqY/r+LwjCcU29kNVkidwim+kYA==}
     dev: true