From dece09395e2d9c5e3a391dff91e9660a7f99fb3c Mon Sep 17 00:00:00 2001 From: Yi Ding <yi.s.ding@gmail.com> Date: Thu, 22 Jun 2023 07:29:00 -0700 Subject: [PATCH] ran prettier on core --- packages/core/src/Document.ts | 2 +- packages/core/src/Node.ts | 2 +- packages/core/src/dataStructs.ts | 10 +- packages/core/src/embeddingUtils.ts | 20 +- packages/core/src/storage/FileSystem.ts | 8 +- packages/core/src/storage/StorageContext.ts | 36 ++- .../src/storage/docStore/KVDocumentStore.ts | 286 ++++++++++-------- .../storage/docStore/SimpleDocumentStore.ts | 56 ++-- packages/core/src/storage/docStore/types.ts | 90 +++--- packages/core/src/storage/docStore/utils.ts | 24 +- .../src/storage/indexStore/KVIndexStore.ts | 22 +- .../storage/indexStore/SimpleIndexStore.ts | 34 ++- packages/core/src/storage/indexStore/types.ts | 22 +- .../core/src/storage/kvStore/SimpleKVStore.ts | 32 +- packages/core/src/storage/kvStore/types.ts | 22 +- .../storage/vectorStore/SimpleVectorStore.ts | 91 ++++-- .../core/src/storage/vectorStore/types.ts | 84 ++--- .../core/src/tests/InMemoryFileSystem.test.ts | 50 +-- packages/core/src/tests/TextSplitter.test.ts | 57 +++- 19 files changed, 574 insertions(+), 374 deletions(-) diff --git a/packages/core/src/Document.ts b/packages/core/src/Document.ts index 2b2fc3a87..5cf3658e8 100644 --- a/packages/core/src/Document.ts +++ b/packages/core/src/Document.ts @@ -69,4 +69,4 @@ export class ImageDocument extends Document { getType() { return NodeType.IMAGE; } -} \ No newline at end of file +} diff --git a/packages/core/src/Node.ts b/packages/core/src/Node.ts index 8247e104b..f97b84a5f 100644 --- a/packages/core/src/Node.ts +++ b/packages/core/src/Node.ts @@ -69,4 +69,4 @@ export interface NodeWithEmbedding { export interface NodeWithScore { node: Node; score: number; -} \ No newline at end of file +} diff --git a/packages/core/src/dataStructs.ts b/packages/core/src/dataStructs.ts index f561fbe4f..82052c22f 100644 --- a/packages/core/src/dataStructs.ts +++ b/packages/core/src/dataStructs.ts @@ -1,5 +1,5 @@ export enum IndexStructType { - SIMPLE_DICT = "simple_dict" + SIMPLE_DICT = "simple_dict", } export interface IndexStruct { @@ -8,11 +8,13 @@ export interface IndexStruct { readonly type: IndexStructType; } -export function indexStructToJson(indexStruct: IndexStruct): {[key: string]: any} { +export function indexStructToJson(indexStruct: IndexStruct): { + [key: string]: any; +} { return { indexId: indexStruct.indexId, summary: indexStruct.summary, - type: indexStruct.type + type: indexStruct.type, }; } @@ -20,6 +22,6 @@ export function jsonToIndexStruct(json: any): IndexStruct { return { indexId: json.indexId, summary: json.summary, - type: json.type + type: json.type, }; } diff --git a/packages/core/src/embeddingUtils.ts b/packages/core/src/embeddingUtils.ts index 90e09e487..d9d9b7522 100644 --- a/packages/core/src/embeddingUtils.ts +++ b/packages/core/src/embeddingUtils.ts @@ -1,15 +1,15 @@ -import _ from 'lodash'; -import { VectorStoreQueryMode } from './storage/vectorStore/types'; +import _ from "lodash"; +import { VectorStoreQueryMode } from "./storage/vectorStore/types"; export function getTopKEmbeddings( queryEmbedding: number[], embeddings: number[][], - similarityFn?: ((queryEmbedding: number[], emb: number[]) => number), + similarityFn?: (queryEmbedding: number[], emb: number[]) => number, similarityTopK?: number, embeddingIds?: number[], - similarityCutoff?: number, + similarityCutoff?: number ): [number[], number[]] { - throw new Error('Not implemented'); + throw new Error("Not implemented"); } export function getTopKEmbeddingsLearner( @@ -17,19 +17,19 @@ export function getTopKEmbeddingsLearner( embeddings: number[][], similarityTopK?: number, embeddingIds?: number[], - queryMode: VectorStoreQueryMode = VectorStoreQueryMode.SVM, + queryMode: VectorStoreQueryMode = VectorStoreQueryMode.SVM ): [number[], number[]] { - throw new Error('Not implemented'); + throw new Error("Not implemented"); } export function getTopKMMREmbeddings( queryEmbedding: number[], embeddings: number[][], - similarityFn?: ((queryEmbedding: number[], emb: number[]) => number), + similarityFn?: (queryEmbedding: number[], emb: number[]) => number, similarityTopK?: number, embeddingIds?: number[], similarityCutoff?: number, - mmrThreshold?: number, + mmrThreshold?: number ): [number[], number[]] { - throw new Error('Not implemented'); + throw new Error("Not implemented"); } diff --git a/packages/core/src/storage/FileSystem.ts b/packages/core/src/storage/FileSystem.ts index 9b24e9329..228233c53 100644 --- a/packages/core/src/storage/FileSystem.ts +++ b/packages/core/src/storage/FileSystem.ts @@ -17,7 +17,7 @@ export interface GenericFileSystem { * A filesystem implementation that stores files in memory. */ export class InMemoryFileSystem implements GenericFileSystem { - private files: {[filepath: string]: any} = {}; + private files: { [filepath: string]: any } = {}; async writeFile(path: string, content: string, options?: any): Promise<void> { this.files[path] = _.cloneDeep(content); @@ -40,7 +40,7 @@ export class InMemoryFileSystem implements GenericFileSystem { } export function getNodeFS(): GenericFileSystem { - const fs = require('fs/promises'); + const fs = require("fs/promises"); return { exists: async (path: string) => { try { @@ -50,8 +50,8 @@ export function getNodeFS(): GenericFileSystem { return false; } }, - ...fs - } + ...fs, + }; } let fs = null; diff --git a/packages/core/src/storage/StorageContext.ts b/packages/core/src/storage/StorageContext.ts index c39893e65..8216497ed 100644 --- a/packages/core/src/storage/StorageContext.ts +++ b/packages/core/src/storage/StorageContext.ts @@ -5,7 +5,11 @@ import { SimpleDocumentStore } from "./docStore/SimpleDocumentStore"; import { SimpleIndexStore } from "./indexStore/SimpleIndexStore"; import { SimpleVectorStore } from "./vectorStore/SimpleVectorStore"; import { GenericFileSystem } from "./FileSystem"; -import { DEFAULT_PERSIST_DIR, DEFAULT_FS, DEFAULT_NAMESPACE } from "./constants"; +import { + DEFAULT_PERSIST_DIR, + DEFAULT_FS, + DEFAULT_NAMESPACE, +} from "./constants"; export interface StorageContext { docStore?: BaseDocumentStore; @@ -14,23 +18,35 @@ export interface StorageContext { } type BuilderParams = { - docStore?: BaseDocumentStore, - indexStore?: BaseIndexStore, - vectorStore?: VectorStore, - persistDir?: string, - fs?: GenericFileSystem, + docStore?: BaseDocumentStore; + indexStore?: BaseIndexStore; + vectorStore?: VectorStore; + persistDir?: string; + fs?: GenericFileSystem; }; export async function storageContextFromDefaults({ - docStore, indexStore, vectorStore, persistDir, fs + docStore, + indexStore, + vectorStore, + persistDir, + fs, }: BuilderParams): StorageContext { persistDir = persistDir || DEFAULT_PERSIST_DIR; fs = fs || DEFAULT_FS; - docStore = docStore || await SimpleDocumentStore.fromPersistDir(persistDir, DEFAULT_NAMESPACE, fs); - indexStore = indexStore || await SimpleIndexStore.fromPersistDir(persistDir, fs); - vectorStore = vectorStore || await SimpleVectorStore.fromPersistDir(persistDir, fs); + docStore = + docStore || + (await SimpleDocumentStore.fromPersistDir( + persistDir, + DEFAULT_NAMESPACE, + fs + )); + indexStore = + indexStore || (await SimpleIndexStore.fromPersistDir(persistDir, fs)); + vectorStore = + vectorStore || (await SimpleVectorStore.fromPersistDir(persistDir, fs)); return { docStore, diff --git a/packages/core/src/storage/docStore/KVDocumentStore.ts b/packages/core/src/storage/docStore/KVDocumentStore.ts index 7f72bc266..f7809ba59 100644 --- a/packages/core/src/storage/docStore/KVDocumentStore.ts +++ b/packages/core/src/storage/docStore/KVDocumentStore.ts @@ -1,158 +1,180 @@ import { Node } from "../../Node"; -import { BaseDocument } from '../../Document'; -import { BaseDocumentStore, RefDocInfo } from './types'; -import { BaseKVStore } from '../kvStore/types'; -import _, * as lodash from 'lodash'; -import { docToJson, jsonToDoc } from './utils'; -import { DEFAULT_NAMESPACE } from '../constants'; +import { BaseDocument } from "../../Document"; +import { BaseDocumentStore, RefDocInfo } from "./types"; +import { BaseKVStore } from "../kvStore/types"; +import _, * as lodash from "lodash"; +import { docToJson, jsonToDoc } from "./utils"; +import { DEFAULT_NAMESPACE } from "../constants"; -type DocMetaData = { docHash: string, refDocId?: string }; +type DocMetaData = { docHash: string; refDocId?: string }; export class KVDocumentStore extends BaseDocumentStore { - private kvstore: BaseKVStore; - private nodeCollection: string; - private refDocCollection: string; - private metadataCollection: string; - - constructor(kvstore: BaseKVStore, namespace: string = DEFAULT_NAMESPACE) { - super(); - this.kvstore = kvstore; - this.nodeCollection = `${namespace}/data`; - this.refDocCollection = `${namespace}/ref_doc_info`; - this.metadataCollection = `${namespace}/metadata`; + private kvstore: BaseKVStore; + private nodeCollection: string; + private refDocCollection: string; + private metadataCollection: string; + + constructor(kvstore: BaseKVStore, namespace: string = DEFAULT_NAMESPACE) { + super(); + this.kvstore = kvstore; + this.nodeCollection = `${namespace}/data`; + this.refDocCollection = `${namespace}/ref_doc_info`; + this.metadataCollection = `${namespace}/metadata`; + } + + async docs(): Promise<Record<string, BaseDocument>> { + let jsonDict = await this.kvstore.getAll(this.nodeCollection); + let docs: Record<string, BaseDocument> = {}; + for (let key in jsonDict) { + docs[key] = jsonToDoc(jsonDict[key] as Record<string, any>); } - - async docs(): Promise<Record<string, BaseDocument>> { - let jsonDict = await this.kvstore.getAll(this.nodeCollection); - let docs: Record<string, BaseDocument> = {}; - for (let key in jsonDict) { - docs[key] = jsonToDoc(jsonDict[key] as Record<string, any>); + return docs; + } + + async addDocuments( + docs: BaseDocument[], + allowUpdate: boolean = true + ): Promise<void> { + for (var idx = 0; idx < docs.length; idx++) { + const doc = docs[idx]; + if (doc.getDocId() === null) { + throw new Error("doc_id not set"); + } + if (!allowUpdate && (await this.documentExists(doc.getDocId()))) { + throw new Error( + `doc_id ${doc.getDocId()} already exists. Set allow_update to True to overwrite.` + ); + } + let nodeKey = doc.getDocId(); + let data = docToJson(doc); + await this.kvstore.put(nodeKey, data, this.nodeCollection); + let metadata: DocMetaData = { docHash: doc.getDocHash() }; + + if (doc instanceof Node && doc.refDocId() !== null) { + const nodeDoc = doc as Node; + let refDocInfo = (await this.getRefDocInfo(nodeDoc.refDocId()!)) || { + docIds: [], + extraInfo: {}, + }; + refDocInfo.docIds.push(nodeDoc.getDocId()); + if (_.isEmpty(refDocInfo.extraInfo)) { + refDocInfo.extraInfo = nodeDoc.getNodeInfo() || {}; } - return docs; + await this.kvstore.put( + nodeDoc.refDocId()!, + refDocInfo, + this.refDocCollection + ); + metadata.refDocId = nodeDoc.refDocId()!; + } + + this.kvstore.put(nodeKey, metadata, this.metadataCollection); } - - async addDocuments(docs: BaseDocument[], allowUpdate: boolean = true): Promise<void> { - for (var idx = 0; idx < docs.length; idx++) { - const doc = docs[idx]; - if (doc.getDocId() === null) { - throw new Error("doc_id not set"); - } - if (!allowUpdate && await this.documentExists(doc.getDocId())) { - throw new Error(`doc_id ${doc.getDocId()} already exists. Set allow_update to True to overwrite.`); - } - let nodeKey = doc.getDocId(); - let data = docToJson(doc); - await this.kvstore.put(nodeKey, data, this.nodeCollection); - let metadata: DocMetaData = { docHash: doc.getDocHash() }; - - if (doc instanceof Node && doc.refDocId() !== null) { - const nodeDoc = doc as Node; - let refDocInfo = await this.getRefDocInfo(nodeDoc.refDocId()!) || {docIds: [], extraInfo: {}}; - refDocInfo.docIds.push(nodeDoc.getDocId()); - if (_.isEmpty(refDocInfo.extraInfo)) { - refDocInfo.extraInfo = nodeDoc.getNodeInfo() || {}; - } - await this.kvstore.put(nodeDoc.refDocId()!, refDocInfo, this.refDocCollection); - metadata.refDocId = nodeDoc.refDocId()!; - } - - this.kvstore.put(nodeKey, metadata, this.metadataCollection); - } + } + + async getDocument( + docId: string, + raiseError: boolean = true + ): Promise<BaseDocument | undefined> { + let json = await this.kvstore.get(docId, this.nodeCollection); + if (_.isNil(json)) { + if (raiseError) { + throw new Error(`doc_id ${docId} not found.`); + } else { + return; + } } - - async getDocument(docId: string, raiseError: boolean = true): Promise<BaseDocument | undefined> { - let json = await this.kvstore.get(docId, this.nodeCollection); - if (_.isNil(json)) { - if (raiseError) { - throw new Error(`doc_id ${docId} not found.`); - } else { - return; - } - } - return jsonToDoc(json); + return jsonToDoc(json); + } + + async getRefDocInfo(refDocId: string): Promise<RefDocInfo | undefined> { + let refDocInfo = await this.kvstore.get(refDocId, this.refDocCollection); + return refDocInfo ? (_.clone(refDocInfo) as RefDocInfo) : undefined; + } + + async getAllRefDocInfo(): Promise<Record<string, RefDocInfo> | undefined> { + let refDocInfos = await this.kvstore.getAll(this.refDocCollection); + if (_.isNil(refDocInfos)) { + return; } + return refDocInfos as Record<string, RefDocInfo>; + } - async getRefDocInfo(refDocId: string): Promise<RefDocInfo | undefined> { - let refDocInfo = await this.kvstore.get(refDocId, this.refDocCollection); - return refDocInfo ? _.clone(refDocInfo) as RefDocInfo : undefined; - } + async refDocExists(refDocId: string): Promise<boolean> { + return !_.isNil(await this.getRefDocInfo(refDocId)); + } - async getAllRefDocInfo(): Promise<Record<string, RefDocInfo> | undefined> { - let refDocInfos = await this.kvstore.getAll(this.refDocCollection); - if (_.isNil(refDocInfos)) { - return; - } - return refDocInfos as Record<string, RefDocInfo>; - } + async documentExists(docId: string): Promise<boolean> { + return !_.isNil(await this.kvstore.get(docId, this.nodeCollection)); + } - async refDocExists(refDocId: string): Promise<boolean> { - return !_.isNil(await this.getRefDocInfo(refDocId)); + private async removeRefDocNode(docId: string): Promise<void> { + let metadata = await this.kvstore.get(docId, this.metadataCollection); + if (metadata === null) { + return; } - async documentExists(docId: string): Promise<boolean> { - return !_.isNil(await this.kvstore.get(docId, this.nodeCollection)); + let refDocId = metadata.refDocId; + if (_.isNil(refDocId)) { + return; } - private async removeRefDocNode(docId: string): Promise<void> { - let metadata = await this.kvstore.get(docId, this.metadataCollection); - if (metadata === null) { - return; - } - - let refDocId = metadata.refDocId; - if (_.isNil(refDocId)) { - return; - } - - const refDocInfo = await this.kvstore.get(refDocId, this.refDocCollection); - if (!_.isNil(refDocInfo)) { - lodash.pull(refDocInfo.docIds, docId); + const refDocInfo = await this.kvstore.get(refDocId, this.refDocCollection); + if (!_.isNil(refDocInfo)) { + lodash.pull(refDocInfo.docIds, docId); - if (refDocInfo.docIds.length > 0) { - this.kvstore.put(refDocId, refDocInfo.toDict(), this.refDocCollection); - } - this.kvstore.delete(refDocId, this.metadataCollection); - } + if (refDocInfo.docIds.length > 0) { + this.kvstore.put(refDocId, refDocInfo.toDict(), this.refDocCollection); + } + this.kvstore.delete(refDocId, this.metadataCollection); } - - async deleteDocument(docId: string, raiseError: boolean = true, removeRefDocNode: boolean = true): Promise<void> { - if (removeRefDocNode) { - await this.removeRefDocNode(docId); - } - - let deleteSuccess = await this.kvstore.delete(docId, this.nodeCollection); - await this.kvstore.delete(docId, this.metadataCollection); - - if (!deleteSuccess && raiseError) { - throw new Error(`doc_id ${docId} not found.`); - } + } + + async deleteDocument( + docId: string, + raiseError: boolean = true, + removeRefDocNode: boolean = true + ): Promise<void> { + if (removeRefDocNode) { + await this.removeRefDocNode(docId); } - async deleteRefDoc(refDocId: string, raiseError: boolean = true): Promise<void> { - let refDocInfo = await this.getRefDocInfo(refDocId); - if (_.isNil(refDocInfo)) { - if (raiseError) { - throw new Error(`ref_doc_id ${refDocId} not found.`); - } else { - return; - } - } - - for (let docId of refDocInfo.docIds) { - await this.deleteDocument(docId, false, false); - } + let deleteSuccess = await this.kvstore.delete(docId, this.nodeCollection); + await this.kvstore.delete(docId, this.metadataCollection); - await this.kvstore.delete(refDocId, this.metadataCollection); - await this.kvstore.delete(refDocId, this.refDocCollection); + if (!deleteSuccess && raiseError) { + throw new Error(`doc_id ${docId} not found.`); } - - async setDocumentHash(docId: string, docHash: string): Promise<void> { - let metadata = { docHash: docHash }; - await this.kvstore.put(docId, metadata, this.metadataCollection); + } + + async deleteRefDoc( + refDocId: string, + raiseError: boolean = true + ): Promise<void> { + let refDocInfo = await this.getRefDocInfo(refDocId); + if (_.isNil(refDocInfo)) { + if (raiseError) { + throw new Error(`ref_doc_id ${refDocId} not found.`); + } else { + return; + } } - async getDocumentHash(docId: string): Promise<string | undefined> { - let metadata = await this.kvstore.get(docId, this.metadataCollection); - return _.get(metadata, 'docHash'); + for (let docId of refDocInfo.docIds) { + await this.deleteDocument(docId, false, false); } + + await this.kvstore.delete(refDocId, this.metadataCollection); + await this.kvstore.delete(refDocId, this.refDocCollection); + } + + async setDocumentHash(docId: string, docHash: string): Promise<void> { + let metadata = { docHash: docHash }; + await this.kvstore.put(docId, metadata, this.metadataCollection); + } + + async getDocumentHash(docId: string): Promise<string | undefined> { + let metadata = await this.kvstore.get(docId, this.metadataCollection); + return _.get(metadata, "docHash"); + } } diff --git a/packages/core/src/storage/docStore/SimpleDocumentStore.ts b/packages/core/src/storage/docStore/SimpleDocumentStore.ts index 968d6247f..1f8fa7168 100644 --- a/packages/core/src/storage/docStore/SimpleDocumentStore.ts +++ b/packages/core/src/storage/docStore/SimpleDocumentStore.ts @@ -1,22 +1,22 @@ -import * as path from 'path'; -import _ from 'lodash'; -import { KVDocumentStore } from './KVDocumentStore'; -import { SimpleKVStore } from '../kvStore/SimpleKVStore'; -import { BaseInMemoryKVStore } from '../kvStore/types'; -import { GenericFileSystem } from '../FileSystem'; -import { +import * as path from "path"; +import _ from "lodash"; +import { KVDocumentStore } from "./KVDocumentStore"; +import { SimpleKVStore } from "../kvStore/SimpleKVStore"; +import { BaseInMemoryKVStore } from "../kvStore/types"; +import { GenericFileSystem } from "../FileSystem"; +import { DEFAULT_PERSIST_DIR, DEFAULT_NAMESPACE, DEFAULT_DOC_STORE_PERSIST_FILENAME, - DEFAULT_FS -} from '../constants'; + DEFAULT_FS, +} from "../constants"; -type SaveDict = {[key: string]: any}; +type SaveDict = { [key: string]: any }; export class SimpleDocumentStore extends KVDocumentStore { private kvStore: SimpleKVStore; - constructor(kvStore?: SimpleKVStore , namespace?: string) { + constructor(kvStore?: SimpleKVStore, namespace?: string) { kvStore = kvStore || new SimpleKVStore(); namespace = namespace || DEFAULT_NAMESPACE; super(kvStore, namespace); @@ -24,17 +24,24 @@ export class SimpleDocumentStore extends KVDocumentStore { } static async fromPersistDir( - persistDir: string = DEFAULT_PERSIST_DIR, - namespace?: string, + persistDir: string = DEFAULT_PERSIST_DIR, + namespace?: string, fsModule?: GenericFileSystem ): Promise<SimpleDocumentStore> { - const persistPath = path.join(persistDir, DEFAULT_DOC_STORE_PERSIST_FILENAME); - return await SimpleDocumentStore.fromPersistPath(persistPath, namespace, fsModule); + const persistPath = path.join( + persistDir, + DEFAULT_DOC_STORE_PERSIST_FILENAME + ); + return await SimpleDocumentStore.fromPersistPath( + persistPath, + namespace, + fsModule + ); } static async fromPersistPath( - persistPath: string, - namespace?: string, + persistPath: string, + namespace?: string, fs?: GenericFileSystem ): Promise<SimpleDocumentStore> { fs = fs || DEFAULT_FS; @@ -43,19 +50,22 @@ export class SimpleDocumentStore extends KVDocumentStore { } async persist( - persistPath: string = path.join(DEFAULT_PERSIST_DIR, DEFAULT_DOC_STORE_PERSIST_FILENAME), + persistPath: string = path.join( + DEFAULT_PERSIST_DIR, + DEFAULT_DOC_STORE_PERSIST_FILENAME + ), fs?: GenericFileSystem ): Promise<void> { fs = fs || DEFAULT_FS; - if (_.isObject(this.kvStore) && this.kvStore instanceof BaseInMemoryKVStore) { + if ( + _.isObject(this.kvStore) && + this.kvStore instanceof BaseInMemoryKVStore + ) { await this.kvStore.persist(persistPath, fs); } } - static fromDict( - saveDict: SaveDict, - namespace?: string - ): SimpleDocumentStore { + static fromDict(saveDict: SaveDict, namespace?: string): SimpleDocumentStore { const simpleKVStore = SimpleKVStore.fromDict(saveDict); return new SimpleDocumentStore(simpleKVStore, namespace); } diff --git a/packages/core/src/storage/docStore/types.ts b/packages/core/src/storage/docStore/types.ts index 9084e2d04..c58302adb 100644 --- a/packages/core/src/storage/docStore/types.ts +++ b/packages/core/src/storage/docStore/types.ts @@ -1,63 +1,77 @@ import { Node } from "../../Node"; import { BaseDocument } from "../../Document"; import { GenericFileSystem } from "../FileSystem"; -import { DEFAULT_PERSIST_DIR, DEFAULT_DOC_STORE_PERSIST_FILENAME } from "../constants"; +import { + DEFAULT_PERSIST_DIR, + DEFAULT_DOC_STORE_PERSIST_FILENAME, +} from "../constants"; const defaultPersistPath = `${DEFAULT_PERSIST_DIR}/${DEFAULT_DOC_STORE_PERSIST_FILENAME}`; - export interface RefDocInfo { - docIds: string[]; - extraInfo: {[key: string]: any}; + docIds: string[]; + extraInfo: { [key: string]: any }; } export abstract class BaseDocumentStore { - // Save/load - persist(persistPath: string = defaultPersistPath, fs?: GenericFileSystem): void { - // Persist the docstore to a file. - } + // Save/load + persist( + persistPath: string = defaultPersistPath, + fs?: GenericFileSystem + ): void { + // Persist the docstore to a file. + } - // Main interface - abstract docs(): Promise<Record<string, BaseDocument>>; + // Main interface + abstract docs(): Promise<Record<string, BaseDocument>>; - abstract addDocuments(docs: BaseDocument[], allowUpdate: boolean): void; + abstract addDocuments(docs: BaseDocument[], allowUpdate: boolean): void; - abstract getDocument(docId: string, raiseError: boolean): Promise<BaseDocument | undefined>; + abstract getDocument( + docId: string, + raiseError: boolean + ): Promise<BaseDocument | undefined>; - abstract deleteDocument(docId: string, raiseError: boolean): void; + abstract deleteDocument(docId: string, raiseError: boolean): void; - abstract documentExists(docId: string): Promise<boolean>; + abstract documentExists(docId: string): Promise<boolean>; - // Hash - abstract setDocumentHash(docId: string, docHash: string): void; + // Hash + abstract setDocumentHash(docId: string, docHash: string): void; - abstract getDocumentHash(docId: string): Promise<string | undefined>; + abstract getDocumentHash(docId: string): Promise<string | undefined>; - // Ref Docs - abstract getAllRefDocInfo(): Promise<{[key: string]: RefDocInfo} | undefined>; + // Ref Docs + abstract getAllRefDocInfo(): Promise< + { [key: string]: RefDocInfo } | undefined + >; - abstract getRefDocInfo(refDocId: string): Promise<RefDocInfo | undefined>; + abstract getRefDocInfo(refDocId: string): Promise<RefDocInfo | undefined>; - abstract deleteRefDoc(refDocId: string, raiseError: boolean): Promise<void>; + abstract deleteRefDoc(refDocId: string, raiseError: boolean): Promise<void>; - // Nodes - getNodes(nodeIds: string[], raiseError: boolean = true): Promise<Node[]> { - return Promise.all(nodeIds.map(nodeId => this.getNode(nodeId, raiseError))); - } + // Nodes + getNodes(nodeIds: string[], raiseError: boolean = true): Promise<Node[]> { + return Promise.all( + nodeIds.map((nodeId) => this.getNode(nodeId, raiseError)) + ); + } - async getNode(nodeId: string, raiseError: boolean = true): Promise<Node> { - let doc = await this.getDocument(nodeId, raiseError); - if (!(doc instanceof Node)) { - throw new Error(`Document ${nodeId} is not a Node.`); - } - return doc; + async getNode(nodeId: string, raiseError: boolean = true): Promise<Node> { + let doc = await this.getDocument(nodeId, raiseError); + if (!(doc instanceof Node)) { + throw new Error(`Document ${nodeId} is not a Node.`); } - - async getNodeDict(nodeIdDict: {[index: number]: string}): Promise<{[index: number]: Node}> { - let result: {[index: number]: Node} = {}; - for (let index in nodeIdDict) { - result[index] = await this.getNode(nodeIdDict[index]); - } - return result; + return doc; + } + + async getNodeDict(nodeIdDict: { + [index: number]: string; + }): Promise<{ [index: number]: Node }> { + let result: { [index: number]: Node } = {}; + for (let index in nodeIdDict) { + result[index] = await this.getNode(nodeIdDict[index]); } + return result; + } } diff --git a/packages/core/src/storage/docStore/utils.ts b/packages/core/src/storage/docStore/utils.ts index 059174429..eea6c81a4 100644 --- a/packages/core/src/storage/docStore/utils.ts +++ b/packages/core/src/storage/docStore/utils.ts @@ -1,14 +1,13 @@ import { Node } from "../../Node"; -import { BaseDocument, Document, NodeType } from '../../Document'; +import { BaseDocument, Document, NodeType } from "../../Document"; const TYPE_KEY = "__type__"; const DATA_KEY = "__data__"; - export function docToJson(doc: BaseDocument): Record<string, any> { return { - [DATA_KEY]: JSON.stringify(doc), - [TYPE_KEY]: doc.getType(), + [DATA_KEY]: JSON.stringify(doc), + [TYPE_KEY]: doc.getType(), }; } @@ -18,13 +17,22 @@ export function jsonToDoc(docDict: Record<string, any>): BaseDocument { let doc: BaseDocument; if (docType === NodeType.DOCUMENT) { - doc = new Document(dataDict.text, dataDict.docId, dataDict.embedding, dataDict.docHash); + doc = new Document( + dataDict.text, + dataDict.docId, + dataDict.embedding, + dataDict.docHash + ); } else if (docType === NodeType.TEXT) { const reslationships = dataDict.relationships; - doc = new Node(reslationships.text, reslationships.docId, - reslationships.embedding, reslationships.docHash); + doc = new Node( + reslationships.text, + reslationships.docId, + reslationships.embedding, + reslationships.docHash + ); } else { - throw new Error(`Unknown doc type: ${docType}`); + throw new Error(`Unknown doc type: ${docType}`); } return doc; diff --git a/packages/core/src/storage/indexStore/KVIndexStore.ts b/packages/core/src/storage/indexStore/KVIndexStore.ts index 7e2f5eed0..a31452a2b 100644 --- a/packages/core/src/storage/indexStore/KVIndexStore.ts +++ b/packages/core/src/storage/indexStore/KVIndexStore.ts @@ -1,8 +1,12 @@ -import { BaseKVStore } from '../kvStore/types'; -import { IndexStruct, indexStructToJson, jsonToIndexStruct } from '../../dataStructs'; -import _ from 'lodash'; -import { DEFAULT_NAMESPACE } from '../constants'; -import { BaseIndexStore } from './types'; +import { BaseKVStore } from "../kvStore/types"; +import { + IndexStruct, + indexStructToJson, + jsonToIndexStruct, +} from "../../dataStructs"; +import _ from "lodash"; +import { DEFAULT_NAMESPACE } from "../constants"; +import { BaseIndexStore } from "./types"; export class KVIndexStore extends BaseIndexStore { private _kvStore: BaseKVStore; @@ -28,7 +32,7 @@ export class KVIndexStore extends BaseIndexStore { if (_.isNil(structId)) { let structs = await this.getIndexStructs(); if (structs.length !== 1) { - throw new Error('More than one index struct found'); + throw new Error("More than one index struct found"); } return structs[0]; } else { @@ -41,7 +45,9 @@ export class KVIndexStore extends BaseIndexStore { } async getIndexStructs(): Promise<IndexStruct[]> { - let jsons = await this._kvStore.getAll(this._collection) as {[key: string]: any}; - return _.values(jsons).map(json => jsonToIndexStruct(json)); + let jsons = (await this._kvStore.getAll(this._collection)) as { + [key: string]: any; + }; + return _.values(jsons).map((json) => jsonToIndexStruct(json)); } } diff --git a/packages/core/src/storage/indexStore/SimpleIndexStore.ts b/packages/core/src/storage/indexStore/SimpleIndexStore.ts index 82a006913..7ca66f72d 100644 --- a/packages/core/src/storage/indexStore/SimpleIndexStore.ts +++ b/packages/core/src/storage/indexStore/SimpleIndexStore.ts @@ -1,10 +1,14 @@ -import * as path from 'path'; -import * as _ from 'lodash'; +import * as path from "path"; +import * as _ from "lodash"; import { BaseInMemoryKVStore } from "../kvStore/types"; import { SimpleKVStore, DataType } from "../kvStore/SimpleKVStore"; import { KVIndexStore } from "./KVIndexStore"; -import { DEFAULT_PERSIST_DIR, DEFAULT_INDEX_STORE_PERSIST_FILENAME, DEFAULT_FS } from '../constants'; -import { GenericFileSystem } from '../FileSystem'; +import { + DEFAULT_PERSIST_DIR, + DEFAULT_INDEX_STORE_PERSIST_FILENAME, + DEFAULT_FS, +} from "../constants"; +import { GenericFileSystem } from "../FileSystem"; export class SimpleIndexStore extends KVIndexStore { private kvStore: BaseInMemoryKVStore; @@ -15,17 +19,29 @@ export class SimpleIndexStore extends KVIndexStore { this.kvStore = kvStore; } - static async fromPersistDir(persistDir: string = DEFAULT_PERSIST_DIR, fs: GenericFileSystem = DEFAULT_FS): Promise<SimpleIndexStore> {; - const persistPath = path.join(persistDir, DEFAULT_INDEX_STORE_PERSIST_FILENAME); + static async fromPersistDir( + persistDir: string = DEFAULT_PERSIST_DIR, + fs: GenericFileSystem = DEFAULT_FS + ): Promise<SimpleIndexStore> { + const persistPath = path.join( + persistDir, + DEFAULT_INDEX_STORE_PERSIST_FILENAME + ); return this.fromPersistPath(persistPath, fs); } - static async fromPersistPath(persistPath: string, fs: GenericFileSystem = DEFAULT_FS): Promise<SimpleIndexStore> { + static async fromPersistPath( + persistPath: string, + fs: GenericFileSystem = DEFAULT_FS + ): Promise<SimpleIndexStore> { let simpleKVStore = await SimpleKVStore.fromPersistPath(persistPath, fs); return new SimpleIndexStore(simpleKVStore); } - async persist(persistPath: string = DEFAULT_PERSIST_DIR, fs: GenericFileSystem = DEFAULT_FS): Promise<void> { + async persist( + persistPath: string = DEFAULT_PERSIST_DIR, + fs: GenericFileSystem = DEFAULT_FS + ): Promise<void> { await this.kvStore.persist(persistPath, fs); } @@ -36,7 +52,7 @@ export class SimpleIndexStore extends KVIndexStore { toDict(): Record<string, unknown> { if (!(this.kvStore instanceof SimpleKVStore)) { - throw new Error("KVStore is not a SimpleKVStore"); + throw new Error("KVStore is not a SimpleKVStore"); } return this.kvStore.toDict(); } diff --git a/packages/core/src/storage/indexStore/types.ts b/packages/core/src/storage/indexStore/types.ts index b2809863b..1172f6657 100644 --- a/packages/core/src/storage/indexStore/types.ts +++ b/packages/core/src/storage/indexStore/types.ts @@ -1,19 +1,25 @@ import { IndexStruct } from "llama_index/data_structs/data_structs"; import { GenericFileSystem } from "../FileSystem"; -import { DEFAULT_PERSIST_DIR, DEFAULT_INDEX_STORE_PERSIST_FILENAME } from "../constants"; +import { + DEFAULT_PERSIST_DIR, + DEFAULT_INDEX_STORE_PERSIST_FILENAME, +} from "../constants"; const defaultPersistPath = `${DEFAULT_PERSIST_DIR}/${DEFAULT_INDEX_STORE_PERSIST_FILENAME}`; export abstract class BaseIndexStore { - abstract getIndexStructs(): Promise<IndexStruct[]>; + abstract getIndexStructs(): Promise<IndexStruct[]>; - abstract addIndexStruct(indexStruct: IndexStruct): Promise<void>; + abstract addIndexStruct(indexStruct: IndexStruct): Promise<void>; - abstract deleteIndexStruct(key: string): Promise<void>; + abstract deleteIndexStruct(key: string): Promise<void>; - abstract getIndexStruct(structId?: string): Promise<IndexStruct | undefined>; + abstract getIndexStruct(structId?: string): Promise<IndexStruct | undefined>; - async persist(persistPath: string = defaultPersistPath, fs?: GenericFileSystem): Promise<void> { - // Persist the index store to disk. - } + async persist( + persistPath: string = defaultPersistPath, + fs?: GenericFileSystem + ): Promise<void> { + // Persist the index store to disk. + } } diff --git a/packages/core/src/storage/kvStore/SimpleKVStore.ts b/packages/core/src/storage/kvStore/SimpleKVStore.ts index 3cc2f4df4..4fb82a42e 100644 --- a/packages/core/src/storage/kvStore/SimpleKVStore.ts +++ b/packages/core/src/storage/kvStore/SimpleKVStore.ts @@ -1,6 +1,6 @@ -import * as path from 'path'; -import { GenericFileSystem } from '../FileSystem'; -import { DEFAULT_COLLECTION, DEFAULT_FS } from '../constants'; +import * as path from "path"; +import { GenericFileSystem } from "../FileSystem"; +import { DEFAULT_COLLECTION, DEFAULT_FS } from "../constants"; import * as _ from "lodash"; import { BaseKVStore } from "./types"; @@ -8,7 +8,6 @@ export interface DataType { [key: string]: { [key: string]: any }; } - export class SimpleKVStore extends BaseKVStore { private data: DataType; @@ -17,14 +16,21 @@ export class SimpleKVStore extends BaseKVStore { this.data = data || {}; } - async put(key: string, val: any, collection: string = DEFAULT_COLLECTION): Promise<void> { + async put( + key: string, + val: any, + collection: string = DEFAULT_COLLECTION + ): Promise<void> { if (!(collection in this.data)) { this.data[collection] = {}; } this.data[collection][key] = _.clone(val); // Creating a shallow copy of the object } - async get(key: string, collection: string = DEFAULT_COLLECTION): Promise<any> { + async get( + key: string, + collection: string = DEFAULT_COLLECTION + ): Promise<any> { let collectionData = this.data[collection]; if (_.isNil(collectionData)) { return null; @@ -39,7 +45,10 @@ export class SimpleKVStore extends BaseKVStore { return _.clone(this.data[collection]); // Creating a shallow copy of the object } - async delete(key: string, collection: string = DEFAULT_COLLECTION): Promise<boolean> { + async delete( + key: string, + collection: string = DEFAULT_COLLECTION + ): Promise<boolean> { if (key in this.data[collection]) { delete this.data[collection][key]; return true; @@ -57,9 +66,14 @@ export class SimpleKVStore extends BaseKVStore { await fs.writeFile(persistPath, JSON.stringify(this.data)); } - static async fromPersistPath(persistPath: string, fs?: GenericFileSystem ): Promise<SimpleKVStore> { + static async fromPersistPath( + persistPath: string, + fs?: GenericFileSystem + ): Promise<SimpleKVStore> { fs = fs || DEFAULT_FS; - let data = JSON.parse(await fs.readFile(persistPath, { encoding: 'utf-8' })); + let data = JSON.parse( + await fs.readFile(persistPath, { encoding: "utf-8" }) + ); return new SimpleKVStore(data); } diff --git a/packages/core/src/storage/kvStore/types.ts b/packages/core/src/storage/kvStore/types.ts index 333c22e6e..b6c3785fd 100644 --- a/packages/core/src/storage/kvStore/types.ts +++ b/packages/core/src/storage/kvStore/types.ts @@ -1,18 +1,22 @@ import { GenericFileSystem } from "../FileSystem"; const defaultCollection = "data"; -type StoredValue = {[key: string]: any} | null; +type StoredValue = { [key: string]: any } | null; export abstract class BaseKVStore { - abstract put(key: string, val: {[key: string]: any}, collection?: string): Promise<void>; - abstract get(key: string, collection?: string): Promise<StoredValue>; - abstract getAll(collection?: string): Promise<{[key: string]: StoredValue}>; - abstract delete(key: string, collection?: string): Promise<boolean>; + abstract put( + key: string, + val: { [key: string]: any }, + collection?: string + ): Promise<void>; + abstract get(key: string, collection?: string): Promise<StoredValue>; + abstract getAll(collection?: string): Promise<{ [key: string]: StoredValue }>; + abstract delete(key: string, collection?: string): Promise<boolean>; } export abstract class BaseInMemoryKVStore extends BaseKVStore { - abstract persist(persistPath: string, fs?: GenericFileSystem): void; - static fromPersistPath(persistPath: string): BaseInMemoryKVStore { - throw new Error("Method not implemented."); - } + abstract persist(persistPath: string, fs?: GenericFileSystem): void; + static fromPersistPath(persistPath: string): BaseInMemoryKVStore { + throw new Error("Method not implemented."); + } } diff --git a/packages/core/src/storage/vectorStore/SimpleVectorStore.ts b/packages/core/src/storage/vectorStore/SimpleVectorStore.ts index c2ef189eb..be697305b 100644 --- a/packages/core/src/storage/vectorStore/SimpleVectorStore.ts +++ b/packages/core/src/storage/vectorStore/SimpleVectorStore.ts @@ -1,20 +1,30 @@ import _ from "lodash"; import { GenericFileSystem } from "../FileSystem"; -import { NodeWithEmbedding, VectorStore, VectorStoreQuery, VectorStoreQueryMode, VectorStoreQueryResult } from "./types"; -import { getTopKEmbeddings, getTopKEmbeddingsLearner, getTopKMMREmbeddings } from '../../Embedding'; -import { DEFAULT_PERSIST_DIR, DEFAULT_FS } from '../constants'; +import { + NodeWithEmbedding, + VectorStore, + VectorStoreQuery, + VectorStoreQueryMode, + VectorStoreQueryResult, +} from "./types"; +import { + getTopKEmbeddings, + getTopKEmbeddingsLearner, + getTopKMMREmbeddings, +} from "../../Embedding"; +import { DEFAULT_PERSIST_DIR, DEFAULT_FS } from "../constants"; const LEARNER_MODES = new Set<VectorStoreQueryMode>([ VectorStoreQueryMode.SVM, VectorStoreQueryMode.LINEAR_REGRESSION, - VectorStoreQueryMode.LOGISTIC_REGRESSION + VectorStoreQueryMode.LOGISTIC_REGRESSION, ]); const MMR_MODE = VectorStoreQueryMode.MMR; class SimpleVectorStoreData { - embeddingDict: {[key: string]: number[]} = {}; - textIdToRefDocId: {[key: string]: string} = {}; + embeddingDict: { [key: string]: number[] } = {}; + textIdToRefDocId: { [key: string]: string } = {}; } export class SimpleVectorStore implements VectorStore { @@ -27,7 +37,10 @@ export class SimpleVectorStore implements VectorStore { this.fs = fs || DEFAULT_FS; } - static async fromPersistDir(persistDir: string = DEFAULT_PERSIST_DIR, fs: GenericFileSystem = DEFAULT_FS): Promise<SimpleVectorStore> { + static async fromPersistDir( + persistDir: string = DEFAULT_PERSIST_DIR, + fs: GenericFileSystem = DEFAULT_FS + ): Promise<SimpleVectorStore> { let persistPath = `${persistDir}/vector_store.json`; return await SimpleVectorStore.fromPersistPath(persistPath, fs); } @@ -45,11 +58,13 @@ export class SimpleVectorStore implements VectorStore { this.data.embeddingDict[result.id()] = result.embedding; this.data.textIdToRefDocId[result.id()] = result.refDocId(); } - return embeddingResults.map(result => result.id()); + return embeddingResults.map((result) => result.id()); } delete(refDocId: string): void { - let textIdsToDelete = Object.keys(this.data.textIdToRefDocId).filter(textId => this.data.textIdToRefDocId[textId] === refDocId); + let textIdsToDelete = Object.keys(this.data.textIdToRefDocId).filter( + (textId) => this.data.textIdToRefDocId[textId] === refDocId + ); for (let textId of textIdsToDelete) { delete this.data.embeddingDict[textId]; delete this.data.textIdToRefDocId[textId]; @@ -58,7 +73,9 @@ export class SimpleVectorStore implements VectorStore { query(query: VectorStoreQuery): VectorStoreQueryResult { if (!_.isNil(query.filters)) { - throw new Error("Metadata filters not implemented for SimpleVectorStore yet."); + throw new Error( + "Metadata filters not implemented for SimpleVectorStore yet." + ); } let items = Object.entries(this.data.embeddingDict); @@ -66,36 +83,55 @@ export class SimpleVectorStore implements VectorStore { let nodeIds: string[], embeddings: number[][]; if (query.docIds) { let availableIds = new Set(query.docIds); - const queriedItems = items.filter(item => availableIds.has(item[0])); - nodeIds = queriedItems.map(item => item[0]); - embeddings = queriedItems.map(item => item[1]); + const queriedItems = items.filter((item) => availableIds.has(item[0])); + nodeIds = queriedItems.map((item) => item[0]); + embeddings = queriedItems.map((item) => item[1]); } else { // No docIds specified, so use all available items - nodeIds = items.map(item => item[0]); - embeddings = items.map(item => item[1]); + nodeIds = items.map((item) => item[0]); + embeddings = items.map((item) => item[1]); } let queryEmbedding = query.queryEmbedding!; let topSimilarities: number[], topIds: string[]; if (LEARNER_MODES.has(query.mode)) { - [topSimilarities, topIds] = getTopKEmbeddingsLearner(queryEmbedding, embeddings, query.similarityTopK, nodeIds); + [topSimilarities, topIds] = getTopKEmbeddingsLearner( + queryEmbedding, + embeddings, + query.similarityTopK, + nodeIds + ); } else if (query.mode === MMR_MODE) { let mmrThreshold = query.mmrThreshold; - [topSimilarities, topIds] = getTopKMMREmbeddings(queryEmbedding, embeddings, query.similarityTopK, nodeIds, mmrThreshold); + [topSimilarities, topIds] = getTopKMMREmbeddings( + queryEmbedding, + embeddings, + query.similarityTopK, + nodeIds, + mmrThreshold + ); } else if (query.mode === VectorStoreQueryMode.DEFAULT) { - [topSimilarities, topIds] = getTopKEmbeddings(queryEmbedding, embeddings, query.similarityTopK, nodeIds); + [topSimilarities, topIds] = getTopKEmbeddings( + queryEmbedding, + embeddings, + query.similarityTopK, + nodeIds + ); } else { throw new Error(`Invalid query mode: ${query.mode}`); } return { similarities: topSimilarities, - ids: topIds - } + ids: topIds, + }; } - async persist(persistPath: string = `${DEFAULT_PERSIST_DIR}/vector_store.json`, fs?: GenericFileSystem): Promise<void> { + async persist( + persistPath: string = `${DEFAULT_PERSIST_DIR}/vector_store.json`, + fs?: GenericFileSystem + ): Promise<void> { fs = fs || this.fs; if (!(await fs.exists(persistPath))) { await fs.mkdir(persistPath); @@ -104,14 +140,19 @@ export class SimpleVectorStore implements VectorStore { await fs.writeFile(persistPath, JSON.stringify(this.data)); } - static async fromPersistPath(persistPath: string, fs?: GenericFileSystem): Promise<SimpleVectorStore> { + static async fromPersistPath( + persistPath: string, + fs?: GenericFileSystem + ): Promise<SimpleVectorStore> { fs = fs || DEFAULT_FS; if (!(await fs.exists(persistPath))) { - throw new Error(`No existing SimpleVectorStore found at ${persistPath}, skipping load.`); + throw new Error( + `No existing SimpleVectorStore found at ${persistPath}, skipping load.` + ); } console.debug(`Loading SimpleVectorStore from ${persistPath}.`); - let dataDict = JSON.parse(await fs.readFile(persistPath, 'utf-8')); + let dataDict = JSON.parse(await fs.readFile(persistPath, "utf-8")); let data = new SimpleVectorStoreData(); data.embeddingDict = dataDict.embeddingDict; data.textIdToRefDocId = dataDict.textIdToRefDocId; @@ -128,7 +169,7 @@ export class SimpleVectorStore implements VectorStore { toDict(): SimpleVectorStoreData { return { embeddingDict: this.data.embeddingDict, - textIdToRefDocId: this.data.textIdToRefDocId + textIdToRefDocId: this.data.textIdToRefDocId, }; } } diff --git a/packages/core/src/storage/vectorStore/types.ts b/packages/core/src/storage/vectorStore/types.ts index 033301802..74dada213 100644 --- a/packages/core/src/storage/vectorStore/types.ts +++ b/packages/core/src/storage/vectorStore/types.ts @@ -2,74 +2,74 @@ import { Node } from "../../Node"; import { GenericFileSystem } from "../FileSystem"; export interface NodeWithEmbedding { - node: Node; - embedding: number[]; + node: Node; + embedding: number[]; - id(): string; - refDocId(): string; + id(): string; + refDocId(): string; } export interface VectorStoreQueryResult { - nodes?: Node[]; - similarities?: number[]; - ids?: string[]; + nodes?: Node[]; + similarities?: number[]; + ids?: string[]; } export enum VectorStoreQueryMode { - DEFAULT = "default", - SPARSE = "sparse", - HYBRID = "hybrid", - // fit learners - SVM = "svm", - LOGISTIC_REGRESSION = "logistic_regression", - LINEAR_REGRESSION = "linear_regression", - // maximum marginal relevance - MMR = "mmr" + DEFAULT = "default", + SPARSE = "sparse", + HYBRID = "hybrid", + // fit learners + SVM = "svm", + LOGISTIC_REGRESSION = "logistic_regression", + LINEAR_REGRESSION = "linear_regression", + // maximum marginal relevance + MMR = "mmr", } export interface ExactMatchFilter { - key: string; - value: string | number; + key: string; + value: string | number; } export interface MetadataFilters { - filters: ExactMatchFilter[]; + filters: ExactMatchFilter[]; } export interface VectorStoreQuerySpec { - query: string; - filters: ExactMatchFilter[]; - topK?: number; + query: string; + filters: ExactMatchFilter[]; + topK?: number; } export interface MetadataInfo { - name: string; - type: string; - description: string; + name: string; + type: string; + description: string; } export interface VectorStoreInfo { - metadataInfo: MetadataInfo[]; - contentInfo: string; + metadataInfo: MetadataInfo[]; + contentInfo: string; } export interface VectorStoreQuery { - queryEmbedding?: number[]; - similarityTopK: number; - docIds?: string[]; - queryStr?: string; - mode: VectorStoreQueryMode; - alpha?: number; - filters?: MetadataFilters; - mmrThreshold?: number; + queryEmbedding?: number[]; + similarityTopK: number; + docIds?: string[]; + queryStr?: string; + mode: VectorStoreQueryMode; + alpha?: number; + filters?: MetadataFilters; + mmrThreshold?: number; } export interface VectorStore { - storesText: boolean; - isEmbeddingQuery?: boolean; - client(): any; - add(embeddingResults: NodeWithEmbedding[]): string[]; - delete(refDocId: string, deleteKwargs?: any): void; - query(query: VectorStoreQuery, kwargs?: any): VectorStoreQueryResult; - persist(persistPath: string, fs?: GenericFileSystem): void; + storesText: boolean; + isEmbeddingQuery?: boolean; + client(): any; + add(embeddingResults: NodeWithEmbedding[]): string[]; + delete(refDocId: string, deleteKwargs?: any): void; + query(query: VectorStoreQuery, kwargs?: any): VectorStoreQueryResult; + persist(persistPath: string, fs?: GenericFileSystem): void; } diff --git a/packages/core/src/tests/InMemoryFileSystem.test.ts b/packages/core/src/tests/InMemoryFileSystem.test.ts index ac09d8164..f8e59434d 100644 --- a/packages/core/src/tests/InMemoryFileSystem.test.ts +++ b/packages/core/src/tests/InMemoryFileSystem.test.ts @@ -1,38 +1,42 @@ -import { GenericFileSystem, getNodeFS, InMemoryFileSystem } from "../storage/FileSystem"; -import os from 'os'; -import path from 'path'; +import { + GenericFileSystem, + getNodeFS, + InMemoryFileSystem, +} from "../storage/FileSystem"; +import os from "os"; +import path from "path"; type FileSystemUnderTest = { - name: string, - prepare: () => Promise<any>, - cleanup: () => Promise<any>, - implementation: GenericFileSystem, - tempDir: string + name: string; + prepare: () => Promise<any>; + cleanup: () => Promise<any>; + implementation: GenericFileSystem; + tempDir: string; }; const nodeFS = getNodeFS() as GenericFileSystem & any; describe.each<FileSystemUnderTest>([ { - name: 'InMemoryFileSystem', + name: "InMemoryFileSystem", prepare: async () => {}, - cleanup: async function() { + cleanup: async function () { this.implementation = new InMemoryFileSystem(); }, implementation: new InMemoryFileSystem(), - tempDir: './' + tempDir: "./", }, { - name: 'Node.js fs', - prepare: async function() { - this.tempDir = await nodeFS.mkdtemp(path.join(os.tmpdir(), 'jest-')); + name: "Node.js fs", + prepare: async function () { + this.tempDir = await nodeFS.mkdtemp(path.join(os.tmpdir(), "jest-")); }, - cleanup: async function() { + cleanup: async function () { await nodeFS.rm(this.tempDir, { recursive: true }); }, implementation: nodeFS, - tempDir: './' - } + tempDir: "./", + }, ])("Test %s", (testParams) => { let testFS: GenericFileSystem; let tempDir: string; @@ -54,19 +58,25 @@ describe.each<FileSystemUnderTest>([ describe("writeFile", () => { it("writes file to memory", async () => { await testFS.writeFile(`${tempDir}/test.txt`, "Hello, world!"); - expect(await testFS.readFile(`${tempDir}/test.txt`, "utf-8")).toBe("Hello, world!"); + expect(await testFS.readFile(`${tempDir}/test.txt`, "utf-8")).toBe( + "Hello, world!" + ); }); it("overwrites existing file", async () => { await testFS.writeFile(`${tempDir}/test.txt`, "Hello, world!"); await testFS.writeFile(`${tempDir}/test.txt`, "Hello, again!"); - expect(await testFS.readFile(`${tempDir}/test.txt`, "utf-8")).toBe("Hello, again!"); + expect(await testFS.readFile(`${tempDir}/test.txt`, "utf-8")).toBe( + "Hello, again!" + ); }); }); describe("readFile", () => { it("throws error for non-existing file", async () => { - await expect(testFS.readFile(`${tempDir}/not_exist.txt`, "utf-8")).rejects.toThrow(); + await expect( + testFS.readFile(`${tempDir}/not_exist.txt`, "utf-8") + ).rejects.toThrow(); }); }); diff --git a/packages/core/src/tests/TextSplitter.test.ts b/packages/core/src/tests/TextSplitter.test.ts index 1cfd7abf6..3f577b543 100644 --- a/packages/core/src/tests/TextSplitter.test.ts +++ b/packages/core/src/tests/TextSplitter.test.ts @@ -8,36 +8,67 @@ describe("SentenceSplitter", () => { test("splits paragraphs w/o effective chunk size", () => { const sentenceSplitter = new SentenceSplitter( - undefined, undefined, undefined, undefined, "\n" + undefined, + undefined, + undefined, + undefined, + "\n" ); // generate the same line as above but correct syntax errors - let splits = sentenceSplitter.getParagraphSplits("This is a paragraph.\nThis is another paragraph.", undefined); - expect(splits).toEqual(["This is a paragraph.", "This is another paragraph."]); + let splits = sentenceSplitter.getParagraphSplits( + "This is a paragraph.\nThis is another paragraph.", + undefined + ); + expect(splits).toEqual([ + "This is a paragraph.", + "This is another paragraph.", + ]); }); test("splits paragraphs with effective chunk size", () => { const sentenceSplitter = new SentenceSplitter( - undefined, undefined, undefined, undefined, "\n" + undefined, + undefined, + undefined, + undefined, + "\n" ); // generate the same line as above but correct syntax errors - let splits = sentenceSplitter.getParagraphSplits("This is a paragraph.\nThis is another paragraph.", 1000); - expect(splits).toEqual(["This is a paragraph.\nThis is another paragraph."]); + let splits = sentenceSplitter.getParagraphSplits( + "This is a paragraph.\nThis is another paragraph.", + 1000 + ); + expect(splits).toEqual([ + "This is a paragraph.\nThis is another paragraph.", + ]); }); - + test("splits sentences", () => { const sentenceSplitter = new SentenceSplitter(); - let splits = sentenceSplitter.getSentenceSplits("This is a sentence. This is another sentence.", undefined); - expect(splits).toEqual(["This is a sentence.", "This is another sentence."]); + let splits = sentenceSplitter.getSentenceSplits( + "This is a sentence. This is another sentence.", + undefined + ); + expect(splits).toEqual([ + "This is a sentence.", + "This is another sentence.", + ]); }); test("overall split text", () => { let sentenceSplitter = new SentenceSplitter(5, 0); - let splits = sentenceSplitter.splitText("This is a sentence. This is another sentence."); - expect(splits).toEqual(["This is a sentence.", "This is another sentence."]); + let splits = sentenceSplitter.splitText( + "This is a sentence. This is another sentence." + ); + expect(splits).toEqual([ + "This is a sentence.", + "This is another sentence.", + ]); sentenceSplitter = new SentenceSplitter(1000); - splits = sentenceSplitter.splitText("This is a sentence. This is another sentence."); + splits = sentenceSplitter.splitText( + "This is a sentence. This is another sentence." + ); expect(splits).toEqual(["This is a sentence. This is another sentence."]); }); - }); -- GitLab