From 95a5cc6ee134a0f1971ab2e51133d1baa9cd68be Mon Sep 17 00:00:00 2001 From: Alex Yang <himself65@outlook.com> Date: Fri, 8 Nov 2024 14:29:08 -0800 Subject: [PATCH] refactor: move storage into core (#1451) --- .changeset/mighty-clocks-share.md | 6 + packages/core/package.json | 42 +++++ .../core/src/data-structs/data-structs.ts | 46 +++++ packages/core/src/data-structs/index.ts | 8 +- .../src/data-structs/json-to-index-struct.ts | 26 +++ packages/core/src/query-engine/retriever.ts | 2 +- packages/core/src/schema/index.ts | 7 +- packages/core/src/schema/type.ts | 4 + packages/core/src/storage/doc-store/index.ts | 167 ++++++++++++++++++ .../core/src/storage/index-store/index.ts | 115 ++++++++++++ .../src/storage/kv-store/index.ts} | 39 +++- packages/core/storage/doc-store/package.json | 8 + .../core/storage/index-store/package.json | 8 + packages/core/storage/kv-store/package.json | 8 + packages/llamaindex/src/index.edge.ts | 5 + packages/llamaindex/src/indices/BaseIndex.ts | 4 +- .../llamaindex/src/indices/IndexStruct.ts | 28 --- packages/llamaindex/src/indices/index.ts | 3 +- .../src/indices/json-to-index-struct.ts | 80 --------- .../llamaindex/src/indices/keyword/index.ts | 5 +- .../llamaindex/src/indices/summary/index.ts | 10 +- .../src/indices/vectorStore/index.ts | 4 +- .../src/ingestion/IngestionCache.ts | 10 +- .../src/ingestion/IngestionPipeline.ts | 2 +- .../strategies/DuplicatesStrategy.ts | 2 +- .../strategies/UpsertsAndDeleteStrategy.ts | 2 +- .../ingestion/strategies/UpsertsStrategy.ts | 2 +- .../src/ingestion/strategies/classify.ts | 2 +- .../src/ingestion/strategies/index.ts | 2 +- .../llamaindex/src/storage/StorageContext.ts | 8 +- .../src/storage/docStore/KVDocumentStore.ts | 12 +- .../storage/docStore/PostgresDocumentStore.ts | 2 +- .../storage/docStore/SimpleDocumentStore.ts | 6 +- .../llamaindex/src/storage/docStore/types.ts | 77 -------- .../llamaindex/src/storage/docStore/utils.ts | 88 --------- packages/llamaindex/src/storage/index.ts | 8 +- .../src/storage/indexStore/KVIndexStore.ts | 10 +- .../storage/indexStore/SimpleIndexStore.ts | 50 ------ .../src/storage/indexStore/types.ts | 25 --- .../kvStore/AzureCosmosNoSqlKVStore.ts | 2 +- .../src/storage/kvStore/PostgresKVStore.ts | 3 +- .../llamaindex/src/storage/kvStore/types.ts | 23 --- 42 files changed, 537 insertions(+), 424 deletions(-) create mode 100644 .changeset/mighty-clocks-share.md create mode 100644 packages/core/src/data-structs/json-to-index-struct.ts create mode 100644 packages/core/src/storage/doc-store/index.ts create mode 100644 packages/core/src/storage/index-store/index.ts rename packages/{llamaindex/src/storage/kvStore/SimpleKVStore.ts => core/src/storage/kv-store/index.ts} (71%) create mode 100644 packages/core/storage/doc-store/package.json create mode 100644 packages/core/storage/index-store/package.json create mode 100644 packages/core/storage/kv-store/package.json delete mode 100644 packages/llamaindex/src/indices/IndexStruct.ts delete mode 100644 packages/llamaindex/src/indices/json-to-index-struct.ts delete mode 100644 packages/llamaindex/src/storage/docStore/types.ts delete mode 100644 packages/llamaindex/src/storage/docStore/utils.ts delete mode 100644 packages/llamaindex/src/storage/indexStore/SimpleIndexStore.ts delete mode 100644 packages/llamaindex/src/storage/indexStore/types.ts delete mode 100644 packages/llamaindex/src/storage/kvStore/types.ts diff --git a/.changeset/mighty-clocks-share.md b/.changeset/mighty-clocks-share.md new file mode 100644 index 000000000..00465956c --- /dev/null +++ b/.changeset/mighty-clocks-share.md @@ -0,0 +1,6 @@ +--- +"@llamaindex/core": patch +"llamaindex": patch +--- + +refactor: move storage into core diff --git a/packages/core/package.json b/packages/core/package.json index fb294db32..6edd64c15 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -214,6 +214,48 @@ "default": "./storage/chat-store/dist/index.js" } }, + "./storage/doc-store": { + "require": { + "types": "./storage/doc-store/dist/index.d.cts", + "default": "./storage/doc-store/dist/index.cjs" + }, + "import": { + "types": "./storage/doc-store/dist/index.d.ts", + "default": "./storage/doc-store/dist/index.js" + }, + "default": { + "types": "./storage/doc-store/dist/index.d.ts", + "default": "./storage/doc-store/dist/index.js" + } + }, + "./storage/index-store": { + "require": { + "types": "./storage/index-store/dist/index.d.cts", + "default": "./storage/index-store/dist/index.cjs" + }, + "import": { + "types": "./storage/index-store/dist/index.d.ts", + "default": "./storage/index-store/dist/index.js" + }, + "default": { + "types": "./storage/index-store/dist/index.d.ts", + "default": "./storage/index-store/dist/index.js" + } + }, + "./storage/kv-store": { + "require": { + "types": "./storage/kv-store/dist/index.d.cts", + "default": "./storage/kv-store/dist/index.cjs" + }, + "import": { + "types": "./storage/kv-store/dist/index.d.ts", + "default": "./storage/kv-store/dist/index.js" + }, + "default": { + "types": "./storage/kv-store/dist/index.d.ts", + "default": "./storage/kv-store/dist/index.js" + } + }, "./response-synthesizers": { "require": { "types": "./response-synthesizers/dist/index.d.cts", diff --git a/packages/core/src/data-structs/data-structs.ts b/packages/core/src/data-structs/data-structs.ts index 354341b47..a7b14e061 100644 --- a/packages/core/src/data-structs/data-structs.ts +++ b/packages/core/src/data-structs/data-structs.ts @@ -1,5 +1,6 @@ import { randomUUID } from "@llamaindex/env"; import type { UUID } from "../global"; +import { BaseNode } from "../schema"; import { IndexStructType } from "./struct-type"; export abstract class IndexStruct { @@ -65,3 +66,48 @@ export class KeywordTable extends IndexStruct { }; } } + +export class IndexDict extends IndexStruct { + nodesDict: Record<string, BaseNode> = {}; + type: IndexStructType = IndexStructType.SIMPLE_DICT; + + addNode(node: BaseNode, textId?: string) { + const vectorId = textId ?? node.id_; + this.nodesDict[vectorId] = node; + } + + toJson(): Record<string, unknown> { + const nodesDict: Record<string, unknown> = {}; + + for (const [key, node] of Object.entries(this.nodesDict)) { + nodesDict[key] = node.toJSON(); + } + + return { + ...super.toJson(), + nodesDict, + type: this.type, + }; + } + + delete(nodeId: string) { + delete this.nodesDict[nodeId]; + } +} + +export class IndexList extends IndexStruct { + nodes: string[] = []; + type: IndexStructType = IndexStructType.LIST; + + addNode(node: BaseNode) { + this.nodes.push(node.id_); + } + + toJson(): Record<string, unknown> { + return { + ...super.toJson(), + nodes: this.nodes, + type: this.type, + }; + } +} diff --git a/packages/core/src/data-structs/index.ts b/packages/core/src/data-structs/index.ts index e8dc315bb..9ff5e0670 100644 --- a/packages/core/src/data-structs/index.ts +++ b/packages/core/src/data-structs/index.ts @@ -1,2 +1,8 @@ -export { IndexStruct, KeywordTable } from "./data-structs"; +export { + IndexDict, + IndexList, + IndexStruct, + KeywordTable, +} from "./data-structs"; +export { jsonToIndexStruct } from "./json-to-index-struct"; export { IndexStructType } from "./struct-type"; diff --git a/packages/core/src/data-structs/json-to-index-struct.ts b/packages/core/src/data-structs/json-to-index-struct.ts new file mode 100644 index 000000000..bdacfabad --- /dev/null +++ b/packages/core/src/data-structs/json-to-index-struct.ts @@ -0,0 +1,26 @@ +import type { BaseNode } from "../schema"; +import { jsonToNode } from "../schema"; +import { IndexDict, IndexList, IndexStruct } from "./data-structs"; +import { IndexStructType } from "./struct-type"; + +export function jsonToIndexStruct( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + json: any, +): IndexStruct { + if (json.type === IndexStructType.LIST) { + const indexList = new IndexList(json.indexId, json.summary); + indexList.nodes = json.nodes; + return indexList; + } else if (json.type === IndexStructType.SIMPLE_DICT) { + const indexDict = new IndexDict(json.indexId, json.summary); + indexDict.nodesDict = Object.entries(json.nodesDict).reduce< + Record<string, BaseNode> + >((acc, [key, value]) => { + acc[key] = jsonToNode(value); + return acc; + }, {}); + return indexDict; + } else { + throw new Error(`Unknown index struct type: ${json.type}`); + } +} diff --git a/packages/core/src/query-engine/retriever.ts b/packages/core/src/query-engine/retriever.ts index 8ca23a2fd..34a7edeaf 100644 --- a/packages/core/src/query-engine/retriever.ts +++ b/packages/core/src/query-engine/retriever.ts @@ -1,6 +1,5 @@ import type { MessageContent } from "../llms"; import type { BaseNodePostprocessor } from "../postprocessor"; -import { BaseQueryEngine, type QueryType } from "../query-engine"; import { type BaseSynthesizer, getResponseSynthesizer, @@ -8,6 +7,7 @@ import { import { BaseRetriever } from "../retriever"; import type { NodeWithScore } from "../schema"; import { extractText } from "../utils"; +import { BaseQueryEngine, type QueryType } from "./base"; export class RetrieverQueryEngine extends BaseQueryEngine { retriever: BaseRetriever; diff --git a/packages/core/src/schema/index.ts b/packages/core/src/schema/index.ts index 76d8e824d..5bbaeca3b 100644 --- a/packages/core/src/schema/index.ts +++ b/packages/core/src/schema/index.ts @@ -1,5 +1,10 @@ export * from "./node"; -export { FileReader, TransformComponent, type BaseReader } from "./type"; +export { + FileReader, + TransformComponent, + type BaseReader, + type StoredValue, +} from "./type"; export type { BaseOutputParser } from "./type/base-output-parser"; export { EngineResponse } from "./type/engine–response"; export * from "./zod"; diff --git a/packages/core/src/schema/type.ts b/packages/core/src/schema/type.ts index b5386ec3f..012f43f52 100644 --- a/packages/core/src/schema/type.ts +++ b/packages/core/src/schema/type.ts @@ -1,6 +1,10 @@ import { fs, path, randomUUID } from "@llamaindex/env"; import type { BaseNode, Document } from "./node"; +// fixme: remove any +// eslint-disable-next-line @typescript-eslint/no-explicit-any +export type StoredValue = Record<string, any> | null; + interface TransformComponentSignature< Result extends BaseNode[] | Promise<BaseNode[]>, > { diff --git a/packages/core/src/storage/doc-store/index.ts b/packages/core/src/storage/doc-store/index.ts new file mode 100644 index 000000000..ffb6725de --- /dev/null +++ b/packages/core/src/storage/doc-store/index.ts @@ -0,0 +1,167 @@ +import { path } from "@llamaindex/env"; +import { + DEFAULT_DOC_STORE_PERSIST_FILENAME, + DEFAULT_PERSIST_DIR, +} from "../../global"; +import type { StoredValue } from "../../schema"; +import { BaseNode, Document, ObjectType, TextNode } from "../../schema"; + +const TYPE_KEY = "__type__"; +const DATA_KEY = "__data__"; + +export interface Serializer<T> { + toPersistence(data: Record<string, unknown>): T; + + fromPersistence(data: T): Record<string, unknown>; +} + +export const jsonSerializer: Serializer<string> = { + toPersistence(data) { + return JSON.stringify(data); + }, + fromPersistence(data) { + return JSON.parse(data); + }, +}; + +export const noneSerializer: Serializer<Record<string, unknown>> = { + toPersistence(data) { + return data; + }, + fromPersistence(data) { + return data; + }, +}; + +type DocJson<Data> = { + [TYPE_KEY]: ObjectType; + [DATA_KEY]: Data; +}; + +export function isValidDocJson( + docJson: StoredValue | null | undefined, +): docJson is DocJson<unknown> { + return ( + typeof docJson === "object" && + docJson !== null && + docJson[TYPE_KEY] !== undefined && + docJson[DATA_KEY] !== undefined + ); +} + +export function docToJson( + doc: BaseNode, + serializer: Serializer<unknown>, +): DocJson<unknown> { + return { + [DATA_KEY]: serializer.toPersistence(doc.toJSON()), + [TYPE_KEY]: doc.type, + }; +} + +export function jsonToDoc<Data>( + docDict: DocJson<Data>, + serializer: Serializer<Data>, +): BaseNode { + const docType = docDict[TYPE_KEY]; + // fixme: zod type check this + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const dataDict: any = serializer.fromPersistence(docDict[DATA_KEY]); + let doc: BaseNode; + + if (docType === ObjectType.DOCUMENT) { + doc = new Document({ + text: dataDict.text, + id_: dataDict.id_, + embedding: dataDict.embedding, + hash: dataDict.hash, + metadata: dataDict.metadata, + }); + } else if (docType === ObjectType.TEXT) { + doc = new TextNode({ + text: dataDict.text, + id_: dataDict.id_, + hash: dataDict.hash, + metadata: dataDict.metadata, + relationships: dataDict.relationships, + }); + } else { + throw new Error(`Unknown doc type: ${docType}`); + } + + return doc; +} + +const DEFAULT_PERSIST_PATH = path.join( + DEFAULT_PERSIST_DIR, + DEFAULT_DOC_STORE_PERSIST_FILENAME, +); + +export interface RefDocInfo { + nodeIds: string[]; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + extraInfo: Record<string, any>; +} + +export abstract class BaseDocumentStore { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + serializer: Serializer<any> = jsonSerializer; + + // Save/load + persist(persistPath: string = DEFAULT_PERSIST_PATH): void { + // Persist the docstore to a file. + } + + // Main interface + abstract docs(): Promise<Record<string, BaseNode>>; + + abstract addDocuments(docs: BaseNode[], allowUpdate: boolean): Promise<void>; + + abstract getDocument( + docId: string, + raiseError: boolean, + ): Promise<BaseNode | undefined>; + + abstract deleteDocument(docId: string, raiseError: boolean): Promise<void>; + + abstract documentExists(docId: string): Promise<boolean>; + + // Hash + abstract setDocumentHash(docId: string, docHash: string): Promise<void>; + + abstract getDocumentHash(docId: string): Promise<string | undefined>; + + abstract getAllDocumentHashes(): Promise<Record<string, string>>; + + // Ref Docs + abstract getAllRefDocInfo(): Promise<Record<string, RefDocInfo> | undefined>; + + abstract getRefDocInfo(refDocId: string): Promise<RefDocInfo | undefined>; + + abstract deleteRefDoc(refDocId: string, raiseError: boolean): Promise<void>; + + // Nodes + getNodes(nodeIds: string[], raiseError: boolean = true): Promise<BaseNode[]> { + return Promise.all( + nodeIds.map((nodeId) => this.getNode(nodeId, raiseError)), + ); + } + + async getNode(nodeId: string, raiseError: boolean = true): Promise<BaseNode> { + const doc = await this.getDocument(nodeId, raiseError); + if (!(doc instanceof BaseNode)) { + throw new Error(`Document ${nodeId} is not a Node.`); + } + return doc; + } + + async getNodeDict(nodeIdDict: { + [index: number]: string; + }): Promise<Record<number, BaseNode>> { + const result: Record<number, BaseNode> = {}; + for (const index in nodeIdDict) { + result[index] = await this.getNode(nodeIdDict[index]!); + } + return result; + } +} diff --git a/packages/core/src/storage/index-store/index.ts b/packages/core/src/storage/index-store/index.ts new file mode 100644 index 000000000..40728d9dd --- /dev/null +++ b/packages/core/src/storage/index-store/index.ts @@ -0,0 +1,115 @@ +import { path } from "@llamaindex/env"; +import { IndexStruct, jsonToIndexStruct } from "../../data-structs"; +import { + DEFAULT_INDEX_STORE_PERSIST_FILENAME, + DEFAULT_NAMESPACE, + DEFAULT_PERSIST_DIR, +} from "../../global"; +import { + BaseInMemoryKVStore, + BaseKVStore, + type DataType, + SimpleKVStore, +} from "../kv-store"; + +export const DEFAULT_PERSIST_PATH = path.join( + DEFAULT_PERSIST_DIR, + DEFAULT_INDEX_STORE_PERSIST_FILENAME, +); + +export abstract class BaseIndexStore { + abstract getIndexStructs(): Promise<IndexStruct[]>; + + abstract addIndexStruct(indexStruct: IndexStruct): Promise<void>; + + abstract deleteIndexStruct(key: string): Promise<void>; + + abstract getIndexStruct(structId?: string): Promise<IndexStruct | undefined>; + + async persist(persistPath: string = DEFAULT_PERSIST_PATH): Promise<void> { + // Persist the index store to disk. + } +} + +export class KVIndexStore extends BaseIndexStore { + private _kvStore: BaseKVStore; + private _collection: string; + + constructor(kvStore: BaseKVStore, namespace: string = DEFAULT_NAMESPACE) { + super(); + this._kvStore = kvStore; + this._collection = `${namespace}/data`; + } + + async addIndexStruct(indexStruct: IndexStruct): Promise<void> { + const key = indexStruct.indexId; + const data = indexStruct.toJson(); + await this._kvStore.put(key, data, this._collection); + } + + async deleteIndexStruct(key: string): Promise<void> { + await this._kvStore.delete(key, this._collection); + } + + async getIndexStruct(structId?: string): Promise<IndexStruct | undefined> { + if (!structId) { + const structs = await this.getIndexStructs(); + if (structs.length !== 1) { + throw new Error("More than one index struct found"); + } + return structs[0]; + } else { + const json = await this._kvStore.get(structId, this._collection); + if (json == null) { + return; + } + return jsonToIndexStruct(json); + } + } + + async getIndexStructs(): Promise<IndexStruct[]> { + const jsons = await this._kvStore.getAll(this._collection); + return Object.values(jsons).map((json) => jsonToIndexStruct(json)); + } +} + +export class SimpleIndexStore extends KVIndexStore { + private kvStore: BaseInMemoryKVStore; + + constructor(kvStore?: BaseInMemoryKVStore) { + kvStore = kvStore || new SimpleKVStore(); + super(kvStore); + this.kvStore = kvStore; + } + + static async fromPersistDir( + persistDir: string = DEFAULT_PERSIST_DIR, + ): Promise<SimpleIndexStore> { + const persistPath = path.join( + persistDir, + DEFAULT_INDEX_STORE_PERSIST_FILENAME, + ); + return this.fromPersistPath(persistPath); + } + + static async fromPersistPath(persistPath: string): Promise<SimpleIndexStore> { + const simpleKVStore = await SimpleKVStore.fromPersistPath(persistPath); + return new SimpleIndexStore(simpleKVStore); + } + + async persist(persistPath: string = DEFAULT_PERSIST_DIR): Promise<void> { + this.kvStore.persist(persistPath); + } + + static fromDict(saveDict: DataType): SimpleIndexStore { + const simpleKVStore = SimpleKVStore.fromDict(saveDict); + return new SimpleIndexStore(simpleKVStore); + } + + toDict(): Record<string, unknown> { + if (!(this.kvStore instanceof SimpleKVStore)) { + throw new Error("KVStore is not a SimpleKVStore"); + } + return this.kvStore.toDict(); + } +} diff --git a/packages/llamaindex/src/storage/kvStore/SimpleKVStore.ts b/packages/core/src/storage/kv-store/index.ts similarity index 71% rename from packages/llamaindex/src/storage/kvStore/SimpleKVStore.ts rename to packages/core/src/storage/kv-store/index.ts index 7d08692b4..3d1924967 100644 --- a/packages/llamaindex/src/storage/kvStore/SimpleKVStore.ts +++ b/packages/core/src/storage/kv-store/index.ts @@ -1,7 +1,34 @@ -import { DEFAULT_COLLECTION } from "@llamaindex/core/global"; import { fs, path } from "@llamaindex/env"; -import { exists } from "../FileSystem.js"; -import { BaseKVStore, type StoredValue } from "./types.js"; + +import { DEFAULT_COLLECTION } from "../../global"; +import type { StoredValue } from "../../schema"; + +async function exists(path: string): Promise<boolean> { + try { + await fs.access(path); + return true; + } catch { + return false; + } +} + +export abstract class BaseKVStore { + abstract put( + key: string, + val: StoredValue, + collection?: string, + ): Promise<void>; + abstract get(key: string, collection?: string): Promise<StoredValue>; + abstract getAll(collection?: string): Promise<Record<string, StoredValue>>; + abstract delete(key: string, collection?: string): Promise<boolean>; +} + +export abstract class BaseInMemoryKVStore extends BaseKVStore { + abstract persist(persistPath: string): void; + static fromPersistPath(persistPath: string): BaseInMemoryKVStore { + throw new Error("Method not implemented."); + } +} export type DataType = Record<string, Record<string, StoredValue>>; @@ -42,8 +69,10 @@ export class SimpleKVStore extends BaseKVStore { } async getAll(collection: string = DEFAULT_COLLECTION) { - // fixme: null value here - return structuredClone(this.data[collection])!; // Creating a shallow copy of the object + if (this.data[collection]) { + return structuredClone(this.data[collection]); + } + return {}; } async delete( diff --git a/packages/core/storage/doc-store/package.json b/packages/core/storage/doc-store/package.json new file mode 100644 index 000000000..2fdf125e2 --- /dev/null +++ b/packages/core/storage/doc-store/package.json @@ -0,0 +1,8 @@ +{ + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": "./dist/index.js", + "private": true +} diff --git a/packages/core/storage/index-store/package.json b/packages/core/storage/index-store/package.json new file mode 100644 index 000000000..2fdf125e2 --- /dev/null +++ b/packages/core/storage/index-store/package.json @@ -0,0 +1,8 @@ +{ + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": "./dist/index.js", + "private": true +} diff --git a/packages/core/storage/kv-store/package.json b/packages/core/storage/kv-store/package.json new file mode 100644 index 000000000..2fdf125e2 --- /dev/null +++ b/packages/core/storage/kv-store/package.json @@ -0,0 +1,8 @@ +{ + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": "./dist/index.js", + "private": true +} diff --git a/packages/llamaindex/src/index.edge.ts b/packages/llamaindex/src/index.edge.ts index 888be4b75..15f3242ae 100644 --- a/packages/llamaindex/src/index.edge.ts +++ b/packages/llamaindex/src/index.edge.ts @@ -18,6 +18,7 @@ export { } from "@llamaindex/cloud/reader"; export * from "@llamaindex/core/agent"; export * from "@llamaindex/core/chat-engine"; +export * from "@llamaindex/core/data-structs"; export { CallbackManager, DEFAULT_BASE_URL, @@ -57,6 +58,10 @@ export * from "@llamaindex/core/query-engine"; export * from "@llamaindex/core/response-synthesizers"; export * from "@llamaindex/core/retriever"; export * from "@llamaindex/core/schema"; +export * from "@llamaindex/core/storage/chat-store"; +export * from "@llamaindex/core/storage/doc-store"; +export * from "@llamaindex/core/storage/index-store"; +export * from "@llamaindex/core/storage/kv-store"; export * from "./agent/index.js"; export * from "./cloud/index.js"; export * from "./embeddings/index.js"; diff --git a/packages/llamaindex/src/indices/BaseIndex.ts b/packages/llamaindex/src/indices/BaseIndex.ts index 6f591946a..e95fdf749 100644 --- a/packages/llamaindex/src/indices/BaseIndex.ts +++ b/packages/llamaindex/src/indices/BaseIndex.ts @@ -2,12 +2,12 @@ import type { BaseQueryEngine } from "@llamaindex/core/query-engine"; import type { BaseSynthesizer } from "@llamaindex/core/response-synthesizers"; import type { BaseRetriever } from "@llamaindex/core/retriever"; import type { BaseNode, Document } from "@llamaindex/core/schema"; +import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store"; +import type { BaseIndexStore } from "@llamaindex/core/storage/index-store"; import type { ServiceContext } from "../ServiceContext.js"; import { nodeParserFromSettingsOrContext } from "../Settings.js"; import { runTransformations } from "../ingestion/IngestionPipeline.js"; import type { StorageContext } from "../storage/StorageContext.js"; -import type { BaseDocumentStore } from "../storage/docStore/types.js"; -import type { BaseIndexStore } from "../storage/indexStore/types.js"; export interface BaseIndexInit<T> { serviceContext?: ServiceContext | undefined; diff --git a/packages/llamaindex/src/indices/IndexStruct.ts b/packages/llamaindex/src/indices/IndexStruct.ts deleted file mode 100644 index fd15a09e5..000000000 --- a/packages/llamaindex/src/indices/IndexStruct.ts +++ /dev/null @@ -1,28 +0,0 @@ -import { randomUUID } from "@llamaindex/env"; - -/** - * The underlying structure of each index. - */ -export abstract class IndexStruct { - indexId: string; - summary?: string | undefined; - - constructor(indexId = randomUUID(), summary: string | undefined = undefined) { - this.indexId = indexId; - this.summary = summary; - } - - toJson(): Record<string, unknown> { - return { - indexId: this.indexId, - summary: this.summary, - }; - } - - getSummary(): string { - if (this.summary === undefined) { - throw new Error("summary field of the index dict is not set"); - } - return this.summary; - } -} diff --git a/packages/llamaindex/src/indices/index.ts b/packages/llamaindex/src/indices/index.ts index 196287e2a..a6d82e6cb 100644 --- a/packages/llamaindex/src/indices/index.ts +++ b/packages/llamaindex/src/indices/index.ts @@ -1,6 +1,5 @@ +export * from "@llamaindex/core/indices"; export * from "./BaseIndex.js"; -export * from "./IndexStruct.js"; -export * from "./json-to-index-struct.js"; export * from "./keyword/index.js"; export * from "./summary/index.js"; export * from "./vectorStore/index.js"; diff --git a/packages/llamaindex/src/indices/json-to-index-struct.ts b/packages/llamaindex/src/indices/json-to-index-struct.ts deleted file mode 100644 index e060a6554..000000000 --- a/packages/llamaindex/src/indices/json-to-index-struct.ts +++ /dev/null @@ -1,80 +0,0 @@ -import type { BaseNode } from "@llamaindex/core/schema"; -import { jsonToNode } from "@llamaindex/core/schema"; -import { IndexStruct } from "./IndexStruct.js"; - -export enum IndexStructType { - SIMPLE_DICT = "simple_dict", - LIST = "list", - KEYWORD_TABLE = "keyword_table", -} -export class IndexDict extends IndexStruct { - nodesDict: Record<string, BaseNode> = {}; - type: IndexStructType = IndexStructType.SIMPLE_DICT; - - getSummary(): string { - if (this.summary === undefined) { - throw new Error("summary field of the index dict is not set"); - } - return this.summary; - } - - addNode(node: BaseNode, textId?: string) { - const vectorId = textId ?? node.id_; - this.nodesDict[vectorId] = node; - } - - toJson(): Record<string, unknown> { - const nodesDict: Record<string, unknown> = {}; - - for (const [key, node] of Object.entries(this.nodesDict)) { - nodesDict[key] = node.toJSON(); - } - - return { - ...super.toJson(), - nodesDict, - type: this.type, - }; - } - - delete(nodeId: string) { - delete this.nodesDict[nodeId]; - } -} - -// eslint-disable-next-line @typescript-eslint/no-explicit-any -export function jsonToIndexStruct(json: any): IndexStruct { - if (json.type === IndexStructType.LIST) { - const indexList = new IndexList(json.indexId, json.summary); - indexList.nodes = json.nodes; - return indexList; - } else if (json.type === IndexStructType.SIMPLE_DICT) { - const indexDict = new IndexDict(json.indexId, json.summary); - indexDict.nodesDict = Object.entries(json.nodesDict).reduce< - Record<string, BaseNode> - >((acc, [key, value]) => { - acc[key] = jsonToNode(value); - return acc; - }, {}); - return indexDict; - } else { - throw new Error(`Unknown index struct type: ${json.type}`); - } -} - -export class IndexList extends IndexStruct { - nodes: string[] = []; - type: IndexStructType = IndexStructType.LIST; - - addNode(node: BaseNode) { - this.nodes.push(node.id_); - } - - toJson(): Record<string, unknown> { - return { - ...super.toJson(), - nodes: this.nodes, - type: this.type, - }; - } -} diff --git a/packages/llamaindex/src/indices/keyword/index.ts b/packages/llamaindex/src/indices/keyword/index.ts index 082145292..7c17c6a1d 100644 --- a/packages/llamaindex/src/indices/keyword/index.ts +++ b/packages/llamaindex/src/indices/keyword/index.ts @@ -10,17 +10,15 @@ import { serviceContextFromDefaults } from "../../ServiceContext.js"; import { RetrieverQueryEngine } from "../../engines/query/index.js"; import type { StorageContext } from "../../storage/StorageContext.js"; import { storageContextFromDefaults } from "../../storage/StorageContext.js"; -import type { BaseDocumentStore } from "../../storage/docStore/types.js"; import type { BaseIndexInit } from "../BaseIndex.js"; import { BaseIndex } from "../BaseIndex.js"; -import { IndexStructType } from "../json-to-index-struct.js"; import { extractKeywordsGivenResponse, rakeExtractKeywords, simpleExtractKeywords, } from "./utils.js"; -import { KeywordTable } from "@llamaindex/core/data-structs"; +import { IndexStructType, KeywordTable } from "@llamaindex/core/data-structs"; import type { LLM } from "@llamaindex/core/llms"; import type { BaseNodePostprocessor } from "@llamaindex/core/postprocessor"; import { @@ -34,6 +32,7 @@ import type { QueryBundle, } from "@llamaindex/core/query-engine"; import { BaseRetriever } from "@llamaindex/core/retriever"; +import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store"; import { extractText } from "@llamaindex/core/utils"; import { llmFromSettingsOrContext } from "../../Settings.js"; diff --git a/packages/llamaindex/src/indices/summary/index.ts b/packages/llamaindex/src/indices/summary/index.ts index 6e09c7ad9..a3fa20444 100644 --- a/packages/llamaindex/src/indices/summary/index.ts +++ b/packages/llamaindex/src/indices/summary/index.ts @@ -1,3 +1,4 @@ +import { IndexList, IndexStructType } from "@llamaindex/core/data-structs"; import type { BaseNodePostprocessor } from "@llamaindex/core/postprocessor"; import { type ChoiceSelectPrompt, @@ -12,6 +13,10 @@ import type { Document, NodeWithScore, } from "@llamaindex/core/schema"; +import type { + BaseDocumentStore, + RefDocInfo, +} from "@llamaindex/core/storage/doc-store"; import { extractText } from "@llamaindex/core/utils"; import _ from "lodash"; import type { ServiceContext } from "../../ServiceContext.js"; @@ -22,13 +27,8 @@ import { import { RetrieverQueryEngine } from "../../engines/query/index.js"; import type { StorageContext } from "../../storage/StorageContext.js"; import { storageContextFromDefaults } from "../../storage/StorageContext.js"; -import type { - BaseDocumentStore, - RefDocInfo, -} from "../../storage/docStore/types.js"; import type { BaseIndexInit } from "../BaseIndex.js"; import { BaseIndex } from "../BaseIndex.js"; -import { IndexList, IndexStructType } from "../json-to-index-struct.js"; import type { ChoiceSelectParserFunction, NodeFormatterFunction, diff --git a/packages/llamaindex/src/indices/vectorStore/index.ts b/packages/llamaindex/src/indices/vectorStore/index.ts index 74909a191..994c73951 100644 --- a/packages/llamaindex/src/indices/vectorStore/index.ts +++ b/packages/llamaindex/src/indices/vectorStore/index.ts @@ -1,3 +1,4 @@ +import { IndexDict, IndexStructType } from "@llamaindex/core/data-structs"; import { DEFAULT_SIMILARITY_TOP_K, type BaseEmbedding, @@ -16,6 +17,7 @@ import { type Document, type NodeWithScore, } from "@llamaindex/core/schema"; +import type { BaseIndexStore } from "@llamaindex/core/storage/index-store"; import type { ServiceContext } from "../../ServiceContext.js"; import { nodeParserFromSettingsOrContext } from "../../Settings.js"; import { RetrieverQueryEngine } from "../../engines/query/RetrieverQueryEngine.js"; @@ -29,7 +31,6 @@ import { } from "../../ingestion/strategies/index.js"; import type { StorageContext } from "../../storage/StorageContext.js"; import { storageContextFromDefaults } from "../../storage/StorageContext.js"; -import type { BaseIndexStore } from "../../storage/indexStore/types.js"; import type { BaseVectorStore, MetadataFilters, @@ -39,7 +40,6 @@ import type { import { VectorStoreQueryMode } from "../../vector-store/types.js"; import type { BaseIndexInit } from "../BaseIndex.js"; import { BaseIndex } from "../BaseIndex.js"; -import { IndexDict, IndexStructType } from "../json-to-index-struct.js"; interface IndexStructOptions { indexStruct?: IndexDict | undefined; diff --git a/packages/llamaindex/src/ingestion/IngestionCache.ts b/packages/llamaindex/src/ingestion/IngestionCache.ts index 0ab7da476..fc2a20e02 100644 --- a/packages/llamaindex/src/ingestion/IngestionCache.ts +++ b/packages/llamaindex/src/ingestion/IngestionCache.ts @@ -1,13 +1,15 @@ import type { BaseNode, TransformComponent } from "@llamaindex/core/schema"; import { MetadataMode } from "@llamaindex/core/schema"; -import { createSHA256 } from "@llamaindex/env"; import { docToJson, jsonSerializer, jsonToDoc, -} from "../storage/docStore/utils.js"; -import { SimpleKVStore } from "../storage/kvStore/SimpleKVStore.js"; -import type { BaseKVStore } from "../storage/kvStore/types.js"; +} from "@llamaindex/core/storage/doc-store"; +import { + SimpleKVStore, + type BaseKVStore, +} from "@llamaindex/core/storage/kv-store"; +import { createSHA256 } from "@llamaindex/env"; const transformToJSON = (obj: TransformComponent) => { // eslint-disable-next-line @typescript-eslint/no-explicit-any diff --git a/packages/llamaindex/src/ingestion/IngestionPipeline.ts b/packages/llamaindex/src/ingestion/IngestionPipeline.ts index 7f1039161..bf9cb3a0b 100644 --- a/packages/llamaindex/src/ingestion/IngestionPipeline.ts +++ b/packages/llamaindex/src/ingestion/IngestionPipeline.ts @@ -6,7 +6,7 @@ import { type Document, type Metadata, } from "@llamaindex/core/schema"; -import type { BaseDocumentStore } from "../storage/docStore/types.js"; +import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store"; import type { BaseVectorStore, VectorStoreByType, diff --git a/packages/llamaindex/src/ingestion/strategies/DuplicatesStrategy.ts b/packages/llamaindex/src/ingestion/strategies/DuplicatesStrategy.ts index c06451484..dc97aa6d0 100644 --- a/packages/llamaindex/src/ingestion/strategies/DuplicatesStrategy.ts +++ b/packages/llamaindex/src/ingestion/strategies/DuplicatesStrategy.ts @@ -1,5 +1,5 @@ import { BaseNode, TransformComponent } from "@llamaindex/core/schema"; -import type { BaseDocumentStore } from "../../storage/docStore/types.js"; +import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store"; /** * Handle doc store duplicates by checking all hashes. diff --git a/packages/llamaindex/src/ingestion/strategies/UpsertsAndDeleteStrategy.ts b/packages/llamaindex/src/ingestion/strategies/UpsertsAndDeleteStrategy.ts index e1b3e373f..e28a508b6 100644 --- a/packages/llamaindex/src/ingestion/strategies/UpsertsAndDeleteStrategy.ts +++ b/packages/llamaindex/src/ingestion/strategies/UpsertsAndDeleteStrategy.ts @@ -1,5 +1,5 @@ import { BaseNode, TransformComponent } from "@llamaindex/core/schema"; -import type { BaseDocumentStore } from "../../storage/docStore/types.js"; +import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store"; import type { BaseVectorStore } from "../../vector-store/types.js"; import { classify } from "./classify.js"; diff --git a/packages/llamaindex/src/ingestion/strategies/UpsertsStrategy.ts b/packages/llamaindex/src/ingestion/strategies/UpsertsStrategy.ts index 92639ea07..3fb158e98 100644 --- a/packages/llamaindex/src/ingestion/strategies/UpsertsStrategy.ts +++ b/packages/llamaindex/src/ingestion/strategies/UpsertsStrategy.ts @@ -1,5 +1,5 @@ import { BaseNode, TransformComponent } from "@llamaindex/core/schema"; -import type { BaseDocumentStore } from "../../storage/docStore/types.js"; +import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store"; import type { BaseVectorStore } from "../../vector-store/types.js"; import { classify } from "./classify.js"; diff --git a/packages/llamaindex/src/ingestion/strategies/classify.ts b/packages/llamaindex/src/ingestion/strategies/classify.ts index 2d48a7c2b..29379b5d7 100644 --- a/packages/llamaindex/src/ingestion/strategies/classify.ts +++ b/packages/llamaindex/src/ingestion/strategies/classify.ts @@ -1,5 +1,5 @@ import type { BaseNode } from "@llamaindex/core/schema"; -import type { BaseDocumentStore } from "../../storage/docStore/types.js"; +import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store"; export async function classify(docStore: BaseDocumentStore, nodes: BaseNode[]) { const existingDocIds = Object.values(await docStore.getAllDocumentHashes()); diff --git a/packages/llamaindex/src/ingestion/strategies/index.ts b/packages/llamaindex/src/ingestion/strategies/index.ts index defc2b752..e3516fca9 100644 --- a/packages/llamaindex/src/ingestion/strategies/index.ts +++ b/packages/llamaindex/src/ingestion/strategies/index.ts @@ -1,5 +1,5 @@ import { TransformComponent } from "@llamaindex/core/schema"; -import type { BaseDocumentStore } from "../../storage/docStore/types.js"; +import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store"; import type { BaseVectorStore } from "../../vector-store/types.js"; import { DuplicatesStrategy } from "./DuplicatesStrategy.js"; import { UpsertsAndDeleteStrategy } from "./UpsertsAndDeleteStrategy.js"; diff --git a/packages/llamaindex/src/storage/StorageContext.ts b/packages/llamaindex/src/storage/StorageContext.ts index bba1acb12..b6a1cbe68 100644 --- a/packages/llamaindex/src/storage/StorageContext.ts +++ b/packages/llamaindex/src/storage/StorageContext.ts @@ -4,6 +4,11 @@ import { DEFAULT_NAMESPACE, } from "@llamaindex/core/global"; import { ModalityType, ObjectType } from "@llamaindex/core/schema"; +import type { BaseDocumentStore } from "@llamaindex/core/storage/doc-store"; +import { + BaseIndexStore, + SimpleIndexStore, +} from "@llamaindex/core/storage/index-store"; import { path } from "@llamaindex/env"; import type { ServiceContext } from "../ServiceContext.js"; import { SimpleVectorStore } from "../vector-store/SimpleVectorStore.js"; @@ -12,9 +17,6 @@ import type { VectorStoreByType, } from "../vector-store/types.js"; import { SimpleDocumentStore } from "./docStore/SimpleDocumentStore.js"; -import type { BaseDocumentStore } from "./docStore/types.js"; -import { SimpleIndexStore } from "./indexStore/SimpleIndexStore.js"; -import type { BaseIndexStore } from "./indexStore/types.js"; export interface StorageContext { docStore: BaseDocumentStore; diff --git a/packages/llamaindex/src/storage/docStore/KVDocumentStore.ts b/packages/llamaindex/src/storage/docStore/KVDocumentStore.ts index 3865c0beb..6657a38a2 100644 --- a/packages/llamaindex/src/storage/docStore/KVDocumentStore.ts +++ b/packages/llamaindex/src/storage/docStore/KVDocumentStore.ts @@ -1,11 +1,15 @@ import { DEFAULT_NAMESPACE } from "@llamaindex/core/global"; import type { BaseNode } from "@llamaindex/core/schema"; import { ObjectType } from "@llamaindex/core/schema"; +import type { RefDocInfo } from "@llamaindex/core/storage/doc-store"; +import { + BaseDocumentStore, + docToJson, + isValidDocJson, + jsonToDoc, +} from "@llamaindex/core/storage/doc-store"; +import type { BaseKVStore } from "@llamaindex/core/storage/kv-store"; import _ from "lodash"; -import type { BaseKVStore } from "../kvStore/types.js"; -import type { RefDocInfo } from "./types.js"; -import { BaseDocumentStore } from "./types.js"; -import { docToJson, isValidDocJson, jsonToDoc } from "./utils.js"; type DocMetaData = { docHash: string; refDocId?: string }; diff --git a/packages/llamaindex/src/storage/docStore/PostgresDocumentStore.ts b/packages/llamaindex/src/storage/docStore/PostgresDocumentStore.ts index a910b4a29..e15ad6c6c 100644 --- a/packages/llamaindex/src/storage/docStore/PostgresDocumentStore.ts +++ b/packages/llamaindex/src/storage/docStore/PostgresDocumentStore.ts @@ -1,10 +1,10 @@ import { DEFAULT_NAMESPACE } from "@llamaindex/core/global"; +import { noneSerializer } from "@llamaindex/core/storage/doc-store"; import { PostgresKVStore, type PostgresKVStoreConfig, } from "../kvStore/PostgresKVStore.js"; import { KVDocumentStore } from "./KVDocumentStore.js"; -import { noneSerializer } from "./utils.js"; const DEFAULT_TABLE_NAME = "llamaindex_doc_store"; diff --git a/packages/llamaindex/src/storage/docStore/SimpleDocumentStore.ts b/packages/llamaindex/src/storage/docStore/SimpleDocumentStore.ts index d1fd47760..7a057737c 100644 --- a/packages/llamaindex/src/storage/docStore/SimpleDocumentStore.ts +++ b/packages/llamaindex/src/storage/docStore/SimpleDocumentStore.ts @@ -3,10 +3,12 @@ import { DEFAULT_NAMESPACE, DEFAULT_PERSIST_DIR, } from "@llamaindex/core/global"; +import { + BaseInMemoryKVStore, + SimpleKVStore, +} from "@llamaindex/core/storage/kv-store"; import { path } from "@llamaindex/env"; import _ from "lodash"; -import { SimpleKVStore } from "../kvStore/SimpleKVStore.js"; -import { BaseInMemoryKVStore } from "../kvStore/types.js"; import { KVDocumentStore } from "./KVDocumentStore.js"; // eslint-disable-next-line @typescript-eslint/no-explicit-any diff --git a/packages/llamaindex/src/storage/docStore/types.ts b/packages/llamaindex/src/storage/docStore/types.ts deleted file mode 100644 index a97cb8cbd..000000000 --- a/packages/llamaindex/src/storage/docStore/types.ts +++ /dev/null @@ -1,77 +0,0 @@ -import { - DEFAULT_DOC_STORE_PERSIST_FILENAME, - DEFAULT_PERSIST_DIR, -} from "@llamaindex/core/global"; -import { BaseNode } from "@llamaindex/core/schema"; -import { jsonSerializer, type Serializer } from "./utils.js"; - -const defaultPersistPath = `${DEFAULT_PERSIST_DIR}/${DEFAULT_DOC_STORE_PERSIST_FILENAME}`; - -export interface RefDocInfo { - nodeIds: string[]; - // eslint-disable-next-line @typescript-eslint/no-explicit-any - extraInfo: Record<string, any>; -} - -export abstract class BaseDocumentStore { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - serializer: Serializer<any> = jsonSerializer; - - // Save/load - persist(persistPath: string = defaultPersistPath): void { - // Persist the docstore to a file. - } - - // Main interface - abstract docs(): Promise<Record<string, BaseNode>>; - - abstract addDocuments(docs: BaseNode[], allowUpdate: boolean): Promise<void>; - - abstract getDocument( - docId: string, - raiseError: boolean, - ): Promise<BaseNode | undefined>; - - abstract deleteDocument(docId: string, raiseError: boolean): Promise<void>; - - abstract documentExists(docId: string): Promise<boolean>; - - // Hash - abstract setDocumentHash(docId: string, docHash: string): Promise<void>; - - abstract getDocumentHash(docId: string): Promise<string | undefined>; - - abstract getAllDocumentHashes(): Promise<Record<string, string>>; - - // Ref Docs - abstract getAllRefDocInfo(): Promise<Record<string, RefDocInfo> | undefined>; - - abstract getRefDocInfo(refDocId: string): Promise<RefDocInfo | undefined>; - - abstract deleteRefDoc(refDocId: string, raiseError: boolean): Promise<void>; - - // Nodes - getNodes(nodeIds: string[], raiseError: boolean = true): Promise<BaseNode[]> { - return Promise.all( - nodeIds.map((nodeId) => this.getNode(nodeId, raiseError)), - ); - } - - async getNode(nodeId: string, raiseError: boolean = true): Promise<BaseNode> { - const doc = await this.getDocument(nodeId, raiseError); - if (!(doc instanceof BaseNode)) { - throw new Error(`Document ${nodeId} is not a Node.`); - } - return doc; - } - - async getNodeDict(nodeIdDict: { - [index: number]: string; - }): Promise<Record<number, BaseNode>> { - const result: Record<number, BaseNode> = {}; - for (const index in nodeIdDict) { - result[index] = await this.getNode(nodeIdDict[index]!); - } - return result; - } -} diff --git a/packages/llamaindex/src/storage/docStore/utils.ts b/packages/llamaindex/src/storage/docStore/utils.ts deleted file mode 100644 index 8d8ee3ec4..000000000 --- a/packages/llamaindex/src/storage/docStore/utils.ts +++ /dev/null @@ -1,88 +0,0 @@ -import type { BaseNode } from "@llamaindex/core/schema"; -import { Document, ObjectType, TextNode } from "@llamaindex/core/schema"; -import type { StoredValue } from "../kvStore/types.js"; - -const TYPE_KEY = "__type__"; -const DATA_KEY = "__data__"; - -export interface Serializer<T> { - toPersistence(data: Record<string, unknown>): T; - fromPersistence(data: T): Record<string, unknown>; -} - -export const jsonSerializer: Serializer<string> = { - toPersistence(data) { - return JSON.stringify(data); - }, - fromPersistence(data) { - return JSON.parse(data); - }, -}; - -export const noneSerializer: Serializer<Record<string, unknown>> = { - toPersistence(data) { - return data; - }, - fromPersistence(data) { - return data; - }, -}; - -type DocJson<Data> = { - [TYPE_KEY]: ObjectType; - [DATA_KEY]: Data; -}; - -export function isValidDocJson( - docJson: StoredValue | null | undefined, -): docJson is DocJson<unknown> { - return ( - typeof docJson === "object" && - docJson !== null && - docJson[TYPE_KEY] !== undefined && - docJson[DATA_KEY] !== undefined - ); -} - -export function docToJson( - doc: BaseNode, - serializer: Serializer<unknown>, -): DocJson<unknown> { - return { - [DATA_KEY]: serializer.toPersistence(doc.toJSON()), - [TYPE_KEY]: doc.type, - }; -} - -export function jsonToDoc<Data>( - docDict: DocJson<Data>, - serializer: Serializer<Data>, -): BaseNode { - const docType = docDict[TYPE_KEY]; - // fixme: zod type check this - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const dataDict: any = serializer.fromPersistence(docDict[DATA_KEY]); - let doc: BaseNode; - - if (docType === ObjectType.DOCUMENT) { - doc = new Document({ - text: dataDict.text, - id_: dataDict.id_, - embedding: dataDict.embedding, - hash: dataDict.hash, - metadata: dataDict.metadata, - }); - } else if (docType === ObjectType.TEXT) { - doc = new TextNode({ - text: dataDict.text, - id_: dataDict.id_, - hash: dataDict.hash, - metadata: dataDict.metadata, - relationships: dataDict.relationships, - }); - } else { - throw new Error(`Unknown doc type: ${docType}`); - } - - return doc; -} diff --git a/packages/llamaindex/src/storage/index.ts b/packages/llamaindex/src/storage/index.ts index a2a452081..546eaefab 100644 --- a/packages/llamaindex/src/storage/index.ts +++ b/packages/llamaindex/src/storage/index.ts @@ -1,15 +1,13 @@ export * from "@llamaindex/core/storage/chat-store"; +export * from "@llamaindex/core/storage/doc-store"; +export * from "@llamaindex/core/storage/index-store"; +export * from "@llamaindex/core/storage/kv-store"; export * from "./docStore/AzureCosmosNoSqlDocumentStore.js"; export { PostgresDocumentStore } from "./docStore/PostgresDocumentStore.js"; export { SimpleDocumentStore } from "./docStore/SimpleDocumentStore.js"; -export * from "./docStore/types.js"; export * from "./FileSystem.js"; export * from "./indexStore/AzureCosmosNoSqlIndexStore.js"; export { PostgresIndexStore } from "./indexStore/PostgresIndexStore.js"; -export { SimpleIndexStore } from "./indexStore/SimpleIndexStore.js"; -export * from "./indexStore/types.js"; export * from "./kvStore/AzureCosmosNoSqlKVStore.js"; export { PostgresKVStore } from "./kvStore/PostgresKVStore.js"; -export { SimpleKVStore } from "./kvStore/SimpleKVStore.js"; -export * from "./kvStore/types.js"; export * from "./StorageContext.js"; diff --git a/packages/llamaindex/src/storage/indexStore/KVIndexStore.ts b/packages/llamaindex/src/storage/indexStore/KVIndexStore.ts index 39a4879c0..492fe7f3b 100644 --- a/packages/llamaindex/src/storage/indexStore/KVIndexStore.ts +++ b/packages/llamaindex/src/storage/indexStore/KVIndexStore.ts @@ -1,9 +1,11 @@ +import { + type IndexStruct, + jsonToIndexStruct, +} from "@llamaindex/core/data-structs"; import { DEFAULT_NAMESPACE } from "@llamaindex/core/global"; +import { BaseIndexStore } from "@llamaindex/core/storage/index-store"; +import type { BaseKVStore } from "@llamaindex/core/storage/kv-store"; import _ from "lodash"; -import type { IndexStruct } from "../../indices/IndexStruct.js"; -import { jsonToIndexStruct } from "../../indices/json-to-index-struct.js"; -import type { BaseKVStore } from "../kvStore/types.js"; -import { BaseIndexStore } from "./types.js"; export class KVIndexStore extends BaseIndexStore { private _kvStore: BaseKVStore; diff --git a/packages/llamaindex/src/storage/indexStore/SimpleIndexStore.ts b/packages/llamaindex/src/storage/indexStore/SimpleIndexStore.ts deleted file mode 100644 index 516024be5..000000000 --- a/packages/llamaindex/src/storage/indexStore/SimpleIndexStore.ts +++ /dev/null @@ -1,50 +0,0 @@ -import { - DEFAULT_INDEX_STORE_PERSIST_FILENAME, - DEFAULT_PERSIST_DIR, -} from "@llamaindex/core/global"; -import { path } from "@llamaindex/env"; -import type { DataType } from "../kvStore/SimpleKVStore.js"; -import { SimpleKVStore } from "../kvStore/SimpleKVStore.js"; -import type { BaseInMemoryKVStore } from "../kvStore/types.js"; -import { KVIndexStore } from "./KVIndexStore.js"; - -export class SimpleIndexStore extends KVIndexStore { - private kvStore: BaseInMemoryKVStore; - - constructor(kvStore?: BaseInMemoryKVStore) { - kvStore = kvStore || new SimpleKVStore(); - super(kvStore); - this.kvStore = kvStore; - } - - static async fromPersistDir( - persistDir: string = DEFAULT_PERSIST_DIR, - ): Promise<SimpleIndexStore> { - const persistPath = path.join( - persistDir, - DEFAULT_INDEX_STORE_PERSIST_FILENAME, - ); - return this.fromPersistPath(persistPath); - } - - static async fromPersistPath(persistPath: string): Promise<SimpleIndexStore> { - const simpleKVStore = await SimpleKVStore.fromPersistPath(persistPath); - return new SimpleIndexStore(simpleKVStore); - } - - async persist(persistPath: string = DEFAULT_PERSIST_DIR): Promise<void> { - this.kvStore.persist(persistPath); - } - - static fromDict(saveDict: DataType): SimpleIndexStore { - const simpleKVStore = SimpleKVStore.fromDict(saveDict); - return new SimpleIndexStore(simpleKVStore); - } - - toDict(): Record<string, unknown> { - if (!(this.kvStore instanceof SimpleKVStore)) { - throw new Error("KVStore is not a SimpleKVStore"); - } - return this.kvStore.toDict(); - } -} diff --git a/packages/llamaindex/src/storage/indexStore/types.ts b/packages/llamaindex/src/storage/indexStore/types.ts deleted file mode 100644 index 2cab6aa77..000000000 --- a/packages/llamaindex/src/storage/indexStore/types.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { - DEFAULT_INDEX_STORE_PERSIST_FILENAME, - DEFAULT_PERSIST_DIR, -} from "@llamaindex/core/global"; -import { path } from "@llamaindex/env"; -import type { IndexStruct } from "../../indices/IndexStruct.js"; - -const defaultPersistPath = path.join( - DEFAULT_PERSIST_DIR, - DEFAULT_INDEX_STORE_PERSIST_FILENAME, -); - -export abstract class BaseIndexStore { - abstract getIndexStructs(): Promise<IndexStruct[]>; - - abstract addIndexStruct(indexStruct: IndexStruct): Promise<void>; - - abstract deleteIndexStruct(key: string): Promise<void>; - - abstract getIndexStruct(structId?: string): Promise<IndexStruct | undefined>; - - async persist(persistPath: string = defaultPersistPath): Promise<void> { - // Persist the index store to disk. - } -} diff --git a/packages/llamaindex/src/storage/kvStore/AzureCosmosNoSqlKVStore.ts b/packages/llamaindex/src/storage/kvStore/AzureCosmosNoSqlKVStore.ts index 2c658c900..3662c563e 100644 --- a/packages/llamaindex/src/storage/kvStore/AzureCosmosNoSqlKVStore.ts +++ b/packages/llamaindex/src/storage/kvStore/AzureCosmosNoSqlKVStore.ts @@ -1,8 +1,8 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ import { Container, CosmosClient, Database } from "@azure/cosmos"; import { DefaultAzureCredential, type TokenCredential } from "@azure/identity"; +import { BaseKVStore } from "@llamaindex/core/storage/kv-store"; import { getEnv } from "@llamaindex/env"; -import { BaseKVStore } from "./types.js"; const USER_AGENT_SUFFIX = "LlamaIndex-CDBNoSQL-KVStore-JavaScript"; const DEFAULT_CHAT_DATABASE = "KVStoreDB"; const DEFAULT_CHAT_CONTAINER = "KVStoreContainer"; diff --git a/packages/llamaindex/src/storage/kvStore/PostgresKVStore.ts b/packages/llamaindex/src/storage/kvStore/PostgresKVStore.ts index 480971588..0d8d2550d 100644 --- a/packages/llamaindex/src/storage/kvStore/PostgresKVStore.ts +++ b/packages/llamaindex/src/storage/kvStore/PostgresKVStore.ts @@ -1,6 +1,7 @@ import { DEFAULT_COLLECTION } from "@llamaindex/core/global"; +import type { StoredValue } from "@llamaindex/core/schema"; +import { BaseKVStore } from "@llamaindex/core/storage/kv-store"; import type pg from "pg"; -import { BaseKVStore, type StoredValue } from "./types.js"; export type DataType = Record<string, Record<string, StoredValue>>; diff --git a/packages/llamaindex/src/storage/kvStore/types.ts b/packages/llamaindex/src/storage/kvStore/types.ts deleted file mode 100644 index e8c6900d6..000000000 --- a/packages/llamaindex/src/storage/kvStore/types.ts +++ /dev/null @@ -1,23 +0,0 @@ -const defaultCollection = "data"; - -// fixme: remove any -// eslint-disable-next-line @typescript-eslint/no-explicit-any -export type StoredValue = Record<string, any> | null; - -export abstract class BaseKVStore { - abstract put( - key: string, - val: StoredValue, - collection?: string, - ): Promise<void>; - abstract get(key: string, collection?: string): Promise<StoredValue>; - abstract getAll(collection?: string): Promise<Record<string, StoredValue>>; - abstract delete(key: string, collection?: string): Promise<boolean>; -} - -export abstract class BaseInMemoryKVStore extends BaseKVStore { - abstract persist(persistPath: string): void; - static fromPersistPath(persistPath: string): BaseInMemoryKVStore { - throw new Error("Method not implemented."); - } -} -- GitLab