From c2ef5057b3e019c5dc18ecc7f08f0990a1e2060a Mon Sep 17 00:00:00 2001
From: Sourabh Desai <sourabhdesai@gmail.com>
Date: Mon, 3 Jul 2023 22:59:28 +0000
Subject: [PATCH] add init functions for list index. Still needs some refactoring + testing

---
Review notes (free text in this position, between the three-dash line and
the first file diff, is skipped by `git am`):

* The line structure of this patch was restored; hunk line counts and the
  diffstat below are unchanged. Indentation of context lines was inferred
  (2-space Prettier layout) and should be checked against the target tree.
* ListIndex.init: both error messages named "VectorStoreIndex" (copy/paste
  from the vector index); they now name ListIndex.
* ListIndex.asQueryEngine: the `mode` argument was accepted but never used;
  it is now forwarded to asRetriever(mode).
* ListIndex._buildIndexFromNodes: the default for `indexStruct` now uses `??`
  like the other defaults added in this file.
* The post-image blob id on the ListIndex.ts `index` line predates the three
  amendments above.
* NOTE(review): init/fromDocuments build the IndexList from the parsed nodes,
  but nothing visible in this patch writes those nodes to docStore (only
  setDocumentHash is called). Confirm the retrievers can resolve them.

 .gitignore                                |  2 +-
 apps/simple/listIndex.ts                  | 15 ++++
 apps/simple/{index.ts => vectorIndex.ts}  |  6 +-
 packages/core/src/BaseIndex.ts            |  8 +-
 packages/core/src/index/list/ListIndex.ts | 89 +++++++++++++++++++++--
 packages/core/src/index/list/index.ts     |  5 ++
 6 files changed, 112 insertions(+), 13 deletions(-)
 create mode 100644 apps/simple/listIndex.ts
 rename apps/simple/{index.ts => vectorIndex.ts} (88%)
 create mode 100644 packages/core/src/index/list/index.ts

diff --git a/.gitignore b/.gitignore
index 6b28a0c08..2641a07fa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -35,4 +35,4 @@ yarn-error.log*
 
 # vercel
 .vercel
-storage/
\ No newline at end of file
+storage/
diff --git a/apps/simple/listIndex.ts b/apps/simple/listIndex.ts
new file mode 100644
index 000000000..5e03cfa01
--- /dev/null
+++ b/apps/simple/listIndex.ts
@@ -0,0 +1,15 @@
+import { Document } from "@llamaindex/core/src/Node";
+import { ListIndex } from "@llamaindex/core/src/index/list";
+import essay from "./essay";
+
+async function main() {
+  const document = new Document({ text: essay });
+  const index = await ListIndex.fromDocuments([document]);
+  const queryEngine = index.asQueryEngine();
+  const response = await queryEngine.aquery(
+    "What did the author do growing up?"
+  );
+  console.log(response.toString());
+}
+
+main().catch(console.error);
diff --git a/apps/simple/index.ts b/apps/simple/vectorIndex.ts
similarity index 88%
rename from apps/simple/index.ts
rename to apps/simple/vectorIndex.ts
index 733bb7f07..d05b58749 100644
--- a/apps/simple/index.ts
+++ b/apps/simple/vectorIndex.ts
@@ -2,7 +2,7 @@ import { Document } from "@llamaindex/core/src/Node";
 import { VectorStoreIndex } from "@llamaindex/core/src/BaseIndex";
 import essay from "./essay";
 
-(async () => {
+async function main() {
   const document = new Document({ text: essay });
   const index = await VectorStoreIndex.fromDocuments([document]);
   const queryEngine = index.asQueryEngine();
@@ -10,4 +10,6 @@ import essay from "./essay";
     "What did the author do growing up?"
   );
   console.log(response.toString());
-})();
+}
+
+main().catch(console.error);
diff --git a/packages/core/src/BaseIndex.ts b/packages/core/src/BaseIndex.ts
index 65452fbd9..d8512f1b6 100644
--- a/packages/core/src/BaseIndex.ts
+++ b/packages/core/src/BaseIndex.ts
@@ -9,6 +9,7 @@ import {
 } from "./storage/StorageContext";
 import { BaseDocumentStore } from "./storage/docStore/types";
 import { VectorStore } from "./storage/vectorStore/types";
+import { BaseIndexStore } from "./storage/indexStore/types";
 
 export abstract class IndexStruct {
   indexId: string;
@@ -56,14 +57,16 @@ export interface BaseIndexInit<T> {
   serviceContext: ServiceContext;
   storageContext: StorageContext;
   docStore: BaseDocumentStore;
-  vectorStore: VectorStore;
+  vectorStore?: VectorStore;
+  indexStore?: BaseIndexStore;
   indexStruct: T;
 }
 export abstract class BaseIndex<T> {
   serviceContext: ServiceContext;
   storageContext: StorageContext;
   docStore: BaseDocumentStore;
-  vectorStore: VectorStore;
+  vectorStore?: VectorStore;
+  indexStore?: BaseIndexStore;
   indexStruct: T;
 
   constructor(init: BaseIndexInit<T>) {
@@ -71,6 +74,7 @@ export abstract class BaseIndex<T> {
     this.storageContext = init.storageContext;
     this.docStore = init.docStore;
     this.vectorStore = init.vectorStore;
+    this.indexStore = init.indexStore;
     this.indexStruct = init.indexStruct;
   }
 
diff --git a/packages/core/src/index/list/ListIndex.ts b/packages/core/src/index/list/ListIndex.ts
index 44496db2b..78c1971b9 100644
--- a/packages/core/src/index/list/ListIndex.ts
+++ b/packages/core/src/index/list/ListIndex.ts
@@ -1,8 +1,16 @@
-import { BaseNode } from "../../Node";
+import { BaseNode, Document } from "../../Node";
 import { BaseIndex, BaseIndexInit, IndexList } from "../../BaseIndex";
+import { BaseQueryEngine, RetrieverQueryEngine } from "../../QueryEngine";
+import {
+  StorageContext,
+  storageContextFromDefaults,
+} from "../../storage/StorageContext";
 import { BaseRetriever } from "../../Retriever";
 import { ListIndexRetriever } from "./ListIndexRetriever";
-import { ServiceContext } from "../../ServiceContext";
+import {
+  ServiceContext,
+  serviceContextFromDefaults,
+} from "../../ServiceContext";
 import { RefDocInfo } from "../../storage/docStore/types";
 import _ from "lodash";
 
@@ -12,17 +20,73 @@ export enum ListRetrieverMode {
   LLM = "llm",
 }
 
-export interface ListIndexInit extends BaseIndexInit<IndexList> {
+export interface ListIndexOptions {
   nodes?: BaseNode[];
-  indexStruct: IndexList;
-  serviceContext: ServiceContext;
+  indexStruct?: IndexList;
+  serviceContext?: ServiceContext;
+  storageContext?: StorageContext;
 }
 
 export class ListIndex extends BaseIndex<IndexList> {
-  constructor(init: ListIndexInit) {
+  constructor(init: BaseIndexInit<IndexList>) {
     super(init);
   }
 
+  static async init(options: ListIndexOptions): Promise<ListIndex> {
+    const storageContext =
+      options.storageContext ?? (await storageContextFromDefaults({}));
+    const serviceContext =
+      options.serviceContext ?? serviceContextFromDefaults({});
+    const { docStore, indexStore } = storageContext;
+
+    let indexStruct: IndexList;
+    if (options.indexStruct) {
+      if (options.nodes) {
+        throw new Error(
+          "Cannot initialize ListIndex with both nodes and indexStruct"
+        );
+      }
+      indexStruct = options.indexStruct;
+    } else {
+      if (!options.nodes) {
+        throw new Error(
+          "Cannot initialize ListIndex without nodes or indexStruct"
+        );
+      }
+      indexStruct = ListIndex._buildIndexFromNodes(options.nodes);
+    }
+
+    return new ListIndex({
+      storageContext,
+      serviceContext,
+      docStore,
+      indexStore,
+      indexStruct,
+    });
+  }
+
+  static async fromDocuments(
+    documents: Document[],
+    storageContext?: StorageContext,
+    serviceContext?: ServiceContext
+  ): Promise<ListIndex> {
+    storageContext = storageContext ?? (await storageContextFromDefaults({}));
+    serviceContext = serviceContext ?? serviceContextFromDefaults({});
+    const docStore = storageContext.docStore;
+
+    for (const doc of documents) {
+      docStore.setDocumentHash(doc.id_, doc.hash);
+    }
+
+    const nodes = serviceContext.nodeParser.getNodesFromDocuments(documents);
+    const index = await ListIndex.init({
+      nodes,
+      storageContext,
+      serviceContext,
+    });
+    return index;
+  }
+
   asRetriever(
     mode: ListRetrieverMode = ListRetrieverMode.DEFAULT
   ): BaseRetriever {
@@ -36,8 +100,17 @@ export class ListIndex extends BaseIndex<IndexList> {
     }
   }
 
-  protected _buildIndexFromNodes(nodes: BaseNode[]): IndexList {
-    const indexStruct = new IndexList();
+  asQueryEngine(
+    mode: ListRetrieverMode = ListRetrieverMode.DEFAULT
+  ): BaseQueryEngine {
+    return new RetrieverQueryEngine(this.asRetriever(mode));
+  }
+
+  static _buildIndexFromNodes(
+    nodes: BaseNode[],
+    indexStruct?: IndexList
+  ): IndexList {
+    indexStruct = indexStruct ?? new IndexList();
 
     for (const node of nodes) {
       indexStruct.addNode(node);
diff --git a/packages/core/src/index/list/index.ts b/packages/core/src/index/list/index.ts
new file mode 100644
index 000000000..f8d0b8d5e
--- /dev/null
+++ b/packages/core/src/index/list/index.ts
@@ -0,0 +1,5 @@
+export { ListIndex, ListRetrieverMode } from "./ListIndex";
+export {
+  ListIndexRetriever,
+  ListIndexLLMRetriever,
+} from "./ListIndexRetriever";
-- 
GitLab