From c2ef5057b3e019c5dc18ecc7f08f0990a1e2060a Mon Sep 17 00:00:00 2001
From: Sourabh Desai <sourabhdesai@gmail.com>
Date: Mon, 3 Jul 2023 22:59:28 +0000
Subject: [PATCH] add init functions for list index. Still needs some
 refactoring + testing

---
 .gitignore                                |  2 +-
 apps/simple/listIndex.ts                  | 15 ++++
 apps/simple/{index.ts => vectorIndex.ts}  |  6 +-
 packages/core/src/BaseIndex.ts            |  8 +-
 packages/core/src/index/list/ListIndex.ts | 89 +++++++++++++++++++++--
 packages/core/src/index/list/index.ts     |  5 ++
 6 files changed, 112 insertions(+), 13 deletions(-)
 create mode 100644 apps/simple/listIndex.ts
 rename apps/simple/{index.ts => vectorIndex.ts} (88%)
 create mode 100644 packages/core/src/index/list/index.ts

diff --git a/.gitignore b/.gitignore
index 6b28a0c08..2641a07fa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -35,4 +35,4 @@ yarn-error.log*
 # vercel
 .vercel
 
-storage/
\ No newline at end of file
+storage/
diff --git a/apps/simple/listIndex.ts b/apps/simple/listIndex.ts
new file mode 100644
index 000000000..5e03cfa01
--- /dev/null
+++ b/apps/simple/listIndex.ts
@@ -0,0 +1,15 @@
+import { Document } from "@llamaindex/core/src/Node";
+import { ListIndex } from "@llamaindex/core/src/index/list";
+import essay from "./essay";
+
+async function main() {
+  const document = new Document({ text: essay });
+  const index = await ListIndex.fromDocuments([document]);
+  const queryEngine = index.asQueryEngine();
+  const response = await queryEngine.aquery(
+    "What did the author do growing up?"
+  );
+  console.log(response.toString());
+}
+
+main().catch(console.error);
diff --git a/apps/simple/index.ts b/apps/simple/vectorIndex.ts
similarity index 88%
rename from apps/simple/index.ts
rename to apps/simple/vectorIndex.ts
index 733bb7f07..d05b58749 100644
--- a/apps/simple/index.ts
+++ b/apps/simple/vectorIndex.ts
@@ -2,7 +2,7 @@ import { Document } from "@llamaindex/core/src/Node";
 import { VectorStoreIndex } from "@llamaindex/core/src/BaseIndex";
 import essay from "./essay";
 
-(async () => {
+async function main() {
   const document = new Document({ text: essay });
   const index = await VectorStoreIndex.fromDocuments([document]);
   const queryEngine = index.asQueryEngine();
@@ -10,4 +10,6 @@ import essay from "./essay";
     "What did the author do growing up?"
   );
   console.log(response.toString());
-})();
+}
+
+main().catch(console.error);
diff --git a/packages/core/src/BaseIndex.ts b/packages/core/src/BaseIndex.ts
index 65452fbd9..d8512f1b6 100644
--- a/packages/core/src/BaseIndex.ts
+++ b/packages/core/src/BaseIndex.ts
@@ -9,6 +9,7 @@ import {
 } from "./storage/StorageContext";
 import { BaseDocumentStore } from "./storage/docStore/types";
 import { VectorStore } from "./storage/vectorStore/types";
+import { BaseIndexStore } from "./storage/indexStore/types";
 
 export abstract class IndexStruct {
   indexId: string;
@@ -56,14 +57,16 @@ export interface BaseIndexInit<T> {
   serviceContext: ServiceContext;
   storageContext: StorageContext;
   docStore: BaseDocumentStore;
-  vectorStore: VectorStore;
+  vectorStore?: VectorStore;
+  indexStore?: BaseIndexStore;
   indexStruct: T;
 }
 export abstract class BaseIndex<T> {
   serviceContext: ServiceContext;
   storageContext: StorageContext;
   docStore: BaseDocumentStore;
-  vectorStore: VectorStore;
+  vectorStore?: VectorStore;
+  indexStore?: BaseIndexStore;
   indexStruct: T;
 
   constructor(init: BaseIndexInit<T>) {
@@ -71,6 +74,7 @@ export abstract class BaseIndex<T> {
     this.storageContext = init.storageContext;
     this.docStore = init.docStore;
     this.vectorStore = init.vectorStore;
+    this.indexStore = init.indexStore;
     this.indexStruct = init.indexStruct;
   }
 
diff --git a/packages/core/src/index/list/ListIndex.ts b/packages/core/src/index/list/ListIndex.ts
index 44496db2b..78c1971b9 100644
--- a/packages/core/src/index/list/ListIndex.ts
+++ b/packages/core/src/index/list/ListIndex.ts
@@ -1,8 +1,16 @@
-import { BaseNode } from "../../Node";
+import { BaseNode, Document } from "../../Node";
 import { BaseIndex, BaseIndexInit, IndexList } from "../../BaseIndex";
+import { BaseQueryEngine, RetrieverQueryEngine } from "../../QueryEngine";
+import {
+  StorageContext,
+  storageContextFromDefaults,
+} from "../../storage/StorageContext";
 import { BaseRetriever } from "../../Retriever";
 import { ListIndexRetriever } from "./ListIndexRetriever";
-import { ServiceContext } from "../../ServiceContext";
+import {
+  ServiceContext,
+  serviceContextFromDefaults,
+} from "../../ServiceContext";
 import { RefDocInfo } from "../../storage/docStore/types";
 import _ from "lodash";
 
@@ -12,17 +20,73 @@ export enum ListRetrieverMode {
   LLM = "llm",
 }
 
-export interface ListIndexInit extends BaseIndexInit<IndexList> {
+export interface ListIndexOptions {
   nodes?: BaseNode[];
-  indexStruct: IndexList;
-  serviceContext: ServiceContext;
+  indexStruct?: IndexList;
+  serviceContext?: ServiceContext;
+  storageContext?: StorageContext;
 }
 
 export class ListIndex extends BaseIndex<IndexList> {
-  constructor(init: ListIndexInit) {
+  constructor(init: BaseIndexInit<IndexList>) {
     super(init);
   }
 
+  static async init(options: ListIndexOptions): Promise<ListIndex> {
+    const storageContext =
+      options.storageContext ?? (await storageContextFromDefaults({}));
+    const serviceContext =
+      options.serviceContext ?? serviceContextFromDefaults({});
+    const { docStore, indexStore } = storageContext;
+
+    let indexStruct: IndexList; // exactly one of indexStruct / nodes is required
+    if (options.indexStruct) {
+      if (options.nodes) {
+        throw new Error(
+          "Cannot initialize ListIndex with both nodes and indexStruct"
+        );
+      }
+      indexStruct = options.indexStruct;
+    } else {
+      if (!options.nodes) {
+        throw new Error(
+          "Cannot initialize ListIndex without nodes or indexStruct"
+        );
+      }
+      indexStruct = ListIndex._buildIndexFromNodes(options.nodes);
+    }
+
+    return new ListIndex({
+      storageContext,
+      serviceContext,
+      docStore,
+      indexStore,
+      indexStruct,
+    });
+  }
+
+  static async fromDocuments(
+    documents: Document[],
+    storageContext?: StorageContext,
+    serviceContext?: ServiceContext
+  ): Promise<ListIndex> {
+    storageContext = storageContext ?? (await storageContextFromDefaults({}));
+    serviceContext = serviceContext ?? serviceContextFromDefaults({});
+    const docStore = storageContext.docStore;
+
+    for (const doc of documents) {
+      docStore.setDocumentHash(doc.id_, doc.hash);
+    }
+
+    const nodes = serviceContext.nodeParser.getNodesFromDocuments(documents);
+    const index = await ListIndex.init({
+      nodes,
+      storageContext,
+      serviceContext,
+    });
+    return index;
+  }
+
   asRetriever(
     mode: ListRetrieverMode = ListRetrieverMode.DEFAULT
   ): BaseRetriever {
@@ -36,8 +100,17 @@ export class ListIndex extends BaseIndex<IndexList> {
     }
   }
 
-  protected _buildIndexFromNodes(nodes: BaseNode[]): IndexList {
-    const indexStruct = new IndexList();
+  asQueryEngine(
+    mode: ListRetrieverMode = ListRetrieverMode.DEFAULT
+  ): BaseQueryEngine {
+    return new RetrieverQueryEngine(this.asRetriever(mode));
+  }
+
+  static _buildIndexFromNodes(
+    nodes: BaseNode[],
+    indexStruct?: IndexList
+  ): IndexList {
+    indexStruct = indexStruct || new IndexList();
 
     for (const node of nodes) {
       indexStruct.addNode(node);
diff --git a/packages/core/src/index/list/index.ts b/packages/core/src/index/list/index.ts
new file mode 100644
index 000000000..f8d0b8d5e
--- /dev/null
+++ b/packages/core/src/index/list/index.ts
@@ -0,0 +1,5 @@
+export { ListIndex, ListRetrieverMode } from "./ListIndex";
+export {
+  ListIndexRetriever,
+  ListIndexLLMRetriever,
+} from "./ListIndexRetriever";
-- 
GitLab