From 33b562938d59c6a57b0bd8896b1c7d8d9a3d22ee Mon Sep 17 00:00:00 2001
From: Alex Yang <himself65@outlook.com>
Date: Fri, 18 Oct 2024 14:52:39 -0700
Subject: [PATCH] refactor: move `data-structs` module (#1343)

---
 packages/core/data-structs/package.json       |  8 +++
 packages/core/package.json                    | 17 ++++-
 .../core/src/data-structs/data-structs.ts     | 67 +++++++++++++++++++
 packages/core/src/data-structs/index.ts       |  2 +
 packages/core/src/data-structs/struct-type.ts | 39 +++++++++++
 packages/llamaindex/src/indices/BaseIndex.ts  | 33 ---------
 .../llamaindex/src/indices/keyword/index.ts   |  3 +-
 7 files changed, 134 insertions(+), 35 deletions(-)
 create mode 100644 packages/core/data-structs/package.json
 create mode 100644 packages/core/src/data-structs/data-structs.ts
 create mode 100644 packages/core/src/data-structs/index.ts
 create mode 100644 packages/core/src/data-structs/struct-type.ts

diff --git a/packages/core/data-structs/package.json b/packages/core/data-structs/package.json
new file mode 100644
index 000000000..2fdf125e2
--- /dev/null
+++ b/packages/core/data-structs/package.json
@@ -0,0 +1,8 @@
+{
+  "type": "module",
+  "main": "./dist/index.cjs",
+  "module": "./dist/index.js",
+  "types": "./dist/index.d.ts",
+  "exports": "./dist/index.js",
+  "private": true
+}
diff --git a/packages/core/package.json b/packages/core/package.json
index 13614afcd..8bf589e54 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -283,6 +283,20 @@
         "types": "./tools/dist/index.d.ts",
         "default": "./tools/dist/index.js"
       }
+    },
+    "./data-structs": {
+      "require": {
+        "types": "./data-structs/dist/index.d.cts",
+        "default": "./data-structs/dist/index.cjs"
+      },
+      "import": {
+        "types": "./data-structs/dist/index.d.ts",
+        "default": "./data-structs/dist/index.js"
+      },
+      "default": {
+        "types": "./data-structs/dist/index.d.ts",
+        "default": "./data-structs/dist/index.js"
+      }
     }
   },
   "files": [
@@ -305,7 +319,8 @@
     "./chat-engine",
     "./retriever",
     "./vector-store",
-    "./tools"
+    "./tools",
+    "./data-structs"
   ],
   "scripts": {
     "dev": "bunchee --watch",
diff --git a/packages/core/src/data-structs/data-structs.ts b/packages/core/src/data-structs/data-structs.ts
new file mode 100644
index 000000000..354341b47
--- /dev/null
+++ b/packages/core/src/data-structs/data-structs.ts
@@ -0,0 +1,67 @@
+import { randomUUID } from "@llamaindex/env";
+import type { UUID } from "../global";
+import { IndexStructType } from "./struct-type";
+
+/** Base for index structs: holds an id and an optional summary. */
+export abstract class IndexStruct {
+  indexId: string;
+  summary: string | undefined;
+
+  /** `indexId` defaults to `randomUUID()`; `summary` defaults to `undefined`. */
+  constructor(
+    indexId: UUID = randomUUID(),
+    summary: string | undefined = undefined,
+  ) {
+    this.summary = summary;
+    this.indexId = indexId;
+  }
+
+  /** Returns a plain object carrying `indexId` and `summary`. */
+  toJson(): Record<string, unknown> {
+    const { indexId, summary } = this;
+    return { indexId, summary };
+  }
+
+  /** Returns the summary; throws an `Error` while it is still `undefined`. */
+  getSummary(): string {
+    if (this.summary !== undefined) return this.summary;
+    throw new Error("summary field of the index struct is not set");
+  }
+}
+
+/** Keyword table: maps each keyword to the set of node ids added under it. */
+export class KeywordTable extends IndexStruct {
+  table: Map<string, Set<string>> = new Map();
+  type: IndexStructType = IndexStructType.KEYWORD_TABLE;
+
+  /** Adds `nodeId` under every given keyword, creating entries as needed. */
+  addNode(keywords: string[], nodeId: string): void {
+    for (const keyword of keywords) {
+      // Reuse the keyword's existing set; create one on first sight.
+      let nodeIds = this.table.get(keyword);
+      if (nodeIds === undefined) {
+        nodeIds = new Set<string>();
+        this.table.set(keyword, nodeIds);
+      }
+      nodeIds.add(nodeId);
+    }
+  }
+
+  /** Removes `nodeId` from each given keyword; absent keywords are skipped. */
+  deleteNode(keywords: string[], nodeId: string): void {
+    for (const keyword of keywords) {
+      // The keyword's entry is kept even when its set becomes empty.
+      this.table.get(keyword)?.delete(nodeId);
+    }
+  }
+
+  /** Returns base fields, `type`, and the table as keyword -> id arrays. */
+  toJson(): Record<string, unknown> {
+    // Copy the Map of Sets into a plain object of arrays.
+    const table: Record<string, string[]> = {};
+    for (const [keyword, nodeIds] of this.table) {
+      table[keyword] = Array.from(nodeIds);
+    }
+    return { ...super.toJson(), table, type: this.type };
+  }
+}
diff --git a/packages/core/src/data-structs/index.ts b/packages/core/src/data-structs/index.ts
new file mode 100644
index 000000000..e8dc315bb
--- /dev/null
+++ b/packages/core/src/data-structs/index.ts
@@ -0,0 +1,2 @@
+export { IndexStruct, KeywordTable } from "./data-structs";
+export { IndexStructType } from "./struct-type";
diff --git a/packages/core/src/data-structs/struct-type.ts b/packages/core/src/data-structs/struct-type.ts
new file mode 100644
index 000000000..464b7d3cb
--- /dev/null
+++ b/packages/core/src/data-structs/struct-type.ts
@@ -0,0 +1,39 @@
+/** String ids for index struct kinds; the same-named type is their union. */
+export const IndexStructType = {
+  NODE: "node",
+  TREE: "tree",
+  LIST: "list",
+  KEYWORD_TABLE: "keyword_table",
+  DICT: "dict",
+  SIMPLE_DICT: "simple_dict",
+  WEAVIATE: "weaviate",
+  PINECONE: "pinecone",
+  QDRANT: "qdrant",
+  LANCEDB: "lancedb",
+  MILVUS: "milvus",
+  CHROMA: "chroma",
+  MYSCALE: "myscale",
+  CLICKHOUSE: "clickhouse",
+  VECTOR_STORE: "vector_store",
+  OPENSEARCH: "opensearch",
+  DASHVECTOR: "dashvector",
+  CHATGPT_RETRIEVAL_PLUGIN: "chatgpt_retrieval_plugin",
+  DEEPLAKE: "deeplake",
+  EPSILLA: "epsilla",
+  MULTIMODAL_VECTOR_STORE: "multimodal",
+  SQL: "sql",
+  KG: "kg",
+  SIMPLE_KG: "simple_kg",
+  SIMPLE_LPG: "simple_lpg",
+  NEBULAGRAPH: "nebulagraph",
+  FALKORDB: "falkordb",
+  EMPTY: "empty",
+  COMPOSITE: "composite",
+  PANDAS: "pandas",
+  DOCUMENT_SUMMARY: "document_summary",
+  VECTARA: "vectara",
+  ZILLIZ_CLOUD_PIPELINE: "zilliz_cloud_pipeline",
+  POSTGRESML: "postgresml",
+} as const;
+export type IndexStructType =
+  (typeof IndexStructType)[keyof typeof IndexStructType];
diff --git a/packages/llamaindex/src/indices/BaseIndex.ts b/packages/llamaindex/src/indices/BaseIndex.ts
index c5beb5d47..55d13246f 100644
--- a/packages/llamaindex/src/indices/BaseIndex.ts
+++ b/packages/llamaindex/src/indices/BaseIndex.ts
@@ -8,39 +8,6 @@ import { runTransformations } from "../ingestion/IngestionPipeline.js";
 import type { StorageContext } from "../storage/StorageContext.js";
 import type { BaseDocumentStore } from "../storage/docStore/types.js";
 import type { BaseIndexStore } from "../storage/indexStore/types.js";
-import { IndexStruct } from "./IndexStruct.js";
-import { IndexStructType } from "./json-to-index-struct.js";
-
-// A table of keywords mapping keywords to text chunks.
-export class KeywordTable extends IndexStruct {
-  table: Map<string, Set<string>> = new Map();
-  type: IndexStructType = IndexStructType.KEYWORD_TABLE;
-
-  addNode(keywords: string[], nodeId: string): void {
-    keywords.forEach((keyword) => {
-      if (!this.table.has(keyword)) {
-        this.table.set(keyword, new Set());
-      }
-      this.table.get(keyword)!.add(nodeId);
-    });
-  }
-
-  deleteNode(keywords: string[], nodeId: string) {
-    keywords.forEach((keyword) => {
-      if (this.table.has(keyword)) {
-        this.table.get(keyword)!.delete(nodeId);
-      }
-    });
-  }
-
-  toJson(): Record<string, unknown> {
-    return {
-      ...super.toJson(),
-      table: this.table,
-      type: this.type,
-    };
-  }
-}
 
 export interface BaseIndexInit<T> {
   serviceContext?: ServiceContext | undefined;
diff --git a/packages/llamaindex/src/indices/keyword/index.ts b/packages/llamaindex/src/indices/keyword/index.ts
index 369b52ed5..386f6d636 100644
--- a/packages/llamaindex/src/indices/keyword/index.ts
+++ b/packages/llamaindex/src/indices/keyword/index.ts
@@ -13,7 +13,7 @@ import type { StorageContext } from "../../storage/StorageContext.js";
 import { storageContextFromDefaults } from "../../storage/StorageContext.js";
 import type { BaseDocumentStore } from "../../storage/docStore/types.js";
 import type { BaseIndexInit } from "../BaseIndex.js";
-import { BaseIndex, KeywordTable } from "../BaseIndex.js";
+import { BaseIndex } from "../BaseIndex.js";
 import { IndexStructType } from "../json-to-index-struct.js";
 import {
   extractKeywordsGivenResponse,
@@ -21,6 +21,7 @@ import {
   simpleExtractKeywords,
 } from "./utils.js";
 
+import { KeywordTable } from "@llamaindex/core/data-structs";
 import type { LLM } from "@llamaindex/core/llms";
 import {
   defaultKeywordExtractPrompt,
-- 
GitLab