From 33b562938d59c6a57b0bd8896b1c7d8d9a3d22ee Mon Sep 17 00:00:00 2001 From: Alex Yang <himself65@outlook.com> Date: Fri, 18 Oct 2024 14:52:39 -0700 Subject: [PATCH] refactor: move `data-structs` module (#1343) --- packages/core/data-structs/package.json | 8 +++ packages/core/package.json | 17 ++++- .../core/src/data-structs/data-structs.ts | 67 +++++++++++++++++++ packages/core/src/data-structs/index.ts | 2 + packages/core/src/data-structs/struct-type.ts | 39 +++++++++++ packages/llamaindex/src/indices/BaseIndex.ts | 33 --------- .../llamaindex/src/indices/keyword/index.ts | 3 +- 7 files changed, 134 insertions(+), 35 deletions(-) create mode 100644 packages/core/data-structs/package.json create mode 100644 packages/core/src/data-structs/data-structs.ts create mode 100644 packages/core/src/data-structs/index.ts create mode 100644 packages/core/src/data-structs/struct-type.ts diff --git a/packages/core/data-structs/package.json b/packages/core/data-structs/package.json new file mode 100644 index 000000000..2fdf125e2 --- /dev/null +++ b/packages/core/data-structs/package.json @@ -0,0 +1,8 @@ +{ + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": "./dist/index.js", + "private": true +} diff --git a/packages/core/package.json b/packages/core/package.json index 13614afcd..8bf589e54 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -283,6 +283,20 @@ "types": "./tools/dist/index.d.ts", "default": "./tools/dist/index.js" } + }, + "./data-structs": { + "require": { + "types": "./data-structs/dist/index.d.cts", + "default": "./data-structs/dist/index.cjs" + }, + "import": { + "types": "./data-structs/dist/index.d.ts", + "default": "./data-structs/dist/index.js" + }, + "default": { + "types": "./data-structs/dist/index.d.ts", + "default": "./data-structs/dist/index.js" + } } }, "files": [ @@ -305,7 +319,8 @@ "./chat-engine", "./retriever", "./vector-store", - "./tools" + "./tools", + "./data-structs" ], "scripts": { "dev": "bunchee --watch", diff --git a/packages/core/src/data-structs/data-structs.ts b/packages/core/src/data-structs/data-structs.ts new file mode 100644 index 000000000..354341b47 --- /dev/null +++ b/packages/core/src/data-structs/data-structs.ts @@ -0,0 +1,67 @@ +import { randomUUID } from "@llamaindex/env"; +import type { UUID } from "../global"; +import { IndexStructType } from "./struct-type"; + +export abstract class IndexStruct { + indexId: string; + summary: string | undefined; + + constructor( + indexId: UUID = randomUUID(), + summary: string | undefined = undefined, + ) { + this.indexId = indexId; + this.summary = summary; + } + + toJson(): Record<string, unknown> { + return { + indexId: this.indexId, + summary: this.summary, + }; + } + + getSummary(): string { + if (this.summary === undefined) { + throw new Error("summary field of the index struct is not set"); + } + return this.summary; + } +} + +// A table of keywords mapping keywords to text chunks. +export class KeywordTable extends IndexStruct { + table: Map<string, Set<string>> = new Map(); + type: IndexStructType = IndexStructType.KEYWORD_TABLE; + + addNode(keywords: string[], nodeId: string): void { + keywords.forEach((keyword) => { + if (!this.table.has(keyword)) { + this.table.set(keyword, new Set()); + } + this.table.get(keyword)!.add(nodeId); + }); + } + + deleteNode(keywords: string[], nodeId: string) { + keywords.forEach((keyword) => { + if (this.table.has(keyword)) { + this.table.get(keyword)!.delete(nodeId); + } + }); + } + + toJson(): Record<string, unknown> { + return { + ...super.toJson(), + table: Array.from(this.table.entries()).reduce( + (acc, [keyword, nodeIds]) => { + acc[keyword] = Array.from(nodeIds); + return acc; + }, + {} as Record<string, string[]>, + ), + type: this.type, + }; + } +} diff --git a/packages/core/src/data-structs/index.ts b/packages/core/src/data-structs/index.ts new file mode 100644 index 000000000..e8dc315bb --- /dev/null +++ b/packages/core/src/data-structs/index.ts @@ -0,0 +1,2 @@ +export { IndexStruct, KeywordTable } from "./data-structs"; +export { IndexStructType } from "./struct-type"; diff --git a/packages/core/src/data-structs/struct-type.ts b/packages/core/src/data-structs/struct-type.ts new file mode 100644 index 000000000..464b7d3cb --- /dev/null +++ b/packages/core/src/data-structs/struct-type.ts @@ -0,0 +1,39 @@ +export const IndexStructType = { + NODE: "node", + TREE: "tree", + LIST: "list", + KEYWORD_TABLE: "keyword_table", + DICT: "dict", + SIMPLE_DICT: "simple_dict", + WEAVIATE: "weaviate", + PINECONE: "pinecone", + QDRANT: "qdrant", + LANCEDB: "lancedb", + MILVUS: "milvus", + CHROMA: "chroma", + MYSCALE: "myscale", + CLICKHOUSE: "clickhouse", + VECTOR_STORE: "vector_store", + OPENSEARCH: "opensearch", + DASHVECTOR: "dashvector", + CHATGPT_RETRIEVAL_PLUGIN: "chatgpt_retrieval_plugin", + DEEPLAKE: "deeplake", + EPSILLA: "epsilla", + MULTIMODAL_VECTOR_STORE: "multimodal", + SQL: "sql", + KG: "kg", + SIMPLE_KG: "simple_kg", + SIMPLE_LPG: "simple_lpg", + NEBULAGRAPH: "nebulagraph", + FALKORDB: "falkordb", + EMPTY: "empty", + COMPOSITE: "composite", + PANDAS: "pandas", + DOCUMENT_SUMMARY: "document_summary", + VECTARA: "vectara", + ZILLIZ_CLOUD_PIPELINE: "zilliz_cloud_pipeline", + POSTGRESML: "postgresml", +} as const; + +export type IndexStructType = + (typeof IndexStructType)[keyof typeof IndexStructType]; diff --git a/packages/llamaindex/src/indices/BaseIndex.ts b/packages/llamaindex/src/indices/BaseIndex.ts index c5beb5d47..55d13246f 100644 --- a/packages/llamaindex/src/indices/BaseIndex.ts +++ b/packages/llamaindex/src/indices/BaseIndex.ts @@ -8,39 +8,6 @@ import { runTransformations } from "../ingestion/IngestionPipeline.js"; import type { StorageContext } from "../storage/StorageContext.js"; import type { BaseDocumentStore } from "../storage/docStore/types.js"; import type { BaseIndexStore } from "../storage/indexStore/types.js"; -import { IndexStruct } from "./IndexStruct.js"; -import { IndexStructType } from "./json-to-index-struct.js"; - -// A table of keywords mapping keywords to text chunks. -export class KeywordTable extends IndexStruct { - table: Map<string, Set<string>> = new Map(); - type: IndexStructType = IndexStructType.KEYWORD_TABLE; - - addNode(keywords: string[], nodeId: string): void { - keywords.forEach((keyword) => { - if (!this.table.has(keyword)) { - this.table.set(keyword, new Set()); - } - this.table.get(keyword)!.add(nodeId); - }); - } - - deleteNode(keywords: string[], nodeId: string) { - keywords.forEach((keyword) => { - if (this.table.has(keyword)) { - this.table.get(keyword)!.delete(nodeId); - } - }); - } - - toJson(): Record<string, unknown> { - return { - ...super.toJson(), - table: this.table, - type: this.type, - }; - } -} export interface BaseIndexInit<T> { serviceContext?: ServiceContext | undefined; diff --git a/packages/llamaindex/src/indices/keyword/index.ts b/packages/llamaindex/src/indices/keyword/index.ts index 369b52ed5..386f6d636 100644 --- a/packages/llamaindex/src/indices/keyword/index.ts +++ b/packages/llamaindex/src/indices/keyword/index.ts @@ -13,7 +13,7 @@ import type { StorageContext } from "../../storage/StorageContext.js"; import { storageContextFromDefaults } from "../../storage/StorageContext.js"; import type { BaseDocumentStore } from "../../storage/docStore/types.js"; import type { BaseIndexInit } from "../BaseIndex.js"; -import { BaseIndex, KeywordTable } from "../BaseIndex.js"; +import { BaseIndex } from "../BaseIndex.js"; import { IndexStructType } from "../json-to-index-struct.js"; import { extractKeywordsGivenResponse, @@ -21,6 +21,7 @@ import { simpleExtractKeywords, } from "./utils.js"; +import { KeywordTable } from "@llamaindex/core/data-structs"; import type { LLM } from "@llamaindex/core/llms"; import { defaultKeywordExtractPrompt, -- GitLab