From ed924641cab49bb0c47f2b5735d665820722f12a Mon Sep 17 00:00:00 2001 From: Sourabh Desai <sourabhdesai@gmail.com> Date: Mon, 3 Jul 2023 05:40:05 +0000 Subject: [PATCH] start implementation of list index --- .gitignore | 2 + packages/core/package.json | 3 ++ packages/core/src/BaseIndex.ts | 1 + packages/core/src/ListIndex.ts | 88 ++++++++++++++++++++++++++++++++++ 4 files changed, 94 insertions(+) create mode 100644 packages/core/src/ListIndex.ts diff --git a/.gitignore b/.gitignore index d1595af42..6b28a0c08 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,5 @@ yarn-error.log* # vercel .vercel + +storage/ \ No newline at end of file diff --git a/packages/core/package.json b/packages/core/package.json index b303d0360..f0f5c71ed 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -10,6 +10,9 @@ "uuid": "^9.0.0", "wink-nlp": "^1.14.1" }, + "engines": { + "node": ">=18.0.0" + }, "main": "src/index.ts", "types": "src/index.ts", "scripts": { diff --git a/packages/core/src/BaseIndex.ts b/packages/core/src/BaseIndex.ts index 0a0d00dfa..2eb26544e 100644 --- a/packages/core/src/BaseIndex.ts +++ b/packages/core/src/BaseIndex.ts @@ -9,6 +9,7 @@ import { } from "./storage/StorageContext"; import { BaseDocumentStore } from "./storage/docStore/types"; import { VectorStore } from "./storage/vectorStore/types"; + export class IndexDict { indexId: string; summary?: string; diff --git a/packages/core/src/ListIndex.ts b/packages/core/src/ListIndex.ts new file mode 100644 index 000000000..e8e1f0810 --- /dev/null +++ b/packages/core/src/ListIndex.ts @@ -0,0 +1,88 @@ +import { BaseNode } from "./Node"; +import { BaseIndex, BaseIndexInit } from "./BaseIndex"; +import { IndexList } from "./dataStructs/IndexList"; +import { BaseRetriever } from "./Retriever"; +import { ListIndexRetriever } from "./retrievers/ListIndexRetriever"; +import { ListIndexEmbeddingRetriever } from "./retrievers/ListIndexEmbeddingRetriever"; +import { ListIndexLLMRetriever } from
"./retrievers/ListIndexLLMRetriever";
+import { ServiceContext } from "./ServiceContext";
+// NOTE(review): RefDocInfo was referenced below without being imported. It is
+// assumed to be exported from the same docStore types module that BaseIndex.ts
+// imports BaseDocumentStore from - confirm the path before merging.
+import { RefDocInfo } from "./storage/docStore/types";
+
+/**
+ * Strategies that `ListIndex.asRetriever` can be asked to use.
+ * Only `DEFAULT` is implemented; requesting `EMBEDDING` or `LLM` throws.
+ */
+export enum ListRetrieverMode {
+  DEFAULT = "default",
+  EMBEDDING = "embedding",
+  LLM = "llm",
+}
+
+/**
+ * Constructor options for `ListIndex`. Every field declared here is optional;
+ * anything else comes from `BaseIndexInit<IndexList>`.
+ */
+export interface ListIndexInit extends BaseIndexInit<IndexList> {
+  nodes?: BaseNode[];
+  indexStruct?: IndexList;
+  serviceContext?: ServiceContext;
+}
+
+/**
+ * Index whose structure is an `IndexList`: a flat list of node ids
+ * (`indexStruct.nodes`) that is appended to on insert and filtered on delete.
+ */
+export class ListIndex extends BaseIndex<IndexList> {
+  /**
+   * @param init Options forwarded unchanged to the `BaseIndex` constructor.
+   */
+  constructor(init: ListIndexInit) {
+    super(init);
+  }
+
+  /**
+   * Creates a retriever over this index.
+   *
+   * @param mode Retrieval strategy; defaults to `ListRetrieverMode.DEFAULT`.
+   * @returns A `ListIndexRetriever` bound to this index for the default mode.
+   * @throws Error for the `EMBEDDING` and `LLM` modes (not implemented yet)
+   *   and for any value outside the enum.
+   */
+  asRetriever(
+    mode: ListRetrieverMode = ListRetrieverMode.DEFAULT
+  ): BaseRetriever {
+    switch (mode) {
+      case ListRetrieverMode.DEFAULT:
+        return new ListIndexRetriever(this);
+      case ListRetrieverMode.EMBEDDING:
+        throw new Error(
+          `Support for Embedding retriever mode is not implemented`
+        );
+      case ListRetrieverMode.LLM:
+        throw new Error(`Support for LLM retriever mode is not implemented`);
+      default:
+        // Reachable only when a caller bypasses the type system.
+        throw new Error(`Unknown retriever mode: ${mode}`);
+    }
+  }
+
+  /**
+   * Builds a brand-new `IndexList` and registers every node with it.
+   *
+   * @param nodes Nodes to add, in iteration order.
+   * @returns The freshly populated index structure.
+   */
+  protected _buildIndexFromNodes(nodes: BaseNode[]): IndexList {
+    const indexStruct = new IndexList();
+
+    for (const node of nodes) {
+      indexStruct.addNode(node);
+    }
+
+    return indexStruct;
+  }
+
+  /**
+   * Adds the given nodes to this index's existing `indexStruct`.
+   *
+   * @param nodes Nodes to add, in iteration order.
+   */
+  protected _insert(nodes: BaseNode[]): void {
+    for (const node of nodes) {
+      this.indexStruct.addNode(node);
+    }
+  }
+
+  /**
+   * Drops every occurrence of `nodeId` from `indexStruct.nodes`.
+   * The list is replaced by a filtered copy rather than mutated in place.
+   *
+   * @param nodeId Id of the node to remove.
+   */
+  protected _deleteNode(nodeId: string): void {
+    this.indexStruct.nodes = this.indexStruct.nodes.filter(
+      (existingNodeId: string) => existingNodeId !== nodeId
+    );
+  }
+
+  /**
+   * Collects the ref-doc info of every source document referenced by the
+   * nodes in this index.
+   *
+   * Nodes without a `sourceNode`, and source nodes the doc store has no info
+   * for, are skipped.
+   *
+   * @returns A map from source node id to the doc store's info for it.
+   */
+  async getRefDocInfo(): Promise<Record<string, RefDocInfo>> {
+    const nodeDocIds = this.indexStruct.nodes;
+    const nodes = await this.docStore.getNodes(nodeDocIds);
+
+    const refDocInfoMap: Record<string, RefDocInfo> = {};
+
+    for (const node of nodes) {
+      const refNode = node.sourceNode;
+      if (!refNode) continue;
+
+      // Awaited on purpose: if the store returns a promise, the unawaited
+      // value is always truthy, so the guard below never fires and the map
+      // fills with promises. If the store answers synchronously, `await`
+      // is a no-op.
+      const refDocInfo = await this.docStore.getRefDocInfo(refNode.nodeId);
+
+      if (!refDocInfo) continue;
+
+      refDocInfoMap[refNode.nodeId] = refDocInfo;
+    }
+
+    return refDocInfoMap;
+  }
+}
+
+// Legacy
+// Type-only alias: usable in type positions, not as a constructor value.
+export type GPTListIndex = ListIndex;
-- 
GitLab