From 372ac1a24b80198fc86c7f1327d5b6e954628096 Mon Sep 17 00:00:00 2001 From: Yi Ding <yi.s.ding@gmail.com> Date: Mon, 17 Jul 2023 21:24:10 -0700 Subject: [PATCH] move all indexes into indices folder --- CONTRIBUTING.md | 2 +- package.json | 3 +- packages/core/src/Retriever.ts | 62 --------- packages/core/src/index.ts | 4 +- packages/core/src/indices/BaseIndex.ts | 97 ++++++++++++++ packages/core/src/indices/index.ts | 0 .../src/{index => indices}/list/ListIndex.ts | 2 +- .../list/ListIndexRetriever.ts | 0 .../core/src/{index => indices}/list/index.ts | 0 .../core/src/{index => indices}/list/utils.ts | 0 .../vectorStore/VectorIndexRetriever.ts | 67 ++++++++++ .../vectorStore/VectorStoreIndex.ts} | 119 ++++-------------- .../core/src/indices/vectorStore/index.ts | 2 + .../core/src/tests/CallbackManager.test.ts | 4 +- 14 files changed, 194 insertions(+), 168 deletions(-) create mode 100644 packages/core/src/indices/BaseIndex.ts create mode 100644 packages/core/src/indices/index.ts rename packages/core/src/{index => indices}/list/ListIndex.ts (98%) rename packages/core/src/{index => indices}/list/ListIndexRetriever.ts (100%) rename packages/core/src/{index => indices}/list/index.ts (100%) rename packages/core/src/{index => indices}/list/utils.ts (100%) create mode 100644 packages/core/src/indices/vectorStore/VectorIndexRetriever.ts rename packages/core/src/{BaseIndex.ts => indices/vectorStore/VectorStoreIndex.ts} (61%) create mode 100644 packages/core/src/indices/vectorStore/index.ts diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 881d1c712..e441a4bb1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -18,7 +18,7 @@ You can checkout how Turborepo works using the built in [README-turborepo.md](RE Install NodeJS. Preferably v18 using nvm or n. -Inside the llamascript directory: +Inside the LlamaIndexTS directory: ``` npm i -g pnpm ts-node diff --git a/package.json b/package.json index 99799f1e1..6f6285174 100644 --- a/package.json +++ b/package.json @@ -20,6 +20,5 @@ "ts-jest": "^29.1.1", "turbo": "^1.10.8" }, - "packageManager": "pnpm@7.15.0", - "name": "llamascript" + "packageManager": "pnpm@7.15.0" } diff --git a/packages/core/src/Retriever.ts b/packages/core/src/Retriever.ts index 60fcf7d70..66e672e9e 100644 --- a/packages/core/src/Retriever.ts +++ b/packages/core/src/Retriever.ts @@ -1,13 +1,6 @@ -import { VectorStoreIndex } from "./BaseIndex"; -import { globalsHelper } from "./GlobalsHelper"; import { NodeWithScore } from "./Node"; import { ServiceContext } from "./ServiceContext"; import { Event } from "./callbacks/CallbackManager"; -import { DEFAULT_SIMILARITY_TOP_K } from "./constants"; -import { - VectorStoreQuery, - VectorStoreQueryMode, -} from "./storage/vectorStore/types"; /** * Retrievers retrieve the nodes that most closely match our query in similarity. @@ -16,58 +9,3 @@ export interface BaseRetriever { aretrieve(query: string, parentEvent?: Event): Promise<NodeWithScore[]>; getServiceContext(): ServiceContext; } - -/** - * VectorIndexRetriever retrieves nodes from a VectorIndex. - */ -export class VectorIndexRetriever implements BaseRetriever { - index: VectorStoreIndex; - similarityTopK = DEFAULT_SIMILARITY_TOP_K; - private serviceContext: ServiceContext; - - constructor(index: VectorStoreIndex) { - this.index = index; - this.serviceContext = this.index.serviceContext; - } - - async aretrieve( - query: string, - parentEvent?: Event - ): Promise<NodeWithScore[]> { - const queryEmbedding = - await this.serviceContext.embedModel.aGetQueryEmbedding(query); - - const q: VectorStoreQuery = { - queryEmbedding: queryEmbedding, - mode: VectorStoreQueryMode.DEFAULT, - similarityTopK: this.similarityTopK, - }; - const result = this.index.vectorStore.query(q); - - let nodesWithScores: NodeWithScore[] = []; - for (let i = 0; i < result.ids.length; i++) { - const node = this.index.indexStruct.nodesDict[result.ids[i]]; - nodesWithScores.push({ - node: node, - score: result.similarities[i], - }); - } - - if (this.serviceContext.callbackManager.onRetrieve) { - this.serviceContext.callbackManager.onRetrieve({ - query, - nodes: nodesWithScores, - event: globalsHelper.createEvent({ - parentEvent, - type: "retrieve", - }), - }); - } - - return nodesWithScores; - } - - getServiceContext(): ServiceContext { - return this.serviceContext; - } -} diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 7bf8c8264..f9fa64a27 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -1,4 +1,4 @@ -export * from "./BaseIndex"; +export * from "./indices/BaseIndex"; export * from "./ChatEngine"; export * from "./constants"; export * from "./Embedding"; @@ -19,7 +19,7 @@ export * from "./ServiceContext"; export * from "./TextSplitter"; export * from "./Tool"; -export * from "./index/list"; +export * from "./indices/list"; export * from "./callbacks/CallbackManager"; diff --git a/packages/core/src/indices/BaseIndex.ts b/packages/core/src/indices/BaseIndex.ts new file mode 100644 index 000000000..043f18e12 --- /dev/null +++ b/packages/core/src/indices/BaseIndex.ts @@ -0,0 +1,97 @@ +import { Document, BaseNode } from "../Node"; +import { v4 as uuidv4 } from "uuid"; +import { BaseRetriever } from "../Retriever"; +import { ServiceContext } from "../ServiceContext"; +import { StorageContext } from "../storage/StorageContext"; +import { BaseDocumentStore } from "../storage/docStore/types"; +import { VectorStore } from "../storage/vectorStore/types"; +import { BaseIndexStore } from "../storage/indexStore/types"; + +/** + * The underlying structure of each index. + */ +export abstract class IndexStruct { + indexId: string; + summary?: string; + + constructor(indexId = uuidv4(), summary = undefined) { + this.indexId = indexId; + this.summary = summary; + } + + getSummary(): string { + if (this.summary === undefined) { + throw new Error("summary field of the index dict is not set"); + } + return this.summary; + } +} + +export class IndexDict extends IndexStruct { + nodesDict: Record<string, BaseNode> = {}; + docStore: Record<string, Document> = {}; // FIXME: this should be implemented in storageContext + + getSummary(): string { + if (this.summary === undefined) { + throw new Error("summary field of the index dict is not set"); + } + return this.summary; + } + + addNode(node: BaseNode, textId?: string) { + const vectorId = textId ?? node.id_; + this.nodesDict[vectorId] = node; + } +} + +export class IndexList extends IndexStruct { + nodes: string[] = []; + + addNode(node: BaseNode) { + this.nodes.push(node.id_); + } +} + +export interface BaseIndexInit<T> { + serviceContext: ServiceContext; + storageContext: StorageContext; + docStore: BaseDocumentStore; + vectorStore?: VectorStore; + indexStore?: BaseIndexStore; + indexStruct: T; +} + +/** + * Indexes are the data structure that we store our nodes and embeddings in so + * they can be retrieved for our queries. + */ +export abstract class BaseIndex<T> { + serviceContext: ServiceContext; + storageContext: StorageContext; + docStore: BaseDocumentStore; + vectorStore?: VectorStore; + indexStore?: BaseIndexStore; + indexStruct: T; + + constructor(init: BaseIndexInit<T>) { + this.serviceContext = init.serviceContext; + this.storageContext = init.storageContext; + this.docStore = init.docStore; + this.vectorStore = init.vectorStore; + this.indexStore = init.indexStore; + this.indexStruct = init.indexStruct; + } + + abstract asRetriever(): BaseRetriever; +} + +export interface VectorIndexOptions { + nodes?: BaseNode[]; + indexStruct?: IndexDict; + serviceContext?: ServiceContext; + storageContext?: StorageContext; +} + +export interface VectorIndexConstructorProps extends BaseIndexInit<IndexDict> { + vectorStore: VectorStore; +} diff --git a/packages/core/src/indices/index.ts b/packages/core/src/indices/index.ts new file mode 100644 index 000000000..e69de29bb diff --git a/packages/core/src/index/list/ListIndex.ts b/packages/core/src/indices/list/ListIndex.ts similarity index 98% rename from packages/core/src/index/list/ListIndex.ts rename to packages/core/src/indices/list/ListIndex.ts index 54bd62e62..23d178f7d 100644 --- a/packages/core/src/index/list/ListIndex.ts +++ b/packages/core/src/indices/list/ListIndex.ts @@ -1,5 +1,5 @@ import { BaseNode, Document } from "../../Node"; -import { BaseIndex, BaseIndexInit, IndexList } from "../../BaseIndex"; +import { BaseIndex, BaseIndexInit, IndexList } from "../BaseIndex"; import { BaseQueryEngine, RetrieverQueryEngine } from "../../QueryEngine"; import { StorageContext, diff --git a/packages/core/src/index/list/ListIndexRetriever.ts b/packages/core/src/indices/list/ListIndexRetriever.ts similarity index 100% rename from packages/core/src/index/list/ListIndexRetriever.ts rename to packages/core/src/indices/list/ListIndexRetriever.ts diff --git a/packages/core/src/index/list/index.ts b/packages/core/src/indices/list/index.ts similarity index 100% rename from packages/core/src/index/list/index.ts rename to packages/core/src/indices/list/index.ts diff --git a/packages/core/src/index/list/utils.ts b/packages/core/src/indices/list/utils.ts similarity index 100% rename from packages/core/src/index/list/utils.ts rename to packages/core/src/indices/list/utils.ts diff --git a/packages/core/src/indices/vectorStore/VectorIndexRetriever.ts b/packages/core/src/indices/vectorStore/VectorIndexRetriever.ts new file mode 100644 index 000000000..c8106b90f --- /dev/null +++ b/packages/core/src/indices/vectorStore/VectorIndexRetriever.ts @@ -0,0 +1,67 @@ +import { VectorStoreIndex } from "./VectorStoreIndex"; +import { globalsHelper } from "../../GlobalsHelper"; +import { NodeWithScore } from "../../Node"; +import { ServiceContext } from "../../ServiceContext"; +import { Event } from "../../callbacks/CallbackManager"; +import { DEFAULT_SIMILARITY_TOP_K } from "../../constants"; +import { + VectorStoreQuery, + VectorStoreQueryMode, +} from "../../storage/vectorStore/types"; +import { BaseRetriever } from "../../Retriever"; + +/** + * VectorIndexRetriever retrieves nodes from a VectorIndex. + */ + +export class VectorIndexRetriever implements BaseRetriever { + index: VectorStoreIndex; + similarityTopK = DEFAULT_SIMILARITY_TOP_K; + private serviceContext: ServiceContext; + + constructor(index: VectorStoreIndex) { + this.index = index; + this.serviceContext = this.index.serviceContext; + } + + async aretrieve( + query: string, + parentEvent?: Event + ): Promise<NodeWithScore[]> { + const queryEmbedding = + await this.serviceContext.embedModel.aGetQueryEmbedding(query); + + const q: VectorStoreQuery = { + queryEmbedding: queryEmbedding, + mode: VectorStoreQueryMode.DEFAULT, + similarityTopK: this.similarityTopK, + }; + const result = this.index.vectorStore.query(q); + + let nodesWithScores: NodeWithScore[] = []; + for (let i = 0; i < result.ids.length; i++) { + const node = this.index.indexStruct.nodesDict[result.ids[i]]; + nodesWithScores.push({ + node: node, + score: result.similarities[i], + }); + } + + if (this.serviceContext.callbackManager.onRetrieve) { + this.serviceContext.callbackManager.onRetrieve({ + query, + nodes: nodesWithScores, + event: globalsHelper.createEvent({ + parentEvent, + type: "retrieve", + }), + }); + } + + return nodesWithScores; + } + + getServiceContext(): ServiceContext { + return this.serviceContext; + } +} diff --git a/packages/core/src/BaseIndex.ts b/packages/core/src/indices/vectorStore/VectorStoreIndex.ts similarity index 61% rename from packages/core/src/BaseIndex.ts rename to packages/core/src/indices/vectorStore/VectorStoreIndex.ts index ef0acc169..a50e29fea 100644 --- a/packages/core/src/BaseIndex.ts +++ b/packages/core/src/indices/vectorStore/VectorStoreIndex.ts @@ -1,108 +1,31 @@ -import { Document, BaseNode, MetadataMode, NodeWithEmbedding } from "./Node"; -import { BaseQueryEngine, RetrieverQueryEngine } from "./QueryEngine"; -import { v4 as uuidv4 } from "uuid"; -import { BaseRetriever, VectorIndexRetriever } from "./Retriever"; -import { ServiceContext, serviceContextFromDefaults } from "./ServiceContext"; +import { + Document, + BaseNode, + MetadataMode, + NodeWithEmbedding, +} from "../../Node"; +import { BaseQueryEngine, RetrieverQueryEngine } from "../../QueryEngine"; +import { VectorIndexRetriever } from "./VectorIndexRetriever"; +import { + ServiceContext, + serviceContextFromDefaults, +} from "../../ServiceContext"; import { StorageContext, storageContextFromDefaults, -} from "./storage/StorageContext"; -import { BaseDocumentStore } from "./storage/docStore/types"; -import { VectorStore } from "./storage/vectorStore/types"; -import { BaseIndexStore } from "./storage/indexStore/types"; - -/** - * The underlying structure of each index. - */ -export abstract class IndexStruct { - indexId: string; - summary?: string; - - constructor(indexId = uuidv4(), summary = undefined) { - this.indexId = indexId; - this.summary = summary; - } - - getSummary(): string { - if (this.summary === undefined) { - throw new Error("summary field of the index dict is not set"); - } - return this.summary; - } -} - -export class IndexDict extends IndexStruct { - nodesDict: Record<string, BaseNode> = {}; - docStore: Record<string, Document> = {}; // FIXME: this should be implemented in storageContext - - getSummary(): string { - if (this.summary === undefined) { - throw new Error("summary field of the index dict is not set"); - } - return this.summary; - } - - addNode(node: BaseNode, textId?: string) { - const vectorId = textId ?? node.id_; - this.nodesDict[vectorId] = node; - } -} - -export class IndexList extends IndexStruct { - nodes: string[] = []; - - addNode(node: BaseNode) { - this.nodes.push(node.id_); - } -} - -export interface BaseIndexInit<T> { - serviceContext: ServiceContext; - storageContext: StorageContext; - docStore: BaseDocumentStore; - vectorStore?: VectorStore; - indexStore?: BaseIndexStore; - indexStruct: T; -} - -/** - * Indexes are the data structure that we store our nodes and embeddings in so - * they can be retrieved for our queries. - */ -export abstract class BaseIndex<T> { - serviceContext: ServiceContext; - storageContext: StorageContext; - docStore: BaseDocumentStore; - vectorStore?: VectorStore; - indexStore?: BaseIndexStore; - indexStruct: T; - - constructor(init: BaseIndexInit<T>) { - this.serviceContext = init.serviceContext; - this.storageContext = init.storageContext; - this.docStore = init.docStore; - this.vectorStore = init.vectorStore; - this.indexStore = init.indexStore; - this.indexStruct = init.indexStruct; - } - - abstract asRetriever(): BaseRetriever; -} - -export interface VectorIndexOptions { - nodes?: BaseNode[]; - indexStruct?: IndexDict; - serviceContext?: ServiceContext; - storageContext?: StorageContext; -} - -interface VectorIndexConstructorProps extends BaseIndexInit<IndexDict> { - vectorStore: VectorStore; -} +} from "../../storage/StorageContext"; +import { VectorStore } from "../../storage/vectorStore/types"; +import { + BaseIndex, + IndexDict, + VectorIndexConstructorProps, + VectorIndexOptions, +} from "../BaseIndex"; /** * The VectorStoreIndex, an index that stores the nodes only according to their vector embedings. */ + export class VectorStoreIndex extends BaseIndex<IndexDict> { vectorStore: VectorStore; diff --git a/packages/core/src/indices/vectorStore/index.ts b/packages/core/src/indices/vectorStore/index.ts new file mode 100644 index 000000000..526610b11 --- /dev/null +++ b/packages/core/src/indices/vectorStore/index.ts @@ -0,0 +1,2 @@ +export { VectorStoreIndex } from "./VectorStoreIndex"; +export { VectorIndexRetriever } from "./VectorIndexRetriever"; diff --git a/packages/core/src/tests/CallbackManager.test.ts b/packages/core/src/tests/CallbackManager.test.ts index ea790ea11..b32a05b7e 100644 --- a/packages/core/src/tests/CallbackManager.test.ts +++ b/packages/core/src/tests/CallbackManager.test.ts @@ -1,4 +1,4 @@ -import { VectorStoreIndex } from "../BaseIndex"; +import { VectorStoreIndex } from "../indices/vectorStore/VectorStoreIndex"; import { OpenAIEmbedding } from "../Embedding"; import { OpenAI } from "../LLM"; import { Document } from "../Node"; @@ -8,7 +8,7 @@ import { RetrievalCallbackResponse, StreamCallbackResponse, } from "../callbacks/CallbackManager"; -import { ListIndex, ListRetrieverMode } from "../index/list"; +import { ListIndex, ListRetrieverMode } from "../indices/list"; import { ResponseSynthesizer, SimpleResponseBuilder, -- GitLab