diff --git a/apps/simple/index.ts b/apps/simple/index.ts index d7e804240f0158e99b27333501ed615e61fa4ca3..733bb7f07fedcf7c8b74309fe384b214f7cec6cc 100644 --- a/apps/simple/index.ts +++ b/apps/simple/index.ts @@ -1,9 +1,9 @@ -import { Document } from "@llamaindex/core/src/Document"; +import { Document } from "@llamaindex/core/src/Node"; import { VectorStoreIndex } from "@llamaindex/core/src/BaseIndex"; import essay from "./essay"; (async () => { - const document = new Document(essay); + const document = new Document({ text: essay }); const index = await VectorStoreIndex.fromDocuments([document]); const queryEngine = index.asQueryEngine(); const response = await queryEngine.aquery( diff --git a/apps/simple/lowlevel.ts b/apps/simple/lowlevel.ts new file mode 100644 index 0000000000000000000000000000000000000000..ebfdb076a51751183af42487e0570c6891f8bdab --- /dev/null +++ b/apps/simple/lowlevel.ts @@ -0,0 +1,31 @@ +import { Document, TextNode, NodeWithScore } from "@llamaindex/core/src/Node"; +import { ResponseSynthesizer } from "@llamaindex/core/src/ResponseSynthesizer"; +import { SimpleNodeParser } from "@llamaindex/core/src/NodeParser"; + +(async () => { + const nodeParser = new SimpleNodeParser(); + const nodes = nodeParser.getNodesFromDocuments([ + new Document({ text: "I am 10 years old. John is 20 years old." }), + ]); + + console.log(nodes); + + const responseSynthesizer = new ResponseSynthesizer(); + + const nodesWithScore: NodeWithScore[] = [ + { + node: new TextNode({ text: "I am 10 years old." }), + score: 1, + }, + { + node: new TextNode({ text: "John is 20 years old." 
}), + score: 0.5, + }, + ]; + + const response = await responseSynthesizer.asynthesize( + "What age am I?", + nodesWithScore + ); + console.log(response.response); +})(); diff --git a/packages/core/src/BaseIndex.ts b/packages/core/src/BaseIndex.ts index 9934e56071b23a91b65a536dabfcbe041a5c38a5..efb9eacbb8951f8a3ed720c04c71f76925e134e0 100644 --- a/packages/core/src/BaseIndex.ts +++ b/packages/core/src/BaseIndex.ts @@ -1,14 +1,13 @@ -import { Document } from "./Document"; -import { Node, NodeWithEmbedding } from "./Node"; +import { Document, TextNode } from "./Node"; import { SimpleNodeParser } from "./NodeParser"; import { BaseQueryEngine, RetrieverQueryEngine } from "./QueryEngine"; import { v4 as uuidv4 } from "uuid"; import { VectorIndexRetriever } from "./Retriever"; import { BaseEmbedding, OpenAIEmbedding } from "./Embedding"; export class BaseIndex { - nodes: Node[] = []; + nodes: TextNode[] = []; - constructor(nodes?: Node[]) { + constructor(nodes?: TextNode[]) { this.nodes = nodes ?? []; } } @@ -16,7 +15,7 @@ export class BaseIndex { export class IndexDict { indexId: string; summary?: string; - nodesDict: Record<string, Node> = {}; + nodesDict: Record<string, TextNode> = {}; docStore: Record<string, Document> = {}; // FIXME: this should be implemented in storageContext constructor(indexId = uuidv4(), summary = undefined) { @@ -31,8 +30,8 @@ export class IndexDict { return this.summary; } - addNode(node: Node, textId?: string) { - const vectorId = textId ?? node.getDocId(); + addNode(node: TextNode, textId?: string) { + const vectorId = textId ?? 
node.id_; this.nodesDict[vectorId] = node; } } diff --git a/packages/core/src/Document.ts b/packages/core/src/Document.ts deleted file mode 100644 index a8b9c8559bed7bc8a967ee5d6b96958010a5aa9d..0000000000000000000000000000000000000000 --- a/packages/core/src/Document.ts +++ /dev/null @@ -1,75 +0,0 @@ -import { v4 as uuidv4 } from "uuid"; - -export enum NodeType { - DOCUMENT, - TEXT, - IMAGE, - INDEX, -} - -export abstract class BaseDocument { - text: string; - docId?: string; - embedding?: number[]; - docHash?: string; - - constructor( - text: string, - docId?: string, - embedding?: number[], - docHash?: string - ) { - this.text = text; - this.docId = docId; - this.embedding = embedding; - this.docHash = docHash; - - if (!docId) { - this.docId = uuidv4(); - } - } - - getText() { - if (this.text === undefined) { - throw new Error("Text not set"); - } - return this.text; - } - - getDocId() { - if (this.docId === undefined) { - throw new Error("doc id not set"); - } - return this.docId; - } - - getEmbedding() { - if (this.embedding === undefined) { - throw new Error("Embedding not set"); - } - return this.embedding; - } - - getDocHash() { - if (this.docHash === undefined) { - throw new Error("Doc hash not set"); - } - return this.docHash; - } - - abstract getType(): NodeType; -} - -export class Document extends BaseDocument { - getType() { - return NodeType.DOCUMENT; - } -} - -export class ImageDocument extends Document { - image?: string; - - getType() { - return NodeType.IMAGE; - } -} diff --git a/packages/core/src/Node.ts b/packages/core/src/Node.ts index f97b84a5f4d5d62d9fea767520a31995b5984f81..2c50d0d8a26c4594195332a695e8936f3a8e0ee5 100644 --- a/packages/core/src/Node.ts +++ b/packages/core/src/Node.ts @@ -1,72 +1,233 @@ -import { BaseDocument, NodeType } from "./Document"; - -export enum DocumentRelationship { - SOURCE = "source", - PREVIOUS = "previous", - NEXT = "next", - PARENT = "parent", - CHILD = "child", +import { v4 as uuidv4 } from "uuid"; + +export 
enum NodeRelationship { + SOURCE = "SOURCE", + PREVIOUS = "PREVIOUS", + NEXT = "NEXT", + PARENT = "PARENT", + CHILD = "CHILD", +} + +export enum ObjectType { + TEXT = "TEXT", + IMAGE = "IMAGE", + INDEX = "INDEX", + DOCUMENT = "DOCUMENT", +} + +export enum MetadataMode { + ALL = "ALL", + EMBED = "EMBED", + LLM = "LLM", + NONE = "NONE", } -export class Node extends BaseDocument { - relationships: { [key in DocumentRelationship]: string | string[] | null }; - - constructor( - text: string, // Text is required - docId?: string, - embedding?: number[], - docHash?: string - ) { - if (text === undefined) { - throw new Error("Text is required"); +export interface RelatedNodeInfo { + nodeId: string; + nodeType?: ObjectType; + metadata: { [key: string]: any }; + hash?: string; +} + +export type RelatedNodeType = RelatedNodeInfo | RelatedNodeInfo[]; + +/** + * Generic abstract class for retrievable nodes + */ +export abstract class BaseNode { + id_: string = uuidv4(); + embedding?: number[]; + + // Metadata fields + metadata: { [key: string]: any } = {}; + excludedEmbedMetadataKeys: string[] = []; + excludedLlmMetadataKeys: string[] = []; + relationships: Partial<Record<NodeRelationship, RelatedNodeType>> = {}; + hash: string = ""; + + constructor(init?: Partial<BaseNode>) { + Object.assign(this, init); + } + + static getType(): ObjectType { + throw new Error("Not implemented"); + } + + abstract getContent(metadataMode: MetadataMode): string; + abstract getMetadataStr(metadataMode: MetadataMode): string; + abstract setContent(value: any): void; + + get nodeId(): string { + return this.id_; + } + + get sourceNode(): RelatedNodeInfo | undefined { + const relationship = this.relationships[NodeRelationship.SOURCE]; + + if (Array.isArray(relationship)) { + throw new Error("Source object must be a single RelatedNodeInfo object"); + } + + return relationship; + } + + get prevNode(): RelatedNodeInfo | undefined { + const relationship = this.relationships[NodeRelationship.PREVIOUS]; + 
+ if (Array.isArray(relationship)) { + throw new Error( + "Previous object must be a single RelatedNodeInfo object" + ); + } + + return relationship; + } + + get nextNode(): RelatedNodeInfo | undefined { + const relationship = this.relationships[NodeRelationship.NEXT]; + + if (Array.isArray(relationship)) { + throw new Error("Next object must be a single RelatedNodeInfo object"); + } + + return relationship; + } + + get parentNode(): RelatedNodeInfo | undefined { + const relationship = this.relationships[NodeRelationship.PARENT]; + + if (Array.isArray(relationship)) { + throw new Error("Parent object must be a single RelatedNodeInfo object"); } - super(text, docId, embedding, docHash); + return relationship; + } + + get childNodes(): RelatedNodeInfo[] | undefined { + const relationship = this.relationships[NodeRelationship.CHILD]; - this.relationships = { - source: null, - previous: null, - next: null, - parent: null, - child: [], + if (relationship !== undefined && !Array.isArray(relationship)) { /* an unset CHILD relationship is valid and yields undefined, matching the sibling getters; only a non-array value is an error */ + throw new Error( + "Child object must be a an array of RelatedNodeInfo objects" + ); + } + + return relationship; + } + + getEmbedding(): number[] { + if (this.embedding === undefined) { + throw new Error("Embedding not set"); + } + + return this.embedding; + } + + asRelatedNodeInfo(): RelatedNodeInfo { + return { + nodeId: this.nodeId, + metadata: this.metadata, + hash: this.hash, }; } +} - getNodeInfo(): { [key: string]: any } { - return {}; +export class TextNode extends BaseNode { + text: string = ""; + startCharIdx?: number; + endCharIdx?: number; + // textTemplate: NOTE write your own formatter if needed + // metadataTemplate: NOTE write your own formatter if needed + metadataSeperator: string = "\n"; + + constructor(init?: Partial<TextNode>) { + super(init); + Object.assign(this, init); } - refDocId(): string | null { - return ""; + generateHash() { + throw new Error("Not implemented"); } - prevNodeId(): string { - throw new Error("Node does not have previous node"); + static getType(): 
ObjectType { + return ObjectType.TEXT; } - nextNodeId(): string { - throw new Error("Node does not have next node"); + getContent(metadataMode: MetadataMode = MetadataMode.NONE): string { + const metadataStr = this.getMetadataStr(metadataMode).trim(); + return `${metadataStr}\n\n${this.text}`.trim(); } - parentNodeId(): string { - throw new Error("Node does not have parent node"); + getMetadataStr(metadataMode: MetadataMode): string { + if (metadataMode === MetadataMode.NONE) { + return ""; + } + + const usableMetadataKeys = new Set(Object.keys(this.metadata).sort()); + if (metadataMode === MetadataMode.LLM) { + for (const key of this.excludedLlmMetadataKeys) { + usableMetadataKeys.delete(key); + } + } else if (metadataMode === MetadataMode.EMBED) { + for (const key of this.excludedEmbedMetadataKeys) { + usableMetadataKeys.delete(key); + } + } + + return [...usableMetadataKeys] + .map((key) => `${key}: ${this.metadata[key]}`) + .join(this.metadataSeperator); } - childNodeIds(): string[] { - return []; + setContent(value: string) { + this.text = value; } - getType() { - return NodeType.TEXT; + getNodeInfo() { + return { start: this.startCharIdx, end: this.endCharIdx }; + } + + getText() { + return this.getContent(MetadataMode.NONE); + } +} + +export class ImageNode extends TextNode { + image: string = ""; + + static getType(): ObjectType { + return ObjectType.IMAGE; } } -export interface NodeWithEmbedding { - node: Node; - embedding: number[]; +export class IndexNode extends TextNode { + indexId: string = ""; + + static getType(): ObjectType { + return ObjectType.INDEX; + } } export interface NodeWithScore { - node: Node; + node: TextNode; score: number; } + +export class Document extends TextNode { + constructor(init?: Partial<Document>) { + super(init); + Object.assign(this, init); + } + + static getType() { + return ObjectType.DOCUMENT; + } + + get docId() { + return this.id_; + } +} + +export class ImageDocument extends Document { + image?: string; +} diff --git 
a/packages/core/src/NodeParser.ts b/packages/core/src/NodeParser.ts index 1ec66f6f643abaef7cc2c6f3d98f9dc442b3a156..52db5dfbae7bfc1471d98099e0d7d646cb3c2756 100644 --- a/packages/core/src/NodeParser.ts +++ b/packages/core/src/NodeParser.ts @@ -1,5 +1,4 @@ -import { Document } from "./Document"; -import { Node } from "./Node"; +import { Document, NodeRelationship, TextNode } from "./Node"; import { SentenceSplitter } from "./TextSplitter"; export function getTextSplitsFromDocument( @@ -16,13 +15,13 @@ export function getNodesFromDocument( document: Document, textSplitter: SentenceSplitter ) { - let nodes: Node[] = []; + let nodes: TextNode[] = []; const textSplits = getTextSplitsFromDocument(document, textSplitter); textSplits.forEach((textSplit, index) => { - const node = new Node(textSplit); - node.relationships.source = document.getDocId(); + const node = new TextNode({ text: textSplit }); + node.relationships[NodeRelationship.SOURCE] = document.asRelatedNodeInfo(); nodes.push(node); }); @@ -30,7 +29,7 @@ export function getNodesFromDocument( } export interface NodeParser { - getNodesFromDocuments(documents: Document[]): Node[]; + getNodesFromDocuments(documents: Document[]): TextNode[]; } export class SimpleNodeParser implements NodeParser { textSplitter: SentenceSplitter; diff --git a/packages/core/src/Response.ts b/packages/core/src/Response.ts index 03e0bb823c156bdc67538edade17f467ed8250c6..83121d77a8e3173bae41205a0c9ec579bac488b4 100644 --- a/packages/core/src/Response.ts +++ b/packages/core/src/Response.ts @@ -1,10 +1,10 @@ -import { Node } from "./Node"; +import { TextNode } from "./Node"; export class Response { response?: string; - sourceNodes: Node[]; + sourceNodes: TextNode[]; - constructor(response?: string, sourceNodes?: Node[]) { + constructor(response?: string, sourceNodes?: TextNode[]) { this.response = response; this.sourceNodes = sourceNodes || []; } diff --git a/packages/core/src/storage/docStore/KVDocumentStore.ts 
b/packages/core/src/storage/docStore/KVDocumentStore.ts index f7809ba59e58c00e5bccd0332299cd8d23ab3aa4..855e7163ee89f4954d0de746896e5d6538f66f51 100644 --- a/packages/core/src/storage/docStore/KVDocumentStore.ts +++ b/packages/core/src/storage/docStore/KVDocumentStore.ts @@ -1,5 +1,4 @@ -import { Node } from "../../Node"; -import { BaseDocument } from "../../Document"; +import { BaseNode } from "../../Node"; import { BaseDocumentStore, RefDocInfo } from "./types"; import { BaseKVStore } from "../kvStore/types"; import _, * as lodash from "lodash"; @@ -22,9 +21,9 @@ export class KVDocumentStore extends BaseDocumentStore { this.metadataCollection = `${namespace}/metadata`; } - async docs(): Promise<Record<string, BaseDocument>> { + async docs(): Promise<Record<string, BaseNode>> { let jsonDict = await this.kvstore.getAll(this.nodeCollection); - let docs: Record<string, BaseDocument> = {}; + let docs: Record<string, BaseNode> = {}; for (let key in jsonDict) { docs[key] = jsonToDoc(jsonDict[key] as Record<string, any>); } @@ -32,7 +31,7 @@ export class KVDocumentStore extends BaseDocumentStore { } async addDocuments( - docs: BaseDocument[], + docs: BaseNode[], allowUpdate: boolean = true ): Promise<void> { for (var idx = 0; idx < docs.length; idx++) { diff --git a/packages/core/src/storage/docStore/types.ts b/packages/core/src/storage/docStore/types.ts index c58302adbe97c91ede9d61089aa7e332a194d1b6..5f32f9b670f9adad59ca8eec43fd0b76e085816c 100644 --- a/packages/core/src/storage/docStore/types.ts +++ b/packages/core/src/storage/docStore/types.ts @@ -1,5 +1,4 @@ -import { Node } from "../../Node"; -import { BaseDocument } from "../../Document"; +import { BaseNode } from "../../Node"; import { GenericFileSystem } from "../FileSystem"; import { DEFAULT_PERSIST_DIR, @@ -23,14 +22,14 @@ export abstract class BaseDocumentStore { } // Main interface - abstract docs(): Promise<Record<string, BaseDocument>>; + abstract docs(): Promise<Record<string, BaseNode>>; - abstract 
addDocuments(docs: BaseDocument[], allowUpdate: boolean): void; + abstract addDocuments(docs: BaseNode[], allowUpdate: boolean): void; abstract getDocument( docId: string, raiseError: boolean - ): Promise<BaseDocument | undefined>; + ): Promise<BaseNode | undefined>; abstract deleteDocument(docId: string, raiseError: boolean): void; @@ -51,15 +50,15 @@ export abstract class BaseDocumentStore { abstract deleteRefDoc(refDocId: string, raiseError: boolean): Promise<void>; // Nodes - getNodes(nodeIds: string[], raiseError: boolean = true): Promise<Node[]> { + getNodes(nodeIds: string[], raiseError: boolean = true): Promise<BaseNode[]> { return Promise.all( nodeIds.map((nodeId) => this.getNode(nodeId, raiseError)) ); } - async getNode(nodeId: string, raiseError: boolean = true): Promise<Node> { + async getNode(nodeId: string, raiseError: boolean = true): Promise<BaseNode> { let doc = await this.getDocument(nodeId, raiseError); - if (!(doc instanceof Node)) { + if (!(doc instanceof BaseNode)) { throw new Error(`Document ${nodeId} is not a Node.`); } return doc; @@ -67,8 +66,8 @@ export abstract class BaseDocumentStore { async getNodeDict(nodeIdDict: { [index: number]: string; - }): Promise<{ [index: number]: Node }> { - let result: { [index: number]: Node } = {}; + }): Promise<{ [index: number]: BaseNode }> { + let result: { [index: number]: BaseNode } = {}; for (let index in nodeIdDict) { result[index] = await this.getNode(nodeIdDict[index]); } diff --git a/packages/core/src/storage/docStore/utils.ts b/packages/core/src/storage/docStore/utils.ts index eea6c81a4a19431eac43f32fad4881e91c29f5ae..f36f494c7a378f3eebcb6341dcfbb90df4134f53 100644 --- a/packages/core/src/storage/docStore/utils.ts +++ b/packages/core/src/storage/docStore/utils.ts @@ -1,36 +1,35 @@ -import { Node } from "../../Node"; -import { BaseDocument, Document, NodeType } from "../../Document"; +import { BaseNode, Document, TextNode, ObjectType } from "../../Node"; const TYPE_KEY = "__type__"; const 
DATA_KEY = "__data__"; -export function docToJson(doc: BaseDocument): Record<string, any> { +export function docToJson(doc: Document): Record<string, any> { return { [DATA_KEY]: JSON.stringify(doc), - [TYPE_KEY]: doc.getType(), + [TYPE_KEY]: Document.getType(), }; } -export function jsonToDoc(docDict: Record<string, any>): BaseDocument { +export function jsonToDoc(docDict: Record<string, any>): Document { let docType = docDict[TYPE_KEY]; let dataDict = docDict[DATA_KEY]; - let doc: BaseDocument; + let doc: Document; - if (docType === NodeType.DOCUMENT) { - doc = new Document( - dataDict.text, - dataDict.docId, - dataDict.embedding, - dataDict.docHash - ); - } else if (docType === NodeType.TEXT) { - const reslationships = dataDict.relationships; - doc = new Node( - reslationships.text, - reslationships.docId, - reslationships.embedding, - reslationships.docHash - ); + if (docType === ObjectType.DOCUMENT) { + doc = new Document({ + text: dataDict.text, + id_: dataDict.id_, + embedding: dataDict.embedding, + hash: dataDict.hash, + }); + } else if (docType === ObjectType.TEXT) { + /* The node's own fields are serialized at the top level of dataDict (same as the DOCUMENT branch), not under its relationships map. */ + doc = new TextNode({ + text: dataDict.text, + id_: dataDict.id_, + embedding: dataDict.embedding, + hash: dataDict.hash, + }); } else { throw new Error(`Unknown doc type: ${docType}`); } diff --git a/packages/core/src/storage/vectorStore/types.ts b/packages/core/src/storage/vectorStore/types.ts index 74dada2135d437b33c1ad54997157b22ad4fda7a..d5169e2671b960677213c24ab4c78ce6ea9b5df0 100644 --- a/packages/core/src/storage/vectorStore/types.ts +++ b/packages/core/src/storage/vectorStore/types.ts @@ -1,8 +1,8 @@ -import { Node } from "../../Node"; +import { TextNode } from "../../Node"; import { GenericFileSystem } from "../FileSystem"; export interface NodeWithEmbedding { - node: Node; + node: TextNode; embedding: number[]; id(): string; @@ -10,7 +10,7 @@ export interface NodeWithEmbedding { } export interface 
VectorStoreQueryResult { - nodes?: Node[]; + nodes?: TextNode[]; similarities?: number[]; ids?: string[]; } diff --git a/packages/core/src/tests/Document.test.ts b/packages/core/src/tests/Document.test.ts index de799d517ce5f34315363f0082c8c0c90ae4e0dc..a0edb65aecf12c84e7c8ec6cd298955259415150 100644 --- a/packages/core/src/tests/Document.test.ts +++ b/packages/core/src/tests/Document.test.ts @@ -1,8 +1,8 @@ -import { Document } from "../Document"; +import { Document } from "../Node"; describe("Document", () => { test("initializes", () => { - const doc = new Document("text", "docId"); + const doc = new Document({ text: "text", id_: "docId" }); expect(doc).toBeDefined(); }); }); diff --git a/packages/core/tsconfig.json b/packages/core/tsconfig.json index 7ca71178d53835722a040fe55cb186962981aec6..059d82421d1946cd28447bcb7bfa231abb6e5bad 100644 --- a/packages/core/tsconfig.json +++ b/packages/core/tsconfig.json @@ -8,7 +8,8 @@ "skipLibCheck": true, "noEmit": true, "strict": true, - "lib": ["es2015", "dom"] + "lib": ["es2015", "dom"], + "target": "ES2015" }, "exclude": ["node_modules"] }