diff --git a/.changeset/short-boats-confess.md b/.changeset/short-boats-confess.md new file mode 100644 index 0000000000000000000000000000000000000000..ca16235328e8234eaa3bac3535dfa3a3e5ec3660 --- /dev/null +++ b/.changeset/short-boats-confess.md @@ -0,0 +1,5 @@ +--- +"llamaindex": patch +--- + +Fix persistence bug (thanks @HenryHengZJ) diff --git a/apps/simple/persist.ts b/apps/simple/persist.ts deleted file mode 100644 index 5412a48029f6777529432aa3830f0ea160315e8e..0000000000000000000000000000000000000000 --- a/apps/simple/persist.ts +++ /dev/null @@ -1,36 +0,0 @@ -import fs from "fs/promises"; -import { - Document, - VectorStoreIndex, - storageContextFromDefaults, -} from "llamaindex"; - -async function main() { - // Load essay from abramov.txt in Node - const essay = await fs.readFile( - "node_modules/llamaindex/examples/abramov.txt", - "utf-8" - ); - - // Create Document object with essay - const document = new Document({ text: essay }); - - // Split text and create embeddings. Store them in a VectorStoreIndex with persistence - const storageContext = await storageContextFromDefaults({ - persistDir: "./storage", - }); - const index = await VectorStoreIndex.fromDocuments([document], { - storageContext, - }); - - // Query the index - const queryEngine = index.asQueryEngine(); - const response = await queryEngine.query( - "What did the author do in college?" - ); - - // Output response - console.log(response.toString()); -} - -main().catch(console.error); diff --git a/apps/simple/storageContext.ts b/apps/simple/storageContext.ts index 56be8010a6f67032b4cbe630e8fc5b9000c0b4ff..cb9c7102bc5be7486193978981e6e148e599af66 100644 --- a/apps/simple/storageContext.ts +++ b/apps/simple/storageContext.ts @@ -1,15 +1,22 @@ -import { Document, VectorStoreIndex, storageContextFromDefaults } from "llamaindex"; +import { + Document, + VectorStoreIndex, + storageContextFromDefaults, +} from "llamaindex"; import essay from "./essay"; - async function main() { // Create Document object with essay const document = new Document({ text: essay }); // Split text and create embeddings. Store them in a VectorStoreIndex // persist the vector store automatically with the storage context - const storageContext = await storageContextFromDefaults({ persistDir: "./storage" }); - const index = await VectorStoreIndex.fromDocuments([document], { storageContext }); + const storageContext = await storageContextFromDefaults({ + persistDir: "./storage", + }); + const index = await VectorStoreIndex.fromDocuments([document], { + storageContext, + }); // Query the index const queryEngine = index.asQueryEngine(); @@ -21,9 +28,14 @@ async function main() { console.log(response.toString()); // load the index - const loadedIndex = await VectorStoreIndex.init({ storageContext }); - const laodedQueryEngine = loadedIndex.asQueryEngine(); - const loadedResponse = await laodedQueryEngine.query( + const secondStorageContext = await storageContextFromDefaults({ + persistDir: "./storage", + }); + const loadedIndex = await VectorStoreIndex.init({ + storageContext: secondStorageContext, + }); + const loadedQueryEngine = loadedIndex.asQueryEngine(); + const loadedResponse = await loadedQueryEngine.query( "What did the author do growing up?" ); console.log(loadedResponse.toString()); diff --git a/examples/storageContext.ts b/examples/storageContext.ts index 56be8010a6f67032b4cbe630e8fc5b9000c0b4ff..cb9c7102bc5be7486193978981e6e148e599af66 100644 --- a/examples/storageContext.ts +++ b/examples/storageContext.ts @@ -1,15 +1,22 @@ -import { Document, VectorStoreIndex, storageContextFromDefaults } from "llamaindex"; +import { + Document, + VectorStoreIndex, + storageContextFromDefaults, +} from "llamaindex"; import essay from "./essay"; - async function main() { // Create Document object with essay const document = new Document({ text: essay }); // Split text and create embeddings. Store them in a VectorStoreIndex // persist the vector store automatically with the storage context - const storageContext = await storageContextFromDefaults({ persistDir: "./storage" }); - const index = await VectorStoreIndex.fromDocuments([document], { storageContext }); + const storageContext = await storageContextFromDefaults({ + persistDir: "./storage", + }); + const index = await VectorStoreIndex.fromDocuments([document], { + storageContext, + }); // Query the index const queryEngine = index.asQueryEngine(); @@ -21,9 +28,14 @@ async function main() { console.log(response.toString()); // load the index - const loadedIndex = await VectorStoreIndex.init({ storageContext }); - const laodedQueryEngine = loadedIndex.asQueryEngine(); - const loadedResponse = await laodedQueryEngine.query( + const secondStorageContext = await storageContextFromDefaults({ + persistDir: "./storage", + }); + const loadedIndex = await VectorStoreIndex.init({ + storageContext: secondStorageContext, + }); + const loadedQueryEngine = loadedIndex.asQueryEngine(); + const loadedResponse = await loadedQueryEngine.query( "What did the author do growing up?" ); console.log(loadedResponse.toString()); diff --git a/packages/core/src/Node.ts b/packages/core/src/Node.ts index abbc58a6d5f6bbf05a7532297827b54d8d4bdd83..a775990157bd3ebdaa73b88c80156cda33de060a 100644 --- a/packages/core/src/Node.ts +++ b/packages/core/src/Node.ts @@ -128,6 +128,14 @@ export abstract class BaseNode { hash: this.hash, }; } + + /** + * Used with built in JSON.stringify + * @returns + */ + toJSON(): Record<string, any> { + return { ...this, type: this.getType() }; + } } /** @@ -232,6 +240,23 @@ export class Document extends TextNode { } } +export function jsonToNode(json: any) { + if (!json.type) { + throw new Error("Node type not found"); + } + + switch (json.type) { + case ObjectType.TEXT: + return new TextNode(json); + case ObjectType.INDEX: + return new IndexNode(json); + case ObjectType.DOCUMENT: + return new Document(json); + default: + throw new Error(`Invalid node type: ${json.type}`); + } +} + // export class ImageDocument extends Document { // image?: string; // } diff --git a/packages/core/src/indices/BaseIndex.ts b/packages/core/src/indices/BaseIndex.ts index f19bfcc88d9dcdb0c5a111ef413e138855808113..27c4e17da3a3f4b64d8c936fbce2af031924687a 100644 --- a/packages/core/src/indices/BaseIndex.ts +++ b/packages/core/src/indices/BaseIndex.ts @@ -1,4 +1,4 @@ -import { Document, BaseNode } from "../Node"; +import { Document, BaseNode, jsonToNode } from "../Node"; import { v4 as uuidv4 } from "uuid"; import { BaseRetriever } from "../Retriever"; import { ServiceContext } from "../ServiceContext"; @@ -74,7 +74,12 @@ export function jsonToIndexStruct(json: any): IndexStruct { return indexList; } else if (json.type === IndexStructType.SIMPLE_DICT) { const indexDict = new IndexDict(json.indexId, json.summary); - indexDict.nodesDict = json.nodesDict; + indexDict.nodesDict = Object.entries(json.nodesDict).reduce< + Record<string, BaseNode> + >((acc, [key, value]) => { + acc[key] = jsonToNode(value); + return acc; + }, {}); return indexDict; } else { throw new Error(`Unknown index struct type: ${json.type}`); diff --git a/packages/core/src/storage/constants.ts b/packages/core/src/storage/constants.ts index a0ded7c10f1218b27e5f9ebb88e069c35a83dc8c..15e87613c7b89f84ab3380be0a3e5242d87da89f 100644 --- a/packages/core/src/storage/constants.ts +++ b/packages/core/src/storage/constants.ts @@ -1,7 +1,7 @@ export const DEFAULT_COLLECTION = "data"; export const DEFAULT_PERSIST_DIR = "./storage"; export const DEFAULT_INDEX_STORE_PERSIST_FILENAME = "index_store.json"; -export const DEFAULT_DOC_STORE_PERSIST_FILENAME = "docstore.json"; +export const DEFAULT_DOC_STORE_PERSIST_FILENAME = "doc_store.json"; export const DEFAULT_VECTOR_STORE_PERSIST_FILENAME = "vector_store.json"; export const DEFAULT_GRAPH_STORE_PERSIST_FILENAME = "graph_store.json"; export const DEFAULT_NAMESPACE = "docstore";