Add on-disk persistence to SimpleKVStore and SimpleVectorStore, plus a runnable example.

- apps/simple: new README and a persist.ts example that builds a
  VectorStoreIndex with a storage context whose persistDir is ./storage.
- package.json: sort the "scripts" keys alphabetically.
- packages/core/package.json: nest "exports" under ".", list "types" before
  "default" in each condition, add top-level "types" and "main", and move
  "devDependencies" above "engines".
- SimpleKVStore: fromPersistPath-style loading now creates the parent
  directory, falls back to an empty store when the file cannot be read or
  parsed, and remembers the path so the write method persists after each write.
- SimpleVectorStore: same treatment; the persist method now creates the parent
  directory of the file instead of a directory at the file path itself.

Review amendment: the persist() call added before the node ids are returned in
SimpleVectorStore is not awaited, so it now carries .catch(console.error) to
keep a failed write from becoming an unhandled promise rejection. The hunk line
counts and later hunk offsets for that file are adjusted accordingly; the
post-image hash on its "index" line was not recomputed.

diff --git a/apps/simple/README.md b/apps/simple/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..61658cbfeb159d90073be258aea114b612475542
--- /dev/null
+++ b/apps/simple/README.md
@@ -0,0 +1,5 @@
+# Simple Examples
+
+Due to packaging, you will need to run `pnpm --filter llamaindex build` before running these examples.
+
+Run them with ts-node, for example `npx ts-node vectorIndex.ts`
diff --git a/apps/simple/persist.ts b/apps/simple/persist.ts
new file mode 100644
index 0000000000000000000000000000000000000000..52e0e22be292c3eb30c42ea7b15764efeac73f69
--- /dev/null
+++ b/apps/simple/persist.ts
@@ -0,0 +1,28 @@
+import fs from "fs/promises";
+import { Document, VectorStoreIndex, storageContextFromDefaults } from "llamaindex";
+
+async function main() {
+  // Load essay from abramov.txt in Node
+  const essay = await fs.readFile(
+    "node_modules/llamaindex/examples/abramov.txt",
+    "utf-8"
+  );
+
+  // Create Document object with essay
+  const document = new Document({ text: essay });
+
+  // Split text and create embeddings. Store them in a VectorStoreIndex with persistence
+  const storageContext = await storageContextFromDefaults({ persistDir: "./storage" });
+  const index = await VectorStoreIndex.fromDocuments([document], storageContext);
+
+  // Query the index
+  const queryEngine = index.asQueryEngine();
+  const response = await queryEngine.query(
+    "What did the author do in college?"
+  );
+
+  // Output response
+  console.log(response.toString());
+}
+
+main().catch(console.error);
diff --git a/package.json b/package.json
index 70d34ef1a2262be3bc7000b49ce359a12e41d17d..2b61317490c074b60213cf22970bbda4a49647c6 100644
--- a/package.json
+++ b/package.json
@@ -3,10 +3,10 @@
   "scripts": {
     "build": "turbo run build",
     "dev": "turbo run dev",
-    "lint": "turbo run lint",
-    "test": "turbo run test",
     "format": "prettier --write \"**/*.{ts,tsx,md}\"",
-    "prepare": "husky install"
+    "lint": "turbo run lint",
+    "prepare": "husky install",
+    "test": "turbo run test"
   },
   "devDependencies": {
     "@turbo/gen": "^1.10.9",
diff --git a/packages/core/package.json b/packages/core/package.json
index a4676d19f484c3c1d6da4aa5cf23d7940a54cb31..78e19de53b477d07ae232823446b45da0a94ee3d 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -11,31 +11,35 @@
     "uuid": "^9.0.0",
     "wink-nlp": "^1.14.2"
   },
+  "devDependencies": {
+    "@types/lodash": "^4.14.195",
+    "@types/node": "^18.16.19",
+    "@types/pdf-parse": "^1.1.1",
+    "@types/uuid": "^9.0.2",
+    "axios": "^0.26.1",
+    "node-stdlib-browser": "^1.2.0",
+    "tsup": "^7.1.0"
+  },
   "engines": {
     "node": ">=18.0.0"
   },
   "exports": {
-    "import": {
-      "default": "./dist/index.mjs",
-      "types": "./dist/index.d.mts"
-    },
-    "require": {
-      "default": "./dist/index.js",
-      "types": "./dist/index.d.ts"
+    ".": {
+      "import": {
+        "types": "./dist/index.d.mts",
+        "default": "./dist/index.mjs"
+      },
+      "require": {
+        "types": "./dist/index.d.ts",
+        "default": "./dist/index.js"
+      }
     }
   },
+  "types": "./dist/index.d.ts",
+  "main": "./dist/index.js",
   "scripts": {
     "lint": "eslint .",
     "test": "jest",
     "build": "tsup src/index.ts --format esm,cjs --dts"
-  },
-  "devDependencies": {
-    "@types/lodash": "^4.14.195",
-    "@types/node": "^18.16.19",
-    "@types/pdf-parse": "^1.1.1",
-    "@types/uuid": "^9.0.2",
-    "axios": "^0.26.1",
-    "node-stdlib-browser": "^1.2.0",
-    "tsup": "^7.1.0"
   }
 }
diff --git a/packages/core/src/storage/kvStore/SimpleKVStore.ts b/packages/core/src/storage/kvStore/SimpleKVStore.ts
index bc4928325ed6824ec3357cbd04d81e30217661eb..54c866aa4b8d741c0e2b5e98a256d2ed8d4c5482 100644
--- a/packages/core/src/storage/kvStore/SimpleKVStore.ts
+++ b/packages/core/src/storage/kvStore/SimpleKVStore.ts
@@ -8,6 +8,8 @@ export type DataType = Record<string, Record<string, any>>;
 
 export class SimpleKVStore extends BaseKVStore {
   private data: DataType;
+  private persistPath: string | undefined;
+  private fs: GenericFileSystem | undefined;
 
   constructor(data?: DataType) {
     super();
@@ -23,6 +25,10 @@ export class SimpleKVStore extends BaseKVStore {
       this.data[collection] = {};
     }
     this.data[collection][key] = _.clone(val); // Creating a shallow copy of the object
+
+    if (this.persistPath) {
+      await this.persist(this.persistPath, this.fs);
+    }
   }
 
   async get(
@@ -69,10 +75,25 @@ export class SimpleKVStore extends BaseKVStore {
     fs?: GenericFileSystem
   ): Promise<SimpleKVStore> {
     fs = fs || DEFAULT_FS;
-    let data = JSON.parse(
-      await fs.readFile(persistPath, { encoding: "utf-8" })
-    );
-    return new SimpleKVStore(data);
+    let dirPath = path.dirname(persistPath);
+    if (!(await exists(fs, dirPath))) {
+      await fs.mkdir(dirPath);
+    }
+
+    let data: DataType = {};
+    try {
+      let fileData = await fs.readFile(persistPath);
+      data = JSON.parse(fileData.toString());
+    } catch (e) {
+      console.error(
+        `No valid data found at path: ${persistPath} starting new store.`
+      );
+    }
+
+    const store = new SimpleKVStore(data);
+    store.persistPath = persistPath;
+    store.fs = fs;
+    return store;
   }
 
   toDict(): DataType {
diff --git a/packages/core/src/storage/vectorStore/SimpleVectorStore.ts b/packages/core/src/storage/vectorStore/SimpleVectorStore.ts
index d0d9cab5181aa0e5ae2ef162d42bdc9b2c5d134b..43d267d6435c474f7fef1cbbb0390ea588155bfc 100644
--- a/packages/core/src/storage/vectorStore/SimpleVectorStore.ts
+++ b/packages/core/src/storage/vectorStore/SimpleVectorStore.ts
@@ -1,3 +1,4 @@
+import * as path from "path";
 import _ from "lodash";
 import { GenericFileSystem, exists } from "../FileSystem";
 import {
@@ -31,6 +32,7 @@ export class SimpleVectorStore implements VectorStore {
   storesText: boolean = false;
   private data: SimpleVectorStoreData = new SimpleVectorStoreData();
   private fs: GenericFileSystem = DEFAULT_FS;
+  private persistPath: string | undefined;
 
   constructor(data?: SimpleVectorStoreData, fs?: GenericFileSystem) {
     this.data = data || new SimpleVectorStoreData();
@@ -65,6 +67,13 @@ export class SimpleVectorStore implements VectorStore {
       this.data.textIdToRefDocId[result.node.id_] =
         result.node.sourceNode?.nodeId;
     }
+
+    if (this.persistPath) {
+      // persist() is not awaited here, so attach a rejection handler: a failed
+      // write is logged rather than raised as an unhandled promise rejection.
+      this.persist(this.persistPath, this.fs).catch(console.error);
+    }
+
     return embeddingResults.map((result) => result.node.id_);
   }
 
@@ -141,8 +150,9 @@ export class SimpleVectorStore implements VectorStore {
     fs?: GenericFileSystem
   ): Promise<void> {
     fs = fs || this.fs;
-    if (!(await exists(fs, persistPath))) {
-      await fs.mkdir(persistPath);
+    let dirPath = path.dirname(persistPath);
+    if (!(await exists(fs, dirPath))) {
+      await fs.mkdir(dirPath);
     }
 
     await fs.writeFile(persistPath, JSON.stringify(this.data));
@@ -153,18 +163,29 @@ export class SimpleVectorStore implements VectorStore {
     fs?: GenericFileSystem
   ): Promise<SimpleVectorStore> {
     fs = fs || DEFAULT_FS;
-    if (!(await exists(fs, persistPath))) {
-      throw new Error(
-        `No existing SimpleVectorStore found at ${persistPath}, skipping load.`
+
+    let dirPath = path.dirname(persistPath);
+    if (!(await exists(fs, dirPath))) {
+      await fs.mkdir(dirPath);
+    }
+
+    let dataDict: any = {};
+    try {
+      let fileData = await fs.readFile(persistPath);
+      dataDict = JSON.parse(fileData.toString());
+    } catch (e) {
+      console.error(
+        `No valid data found at path: ${persistPath} starting new store.`
       );
     }
 
-    console.debug(`Loading SimpleVectorStore from ${persistPath}.`);
-    let dataDict = JSON.parse(await fs.readFile(persistPath, "utf-8"));
     let data = new SimpleVectorStoreData();
-    data.embeddingDict = dataDict.embeddingDict;
-    data.textIdToRefDocId = dataDict.textIdToRefDocId;
-    return new SimpleVectorStore(data);
+    data.embeddingDict = dataDict.embeddingDict ?? {};
+    data.textIdToRefDocId = dataDict.textIdToRefDocId ?? {};
+    const store = new SimpleVectorStore(data);
+    store.persistPath = persistPath;
+    store.fs = fs;
+    return store;
   }
 
   static fromDict(saveDict: SimpleVectorStoreData): SimpleVectorStore {