diff --git a/apps/simple/pinecone-vector-store/README.md b/apps/simple/pinecone-vector-store/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b0576bc26c4a6a8b4cff0a74150190f48acffe6e --- /dev/null +++ b/apps/simple/pinecone-vector-store/README.md @@ -0,0 +1,33 @@ +# Pinecone Vector Store + +There are two scripts available here: load-docs.ts and query.ts. + +## Prerequisites + +You'll need a Pinecone account, project, and index. Pinecone does not allow automatic creation of indexes on the free plan, +so this vector store does not check for or create the index (unlike, e.g., the PGVectorStore). + +Set the **PINECONE_API_KEY** and **PINECONE_ENVIRONMENT** environment variables to match your specific values. You will likely also need to set **PINECONE_INDEX_NAME**, unless your +index uses the default name "llama". + +You'll also need a value for **OPENAI_API_KEY** in your environment. + +## Setup and Loading Docs + +Read and follow the instructions in the README.md file located one directory up to make sure your JS/TS dependencies are set up. The commands listed below are also run from that parent directory. + +To import documents and save the embedding vectors to your database: + +> `npx ts-node pinecone-vector-store/load-docs.ts data` + +where `data` is the directory containing your input files. Using the _data_ directory in the example above will read all of the files in that directory using the LlamaIndexTS default readers for each file type. + +**NOTE**: Sending text chunks as part of the Pinecone metadata means that upsert API calls can get arbitrarily large. Set the **PINECONE_CHUNK_SIZE** environment variable to a smaller value if the load script fails. + +## RAG Querying + +To query using the resulting vector store: + +> `npx ts-node pinecone-vector-store/query.ts` + +The script will prompt for a question, then process and present the answer using the PineconeVectorStore data and your OpenAI API key. 
It will continue to prompt until you enter `q`, `quit` or `exit` as the next query. diff --git a/apps/simple/pinecone-vector-store/load-docs.ts b/apps/simple/pinecone-vector-store/load-docs.ts new file mode 100755 index 0000000000000000000000000000000000000000..f3ebb3dd360cd03877ae0017cf1b6a3711b55884 --- /dev/null +++ b/apps/simple/pinecone-vector-store/load-docs.ts @@ -0,0 +1,66 @@ +// load-docs.ts: loads the files in a directory and indexes them into a PineconeVectorStore +import fs from "fs/promises"; +import { + SimpleDirectoryReader, + storageContextFromDefaults, + VectorStoreIndex, +} from "llamaindex"; +import { PineconeVectorStore } from "../../../packages/core/src/storage/vectorStore/PineconeVectorStore"; + +async function getSourceFilenames(sourceDir: string) { + return await fs + .readdir(sourceDir) + .then((fileNames) => fileNames.map((file) => sourceDir + "/" + file)); +} + +function callback( + category: string, + name: string, + status: any, + message: string = "", +): boolean { + console.log(category, name, status, message); + return true; +} + +async function main(args: any) { + const sourceDir: string = args.length > 2 ? args[2] : "../data"; + + console.log(`Finding documents in ${sourceDir}`); + const fileList = await getSourceFilenames(sourceDir); + const count = fileList.length; + console.log(`Found ${count} files`); + + console.log(`Importing contents from ${count} files in ${sourceDir}`); + var fileName = ""; + try { + // Passing the callback fn to the ctor here + // will enable logging to console (see callback fn, defined above). + // NOTE(review): fileName is never reassigned, so the catch below always logs "". 
+ const rdr = new SimpleDirectoryReader(callback); + const docs = await rdr.loadData({ directoryPath: sourceDir }); + + const pcvs = new PineconeVectorStore(); + + const ctx = await storageContextFromDefaults({ vectorStore: pcvs }); + + console.debug(" - creating vector store"); + const index = await VectorStoreIndex.fromDocuments(docs, { + storageContext: ctx, + }); + console.debug(" - done."); + } catch (err) { + console.error(fileName, err); + console.log( + "If your PineconeVectorStore connection failed, make sure to set env vars for PINECONE_API_KEY and PINECONE_ENVIRONMENT. If the upserts failed, try setting PINECONE_CHUNK_SIZE to limit the content sent per chunk", + ); + process.exit(1); + } + + console.log( + "Done. Try running query.ts to ask questions against the imported embeddings.", + ); + process.exit(0); +} + +main(process.argv).catch((err) => console.error(err)); diff --git a/apps/simple/pinecone-vector-store/query.ts b/apps/simple/pinecone-vector-store/query.ts new file mode 100755 index 0000000000000000000000000000000000000000..513f2fc7151be1c25f8836db99995e87d0602e0c --- /dev/null +++ b/apps/simple/pinecone-vector-store/query.ts @@ -0,0 +1,65 @@ +import { VectorStoreIndex } from "../../../packages/core/src/indices/vectorStore/VectorStoreIndex"; +import { serviceContextFromDefaults } from "../../../packages/core/src/ServiceContext"; +import { PineconeVectorStore } from "../../../packages/core/src/storage/vectorStore/PineconeVectorStore"; + +async function main() { + const readline = require("readline").createInterface({ + input: process.stdin, + output: process.stdout, + }); + + try { + const pcvs = new PineconeVectorStore(); + + const ctx = serviceContextFromDefaults(); + const index = await VectorStoreIndex.fromVectorStore(pcvs, ctx); + + // Build the query engine that answers each question in the loop below + const queryEngine = await index.asQueryEngine(); + + let question = ""; + while (!isQuit(question)) { + question = await getUserInput(readline); + + if (isQuit(question)) { + 
readline.close(); + process.exit(0); + } + + try { + const answer = await queryEngine.query(question); + console.log(answer.response); + } catch (error) { + console.error("Error:", error); + } + } + } catch (err) { + console.error(err); + console.log( + "If your PineconeVectorStore connection failed, make sure to set env vars for PINECONE_API_KEY and PINECONE_ENVIRONMENT.", + ); + process.exit(1); + } +} + +function isQuit(question: string) { + return ["q", "quit", "exit"].includes(question.trim().toLowerCase()); +} + +// Prompts via readline.question and resolves with the text the user enters +function getUserInput(readline: any): Promise<string> { + return new Promise((resolve) => { + readline.question( + "What would you like to know?\n>", + (userInput: string) => { + resolve(userInput); + }, + ); + }); +} + +main() + .catch(console.error) + .finally(() => { + process.exit(1); // NOTE(review): runs whenever main() settles, so the exit status is 1 even without an error + });