diff --git a/apps/simple/assemblyai.ts b/apps/simple/assemblyai.ts new file mode 100644 index 0000000000000000000000000000000000000000..d0333806349002eb500a241701c31b677fd67e87 --- /dev/null +++ b/apps/simple/assemblyai.ts @@ -0,0 +1,60 @@ +import { program } from "commander"; +import { AudioTranscriptReader, CreateTranscriptParameters } from "llamaindex"; +import { stdin as input, stdout as output } from "node:process"; +// readline/promises is still experimental so not in @types/node yet +// @ts-ignore +import readline from "node:readline/promises"; +import { VectorStoreIndex } from "../../packages/core/src/indices"; + +program + .option( + "-a, --audio-url [string]", + "URL or path of the audio file to transcribe", + ) + .option("-i, --transcript-id [string]", "ID of the AssemblyAI transcript") + .action(async (options) => { + if (!process.env.ASSEMBLYAI_API_KEY) { + console.log("No ASSEMBLYAI_API_KEY found in environment variables."); + return; + } + + const reader = new AudioTranscriptReader(); + let params: CreateTranscriptParameters | string; + console.log(options); + if (options.audioUrl) { + params = { + audio_url: options.audioUrl, + }; + } else if (options.transcriptId) { + params = options.transcriptId; + } else { + console.log( + "You must provide either an --audio-url or a --transcript-id", + ); + return; + } + + const documents = await reader.loadData(params); + console.log(documents); + + // Split text and create embeddings. Store them in a VectorStoreIndex + const index = await VectorStoreIndex.fromDocuments(documents); + + // Create query engine + const queryEngine = index.asQueryEngine(); + + const rl = readline.createInterface({ input, output }); + while (true) { + const query = await rl.question("Ask a question: "); + + if (!query) { + break; + } + + const response = await queryEngine.query(query); + + console.log(response.toString()); + } + }); + +program.parse(); diff --git a/apps/simple/pg-vector-store/README.md b/apps/simple/pg-vector-store/README.md index 7bcf6a854430165398eafdb80159d769645e4715..fc0316896e004fd0618787bcb68a619bf1cb07b5 100644 --- a/apps/simple/pg-vector-store/README.md +++ b/apps/simple/pg-vector-store/README.md @@ -1,28 +1,33 @@ # Postgres Vector Store + There are two scripts available here: load-docs.ts and query.ts ## Prerequisites -You'll need a postgres database instance against which to run these scripts. A simple docker command would look like this: ->`docker run -d --rm --name vector-db -p 5432:5432 -e "POSTGRES_HOST_AUTH_METHOD=trust" ankane/pgvector` +You'll need a postgres database instance against which to run these scripts. A simple docker command would look like this: + +> `docker run -d --rm --name vector-db -p 5432:5432 -e "POSTGRES_HOST_AUTH_METHOD=trust" ankane/pgvector` Set the PGHOST and PGUSER (and PGPASSWORD) environment variables to match your database setup. You'll also need a value for OPENAI_API_KEY in your environment. -**NOTE:** Using `--rm` in the example docker command above means that the vector store will be deleted every time the container is stopped. For production purposes, use a volume to ensure persistence across restarts. +**NOTE:** Using `--rm` in the example docker command above means that the vector store will be deleted every time the container is stopped. For production purposes, use a volume to ensure persistence across restarts. ## Setup and Loading Docs -Read and follow the instructions in the README.md file located one directory up to make sure your JS/TS dependencies are set up. The commands listed below are also run from that parent directory. + +Read and follow the instructions in the README.md file located one directory up to make sure your JS/TS dependencies are set up. The commands listed below are also run from that parent directory. To import documents and save the embedding vectors to your database: ->`npx ts-node pg-vector-store/load-docs.ts data` -where data is the directory containing your input files. Using the *data* directory in the example above will read all of the files in that directory using the llamaindexTS default readers for each file type. +> `npx ts-node pg-vector-store/load-docs.ts data` + +where data is the directory containing your input files. Using the _data_ directory in the example above will read all of the files in that directory using the llamaindexTS default readers for each file type. ## RAG Querying + To query using the resulting vector store: ->`npx ts-node pg-vector-store/query.ts` +> `npx ts-node pg-vector-store/query.ts` -The script will prompt for a question, then process and present the answer using the PGVectorStore data and your OpenAI API key. It will continue to prompt until you enter `q`, `quit` or `exit` as the next query. \ No newline at end of file +The script will prompt for a question, then process and present the answer using the PGVectorStore data and your OpenAI API key. It will continue to prompt until you enter `q`, `quit` or `exit` as the next query. diff --git a/apps/simple/pg-vector-store/load-docs.ts b/apps/simple/pg-vector-store/load-docs.ts index 2a5eb863b3035287549d1faa08a94170b13cd9f1..81e2ccc472b3f01835fc5ae98c3ca1628dc2877b 100755 --- a/apps/simple/pg-vector-store/load-docs.ts +++ b/apps/simple/pg-vector-store/load-docs.ts @@ -1,60 +1,68 @@ // load-docs.ts -import fs from 'fs/promises'; -import { SimpleDirectoryReader, storageContextFromDefaults, VectorStoreIndex } from 'llamaindex'; +import fs from "fs/promises"; +import { + SimpleDirectoryReader, + storageContextFromDefaults, + VectorStoreIndex, +} from "llamaindex"; import { PGVectorStore } from "../../../packages/core/src/storage/vectorStore/PGVectorStore"; async function getSourceFilenames(sourceDir: string) { - return await fs.readdir(sourceDir) - .then( - (fileNames) => fileNames.map( - (file) => sourceDir + '/' + file - ) - ); + return await fs + .readdir(sourceDir) + .then((fileNames) => fileNames.map((file) => sourceDir + "/" + file)); } -function callback(category: string, name: string, status: any, message: string = ''): boolean { - console.log(category, name, status, message); - return true; +function callback( + category: string, + name: string, + status: any, + message: string = "", +): boolean { + console.log(category, name, status, message); + return true; } async function main(args: any) { + const sourceDir: string = args.length > 2 ? args[2] : "../data"; - const sourceDir: string = args.length > 2 ? args[2] : '../data'; - - console.log(`Finding documents in ${sourceDir}`); - const fileList = await getSourceFilenames(sourceDir); - const count = fileList.length; - console.log(`Found ${count} files`); - - console.log(`Importing contents from ${count} files in ${sourceDir}`); - var fileName = ''; - try { - - // Passing callback fn to the ctor here - // will enable looging to console. - // See callback fn, defined above. - const rdr = new SimpleDirectoryReader(callback); - const docs = await rdr.loadData({ directoryPath: sourceDir }); - - const pgvs = new PGVectorStore(); - pgvs.setCollection(sourceDir); - pgvs.clearCollection(); - - const ctx = await storageContextFromDefaults( - { vectorStore: pgvs } - ); - - console.debug(' - creating vector store'); - const index = await VectorStoreIndex.fromDocuments(docs, { storageContext: ctx }); - console.debug(' - done.'); - } catch (err) { - console.error(fileName, err); - console.log("If your PGVectorStore init failed, make sure to set env vars for PGUSER or USER, PGHOST, PGPORT and PGPASSWORD as needed."); - process.exit(1); - } - - console.log("Done. Try running query.ts to ask questions against the imported embeddings."); - process.exit(0); + console.log(`Finding documents in ${sourceDir}`); + const fileList = await getSourceFilenames(sourceDir); + const count = fileList.length; + console.log(`Found ${count} files`); + + console.log(`Importing contents from ${count} files in ${sourceDir}`); + var fileName = ""; + try { + // Passing callback fn to the ctor here + // will enable looging to console. + // See callback fn, defined above. + const rdr = new SimpleDirectoryReader(callback); + const docs = await rdr.loadData({ directoryPath: sourceDir }); + + const pgvs = new PGVectorStore(); + pgvs.setCollection(sourceDir); + pgvs.clearCollection(); + + const ctx = await storageContextFromDefaults({ vectorStore: pgvs }); + + console.debug(" - creating vector store"); + const index = await VectorStoreIndex.fromDocuments(docs, { + storageContext: ctx, + }); + console.debug(" - done."); + } catch (err) { + console.error(fileName, err); + console.log( + "If your PGVectorStore init failed, make sure to set env vars for PGUSER or USER, PGHOST, PGPORT and PGPASSWORD as needed.", + ); + process.exit(1); + } + + console.log( + "Done. Try running query.ts to ask questions against the imported embeddings.", + ); + process.exit(0); } -main(process.argv).catch((err) => console.error(err)); \ No newline at end of file +main(process.argv).catch((err) => console.error(err)); diff --git a/apps/simple/pg-vector-store/query.ts b/apps/simple/pg-vector-store/query.ts index 9bb7c34a3bd4b679c2acf00bc4c3c9573712270a..bfda4dd1954d75e9a340d3f90b70002c5f215345 100755 --- a/apps/simple/pg-vector-store/query.ts +++ b/apps/simple/pg-vector-store/query.ts @@ -3,57 +3,65 @@ import { serviceContextFromDefaults } from "../../../packages/core/src/ServiceCo import { PGVectorStore } from "../../../packages/core/src/storage/vectorStore/PGVectorStore"; async function main() { + const readline = require("readline").createInterface({ + input: process.stdin, + output: process.stdout, + }); - const readline = require('readline').createInterface({ - input: process.stdin, - output: process.stdout - }); - - try { - const pgvs = new PGVectorStore(); - // Optional - set your collection name, default is no filter on this field. - // pgvs.setCollection(); - - const ctx = serviceContextFromDefaults(); - const index = await VectorStoreIndex.fromVectorStore(pgvs, ctx); - - // Query the index - const queryEngine = await index.asQueryEngine(); - - let question = ''; - while (! isQuit(question)) { - question = await getUserInput(readline); - - if (isQuit(question)) { - readline.close(); - process.exit(0); - } - - try { - const answer = await queryEngine.query(question); - console.log(answer.response); - } catch (error) { - console.error('Error:', error); - } - } - } catch (err) { - console.error(err); - console.log("If your PGVectorStore init failed, make sure to set env vars for PGUSER or USER, PGHOST, PGPORT and PGPASSWORD as needed."); - process.exit(1); + try { + const pgvs = new PGVectorStore(); + // Optional - set your collection name, default is no filter on this field. + // pgvs.setCollection(); + + const ctx = serviceContextFromDefaults(); + const index = await VectorStoreIndex.fromVectorStore(pgvs, ctx); + + // Query the index + const queryEngine = await index.asQueryEngine(); + + let question = ""; + while (!isQuit(question)) { + question = await getUserInput(readline); + + if (isQuit(question)) { + readline.close(); + process.exit(0); + } + + try { + const answer = await queryEngine.query(question); + console.log(answer.response); + } catch (error) { + console.error("Error:", error); + } } + } catch (err) { + console.error(err); + console.log( + "If your PGVectorStore init failed, make sure to set env vars for PGUSER or USER, PGHOST, PGPORT and PGPASSWORD as needed.", + ); + process.exit(1); + } } function isQuit(question: string) { - return ['q', 'quit', 'exit'].includes(question.trim().toLowerCase()); + return ["q", "quit", "exit"].includes(question.trim().toLowerCase()); } // Function to get user input as a promise function getUserInput(readline: any): Promise<string> { - return new Promise((resolve) => { - readline.question("What would you like to know?\n>", (userInput: string) => { - resolve(userInput); - }); - }); + return new Promise((resolve) => { + readline.question( + "What would you like to know?\n>", + (userInput: string) => { + resolve(userInput); + }, + ); + }); } -main().catch(console.error).finally(() => { process.exit(1) }); +main() + .catch(console.error) + .finally(() => { + process.exit(1); + }); diff --git a/examples/assemblyai.ts b/examples/assemblyai.ts new file mode 100644 index 0000000000000000000000000000000000000000..d0333806349002eb500a241701c31b677fd67e87 --- /dev/null +++ b/examples/assemblyai.ts @@ -0,0 +1,60 @@ +import { program } from "commander"; +import { AudioTranscriptReader, CreateTranscriptParameters } from "llamaindex"; +import { stdin as input, stdout as output } from "node:process"; +// readline/promises is still experimental so not in @types/node yet +// @ts-ignore +import readline from "node:readline/promises"; +import { VectorStoreIndex } from "../../packages/core/src/indices"; + +program + .option( + "-a, --audio-url [string]", + "URL or path of the audio file to transcribe", + ) + .option("-i, --transcript-id [string]", "ID of the AssemblyAI transcript") + .action(async (options) => { + if (!process.env.ASSEMBLYAI_API_KEY) { + console.log("No ASSEMBLYAI_API_KEY found in environment variables."); + return; + } + + const reader = new AudioTranscriptReader(); + let params: CreateTranscriptParameters | string; + console.log(options); + if (options.audioUrl) { + params = { + audio_url: options.audioUrl, + }; + } else if (options.transcriptId) { + params = options.transcriptId; + } else { + console.log( + "You must provide either an --audio-url or a --transcript-id", + ); + return; + } + + const documents = await reader.loadData(params); + console.log(documents); + + // Split text and create embeddings. Store them in a VectorStoreIndex + const index = await VectorStoreIndex.fromDocuments(documents); + + // Create query engine + const queryEngine = index.asQueryEngine(); + + const rl = readline.createInterface({ input, output }); + while (true) { + const query = await rl.question("Ask a question: "); + + if (!query) { + break; + } + + const response = await queryEngine.query(query); + + console.log(response.toString()); + } + }); + +program.parse(); diff --git a/examples/mongo.ts b/examples/mongo.ts index cf32a0af8070a5c90237377b2fb3d095136c238d..5b5f735ffb3f0ada9906d29fc429afa10deedf4c 100644 --- a/examples/mongo.ts +++ b/examples/mongo.ts @@ -1,6 +1,6 @@ import { MongoClient } from "mongodb"; -import { VectorStoreIndex } from "../../packages/core/src/indices"; import { Document } from "../../packages/core/src/Node"; +import { VectorStoreIndex } from "../../packages/core/src/indices"; import { SimpleMongoReader } from "../../packages/core/src/readers/SimpleMongoReader"; import { stdin as input, stdout as output } from "node:process"; diff --git a/packages/core/package.json b/packages/core/package.json index 3b6de230894dd1c3c973b474980b89d3f026ad96..db6e81e7242daa19ce95958550e1e1458a16b526 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -6,6 +6,7 @@ "@anthropic-ai/sdk": "^0.9.1", "@notionhq/client": "^2.2.13", "@xenova/transformers": "^2.8.0", + "assemblyai": "^3.0.1", "crypto-js": "^4.2.0", "js-tiktoken": "^1.0.8", "lodash": "^4.17.21", diff --git a/packages/core/src/embeddings/OpenAIEmbedding.ts b/packages/core/src/embeddings/OpenAIEmbedding.ts index ecc2509a75b93226cb34e6ecb0a810bfc0b49a6b..106c6cbff294d49bf87d912676e3656339b1b4c8 100644 --- a/packages/core/src/embeddings/OpenAIEmbedding.ts +++ b/packages/core/src/embeddings/OpenAIEmbedding.ts @@ -6,7 +6,7 @@ import { getAzureModel, shouldUseAzure, } from "../llm/azure"; -import { getOpenAISession, OpenAISession } from "../llm/openai"; +import { OpenAISession, getOpenAISession } from "../llm/openai"; import { BaseEmbedding } from "./types"; export enum OpenAIEmbeddingModelType { diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index a0d4b6d7551a18feefd6c5fbd0b326ec70c9b3af..c049a0710edd6b362be6784a3579850f3d1957c7 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -1,11 +1,6 @@ -export * from "./callbacks/CallbackManager"; export * from "./ChatEngine"; export * from "./ChatHistory"; -export * from "./constants"; -export * from "./embeddings"; export * from "./GlobalsHelper"; -export * from "./indices"; -export * from "./llm/LLM"; export * from "./Node"; export * from "./NodeParser"; export * from "./OutputParser"; @@ -13,7 +8,18 @@ export * from "./Prompt"; export * from "./PromptHelper"; export * from "./QueryEngine"; export * from "./QuestionGenerator"; -export * from "./readers/base"; +export * from "./Response"; +export * from "./ResponseSynthesizer"; +export * from "./Retriever"; +export * from "./ServiceContext"; +export * from "./TextSplitter"; +export * from "./Tool"; +export * from "./callbacks/CallbackManager"; +export * from "./constants"; +export * from "./embeddings"; +export * from "./indices"; +export * from "./llm/LLM"; +export * from "./readers/AssemblyAI"; export * from "./readers/CSVReader"; export * from "./readers/HTMLReader"; export * from "./readers/MarkdownReader"; @@ -21,10 +27,5 @@ export * from "./readers/NotionReader"; export * from "./readers/PDFReader"; export * from "./readers/SimpleDirectoryReader"; export * from "./readers/SimpleMongoReader"; -export * from "./Response"; -export * from "./ResponseSynthesizer"; -export * from "./Retriever"; -export * from "./ServiceContext"; +export * from "./readers/base"; export * from "./storage"; -export * from "./TextSplitter"; -export * from "./Tool"; diff --git a/packages/core/src/llm/LLM.ts b/packages/core/src/llm/LLM.ts index ef505224aff529071212daa6bb7cffcdd0cbf901..7ebc7bca5333f2a182d1dfa2e90d6c5783ea69ab 100644 --- a/packages/core/src/llm/LLM.ts +++ b/packages/core/src/llm/LLM.ts @@ -12,9 +12,9 @@ import { ChatCompletionMessageParam } from "openai/resources"; import { LLMOptions } from "portkey-ai"; import { globalsHelper, Tokenizers } from "../GlobalsHelper"; import { - AnthropicSession, ANTHROPIC_AI_PROMPT, ANTHROPIC_HUMAN_PROMPT, + AnthropicSession, getAnthropicSession, } from "./anthropic"; import { diff --git a/packages/core/src/readers/AssemblyAI.ts b/packages/core/src/readers/AssemblyAI.ts new file mode 100644 index 0000000000000000000000000000000000000000..6eea143bb0c529cb47b2aa4c9572417b975bdf6a --- /dev/null +++ b/packages/core/src/readers/AssemblyAI.ts @@ -0,0 +1,148 @@ +import { + AssemblyAI, + BaseServiceParams, + CreateTranscriptParameters, + SubtitleFormat, + TranscriptParagraph, + TranscriptSentence, +} from "assemblyai"; +import { Document } from "../Node"; +import { BaseReader } from "./base"; + +type AssemblyAIOptions = Partial<BaseServiceParams>; + +/** + * Base class for AssemblyAI Readers. + */ +abstract class AssemblyAIReader implements BaseReader { + protected client: AssemblyAI; + + /** + * Creates a new AssemblyAI Reader. + * @param assemblyAIOptions The options to configure the AssemblyAI Reader. + * Configure the `assemblyAIOptions.apiKey` with your AssemblyAI API key, or configure it as the `ASSEMBLYAI_API_KEY` environment variable. + */ + constructor(assemblyAIOptions?: AssemblyAIOptions) { + let options = assemblyAIOptions; + if (!options) { + options = {}; + } + if (!options.apiKey) { + options.apiKey = process.env.ASSEMBLYAI_API_KEY; + } + if (!options.apiKey) { + throw new Error("No AssemblyAI API key provided. Pass an `apiKey` option, or configure the `ASSEMBLYAI_API_KEY` environment variable."); + } + + this.client = new AssemblyAI(options as BaseServiceParams); + } + + abstract loadData(...args: any[]): Promise<Document[]>; + + protected async getOrCreateTranscript(params: CreateTranscriptParameters | string) { + if (typeof params === "string") { + return await this.client.transcripts.get(params); + } + else { + return await this.client.transcripts.create(params); + } + } + + protected async getTranscriptId(params: CreateTranscriptParameters | string) { + if (typeof params === "string") { + return params; + } + else { + return (await this.client.transcripts.create(params)).id; + } + } +} + +/** + * Creates and reads the transcript as a document using AssemblyAI. + */ +class AudioTranscriptReader extends AssemblyAIReader { + /** + * Creates or gets a transcript and loads the transcript as a document using AssemblyAI. + * @param params The parameters to create or get the transcript. + * @returns A promise that resolves to a single document containing the transcript text. + */ + async loadData(params: CreateTranscriptParameters | string): Promise<Document[]> { + const transcript = await this.getOrCreateTranscript(params); + return [ + new Document({ text: transcript.text || undefined }), + ]; + } +} + +/** + * Creates a transcript and returns a document for each paragraph. + */ +class AudioTranscriptParagraphsReader extends AssemblyAIReader { + /** + * Creates or gets a transcript, and returns a document for each paragraph. + * @param params The parameters to create or get the transcript. + * @returns A promise that resolves to an array of documents, each containing a paragraph of the transcript. + */ + async loadData(params: CreateTranscriptParameters | string): Promise<Document[]> { + let transcriptId = await this.getTranscriptId(params); + const paragraphsResponse = await this.client.transcripts.paragraphs( + transcriptId + ); + return paragraphsResponse.paragraphs.map((p: TranscriptParagraph) => + new Document({ text: p.text }), + ); + } +} + +/** + * Creates a transcript and returns a document for each sentence. + */ +class AudioTranscriptSentencesReader extends AssemblyAIReader { + /** + * Creates or gets a transcript, and returns a document for each sentence. + * @param params The parameters to create or get the transcript. + * @returns A promise that resolves to an array of documents, each containing a sentence of the transcript. + */ + async loadData(params: CreateTranscriptParameters | string): Promise<Document[]> { + let transcriptId = await this.getTranscriptId(params); + const sentencesResponse = await this.client.transcripts.sentences( + transcriptId + ); + return sentencesResponse.sentences.map((p: TranscriptSentence) => + new Document({ text: p.text }), + ); + } +} + +/** + * Creates a transcript and reads subtitles for the transcript as `srt` or `vtt` format. + */ +class AudioSubtitlesReader extends AssemblyAIReader { + /** + * Creates or gets a transcript and reads subtitles for the transcript as `srt` or `vtt` format. + * @param params The parameters to create or get the transcript. + * @param subtitleFormat The format of the subtitles, either `srt` or `vtt`. + * @returns A promise that resolves a document containing the subtitles as the page content. + */ + async loadData( + params: CreateTranscriptParameters | string, + subtitleFormat: SubtitleFormat = 'srt' + ): Promise<Document[]> { + let transcriptId = await this.getTranscriptId(params); + const subtitles = await this.client.transcripts.subtitles(transcriptId, subtitleFormat); + return [new Document({ text: subtitles })]; + } +} + +export { + AudioTranscriptReader, + AudioTranscriptParagraphsReader, + AudioTranscriptSentencesReader, + AudioSubtitlesReader, +} +export type { + AssemblyAIOptions, + CreateTranscriptParameters, + SubtitleFormat +} diff --git a/packages/core/src/storage/index.ts b/packages/core/src/storage/index.ts index 14f8a1ac137f4e7c87560671dae6b856ae6b8287..96f8743be6e564594ba4c1d662d536d8f33a6edc 100644 --- a/packages/core/src/storage/index.ts +++ b/packages/core/src/storage/index.ts @@ -1,12 +1,12 @@ +export * from "./FileSystem"; +export * from "./StorageContext"; export * from "./constants"; export { SimpleDocumentStore } from "./docStore/SimpleDocumentStore"; export * from "./docStore/types"; -export * from "./FileSystem"; export { SimpleIndexStore } from "./indexStore/SimpleIndexStore"; export * from "./indexStore/types"; export { SimpleKVStore } from "./kvStore/SimpleKVStore"; export * from "./kvStore/types"; -export * from "./StorageContext"; export { MongoDBAtlasVectorSearch } from "./vectorStore/MongoDBAtlasVectorStore"; export { SimpleVectorStore } from "./vectorStore/SimpleVectorStore"; export * from "./vectorStore/types"; diff --git a/packages/core/src/storage/vectorStore/PGVectorStore.ts b/packages/core/src/storage/vectorStore/PGVectorStore.ts index 59cf8d461528c2e9fc6593a657b8503da6a1336a..f116f10347a1a3e34013638c9f9edc515d99b130 100644 --- a/packages/core/src/storage/vectorStore/PGVectorStore.ts +++ b/packages/core/src/storage/vectorStore/PGVectorStore.ts @@ -1,25 +1,21 @@ -import pg from 'pg'; -import pgvector from 'pgvector/pg'; +import pg from "pg"; +import pgvector from "pgvector/pg"; -import { - VectorStore, - VectorStoreQuery, - VectorStoreQueryResult, -} from "./types"; +import { VectorStore, VectorStoreQuery, VectorStoreQueryResult } from "./types"; -import { BaseNode, Document, Metadata, MetadataMode } from '../../Node'; -import { GenericFileSystem } from '../FileSystem'; +import { BaseNode, Document, Metadata, MetadataMode } from "../../Node"; +import { GenericFileSystem } from "../FileSystem"; -export const PGVECTOR_SCHEMA = 'public'; -export const PGVECTOR_TABLE = 'llamaindex_embedding'; +export const PGVECTOR_SCHEMA = "public"; +export const PGVECTOR_TABLE = "llamaindex_embedding"; /** - * Provides support for writing and querying vector data in Postgres. + * Provides support for writing and querying vector data in Postgres. */ export class PGVectorStore implements VectorStore { storesText: boolean = true; - private collection: string = ''; + private collection: string = ""; /* FROM pg LIBRARY: @@ -66,8 +62,7 @@ export class PGVectorStore implements VectorStore { } private async getDb(): Promise<pg.Client> { - if (! this.db) { - + if (!this.db) { try { // Create DB connection // Read connection params from env - see comment block above @@ -83,7 +78,6 @@ export class PGVectorStore implements VectorStore { // All good? Keep the connection reference this.db = db; - } catch (err: any) { console.error(err); return Promise.reject(err); @@ -95,7 +89,7 @@ export class PGVectorStore implements VectorStore { private async checkSchema(db: pg.Client) { await db.query(`CREATE SCHEMA IF NOT EXISTS ${PGVECTOR_SCHEMA}`); - + const tbl = `CREATE TABLE IF NOT EXISTS ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE}( id uuid DEFAULT gen_random_uuid() PRIMARY KEY, external_id VARCHAR, @@ -135,7 +129,7 @@ export class PGVectorStore implements VectorStore { const sql: string = `DELETE FROM ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE} WHERE collection = $1`; - const db = await this.getDb() as pg.Client; + const db = (await this.getDb()) as pg.Client; const ret = await db.query(sql, [this.collection]); return ret; @@ -148,28 +142,27 @@ export class PGVectorStore implements VectorStore { * @returns A list of zero or more id values for the created records. */ async add(embeddingResults: BaseNode<Metadata>[]): Promise<string[]> { - const sql: string = `INSERT INTO ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE} (id, external_id, collection, document, metadata, embeddings) VALUES ($1, $2, $3, $4, $5, $6)`; - const db = await this.getDb() as pg.Client; + const db = (await this.getDb()) as pg.Client; let ret: string[] = []; for (let index = 0; index < embeddingResults.length; index++) { const row = embeddingResults[index]; - let id: any = row.id_.length? row.id_: null; + let id: any = row.id_.length ? row.id_ : null; let meta = row.metadata || {}; meta.create_date = new Date(); const params = [ id, - '', + "", this.collection, row.getContent(MetadataMode.EMBED), - meta, - '[' + row.getEmbedding().join(',') + ']' + meta, + "[" + row.getEmbedding().join(",") + "]", ]; try { @@ -180,7 +173,7 @@ export class PGVectorStore implements VectorStore { ret.push(id); } } catch (err) { - const msg = `${ err }`; + const msg = `${err}`; console.log(msg, err); } } @@ -196,12 +189,16 @@ export class PGVectorStore implements VectorStore { * @returns Promise that resolves if the delete query did not throw an error. */ async delete(refDocId: string, deleteKwargs?: any): Promise<void> { - const collectionCriteria = this.collection.length ? "AND collection = $2": ""; + const collectionCriteria = this.collection.length + ? "AND collection = $2" + : ""; const sql: string = `DELETE FROM ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE} - WHERE id = $1 ${ collectionCriteria }`; + WHERE id = $1 ${collectionCriteria}`; - const db = await this.getDb() as pg.Client; - const params = this.collection.length ? [refDocId, this.collection] : [refDocId]; + const db = (await this.getDb()) as pg.Client; + const params = this.collection.length + ? [refDocId, this.collection] + : [refDocId]; await db.query(sql, params); return Promise.resolve(); } @@ -212,45 +209,43 @@ export class PGVectorStore implements VectorStore { * @param options Required by VectorStore interface. Currently ignored. * @returns Zero or more Document instances with data from the vector store. */ - async query(query: VectorStoreQuery, options?: any): Promise<VectorStoreQueryResult> { + async query( + query: VectorStoreQuery, + options?: any, + ): Promise<VectorStoreQueryResult> { // TODO QUERY TYPES: // Distance: SELECT embedding <-> $1 AS distance FROM items; // Inner Product: SELECT (embedding <#> $1) * -1 AS inner_product FROM items; // Cosine Sim: SELECT 1 - (embedding <=> $1) AS cosine_similarity FROM items; - const embedding = '[' + query.queryEmbedding?.join(',') + ']'; + const embedding = "[" + query.queryEmbedding?.join(",") + "]"; const max = query.similarityTopK ?? 2; - const where = this.collection.length ? "WHERE collection = $2": ""; + const where = this.collection.length ? "WHERE collection = $2" : ""; // TODO Add collection filter if set const sql = `SELECT * FROM ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE} - ${ where } - ORDER BY embeddings <-> $1 LIMIT ${ max } + ${where} + ORDER BY embeddings <-> $1 LIMIT ${max} `; - const db = await this.getDb() as pg.Client; - const params = this.collection.length ? - [embedding, this.collection] : [ embedding] + const db = (await this.getDb()) as pg.Client; + const params = this.collection.length + ? [embedding, this.collection] + : [embedding]; const results = await db.query(sql, params); - const nodes = results.rows.map( - (row) => { - return new Document({ - id_: row.id, - text: row.document, - metadata: row.metadata, - embedding: row.embeddings - }) - } - ); + const nodes = results.rows.map((row) => { + return new Document({ + id_: row.id, + text: row.document, + metadata: row.metadata, + embedding: row.embeddings, + }); + }); const ret = { nodes: nodes, - similarities: results.rows.map( - (row) => row.embeddings - ), - ids: results.rows.map( - (row) => row.id - ) + similarities: results.rows.map((row) => row.embeddings), + ids: results.rows.map((row) => row.id), }; return Promise.resolve(ret); @@ -258,11 +253,14 @@ export class PGVectorStore implements VectorStore { /** * Required by VectorStore interface. Currently ignored. - * @param persistPath - * @param fs + * @param persistPath + * @param fs * @returns Resolved Promise. */ - persist(persistPath: string, fs?: GenericFileSystem | undefined): Promise<void> { + persist( + persistPath: string, + fs?: GenericFileSystem | undefined, + ): Promise<void> { return Promise.resolve(); } -} \ No newline at end of file +} diff --git a/packages/core/src/storage/vectorStore/SimpleVectorStore.ts b/packages/core/src/storage/vectorStore/SimpleVectorStore.ts index f314510a32f884bcfd3bd31b2bbf7fcc97931101..929ebe2c24b3cf9b7cc074902280d89785651b0d 100644 --- a/packages/core/src/storage/vectorStore/SimpleVectorStore.ts +++ b/packages/core/src/storage/vectorStore/SimpleVectorStore.ts @@ -1,13 +1,13 @@ import _ from "lodash"; import * as path from "path"; +import { BaseNode } from "../../Node"; import { getTopKEmbeddings, getTopKEmbeddingsLearner, getTopKMMREmbeddings, } from "../../embeddings"; -import { BaseNode } from "../../Node"; +import { GenericFileSystem, exists } from "../FileSystem"; import { DEFAULT_FS, DEFAULT_PERSIST_DIR } from "../constants"; -import { exists, GenericFileSystem } from "../FileSystem"; import { VectorStore, VectorStoreQuery, diff --git a/packages/eslint-config-custom/index.js b/packages/eslint-config-custom/index.js index 142321257509d7748ce9de170d9cae31406ba968..8417c5e6d90aef30254fec90a19fbb1d347ed4f1 100644 --- a/packages/eslint-config-custom/index.js +++ b/packages/eslint-config-custom/index.js @@ -9,6 +9,7 @@ module.exports = { "OPENAI_API_KEY", "REPLICATE_API_TOKEN", "ANTHROPIC_API_KEY", + "ASSEMBLYAI_API_KEY", "AZURE_OPENAI_KEY", "AZURE_OPENAI_ENDPOINT", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c45482b6d8f626a3798ed38165f85c3d4e73e2e5..7b8fa48c4e4b4ce819d245eb0e2a17ebdb617073 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -17,7 +17,7 @@ importers: version: 2.26.2 '@turbo/gen': specifier: ^1.10.16 - version: 1.10.16(@types/node@18.18.12)(typescript@4.9.5) + version: 1.10.16(@types/node@18.18.12)(typescript@5.3.2) '@types/jest': specifier: ^29.5.10 version: 29.5.10 @@ -41,10 +41,10 @@ importers: version: 3.1.0 prettier-plugin-organize-imports: specifier: ^3.2.4 - version: 3.2.4(prettier@3.1.0)(typescript@4.9.5) + version: 3.2.4(prettier@3.1.0)(typescript@5.3.2) ts-jest: specifier: ^29.1.1 - version: 29.1.1(@babel/core@7.23.3)(jest@29.7.0)(typescript@4.9.5) + version: 29.1.1(@babel/core@7.23.3)(jest@29.7.0)(typescript@5.3.2) turbo: specifier: ^1.10.16 version: 1.10.16 @@ -121,7 +121,7 @@ importers: version: 18.18.8 ts-node: specifier: ^10.9.1 - version: 10.9.1(@types/node@18.18.8)(typescript@4.9.5) + version: 10.9.1(@types/node@18.18.8)(typescript@5.3.2) apps/simple: dependencies: @@ -143,7 +143,7 @@ importers: version: 18.18.7 ts-node: specifier: ^10.9.1 - version: 10.9.1(@types/node@18.18.7)(typescript@4.9.5) + version: 10.9.1(@types/node@18.18.7)(typescript@5.3.2) packages/core: dependencies: @@ -156,6 +156,9 @@ importers: '@xenova/transformers': specifier: ^2.8.0 version: 2.8.0 + assemblyai: + specifier: ^3.0.1 + version: 3.1.1 crypto-js: specifier: ^4.2.0 version: 4.2.0 @@ -315,7 +318,7 @@ importers: dependencies: eslint-config-next: specifier: ^13.4.1 - version: 13.4.1(eslint@8.54.0)(typescript@4.9.5) + version: 13.4.1(eslint@8.54.0)(typescript@5.3.2) eslint-config-prettier: specifier: ^8.3.0 version: 8.8.0(eslint@8.54.0) @@ -4141,7 +4144,7 @@ packages: resolution: {integrity: sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==} dev: true - /@turbo/gen@1.10.16(@types/node@18.18.12)(typescript@4.9.5): + /@turbo/gen@1.10.16(@types/node@18.18.12)(typescript@5.3.2): resolution: {integrity: sha512-PzyluADjVuy5OcIi+/aRcD70OElQpRVRDdfZ9fH8G5Fv75lQcNrjd1bBGKmhjSw+g+eTEkXMGnY7s6gsCYjYTQ==} hasBin: true dependencies: @@ -4153,7 +4156,7 @@ packages: minimatch: 9.0.3 node-plop: 0.26.3 proxy-agent: 6.3.1 - ts-node: 10.9.1(@types/node@18.18.12)(typescript@4.9.5) + ts-node: 10.9.1(@types/node@18.18.12)(typescript@5.3.2) update-check: 1.5.4 validate-npm-package-name: 5.0.0 transitivePeerDependencies: @@ -4674,7 +4677,7 @@ packages: dependencies: '@types/yargs-parser': 21.0.3 - /@typescript-eslint/parser@5.59.2(eslint@8.54.0)(typescript@4.9.5): + /@typescript-eslint/parser@5.59.2(eslint@8.54.0)(typescript@5.3.2): resolution: {integrity: sha512-uq0sKyw6ao1iFOZZGk9F8Nro/8+gfB5ezl1cA06SrqbgJAt0SRoFhb9pXaHvkrxUpZaoLxt8KlovHNk8Gp6/HQ==} engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} peerDependencies: @@ -4686,10 +4689,10 @@ packages: dependencies: '@typescript-eslint/scope-manager': 5.59.2 '@typescript-eslint/types': 5.59.2 - '@typescript-eslint/typescript-estree': 5.59.2(typescript@4.9.5) + '@typescript-eslint/typescript-estree': 5.59.2(typescript@5.3.2) debug: 4.3.4 eslint: 8.54.0 - typescript: 4.9.5 + typescript: 5.3.2 transitivePeerDependencies: - supports-color dev: false @@ -4707,7 +4710,7 @@ packages: engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} dev: false - /@typescript-eslint/typescript-estree@5.59.2(typescript@4.9.5): + /@typescript-eslint/typescript-estree@5.59.2(typescript@5.3.2): resolution: {integrity: sha512-+j4SmbwVmZsQ9jEyBMgpuBD0rKwi9RxRpjX71Brr73RsYnEr3Lt5QZ624Bxphp8HUkSKfqGnPJp1kA5nl0Sh7Q==} engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} peerDependencies: @@ -4722,8 +4725,8 @@ packages: globby: 11.1.0 is-glob: 4.0.3 semver: 7.5.4 - tsutils: 3.21.0(typescript@4.9.5) - typescript: 4.9.5 + tsutils: 3.21.0(typescript@5.3.2) + typescript: 5.3.2 transitivePeerDependencies: - supports-color dev: false @@ -5200,6 +5203,15 @@ packages: safer-buffer: 2.1.2 dev: true + /assemblyai@3.1.1: + resolution: {integrity: sha512-rEhLnIZU7TupOkdzN9WjxhVU5bxMVofkNslzTArsuilaqOusPUmh6I7SGcJCGwKKE8qEweBaoY4DZ03cxv4KoA==} + dependencies: + ws: 8.14.2 + transitivePeerDependencies: + - bufferutil + - utf-8-validate + dev: false + /assert@2.1.0: resolution: {integrity: sha512-eLHpSK/Y4nhMJ07gDaAzoX/XAKS8PSaojml3M0DM4JpV1LAi5JOJ/p6H/XWrl8L+DzVEvVCW1z3vWAaB9oTsQw==} dependencies: @@ -7549,7 +7561,7 @@ packages: source-map: 0.6.1 dev: true - /eslint-config-next@13.4.1(eslint@8.54.0)(typescript@4.9.5): + /eslint-config-next@13.4.1(eslint@8.54.0)(typescript@5.3.2): resolution: {integrity: sha512-ajuxjCkW1hvirr0EQZb3/B/bFH52Z7CT89uCtTcICFL9l30i5c8hN4p0LXvTjdOXNPV5fEDcxBgGHgXdzTj1/A==} peerDependencies: eslint: ^7.23.0 || ^8.0.0 @@ -7560,7 +7572,7 @@ packages: dependencies: '@next/eslint-plugin-next': 13.4.1 '@rushstack/eslint-patch': 1.2.0 - '@typescript-eslint/parser': 5.59.2(eslint@8.54.0)(typescript@4.9.5) + '@typescript-eslint/parser': 5.59.2(eslint@8.54.0)(typescript@5.3.2) eslint: 8.54.0 eslint-import-resolver-node: 0.3.7 eslint-import-resolver-typescript: 3.5.5(@typescript-eslint/parser@5.59.2)(eslint-import-resolver-node@0.3.7)(eslint-plugin-import@2.27.5)(eslint@8.54.0) @@ -7568,7 +7580,7 @@ packages: eslint-plugin-jsx-a11y: 6.7.1(eslint@8.54.0) eslint-plugin-react: 7.32.2(eslint@8.54.0) eslint-plugin-react-hooks: 4.6.0(eslint@8.54.0) - typescript: 4.9.5 + typescript: 5.3.2 transitivePeerDependencies: - eslint-import-resolver-webpack - supports-color @@ -7647,7 +7659,7 @@ packages: eslint-import-resolver-webpack: optional: true dependencies: - '@typescript-eslint/parser': 5.59.2(eslint@8.54.0)(typescript@4.9.5) + '@typescript-eslint/parser': 5.59.2(eslint@8.54.0)(typescript@5.3.2) debug: 3.2.7 eslint: 8.54.0 eslint-import-resolver-node: 0.3.7 @@ -7666,7 +7678,7 @@ packages: '@typescript-eslint/parser': optional: true dependencies: - '@typescript-eslint/parser': 5.59.2(eslint@8.54.0)(typescript@4.9.5) + '@typescript-eslint/parser': 5.59.2(eslint@8.54.0)(typescript@5.3.2) array-includes: 3.1.6 array.prototype.flat: 1.3.1 array.prototype.flatmap: 1.3.1 @@ -12514,7 +12526,7 @@ packages: engines: {node: '>=4'} dev: false - /prettier-plugin-organize-imports@3.2.4(prettier@3.1.0)(typescript@4.9.5): + /prettier-plugin-organize-imports@3.2.4(prettier@3.1.0)(typescript@5.3.2): resolution: {integrity: sha512-6m8WBhIp0dfwu0SkgfOxJqh+HpdyfqSSLfKKRZSFbDuEQXDDndb8fTpRWkUrX/uBenkex3MgnVk0J3b3Y5byog==} peerDependencies: '@volar/vue-language-plugin-pug': ^1.0.4 @@ -12528,7 +12540,7 @@ packages: optional: true dependencies: prettier: 3.1.0 - typescript: 4.9.5 + typescript: 5.3.2 dev: true /prettier@2.8.8: @@ -14681,7 +14693,7 @@ packages: resolution: {integrity: sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA==} dev: true - /ts-jest@29.1.1(@babel/core@7.23.3)(jest@29.7.0)(typescript@4.9.5): + /ts-jest@29.1.1(@babel/core@7.23.3)(jest@29.7.0)(typescript@5.3.2): resolution: {integrity: sha512-D6xjnnbP17cC85nliwGiL+tpoKN0StpgE0TeOjXQTU6MVCfsB4v7aW05CgQ/1OywGb0x/oy9hHFnN+sczTiRaA==} engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} hasBin: true @@ -14711,11 +14723,11 @@ packages: lodash.memoize: 4.1.2 make-error: 1.3.6 semver: 7.5.4 - typescript: 4.9.5 + typescript: 5.3.2 yargs-parser: 21.1.1 dev: true - /ts-node@10.9.1(@types/node@18.18.12)(typescript@4.9.5): + /ts-node@10.9.1(@types/node@18.18.12)(typescript@5.3.2): resolution: {integrity: sha512-NtVysVPkxxrwFGUUxGYhfux8k78pQB3JqYBXlLRZgdGUqTO5wU/UyHop5p70iEbGhB7q5KmiZiU0Y3KlJrScEw==} hasBin: true peerDependencies: @@ -14741,12 +14753,12 @@ packages: create-require: 1.1.1 diff: 4.0.2 make-error: 1.3.6 - typescript: 4.9.5 + typescript: 5.3.2 v8-compile-cache-lib: 3.0.1 yn: 3.1.1 dev: true - /ts-node@10.9.1(@types/node@18.18.7)(typescript@4.9.5): + /ts-node@10.9.1(@types/node@18.18.7)(typescript@5.3.2): resolution: {integrity: sha512-NtVysVPkxxrwFGUUxGYhfux8k78pQB3JqYBXlLRZgdGUqTO5wU/UyHop5p70iEbGhB7q5KmiZiU0Y3KlJrScEw==} hasBin: true peerDependencies: @@ -14772,12 +14784,12 @@ packages: create-require: 1.1.1 diff: 4.0.2 make-error: 1.3.6 - typescript: 4.9.5 + typescript: 5.3.2 v8-compile-cache-lib: 3.0.1 yn: 3.1.1 dev: true - /ts-node@10.9.1(@types/node@18.18.8)(typescript@4.9.5): + /ts-node@10.9.1(@types/node@18.18.8)(typescript@5.3.2): resolution: {integrity: sha512-NtVysVPkxxrwFGUUxGYhfux8k78pQB3JqYBXlLRZgdGUqTO5wU/UyHop5p70iEbGhB7q5KmiZiU0Y3KlJrScEw==} hasBin: true peerDependencies: @@ -14803,7 +14815,7 @@ packages: create-require: 1.1.1 diff: 4.0.2 make-error: 1.3.6 - typescript: 4.9.5 + typescript: 5.3.2 v8-compile-cache-lib: 3.0.1 yn: 3.1.1 dev: true @@ -14863,14 +14875,14 @@ packages: - ts-node dev: true - /tsutils@3.21.0(typescript@4.9.5): + /tsutils@3.21.0(typescript@5.3.2): resolution: {integrity: sha512-mHKK3iUXL+3UF6xL5k0PEhKRUBKPBCv/+RkEOpjRWxxx27KKRBmmA60A9pgOUvMi8GKhRMPEmjBRPzs2W7O1OA==} engines: {node: '>= 6'} peerDependencies: typescript: '>=2.8.0 || >= 3.2.0-dev || >= 3.3.0-dev || >= 3.4.0-dev || >= 3.5.0-dev || >= 3.6.0-dev || >= 3.6.0-beta || >= 3.7.0-dev || >= 3.7.0-beta' dependencies: tslib: 1.14.1 - typescript: 4.9.5 + typescript: 5.3.2 dev: false /tty-browserify@0.0.1: @@ -15090,7 +15102,6 @@ packages: resolution: {integrity: sha512-6l+RyNy7oAHDfxC4FzSJcz9vnjTKxrLpDG5M2Vu4SHRVNg6xzqZp6LYSR9zjqQTu8DU/f5xwxUdADOkbrIX2gQ==} engines: {node: '>=14.17'} hasBin: true - dev: true /ua-parser-js@1.0.36: resolution: {integrity: sha512-znuyCIXzl8ciS3+y3fHJI/2OhQIXbXw9MWC/o3qwyR+RGppjZHrM27CGFSKCJXi2Kctiz537iOu2KnXs1lMQhw==}