diff --git a/apps/simple/assemblyai.ts b/apps/simple/assemblyai.ts
new file mode 100644
index 0000000000000000000000000000000000000000..d0333806349002eb500a241701c31b677fd67e87
--- /dev/null
+++ b/apps/simple/assemblyai.ts
@@ -0,0 +1,78 @@
+import { program } from "commander";
+import {
+  AudioTranscriptReader,
+  CreateTranscriptParameters,
+  VectorStoreIndex,
+} from "llamaindex";
+import { stdin as input, stdout as output } from "node:process";
+// readline/promises is still experimental so not in @types/node yet
+// @ts-ignore
+import readline from "node:readline/promises";
+
+// Example CLI: load an AssemblyAI transcript (new or existing), index it, and
+// answer questions about it interactively.
+program
+  // `<string>` makes the value mandatory, so commander rejects a bare `-a` / `-i`
+  // instead of handing the action the boolean `true`.
+  .option(
+    "-a, --audio-url <string>",
+    "URL or path of the audio file to transcribe",
+  )
+  .option("-i, --transcript-id <string>", "ID of the AssemblyAI transcript")
+  .action(async (options) => {
+    if (!process.env.ASSEMBLYAI_API_KEY) {
+      console.error("No ASSEMBLYAI_API_KEY found in environment variables.");
+      process.exitCode = 1;
+      return;
+    }
+
+    let params: CreateTranscriptParameters | string;
+    if (options.audioUrl) {
+      params = {
+        audio_url: options.audioUrl,
+      };
+    } else if (options.transcriptId) {
+      params = options.transcriptId;
+    } else {
+      console.error(
+        "You must provide either an --audio-url or a --transcript-id",
+      );
+      process.exitCode = 1;
+      return;
+    }
+
+    const reader = new AudioTranscriptReader();
+    const documents = await reader.loadData(params);
+    console.log(documents);
+
+    // Split text and create embeddings. Store them in a VectorStoreIndex
+    const index = await VectorStoreIndex.fromDocuments(documents);
+
+    // Create query engine
+    const queryEngine = index.asQueryEngine();
+
+    const rl = readline.createInterface({ input, output });
+    try {
+      // Keep answering until the user submits an empty line.
+      while (true) {
+        const query = await rl.question("Ask a question: ");
+
+        if (!query) {
+          break;
+        }
+
+        const response = await queryEngine.query(query);
+
+        console.log(response.toString());
+      }
+    } finally {
+      // Close the interface so stdin is released and the process can exit.
+      rl.close();
+    }
+  });
+
+// The action is async: parseAsync() returns its promise so a failure is reported here.
+program.parseAsync().catch((error: unknown) => {
+  console.error(error);
+  process.exitCode = 1;
+});
diff --git a/examples/assemblyai.ts b/examples/assemblyai.ts
new file mode 100644
index 0000000000000000000000000000000000000000..d0333806349002eb500a241701c31b677fd67e87
--- /dev/null
+++ b/examples/assemblyai.ts
@@ -0,0 +1,78 @@
+import { program } from "commander";
+import {
+  AudioTranscriptReader,
+  CreateTranscriptParameters,
+  VectorStoreIndex,
+} from "llamaindex";
+import { stdin as input, stdout as output } from "node:process";
+// readline/promises is still experimental so not in @types/node yet
+// @ts-ignore
+import readline from "node:readline/promises";
+
+// Example CLI: load an AssemblyAI transcript (new or existing), index it, and
+// answer questions about it interactively.
+program
+  // `<string>` makes the value mandatory, so commander rejects a bare `-a` / `-i`
+  // instead of handing the action the boolean `true`.
+  .option(
+    "-a, --audio-url <string>",
+    "URL or path of the audio file to transcribe",
+  )
+  .option("-i, --transcript-id <string>", "ID of the AssemblyAI transcript")
+  .action(async (options) => {
+    if (!process.env.ASSEMBLYAI_API_KEY) {
+      console.error("No ASSEMBLYAI_API_KEY found in environment variables.");
+      process.exitCode = 1;
+      return;
+    }
+
+    let params: CreateTranscriptParameters | string;
+    if (options.audioUrl) {
+      params = {
+        audio_url: options.audioUrl,
+      };
+    } else if (options.transcriptId) {
+      params = options.transcriptId;
+    } else {
+      console.error(
+        "You must provide either an --audio-url or a --transcript-id",
+      );
+      process.exitCode = 1;
+      return;
+    }
+
+    const reader = new AudioTranscriptReader();
+    const documents = await reader.loadData(params);
+    console.log(documents);
+
+    // Split text and create embeddings. Store them in a VectorStoreIndex
+    const index = await VectorStoreIndex.fromDocuments(documents);
+
+    // Create query engine
+    const queryEngine = index.asQueryEngine();
+
+    const rl = readline.createInterface({ input, output });
+    try {
+      // Keep answering until the user submits an empty line.
+      while (true) {
+        const query = await rl.question("Ask a question: ");
+
+        if (!query) {
+          break;
+        }
+
+        const response = await queryEngine.query(query);
+
+        console.log(response.toString());
+      }
+    } finally {
+      // Close the interface so stdin is released and the process can exit.
+      rl.close();
+    }
+  });
+
+// The action is async: parseAsync() returns its promise so a failure is reported here.
+program.parseAsync().catch((error: unknown) => {
+  console.error(error);
+  process.exitCode = 1;
+});
diff --git a/packages/core/package.json b/packages/core/package.json index 7442609f6d8d33d3ae1feb7ab25cdf685853eeef..d9d55b3ca8c8e965690a3b4788e0d4902f19cdfe 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -6,6 +6,7 @@ "@anthropic-ai/sdk": "^0.9.1", "@notionhq/client": "^2.2.13", "@xenova/transformers": "^2.8.0", + "assemblyai": "^3.0.1", "crypto-js": "^4.2.0", "js-tiktoken": "^1.0.8", "lodash": "^4.17.21", diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index dde8fff26fef4de5c78d7adb567f956952259183..c049a0710edd6b362be6784a3579850f3d1957c7 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -19,6 +19,7 @@ export * from "./constants"; export * from "./embeddings"; export * from "./indices"; export * from "./llm/LLM"; +export * from "./readers/AssemblyAI"; export * from "./readers/CSVReader"; export * from "./readers/HTMLReader"; export * from "./readers/MarkdownReader"; diff --git a/packages/core/src/readers/AssemblyAI.ts b/packages/core/src/readers/AssemblyAI.ts new file mode 100644 index 0000000000000000000000000000000000000000..6eea143bb0c529cb47b2aa4c9572417b975bdf6a --- /dev/null +++ b/packages/core/src/readers/AssemblyAI.ts @@ -0,0 +1,148 @@ +import { + AssemblyAI, + BaseServiceParams, + CreateTranscriptParameters, + SubtitleFormat, + TranscriptParagraph, + TranscriptSentence, +} from "assemblyai"; +import { Document } from "../Node"; +import { BaseReader } from "./base"; + +type AssemblyAIOptions = Partial<BaseServiceParams>; + +/** + * 
Base class for AssemblyAI Readers.
+ */
+abstract class AssemblyAIReader implements BaseReader {
+  protected client: AssemblyAI;
+
+  /**
+   * Creates a new AssemblyAI Reader.
+   * @param assemblyAIOptions The options to configure the AssemblyAI Reader.
+   * Configure the `assemblyAIOptions.apiKey` with your AssemblyAI API key, or configure it as the `ASSEMBLYAI_API_KEY` environment variable.
+   * @throws If no API key is available from either source.
+   */
+  constructor(assemblyAIOptions?: AssemblyAIOptions) {
+    // Work on a copy so the caller's options object is never written to.
+    const options: AssemblyAIOptions = { ...assemblyAIOptions };
+    if (!options.apiKey) {
+      options.apiKey = process.env.ASSEMBLYAI_API_KEY;
+    }
+    if (!options.apiKey) {
+      throw new Error(
+        "No AssemblyAI API key provided. Pass an `apiKey` option, or configure the `ASSEMBLYAI_API_KEY` environment variable.",
+      );
+    }
+
+    this.client = new AssemblyAI(options as BaseServiceParams);
+  }
+
+  abstract loadData(...args: any[]): Promise<Document[]>;
+
+  /**
+   * Gets the transcript when `params` is a transcript ID, otherwise creates a
+   * new transcript from `params`.
+   */
+  protected async getOrCreateTranscript(params: CreateTranscriptParameters | string) {
+    if (typeof params === "string") {
+      return await this.client.transcripts.get(params);
+    }
+    return await this.client.transcripts.create(params);
+  }
+
+  /**
+   * Returns `params` as-is when it is a transcript ID, otherwise creates a new
+   * transcript from `params` and returns its ID.
+   */
+  protected async getTranscriptId(params: CreateTranscriptParameters | string) {
+    if (typeof params === "string") {
+      return params;
+    }
+    return (await this.client.transcripts.create(params)).id;
+  }
+}
+
+/**
+ * Creates and reads the transcript as a document using AssemblyAI.
+ */
+class AudioTranscriptReader extends AssemblyAIReader {
+  /**
+   * Creates or gets a transcript and loads the transcript as a document using AssemblyAI.
+   * @param params The parameters to create or get the transcript.
+   * @returns A promise that resolves to a single document containing the transcript text.
+   * @throws If the transcript's status is `error`.
+   */
+  async loadData(params: CreateTranscriptParameters | string): Promise<Document[]> {
+    const transcript = await this.getOrCreateTranscript(params);
+    // A failed transcription has no text; report its error rather than returning an empty document.
+    if (transcript.status === "error") {
+      throw new Error(`AssemblyAI transcription failed: ${transcript.error}`);
+    }
+    return [new Document({ text: transcript.text ?? undefined })];
+  }
+}
+
+/**
+ * Creates a transcript and returns a document for each paragraph.
+ */
+class AudioTranscriptParagraphsReader extends AssemblyAIReader {
+  /**
+   * Creates or gets a transcript, and returns a document for each paragraph.
+   * @param params The parameters to create or get the transcript.
+   * @returns A promise that resolves to an array of documents, each containing a paragraph of the transcript.
+   */
+  async loadData(params: CreateTranscriptParameters | string): Promise<Document[]> {
+    const transcriptId = await this.getTranscriptId(params);
+    const { paragraphs } = await this.client.transcripts.paragraphs(transcriptId);
+    return paragraphs.map((p: TranscriptParagraph) => new Document({ text: p.text }));
+  }
+}
+
+/**
+ * Creates a transcript and returns a document for each sentence.
+ */
+class AudioTranscriptSentencesReader extends AssemblyAIReader {
+  /**
+   * Creates or gets a transcript, and returns a document for each sentence.
+   * @param params The parameters to create or get the transcript.
+   * @returns A promise that resolves to an array of documents, each containing a sentence of the transcript.
+   */
+  async loadData(params: CreateTranscriptParameters | string): Promise<Document[]> {
+    const transcriptId = await this.getTranscriptId(params);
+    const { sentences } = await this.client.transcripts.sentences(transcriptId);
+    return sentences.map((s: TranscriptSentence) => new Document({ text: s.text }));
+  }
+}
+
+/**
+ * Creates a transcript and reads subtitles for the transcript as `srt` or `vtt` format.
+ */
+class AudioSubtitlesReader extends AssemblyAIReader {
+  /**
+   * Loads the subtitles of a transcript as one document.
+   * @param params Either the ID of an existing transcript, or the parameters to create a new one.
+   * @param subtitleFormat Subtitle format to request, `srt` (the default) or `vtt`.
+   * @returns A promise that resolves to a single document whose text is the subtitles.
+   */
+  async loadData(
+    params: CreateTranscriptParameters | string,
+    subtitleFormat: SubtitleFormat = "srt",
+  ): Promise<Document[]> {
+    const id = await this.getTranscriptId(params);
+    const text = await this.client.transcripts.subtitles(id, subtitleFormat);
+    return [new Document({ text })];
+  }
+}
+
+export {
+  AudioSubtitlesReader,
+  AudioTranscriptParagraphsReader,
+  AudioTranscriptReader,
+  AudioTranscriptSentencesReader,
+};
+export type {
+  AssemblyAIOptions,
+  CreateTranscriptParameters,
+  SubtitleFormat,
+};
diff --git a/packages/eslint-config-custom/index.js b/packages/eslint-config-custom/index.js index 142321257509d7748ce9de170d9cae31406ba968..8417c5e6d90aef30254fec90a19fbb1d347ed4f1 100644 --- a/packages/eslint-config-custom/index.js +++ b/packages/eslint-config-custom/index.js @@ -9,6 +9,7 @@ module.exports = { "OPENAI_API_KEY", "REPLICATE_API_TOKEN", "ANTHROPIC_API_KEY", + "ASSEMBLYAI_API_KEY", "AZURE_OPENAI_KEY", "AZURE_OPENAI_ENDPOINT", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 410d9fc85dd67a28aae52624f0e7d8a7a0a98a07..f6d4d7717d01897a75367b91ae93ac7db047606b 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -156,6 +156,9 @@ importers: '@xenova/transformers': specifier: ^2.8.0 version: 2.8.0 + assemblyai: + specifier: ^3.0.1 + version: 3.1.1 crypto-js: specifier: ^4.2.0 version: 4.2.0 @@ -5200,6 +5203,15 @@ packages: safer-buffer: 2.1.2 dev: true + /assemblyai@3.1.1: + resolution: {integrity: sha512-rEhLnIZU7TupOkdzN9WjxhVU5bxMVofkNslzTArsuilaqOusPUmh6I7SGcJCGwKKE8qEweBaoY4DZ03cxv4KoA==} + dependencies: + ws: 
8.14.2 + transitivePeerDependencies: + - bufferutil + - utf-8-validate + dev: false + /assert@2.1.0: resolution: {integrity: sha512-eLHpSK/Y4nhMJ07gDaAzoX/XAKS8PSaojml3M0DM4JpV1LAi5JOJ/p6H/XWrl8L+DzVEvVCW1z3vWAaB9oTsQw==} dependencies: