diff --git a/apps/simple/assemblyai.ts b/apps/simple/assemblyai.ts deleted file mode 100644 index d0333806349002eb500a241701c31b677fd67e87..0000000000000000000000000000000000000000 --- a/apps/simple/assemblyai.ts +++ /dev/null @@ -1,60 +0,0 @@ -import { program } from "commander"; -import { AudioTranscriptReader, CreateTranscriptParameters } from "llamaindex"; -import { stdin as input, stdout as output } from "node:process"; -// readline/promises is still experimental so not in @types/node yet -// @ts-ignore -import readline from "node:readline/promises"; -import { VectorStoreIndex } from "../../packages/core/src/indices"; - -program - .option( - "-a, --audio-url [string]", - "URL or path of the audio file to transcribe", - ) - .option("-i, --transcript-id [string]", "ID of the AssemblyAI transcript") - .action(async (options) => { - if (!process.env.ASSEMBLYAI_API_KEY) { - console.log("No ASSEMBLYAI_API_KEY found in environment variables."); - return; - } - - const reader = new AudioTranscriptReader(); - let params: CreateTranscriptParameters | string; - console.log(options); - if (options.audioUrl) { - params = { - audio_url: options.audioUrl, - }; - } else if (options.transcriptId) { - params = options.transcriptId; - } else { - console.log( - "You must provide either an --audio-url or a --transcript-id", - ); - return; - } - - const documents = await reader.loadData(params); - console.log(documents); - - // Split text and create embeddings. Store them in a VectorStoreIndex - const index = await VectorStoreIndex.fromDocuments(documents); - - // Create query engine - const queryEngine = index.asQueryEngine(); - - const rl = readline.createInterface({ input, output }); - while (true) { - const query = await rl.question("Ask a question: "); - - if (!query) { - break; - } - - const response = await queryEngine.query(query); - - console.log(response.toString()); - } - }); - -program.parse(); diff --git a/examples/assemblyai.ts b/examples/assemblyai.ts deleted file mode 100644 index d0333806349002eb500a241701c31b677fd67e87..0000000000000000000000000000000000000000 --- a/examples/assemblyai.ts +++ /dev/null @@ -1,60 +0,0 @@ -import { program } from "commander"; -import { AudioTranscriptReader, CreateTranscriptParameters } from "llamaindex"; -import { stdin as input, stdout as output } from "node:process"; -// readline/promises is still experimental so not in @types/node yet -// @ts-ignore -import readline from "node:readline/promises"; -import { VectorStoreIndex } from "../../packages/core/src/indices"; - -program - .option( - "-a, --audio-url [string]", - "URL or path of the audio file to transcribe", - ) - .option("-i, --transcript-id [string]", "ID of the AssemblyAI transcript") - .action(async (options) => { - if (!process.env.ASSEMBLYAI_API_KEY) { - console.log("No ASSEMBLYAI_API_KEY found in environment variables."); - return; - } - - const reader = new AudioTranscriptReader(); - let params: CreateTranscriptParameters | string; - console.log(options); - if (options.audioUrl) { - params = { - audio_url: options.audioUrl, - }; - } else if (options.transcriptId) { - params = options.transcriptId; - } else { - console.log( - "You must provide either an --audio-url or a --transcript-id", - ); - return; - } - - const documents = await reader.loadData(params); - console.log(documents); - - // Split text and create embeddings. Store them in a VectorStoreIndex - const index = await VectorStoreIndex.fromDocuments(documents); - - // Create query engine - const queryEngine = index.asQueryEngine(); - - const rl = readline.createInterface({ input, output }); - while (true) { - const query = await rl.question("Ask a question: "); - - if (!query) { - break; - } - - const response = await queryEngine.query(query); - - console.log(response.toString()); - } - }); - -program.parse(); diff --git a/packages/core/package.json b/packages/core/package.json index bb7c0cc2c0db14cd9ade0a7edcc2184bcdc6e761..0f3dbc5c9956f68e3573fe15aa7a541585d7c69e 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -6,7 +6,6 @@ "@anthropic-ai/sdk": "^0.9.1", "@notionhq/client": "^2.2.13", "@xenova/transformers": "^2.8.0", - "assemblyai": "^3.0.1", "crypto-js": "^4.2.0", "js-tiktoken": "^1.0.8", "lodash": "^4.17.21", diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index c049a0710edd6b362be6784a3579850f3d1957c7..dde8fff26fef4de5c78d7adb567f956952259183 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -19,7 +19,6 @@ export * from "./constants"; export * from "./embeddings"; export * from "./indices"; export * from "./llm/LLM"; -export * from "./readers/AssemblyAI"; export * from "./readers/CSVReader"; export * from "./readers/HTMLReader"; export * from "./readers/MarkdownReader"; diff --git a/packages/core/src/readers/AssemblyAI.ts b/packages/core/src/readers/AssemblyAI.ts deleted file mode 100644 index 6eea143bb0c529cb47b2aa4c9572417b975bdf6a..0000000000000000000000000000000000000000 --- a/packages/core/src/readers/AssemblyAI.ts +++ /dev/null @@ -1,148 +0,0 @@ -import { - AssemblyAI, - BaseServiceParams, - CreateTranscriptParameters, - SubtitleFormat, - TranscriptParagraph, - TranscriptSentence, -} from "assemblyai"; -import { Document } from "../Node"; -import { BaseReader } from "./base"; - -type AssemblyAIOptions = Partial<BaseServiceParams>; - -/** - * Base class for AssemblyAI Readers. - */ -abstract class AssemblyAIReader implements BaseReader { - protected client: AssemblyAI; - - /** - * Creates a new AssemblyAI Reader. - * @param assemblyAIOptions The options to configure the AssemblyAI Reader. - * Configure the `assemblyAIOptions.apiKey` with your AssemblyAI API key, or configure it as the `ASSEMBLYAI_API_KEY` environment variable. - */ - constructor(assemblyAIOptions?: AssemblyAIOptions) { - let options = assemblyAIOptions; - if (!options) { - options = {}; - } - if (!options.apiKey) { - options.apiKey = process.env.ASSEMBLYAI_API_KEY; - } - if (!options.apiKey) { - throw new Error("No AssemblyAI API key provided. Pass an `apiKey` option, or configure the `ASSEMBLYAI_API_KEY` environment variable."); - } - - this.client = new AssemblyAI(options as BaseServiceParams); - } - - abstract loadData(...args: any[]): Promise<Document[]>; - - protected async getOrCreateTranscript(params: CreateTranscriptParameters | string) { - if (typeof params === "string") { - return await this.client.transcripts.get(params); - } - else { - return await this.client.transcripts.create(params); - } - } - - protected async getTranscriptId(params: CreateTranscriptParameters | string) { - if (typeof params === "string") { - return params; - } - else { - return (await this.client.transcripts.create(params)).id; - } - } -} - -/** - * Creates and reads the transcript as a document using AssemblyAI. - */ -class AudioTranscriptReader extends AssemblyAIReader { - /** - * Creates or gets a transcript and loads the transcript as a document using AssemblyAI. - * @param params The parameters to create or get the transcript. - * @returns A promise that resolves to a single document containing the transcript text. - */ - async loadData(params: CreateTranscriptParameters | string): Promise<Document[]> { - const transcript = await this.getOrCreateTranscript(params); - return [ - new Document({ text: transcript.text || undefined }), - ]; - } -} - -/** - * Creates a transcript and returns a document for each paragraph. - */ -class AudioTranscriptParagraphsReader extends AssemblyAIReader { - /** - * Creates or gets a transcript, and returns a document for each paragraph. - * @param params The parameters to create or get the transcript. - * @returns A promise that resolves to an array of documents, each containing a paragraph of the transcript. - */ - async loadData(params: CreateTranscriptParameters | string): Promise<Document[]> { - let transcriptId = await this.getTranscriptId(params); - const paragraphsResponse = await this.client.transcripts.paragraphs( - transcriptId - ); - return paragraphsResponse.paragraphs.map((p: TranscriptParagraph) => - new Document({ text: p.text }), - ); - } -} - -/** - * Creates a transcript and returns a document for each sentence. - */ -class AudioTranscriptSentencesReader extends AssemblyAIReader { - /** - * Creates or gets a transcript, and returns a document for each sentence. - * @param params The parameters to create or get the transcript. - * @returns A promise that resolves to an array of documents, each containing a sentence of the transcript. - */ - async loadData(params: CreateTranscriptParameters | string): Promise<Document[]> { - let transcriptId = await this.getTranscriptId(params); - const sentencesResponse = await this.client.transcripts.sentences( - transcriptId - ); - return sentencesResponse.sentences.map((p: TranscriptSentence) => - new Document({ text: p.text }), - ); - } -} - -/** - * Creates a transcript and reads subtitles for the transcript as `srt` or `vtt` format. - */ -class AudioSubtitlesReader extends AssemblyAIReader { - /** - * Creates or gets a transcript and reads subtitles for the transcript as `srt` or `vtt` format. - * @param params The parameters to create or get the transcript. - * @param subtitleFormat The format of the subtitles, either `srt` or `vtt`. - * @returns A promise that resolves a document containing the subtitles as the page content. - */ - async loadData( - params: CreateTranscriptParameters | string, - subtitleFormat: SubtitleFormat = 'srt' - ): Promise<Document[]> { - let transcriptId = await this.getTranscriptId(params); - const subtitles = await this.client.transcripts.subtitles(transcriptId, subtitleFormat); - return [new Document({ text: subtitles })]; - } -} - -export { - AudioTranscriptReader, - AudioTranscriptParagraphsReader, - AudioTranscriptSentencesReader, - AudioSubtitlesReader, -} -export type { - AssemblyAIOptions, - CreateTranscriptParameters, - SubtitleFormat -} diff --git a/packages/eslint-config-custom/index.js b/packages/eslint-config-custom/index.js index 8417c5e6d90aef30254fec90a19fbb1d347ed4f1..142321257509d7748ce9de170d9cae31406ba968 100644 --- a/packages/eslint-config-custom/index.js +++ b/packages/eslint-config-custom/index.js @@ -9,7 +9,6 @@ module.exports = { "OPENAI_API_KEY", "REPLICATE_API_TOKEN", "ANTHROPIC_API_KEY", - "ASSEMBLYAI_API_KEY", "AZURE_OPENAI_KEY", "AZURE_OPENAI_ENDPOINT", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7b8fa48c4e4b4ce819d245eb0e2a17ebdb617073..3bdf8e4f5a3459f6fcaba09136e0b4074f5d215d 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -156,9 +156,6 @@ importers: '@xenova/transformers': specifier: ^2.8.0 version: 2.8.0 - assemblyai: - specifier: ^3.0.1 - version: 3.1.1 crypto-js: specifier: ^4.2.0 version: 4.2.0 @@ -5203,15 +5200,6 @@ packages: safer-buffer: 2.1.2 dev: true - /assemblyai@3.1.1: - resolution: {integrity: sha512-rEhLnIZU7TupOkdzN9WjxhVU5bxMVofkNslzTArsuilaqOusPUmh6I7SGcJCGwKKE8qEweBaoY4DZ03cxv4KoA==} - dependencies: - ws: 8.14.2 - transitivePeerDependencies: - - bufferutil - - utf-8-validate - dev: false - /assert@2.1.0: resolution: {integrity: sha512-eLHpSK/Y4nhMJ07gDaAzoX/XAKS8PSaojml3M0DM4JpV1LAi5JOJ/p6H/XWrl8L+DzVEvVCW1z3vWAaB9oTsQw==} dependencies: