diff --git a/examples/extractors/keywordExtractor.ts b/examples/extractors/keywordExtractor.ts
index 0a329c898336267c2c2a33ce7162d101b9bab99d..8669c00048b7ba1b2a7a3bb8415a912427ee6ef6 100644
--- a/examples/extractors/keywordExtractor.ts
+++ b/examples/extractors/keywordExtractor.ts
@@ -16,7 +16,10 @@ import {
 
   console.log(nodes);
 
-  const keywordExtractor = new KeywordExtractor(openaiLLM, 5);
+  const keywordExtractor = new KeywordExtractor({
+    llm: openaiLLM,
+    keywords: 5,
+  });
 
   const nodesWithKeywordMetadata = await keywordExtractor.processNodes(nodes);
 
diff --git a/examples/extractors/questionsAnsweredExtractor.ts b/examples/extractors/questionsAnsweredExtractor.ts
index 3eb6161442602ddaa9f0b85958416bdf1c6d62b9..30c55d98d0e6f27b9210f5b1485e0d1173656a5c 100644
--- a/examples/extractors/questionsAnsweredExtractor.ts
+++ b/examples/extractors/questionsAnsweredExtractor.ts
@@ -19,10 +19,10 @@ import {
     }),
   ]);
 
-  const questionsAnsweredExtractor = new QuestionsAnsweredExtractor(
-    openaiLLM,
-    5,
-  );
+  const questionsAnsweredExtractor = new QuestionsAnsweredExtractor({
+    llm: openaiLLM,
+    questions: 5,
+  });
 
   const nodesWithQuestionsMetadata =
     await questionsAnsweredExtractor.processNodes(nodes);
diff --git a/examples/extractors/summaryExtractor.ts b/examples/extractors/summaryExtractor.ts
index b6c5bd217cba409af31a526646f6d5a7becc3f2c..1bb2c903af27baea7848a3e0f81502f9bd50d863 100644
--- a/examples/extractors/summaryExtractor.ts
+++ b/examples/extractors/summaryExtractor.ts
@@ -16,7 +16,9 @@ import {
     }),
   ]);
 
-  const summaryExtractor = new SummaryExtractor(openaiLLM);
+  const summaryExtractor = new SummaryExtractor({
+    llm: openaiLLM,
+  });
 
   const nodesWithSummaryMetadata = await summaryExtractor.processNodes(nodes);
 
diff --git a/examples/extractors/titleExtractor.ts b/examples/extractors/titleExtractor.ts
index 95fab03274092383fca748367e925672a4ff423c..527bd046f30face159b9070d076a3ebe280f8391 100644
--- a/examples/extractors/titleExtractor.ts
+++ b/examples/extractors/titleExtractor.ts
@@ -11,7 +11,10 @@ import { Document, OpenAI, SimpleNodeParser, TitleExtractor } from "llamaindex";
     }),
   ]);
 
-  const titleExtractor = new TitleExtractor(openaiLLM, 1);
+  const titleExtractor = new TitleExtractor({
+    llm: openaiLLM,
+    nodes: 5,
+  });
 
   const nodesWithTitledMetadata = await titleExtractor.processNodes(nodes);
 
diff --git a/packages/core/src/extractors/MetadataExtractors.ts b/packages/core/src/extractors/MetadataExtractors.ts
index 8fc884c12b7f57bb25e42639055989efbbc87d75..73173bd5ef39c2464a3c9cfdfc4491215c5d28cc 100644
--- a/packages/core/src/extractors/MetadataExtractors.ts
+++ b/packages/core/src/extractors/MetadataExtractors.ts
@@ -1,5 +1,5 @@
 import { BaseNode, MetadataMode, TextNode } from "../Node";
-import { LLM } from "../llm";
+import { LLM, OpenAI } from "../llm";
 import {
   defaultKeywordExtractorPromptTemplate,
   defaultQuestionAnswerPromptTemplate,
@@ -11,6 +11,11 @@ import { BaseExtractor } from "./types";
 
 const STRIP_REGEX = /(\r\n|\n|\r)/gm;
 
+type KeywordExtractArgs = {
+  llm?: LLM;
+  keywords?: number;
+};
+
 type ExtractKeyword = {
   excerptKeywords: string;
 };
@@ -38,12 +43,14 @@ export class KeywordExtractor extends BaseExtractor {
    * @param {number} keywords Number of keywords to extract.
    * @throws {Error} If keywords is less than 1.
    */
-  constructor(llm: LLM, keywords: number = 5) {
-    if (keywords < 1) throw new Error("Keywords must be greater than 0");
+  constructor(options?: KeywordExtractArgs) {
+    if (options?.keywords && options.keywords < 1)
+      throw new Error("Keywords must be greater than 0");
 
     super();
-    this.llm = llm;
-    this.keywords = keywords;
+
+    this.llm = options?.llm ?? new OpenAI();
+    this.keywords = options?.keywords ?? 5;
   }
 
   /**
@@ -81,6 +88,13 @@ export class KeywordExtractor extends BaseExtractor {
   }
 }
 
+type TitleExtractorsArgs = {
+  llm?: LLM;
+  nodes?: number;
+  nodeTemplate?: string;
+  combineTemplate?: string;
+};
+
 type ExtractTitle = {
   documentTitle: string;
 };
@@ -128,20 +142,16 @@ export class TitleExtractor extends BaseExtractor {
    * @param {string} node_template The prompt template to use for the title extractor.
    * @param {string} combine_template The prompt template to merge title with..
    */
-  constructor(
-    llm: LLM,
-    nodes: number = 5,
-    node_template?: string,
-    combine_template?: string,
-  ) {
+  constructor(options?: TitleExtractorsArgs) {
     super();
-    this.llm = llm;
-    this.nodes = nodes;
+    this.llm = options?.llm ?? new OpenAI();
+    this.nodes = options?.nodes ?? 5;
 
-    this.nodeTemplate = node_template ?? defaultTitleExtractorPromptTemplate();
+    this.nodeTemplate =
+      options?.nodeTemplate ?? defaultTitleExtractorPromptTemplate();
     this.combineTemplate =
-      combine_template ?? defaultTitleCombinePromptTemplate();
+      options?.combineTemplate ?? defaultTitleCombinePromptTemplate();
   }
 
   /**
@@ -197,6 +207,13 @@ export class TitleExtractor extends BaseExtractor {
   }
 }
 
+type QuestionAnswerExtractArgs = {
+  llm?: LLM;
+  questions?: number;
+  promptTemplate?: string;
+  embeddingOnly?: boolean;
+};
+
 type ExtractQuestion = {
   questionsThisExcerptCanAnswer: string;
 };
@@ -238,25 +255,21 @@ export class QuestionsAnsweredExtractor extends BaseExtractor {
    * @param {string} promptTemplate The prompt template to use for the question extractor.
    * @param {boolean} embeddingOnly Wheter to use metadata for embeddings only.
    */
-  constructor(
-    llm: LLM,
-    questions: number = 5,
-    promptTemplate?: string,
-    embeddingOnly: boolean = false,
-  ) {
-    if (questions < 1) throw new Error("Questions must be greater than 0");
+  constructor(options?: QuestionAnswerExtractArgs) {
+    if (options?.questions && options.questions < 1)
+      throw new Error("Questions must be greater than 0");
 
     super();
-    this.llm = llm;
-    this.questions = questions;
+    this.llm = options?.llm ?? new OpenAI();
+    this.questions = options?.questions ?? 5;
     this.promptTemplate =
-      promptTemplate ??
+      options?.promptTemplate ??
       defaultQuestionAnswerPromptTemplate({
-        numQuestions: questions,
+        numQuestions: this.questions,
         contextStr: "",
      });
-    this.embeddingOnly = embeddingOnly;
+    this.embeddingOnly = options?.embeddingOnly ?? false;
   }
 
   /**
@@ -303,6 +316,12 @@ export class QuestionsAnsweredExtractor extends BaseExtractor {
   }
 }
 
+type SummaryExtractArgs = {
+  llm?: LLM;
+  summaries?: string[];
+  promptTemplate?: string;
+};
+
 type ExtractSummary = {
   sectionSummary: string;
   prevSectionSummary: string;
@@ -335,24 +354,25 @@ export class SummaryExtractor extends BaseExtractor {
   private _prevSummary: boolean;
   private _nextSummary: boolean;
 
-  constructor(
-    llm: LLM,
-    summaries: string[] = ["self"],
-    promptTemplate?: string,
-  ) {
-    if (!summaries.some((s) => ["self", "prev", "next"].includes(s)))
+  constructor(options?: SummaryExtractArgs) {
+    const summaries = options?.summaries ?? ["self"];
+
+    if (
+      summaries &&
+      !summaries.some((s) => ["self", "prev", "next"].includes(s))
+    )
       throw new Error("Summaries must be one of 'self', 'prev', 'next'");
 
     super();
-    this.llm = llm;
+    this.llm = options?.llm ?? new OpenAI();
     this.summaries = summaries;
     this.promptTemplate =
-      promptTemplate ?? defaultSummaryExtractorPromptTemplate();
+      options?.promptTemplate ?? defaultSummaryExtractorPromptTemplate();
 
-    this._selfSummary = summaries.includes("self");
-    this._prevSummary = summaries.includes("prev");
-    this._nextSummary = summaries.includes("next");
+    this._selfSummary = summaries?.includes("self") ?? false;
+    this._prevSummary = summaries?.includes("prev") ?? false;
+    this._nextSummary = summaries?.includes("next") ?? false;
   }
 
   /**
diff --git a/packages/core/src/tests/MetadataExtractors.test.ts b/packages/core/src/tests/MetadataExtractors.test.ts
index 57026943edba5b56fcee30935cad9d4c9853e4f9..abc1b78ba6559e036c86bcf573442df4a6b4f685 100644
--- a/packages/core/src/tests/MetadataExtractors.test.ts
+++ b/packages/core/src/tests/MetadataExtractors.test.ts
@@ -75,7 +75,10 @@ describe("[MetadataExtractor]: Extractors should populate the metadata", () => {
       new Document({ text: DEFAULT_LLM_TEXT_OUTPUT }),
     ]);
 
-    const keywordExtractor = new KeywordExtractor(serviceContext.llm, 5);
+    const keywordExtractor = new KeywordExtractor({
+      llm: serviceContext.llm,
+      keywords: 5,
+    });
 
     const nodesWithKeywordMetadata = await keywordExtractor.processNodes(nodes);
 
@@ -91,7 +94,10 @@ describe("[MetadataExtractor]: Extractors should populate the metadata", () => {
       new Document({ text: DEFAULT_LLM_TEXT_OUTPUT }),
     ]);
 
-    const titleExtractor = new TitleExtractor(serviceContext.llm, 5);
+    const titleExtractor = new TitleExtractor({
+      llm: serviceContext.llm,
+      nodes: 5,
+    });
 
     const nodesWithKeywordMetadata = await titleExtractor.processNodes(nodes);
 
@@ -107,10 +113,10 @@ describe("[MetadataExtractor]: Extractors should populate the metadata", () => {
       new Document({ text: DEFAULT_LLM_TEXT_OUTPUT }),
     ]);
 
-    const questionsAnsweredExtractor = new QuestionsAnsweredExtractor(
-      serviceContext.llm,
-      5,
-    );
+    const questionsAnsweredExtractor = new QuestionsAnsweredExtractor({
+      llm: serviceContext.llm,
+      questions: 5,
+    });
 
     const nodesWithKeywordMetadata =
       await questionsAnsweredExtractor.processNodes(nodes);
 
@@ -127,7 +133,9 @@ describe("[MetadataExtractor]: Extractors should populate the metadata", () => {
       new Document({ text: DEFAULT_LLM_TEXT_OUTPUT }),
     ]);
 
-    const summaryExtractor = new SummaryExtractor(serviceContext.llm);
+    const summaryExtractor = new SummaryExtractor({
+      llm: serviceContext.llm,
+    });
 
     const nodesWithKeywordMetadata = await summaryExtractor.processNodes(nodes);